10b57cec5SDimitry Andric //===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file implements some loop unrolling utilities for loops with run-time 100b57cec5SDimitry Andric // trip counts. See LoopUnroll.cpp for unrolling loops with compile-time 110b57cec5SDimitry Andric // trip counts. 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric // The functions in this file are used to generate extra code when the 140b57cec5SDimitry Andric // run-time trip count modulo the unroll factor is not 0. When this is the 150b57cec5SDimitry Andric // case, we need to generate code to execute these 'left over' iterations. 160b57cec5SDimitry Andric // 170b57cec5SDimitry Andric // The current strategy generates an if-then-else sequence prior to the 180b57cec5SDimitry Andric // unrolled loop to execute the 'left over' iterations before or after the 190b57cec5SDimitry Andric // unrolled loop. 200b57cec5SDimitry Andric // 210b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 220b57cec5SDimitry Andric 230b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 2481ad6265SDimitry Andric #include "llvm/Analysis/DomTreeUpdater.h" 25349cc55cSDimitry Andric #include "llvm/Analysis/InstructionSimplify.h" 260b57cec5SDimitry Andric #include "llvm/Analysis/LoopIterator.h" 270b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h" 2881ad6265SDimitry Andric #include "llvm/Analysis/ValueTracking.h" 290b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h" 300b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 31e8d8bef9SDimitry Andric #include "llvm/IR/MDBuilder.h" 320b57cec5SDimitry Andric #include "llvm/IR/Module.h" 33bdd1243dSDimitry Andric #include "llvm/IR/ProfDataUtils.h" 34480093f4SDimitry Andric #include "llvm/Support/CommandLine.h" 350b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 360b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 370b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h" 380b57cec5SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h" 39349cc55cSDimitry Andric #include "llvm/Transforms/Utils/Local.h" 400b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h" 415ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 420b57cec5SDimitry Andric #include "llvm/Transforms/Utils/UnrollLoop.h" 430b57cec5SDimitry Andric #include <algorithm> 440b57cec5SDimitry Andric 450b57cec5SDimitry Andric using namespace llvm; 460b57cec5SDimitry Andric 470b57cec5SDimitry Andric #define DEBUG_TYPE "loop-unroll" 480b57cec5SDimitry Andric 490b57cec5SDimitry Andric STATISTIC(NumRuntimeUnrolled, 500b57cec5SDimitry Andric "Number of loops unrolled with run-time trip counts"); 510b57cec5SDimitry Andric static cl::opt<bool> UnrollRuntimeMultiExit( 520b57cec5SDimitry Andric "unroll-runtime-multi-exit", cl::init(false), cl::Hidden, 530b57cec5SDimitry Andric cl::desc("Allow runtime unrolling for loops with multiple exits, when " 540b57cec5SDimitry Andric "epilog is generated")); 55fe6060f1SDimitry Andric static cl::opt<bool> UnrollRuntimeOtherExitPredictable( 56fe6060f1SDimitry Andric "unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden, 57fe6060f1SDimitry Andric cl::desc("Assume the non latch exit block to be predictable")); 580b57cec5SDimitry Andric 595f757f3fSDimitry Andric // Probability that the loop trip count is so small that after the prolog 605f757f3fSDimitry Andric // we do not enter the unrolled loop at all. 615f757f3fSDimitry Andric // It is unlikely that the loop trip count is smaller than the unroll factor; 625f757f3fSDimitry Andric // other than that, the choice of constant is not tuned yet. 635f757f3fSDimitry Andric static const uint32_t UnrolledLoopHeaderWeights[] = {1, 127}; 645f757f3fSDimitry Andric // Probability that the loop trip count is so small that we skip the unrolled 655f757f3fSDimitry Andric // loop completely and immediately enter the epilogue loop. 665f757f3fSDimitry Andric // It is unlikely that the loop trip count is smaller than the unroll factor; 675f757f3fSDimitry Andric // other than that, the choice of constant is not tuned yet. 685f757f3fSDimitry Andric static const uint32_t EpilogHeaderWeights[] = {1, 127}; 695f757f3fSDimitry Andric 700b57cec5SDimitry Andric /// Connect the unrolling prolog code to the original loop. 710b57cec5SDimitry Andric /// The unrolling prolog code contains code to execute the 720b57cec5SDimitry Andric /// 'extra' iterations if the run-time trip count modulo the 730b57cec5SDimitry Andric /// unroll count is non-zero. 740b57cec5SDimitry Andric /// 750b57cec5SDimitry Andric /// This function performs the following: 760b57cec5SDimitry Andric /// - Create PHI nodes at prolog end block to combine values 770b57cec5SDimitry Andric /// that exit the prolog code and jump around the prolog. 780b57cec5SDimitry Andric /// - Add a PHI operand to a PHI node at the loop exit block 790b57cec5SDimitry Andric /// for values that exit the prolog and go around the loop. 800b57cec5SDimitry Andric /// - Branch around the original loop if the trip count is less 810b57cec5SDimitry Andric /// than the unroll factor. 820b57cec5SDimitry Andric /// 830b57cec5SDimitry Andric static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, 840b57cec5SDimitry Andric BasicBlock *PrologExit, 850b57cec5SDimitry Andric BasicBlock *OriginalLoopLatchExit, 860b57cec5SDimitry Andric BasicBlock *PreHeader, BasicBlock *NewPreHeader, 870b57cec5SDimitry Andric ValueToValueMapTy &VMap, DominatorTree *DT, 8881ad6265SDimitry Andric LoopInfo *LI, bool PreserveLCSSA, 8981ad6265SDimitry Andric ScalarEvolution &SE) { 900b57cec5SDimitry Andric // Loop structure should be the following: 910b57cec5SDimitry Andric // Preheader 920b57cec5SDimitry Andric // PrologHeader 930b57cec5SDimitry Andric // ... 940b57cec5SDimitry Andric // PrologLatch 950b57cec5SDimitry Andric // PrologExit 960b57cec5SDimitry Andric // NewPreheader 970b57cec5SDimitry Andric // Header 980b57cec5SDimitry Andric // ... 990b57cec5SDimitry Andric // Latch 1000b57cec5SDimitry Andric // LatchExit 1010b57cec5SDimitry Andric BasicBlock *Latch = L->getLoopLatch(); 1020b57cec5SDimitry Andric assert(Latch && "Loop must have a latch"); 1030b57cec5SDimitry Andric BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]); 1040b57cec5SDimitry Andric 1050b57cec5SDimitry Andric // Create a PHI node for each outgoing value from the original loop 1060b57cec5SDimitry Andric // (which means it is an outgoing value from the prolog code too). 1070b57cec5SDimitry Andric // The new PHI node is inserted in the prolog end basic block. 1080b57cec5SDimitry Andric // The new PHI node value is added as an operand of a PHI node in either 1090b57cec5SDimitry Andric // the loop header or the loop exit block. 1100b57cec5SDimitry Andric for (BasicBlock *Succ : successors(Latch)) { 1110b57cec5SDimitry Andric for (PHINode &PN : Succ->phis()) { 1120b57cec5SDimitry Andric // Add a new PHI node to the prolog end block and add the 1130b57cec5SDimitry Andric // appropriate incoming values. 1140b57cec5SDimitry Andric // TODO: This code assumes that the PrologExit (or the LatchExit block for 1150b57cec5SDimitry Andric // prolog loop) contains only one predecessor from the loop, i.e. the 1160b57cec5SDimitry Andric // PrologLatch. When supporting multiple-exiting block loops, we can have 1170b57cec5SDimitry Andric // two or more blocks that have the LatchExit as the target in the 1180b57cec5SDimitry Andric // original loop. 1195f757f3fSDimitry Andric PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr"); 1205f757f3fSDimitry Andric NewPN->insertBefore(PrologExit->getFirstNonPHIIt()); 1210b57cec5SDimitry Andric // Adding a value to the new PHI node from the original loop preheader. 1220b57cec5SDimitry Andric // This is the value that skips all the prolog code. 1230b57cec5SDimitry Andric if (L->contains(&PN)) { 1240b57cec5SDimitry Andric // Succ is loop header. 1250b57cec5SDimitry Andric NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), 1260b57cec5SDimitry Andric PreHeader); 1270b57cec5SDimitry Andric } else { 1280b57cec5SDimitry Andric // Succ is LatchExit. 129*0fca6ea1SDimitry Andric NewPN->addIncoming(PoisonValue::get(PN.getType()), PreHeader); 1300b57cec5SDimitry Andric } 1310b57cec5SDimitry Andric 1320b57cec5SDimitry Andric Value *V = PN.getIncomingValueForBlock(Latch); 1330b57cec5SDimitry Andric if (Instruction *I = dyn_cast<Instruction>(V)) { 1340b57cec5SDimitry Andric if (L->contains(I)) { 1350b57cec5SDimitry Andric V = VMap.lookup(I); 1360b57cec5SDimitry Andric } 1370b57cec5SDimitry Andric } 1380b57cec5SDimitry Andric // Adding a value to the new PHI node from the last prolog block 1390b57cec5SDimitry Andric // that was created. 1400b57cec5SDimitry Andric NewPN->addIncoming(V, PrologLatch); 1410b57cec5SDimitry Andric 1420b57cec5SDimitry Andric // Update the existing PHI node operand with the value from the 1430b57cec5SDimitry Andric // new PHI node. How this is done depends on if the existing 1440b57cec5SDimitry Andric // PHI node is in the original loop block, or the exit block. 1450b57cec5SDimitry Andric if (L->contains(&PN)) 1460b57cec5SDimitry Andric PN.setIncomingValueForBlock(NewPreHeader, NewPN); 1470b57cec5SDimitry Andric else 1480b57cec5SDimitry Andric PN.addIncoming(NewPN, PrologExit); 14981ad6265SDimitry Andric SE.forgetValue(&PN); 1500b57cec5SDimitry Andric } 1510b57cec5SDimitry Andric } 1520b57cec5SDimitry Andric 1530b57cec5SDimitry Andric // Make sure that created prolog loop is in simplified form 1540b57cec5SDimitry Andric SmallVector<BasicBlock *, 4> PrologExitPreds; 1550b57cec5SDimitry Andric Loop *PrologLoop = LI->getLoopFor(PrologLatch); 1560b57cec5SDimitry Andric if (PrologLoop) { 1570b57cec5SDimitry Andric for (BasicBlock *PredBB : predecessors(PrologExit)) 1580b57cec5SDimitry Andric if (PrologLoop->contains(PredBB)) 1590b57cec5SDimitry Andric PrologExitPreds.push_back(PredBB); 1600b57cec5SDimitry Andric 1610b57cec5SDimitry Andric SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI, 1620b57cec5SDimitry Andric nullptr, PreserveLCSSA); 1630b57cec5SDimitry Andric } 1640b57cec5SDimitry Andric 1650b57cec5SDimitry Andric // Create a branch around the original loop, which is taken if there are no 1660b57cec5SDimitry Andric // iterations remaining to be executed after running the prologue. 1670b57cec5SDimitry Andric Instruction *InsertPt = PrologExit->getTerminator(); 1680b57cec5SDimitry Andric IRBuilder<> B(InsertPt); 1690b57cec5SDimitry Andric 1700b57cec5SDimitry Andric assert(Count != 0 && "nonsensical Count!"); 1710b57cec5SDimitry Andric 1720b57cec5SDimitry Andric // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1) 1730b57cec5SDimitry Andric // This means %xtraiter is (BECount + 1) and all of the iterations of this 1740b57cec5SDimitry Andric // loop were executed by the prologue. Note that if BECount <u (Count - 1) 1750b57cec5SDimitry Andric // then (BECount + 1) cannot unsigned-overflow. 1760b57cec5SDimitry Andric Value *BrLoopExit = 1770b57cec5SDimitry Andric B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)); 1780b57cec5SDimitry Andric // Split the exit to maintain loop canonicalization guarantees 1790b57cec5SDimitry Andric SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit)); 1800b57cec5SDimitry Andric SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI, 1810b57cec5SDimitry Andric nullptr, PreserveLCSSA); 1820b57cec5SDimitry Andric // Add the branch to the exit block (around the unrolled loop) 1835f757f3fSDimitry Andric MDNode *BranchWeights = nullptr; 1845f757f3fSDimitry Andric if (hasBranchWeightMD(*Latch->getTerminator())) { 1855f757f3fSDimitry Andric // Assume loop is nearly always entered. 1865f757f3fSDimitry Andric MDBuilder MDB(B.getContext()); 1875f757f3fSDimitry Andric BranchWeights = MDB.createBranchWeights(UnrolledLoopHeaderWeights); 1885f757f3fSDimitry Andric } 1895f757f3fSDimitry Andric B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader, 1905f757f3fSDimitry Andric BranchWeights); 1910b57cec5SDimitry Andric InsertPt->eraseFromParent(); 192349cc55cSDimitry Andric if (DT) { 193349cc55cSDimitry Andric auto *NewDom = DT->findNearestCommonDominator(OriginalLoopLatchExit, 194349cc55cSDimitry Andric PrologExit); 195349cc55cSDimitry Andric DT->changeImmediateDominator(OriginalLoopLatchExit, NewDom); 196349cc55cSDimitry Andric } 1970b57cec5SDimitry Andric } 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric /// Connect the unrolling epilog code to the original loop. 2000b57cec5SDimitry Andric /// The unrolling epilog code contains code to execute the 2010b57cec5SDimitry Andric /// 'extra' iterations if the run-time trip count modulo the 2020b57cec5SDimitry Andric /// unroll count is non-zero. 2030b57cec5SDimitry Andric /// 2040b57cec5SDimitry Andric /// This function performs the following: 2050b57cec5SDimitry Andric /// - Update PHI nodes at the unrolling loop exit and epilog loop exit 2060b57cec5SDimitry Andric /// - Create PHI nodes at the unrolling loop exit to combine 2070b57cec5SDimitry Andric /// values that exit the unrolling loop code and jump around it. 2080b57cec5SDimitry Andric /// - Update PHI operands in the epilog loop by the new PHI nodes 2090b57cec5SDimitry Andric /// - Branch around the epilog loop if extra iters (ModVal) is zero. 2100b57cec5SDimitry Andric /// 2110b57cec5SDimitry Andric static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, 2120b57cec5SDimitry Andric BasicBlock *Exit, BasicBlock *PreHeader, 2130b57cec5SDimitry Andric BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, 2140b57cec5SDimitry Andric ValueToValueMapTy &VMap, DominatorTree *DT, 2155f757f3fSDimitry Andric LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE, 2165f757f3fSDimitry Andric unsigned Count) { 2170b57cec5SDimitry Andric BasicBlock *Latch = L->getLoopLatch(); 2180b57cec5SDimitry Andric assert(Latch && "Loop must have a latch"); 2190b57cec5SDimitry Andric BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]); 2200b57cec5SDimitry Andric 2210b57cec5SDimitry Andric // Loop structure should be the following: 2220b57cec5SDimitry Andric // 2230b57cec5SDimitry Andric // PreHeader 2240b57cec5SDimitry Andric // NewPreHeader 2250b57cec5SDimitry Andric // Header 2260b57cec5SDimitry Andric // ... 2270b57cec5SDimitry Andric // Latch 2280b57cec5SDimitry Andric // NewExit (PN) 2290b57cec5SDimitry Andric // EpilogPreHeader 2300b57cec5SDimitry Andric // EpilogHeader 2310b57cec5SDimitry Andric // ... 2320b57cec5SDimitry Andric // EpilogLatch 2330b57cec5SDimitry Andric // Exit (EpilogPN) 2340b57cec5SDimitry Andric 2350b57cec5SDimitry Andric // Update PHI nodes at NewExit and Exit. 2360b57cec5SDimitry Andric for (PHINode &PN : NewExit->phis()) { 2370b57cec5SDimitry Andric // PN should be used in another PHI located in Exit block as 2380b57cec5SDimitry Andric // Exit was split by SplitBlockPredecessors into Exit and NewExit 239bdd1243dSDimitry Andric // Basically it should look like: 2400b57cec5SDimitry Andric // NewExit: 2410b57cec5SDimitry Andric // PN = PHI [I, Latch] 2420b57cec5SDimitry Andric // ... 2430b57cec5SDimitry Andric // Exit: 244349cc55cSDimitry Andric // EpilogPN = PHI [PN, EpilogPreHeader], [X, Exit2], [Y, Exit2.epil] 245349cc55cSDimitry Andric // 246349cc55cSDimitry Andric // Exits from non-latch blocks point to the original exit block and the 247349cc55cSDimitry Andric // epilogue edges have already been added. 2480b57cec5SDimitry Andric // 2490b57cec5SDimitry Andric // There is EpilogPreHeader incoming block instead of NewExit as 2500b57cec5SDimitry Andric // NewExit was spilt 1 more time to get EpilogPreHeader. 2510b57cec5SDimitry Andric assert(PN.hasOneUse() && "The phi should have 1 use"); 2520b57cec5SDimitry Andric PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser()); 2530b57cec5SDimitry Andric assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block"); 2540b57cec5SDimitry Andric 2550b57cec5SDimitry Andric // Add incoming PreHeader from branch around the Loop 256*0fca6ea1SDimitry Andric PN.addIncoming(PoisonValue::get(PN.getType()), PreHeader); 25781ad6265SDimitry Andric SE.forgetValue(&PN); 2580b57cec5SDimitry Andric 2590b57cec5SDimitry Andric Value *V = PN.getIncomingValueForBlock(Latch); 2600b57cec5SDimitry Andric Instruction *I = dyn_cast<Instruction>(V); 2610b57cec5SDimitry Andric if (I && L->contains(I)) 2620b57cec5SDimitry Andric // If value comes from an instruction in the loop add VMap value. 2630b57cec5SDimitry Andric V = VMap.lookup(I); 2640b57cec5SDimitry Andric // For the instruction out of the loop, constant or undefined value 2650b57cec5SDimitry Andric // insert value itself. 2660b57cec5SDimitry Andric EpilogPN->addIncoming(V, EpilogLatch); 2670b57cec5SDimitry Andric 2680b57cec5SDimitry Andric assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 && 2690b57cec5SDimitry Andric "EpilogPN should have EpilogPreHeader incoming block"); 2700b57cec5SDimitry Andric // Change EpilogPreHeader incoming block to NewExit. 2710b57cec5SDimitry Andric EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader), 2720b57cec5SDimitry Andric NewExit); 2730b57cec5SDimitry Andric // Now PHIs should look like: 2740b57cec5SDimitry Andric // NewExit: 275*0fca6ea1SDimitry Andric // PN = PHI [I, Latch], [poison, PreHeader] 2760b57cec5SDimitry Andric // ... 2770b57cec5SDimitry Andric // Exit: 2780b57cec5SDimitry Andric // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch] 2790b57cec5SDimitry Andric } 2800b57cec5SDimitry Andric 2810b57cec5SDimitry Andric // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader). 2820b57cec5SDimitry Andric // Update corresponding PHI nodes in epilog loop. 2830b57cec5SDimitry Andric for (BasicBlock *Succ : successors(Latch)) { 2840b57cec5SDimitry Andric // Skip this as we already updated phis in exit blocks. 2850b57cec5SDimitry Andric if (!L->contains(Succ)) 2860b57cec5SDimitry Andric continue; 2870b57cec5SDimitry Andric for (PHINode &PN : Succ->phis()) { 2880b57cec5SDimitry Andric // Add new PHI nodes to the loop exit block and update epilog 2890b57cec5SDimitry Andric // PHIs with the new PHI values. 2905f757f3fSDimitry Andric PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr"); 2915f757f3fSDimitry Andric NewPN->insertBefore(NewExit->getFirstNonPHIIt()); 2920b57cec5SDimitry Andric // Adding a value to the new PHI node from the unrolling loop preheader. 2930b57cec5SDimitry Andric NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader); 2940b57cec5SDimitry Andric // Adding a value to the new PHI node from the unrolling loop latch. 2950b57cec5SDimitry Andric NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch); 2960b57cec5SDimitry Andric 2970b57cec5SDimitry Andric // Update the existing PHI node operand with the value from the new PHI 2980b57cec5SDimitry Andric // node. Corresponding instruction in epilog loop should be PHI. 2990b57cec5SDimitry Andric PHINode *VPN = cast<PHINode>(VMap[&PN]); 3000b57cec5SDimitry Andric VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN); 3010b57cec5SDimitry Andric } 3020b57cec5SDimitry Andric } 3030b57cec5SDimitry Andric 3040b57cec5SDimitry Andric Instruction *InsertPt = NewExit->getTerminator(); 3050b57cec5SDimitry Andric IRBuilder<> B(InsertPt); 3060b57cec5SDimitry Andric Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod"); 3070b57cec5SDimitry Andric assert(Exit && "Loop must have a single exit block only"); 3080b57cec5SDimitry Andric // Split the epilogue exit to maintain loop canonicalization guarantees 3090b57cec5SDimitry Andric SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); 3100b57cec5SDimitry Andric SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr, 3110b57cec5SDimitry Andric PreserveLCSSA); 3120b57cec5SDimitry Andric // Add the branch to the exit block (around the unrolling loop) 3135f757f3fSDimitry Andric MDNode *BranchWeights = nullptr; 3145f757f3fSDimitry Andric if (hasBranchWeightMD(*Latch->getTerminator())) { 3155f757f3fSDimitry Andric // Assume equal distribution in interval [0, Count). 3165f757f3fSDimitry Andric MDBuilder MDB(B.getContext()); 3175f757f3fSDimitry Andric BranchWeights = MDB.createBranchWeights(1, Count - 1); 3185f757f3fSDimitry Andric } 3195f757f3fSDimitry Andric B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit, BranchWeights); 3200b57cec5SDimitry Andric InsertPt->eraseFromParent(); 321349cc55cSDimitry Andric if (DT) { 322349cc55cSDimitry Andric auto *NewDom = DT->findNearestCommonDominator(Exit, NewExit); 323349cc55cSDimitry Andric DT->changeImmediateDominator(Exit, NewDom); 324349cc55cSDimitry Andric } 3250b57cec5SDimitry Andric 3260b57cec5SDimitry Andric // Split the main loop exit to maintain canonicalization guarantees. 3270b57cec5SDimitry Andric SmallVector<BasicBlock*, 4> NewExitPreds{Latch}; 3280b57cec5SDimitry Andric SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr, 3290b57cec5SDimitry Andric PreserveLCSSA); 3300b57cec5SDimitry Andric } 3310b57cec5SDimitry Andric 332349cc55cSDimitry Andric /// Create a clone of the blocks in a loop and connect them together. A new 333349cc55cSDimitry Andric /// loop will be created including all cloned blocks, and the iterator of the 334349cc55cSDimitry Andric /// new loop switched to count NewIter down to 0. 3350b57cec5SDimitry Andric /// The cloned blocks should be inserted between InsertTop and InsertBot. 336349cc55cSDimitry Andric /// InsertTop should be new preheader, InsertBot new loop exit. 337349cc55cSDimitry Andric /// Returns the new cloned loop that is created. 3380b57cec5SDimitry Andric static Loop * 339349cc55cSDimitry Andric CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder, 340349cc55cSDimitry Andric const bool UnrollRemainder, 3410b57cec5SDimitry Andric BasicBlock *InsertTop, 3420b57cec5SDimitry Andric BasicBlock *InsertBot, BasicBlock *Preheader, 3435f757f3fSDimitry Andric std::vector<BasicBlock *> &NewBlocks, 3445f757f3fSDimitry Andric LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, 3455f757f3fSDimitry Andric DominatorTree *DT, LoopInfo *LI, unsigned Count) { 3460b57cec5SDimitry Andric StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; 3470b57cec5SDimitry Andric BasicBlock *Header = L->getHeader(); 3480b57cec5SDimitry Andric BasicBlock *Latch = L->getLoopLatch(); 3490b57cec5SDimitry Andric Function *F = Header->getParent(); 3500b57cec5SDimitry Andric LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); 3510b57cec5SDimitry Andric LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); 3520b57cec5SDimitry Andric Loop *ParentLoop = L->getParentLoop(); 3530b57cec5SDimitry Andric NewLoopsMap NewLoops; 3540b57cec5SDimitry Andric NewLoops[ParentLoop] = ParentLoop; 3550b57cec5SDimitry Andric 3560b57cec5SDimitry Andric // For each block in the original loop, create a new copy, 3570b57cec5SDimitry Andric // and update the value map with the newly created values. 3580b57cec5SDimitry Andric for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { 3590b57cec5SDimitry Andric BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); 3600b57cec5SDimitry Andric NewBlocks.push_back(NewBB); 3610b57cec5SDimitry Andric 3620b57cec5SDimitry Andric addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops); 3630b57cec5SDimitry Andric 3640b57cec5SDimitry Andric VMap[*BB] = NewBB; 3650b57cec5SDimitry Andric if (Header == *BB) { 3660b57cec5SDimitry Andric // For the first block, add a CFG connection to this newly 3670b57cec5SDimitry Andric // created block. 3680b57cec5SDimitry Andric InsertTop->getTerminator()->setSuccessor(0, NewBB); 3690b57cec5SDimitry Andric } 3700b57cec5SDimitry Andric 3710b57cec5SDimitry Andric if (DT) { 3720b57cec5SDimitry Andric if (Header == *BB) { 3730b57cec5SDimitry Andric // The header is dominated by the preheader. 3740b57cec5SDimitry Andric DT->addNewBlock(NewBB, InsertTop); 3750b57cec5SDimitry Andric } else { 3760b57cec5SDimitry Andric // Copy information from original loop to unrolled loop. 3770b57cec5SDimitry Andric BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock(); 3780b57cec5SDimitry Andric DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB])); 3790b57cec5SDimitry Andric } 3800b57cec5SDimitry Andric } 3810b57cec5SDimitry Andric 3820b57cec5SDimitry Andric if (Latch == *BB) { 383349cc55cSDimitry Andric // For the last block, create a loop back to cloned head. 3840b57cec5SDimitry Andric VMap.erase((*BB)->getTerminator()); 385349cc55cSDimitry Andric // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count. 386349cc55cSDimitry Andric // Subtle: NewIter can be 0 if we wrapped when computing the trip count, 387349cc55cSDimitry Andric // thus we must compare the post-increment (wrapping) value. 3880b57cec5SDimitry Andric BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]); 3890b57cec5SDimitry Andric BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator()); 3900b57cec5SDimitry Andric IRBuilder<> Builder(LatchBR); 3915f757f3fSDimitry Andric PHINode *NewIdx = 3925f757f3fSDimitry Andric PHINode::Create(NewIter->getType(), 2, suffix + ".iter"); 3935f757f3fSDimitry Andric NewIdx->insertBefore(FirstLoopBB->getFirstNonPHIIt()); 394349cc55cSDimitry Andric auto *Zero = ConstantInt::get(NewIdx->getType(), 0); 395349cc55cSDimitry Andric auto *One = ConstantInt::get(NewIdx->getType(), 1); 3965f757f3fSDimitry Andric Value *IdxNext = 3975f757f3fSDimitry Andric Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next"); 398349cc55cSDimitry Andric Value *IdxCmp = Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp"); 3995f757f3fSDimitry Andric MDNode *BranchWeights = nullptr; 4005f757f3fSDimitry Andric if (hasBranchWeightMD(*LatchBR)) { 4015f757f3fSDimitry Andric uint32_t ExitWeight; 4025f757f3fSDimitry Andric uint32_t BackEdgeWeight; 4035f757f3fSDimitry Andric if (Count >= 3) { 4045f757f3fSDimitry Andric // Note: We do not enter this loop for zero-remainders. The check 4055f757f3fSDimitry Andric // is at the end of the loop. We assume equal distribution between 4065f757f3fSDimitry Andric // possible remainders in [1, Count). 4075f757f3fSDimitry Andric ExitWeight = 1; 4085f757f3fSDimitry Andric BackEdgeWeight = (Count - 2) / 2; 4095f757f3fSDimitry Andric } else { 4105f757f3fSDimitry Andric // Unnecessary backedge, should never be taken. The conditional 4115f757f3fSDimitry Andric // jump should be optimized away later. 4125f757f3fSDimitry Andric ExitWeight = 1; 4135f757f3fSDimitry Andric BackEdgeWeight = 0; 4145f757f3fSDimitry Andric } 4155f757f3fSDimitry Andric MDBuilder MDB(Builder.getContext()); 4165f757f3fSDimitry Andric BranchWeights = MDB.createBranchWeights(BackEdgeWeight, ExitWeight); 4175f757f3fSDimitry Andric } 4185f757f3fSDimitry Andric Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot, BranchWeights); 419349cc55cSDimitry Andric NewIdx->addIncoming(Zero, InsertTop); 420349cc55cSDimitry Andric NewIdx->addIncoming(IdxNext, NewBB); 4210b57cec5SDimitry Andric LatchBR->eraseFromParent(); 4220b57cec5SDimitry Andric } 4230b57cec5SDimitry Andric } 4240b57cec5SDimitry Andric 4250b57cec5SDimitry Andric // Change the incoming values to the ones defined in the preheader or 4260b57cec5SDimitry Andric // cloned loop. 4270b57cec5SDimitry Andric for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { 4280b57cec5SDimitry Andric PHINode *NewPHI = cast<PHINode>(VMap[&*I]); 4290b57cec5SDimitry Andric unsigned idx = NewPHI->getBasicBlockIndex(Preheader); 4300b57cec5SDimitry Andric NewPHI->setIncomingBlock(idx, InsertTop); 4310b57cec5SDimitry Andric BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]); 4320b57cec5SDimitry Andric idx = NewPHI->getBasicBlockIndex(Latch); 4330b57cec5SDimitry Andric Value *InVal = NewPHI->getIncomingValue(idx); 4340b57cec5SDimitry Andric NewPHI->setIncomingBlock(idx, NewLatch); 4350b57cec5SDimitry Andric if (Value *V = VMap.lookup(InVal)) 4360b57cec5SDimitry Andric NewPHI->setIncomingValue(idx, V); 4370b57cec5SDimitry Andric } 438349cc55cSDimitry Andric 4390b57cec5SDimitry Andric Loop *NewLoop = NewLoops[L]; 4400b57cec5SDimitry Andric assert(NewLoop && "L should have been cloned"); 441480093f4SDimitry Andric MDNode *LoopID = NewLoop->getLoopID(); 4420b57cec5SDimitry Andric 4430b57cec5SDimitry Andric // Only add loop metadata if the loop is not going to be completely 4440b57cec5SDimitry Andric // unrolled. 4450b57cec5SDimitry Andric if (UnrollRemainder) 4460b57cec5SDimitry Andric return NewLoop; 4470b57cec5SDimitry Andric 448bdd1243dSDimitry Andric std::optional<MDNode *> NewLoopID = makeFollowupLoopID( 4490b57cec5SDimitry Andric LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder}); 45081ad6265SDimitry Andric if (NewLoopID) { 451bdd1243dSDimitry Andric NewLoop->setLoopID(*NewLoopID); 4520b57cec5SDimitry Andric 4530b57cec5SDimitry Andric // Do not setLoopAlreadyUnrolled if loop attributes have been defined 4540b57cec5SDimitry Andric // explicitly. 4550b57cec5SDimitry Andric return NewLoop; 4560b57cec5SDimitry Andric } 4570b57cec5SDimitry Andric 4580b57cec5SDimitry Andric // Add unroll disable metadata to disable future unrolling for this loop. 4590b57cec5SDimitry Andric NewLoop->setLoopAlreadyUnrolled(); 4600b57cec5SDimitry Andric return NewLoop; 4610b57cec5SDimitry Andric } 4620b57cec5SDimitry Andric 4630b57cec5SDimitry Andric /// Returns true if we can profitably unroll the multi-exit loop L. Currently, 4640b57cec5SDimitry Andric /// we return true only if UnrollRuntimeMultiExit is set to true. 4650b57cec5SDimitry Andric static bool canProfitablyUnrollMultiExitLoop( 4660b57cec5SDimitry Andric Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit, 467349cc55cSDimitry Andric bool UseEpilogRemainder) { 4680b57cec5SDimitry Andric 4690b57cec5SDimitry Andric // Priority goes to UnrollRuntimeMultiExit if it's supplied. 4700b57cec5SDimitry Andric if (UnrollRuntimeMultiExit.getNumOccurrences()) 4710b57cec5SDimitry Andric return UnrollRuntimeMultiExit; 4720b57cec5SDimitry Andric 4730b57cec5SDimitry Andric // The main pain point with multi-exit loop unrolling is that once unrolled, 4740b57cec5SDimitry Andric // we will not be able to merge all blocks into a straight line code. 4750b57cec5SDimitry Andric // There are branches within the unrolled loop that go to the OtherExits. 4760b57cec5SDimitry Andric // The second point is the increase in code size, but this is true 4770b57cec5SDimitry Andric // irrespective of multiple exits. 4780b57cec5SDimitry Andric 4790b57cec5SDimitry Andric // Note: Both the heuristics below are coarse grained. We are essentially 4800b57cec5SDimitry Andric // enabling unrolling of loops that have a single side exit other than the 4810b57cec5SDimitry Andric // normal LatchExit (i.e. exiting into a deoptimize block). 4820b57cec5SDimitry Andric // The heuristics considered are: 4830b57cec5SDimitry Andric // 1. low number of branches in the unrolled version. 4840b57cec5SDimitry Andric // 2. high predictability of these extra branches. 4850b57cec5SDimitry Andric // We avoid unrolling loops that have more than two exiting blocks. This 4860b57cec5SDimitry Andric // limits the total number of branches in the unrolled loop to be atmost 4870b57cec5SDimitry Andric // the unroll factor (since one of the exiting blocks is the latch block). 4880b57cec5SDimitry Andric SmallVector<BasicBlock*, 4> ExitingBlocks; 4890b57cec5SDimitry Andric L->getExitingBlocks(ExitingBlocks); 4900b57cec5SDimitry Andric if (ExitingBlocks.size() > 2) 4910b57cec5SDimitry Andric return false; 4920b57cec5SDimitry Andric 493fe6060f1SDimitry Andric // Allow unrolling of loops with no non latch exit blocks. 494fe6060f1SDimitry Andric if (OtherExits.size() == 0) 495fe6060f1SDimitry Andric return true; 496fe6060f1SDimitry Andric 4970b57cec5SDimitry Andric // The second heuristic is that L has one exit other than the latchexit and 4980b57cec5SDimitry Andric // that exit is a deoptimize block. We know that deoptimize blocks are rarely 4990b57cec5SDimitry Andric // taken, which also implies the branch leading to the deoptimize block is 500fe6060f1SDimitry Andric // highly predictable. When UnrollRuntimeOtherExitPredictable is specified, we 501fe6060f1SDimitry Andric // assume the other exit branch is predictable even if it has no deoptimize 502fe6060f1SDimitry Andric // call. 5030b57cec5SDimitry Andric return (OtherExits.size() == 1 && 504fe6060f1SDimitry Andric (UnrollRuntimeOtherExitPredictable || 50506c3fb27SDimitry Andric OtherExits[0]->getPostdominatingDeoptimizeCall())); 5060b57cec5SDimitry Andric // TODO: These can be fine-tuned further to consider code size or deopt states 5070b57cec5SDimitry Andric // that are captured by the deoptimize exit block. 5080b57cec5SDimitry Andric // Also, we can extend this to support more cases, if we actually 5090b57cec5SDimitry Andric // know of kinds of multiexit loops that would benefit from unrolling. 5100b57cec5SDimitry Andric } 5110b57cec5SDimitry Andric 512349cc55cSDimitry Andric /// Calculate ModVal = (BECount + 1) % Count on the abstract integer domain 513349cc55cSDimitry Andric /// accounting for the possibility of unsigned overflow in the 2s complement 514349cc55cSDimitry Andric /// domain. Preconditions: 515349cc55cSDimitry Andric /// 1) TripCount = BECount + 1 (allowing overflow) 516349cc55cSDimitry Andric /// 2) Log2(Count) <= BitWidth(BECount) 517349cc55cSDimitry Andric static Value *CreateTripRemainder(IRBuilder<> &B, Value *BECount, 518349cc55cSDimitry Andric Value *TripCount, unsigned Count) { 519349cc55cSDimitry Andric // Note that TripCount is BECount + 1. 520349cc55cSDimitry Andric if (isPowerOf2_32(Count)) 521349cc55cSDimitry Andric // If the expression is zero, then either: 522349cc55cSDimitry Andric // 1. There are no iterations to be run in the prolog/epilog loop. 523349cc55cSDimitry Andric // OR 524349cc55cSDimitry Andric // 2. The addition computing TripCount overflowed. 525349cc55cSDimitry Andric // 526349cc55cSDimitry Andric // If (2) is true, we know that TripCount really is (1 << BEWidth) and so 527349cc55cSDimitry Andric // the number of iterations that remain to be run in the original loop is a 528349cc55cSDimitry Andric // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (a 529349cc55cSDimitry Andric // precondition of this method). 530349cc55cSDimitry Andric return B.CreateAnd(TripCount, Count - 1, "xtraiter"); 531349cc55cSDimitry Andric 532349cc55cSDimitry Andric // As (BECount + 1) can potentially unsigned overflow we count 533349cc55cSDimitry Andric // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count. 534349cc55cSDimitry Andric Constant *CountC = ConstantInt::get(BECount->getType(), Count); 535349cc55cSDimitry Andric Value *ModValTmp = B.CreateURem(BECount, CountC); 536349cc55cSDimitry Andric Value *ModValAdd = B.CreateAdd(ModValTmp, 537349cc55cSDimitry Andric ConstantInt::get(ModValTmp->getType(), 1)); 538349cc55cSDimitry Andric // At that point (BECount % Count) + 1 could be equal to Count. 539349cc55cSDimitry Andric // To handle this case we need to take mod by Count one more time. 540349cc55cSDimitry Andric return B.CreateURem(ModValAdd, CountC, "xtraiter"); 541e8d8bef9SDimitry Andric } 542e8d8bef9SDimitry Andric 543349cc55cSDimitry Andric 5440b57cec5SDimitry Andric /// Insert code in the prolog/epilog code when unrolling a loop with a 5450b57cec5SDimitry Andric /// run-time trip-count. 5460b57cec5SDimitry Andric /// 5470b57cec5SDimitry Andric /// This method assumes that the loop unroll factor is total number 5480b57cec5SDimitry Andric /// of loop bodies in the loop after unrolling. (Some folks refer 5490b57cec5SDimitry Andric /// to the unroll factor as the number of *extra* copies added). 5500b57cec5SDimitry Andric /// We assume also that the loop unroll factor is a power-of-two. So, after 5510b57cec5SDimitry Andric /// unrolling the loop, the number of loop bodies executed is 2, 5520b57cec5SDimitry Andric /// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch 5530b57cec5SDimitry Andric /// instruction in SimplifyCFG.cpp. Then, the backend decides how code for 5540b57cec5SDimitry Andric /// the switch instruction is generated. 5550b57cec5SDimitry Andric /// 5560b57cec5SDimitry Andric /// ***Prolog case*** 5570b57cec5SDimitry Andric /// extraiters = tripcount % loopfactor 5580b57cec5SDimitry Andric /// if (extraiters == 0) jump Loop: 5590b57cec5SDimitry Andric /// else jump Prol: 5600b57cec5SDimitry Andric /// Prol: LoopBody; 5610b57cec5SDimitry Andric /// extraiters -= 1 // Omitted if unroll factor is 2. 5620b57cec5SDimitry Andric /// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2. 5630b57cec5SDimitry Andric /// if (tripcount < loopfactor) jump End: 5640b57cec5SDimitry Andric /// Loop: 5650b57cec5SDimitry Andric /// ... 5660b57cec5SDimitry Andric /// End: 5670b57cec5SDimitry Andric /// 5680b57cec5SDimitry Andric /// ***Epilog case*** 5690b57cec5SDimitry Andric /// extraiters = tripcount % loopfactor 5700b57cec5SDimitry Andric /// if (tripcount < loopfactor) jump LoopExit: 5710b57cec5SDimitry Andric /// unroll_iters = tripcount - extraiters 5720b57cec5SDimitry Andric /// Loop: LoopBody; (executes unroll_iter times); 5730b57cec5SDimitry Andric /// unroll_iter -= 1 5740b57cec5SDimitry Andric /// if (unroll_iter != 0) jump Loop: 5750b57cec5SDimitry Andric /// LoopExit: 5760b57cec5SDimitry Andric /// if (extraiters == 0) jump EpilExit: 5770b57cec5SDimitry Andric /// Epil: LoopBody; (executes extraiters times) 5780b57cec5SDimitry Andric /// extraiters -= 1 // Omitted if unroll factor is 2. 5790b57cec5SDimitry Andric /// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2. 5800b57cec5SDimitry Andric /// EpilExit: 5810b57cec5SDimitry Andric 5825ffd83dbSDimitry Andric bool llvm::UnrollRuntimeLoopRemainder( 5835ffd83dbSDimitry Andric Loop *L, unsigned Count, bool AllowExpensiveTripCount, 5845ffd83dbSDimitry Andric bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV, 5855ffd83dbSDimitry Andric LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, 5865ffd83dbSDimitry Andric const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop) { 5870b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); 5880b57cec5SDimitry Andric LLVM_DEBUG(L->dump()); 5890b57cec5SDimitry Andric LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" 5900b57cec5SDimitry Andric : dbgs() << "Using prolog remainder.\n"); 5910b57cec5SDimitry Andric 5920b57cec5SDimitry Andric // Make sure the loop is in canonical form. 5930b57cec5SDimitry Andric if (!L->isLoopSimplifyForm()) { 5940b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Not in simplify form!\n"); 5950b57cec5SDimitry Andric return false; 5960b57cec5SDimitry Andric } 5970b57cec5SDimitry Andric 5980b57cec5SDimitry Andric // Guaranteed by LoopSimplifyForm. 5990b57cec5SDimitry Andric BasicBlock *Latch = L->getLoopLatch(); 6000b57cec5SDimitry Andric BasicBlock *Header = L->getHeader(); 6010b57cec5SDimitry Andric 6020b57cec5SDimitry Andric BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); 6030b57cec5SDimitry Andric 6040b57cec5SDimitry Andric if (!LatchBR || LatchBR->isUnconditional()) { 6050b57cec5SDimitry Andric // The loop-rotate pass can be helpful to avoid this in many cases. 6060b57cec5SDimitry Andric LLVM_DEBUG( 6070b57cec5SDimitry Andric dbgs() 6080b57cec5SDimitry Andric << "Loop latch not terminated by a conditional branch.\n"); 6090b57cec5SDimitry Andric return false; 6100b57cec5SDimitry Andric } 6110b57cec5SDimitry Andric 6120b57cec5SDimitry Andric unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; 6130b57cec5SDimitry Andric BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex); 6140b57cec5SDimitry Andric 6150b57cec5SDimitry Andric if (L->contains(LatchExit)) { 6160b57cec5SDimitry Andric // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the 6170b57cec5SDimitry Andric // targets of the Latch be an exit block out of the loop. 6180b57cec5SDimitry Andric LLVM_DEBUG( 6190b57cec5SDimitry Andric dbgs() 6200b57cec5SDimitry Andric << "One of the loop latch successors must be the exit block.\n"); 6210b57cec5SDimitry Andric return false; 6220b57cec5SDimitry Andric } 6230b57cec5SDimitry Andric 6240b57cec5SDimitry Andric // These are exit blocks other than the target of the latch exiting block. 6250b57cec5SDimitry Andric SmallVector<BasicBlock *, 4> OtherExits; 6260b57cec5SDimitry Andric L->getUniqueNonLatchExitBlocks(OtherExits); 627349cc55cSDimitry Andric // Support only single exit and exiting block unless multi-exit loop 628349cc55cSDimitry Andric // unrolling is enabled. 629349cc55cSDimitry Andric if (!L->getExitingBlock() || OtherExits.size()) { 630349cc55cSDimitry Andric // We rely on LCSSA form being preserved when the exit blocks are transformed. 631349cc55cSDimitry Andric // (Note that only an off-by-default mode of the old PM disables PreserveLCCA.) 632349cc55cSDimitry Andric if (!PreserveLCSSA) 633349cc55cSDimitry Andric return false; 634349cc55cSDimitry Andric 635349cc55cSDimitry Andric if (!canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, 636349cc55cSDimitry Andric UseEpilogRemainder)) { 6370b57cec5SDimitry Andric LLVM_DEBUG( 6380b57cec5SDimitry Andric dbgs() 6390b57cec5SDimitry Andric << "Multiple exit/exiting blocks in loop and multi-exit unrolling not " 6400b57cec5SDimitry Andric "enabled!\n"); 6410b57cec5SDimitry Andric return false; 6420b57cec5SDimitry Andric } 643349cc55cSDimitry Andric } 6440b57cec5SDimitry Andric // Use Scalar Evolution to compute the trip count. This allows more loops to 6450b57cec5SDimitry Andric // be unrolled than relying on induction var simplification. 6460b57cec5SDimitry Andric if (!SE) 6470b57cec5SDimitry Andric return false; 6480b57cec5SDimitry Andric 6494824e7fdSDimitry Andric // Only unroll loops with a computable trip count. 6500b57cec5SDimitry Andric // We calculate the backedge count by using getExitCount on the Latch block, 6510b57cec5SDimitry Andric // which is proven to be the only exiting block in this loop. This is same as 6520b57cec5SDimitry Andric // calculating getBackedgeTakenCount on the loop (which computes SCEV for all 6530b57cec5SDimitry Andric // exiting blocks). 6540b57cec5SDimitry Andric const SCEV *BECountSC = SE->getExitCount(L, Latch); 6554824e7fdSDimitry Andric if (isa<SCEVCouldNotCompute>(BECountSC)) { 6560b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n"); 6570b57cec5SDimitry Andric return false; 6580b57cec5SDimitry Andric } 6590b57cec5SDimitry Andric 6600b57cec5SDimitry Andric unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth(); 6610b57cec5SDimitry Andric 6620b57cec5SDimitry Andric // Add 1 since the backedge count doesn't include the first loop iteration. 663349cc55cSDimitry Andric // (Note that overflow can occur, this is handled explicitly below) 6640b57cec5SDimitry Andric const SCEV *TripCountSC = 6650b57cec5SDimitry Andric SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); 6660b57cec5SDimitry Andric if (isa<SCEVCouldNotCompute>(TripCountSC)) { 6670b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n"); 6680b57cec5SDimitry Andric return false; 6690b57cec5SDimitry Andric } 6700b57cec5SDimitry Andric 6710b57cec5SDimitry Andric BasicBlock *PreHeader = L->getLoopPreheader(); 6720b57cec5SDimitry Andric BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); 673*0fca6ea1SDimitry Andric const DataLayout &DL = Header->getDataLayout(); 6740b57cec5SDimitry Andric SCEVExpander Expander(*SE, DL, "loop-unroll"); 6750b57cec5SDimitry Andric if (!AllowExpensiveTripCount && 6765ffd83dbSDimitry Andric Expander.isHighCostExpansion(TripCountSC, L, SCEVCheapExpansionBudget, 6775ffd83dbSDimitry Andric TTI, PreHeaderBR)) { 6780b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n"); 6790b57cec5SDimitry Andric return false; 6800b57cec5SDimitry Andric } 6810b57cec5SDimitry Andric 6820b57cec5SDimitry Andric // This constraint lets us deal with an overflowing trip count easily; see the 6830b57cec5SDimitry Andric // comment on ModVal below. 6840b57cec5SDimitry Andric if (Log2_32(Count) > BEWidth) { 6850b57cec5SDimitry Andric LLVM_DEBUG( 6860b57cec5SDimitry Andric dbgs() 6870b57cec5SDimitry Andric << "Count failed constraint on overflow trip count calculation.\n"); 6880b57cec5SDimitry Andric return false; 6890b57cec5SDimitry Andric } 6900b57cec5SDimitry Andric 6910b57cec5SDimitry Andric // Loop structure is the following: 6920b57cec5SDimitry Andric // 6930b57cec5SDimitry Andric // PreHeader 6940b57cec5SDimitry Andric // Header 6950b57cec5SDimitry Andric // ... 6960b57cec5SDimitry Andric // Latch 6970b57cec5SDimitry Andric // LatchExit 6980b57cec5SDimitry Andric 6990b57cec5SDimitry Andric BasicBlock *NewPreHeader; 7000b57cec5SDimitry Andric BasicBlock *NewExit = nullptr; 7010b57cec5SDimitry Andric BasicBlock *PrologExit = nullptr; 7020b57cec5SDimitry Andric BasicBlock *EpilogPreHeader = nullptr; 7030b57cec5SDimitry Andric BasicBlock *PrologPreHeader = nullptr; 7040b57cec5SDimitry Andric 7050b57cec5SDimitry Andric if (UseEpilogRemainder) { 7060b57cec5SDimitry Andric // If epilog remainder 7070b57cec5SDimitry Andric // Split PreHeader to insert a branch around loop for unrolling. 7080b57cec5SDimitry Andric NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI); 7090b57cec5SDimitry Andric NewPreHeader->setName(PreHeader->getName() + ".new"); 7100b57cec5SDimitry Andric // Split LatchExit to create phi nodes from branch above. 711349cc55cSDimitry Andric NewExit = SplitBlockPredecessors(LatchExit, {Latch}, ".unr-lcssa", DT, LI, 7120b57cec5SDimitry Andric nullptr, PreserveLCSSA); 7130b57cec5SDimitry Andric // NewExit gets its DebugLoc from LatchExit, which is not part of the 7140b57cec5SDimitry Andric // original Loop. 7150b57cec5SDimitry Andric // Fix this by setting Loop's DebugLoc to NewExit. 7160b57cec5SDimitry Andric auto *NewExitTerminator = NewExit->getTerminator(); 7170b57cec5SDimitry Andric NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc()); 7180b57cec5SDimitry Andric // Split NewExit to insert epilog remainder loop. 7190b57cec5SDimitry Andric EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI); 7200b57cec5SDimitry Andric EpilogPreHeader->setName(Header->getName() + ".epil.preheader"); 721349cc55cSDimitry Andric 722349cc55cSDimitry Andric // If the latch exits from multiple level of nested loops, then 723349cc55cSDimitry Andric // by assumption there must be another loop exit which branches to the 724349cc55cSDimitry Andric // outer loop and we must adjust the loop for the newly inserted blocks 725349cc55cSDimitry Andric // to account for the fact that our epilogue is still in the same outer 726349cc55cSDimitry Andric // loop. Note that this leaves loopinfo temporarily out of sync with the 727349cc55cSDimitry Andric // CFG until the actual epilogue loop is inserted. 728349cc55cSDimitry Andric if (auto *ParentL = L->getParentLoop()) 729349cc55cSDimitry Andric if (LI->getLoopFor(LatchExit) != ParentL) { 730349cc55cSDimitry Andric LI->removeBlock(NewExit); 731349cc55cSDimitry Andric ParentL->addBasicBlockToLoop(NewExit, *LI); 732349cc55cSDimitry Andric LI->removeBlock(EpilogPreHeader); 733349cc55cSDimitry Andric ParentL->addBasicBlockToLoop(EpilogPreHeader, *LI); 734349cc55cSDimitry Andric } 735349cc55cSDimitry Andric 7360b57cec5SDimitry Andric } else { 7370b57cec5SDimitry Andric // If prolog remainder 7380b57cec5SDimitry Andric // Split the original preheader twice to insert prolog remainder loop 7390b57cec5SDimitry Andric PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI); 7400b57cec5SDimitry Andric PrologPreHeader->setName(Header->getName() + ".prol.preheader"); 7410b57cec5SDimitry Andric PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(), 7420b57cec5SDimitry Andric DT, LI); 7430b57cec5SDimitry Andric PrologExit->setName(Header->getName() + ".prol.loopexit"); 7440b57cec5SDimitry Andric // Split PrologExit to get NewPreHeader. 7450b57cec5SDimitry Andric NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI); 7460b57cec5SDimitry Andric NewPreHeader->setName(PreHeader->getName() + ".new"); 7470b57cec5SDimitry Andric } 7480b57cec5SDimitry Andric // Loop structure should be the following: 7490b57cec5SDimitry Andric // Epilog Prolog 7500b57cec5SDimitry Andric // 7510b57cec5SDimitry Andric // PreHeader PreHeader 7520b57cec5SDimitry Andric // *NewPreHeader *PrologPreHeader 7530b57cec5SDimitry Andric // Header *PrologExit 7540b57cec5SDimitry Andric // ... *NewPreHeader 7550b57cec5SDimitry Andric // Latch Header 7560b57cec5SDimitry Andric // *NewExit ... 7570b57cec5SDimitry Andric // *EpilogPreHeader Latch 7580b57cec5SDimitry Andric // LatchExit LatchExit 7590b57cec5SDimitry Andric 7600b57cec5SDimitry Andric // Calculate conditions for branch around loop for unrolling 7610b57cec5SDimitry Andric // in epilog case and around prolog remainder loop in prolog case. 7620b57cec5SDimitry Andric // Compute the number of extra iterations required, which is: 7630b57cec5SDimitry Andric // extra iterations = run-time trip count % loop unroll factor 7640b57cec5SDimitry Andric PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); 76581ad6265SDimitry Andric IRBuilder<> B(PreHeaderBR); 7660b57cec5SDimitry Andric Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), 7670b57cec5SDimitry Andric PreHeaderBR); 76881ad6265SDimitry Andric Value *BECount; 76981ad6265SDimitry Andric // If there are other exits before the latch, that may cause the latch exit 77081ad6265SDimitry Andric // branch to never be executed, and the latch exit count may be poison. 77181ad6265SDimitry Andric // In this case, freeze the TripCount and base BECount on the frozen 77281ad6265SDimitry Andric // TripCount. We will introduce two branches using these values, and it's 77381ad6265SDimitry Andric // important that they see a consistent value (which would not be guaranteed 77481ad6265SDimitry Andric // if were frozen independently.) 77581ad6265SDimitry Andric if ((!OtherExits.empty() || !SE->loopHasNoAbnormalExits(L)) && 77681ad6265SDimitry Andric !isGuaranteedNotToBeUndefOrPoison(TripCount, AC, PreHeaderBR, DT)) { 77781ad6265SDimitry Andric TripCount = B.CreateFreeze(TripCount); 77881ad6265SDimitry Andric BECount = 779*0fca6ea1SDimitry Andric B.CreateAdd(TripCount, Constant::getAllOnesValue(TripCount->getType())); 78081ad6265SDimitry Andric } else { 78181ad6265SDimitry Andric // If we don't need to freeze, use SCEVExpander for BECount as well, to 78281ad6265SDimitry Andric // allow slightly better value reuse. 78381ad6265SDimitry Andric BECount = 78481ad6265SDimitry Andric Expander.expandCodeFor(BECountSC, BECountSC->getType(), PreHeaderBR); 78581ad6265SDimitry Andric } 78681ad6265SDimitry Andric 787349cc55cSDimitry Andric Value * const ModVal = CreateTripRemainder(B, BECount, TripCount, Count); 788349cc55cSDimitry Andric 7890b57cec5SDimitry Andric Value *BranchVal = 7900b57cec5SDimitry Andric UseEpilogRemainder ? B.CreateICmpULT(BECount, 7910b57cec5SDimitry Andric ConstantInt::get(BECount->getType(), 7920b57cec5SDimitry Andric Count - 1)) : 7930b57cec5SDimitry Andric B.CreateIsNotNull(ModVal, "lcmp.mod"); 7940b57cec5SDimitry Andric BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader; 7950b57cec5SDimitry Andric BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit; 7960b57cec5SDimitry Andric // Branch to either remainder (extra iterations) loop or unrolling loop. 7975f757f3fSDimitry Andric MDNode *BranchWeights = nullptr; 7985f757f3fSDimitry Andric if (hasBranchWeightMD(*Latch->getTerminator())) { 7995f757f3fSDimitry Andric // Assume loop is nearly always entered. 8005f757f3fSDimitry Andric MDBuilder MDB(B.getContext()); 8015f757f3fSDimitry Andric BranchWeights = MDB.createBranchWeights(EpilogHeaderWeights); 8025f757f3fSDimitry Andric } 8035f757f3fSDimitry Andric B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop, BranchWeights); 8040b57cec5SDimitry Andric PreHeaderBR->eraseFromParent(); 8050b57cec5SDimitry Andric if (DT) { 8060b57cec5SDimitry Andric if (UseEpilogRemainder) 8070b57cec5SDimitry Andric DT->changeImmediateDominator(NewExit, PreHeader); 8080b57cec5SDimitry Andric else 8090b57cec5SDimitry Andric DT->changeImmediateDominator(PrologExit, PreHeader); 8100b57cec5SDimitry Andric } 8110b57cec5SDimitry Andric Function *F = Header->getParent(); 8120b57cec5SDimitry Andric // Get an ordered list of blocks in the loop to help with the ordering of the 8130b57cec5SDimitry Andric // cloned blocks in the prolog/epilog code 8140b57cec5SDimitry Andric LoopBlocksDFS LoopBlocks(L); 8150b57cec5SDimitry Andric LoopBlocks.perform(LI); 8160b57cec5SDimitry Andric 8170b57cec5SDimitry Andric // 8180b57cec5SDimitry Andric // For each extra loop iteration, create a copy of the loop's basic blocks 8190b57cec5SDimitry Andric // and generate a condition that branches to the copy depending on the 8200b57cec5SDimitry Andric // number of 'left over' iterations. 8210b57cec5SDimitry Andric // 8220b57cec5SDimitry Andric std::vector<BasicBlock *> NewBlocks; 8230b57cec5SDimitry Andric ValueToValueMapTy VMap; 8240b57cec5SDimitry Andric 8250b57cec5SDimitry Andric // Clone all the basic blocks in the loop. If Count is 2, we don't clone 8260b57cec5SDimitry Andric // the loop, otherwise we create a cloned loop to execute the extra 8270b57cec5SDimitry Andric // iterations. This function adds the appropriate CFG connections. 8280b57cec5SDimitry Andric BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit; 8290b57cec5SDimitry Andric BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; 8300b57cec5SDimitry Andric Loop *remainderLoop = CloneLoopBlocks( 831349cc55cSDimitry Andric L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot, 8325f757f3fSDimitry Andric NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI, Count); 833e8d8bef9SDimitry Andric 8340b57cec5SDimitry Andric // Insert the cloned blocks into the function. 835bdd1243dSDimitry Andric F->splice(InsertBot->getIterator(), F, NewBlocks[0]->getIterator(), F->end()); 8360b57cec5SDimitry Andric 8370b57cec5SDimitry Andric // Now the loop blocks are cloned and the other exiting blocks from the 8380b57cec5SDimitry Andric // remainder are connected to the original Loop's exit blocks. The remaining 8390b57cec5SDimitry Andric // work is to update the phi nodes in the original loop, and take in the 8400b57cec5SDimitry Andric // values from the cloned region. 8410b57cec5SDimitry Andric for (auto *BB : OtherExits) { 8420b57cec5SDimitry Andric // Given we preserve LCSSA form, we know that the values used outside the 8430b57cec5SDimitry Andric // loop will be used through these phi nodes at the exit blocks that are 8440b57cec5SDimitry Andric // transformed below. 845349cc55cSDimitry Andric for (PHINode &PN : BB->phis()) { 846349cc55cSDimitry Andric unsigned oldNumOperands = PN.getNumIncomingValues(); 8470b57cec5SDimitry Andric // Add the incoming values from the remainder code to the end of the phi 8480b57cec5SDimitry Andric // node. 8490b57cec5SDimitry Andric for (unsigned i = 0; i < oldNumOperands; i++){ 850349cc55cSDimitry Andric auto *PredBB =PN.getIncomingBlock(i); 851349cc55cSDimitry Andric if (PredBB == Latch) 852*0fca6ea1SDimitry Andric // The latch exit is handled separately, see connectX 853349cc55cSDimitry Andric continue; 854349cc55cSDimitry Andric if (!L->contains(PredBB)) 855349cc55cSDimitry Andric // Even if we had dedicated exits, the code above inserted an 856349cc55cSDimitry Andric // extra branch which can reach the latch exit. 857349cc55cSDimitry Andric continue; 858349cc55cSDimitry Andric 859349cc55cSDimitry Andric auto *V = PN.getIncomingValue(i); 860349cc55cSDimitry Andric if (Instruction *I = dyn_cast<Instruction>(V)) 861349cc55cSDimitry Andric if (L->contains(I)) 862349cc55cSDimitry Andric V = VMap.lookup(I); 863349cc55cSDimitry Andric PN.addIncoming(V, cast<BasicBlock>(VMap[PredBB])); 8640b57cec5SDimitry Andric } 8650b57cec5SDimitry Andric } 8660b57cec5SDimitry Andric #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG) 8670b57cec5SDimitry Andric for (BasicBlock *SuccBB : successors(BB)) { 868349cc55cSDimitry Andric assert(!(llvm::is_contained(OtherExits, SuccBB) || SuccBB == LatchExit) && 8690b57cec5SDimitry Andric "Breaks the definition of dedicated exits!"); 8700b57cec5SDimitry Andric } 8710b57cec5SDimitry Andric #endif 8720b57cec5SDimitry Andric } 8730b57cec5SDimitry Andric 8740b57cec5SDimitry Andric // Update the immediate dominator of the exit blocks and blocks that are 8750b57cec5SDimitry Andric // reachable from the exit blocks. This is needed because we now have paths 8760b57cec5SDimitry Andric // from both the original loop and the remainder code reaching the exit 8770b57cec5SDimitry Andric // blocks. While the IDom of these exit blocks were from the original loop, 8780b57cec5SDimitry Andric // now the IDom is the preheader (which decides whether the original loop or 8790b57cec5SDimitry Andric // remainder code should run). 8800b57cec5SDimitry Andric if (DT && !L->getExitingBlock()) { 8810b57cec5SDimitry Andric SmallVector<BasicBlock *, 16> ChildrenToUpdate; 8820b57cec5SDimitry Andric // NB! We have to examine the dom children of all loop blocks, not just 8830b57cec5SDimitry Andric // those which are the IDom of the exit blocks. This is because blocks 8840b57cec5SDimitry Andric // reachable from the exit blocks can have their IDom as the nearest common 8850b57cec5SDimitry Andric // dominator of the exit blocks. 8860b57cec5SDimitry Andric for (auto *BB : L->blocks()) { 8870b57cec5SDimitry Andric auto *DomNodeBB = DT->getNode(BB); 8885ffd83dbSDimitry Andric for (auto *DomChild : DomNodeBB->children()) { 8890b57cec5SDimitry Andric auto *DomChildBB = DomChild->getBlock(); 8900b57cec5SDimitry Andric if (!L->contains(LI->getLoopFor(DomChildBB))) 8910b57cec5SDimitry Andric ChildrenToUpdate.push_back(DomChildBB); 8920b57cec5SDimitry Andric } 8930b57cec5SDimitry Andric } 8940b57cec5SDimitry Andric for (auto *BB : ChildrenToUpdate) 8950b57cec5SDimitry Andric DT->changeImmediateDominator(BB, PreHeader); 8960b57cec5SDimitry Andric } 8970b57cec5SDimitry Andric 8980b57cec5SDimitry Andric // Loop structure should be the following: 8990b57cec5SDimitry Andric // Epilog Prolog 9000b57cec5SDimitry Andric // 9010b57cec5SDimitry Andric // PreHeader PreHeader 9020b57cec5SDimitry Andric // NewPreHeader PrologPreHeader 9030b57cec5SDimitry Andric // Header PrologHeader 9040b57cec5SDimitry Andric // ... ... 9050b57cec5SDimitry Andric // Latch PrologLatch 9060b57cec5SDimitry Andric // NewExit PrologExit 9070b57cec5SDimitry Andric // EpilogPreHeader NewPreHeader 9080b57cec5SDimitry Andric // EpilogHeader Header 9090b57cec5SDimitry Andric // ... ... 9100b57cec5SDimitry Andric // EpilogLatch Latch 9110b57cec5SDimitry Andric // LatchExit LatchExit 9120b57cec5SDimitry Andric 9130b57cec5SDimitry Andric // Rewrite the cloned instruction operands to use the values created when the 9140b57cec5SDimitry Andric // clone is created. 9150b57cec5SDimitry Andric for (BasicBlock *BB : NewBlocks) { 9165f757f3fSDimitry Andric Module *M = BB->getModule(); 9170b57cec5SDimitry Andric for (Instruction &I : *BB) { 9180b57cec5SDimitry Andric RemapInstruction(&I, VMap, 9190b57cec5SDimitry Andric RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 920*0fca6ea1SDimitry Andric RemapDbgRecordRange(M, I.getDbgRecordRange(), VMap, 9215f757f3fSDimitry Andric RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 9220b57cec5SDimitry Andric } 9230b57cec5SDimitry Andric } 9240b57cec5SDimitry Andric 9250b57cec5SDimitry Andric if (UseEpilogRemainder) { 9260b57cec5SDimitry Andric // Connect the epilog code to the original loop and update the 9270b57cec5SDimitry Andric // PHI functions. 92881ad6265SDimitry Andric ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader, 9295f757f3fSDimitry Andric NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count); 9300b57cec5SDimitry Andric 9310b57cec5SDimitry Andric // Update counter in loop for unrolling. 932349cc55cSDimitry Andric // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count. 933349cc55cSDimitry Andric // Subtle: TestVal can be 0 if we wrapped when computing the trip count, 934349cc55cSDimitry Andric // thus we must compare the post-increment (wrapping) value. 9350b57cec5SDimitry Andric IRBuilder<> B2(NewPreHeader->getTerminator()); 9360b57cec5SDimitry Andric Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter"); 9370b57cec5SDimitry Andric BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); 9385f757f3fSDimitry Andric PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter"); 9395f757f3fSDimitry Andric NewIdx->insertBefore(Header->getFirstNonPHIIt()); 940349cc55cSDimitry Andric B2.SetInsertPoint(LatchBR); 941349cc55cSDimitry Andric auto *Zero = ConstantInt::get(NewIdx->getType(), 0); 942349cc55cSDimitry Andric auto *One = ConstantInt::get(NewIdx->getType(), 1); 943349cc55cSDimitry Andric Value *IdxNext = B2.CreateAdd(NewIdx, One, NewIdx->getName() + ".next"); 944349cc55cSDimitry Andric auto Pred = LatchBR->getSuccessor(0) == Header ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; 945349cc55cSDimitry Andric Value *IdxCmp = B2.CreateICmp(Pred, IdxNext, TestVal, NewIdx->getName() + ".ncmp"); 946349cc55cSDimitry Andric NewIdx->addIncoming(Zero, NewPreHeader); 947349cc55cSDimitry Andric NewIdx->addIncoming(IdxNext, Latch); 9480b57cec5SDimitry Andric LatchBR->setCondition(IdxCmp); 9490b57cec5SDimitry Andric } else { 9500b57cec5SDimitry Andric // Connect the prolog code to the original loop and update the 9510b57cec5SDimitry Andric // PHI functions. 9520b57cec5SDimitry Andric ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader, 95381ad6265SDimitry Andric NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE); 9540b57cec5SDimitry Andric } 9550b57cec5SDimitry Andric 9560b57cec5SDimitry Andric // If this loop is nested, then the loop unroller changes the code in the any 9570b57cec5SDimitry Andric // of its parent loops, so the Scalar Evolution pass needs to be run again. 9580b57cec5SDimitry Andric SE->forgetTopmostLoop(L); 9590b57cec5SDimitry Andric 960349cc55cSDimitry Andric // Verify that the Dom Tree and Loop Info are correct. 9610b57cec5SDimitry Andric #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG) 962349cc55cSDimitry Andric if (DT) { 9630b57cec5SDimitry Andric assert(DT->verify(DominatorTree::VerificationLevel::Full)); 964349cc55cSDimitry Andric LI->verify(*DT); 965349cc55cSDimitry Andric } 9660b57cec5SDimitry Andric #endif 9670b57cec5SDimitry Andric 968349cc55cSDimitry Andric // For unroll factor 2 remainder loop will have 1 iteration. 969349cc55cSDimitry Andric if (Count == 2 && DT && LI && SE) { 970349cc55cSDimitry Andric // TODO: This code could probably be pulled out into a helper function 971349cc55cSDimitry Andric // (e.g. breakLoopBackedgeAndSimplify) and reused in loop-deletion. 972349cc55cSDimitry Andric BasicBlock *RemainderLatch = remainderLoop->getLoopLatch(); 973349cc55cSDimitry Andric assert(RemainderLatch); 974349cc55cSDimitry Andric SmallVector<BasicBlock*> RemainderBlocks(remainderLoop->getBlocks().begin(), 975349cc55cSDimitry Andric remainderLoop->getBlocks().end()); 976349cc55cSDimitry Andric breakLoopBackedge(remainderLoop, *DT, *SE, *LI, nullptr); 977349cc55cSDimitry Andric remainderLoop = nullptr; 978349cc55cSDimitry Andric 979349cc55cSDimitry Andric // Simplify loop values after breaking the backedge 980*0fca6ea1SDimitry Andric const DataLayout &DL = L->getHeader()->getDataLayout(); 981349cc55cSDimitry Andric SmallVector<WeakTrackingVH, 16> DeadInsts; 982349cc55cSDimitry Andric for (BasicBlock *BB : RemainderBlocks) { 983349cc55cSDimitry Andric for (Instruction &Inst : llvm::make_early_inc_range(*BB)) { 98481ad6265SDimitry Andric if (Value *V = simplifyInstruction(&Inst, {DL, nullptr, DT, AC})) 985349cc55cSDimitry Andric if (LI->replacementPreservesLCSSAForm(&Inst, V)) 986349cc55cSDimitry Andric Inst.replaceAllUsesWith(V); 987349cc55cSDimitry Andric if (isInstructionTriviallyDead(&Inst)) 988349cc55cSDimitry Andric DeadInsts.emplace_back(&Inst); 989349cc55cSDimitry Andric } 990349cc55cSDimitry Andric // We can't do recursive deletion until we're done iterating, as we might 991349cc55cSDimitry Andric // have a phi which (potentially indirectly) uses instructions later in 992349cc55cSDimitry Andric // the block we're iterating through. 993349cc55cSDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(DeadInsts); 994349cc55cSDimitry Andric } 995349cc55cSDimitry Andric 996349cc55cSDimitry Andric // Merge latch into exit block. 997349cc55cSDimitry Andric auto *ExitBB = RemainderLatch->getSingleSuccessor(); 998349cc55cSDimitry Andric assert(ExitBB && "required after breaking cond br backedge"); 999349cc55cSDimitry Andric DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); 1000349cc55cSDimitry Andric MergeBlockIntoPredecessor(ExitBB, &DTU, LI); 1001349cc55cSDimitry Andric } 1002349cc55cSDimitry Andric 10030b57cec5SDimitry Andric // Canonicalize to LoopSimplifyForm both original and remainder loops. We 10040b57cec5SDimitry Andric // cannot rely on the LoopUnrollPass to do this because it only does 10050b57cec5SDimitry Andric // canonicalization for parent/subloops and not the sibling loops. 10060b57cec5SDimitry Andric if (OtherExits.size() > 0) { 10070b57cec5SDimitry Andric // Generate dedicated exit blocks for the original loop, to preserve 10080b57cec5SDimitry Andric // LoopSimplifyForm. 10090b57cec5SDimitry Andric formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA); 10100b57cec5SDimitry Andric // Generate dedicated exit blocks for the remainder loop if one exists, to 10110b57cec5SDimitry Andric // preserve LoopSimplifyForm. 10120b57cec5SDimitry Andric if (remainderLoop) 10130b57cec5SDimitry Andric formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA); 10140b57cec5SDimitry Andric } 10150b57cec5SDimitry Andric 10160b57cec5SDimitry Andric auto UnrollResult = LoopUnrollResult::Unmodified; 10170b57cec5SDimitry Andric if (remainderLoop && UnrollRemainder) { 10180b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n"); 1019*0fca6ea1SDimitry Andric UnrollLoopOptions ULO; 1020*0fca6ea1SDimitry Andric ULO.Count = Count - 1; 1021*0fca6ea1SDimitry Andric ULO.Force = false; 1022*0fca6ea1SDimitry Andric ULO.Runtime = false; 1023*0fca6ea1SDimitry Andric ULO.AllowExpensiveTripCount = false; 1024*0fca6ea1SDimitry Andric ULO.UnrollRemainder = false; 1025*0fca6ea1SDimitry Andric ULO.ForgetAllSCEV = ForgetAllSCEV; 1026*0fca6ea1SDimitry Andric assert(!getLoopConvergenceHeart(L) && 1027*0fca6ea1SDimitry Andric "A loop with a convergence heart does not allow runtime unrolling."); 1028*0fca6ea1SDimitry Andric UnrollResult = UnrollLoop(remainderLoop, ULO, LI, SE, DT, AC, TTI, 1029*0fca6ea1SDimitry Andric /*ORE*/ nullptr, PreserveLCSSA); 10300b57cec5SDimitry Andric } 10310b57cec5SDimitry Andric 10320b57cec5SDimitry Andric if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled) 10330b57cec5SDimitry Andric *ResultLoop = remainderLoop; 10340b57cec5SDimitry Andric NumRuntimeUnrolled++; 10350b57cec5SDimitry Andric return true; 10360b57cec5SDimitry Andric } 1037