10b57cec5SDimitry Andric ///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" 100b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h" 110b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h" 120b57cec5SDimitry Andric #include "llvm/ADT/Sequence.h" 130b57cec5SDimitry Andric #include "llvm/ADT/SetVector.h" 140b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 150b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 160b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 170b57cec5SDimitry Andric #include "llvm/ADT/Twine.h" 180b57cec5SDimitry Andric #include "llvm/Analysis/AssumptionCache.h" 19bdd1243dSDimitry Andric #include "llvm/Analysis/BlockFrequencyInfo.h" 200b57cec5SDimitry Andric #include "llvm/Analysis/CFG.h" 210b57cec5SDimitry Andric #include "llvm/Analysis/CodeMetrics.h" 2206c3fb27SDimitry Andric #include "llvm/Analysis/DomTreeUpdater.h" 230b57cec5SDimitry Andric #include "llvm/Analysis/GuardUtils.h" 240b57cec5SDimitry Andric #include "llvm/Analysis/LoopAnalysisManager.h" 250b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h" 260b57cec5SDimitry Andric #include "llvm/Analysis/LoopIterator.h" 270b57cec5SDimitry Andric #include "llvm/Analysis/MemorySSA.h" 280b57cec5SDimitry Andric #include "llvm/Analysis/MemorySSAUpdater.h" 29e8d8bef9SDimitry Andric #include "llvm/Analysis/MustExecute.h" 30bdd1243dSDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h" 31e8d8bef9SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h" 3281ad6265SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 33349cc55cSDimitry Andric #include "llvm/Analysis/ValueTracking.h" 340b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h" 350b57cec5SDimitry Andric #include "llvm/IR/Constant.h" 360b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 370b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 380b57cec5SDimitry Andric #include "llvm/IR/Function.h" 39e8d8bef9SDimitry Andric #include "llvm/IR/IRBuilder.h" 400b57cec5SDimitry Andric #include "llvm/IR/InstrTypes.h" 410b57cec5SDimitry Andric #include "llvm/IR/Instruction.h" 420b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 430b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 440fca6ea1SDimitry Andric #include "llvm/IR/Module.h" 45fe6060f1SDimitry Andric #include "llvm/IR/PatternMatch.h" 4606c3fb27SDimitry Andric #include "llvm/IR/ProfDataUtils.h" 470b57cec5SDimitry Andric #include "llvm/IR/Use.h" 480b57cec5SDimitry Andric #include "llvm/IR/Value.h" 490b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 50480093f4SDimitry Andric #include "llvm/Support/CommandLine.h" 510b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 520b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 530b57cec5SDimitry Andric #include "llvm/Support/GenericDomTree.h" 5481ad6265SDimitry Andric #include "llvm/Support/InstructionCost.h" 550b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 5681ad6265SDimitry Andric #include "llvm/Transforms/Scalar/LoopPassManager.h" 570b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h" 580b57cec5SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h" 59e8d8bef9SDimitry Andric #include "llvm/Transforms/Utils/Local.h" 600b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h" 610b57cec5SDimitry Andric #include "llvm/Transforms/Utils/ValueMapper.h" 620b57cec5SDimitry Andric #include <algorithm> 630b57cec5SDimitry Andric #include <cassert> 640b57cec5SDimitry Andric #include <iterator> 650b57cec5SDimitry Andric #include <numeric> 66bdd1243dSDimitry Andric #include <optional> 670b57cec5SDimitry Andric #include <utility> 680b57cec5SDimitry Andric 690b57cec5SDimitry Andric #define DEBUG_TYPE "simple-loop-unswitch" 700b57cec5SDimitry Andric 710b57cec5SDimitry Andric using namespace llvm; 72fe6060f1SDimitry Andric using namespace llvm::PatternMatch; 730b57cec5SDimitry Andric 740b57cec5SDimitry Andric STATISTIC(NumBranches, "Number of branches unswitched"); 750b57cec5SDimitry Andric STATISTIC(NumSwitches, "Number of switches unswitched"); 7606c3fb27SDimitry Andric STATISTIC(NumSelects, "Number of selects turned into branches for unswitching"); 770b57cec5SDimitry Andric STATISTIC(NumGuards, "Number of guards turned into branches for unswitching"); 780b57cec5SDimitry Andric STATISTIC(NumTrivial, "Number of unswitches that are trivial"); 790b57cec5SDimitry Andric STATISTIC( 800b57cec5SDimitry Andric NumCostMultiplierSkipped, 810b57cec5SDimitry Andric "Number of unswitch candidates that had their cost multiplier skipped"); 8206c3fb27SDimitry Andric STATISTIC(NumInvariantConditionsInjected, 8306c3fb27SDimitry Andric "Number of invariant conditions injected and unswitched"); 840b57cec5SDimitry Andric 850b57cec5SDimitry Andric static cl::opt<bool> EnableNonTrivialUnswitch( 860b57cec5SDimitry Andric "enable-nontrivial-unswitch", cl::init(false), cl::Hidden, 870b57cec5SDimitry Andric cl::desc("Forcibly enables non-trivial loop unswitching rather than " 880b57cec5SDimitry Andric "following the configuration passed into the pass.")); 890b57cec5SDimitry Andric 900b57cec5SDimitry Andric static cl::opt<int> 910b57cec5SDimitry Andric UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden, 920b57cec5SDimitry Andric cl::desc("The cost threshold for unswitching a loop.")); 930b57cec5SDimitry Andric 940b57cec5SDimitry Andric static cl::opt<bool> EnableUnswitchCostMultiplier( 950b57cec5SDimitry Andric "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden, 960b57cec5SDimitry Andric cl::desc("Enable unswitch cost multiplier that prohibits exponential " 970b57cec5SDimitry Andric "explosion in nontrivial unswitch.")); 980b57cec5SDimitry Andric static cl::opt<int> UnswitchSiblingsToplevelDiv( 990b57cec5SDimitry Andric "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden, 1000b57cec5SDimitry Andric cl::desc("Toplevel siblings divisor for cost multiplier.")); 1010b57cec5SDimitry Andric static cl::opt<int> UnswitchNumInitialUnscaledCandidates( 1020b57cec5SDimitry Andric "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden, 1030b57cec5SDimitry Andric cl::desc("Number of unswitch candidates that are ignored when calculating " 1040b57cec5SDimitry Andric "cost multiplier.")); 1050b57cec5SDimitry Andric static cl::opt<bool> UnswitchGuards( 1060b57cec5SDimitry Andric "simple-loop-unswitch-guards", cl::init(true), cl::Hidden, 1070b57cec5SDimitry Andric cl::desc("If enabled, simple loop unswitching will also consider " 1080b57cec5SDimitry Andric "llvm.experimental.guard intrinsics as unswitch candidates.")); 109e8d8bef9SDimitry Andric static cl::opt<bool> DropNonTrivialImplicitNullChecks( 110e8d8bef9SDimitry Andric "simple-loop-unswitch-drop-non-trivial-implicit-null-checks", 111e8d8bef9SDimitry Andric cl::init(false), cl::Hidden, 112e8d8bef9SDimitry Andric cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " 113e8d8bef9SDimitry Andric "null checks to save time analyzing if we can keep it.")); 114fe6060f1SDimitry Andric static cl::opt<unsigned> 115fe6060f1SDimitry Andric MSSAThreshold("simple-loop-unswitch-memoryssa-threshold", 116fe6060f1SDimitry Andric cl::desc("Max number of memory uses to explore during " 117fe6060f1SDimitry Andric "partial unswitching analysis"), 118fe6060f1SDimitry Andric cl::init(100), cl::Hidden); 119349cc55cSDimitry Andric static cl::opt<bool> FreezeLoopUnswitchCond( 12081ad6265SDimitry Andric "freeze-loop-unswitch-cond", cl::init(true), cl::Hidden, 121349cc55cSDimitry Andric cl::desc("If enabled, the freeze instruction will be added to condition " 122349cc55cSDimitry Andric "of loop unswitch to prevent miscompilation.")); 1230b57cec5SDimitry Andric 12406c3fb27SDimitry Andric static cl::opt<bool> InjectInvariantConditions( 12506c3fb27SDimitry Andric "simple-loop-unswitch-inject-invariant-conditions", cl::Hidden, 12606c3fb27SDimitry Andric cl::desc("Whether we should inject new invariants and unswitch them to " 12706c3fb27SDimitry Andric "eliminate some existing (non-invariant) conditions."), 12806c3fb27SDimitry Andric cl::init(true)); 12906c3fb27SDimitry Andric 13006c3fb27SDimitry Andric static cl::opt<unsigned> InjectInvariantConditionHotnesThreshold( 13106c3fb27SDimitry Andric "simple-loop-unswitch-inject-invariant-condition-hotness-threshold", 13206c3fb27SDimitry Andric cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " 13306c3fb27SDimitry Andric "unswitch on them to eliminate branches that are " 13406c3fb27SDimitry Andric "not-taken 1/<this option> times or less."), 13506c3fb27SDimitry Andric cl::init(16)); 13606c3fb27SDimitry Andric 1370fca6ea1SDimitry Andric AnalysisKey ShouldRunExtraSimpleLoopUnswitch::Key; 138bdd1243dSDimitry Andric namespace { 13906c3fb27SDimitry Andric struct CompareDesc { 14006c3fb27SDimitry Andric BranchInst *Term; 14106c3fb27SDimitry Andric Value *Invariant; 14206c3fb27SDimitry Andric BasicBlock *InLoopSucc; 14306c3fb27SDimitry Andric 14406c3fb27SDimitry Andric CompareDesc(BranchInst *Term, Value *Invariant, BasicBlock *InLoopSucc) 14506c3fb27SDimitry Andric : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {} 14606c3fb27SDimitry Andric }; 14706c3fb27SDimitry Andric 14806c3fb27SDimitry Andric struct InjectedInvariant { 14906c3fb27SDimitry Andric ICmpInst::Predicate Pred; 15006c3fb27SDimitry Andric Value *LHS; 15106c3fb27SDimitry Andric Value *RHS; 15206c3fb27SDimitry Andric BasicBlock *InLoopSucc; 15306c3fb27SDimitry Andric 15406c3fb27SDimitry Andric InjectedInvariant(ICmpInst::Predicate Pred, Value *LHS, Value *RHS, 15506c3fb27SDimitry Andric BasicBlock *InLoopSucc) 15606c3fb27SDimitry Andric : Pred(Pred), LHS(LHS), RHS(RHS), InLoopSucc(InLoopSucc) {} 15706c3fb27SDimitry Andric }; 15806c3fb27SDimitry Andric 159bdd1243dSDimitry Andric struct NonTrivialUnswitchCandidate { 160bdd1243dSDimitry Andric Instruction *TI = nullptr; 161bdd1243dSDimitry Andric TinyPtrVector<Value *> Invariants; 162bdd1243dSDimitry Andric std::optional<InstructionCost> Cost; 16306c3fb27SDimitry Andric std::optional<InjectedInvariant> PendingInjection; 164bdd1243dSDimitry Andric NonTrivialUnswitchCandidate( 165bdd1243dSDimitry Andric Instruction *TI, ArrayRef<Value *> Invariants, 16606c3fb27SDimitry Andric std::optional<InstructionCost> Cost = std::nullopt, 16706c3fb27SDimitry Andric std::optional<InjectedInvariant> PendingInjection = std::nullopt) 16806c3fb27SDimitry Andric : TI(TI), Invariants(Invariants), Cost(Cost), 16906c3fb27SDimitry Andric PendingInjection(PendingInjection) {}; 17006c3fb27SDimitry Andric 17106c3fb27SDimitry Andric bool hasPendingInjection() const { return PendingInjection.has_value(); } 172bdd1243dSDimitry Andric }; 173bdd1243dSDimitry Andric } // end anonymous namespace. 174bdd1243dSDimitry Andric 17581ad6265SDimitry Andric // Helper to skip (select x, true, false), which matches both a logical AND and 17681ad6265SDimitry Andric // OR and can confuse code that tries to determine if \p Cond is either a 17781ad6265SDimitry Andric // logical AND or OR but not both. 17881ad6265SDimitry Andric static Value *skipTrivialSelect(Value *Cond) { 17981ad6265SDimitry Andric Value *CondNext; 18081ad6265SDimitry Andric while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero()))) 18181ad6265SDimitry Andric Cond = CondNext; 18281ad6265SDimitry Andric return Cond; 18381ad6265SDimitry Andric } 18481ad6265SDimitry Andric 1850b57cec5SDimitry Andric /// Collect all of the loop invariant input values transitively used by the 1860b57cec5SDimitry Andric /// homogeneous instruction graph from a given root. 1870b57cec5SDimitry Andric /// 1880b57cec5SDimitry Andric /// This essentially walks from a root recursively through loop variant operands 18981ad6265SDimitry Andric /// which have perform the same logical operation (AND or OR) and finds all 19081ad6265SDimitry Andric /// inputs which are loop invariant. For some operations these can be 19181ad6265SDimitry Andric /// re-associated and unswitched out of the loop entirely. 1920b57cec5SDimitry Andric static TinyPtrVector<Value *> 193bdd1243dSDimitry Andric collectHomogenousInstGraphLoopInvariants(const Loop &L, Instruction &Root, 194bdd1243dSDimitry Andric const LoopInfo &LI) { 1950b57cec5SDimitry Andric assert(!L.isLoopInvariant(&Root) && 1960b57cec5SDimitry Andric "Only need to walk the graph if root itself is not invariant."); 1970b57cec5SDimitry Andric TinyPtrVector<Value *> Invariants; 1980b57cec5SDimitry Andric 199fe6060f1SDimitry Andric bool IsRootAnd = match(&Root, m_LogicalAnd()); 200fe6060f1SDimitry Andric bool IsRootOr = match(&Root, m_LogicalOr()); 201fe6060f1SDimitry Andric 2020b57cec5SDimitry Andric // Build a worklist and recurse through operators collecting invariants. 2030b57cec5SDimitry Andric SmallVector<Instruction *, 4> Worklist; 2040b57cec5SDimitry Andric SmallPtrSet<Instruction *, 8> Visited; 2050b57cec5SDimitry Andric Worklist.push_back(&Root); 2060b57cec5SDimitry Andric Visited.insert(&Root); 2070b57cec5SDimitry Andric do { 2080b57cec5SDimitry Andric Instruction &I = *Worklist.pop_back_val(); 2090b57cec5SDimitry Andric for (Value *OpV : I.operand_values()) { 2100b57cec5SDimitry Andric // Skip constants as unswitching isn't interesting for them. 2110b57cec5SDimitry Andric if (isa<Constant>(OpV)) 2120b57cec5SDimitry Andric continue; 2130b57cec5SDimitry Andric 2140b57cec5SDimitry Andric // Add it to our result if loop invariant. 2150b57cec5SDimitry Andric if (L.isLoopInvariant(OpV)) { 2160b57cec5SDimitry Andric Invariants.push_back(OpV); 2170b57cec5SDimitry Andric continue; 2180b57cec5SDimitry Andric } 2190b57cec5SDimitry Andric 2200b57cec5SDimitry Andric // If not an instruction with the same opcode, nothing we can do. 22181ad6265SDimitry Andric Instruction *OpI = dyn_cast<Instruction>(skipTrivialSelect(OpV)); 2220b57cec5SDimitry Andric 223fe6060f1SDimitry Andric if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) || 224fe6060f1SDimitry Andric (IsRootOr && match(OpI, m_LogicalOr())))) { 2250b57cec5SDimitry Andric // Visit this operand. 2260b57cec5SDimitry Andric if (Visited.insert(OpI).second) 2270b57cec5SDimitry Andric Worklist.push_back(OpI); 2280b57cec5SDimitry Andric } 229fe6060f1SDimitry Andric } 2300b57cec5SDimitry Andric } while (!Worklist.empty()); 2310b57cec5SDimitry Andric 2320b57cec5SDimitry Andric return Invariants; 2330b57cec5SDimitry Andric } 2340b57cec5SDimitry Andric 235bdd1243dSDimitry Andric static void replaceLoopInvariantUses(const Loop &L, Value *Invariant, 2360b57cec5SDimitry Andric Constant &Replacement) { 2370b57cec5SDimitry Andric assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?"); 2380b57cec5SDimitry Andric 2390b57cec5SDimitry Andric // Replace uses of LIC in the loop with the given constant. 240fe6060f1SDimitry Andric // We use make_early_inc_range as set invalidates the iterator. 241fe6060f1SDimitry Andric for (Use &U : llvm::make_early_inc_range(Invariant->uses())) { 242fe6060f1SDimitry Andric Instruction *UserI = dyn_cast<Instruction>(U.getUser()); 2430b57cec5SDimitry Andric 2440b57cec5SDimitry Andric // Replace this use within the loop body. 2450b57cec5SDimitry Andric if (UserI && L.contains(UserI)) 246fe6060f1SDimitry Andric U.set(&Replacement); 2470b57cec5SDimitry Andric } 2480b57cec5SDimitry Andric } 2490b57cec5SDimitry Andric 2500b57cec5SDimitry Andric /// Check that all the LCSSA PHI nodes in the loop exit block have trivial 2510b57cec5SDimitry Andric /// incoming values along this edge. 252bdd1243dSDimitry Andric static bool areLoopExitPHIsLoopInvariant(const Loop &L, 253bdd1243dSDimitry Andric const BasicBlock &ExitingBB, 254bdd1243dSDimitry Andric const BasicBlock &ExitBB) { 255bdd1243dSDimitry Andric for (const Instruction &I : ExitBB) { 2560b57cec5SDimitry Andric auto *PN = dyn_cast<PHINode>(&I); 2570b57cec5SDimitry Andric if (!PN) 2580b57cec5SDimitry Andric // No more PHIs to check. 2590b57cec5SDimitry Andric return true; 2600b57cec5SDimitry Andric 2610b57cec5SDimitry Andric // If the incoming value for this edge isn't loop invariant the unswitch 2620b57cec5SDimitry Andric // won't be trivial. 2630b57cec5SDimitry Andric if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB))) 2640b57cec5SDimitry Andric return false; 2650b57cec5SDimitry Andric } 2660b57cec5SDimitry Andric llvm_unreachable("Basic blocks should never be empty!"); 2670b57cec5SDimitry Andric } 2680b57cec5SDimitry Andric 269fe6060f1SDimitry Andric /// Copy a set of loop invariant values \p ToDuplicate and insert them at the 270fe6060f1SDimitry Andric /// end of \p BB and conditionally branch on the copied condition. We only 271fe6060f1SDimitry Andric /// branch on a single value. 272349cc55cSDimitry Andric static void buildPartialUnswitchConditionalBranch( 273349cc55cSDimitry Andric BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction, 27481ad6265SDimitry Andric BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, 275bdd1243dSDimitry Andric const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) { 2760b57cec5SDimitry Andric IRBuilder<> IRB(&BB); 2770b57cec5SDimitry Andric 27881ad6265SDimitry Andric SmallVector<Value *> FrozenInvariants; 27981ad6265SDimitry Andric for (Value *Inv : Invariants) { 28081ad6265SDimitry Andric if (InsertFreeze && !isGuaranteedNotToBeUndefOrPoison(Inv, AC, I, &DT)) 28181ad6265SDimitry Andric Inv = IRB.CreateFreeze(Inv, Inv->getName() + ".fr"); 28281ad6265SDimitry Andric FrozenInvariants.push_back(Inv); 28381ad6265SDimitry Andric } 28481ad6265SDimitry Andric 28581ad6265SDimitry Andric Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants) 28681ad6265SDimitry Andric : IRB.CreateAnd(FrozenInvariants); 2870b57cec5SDimitry Andric IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc, 2880b57cec5SDimitry Andric Direction ? &NormalSucc : &UnswitchedSucc); 2890b57cec5SDimitry Andric } 2900b57cec5SDimitry Andric 291fe6060f1SDimitry Andric /// Copy a set of loop invariant values, and conditionally branch on them. 292fe6060f1SDimitry Andric static void buildPartialInvariantUnswitchConditionalBranch( 293fe6060f1SDimitry Andric BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction, 294fe6060f1SDimitry Andric BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L, 295fe6060f1SDimitry Andric MemorySSAUpdater *MSSAU) { 296fe6060f1SDimitry Andric ValueToValueMapTy VMap; 297fe6060f1SDimitry Andric for (auto *Val : reverse(ToDuplicate)) { 298fe6060f1SDimitry Andric Instruction *Inst = cast<Instruction>(Val); 299fe6060f1SDimitry Andric Instruction *NewInst = Inst->clone(); 300bdd1243dSDimitry Andric NewInst->insertInto(&BB, BB.end()); 301fe6060f1SDimitry Andric RemapInstruction(NewInst, VMap, 302fe6060f1SDimitry Andric RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 303fe6060f1SDimitry Andric VMap[Val] = NewInst; 304fe6060f1SDimitry Andric 305fe6060f1SDimitry Andric if (!MSSAU) 306fe6060f1SDimitry Andric continue; 307fe6060f1SDimitry Andric 308fe6060f1SDimitry Andric MemorySSA *MSSA = MSSAU->getMemorySSA(); 309fe6060f1SDimitry Andric if (auto *MemUse = 310fe6060f1SDimitry Andric dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) { 311fe6060f1SDimitry Andric auto *DefiningAccess = MemUse->getDefiningAccess(); 312fe6060f1SDimitry Andric // Get the first defining access before the loop. 313fe6060f1SDimitry Andric while (L.contains(DefiningAccess->getBlock())) { 314fe6060f1SDimitry Andric // If the defining access is a MemoryPhi, get the incoming 315fe6060f1SDimitry Andric // value for the pre-header as defining access. 316fe6060f1SDimitry Andric if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) 317fe6060f1SDimitry Andric DefiningAccess = 318fe6060f1SDimitry Andric MemPhi->getIncomingValueForBlock(L.getLoopPreheader()); 319fe6060f1SDimitry Andric else 320fe6060f1SDimitry Andric DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess(); 321fe6060f1SDimitry Andric } 322fe6060f1SDimitry Andric MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess, 323fe6060f1SDimitry Andric NewInst->getParent(), 324fe6060f1SDimitry Andric MemorySSA::BeforeTerminator); 325fe6060f1SDimitry Andric } 326fe6060f1SDimitry Andric } 327fe6060f1SDimitry Andric 328fe6060f1SDimitry Andric IRBuilder<> IRB(&BB); 329fe6060f1SDimitry Andric Value *Cond = VMap[ToDuplicate[0]]; 330fe6060f1SDimitry Andric IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc, 331fe6060f1SDimitry Andric Direction ? &NormalSucc : &UnswitchedSucc); 332fe6060f1SDimitry Andric } 333fe6060f1SDimitry Andric 3340b57cec5SDimitry Andric /// Rewrite the PHI nodes in an unswitched loop exit basic block. 3350b57cec5SDimitry Andric /// 3360b57cec5SDimitry Andric /// Requires that the loop exit and unswitched basic block are the same, and 3370b57cec5SDimitry Andric /// that the exiting block was a unique predecessor of that block. Rewrites the 3380b57cec5SDimitry Andric /// PHI nodes in that block such that what were LCSSA PHI nodes become trivial 3390b57cec5SDimitry Andric /// PHI nodes from the old preheader that now contains the unswitched 3400b57cec5SDimitry Andric /// terminator. 3410b57cec5SDimitry Andric static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, 3420b57cec5SDimitry Andric BasicBlock &OldExitingBB, 3430b57cec5SDimitry Andric BasicBlock &OldPH) { 3440b57cec5SDimitry Andric for (PHINode &PN : UnswitchedBB.phis()) { 3450b57cec5SDimitry Andric // When the loop exit is directly unswitched we just need to update the 3460b57cec5SDimitry Andric // incoming basic block. We loop to handle weird cases with repeated 3470b57cec5SDimitry Andric // incoming blocks, but expect to typically only have one operand here. 3480b57cec5SDimitry Andric for (auto i : seq<int>(0, PN.getNumOperands())) { 3490b57cec5SDimitry Andric assert(PN.getIncomingBlock(i) == &OldExitingBB && 3500b57cec5SDimitry Andric "Found incoming block different from unique predecessor!"); 3510b57cec5SDimitry Andric PN.setIncomingBlock(i, &OldPH); 3520b57cec5SDimitry Andric } 3530b57cec5SDimitry Andric } 3540b57cec5SDimitry Andric } 3550b57cec5SDimitry Andric 3560b57cec5SDimitry Andric /// Rewrite the PHI nodes in the loop exit basic block and the split off 3570b57cec5SDimitry Andric /// unswitched block. 3580b57cec5SDimitry Andric /// 3590b57cec5SDimitry Andric /// Because the exit block remains an exit from the loop, this rewrites the 3600b57cec5SDimitry Andric /// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI 3610b57cec5SDimitry Andric /// nodes into the unswitched basic block to select between the value in the 3620b57cec5SDimitry Andric /// old preheader and the loop exit. 3630b57cec5SDimitry Andric static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, 3640b57cec5SDimitry Andric BasicBlock &UnswitchedBB, 3650b57cec5SDimitry Andric BasicBlock &OldExitingBB, 3660b57cec5SDimitry Andric BasicBlock &OldPH, 3670b57cec5SDimitry Andric bool FullUnswitch) { 3680b57cec5SDimitry Andric assert(&ExitBB != &UnswitchedBB && 3690b57cec5SDimitry Andric "Must have different loop exit and unswitched blocks!"); 3705f757f3fSDimitry Andric BasicBlock::iterator InsertPt = UnswitchedBB.begin(); 3710b57cec5SDimitry Andric for (PHINode &PN : ExitBB.phis()) { 3720b57cec5SDimitry Andric auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2, 3735f757f3fSDimitry Andric PN.getName() + ".split"); 3745f757f3fSDimitry Andric NewPN->insertBefore(InsertPt); 3750b57cec5SDimitry Andric 3760b57cec5SDimitry Andric // Walk backwards over the old PHI node's inputs to minimize the cost of 3770b57cec5SDimitry Andric // removing each one. We have to do this weird loop manually so that we 3780b57cec5SDimitry Andric // create the same number of new incoming edges in the new PHI as we expect 3790b57cec5SDimitry Andric // each case-based edge to be included in the unswitched switch in some 3800b57cec5SDimitry Andric // cases. 3810b57cec5SDimitry Andric // FIXME: This is really, really gross. It would be much cleaner if LLVM 3820b57cec5SDimitry Andric // allowed us to create a single entry for a predecessor block without 3830b57cec5SDimitry Andric // having separate entries for each "edge" even though these edges are 3840b57cec5SDimitry Andric // required to produce identical results. 3850b57cec5SDimitry Andric for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) { 3860b57cec5SDimitry Andric if (PN.getIncomingBlock(i) != &OldExitingBB) 3870b57cec5SDimitry Andric continue; 3880b57cec5SDimitry Andric 3890b57cec5SDimitry Andric Value *Incoming = PN.getIncomingValue(i); 3900b57cec5SDimitry Andric if (FullUnswitch) 3910b57cec5SDimitry Andric // No more edge from the old exiting block to the exit block. 3920b57cec5SDimitry Andric PN.removeIncomingValue(i); 3930b57cec5SDimitry Andric 3940b57cec5SDimitry Andric NewPN->addIncoming(Incoming, &OldPH); 3950b57cec5SDimitry Andric } 3960b57cec5SDimitry Andric 3970b57cec5SDimitry Andric // Now replace the old PHI with the new one and wire the old one in as an 3980b57cec5SDimitry Andric // input to the new one. 3990b57cec5SDimitry Andric PN.replaceAllUsesWith(NewPN); 4000b57cec5SDimitry Andric NewPN->addIncoming(&PN, &ExitBB); 4010b57cec5SDimitry Andric } 4020b57cec5SDimitry Andric } 4030b57cec5SDimitry Andric 4040b57cec5SDimitry Andric /// Hoist the current loop up to the innermost loop containing a remaining exit. 4050b57cec5SDimitry Andric /// 4060b57cec5SDimitry Andric /// Because we've removed an exit from the loop, we may have changed the set of 4070b57cec5SDimitry Andric /// loops reachable and need to move the current loop up the loop nest or even 4080b57cec5SDimitry Andric /// to an entirely separate nest. 4090b57cec5SDimitry Andric static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader, 4100b57cec5SDimitry Andric DominatorTree &DT, LoopInfo &LI, 411480093f4SDimitry Andric MemorySSAUpdater *MSSAU, ScalarEvolution *SE) { 4120b57cec5SDimitry Andric // If the loop is already at the top level, we can't hoist it anywhere. 4130b57cec5SDimitry Andric Loop *OldParentL = L.getParentLoop(); 4140b57cec5SDimitry Andric if (!OldParentL) 4150b57cec5SDimitry Andric return; 4160b57cec5SDimitry Andric 4170b57cec5SDimitry Andric SmallVector<BasicBlock *, 4> Exits; 4180b57cec5SDimitry Andric L.getExitBlocks(Exits); 4190b57cec5SDimitry Andric Loop *NewParentL = nullptr; 4200b57cec5SDimitry Andric for (auto *ExitBB : Exits) 4210b57cec5SDimitry Andric if (Loop *ExitL = LI.getLoopFor(ExitBB)) 4220b57cec5SDimitry Andric if (!NewParentL || NewParentL->contains(ExitL)) 4230b57cec5SDimitry Andric NewParentL = ExitL; 4240b57cec5SDimitry Andric 4250b57cec5SDimitry Andric if (NewParentL == OldParentL) 4260b57cec5SDimitry Andric return; 4270b57cec5SDimitry Andric 4280b57cec5SDimitry Andric // The new parent loop (if different) should always contain the old one. 4290b57cec5SDimitry Andric if (NewParentL) 4300b57cec5SDimitry Andric assert(NewParentL->contains(OldParentL) && 4310b57cec5SDimitry Andric "Can only hoist this loop up the nest!"); 4320b57cec5SDimitry Andric 4330b57cec5SDimitry Andric // The preheader will need to move with the body of this loop. However, 4340b57cec5SDimitry Andric // because it isn't in this loop we also need to update the primary loop map. 4350b57cec5SDimitry Andric assert(OldParentL == LI.getLoopFor(&Preheader) && 4360b57cec5SDimitry Andric "Parent loop of this loop should contain this loop's preheader!"); 4370b57cec5SDimitry Andric LI.changeLoopFor(&Preheader, NewParentL); 4380b57cec5SDimitry Andric 4390b57cec5SDimitry Andric // Remove this loop from its old parent. 4400b57cec5SDimitry Andric OldParentL->removeChildLoop(&L); 4410b57cec5SDimitry Andric 4420b57cec5SDimitry Andric // Add the loop either to the new parent or as a top-level loop. 4430b57cec5SDimitry Andric if (NewParentL) 4440b57cec5SDimitry Andric NewParentL->addChildLoop(&L); 4450b57cec5SDimitry Andric else 4460b57cec5SDimitry Andric LI.addTopLevelLoop(&L); 4470b57cec5SDimitry Andric 4480b57cec5SDimitry Andric // Remove this loops blocks from the old parent and every other loop up the 4490b57cec5SDimitry Andric // nest until reaching the new parent. Also update all of these 4500b57cec5SDimitry Andric // no-longer-containing loops to reflect the nesting change. 4510b57cec5SDimitry Andric for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL; 4520b57cec5SDimitry Andric OldContainingL = OldContainingL->getParentLoop()) { 4530b57cec5SDimitry Andric llvm::erase_if(OldContainingL->getBlocksVector(), 4540b57cec5SDimitry Andric [&](const BasicBlock *BB) { 4550b57cec5SDimitry Andric return BB == &Preheader || L.contains(BB); 4560b57cec5SDimitry Andric }); 4570b57cec5SDimitry Andric 4580b57cec5SDimitry Andric OldContainingL->getBlocksSet().erase(&Preheader); 4590b57cec5SDimitry Andric for (BasicBlock *BB : L.blocks()) 4600b57cec5SDimitry Andric OldContainingL->getBlocksSet().erase(BB); 4610b57cec5SDimitry Andric 4620b57cec5SDimitry Andric // Because we just hoisted a loop out of this one, we have essentially 4630b57cec5SDimitry Andric // created new exit paths from it. That means we need to form LCSSA PHI 4640b57cec5SDimitry Andric // nodes for values used in the no-longer-nested loop. 465480093f4SDimitry Andric formLCSSA(*OldContainingL, DT, &LI, SE); 4660b57cec5SDimitry Andric 4670b57cec5SDimitry Andric // We shouldn't need to form dedicated exits because the exit introduced 4680b57cec5SDimitry Andric // here is the (just split by unswitching) preheader. However, after trivial 4690b57cec5SDimitry Andric // unswitching it is possible to get new non-dedicated exits out of parent 4700b57cec5SDimitry Andric // loop so let's conservatively form dedicated exit blocks and figure out 4710b57cec5SDimitry Andric // if we can optimize later. 4720b57cec5SDimitry Andric formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU, 4730b57cec5SDimitry Andric /*PreserveLCSSA*/ true); 4740b57cec5SDimitry Andric } 4750b57cec5SDimitry Andric } 4760b57cec5SDimitry Andric 477480093f4SDimitry Andric // Return the top-most loop containing ExitBB and having ExitBB as exiting block 478480093f4SDimitry Andric // or the loop containing ExitBB, if there is no parent loop containing ExitBB 479480093f4SDimitry Andric // as exiting block. 48006c3fb27SDimitry Andric static Loop *getTopMostExitingLoop(const BasicBlock *ExitBB, 481bdd1243dSDimitry Andric const LoopInfo &LI) { 48206c3fb27SDimitry Andric Loop *TopMost = LI.getLoopFor(ExitBB); 48306c3fb27SDimitry Andric Loop *Current = TopMost; 484480093f4SDimitry Andric while (Current) { 485480093f4SDimitry Andric if (Current->isLoopExiting(ExitBB)) 486480093f4SDimitry Andric TopMost = Current; 487480093f4SDimitry Andric Current = Current->getParentLoop(); 488480093f4SDimitry Andric } 489480093f4SDimitry Andric return TopMost; 490480093f4SDimitry Andric } 491480093f4SDimitry Andric 4920b57cec5SDimitry Andric /// Unswitch a trivial branch if the condition is loop invariant. 4930b57cec5SDimitry Andric /// 4940b57cec5SDimitry Andric /// This routine should only be called when loop code leading to the branch has 4950b57cec5SDimitry Andric /// been validated as trivial (no side effects). This routine checks if the 4960b57cec5SDimitry Andric /// condition is invariant and one of the successors is a loop exit. This 4970b57cec5SDimitry Andric /// allows us to unswitch without duplicating the loop, making it trivial. 4980b57cec5SDimitry Andric /// 4990b57cec5SDimitry Andric /// If this routine fails to unswitch the branch it returns false. 5000b57cec5SDimitry Andric /// 5010b57cec5SDimitry Andric /// If the branch can be unswitched, this routine splits the preheader and 5020b57cec5SDimitry Andric /// hoists the branch above that split. Preserves loop simplified form 5030b57cec5SDimitry Andric /// (splitting the exit block as necessary). It simplifies the branch within 5040b57cec5SDimitry Andric /// the loop to an unconditional branch but doesn't remove it entirely. Further 505fe6060f1SDimitry Andric /// cleanup can be done with some simplifycfg like pass. 5060b57cec5SDimitry Andric /// 5070b57cec5SDimitry Andric /// If `SE` is not null, it will be updated based on the potential loop SCEVs 5080b57cec5SDimitry Andric /// invalidated by this. 5090b57cec5SDimitry Andric static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, 5100b57cec5SDimitry Andric LoopInfo &LI, ScalarEvolution *SE, 5110b57cec5SDimitry Andric MemorySSAUpdater *MSSAU) { 5120b57cec5SDimitry Andric assert(BI.isConditional() && "Can only unswitch a conditional branch!"); 5130b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n"); 5140b57cec5SDimitry Andric 5150b57cec5SDimitry Andric // The loop invariant values that we want to unswitch. 5160b57cec5SDimitry Andric TinyPtrVector<Value *> Invariants; 5170b57cec5SDimitry Andric 5180b57cec5SDimitry Andric // When true, we're fully unswitching the branch rather than just unswitching 5190b57cec5SDimitry Andric // some input conditions to the branch. 5200b57cec5SDimitry Andric bool FullUnswitch = false; 5210b57cec5SDimitry Andric 52281ad6265SDimitry Andric Value *Cond = skipTrivialSelect(BI.getCondition()); 52381ad6265SDimitry Andric if (L.isLoopInvariant(Cond)) { 52481ad6265SDimitry Andric Invariants.push_back(Cond); 5250b57cec5SDimitry Andric FullUnswitch = true; 5260b57cec5SDimitry Andric } else { 52781ad6265SDimitry Andric if (auto *CondInst = dyn_cast<Instruction>(Cond)) 5280b57cec5SDimitry Andric Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI); 529fe6060f1SDimitry Andric if (Invariants.empty()) { 530fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n"); 5310b57cec5SDimitry Andric return false; 5320b57cec5SDimitry Andric } 533fe6060f1SDimitry Andric } 5340b57cec5SDimitry Andric 5350b57cec5SDimitry Andric // Check that one of the branch's successors exits, and which one. 5360b57cec5SDimitry Andric bool ExitDirection = true; 5370b57cec5SDimitry Andric int LoopExitSuccIdx = 0; 5380b57cec5SDimitry Andric auto *LoopExitBB = BI.getSuccessor(0); 5390b57cec5SDimitry Andric if (L.contains(LoopExitBB)) { 5400b57cec5SDimitry Andric ExitDirection = false; 5410b57cec5SDimitry Andric LoopExitSuccIdx = 1; 5420b57cec5SDimitry Andric LoopExitBB = BI.getSuccessor(1); 543fe6060f1SDimitry Andric if (L.contains(LoopExitBB)) { 544fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n"); 5450b57cec5SDimitry Andric return false; 5460b57cec5SDimitry Andric } 547fe6060f1SDimitry Andric } 5480b57cec5SDimitry Andric auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx); 5490b57cec5SDimitry Andric auto *ParentBB = BI.getParent(); 550fe6060f1SDimitry Andric if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) { 551fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n"); 5520b57cec5SDimitry Andric return false; 553fe6060f1SDimitry Andric } 5540b57cec5SDimitry Andric 5550b57cec5SDimitry Andric // When unswitching only part of the branch's condition, we need the exit 5560b57cec5SDimitry Andric // block to be reached directly from the partially unswitched input. This can 5570b57cec5SDimitry Andric // be done when the exit block is along the true edge and the branch condition 5580b57cec5SDimitry Andric // is a graph of `or` operations, or the exit block is along the false edge 5590b57cec5SDimitry Andric // and the condition is a graph of `and` operations. 5600b57cec5SDimitry Andric if (!FullUnswitch) { 56181ad6265SDimitry Andric if (ExitDirection ? !match(Cond, m_LogicalOr()) 56281ad6265SDimitry Andric : !match(Cond, m_LogicalAnd())) { 563fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " Branch condition is in improper form for " 564fe6060f1SDimitry Andric "non-full unswitch!\n"); 5650b57cec5SDimitry Andric return false; 5660b57cec5SDimitry Andric } 5670b57cec5SDimitry Andric } 5680b57cec5SDimitry Andric 5690b57cec5SDimitry Andric LLVM_DEBUG({ 5700b57cec5SDimitry Andric dbgs() << " unswitching trivial invariant conditions for: " << BI 5710b57cec5SDimitry Andric << "\n"; 5720b57cec5SDimitry Andric for (Value *Invariant : Invariants) { 5730b57cec5SDimitry Andric dbgs() << " " << *Invariant << " == true"; 5740b57cec5SDimitry Andric if (Invariant != Invariants.back()) 5750b57cec5SDimitry Andric dbgs() << " ||"; 5760b57cec5SDimitry Andric dbgs() << "\n"; 5770b57cec5SDimitry Andric } 5780b57cec5SDimitry Andric }); 5790b57cec5SDimitry Andric 5800b57cec5SDimitry Andric // If we have scalar evolutions, we need to invalidate them including this 581480093f4SDimitry Andric // loop, the loop containing the exit block and the topmost parent loop 582480093f4SDimitry Andric // exiting via LoopExitBB. 5830b57cec5SDimitry Andric if (SE) { 584bdd1243dSDimitry Andric if (const Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI)) 5850b57cec5SDimitry Andric SE->forgetLoop(ExitL); 5860b57cec5SDimitry Andric else 5870b57cec5SDimitry Andric // Forget the entire nest as this exits the entire nest. 5880b57cec5SDimitry Andric SE->forgetTopmostLoop(&L); 589bdd1243dSDimitry Andric SE->forgetBlockAndLoopDispositions(); 5900b57cec5SDimitry Andric } 5910b57cec5SDimitry Andric 5920b57cec5SDimitry Andric if (MSSAU && VerifyMemorySSA) 5930b57cec5SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA(); 5940b57cec5SDimitry Andric 5950b57cec5SDimitry Andric // Split the preheader, so that we know that there is a safe place to insert 5960b57cec5SDimitry Andric // the conditional branch. We will change the preheader to have a conditional 5970b57cec5SDimitry Andric // branch on LoopCond. 5980b57cec5SDimitry Andric BasicBlock *OldPH = L.getLoopPreheader(); 5990b57cec5SDimitry Andric BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU); 6000b57cec5SDimitry Andric 6010b57cec5SDimitry Andric // Now that we have a place to insert the conditional branch, create a place 6020b57cec5SDimitry Andric // to branch to: this is the exit block out of the loop that we are 6030b57cec5SDimitry Andric // unswitching. We need to split this if there are other loop predecessors. 6040b57cec5SDimitry Andric // Because the loop is in simplified form, *any* other predecessor is enough. 6050b57cec5SDimitry Andric BasicBlock *UnswitchedBB; 6060b57cec5SDimitry Andric if (FullUnswitch && LoopExitBB->getUniquePredecessor()) { 6070b57cec5SDimitry Andric assert(LoopExitBB->getUniquePredecessor() == BI.getParent() && 6080b57cec5SDimitry Andric "A branch's parent isn't a predecessor!"); 6090b57cec5SDimitry Andric UnswitchedBB = LoopExitBB; 6100b57cec5SDimitry Andric } else { 6110b57cec5SDimitry Andric UnswitchedBB = 6125f757f3fSDimitry Andric SplitBlock(LoopExitBB, LoopExitBB->begin(), &DT, &LI, MSSAU, "", false); 6130b57cec5SDimitry Andric } 6140b57cec5SDimitry Andric 6150b57cec5SDimitry Andric if (MSSAU && VerifyMemorySSA) 6160b57cec5SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA(); 6170b57cec5SDimitry Andric 6180b57cec5SDimitry Andric // Actually move the invariant uses into the unswitched position. If possible, 6190b57cec5SDimitry Andric // we do this by moving the instructions, but when doing partial unswitching 6200b57cec5SDimitry Andric // we do it by building a new merge of the values in the unswitched position. 6210b57cec5SDimitry Andric OldPH->getTerminator()->eraseFromParent(); 6220b57cec5SDimitry Andric if (FullUnswitch) { 6230b57cec5SDimitry Andric // If fully unswitching, we can use the existing branch instruction. 6240b57cec5SDimitry Andric // Splice it into the old PH to gate reaching the new preheader and re-point 6250b57cec5SDimitry Andric // its successors. 6265f757f3fSDimitry Andric BI.moveBefore(*OldPH, OldPH->end()); 62781ad6265SDimitry Andric BI.setCondition(Cond); 6280b57cec5SDimitry Andric if (MSSAU) { 6290b57cec5SDimitry Andric // Temporarily clone the terminator, to make MSSA update cheaper by 6300b57cec5SDimitry Andric // separating "insert edge" updates from "remove edge" ones. 631bdd1243dSDimitry Andric BI.clone()->insertInto(ParentBB, ParentBB->end()); 6320b57cec5SDimitry Andric } else { 6330b57cec5SDimitry Andric // Create a new unconditional branch that will continue the loop as a new 6340b57cec5SDimitry Andric // terminator. 6350fca6ea1SDimitry Andric Instruction *NewBI = BranchInst::Create(ContinueBB, ParentBB); 6360fca6ea1SDimitry Andric NewBI->setDebugLoc(BI.getDebugLoc()); 6370b57cec5SDimitry Andric } 6380b57cec5SDimitry Andric BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB); 6390b57cec5SDimitry Andric BI.setSuccessor(1 - LoopExitSuccIdx, NewPH); 6400b57cec5SDimitry Andric } else { 6410b57cec5SDimitry Andric // Only unswitching a subset of inputs to the condition, so we will need to 6420b57cec5SDimitry Andric // build a new branch that merges the invariant inputs. 6430b57cec5SDimitry Andric if (ExitDirection) 64481ad6265SDimitry Andric assert(match(skipTrivialSelect(BI.getCondition()), m_LogicalOr()) && 645fe6060f1SDimitry Andric "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the " 646fe6060f1SDimitry Andric "condition!"); 6470b57cec5SDimitry Andric else 64881ad6265SDimitry Andric assert(match(skipTrivialSelect(BI.getCondition()), m_LogicalAnd()) && 649fe6060f1SDimitry Andric "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the" 650fe6060f1SDimitry Andric " condition!"); 65181ad6265SDimitry Andric buildPartialUnswitchConditionalBranch( 65281ad6265SDimitry Andric *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH, 65381ad6265SDimitry Andric FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT); 6540b57cec5SDimitry Andric } 6550b57cec5SDimitry Andric 6560b57cec5SDimitry Andric // Update the dominator tree with the added edge. 6570b57cec5SDimitry Andric DT.insertEdge(OldPH, UnswitchedBB); 6580b57cec5SDimitry Andric 6590b57cec5SDimitry Andric // After the dominator tree was updated with the added edge, update MemorySSA 6600b57cec5SDimitry Andric // if available. 6610b57cec5SDimitry Andric if (MSSAU) { 6620b57cec5SDimitry Andric SmallVector<CFGUpdate, 1> Updates; 6630b57cec5SDimitry Andric Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB}); 6640b57cec5SDimitry Andric MSSAU->applyInsertUpdates(Updates, DT); 6650b57cec5SDimitry Andric } 6660b57cec5SDimitry Andric 6670b57cec5SDimitry Andric // Finish updating dominator tree and memory ssa for full unswitch. 6680b57cec5SDimitry Andric if (FullUnswitch) { 6690b57cec5SDimitry Andric if (MSSAU) { 6700fca6ea1SDimitry Andric Instruction *Term = ParentBB->getTerminator(); 6710fca6ea1SDimitry Andric // Remove the cloned branch instruction and create unconditional branch 6720fca6ea1SDimitry Andric // now. 6730fca6ea1SDimitry Andric Instruction *NewBI = BranchInst::Create(ContinueBB, ParentBB); 6740fca6ea1SDimitry Andric NewBI->setDebugLoc(Term->getDebugLoc()); 6750fca6ea1SDimitry Andric Term->eraseFromParent(); 6760b57cec5SDimitry Andric MSSAU->removeEdge(ParentBB, LoopExitBB); 6770b57cec5SDimitry Andric } 6780b57cec5SDimitry Andric DT.deleteEdge(ParentBB, LoopExitBB); 6790b57cec5SDimitry Andric } 6800b57cec5SDimitry Andric 6810b57cec5SDimitry Andric if (MSSAU && VerifyMemorySSA) 6820b57cec5SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA(); 6830b57cec5SDimitry Andric 6840b57cec5SDimitry Andric // Rewrite the relevant PHI nodes. 6850b57cec5SDimitry Andric if (UnswitchedBB == LoopExitBB) 6860b57cec5SDimitry Andric rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH); 6870b57cec5SDimitry Andric else 6880b57cec5SDimitry Andric rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB, 6890b57cec5SDimitry Andric *ParentBB, *OldPH, FullUnswitch); 6900b57cec5SDimitry Andric 6910b57cec5SDimitry Andric // The constant we can replace all of our invariants with inside the loop 6920b57cec5SDimitry Andric // body. If any of the invariants have a value other than this the loop won't 6930b57cec5SDimitry Andric // be entered. 6940b57cec5SDimitry Andric ConstantInt *Replacement = ExitDirection 6950b57cec5SDimitry Andric ? ConstantInt::getFalse(BI.getContext()) 6960b57cec5SDimitry Andric : ConstantInt::getTrue(BI.getContext()); 6970b57cec5SDimitry Andric 6980b57cec5SDimitry Andric // Since this is an i1 condition we can also trivially replace uses of it 6990b57cec5SDimitry Andric // within the loop with a constant. 7000b57cec5SDimitry Andric for (Value *Invariant : Invariants) 7010b57cec5SDimitry Andric replaceLoopInvariantUses(L, Invariant, *Replacement); 7020b57cec5SDimitry Andric 7030b57cec5SDimitry Andric // If this was full unswitching, we may have changed the nesting relationship 7040b57cec5SDimitry Andric // for this loop so hoist it to its correct parent if needed. 7050b57cec5SDimitry Andric if (FullUnswitch) 706480093f4SDimitry Andric hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE); 7070b57cec5SDimitry Andric 7080b57cec5SDimitry Andric if (MSSAU && VerifyMemorySSA) 7090b57cec5SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA(); 7100b57cec5SDimitry Andric 7110b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n"); 7120b57cec5SDimitry Andric ++NumTrivial; 7130b57cec5SDimitry Andric ++NumBranches; 7140b57cec5SDimitry Andric return true; 7150b57cec5SDimitry Andric } 7160b57cec5SDimitry Andric 7170b57cec5SDimitry Andric /// Unswitch a trivial switch if the condition is loop invariant. 7180b57cec5SDimitry Andric /// 7190b57cec5SDimitry Andric /// This routine should only be called when loop code leading to the switch has 7200b57cec5SDimitry Andric /// been validated as trivial (no side effects). This routine checks if the 7210b57cec5SDimitry Andric /// condition is invariant and that at least one of the successors is a loop 7220b57cec5SDimitry Andric /// exit. This allows us to unswitch without duplicating the loop, making it 7230b57cec5SDimitry Andric /// trivial. 7240b57cec5SDimitry Andric /// 7250b57cec5SDimitry Andric /// If this routine fails to unswitch the switch it returns false. 7260b57cec5SDimitry Andric /// 7270b57cec5SDimitry Andric /// If the switch can be unswitched, this routine splits the preheader and 7280b57cec5SDimitry Andric /// copies the switch above that split. If the default case is one of the 7290b57cec5SDimitry Andric /// exiting cases, it copies the non-exiting cases and points them at the new 7300b57cec5SDimitry Andric /// preheader. If the default case is not exiting, it copies the exiting cases 7310b57cec5SDimitry Andric /// and points the default at the preheader. It preserves loop simplified form 7320b57cec5SDimitry Andric /// (splitting the exit blocks as necessary). It simplifies the switch within 7330b57cec5SDimitry Andric /// the loop by removing now-dead cases. If the default case is one of those 7340b57cec5SDimitry Andric /// unswitched, it replaces its destination with a new basic block containing 7350b57cec5SDimitry Andric /// only unreachable. Such basic blocks, while technically loop exits, are not 7360b57cec5SDimitry Andric /// considered for unswitching so this is a stable transform and the same 7370b57cec5SDimitry Andric /// switch will not be revisited. If after unswitching there is only a single 7380b57cec5SDimitry Andric /// in-loop successor, the switch is further simplified to an unconditional 739fe6060f1SDimitry Andric /// branch. Still more cleanup can be done with some simplifycfg like pass. 7400b57cec5SDimitry Andric /// 7410b57cec5SDimitry Andric /// If `SE` is not null, it will be updated based on the potential loop SCEVs 7420b57cec5SDimitry Andric /// invalidated by this. 7430b57cec5SDimitry Andric static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, 7440b57cec5SDimitry Andric LoopInfo &LI, ScalarEvolution *SE, 7450b57cec5SDimitry Andric MemorySSAUpdater *MSSAU) { 7460b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n"); 7470b57cec5SDimitry Andric Value *LoopCond = SI.getCondition(); 7480b57cec5SDimitry Andric 7490b57cec5SDimitry Andric // If this isn't switching on an invariant condition, we can't unswitch it. 7500b57cec5SDimitry Andric if (!L.isLoopInvariant(LoopCond)) 7510b57cec5SDimitry Andric return false; 7520b57cec5SDimitry Andric 7530b57cec5SDimitry Andric auto *ParentBB = SI.getParent(); 7540b57cec5SDimitry Andric 7555ffd83dbSDimitry Andric // The same check must be used both for the default and the exit cases. We 7565ffd83dbSDimitry Andric // should never leave edges from the switch instruction to a basic block that 7575ffd83dbSDimitry Andric // we are unswitching, hence the condition used to determine the default case 7585ffd83dbSDimitry Andric // needs to also be used to populate ExitCaseIndices, which is then used to 7595ffd83dbSDimitry Andric // remove cases from the switch. 7605ffd83dbSDimitry Andric auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) { 7615ffd83dbSDimitry Andric // BBToCheck is not an exit block if it is inside loop L. 7625ffd83dbSDimitry Andric if (L.contains(&BBToCheck)) 7635ffd83dbSDimitry Andric return false; 7645ffd83dbSDimitry Andric // BBToCheck is not trivial to unswitch if its phis aren't loop invariant. 7655ffd83dbSDimitry Andric if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck)) 7665ffd83dbSDimitry Andric return false; 7675ffd83dbSDimitry Andric // We do not unswitch a block that only has an unreachable statement, as 7685ffd83dbSDimitry Andric // it's possible this is a previously unswitched block. Only unswitch if 7695ffd83dbSDimitry Andric // either the terminator is not unreachable, or, if it is, it's not the only 7705ffd83dbSDimitry Andric // instruction in the block. 7715ffd83dbSDimitry Andric auto *TI = BBToCheck.getTerminator(); 7725ffd83dbSDimitry Andric bool isUnreachable = isa<UnreachableInst>(TI); 7735ffd83dbSDimitry Andric return !isUnreachable || 7745ffd83dbSDimitry Andric (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI)); 7755ffd83dbSDimitry Andric }; 7765ffd83dbSDimitry Andric 7770b57cec5SDimitry Andric SmallVector<int, 4> ExitCaseIndices; 7785ffd83dbSDimitry Andric for (auto Case : SI.cases()) 7795ffd83dbSDimitry Andric if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor())) 7800b57cec5SDimitry Andric ExitCaseIndices.push_back(Case.getCaseIndex()); 7810b57cec5SDimitry Andric BasicBlock *DefaultExitBB = nullptr; 7820b57cec5SDimitry Andric SwitchInstProfUpdateWrapper::CaseWeightOpt DefaultCaseWeight = 7830b57cec5SDimitry Andric SwitchInstProfUpdateWrapper::getSuccessorWeight(SI, 0); 7845ffd83dbSDimitry Andric if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) { 7850b57cec5SDimitry Andric DefaultExitBB = SI.getDefaultDest(); 7860b57cec5SDimitry Andric } else if (ExitCaseIndices.empty()) 7870b57cec5SDimitry Andric return false; 7880b57cec5SDimitry Andric 7890b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n"); 7900b57cec5SDimitry Andric 7910b57cec5SDimitry Andric if (MSSAU && VerifyMemorySSA) 7920b57cec5SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA(); 7930b57cec5SDimitry Andric 7940b57cec5SDimitry Andric // We may need to invalidate SCEVs for the outermost loop reached by any of 7950b57cec5SDimitry Andric // the exits. 7960b57cec5SDimitry Andric Loop *OuterL = &L; 7970b57cec5SDimitry Andric 7980b57cec5SDimitry Andric if (DefaultExitBB) { 79906c3fb27SDimitry Andric // Check the loop containing this exit. 80006c3fb27SDimitry Andric Loop *ExitL = getTopMostExitingLoop(DefaultExitBB, LI); 80106c3fb27SDimitry Andric if (!ExitL || ExitL->contains(OuterL)) 80206c3fb27SDimitry Andric OuterL = ExitL; 80306c3fb27SDimitry Andric } 80406c3fb27SDimitry Andric for (unsigned Index : ExitCaseIndices) { 80506c3fb27SDimitry Andric auto CaseI = SI.case_begin() + Index; 80606c3fb27SDimitry Andric // Compute the outer loop from this exit. 80706c3fb27SDimitry Andric Loop *ExitL = getTopMostExitingLoop(CaseI->getCaseSuccessor(), LI); 80806c3fb27SDimitry Andric if (!ExitL || ExitL->contains(OuterL)) 80906c3fb27SDimitry Andric OuterL = ExitL; 81006c3fb27SDimitry Andric } 81106c3fb27SDimitry Andric 81206c3fb27SDimitry Andric if (SE) { 81306c3fb27SDimitry Andric if (OuterL) 81406c3fb27SDimitry Andric SE->forgetLoop(OuterL); 81506c3fb27SDimitry Andric else 81606c3fb27SDimitry Andric SE->forgetTopmostLoop(&L); 81706c3fb27SDimitry Andric } 81806c3fb27SDimitry Andric 81906c3fb27SDimitry Andric if (DefaultExitBB) { 8200b57cec5SDimitry Andric // Clear out the default destination temporarily to allow accurate 8210b57cec5SDimitry Andric // predecessor lists to be examined below. 8220b57cec5SDimitry Andric SI.setDefaultDest(nullptr); 8230b57cec5SDimitry Andric } 8240b57cec5SDimitry Andric 8250b57cec5SDimitry Andric // Store the exit cases into a separate data structure and remove them from 8260b57cec5SDimitry Andric // the switch. 8270b57cec5SDimitry Andric SmallVector<std::tuple<ConstantInt *, BasicBlock *, 8280b57cec5SDimitry Andric SwitchInstProfUpdateWrapper::CaseWeightOpt>, 8290b57cec5SDimitry Andric 4> ExitCases; 8300b57cec5SDimitry Andric ExitCases.reserve(ExitCaseIndices.size()); 8310b57cec5SDimitry Andric SwitchInstProfUpdateWrapper SIW(SI); 8320b57cec5SDimitry Andric // We walk the case indices backwards so that we remove the last case first 8330b57cec5SDimitry Andric // and don't disrupt the earlier indices. 8340b57cec5SDimitry Andric for (unsigned Index : reverse(ExitCaseIndices)) { 8350b57cec5SDimitry Andric auto CaseI = SI.case_begin() + Index; 8360b57cec5SDimitry Andric // Save the value of this case. 8370b57cec5SDimitry Andric auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex()); 8380b57cec5SDimitry Andric ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W); 8390b57cec5SDimitry Andric // Delete the unswitched cases. 8400b57cec5SDimitry Andric SIW.removeCase(CaseI); 8410b57cec5SDimitry Andric } 8420b57cec5SDimitry Andric 8430b57cec5SDimitry Andric // Check if after this all of the remaining cases point at the same 8440b57cec5SDimitry Andric // successor. 8450b57cec5SDimitry Andric BasicBlock *CommonSuccBB = nullptr; 8460b57cec5SDimitry Andric if (SI.getNumCases() > 0 && 847e8d8bef9SDimitry Andric all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) { 848e8d8bef9SDimitry Andric return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor(); 8490b57cec5SDimitry Andric })) 8500b57cec5SDimitry Andric CommonSuccBB = SI.case_begin()->getCaseSuccessor(); 8510b57cec5SDimitry Andric if (!DefaultExitBB) { 8520b57cec5SDimitry Andric // If we're not unswitching the default, we need it to match any cases to 8530b57cec5SDimitry Andric // have a common successor or if we have no cases it is the common 8540b57cec5SDimitry Andric // successor. 8550b57cec5SDimitry Andric if (SI.getNumCases() == 0) 8560b57cec5SDimitry Andric CommonSuccBB = SI.getDefaultDest(); 8570b57cec5SDimitry Andric else if (SI.getDefaultDest() != CommonSuccBB) 8580b57cec5SDimitry Andric CommonSuccBB = nullptr; 8590b57cec5SDimitry Andric } 8600b57cec5SDimitry Andric 8610b57cec5SDimitry Andric // Split the preheader, so that we know that there is a safe place to insert 8620b57cec5SDimitry Andric // the switch. 8630b57cec5SDimitry Andric BasicBlock *OldPH = L.getLoopPreheader(); 8640b57cec5SDimitry Andric BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU); 8650b57cec5SDimitry Andric OldPH->getTerminator()->eraseFromParent(); 8660b57cec5SDimitry Andric 8670fca6ea1SDimitry Andric // Now add the unswitched switch. This new switch instruction inherits the 8680fca6ea1SDimitry Andric // debug location of the old switch, because it semantically replace the old 8690fca6ea1SDimitry Andric // one. 8700b57cec5SDimitry Andric auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH); 8710fca6ea1SDimitry Andric NewSI->setDebugLoc(SIW->getDebugLoc()); 8720b57cec5SDimitry Andric SwitchInstProfUpdateWrapper NewSIW(*NewSI); 8730b57cec5SDimitry Andric 8740b57cec5SDimitry Andric // Rewrite the IR for the unswitched basic blocks. This requires two steps. 8750b57cec5SDimitry Andric // First, we split any exit blocks with remaining in-loop predecessors. Then 8760b57cec5SDimitry Andric // we update the PHIs in one of two ways depending on if there was a split. 8770b57cec5SDimitry Andric // We walk in reverse so that we split in the same order as the cases 8780b57cec5SDimitry Andric // appeared. This is purely for convenience of reading the resulting IR, but 8790b57cec5SDimitry Andric // it doesn't cost anything really. 8800b57cec5SDimitry Andric SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs; 8810b57cec5SDimitry Andric SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap; 8820b57cec5SDimitry Andric // Handle the default exit if necessary. 8830b57cec5SDimitry Andric // FIXME: It'd be great if we could merge this with the loop below but LLVM's 8840b57cec5SDimitry Andric // ranges aren't quite powerful enough yet. 8850b57cec5SDimitry Andric if (DefaultExitBB) { 8860b57cec5SDimitry Andric if (pred_empty(DefaultExitBB)) { 8870b57cec5SDimitry Andric UnswitchedExitBBs.insert(DefaultExitBB); 8880b57cec5SDimitry Andric rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH); 8890b57cec5SDimitry Andric } else { 8900b57cec5SDimitry Andric auto *SplitBB = 8915f757f3fSDimitry Andric SplitBlock(DefaultExitBB, DefaultExitBB->begin(), &DT, &LI, MSSAU); 8920b57cec5SDimitry Andric rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB, 8930b57cec5SDimitry Andric *ParentBB, *OldPH, 8940b57cec5SDimitry Andric /*FullUnswitch*/ true); 8950b57cec5SDimitry Andric DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB; 8960b57cec5SDimitry Andric } 8970b57cec5SDimitry Andric } 8980b57cec5SDimitry Andric // Note that we must use a reference in the for loop so that we update the 8990b57cec5SDimitry Andric // container. 9000b57cec5SDimitry Andric for (auto &ExitCase : reverse(ExitCases)) { 9010b57cec5SDimitry Andric // Grab a reference to the exit block in the pair so that we can update it. 9020b57cec5SDimitry Andric BasicBlock *ExitBB = std::get<1>(ExitCase); 9030b57cec5SDimitry Andric 9040b57cec5SDimitry Andric // If this case is the last edge into the exit block, we can simply reuse it 9050b57cec5SDimitry Andric // as it will no longer be a loop exit. No mapping necessary. 9060b57cec5SDimitry Andric if (pred_empty(ExitBB)) { 9070b57cec5SDimitry Andric // Only rewrite once. 9080b57cec5SDimitry Andric if (UnswitchedExitBBs.insert(ExitBB).second) 9090b57cec5SDimitry Andric rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH); 9100b57cec5SDimitry Andric continue; 9110b57cec5SDimitry Andric } 9120b57cec5SDimitry Andric 9130b57cec5SDimitry Andric // Otherwise we need to split the exit block so that we retain an exit 9140b57cec5SDimitry Andric // block from the loop and a target for the unswitched condition. 9150b57cec5SDimitry Andric BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB]; 9160b57cec5SDimitry Andric if (!SplitExitBB) { 9170b57cec5SDimitry Andric // If this is the first time we see this, do the split and remember it. 9185f757f3fSDimitry Andric SplitExitBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU); 9190b57cec5SDimitry Andric rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB, 9200b57cec5SDimitry Andric *ParentBB, *OldPH, 9210b57cec5SDimitry Andric /*FullUnswitch*/ true); 9220b57cec5SDimitry Andric } 9230b57cec5SDimitry Andric // Update the case pair to point to the split block. 9240b57cec5SDimitry Andric std::get<1>(ExitCase) = SplitExitBB; 9250b57cec5SDimitry Andric } 9260b57cec5SDimitry Andric 9270b57cec5SDimitry Andric // Now add the unswitched cases. We do this in reverse order as we built them 9280b57cec5SDimitry Andric // in reverse order. 9290b57cec5SDimitry Andric for (auto &ExitCase : reverse(ExitCases)) { 9300b57cec5SDimitry Andric ConstantInt *CaseVal = std::get<0>(ExitCase); 9310b57cec5SDimitry Andric BasicBlock *UnswitchedBB = std::get<1>(ExitCase); 9320b57cec5SDimitry Andric 9330b57cec5SDimitry Andric NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase)); 9340b57cec5SDimitry Andric } 9350b57cec5SDimitry Andric 9360b57cec5SDimitry Andric // If the default was unswitched, re-point it and add explicit cases for 9370b57cec5SDimitry Andric // entering the loop. 9380b57cec5SDimitry Andric if (DefaultExitBB) { 9390b57cec5SDimitry Andric NewSIW->setDefaultDest(DefaultExitBB); 9400b57cec5SDimitry Andric NewSIW.setSuccessorWeight(0, DefaultCaseWeight); 9410b57cec5SDimitry Andric 9420b57cec5SDimitry Andric // We removed all the exit cases, so we just copy the cases to the 9430b57cec5SDimitry Andric // unswitched switch. 9440b57cec5SDimitry Andric for (const auto &Case : SI.cases()) 9450b57cec5SDimitry Andric NewSIW.addCase(Case.getCaseValue(), NewPH, 9460b57cec5SDimitry Andric SIW.getSuccessorWeight(Case.getSuccessorIndex())); 9470b57cec5SDimitry Andric } else if (DefaultCaseWeight) { 9480b57cec5SDimitry Andric // We have to set branch weight of the default case. 9490b57cec5SDimitry Andric uint64_t SW = *DefaultCaseWeight; 9500b57cec5SDimitry Andric for (const auto &Case : SI.cases()) { 9510b57cec5SDimitry Andric auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex()); 9520b57cec5SDimitry Andric assert(W && 9530b57cec5SDimitry Andric "case weight must be defined as default case weight is defined"); 9540b57cec5SDimitry Andric SW += *W; 9550b57cec5SDimitry Andric } 9560b57cec5SDimitry Andric NewSIW.setSuccessorWeight(0, SW); 9570b57cec5SDimitry Andric } 9580b57cec5SDimitry Andric 9590b57cec5SDimitry Andric // If we ended up with a common successor for every path through the switch 9600b57cec5SDimitry Andric // after unswitching, rewrite it to an unconditional branch to make it easy 9610b57cec5SDimitry Andric // to recognize. Otherwise we potentially have to recognize the default case 9620b57cec5SDimitry Andric // pointing at unreachable and other complexity. 9630b57cec5SDimitry Andric if (CommonSuccBB) { 9640b57cec5SDimitry Andric BasicBlock *BB = SI.getParent(); 9650b57cec5SDimitry Andric // We may have had multiple edges to this common successor block, so remove 9660b57cec5SDimitry Andric // them as predecessors. We skip the first one, either the default or the 9670b57cec5SDimitry Andric // actual first case. 9680b57cec5SDimitry Andric bool SkippedFirst = DefaultExitBB == nullptr; 9690b57cec5SDimitry Andric for (auto Case : SI.cases()) { 9700b57cec5SDimitry Andric assert(Case.getCaseSuccessor() == CommonSuccBB && 9710b57cec5SDimitry Andric "Non-common successor!"); 9720b57cec5SDimitry Andric (void)Case; 9730b57cec5SDimitry Andric if (!SkippedFirst) { 9740b57cec5SDimitry Andric SkippedFirst = true; 9750b57cec5SDimitry Andric continue; 9760b57cec5SDimitry Andric } 9770b57cec5SDimitry Andric CommonSuccBB->removePredecessor(BB, 9780b57cec5SDimitry Andric /*KeepOneInputPHIs*/ true); 9790b57cec5SDimitry Andric } 9800b57cec5SDimitry Andric // Now nuke the switch and replace it with a direct branch. 9810fca6ea1SDimitry Andric Instruction *NewBI = BranchInst::Create(CommonSuccBB, BB); 9820fca6ea1SDimitry Andric NewBI->setDebugLoc(SIW->getDebugLoc()); 9830b57cec5SDimitry Andric SIW.eraseFromParent(); 9840b57cec5SDimitry Andric } else if (DefaultExitBB) { 9850b57cec5SDimitry Andric assert(SI.getNumCases() > 0 && 9860b57cec5SDimitry Andric "If we had no cases we'd have a common successor!"); 9870b57cec5SDimitry Andric // Move the last case to the default successor. This is valid as if the 9880b57cec5SDimitry Andric // default got unswitched it cannot be reached. This has the advantage of 9890b57cec5SDimitry Andric // being simple and keeping the number of edges from this switch to 9900b57cec5SDimitry Andric // successors the same, and avoiding any PHI update complexity. 9910b57cec5SDimitry Andric auto LastCaseI = std::prev(SI.case_end()); 9920b57cec5SDimitry Andric 9930b57cec5SDimitry Andric SI.setDefaultDest(LastCaseI->getCaseSuccessor()); 9940b57cec5SDimitry Andric SIW.setSuccessorWeight( 9950b57cec5SDimitry Andric 0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex())); 9960b57cec5SDimitry Andric SIW.removeCase(LastCaseI); 9970b57cec5SDimitry Andric } 9980b57cec5SDimitry Andric 9990b57cec5SDimitry Andric // Walk the unswitched exit blocks and the unswitched split blocks and update 10000b57cec5SDimitry Andric // the dominator tree based on the CFG edits. While we are walking unordered 10010b57cec5SDimitry Andric // containers here, the API for applyUpdates takes an unordered list of 10020b57cec5SDimitry Andric // updates and requires them to not contain duplicates. 10030b57cec5SDimitry Andric SmallVector<DominatorTree::UpdateType, 4> DTUpdates; 10040b57cec5SDimitry Andric for (auto *UnswitchedExitBB : UnswitchedExitBBs) { 10050b57cec5SDimitry Andric DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB}); 10060b57cec5SDimitry Andric DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB}); 10070b57cec5SDimitry Andric } 10080b57cec5SDimitry Andric for (auto SplitUnswitchedPair : SplitExitBBMap) { 10090b57cec5SDimitry Andric DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first}); 10100b57cec5SDimitry Andric DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second}); 10110b57cec5SDimitry Andric } 10120b57cec5SDimitry Andric 10130b57cec5SDimitry Andric if (MSSAU) { 1014e8d8bef9SDimitry Andric MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true); 10150b57cec5SDimitry Andric if (VerifyMemorySSA) 10160b57cec5SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA(); 1017e8d8bef9SDimitry Andric } else { 1018e8d8bef9SDimitry Andric DT.applyUpdates(DTUpdates); 10190b57cec5SDimitry Andric } 10200b57cec5SDimitry Andric 10210b57cec5SDimitry Andric assert(DT.verify(DominatorTree::VerificationLevel::Fast)); 10220b57cec5SDimitry Andric 10230b57cec5SDimitry Andric // We may have changed the nesting relationship for this loop so hoist it to 10240b57cec5SDimitry Andric // its correct parent if needed. 1025480093f4SDimitry Andric hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE); 10260b57cec5SDimitry Andric 10270b57cec5SDimitry Andric if (MSSAU && VerifyMemorySSA) 10280b57cec5SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA(); 10290b57cec5SDimitry Andric 10300b57cec5SDimitry Andric ++NumTrivial; 10310b57cec5SDimitry Andric ++NumSwitches; 10320b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n"); 10330b57cec5SDimitry Andric return true; 10340b57cec5SDimitry Andric } 10350b57cec5SDimitry Andric 10360b57cec5SDimitry Andric /// This routine scans the loop to find a branch or switch which occurs before 10370b57cec5SDimitry Andric /// any side effects occur. These can potentially be unswitched without 10380b57cec5SDimitry Andric /// duplicating the loop. If a branch or switch is successfully unswitched the 10390b57cec5SDimitry Andric /// scanning continues to see if subsequent branches or switches have become 10400b57cec5SDimitry Andric /// trivial. Once all trivial candidates have been unswitched, this routine 10410b57cec5SDimitry Andric /// returns. 10420b57cec5SDimitry Andric /// 10430b57cec5SDimitry Andric /// The return value indicates whether anything was unswitched (and therefore 10440b57cec5SDimitry Andric /// changed). 10450b57cec5SDimitry Andric /// 10460b57cec5SDimitry Andric /// If `SE` is not null, it will be updated based on the potential loop SCEVs 10470b57cec5SDimitry Andric /// invalidated by this. 10480b57cec5SDimitry Andric static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, 10490b57cec5SDimitry Andric LoopInfo &LI, ScalarEvolution *SE, 10500b57cec5SDimitry Andric MemorySSAUpdater *MSSAU) { 10510b57cec5SDimitry Andric bool Changed = false; 10520b57cec5SDimitry Andric 10530b57cec5SDimitry Andric // If loop header has only one reachable successor we should keep looking for 10540b57cec5SDimitry Andric // trivial condition candidates in the successor as well. An alternative is 10550b57cec5SDimitry Andric // to constant fold conditions and merge successors into loop header (then we 10560b57cec5SDimitry Andric // only need to check header's terminator). The reason for not doing this in 10570b57cec5SDimitry Andric // LoopUnswitch pass is that it could potentially break LoopPassManager's 10580b57cec5SDimitry Andric // invariants. Folding dead branches could either eliminate the current loop 10590b57cec5SDimitry Andric // or make other loops unreachable. LCSSA form might also not be preserved 10600b57cec5SDimitry Andric // after deleting branches. The following code keeps traversing loop header's 10610b57cec5SDimitry Andric // successors until it finds the trivial condition candidate (condition that 10620b57cec5SDimitry Andric // is not a constant). Since unswitching generates branches with constant 10630b57cec5SDimitry Andric // conditions, this scenario could be very common in practice. 10640b57cec5SDimitry Andric BasicBlock *CurrentBB = L.getHeader(); 10650b57cec5SDimitry Andric SmallPtrSet<BasicBlock *, 8> Visited; 10660b57cec5SDimitry Andric Visited.insert(CurrentBB); 10670b57cec5SDimitry Andric do { 10680b57cec5SDimitry Andric // Check if there are any side-effecting instructions (e.g. stores, calls, 10690b57cec5SDimitry Andric // volatile loads) in the part of the loop that the code *would* execute 10700b57cec5SDimitry Andric // without unswitching. 10710b57cec5SDimitry Andric if (MSSAU) // Possible early exit with MSSA 10720b57cec5SDimitry Andric if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB)) 10730b57cec5SDimitry Andric if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end())) 10740b57cec5SDimitry Andric return Changed; 10750b57cec5SDimitry Andric if (llvm::any_of(*CurrentBB, 10760b57cec5SDimitry Andric [](Instruction &I) { return I.mayHaveSideEffects(); })) 10770b57cec5SDimitry Andric return Changed; 10780b57cec5SDimitry Andric 10790b57cec5SDimitry Andric Instruction *CurrentTerm = CurrentBB->getTerminator(); 10800b57cec5SDimitry Andric 10810b57cec5SDimitry Andric if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) { 10820b57cec5SDimitry Andric // Don't bother trying to unswitch past a switch with a constant 10830b57cec5SDimitry Andric // condition. This should be removed prior to running this pass by 1084fe6060f1SDimitry Andric // simplifycfg. 10850b57cec5SDimitry Andric if (isa<Constant>(SI->getCondition())) 10860b57cec5SDimitry Andric return Changed; 10870b57cec5SDimitry Andric 10880b57cec5SDimitry Andric if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU)) 10890b57cec5SDimitry Andric // Couldn't unswitch this one so we're done. 10900b57cec5SDimitry Andric return Changed; 10910b57cec5SDimitry Andric 10920b57cec5SDimitry Andric // Mark that we managed to unswitch something. 10930b57cec5SDimitry Andric Changed = true; 10940b57cec5SDimitry Andric 10950b57cec5SDimitry Andric // If unswitching turned the terminator into an unconditional branch then 10960b57cec5SDimitry Andric // we can continue. The unswitching logic specifically works to fold any 10970b57cec5SDimitry Andric // cases it can into an unconditional branch to make it easier to 10980b57cec5SDimitry Andric // recognize here. 10990b57cec5SDimitry Andric auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator()); 11000b57cec5SDimitry Andric if (!BI || BI->isConditional()) 11010b57cec5SDimitry Andric return Changed; 11020b57cec5SDimitry Andric 11030b57cec5SDimitry Andric CurrentBB = BI->getSuccessor(0); 11040b57cec5SDimitry Andric continue; 11050b57cec5SDimitry Andric } 11060b57cec5SDimitry Andric 11070b57cec5SDimitry Andric auto *BI = dyn_cast<BranchInst>(CurrentTerm); 11080b57cec5SDimitry Andric if (!BI) 11090b57cec5SDimitry Andric // We do not understand other terminator instructions. 11100b57cec5SDimitry Andric return Changed; 11110b57cec5SDimitry Andric 11120b57cec5SDimitry Andric // Don't bother trying to unswitch past an unconditional branch or a branch 1113fe6060f1SDimitry Andric // with a constant value. These should be removed by simplifycfg prior to 11140b57cec5SDimitry Andric // running this pass. 111581ad6265SDimitry Andric if (!BI->isConditional() || 111681ad6265SDimitry Andric isa<Constant>(skipTrivialSelect(BI->getCondition()))) 11170b57cec5SDimitry Andric return Changed; 11180b57cec5SDimitry Andric 11190b57cec5SDimitry Andric // Found a trivial condition candidate: non-foldable conditional branch. If 11200b57cec5SDimitry Andric // we fail to unswitch this, we can't do anything else that is trivial. 11210b57cec5SDimitry Andric if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU)) 11220b57cec5SDimitry Andric return Changed; 11230b57cec5SDimitry Andric 11240b57cec5SDimitry Andric // Mark that we managed to unswitch something. 11250b57cec5SDimitry Andric Changed = true; 11260b57cec5SDimitry Andric 11270b57cec5SDimitry Andric // If we only unswitched some of the conditions feeding the branch, we won't 11280b57cec5SDimitry Andric // have collapsed it to a single successor. 11290b57cec5SDimitry Andric BI = cast<BranchInst>(CurrentBB->getTerminator()); 11300b57cec5SDimitry Andric if (BI->isConditional()) 11310b57cec5SDimitry Andric return Changed; 11320b57cec5SDimitry Andric 11330b57cec5SDimitry Andric // Follow the newly unconditional branch into its successor. 11340b57cec5SDimitry Andric CurrentBB = BI->getSuccessor(0); 11350b57cec5SDimitry Andric 11360b57cec5SDimitry Andric // When continuing, if we exit the loop or reach a previous visited block, 11370b57cec5SDimitry Andric // then we can not reach any trivial condition candidates (unfoldable 11380b57cec5SDimitry Andric // branch instructions or switch instructions) and no unswitch can happen. 11390b57cec5SDimitry Andric } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second); 11400b57cec5SDimitry Andric 11410b57cec5SDimitry Andric return Changed; 11420b57cec5SDimitry Andric } 11430b57cec5SDimitry Andric 11440b57cec5SDimitry Andric /// Build the cloned blocks for an unswitched copy of the given loop. 11450b57cec5SDimitry Andric /// 11460b57cec5SDimitry Andric /// The cloned blocks are inserted before the loop preheader (`LoopPH`) and 11470b57cec5SDimitry Andric /// after the split block (`SplitBB`) that will be used to select between the 11480b57cec5SDimitry Andric /// cloned and original loop. 11490b57cec5SDimitry Andric /// 11500b57cec5SDimitry Andric /// This routine handles cloning all of the necessary loop blocks and exit 11510b57cec5SDimitry Andric /// blocks including rewriting their instructions and the relevant PHI nodes. 11520b57cec5SDimitry Andric /// Any loop blocks or exit blocks which are dominated by a different successor 11530b57cec5SDimitry Andric /// than the one for this clone of the loop blocks can be trivially skipped. We 11540b57cec5SDimitry Andric /// use the `DominatingSucc` map to determine whether a block satisfies that 11550b57cec5SDimitry Andric /// property with a simple map lookup. 11560b57cec5SDimitry Andric /// 11570b57cec5SDimitry Andric /// It also correctly creates the unconditional branch in the cloned 11580b57cec5SDimitry Andric /// unswitched parent block to only point at the unswitched successor. 11590b57cec5SDimitry Andric /// 11600b57cec5SDimitry Andric /// This does not handle most of the necessary updates to `LoopInfo`. Only exit 11610b57cec5SDimitry Andric /// block splitting is correctly reflected in `LoopInfo`, essentially all of 11620b57cec5SDimitry Andric /// the cloned blocks (and their loops) are left without full `LoopInfo` 11630b57cec5SDimitry Andric /// updates. This also doesn't fully update `DominatorTree`. It adds the cloned 11640b57cec5SDimitry Andric /// blocks to them but doesn't create the cloned `DominatorTree` structure and 11650b57cec5SDimitry Andric /// instead the caller must recompute an accurate DT. It *does* correctly 11660b57cec5SDimitry Andric /// update the `AssumptionCache` provided in `AC`. 11670b57cec5SDimitry Andric static BasicBlock *buildClonedLoopBlocks( 11680b57cec5SDimitry Andric Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB, 11690b57cec5SDimitry Andric ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB, 11700b57cec5SDimitry Andric BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB, 11710b57cec5SDimitry Andric const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc, 11720b57cec5SDimitry Andric ValueToValueMapTy &VMap, 11730b57cec5SDimitry Andric SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC, 1174bdd1243dSDimitry Andric DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, 1175bdd1243dSDimitry Andric ScalarEvolution *SE) { 11760b57cec5SDimitry Andric SmallVector<BasicBlock *, 4> NewBlocks; 11770b57cec5SDimitry Andric NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size()); 11780b57cec5SDimitry Andric 11790b57cec5SDimitry Andric // We will need to clone a bunch of blocks, wrap up the clone operation in 11800b57cec5SDimitry Andric // a helper. 11810b57cec5SDimitry Andric auto CloneBlock = [&](BasicBlock *OldBB) { 11820b57cec5SDimitry Andric // Clone the basic block and insert it before the new preheader. 11830b57cec5SDimitry Andric BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent()); 11840b57cec5SDimitry Andric NewBB->moveBefore(LoopPH); 11850b57cec5SDimitry Andric 11860b57cec5SDimitry Andric // Record this block and the mapping. 11870b57cec5SDimitry Andric NewBlocks.push_back(NewBB); 11880b57cec5SDimitry Andric VMap[OldBB] = NewBB; 11890b57cec5SDimitry Andric 11900b57cec5SDimitry Andric return NewBB; 11910b57cec5SDimitry Andric }; 11920b57cec5SDimitry Andric 11930b57cec5SDimitry Andric // We skip cloning blocks when they have a dominating succ that is not the 11940b57cec5SDimitry Andric // succ we are cloning for. 11950b57cec5SDimitry Andric auto SkipBlock = [&](BasicBlock *BB) { 11960b57cec5SDimitry Andric auto It = DominatingSucc.find(BB); 11970b57cec5SDimitry Andric return It != DominatingSucc.end() && It->second != UnswitchedSuccBB; 11980b57cec5SDimitry Andric }; 11990b57cec5SDimitry Andric 12000b57cec5SDimitry Andric // First, clone the preheader. 12010b57cec5SDimitry Andric auto *ClonedPH = CloneBlock(LoopPH); 12020b57cec5SDimitry Andric 12030b57cec5SDimitry Andric // Then clone all the loop blocks, skipping the ones that aren't necessary. 12040b57cec5SDimitry Andric for (auto *LoopBB : L.blocks()) 12050b57cec5SDimitry Andric if (!SkipBlock(LoopBB)) 12060b57cec5SDimitry Andric CloneBlock(LoopBB); 12070b57cec5SDimitry Andric 12080b57cec5SDimitry Andric // Split all the loop exit edges so that when we clone the exit blocks, if 12090b57cec5SDimitry Andric // any of the exit blocks are *also* a preheader for some other loop, we 12100b57cec5SDimitry Andric // don't create multiple predecessors entering the loop header. 12110b57cec5SDimitry Andric for (auto *ExitBB : ExitBlocks) { 12120b57cec5SDimitry Andric if (SkipBlock(ExitBB)) 12130b57cec5SDimitry Andric continue; 12140b57cec5SDimitry Andric 12150b57cec5SDimitry Andric // When we are going to clone an exit, we don't need to clone all the 12160b57cec5SDimitry Andric // instructions in the exit block and we want to ensure we have an easy 12170b57cec5SDimitry Andric // place to merge the CFG, so split the exit first. This is always safe to 12180b57cec5SDimitry Andric // do because there cannot be any non-loop predecessors of a loop exit in 12190b57cec5SDimitry Andric // loop simplified form. 12205f757f3fSDimitry Andric auto *MergeBB = SplitBlock(ExitBB, ExitBB->begin(), &DT, &LI, MSSAU); 12210b57cec5SDimitry Andric 12220b57cec5SDimitry Andric // Rearrange the names to make it easier to write test cases by having the 12230b57cec5SDimitry Andric // exit block carry the suffix rather than the merge block carrying the 12240b57cec5SDimitry Andric // suffix. 12250b57cec5SDimitry Andric MergeBB->takeName(ExitBB); 12260b57cec5SDimitry Andric ExitBB->setName(Twine(MergeBB->getName()) + ".split"); 12270b57cec5SDimitry Andric 12280b57cec5SDimitry Andric // Now clone the original exit block. 12290b57cec5SDimitry Andric auto *ClonedExitBB = CloneBlock(ExitBB); 12300b57cec5SDimitry Andric assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 && 12310b57cec5SDimitry Andric "Exit block should have been split to have one successor!"); 12320b57cec5SDimitry Andric assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB && 12330b57cec5SDimitry Andric "Cloned exit block has the wrong successor!"); 12340b57cec5SDimitry Andric 12350b57cec5SDimitry Andric // Remap any cloned instructions and create a merge phi node for them. 12360b57cec5SDimitry Andric for (auto ZippedInsts : llvm::zip_first( 12370b57cec5SDimitry Andric llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())), 12380b57cec5SDimitry Andric llvm::make_range(ClonedExitBB->begin(), 12390b57cec5SDimitry Andric std::prev(ClonedExitBB->end())))) { 12400b57cec5SDimitry Andric Instruction &I = std::get<0>(ZippedInsts); 12410b57cec5SDimitry Andric Instruction &ClonedI = std::get<1>(ZippedInsts); 12420b57cec5SDimitry Andric 12430b57cec5SDimitry Andric // The only instructions in the exit block should be PHI nodes and 12440b57cec5SDimitry Andric // potentially a landing pad. 12450b57cec5SDimitry Andric assert( 12460b57cec5SDimitry Andric (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) && 12470b57cec5SDimitry Andric "Bad instruction in exit block!"); 12480b57cec5SDimitry Andric // We should have a value map between the instruction and its clone. 12490b57cec5SDimitry Andric assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!"); 12500b57cec5SDimitry Andric 1251bdd1243dSDimitry Andric // Forget SCEVs based on exit phis in case SCEV looked through the phi. 1252*6c05f3a7SDimitry Andric if (SE) 1253*6c05f3a7SDimitry Andric if (auto *PN = dyn_cast<PHINode>(&I)) 1254*6c05f3a7SDimitry Andric SE->forgetLcssaPhiWithNewPredecessor(&L, PN); 1255bdd1243dSDimitry Andric 12560fca6ea1SDimitry Andric BasicBlock::iterator InsertPt = MergeBB->getFirstInsertionPt(); 12570fca6ea1SDimitry Andric 12580b57cec5SDimitry Andric auto *MergePN = 12595f757f3fSDimitry Andric PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi"); 12600fca6ea1SDimitry Andric MergePN->insertBefore(InsertPt); 12610fca6ea1SDimitry Andric MergePN->setDebugLoc(InsertPt->getDebugLoc()); 12620b57cec5SDimitry Andric I.replaceAllUsesWith(MergePN); 12630b57cec5SDimitry Andric MergePN->addIncoming(&I, ExitBB); 12640b57cec5SDimitry Andric MergePN->addIncoming(&ClonedI, ClonedExitBB); 12650b57cec5SDimitry Andric } 12660b57cec5SDimitry Andric } 12670b57cec5SDimitry Andric 12680b57cec5SDimitry Andric // Rewrite the instructions in the cloned blocks to refer to the instructions 12690b57cec5SDimitry Andric // in the cloned blocks. We have to do this as a second pass so that we have 12700b57cec5SDimitry Andric // everything available. Also, we have inserted new instructions which may 12710b57cec5SDimitry Andric // include assume intrinsics, so we update the assumption cache while 12720b57cec5SDimitry Andric // processing this. 12735f757f3fSDimitry Andric Module *M = ClonedPH->getParent()->getParent(); 12740b57cec5SDimitry Andric for (auto *ClonedBB : NewBlocks) 12750b57cec5SDimitry Andric for (Instruction &I : *ClonedBB) { 12760fca6ea1SDimitry Andric RemapDbgRecordRange(M, I.getDbgRecordRange(), VMap, 12775f757f3fSDimitry Andric RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 12780b57cec5SDimitry Andric RemapInstruction(&I, VMap, 12790b57cec5SDimitry Andric RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 1280fe6060f1SDimitry Andric if (auto *II = dyn_cast<AssumeInst>(&I)) 12810b57cec5SDimitry Andric AC.registerAssumption(II); 12820b57cec5SDimitry Andric } 12830b57cec5SDimitry Andric 12840b57cec5SDimitry Andric // Update any PHI nodes in the cloned successors of the skipped blocks to not 12850b57cec5SDimitry Andric // have spurious incoming values. 12860b57cec5SDimitry Andric for (auto *LoopBB : L.blocks()) 12870b57cec5SDimitry Andric if (SkipBlock(LoopBB)) 12880b57cec5SDimitry Andric for (auto *SuccBB : successors(LoopBB)) 12890b57cec5SDimitry Andric if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB))) 12900b57cec5SDimitry Andric for (PHINode &PN : ClonedSuccBB->phis()) 12910b57cec5SDimitry Andric PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false); 12920b57cec5SDimitry Andric 12930b57cec5SDimitry Andric // Remove the cloned parent as a predecessor of any successor we ended up 12940b57cec5SDimitry Andric // cloning other than the unswitched one. 12950b57cec5SDimitry Andric auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB)); 12960b57cec5SDimitry Andric for (auto *SuccBB : successors(ParentBB)) { 12970b57cec5SDimitry Andric if (SuccBB == UnswitchedSuccBB) 12980b57cec5SDimitry Andric continue; 12990b57cec5SDimitry Andric 13000b57cec5SDimitry Andric auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)); 13010b57cec5SDimitry Andric if (!ClonedSuccBB) 13020b57cec5SDimitry Andric continue; 13030b57cec5SDimitry Andric 13040b57cec5SDimitry Andric ClonedSuccBB->removePredecessor(ClonedParentBB, 13050b57cec5SDimitry Andric /*KeepOneInputPHIs*/ true); 13060b57cec5SDimitry Andric } 13070b57cec5SDimitry Andric 13080b57cec5SDimitry Andric // Replace the cloned branch with an unconditional branch to the cloned 13090b57cec5SDimitry Andric // unswitched successor. 13100b57cec5SDimitry Andric auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB)); 1311e8d8bef9SDimitry Andric Instruction *ClonedTerminator = ClonedParentBB->getTerminator(); 1312e8d8bef9SDimitry Andric // Trivial Simplification. If Terminator is a conditional branch and 1313e8d8bef9SDimitry Andric // condition becomes dead - erase it. 1314e8d8bef9SDimitry Andric Value *ClonedConditionToErase = nullptr; 1315e8d8bef9SDimitry Andric if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator)) 1316e8d8bef9SDimitry Andric ClonedConditionToErase = BI->getCondition(); 1317e8d8bef9SDimitry Andric else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator)) 1318e8d8bef9SDimitry Andric ClonedConditionToErase = SI->getCondition(); 1319e8d8bef9SDimitry Andric 13200fca6ea1SDimitry Andric Instruction *BI = BranchInst::Create(ClonedSuccBB, ClonedParentBB); 13210fca6ea1SDimitry Andric BI->setDebugLoc(ClonedTerminator->getDebugLoc()); 1322e8d8bef9SDimitry Andric ClonedTerminator->eraseFromParent(); 13230b57cec5SDimitry Andric 1324e8d8bef9SDimitry Andric if (ClonedConditionToErase) 1325e8d8bef9SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr, 1326e8d8bef9SDimitry Andric MSSAU); 1327e8d8bef9SDimitry Andric 13280b57cec5SDimitry Andric // If there are duplicate entries in the PHI nodes because of multiple edges 13290b57cec5SDimitry Andric // to the unswitched successor, we need to nuke all but one as we replaced it 13300b57cec5SDimitry Andric // with a direct branch. 13310b57cec5SDimitry Andric for (PHINode &PN : ClonedSuccBB->phis()) { 13320b57cec5SDimitry Andric bool Found = false; 13330b57cec5SDimitry Andric // Loop over the incoming operands backwards so we can easily delete as we 13340b57cec5SDimitry Andric // go without invalidating the index. 13350b57cec5SDimitry Andric for (int i = PN.getNumOperands() - 1; i >= 0; --i) { 13360b57cec5SDimitry Andric if (PN.getIncomingBlock(i) != ClonedParentBB) 13370b57cec5SDimitry Andric continue; 13380b57cec5SDimitry Andric if (!Found) { 13390b57cec5SDimitry Andric Found = true; 13400b57cec5SDimitry Andric continue; 13410b57cec5SDimitry Andric } 13420b57cec5SDimitry Andric PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false); 13430b57cec5SDimitry Andric } 13440b57cec5SDimitry Andric } 13450b57cec5SDimitry Andric 13460b57cec5SDimitry Andric // Record the domtree updates for the new blocks. 13470b57cec5SDimitry Andric SmallPtrSet<BasicBlock *, 4> SuccSet; 13480b57cec5SDimitry Andric for (auto *ClonedBB : NewBlocks) { 13490b57cec5SDimitry Andric for (auto *SuccBB : successors(ClonedBB)) 13500b57cec5SDimitry Andric if (SuccSet.insert(SuccBB).second) 13510b57cec5SDimitry Andric DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB}); 13520b57cec5SDimitry Andric SuccSet.clear(); 13530b57cec5SDimitry Andric } 13540b57cec5SDimitry Andric 13550b57cec5SDimitry Andric return ClonedPH; 13560b57cec5SDimitry Andric } 13570b57cec5SDimitry Andric 13580b57cec5SDimitry Andric /// Recursively clone the specified loop and all of its children. 13590b57cec5SDimitry Andric /// 13600b57cec5SDimitry Andric /// The target parent loop for the clone should be provided, or can be null if 13610b57cec5SDimitry Andric /// the clone is a top-level loop. While cloning, all the blocks are mapped 13620b57cec5SDimitry Andric /// with the provided value map. The entire original loop must be present in 13630b57cec5SDimitry Andric /// the value map. The cloned loop is returned. 13640b57cec5SDimitry Andric static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL, 13650b57cec5SDimitry Andric const ValueToValueMapTy &VMap, LoopInfo &LI) { 13660b57cec5SDimitry Andric auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) { 13670b57cec5SDimitry Andric assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!"); 13680b57cec5SDimitry Andric ClonedL.reserveBlocks(OrigL.getNumBlocks()); 13690b57cec5SDimitry Andric for (auto *BB : OrigL.blocks()) { 13700b57cec5SDimitry Andric auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB)); 13710b57cec5SDimitry Andric ClonedL.addBlockEntry(ClonedBB); 13720b57cec5SDimitry Andric if (LI.getLoopFor(BB) == &OrigL) 13730b57cec5SDimitry Andric LI.changeLoopFor(ClonedBB, &ClonedL); 13740b57cec5SDimitry Andric } 13750b57cec5SDimitry Andric }; 13760b57cec5SDimitry Andric 13770b57cec5SDimitry Andric // We specially handle the first loop because it may get cloned into 13780b57cec5SDimitry Andric // a different parent and because we most commonly are cloning leaf loops. 13790b57cec5SDimitry Andric Loop *ClonedRootL = LI.AllocateLoop(); 13800b57cec5SDimitry Andric if (RootParentL) 13810b57cec5SDimitry Andric RootParentL->addChildLoop(ClonedRootL); 13820b57cec5SDimitry Andric else 13830b57cec5SDimitry Andric LI.addTopLevelLoop(ClonedRootL); 13840b57cec5SDimitry Andric AddClonedBlocksToLoop(OrigRootL, *ClonedRootL); 13850b57cec5SDimitry Andric 1386e8d8bef9SDimitry Andric if (OrigRootL.isInnermost()) 13870b57cec5SDimitry Andric return ClonedRootL; 13880b57cec5SDimitry Andric 13890b57cec5SDimitry Andric // If we have a nest, we can quickly clone the entire loop nest using an 13900b57cec5SDimitry Andric // iterative approach because it is a tree. We keep the cloned parent in the 13910b57cec5SDimitry Andric // data structure to avoid repeatedly querying through a map to find it. 13920b57cec5SDimitry Andric SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone; 13930b57cec5SDimitry Andric // Build up the loops to clone in reverse order as we'll clone them from the 13940b57cec5SDimitry Andric // back. 13950b57cec5SDimitry Andric for (Loop *ChildL : llvm::reverse(OrigRootL)) 13960b57cec5SDimitry Andric LoopsToClone.push_back({ClonedRootL, ChildL}); 13970b57cec5SDimitry Andric do { 13980b57cec5SDimitry Andric Loop *ClonedParentL, *L; 13990b57cec5SDimitry Andric std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val(); 14000b57cec5SDimitry Andric Loop *ClonedL = LI.AllocateLoop(); 14010b57cec5SDimitry Andric ClonedParentL->addChildLoop(ClonedL); 14020b57cec5SDimitry Andric AddClonedBlocksToLoop(*L, *ClonedL); 14030b57cec5SDimitry Andric for (Loop *ChildL : llvm::reverse(*L)) 14040b57cec5SDimitry Andric LoopsToClone.push_back({ClonedL, ChildL}); 14050b57cec5SDimitry Andric } while (!LoopsToClone.empty()); 14060b57cec5SDimitry Andric 14070b57cec5SDimitry Andric return ClonedRootL; 14080b57cec5SDimitry Andric } 14090b57cec5SDimitry Andric 14100b57cec5SDimitry Andric /// Build the cloned loops of an original loop from unswitching. 14110b57cec5SDimitry Andric /// 14120b57cec5SDimitry Andric /// Because unswitching simplifies the CFG of the loop, this isn't a trivial 14130b57cec5SDimitry Andric /// operation. We need to re-verify that there even is a loop (as the backedge 14140b57cec5SDimitry Andric /// may not have been cloned), and even if there are remaining backedges the 14150b57cec5SDimitry Andric /// backedge set may be different. However, we know that each child loop is 14160b57cec5SDimitry Andric /// undisturbed, we only need to find where to place each child loop within 14170b57cec5SDimitry Andric /// either any parent loop or within a cloned version of the original loop. 14180b57cec5SDimitry Andric /// 14190b57cec5SDimitry Andric /// Because child loops may end up cloned outside of any cloned version of the 14200b57cec5SDimitry Andric /// original loop, multiple cloned sibling loops may be created. All of them 14210b57cec5SDimitry Andric /// are returned so that the newly introduced loop nest roots can be 14220b57cec5SDimitry Andric /// identified. 14230b57cec5SDimitry Andric static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks, 14240b57cec5SDimitry Andric const ValueToValueMapTy &VMap, LoopInfo &LI, 14250b57cec5SDimitry Andric SmallVectorImpl<Loop *> &NonChildClonedLoops) { 14260b57cec5SDimitry Andric Loop *ClonedL = nullptr; 14270b57cec5SDimitry Andric 14280b57cec5SDimitry Andric auto *OrigPH = OrigL.getLoopPreheader(); 14290b57cec5SDimitry Andric auto *OrigHeader = OrigL.getHeader(); 14300b57cec5SDimitry Andric 14310b57cec5SDimitry Andric auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH)); 14320b57cec5SDimitry Andric auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader)); 14330b57cec5SDimitry Andric 14340b57cec5SDimitry Andric // We need to know the loops of the cloned exit blocks to even compute the 14350b57cec5SDimitry Andric // accurate parent loop. If we only clone exits to some parent of the 14360b57cec5SDimitry Andric // original parent, we want to clone into that outer loop. We also keep track 14370b57cec5SDimitry Andric // of the loops that our cloned exit blocks participate in. 14380b57cec5SDimitry Andric Loop *ParentL = nullptr; 14390b57cec5SDimitry Andric SmallVector<BasicBlock *, 4> ClonedExitsInLoops; 14400b57cec5SDimitry Andric SmallDenseMap<BasicBlock *, Loop *, 16> ExitLoopMap; 14410b57cec5SDimitry Andric ClonedExitsInLoops.reserve(ExitBlocks.size()); 14420b57cec5SDimitry Andric for (auto *ExitBB : ExitBlocks) 14430b57cec5SDimitry Andric if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB))) 14440b57cec5SDimitry Andric if (Loop *ExitL = LI.getLoopFor(ExitBB)) { 14450b57cec5SDimitry Andric ExitLoopMap[ClonedExitBB] = ExitL; 14460b57cec5SDimitry Andric ClonedExitsInLoops.push_back(ClonedExitBB); 14470b57cec5SDimitry Andric if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL))) 14480b57cec5SDimitry Andric ParentL = ExitL; 14490b57cec5SDimitry Andric } 14500b57cec5SDimitry Andric assert((!ParentL || ParentL == OrigL.getParentLoop() || 14510b57cec5SDimitry Andric ParentL->contains(OrigL.getParentLoop())) && 14520b57cec5SDimitry Andric "The computed parent loop should always contain (or be) the parent of " 14530b57cec5SDimitry Andric "the original loop."); 14540b57cec5SDimitry Andric 14550b57cec5SDimitry Andric // We build the set of blocks dominated by the cloned header from the set of 14560b57cec5SDimitry Andric // cloned blocks out of the original loop. While not all of these will 14570b57cec5SDimitry Andric // necessarily be in the cloned loop, it is enough to establish that they 14580b57cec5SDimitry Andric // aren't in unreachable cycles, etc. 14590b57cec5SDimitry Andric SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks; 14600b57cec5SDimitry Andric for (auto *BB : OrigL.blocks()) 14610b57cec5SDimitry Andric if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB))) 14620b57cec5SDimitry Andric ClonedLoopBlocks.insert(ClonedBB); 14630b57cec5SDimitry Andric 14640b57cec5SDimitry Andric // Rebuild the set of blocks that will end up in the cloned loop. We may have 14650b57cec5SDimitry Andric // skipped cloning some region of this loop which can in turn skip some of 14660b57cec5SDimitry Andric // the backedges so we have to rebuild the blocks in the loop based on the 14670b57cec5SDimitry Andric // backedges that remain after cloning. 14680b57cec5SDimitry Andric SmallVector<BasicBlock *, 16> Worklist; 14690b57cec5SDimitry Andric SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop; 14700b57cec5SDimitry Andric for (auto *Pred : predecessors(ClonedHeader)) { 14710b57cec5SDimitry Andric // The only possible non-loop header predecessor is the preheader because 14720b57cec5SDimitry Andric // we know we cloned the loop in simplified form. 14730b57cec5SDimitry Andric if (Pred == ClonedPH) 14740b57cec5SDimitry Andric continue; 14750b57cec5SDimitry Andric 14760b57cec5SDimitry Andric // Because the loop was in simplified form, the only non-loop predecessor 14770b57cec5SDimitry Andric // should be the preheader. 14780b57cec5SDimitry Andric assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop " 14790b57cec5SDimitry Andric "header other than the preheader " 14800b57cec5SDimitry Andric "that is not part of the loop!"); 14810b57cec5SDimitry Andric 14820b57cec5SDimitry Andric // Insert this block into the loop set and on the first visit (and if it 14830b57cec5SDimitry Andric // isn't the header we're currently walking) put it into the worklist to 14840b57cec5SDimitry Andric // recurse through. 14850b57cec5SDimitry Andric if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader) 14860b57cec5SDimitry Andric Worklist.push_back(Pred); 14870b57cec5SDimitry Andric } 14880b57cec5SDimitry Andric 14890b57cec5SDimitry Andric // If we had any backedges then there *is* a cloned loop. Put the header into 14900b57cec5SDimitry Andric // the loop set and then walk the worklist backwards to find all the blocks 14910b57cec5SDimitry Andric // that remain within the loop after cloning. 14920b57cec5SDimitry Andric if (!BlocksInClonedLoop.empty()) { 14930b57cec5SDimitry Andric BlocksInClonedLoop.insert(ClonedHeader); 14940b57cec5SDimitry Andric 14950b57cec5SDimitry Andric while (!Worklist.empty()) { 14960b57cec5SDimitry Andric BasicBlock *BB = Worklist.pop_back_val(); 14970b57cec5SDimitry Andric assert(BlocksInClonedLoop.count(BB) && 14980b57cec5SDimitry Andric "Didn't put block into the loop set!"); 14990b57cec5SDimitry Andric 15000b57cec5SDimitry Andric // Insert any predecessors that are in the possible set into the cloned 15010b57cec5SDimitry Andric // set, and if the insert is successful, add them to the worklist. Note 15020b57cec5SDimitry Andric // that we filter on the blocks that are definitely reachable via the 15030b57cec5SDimitry Andric // backedge to the loop header so we may prune out dead code within the 15040b57cec5SDimitry Andric // cloned loop. 15050b57cec5SDimitry Andric for (auto *Pred : predecessors(BB)) 15060b57cec5SDimitry Andric if (ClonedLoopBlocks.count(Pred) && 15070b57cec5SDimitry Andric BlocksInClonedLoop.insert(Pred).second) 15080b57cec5SDimitry Andric Worklist.push_back(Pred); 15090b57cec5SDimitry Andric } 15100b57cec5SDimitry Andric 15110b57cec5SDimitry Andric ClonedL = LI.AllocateLoop(); 15120b57cec5SDimitry Andric if (ParentL) { 15130b57cec5SDimitry Andric ParentL->addBasicBlockToLoop(ClonedPH, LI); 15140b57cec5SDimitry Andric ParentL->addChildLoop(ClonedL); 15150b57cec5SDimitry Andric } else { 15160b57cec5SDimitry Andric LI.addTopLevelLoop(ClonedL); 15170b57cec5SDimitry Andric } 15180b57cec5SDimitry Andric NonChildClonedLoops.push_back(ClonedL); 15190b57cec5SDimitry Andric 15200b57cec5SDimitry Andric ClonedL->reserveBlocks(BlocksInClonedLoop.size()); 15210b57cec5SDimitry Andric // We don't want to just add the cloned loop blocks based on how we 15220b57cec5SDimitry Andric // discovered them. The original order of blocks was carefully built in 15230b57cec5SDimitry Andric // a way that doesn't rely on predecessor ordering. Rather than re-invent 15240b57cec5SDimitry Andric // that logic, we just re-walk the original blocks (and those of the child 15250b57cec5SDimitry Andric // loops) and filter them as we add them into the cloned loop. 15260b57cec5SDimitry Andric for (auto *BB : OrigL.blocks()) { 15270b57cec5SDimitry Andric auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)); 15280b57cec5SDimitry Andric if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB)) 15290b57cec5SDimitry Andric continue; 15300b57cec5SDimitry Andric 15310b57cec5SDimitry Andric // Directly add the blocks that are only in this loop. 15320b57cec5SDimitry Andric if (LI.getLoopFor(BB) == &OrigL) { 15330b57cec5SDimitry Andric ClonedL->addBasicBlockToLoop(ClonedBB, LI); 15340b57cec5SDimitry Andric continue; 15350b57cec5SDimitry Andric } 15360b57cec5SDimitry Andric 15370b57cec5SDimitry Andric // We want to manually add it to this loop and parents. 15380b57cec5SDimitry Andric // Registering it with LoopInfo will happen when we clone the top 15390b57cec5SDimitry Andric // loop for this block. 15400b57cec5SDimitry Andric for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop()) 15410b57cec5SDimitry Andric PL->addBlockEntry(ClonedBB); 15420b57cec5SDimitry Andric } 15430b57cec5SDimitry Andric 15440b57cec5SDimitry Andric // Now add each child loop whose header remains within the cloned loop. All 15450b57cec5SDimitry Andric // of the blocks within the loop must satisfy the same constraints as the 15460b57cec5SDimitry Andric // header so once we pass the header checks we can just clone the entire 15470b57cec5SDimitry Andric // child loop nest. 15480b57cec5SDimitry Andric for (Loop *ChildL : OrigL) { 15490b57cec5SDimitry Andric auto *ClonedChildHeader = 15500b57cec5SDimitry Andric cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader())); 15510b57cec5SDimitry Andric if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader)) 15520b57cec5SDimitry Andric continue; 15530b57cec5SDimitry Andric 15540b57cec5SDimitry Andric #ifndef NDEBUG 15550b57cec5SDimitry Andric // We should never have a cloned child loop header but fail to have 15560b57cec5SDimitry Andric // all of the blocks for that child loop. 15570b57cec5SDimitry Andric for (auto *ChildLoopBB : ChildL->blocks()) 15580b57cec5SDimitry Andric assert(BlocksInClonedLoop.count( 15590b57cec5SDimitry Andric cast<BasicBlock>(VMap.lookup(ChildLoopBB))) && 15600b57cec5SDimitry Andric "Child cloned loop has a header within the cloned outer " 15610b57cec5SDimitry Andric "loop but not all of its blocks!"); 15620b57cec5SDimitry Andric #endif 15630b57cec5SDimitry Andric 15640b57cec5SDimitry Andric cloneLoopNest(*ChildL, ClonedL, VMap, LI); 15650b57cec5SDimitry Andric } 15660b57cec5SDimitry Andric } 15670b57cec5SDimitry Andric 15680b57cec5SDimitry Andric // Now that we've handled all the components of the original loop that were 15690b57cec5SDimitry Andric // cloned into a new loop, we still need to handle anything from the original 15700b57cec5SDimitry Andric // loop that wasn't in a cloned loop. 15710b57cec5SDimitry Andric 15720b57cec5SDimitry Andric // Figure out what blocks are left to place within any loop nest containing 15730b57cec5SDimitry Andric // the unswitched loop. If we never formed a loop, the cloned PH is one of 15740b57cec5SDimitry Andric // them. 15750b57cec5SDimitry Andric SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet; 15760b57cec5SDimitry Andric if (BlocksInClonedLoop.empty()) 15770b57cec5SDimitry Andric UnloopedBlockSet.insert(ClonedPH); 15780b57cec5SDimitry Andric for (auto *ClonedBB : ClonedLoopBlocks) 15790b57cec5SDimitry Andric if (!BlocksInClonedLoop.count(ClonedBB)) 15800b57cec5SDimitry Andric UnloopedBlockSet.insert(ClonedBB); 15810b57cec5SDimitry Andric 15820b57cec5SDimitry Andric // Copy the cloned exits and sort them in ascending loop depth, we'll work 15830b57cec5SDimitry Andric // backwards across these to process them inside out. The order shouldn't 15840b57cec5SDimitry Andric // matter as we're just trying to build up the map from inside-out; we use 15850b57cec5SDimitry Andric // the map in a more stably ordered way below. 15860b57cec5SDimitry Andric auto OrderedClonedExitsInLoops = ClonedExitsInLoops; 15870b57cec5SDimitry Andric llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) { 15880b57cec5SDimitry Andric return ExitLoopMap.lookup(LHS)->getLoopDepth() < 15890b57cec5SDimitry Andric ExitLoopMap.lookup(RHS)->getLoopDepth(); 15900b57cec5SDimitry Andric }); 15910b57cec5SDimitry Andric 15920b57cec5SDimitry Andric // Populate the existing ExitLoopMap with everything reachable from each 15930b57cec5SDimitry Andric // exit, starting from the inner most exit. 15940b57cec5SDimitry Andric while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) { 15950b57cec5SDimitry Andric assert(Worklist.empty() && "Didn't clear worklist!"); 15960b57cec5SDimitry Andric 15970b57cec5SDimitry Andric BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val(); 15980b57cec5SDimitry Andric Loop *ExitL = ExitLoopMap.lookup(ExitBB); 15990b57cec5SDimitry Andric 16000b57cec5SDimitry Andric // Walk the CFG back until we hit the cloned PH adding everything reachable 16010b57cec5SDimitry Andric // and in the unlooped set to this exit block's loop. 16020b57cec5SDimitry Andric Worklist.push_back(ExitBB); 16030b57cec5SDimitry Andric do { 16040b57cec5SDimitry Andric BasicBlock *BB = Worklist.pop_back_val(); 16050b57cec5SDimitry Andric // We can stop recursing at the cloned preheader (if we get there). 16060b57cec5SDimitry Andric if (BB == ClonedPH) 16070b57cec5SDimitry Andric continue; 16080b57cec5SDimitry Andric 16090b57cec5SDimitry Andric for (BasicBlock *PredBB : predecessors(BB)) { 16100b57cec5SDimitry Andric // If this pred has already been moved to our set or is part of some 16110b57cec5SDimitry Andric // (inner) loop, no update needed. 16120b57cec5SDimitry Andric if (!UnloopedBlockSet.erase(PredBB)) { 16130b57cec5SDimitry Andric assert( 16140b57cec5SDimitry Andric (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) && 16150b57cec5SDimitry Andric "Predecessor not mapped to a loop!"); 16160b57cec5SDimitry Andric continue; 16170b57cec5SDimitry Andric } 16180b57cec5SDimitry Andric 16190b57cec5SDimitry Andric // We just insert into the loop set here. We'll add these blocks to the 16200b57cec5SDimitry Andric // exit loop after we build up the set in an order that doesn't rely on 16210b57cec5SDimitry Andric // predecessor order (which in turn relies on use list order). 16220b57cec5SDimitry Andric bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second; 16230b57cec5SDimitry Andric (void)Inserted; 16240b57cec5SDimitry Andric assert(Inserted && "Should only visit an unlooped block once!"); 16250b57cec5SDimitry Andric 16260b57cec5SDimitry Andric // And recurse through to its predecessors. 16270b57cec5SDimitry Andric Worklist.push_back(PredBB); 16280b57cec5SDimitry Andric } 16290b57cec5SDimitry Andric } while (!Worklist.empty()); 16300b57cec5SDimitry Andric } 16310b57cec5SDimitry Andric 16320b57cec5SDimitry Andric // Now that the ExitLoopMap gives as mapping for all the non-looping cloned 16330b57cec5SDimitry Andric // blocks to their outer loops, walk the cloned blocks and the cloned exits 16340b57cec5SDimitry Andric // in their original order adding them to the correct loop. 16350b57cec5SDimitry Andric 16360b57cec5SDimitry Andric // We need a stable insertion order. We use the order of the original loop 16370b57cec5SDimitry Andric // order and map into the correct parent loop. 16380b57cec5SDimitry Andric for (auto *BB : llvm::concat<BasicBlock *const>( 1639bdd1243dSDimitry Andric ArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops)) 16400b57cec5SDimitry Andric if (Loop *OuterL = ExitLoopMap.lookup(BB)) 16410b57cec5SDimitry Andric OuterL->addBasicBlockToLoop(BB, LI); 16420b57cec5SDimitry Andric 16430b57cec5SDimitry Andric #ifndef NDEBUG 16440b57cec5SDimitry Andric for (auto &BBAndL : ExitLoopMap) { 16450b57cec5SDimitry Andric auto *BB = BBAndL.first; 16460b57cec5SDimitry Andric auto *OuterL = BBAndL.second; 16470b57cec5SDimitry Andric assert(LI.getLoopFor(BB) == OuterL && 16480b57cec5SDimitry Andric "Failed to put all blocks into outer loops!"); 16490b57cec5SDimitry Andric } 16500b57cec5SDimitry Andric #endif 16510b57cec5SDimitry Andric 16520b57cec5SDimitry Andric // Now that all the blocks are placed into the correct containing loop in the 16530b57cec5SDimitry Andric // absence of child loops, find all the potentially cloned child loops and 16540b57cec5SDimitry Andric // clone them into whatever outer loop we placed their header into. 16550b57cec5SDimitry Andric for (Loop *ChildL : OrigL) { 16560b57cec5SDimitry Andric auto *ClonedChildHeader = 16570b57cec5SDimitry Andric cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader())); 16580b57cec5SDimitry Andric if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader)) 16590b57cec5SDimitry Andric continue; 16600b57cec5SDimitry Andric 16610b57cec5SDimitry Andric #ifndef NDEBUG 16620b57cec5SDimitry Andric for (auto *ChildLoopBB : ChildL->blocks()) 16630b57cec5SDimitry Andric assert(VMap.count(ChildLoopBB) && 16640b57cec5SDimitry Andric "Cloned a child loop header but not all of that loops blocks!"); 16650b57cec5SDimitry Andric #endif 16660b57cec5SDimitry Andric 16670b57cec5SDimitry Andric NonChildClonedLoops.push_back(cloneLoopNest( 16680b57cec5SDimitry Andric *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI)); 16690b57cec5SDimitry Andric } 16700b57cec5SDimitry Andric } 16710b57cec5SDimitry Andric 16720b57cec5SDimitry Andric static void 16730b57cec5SDimitry Andric deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks, 16740b57cec5SDimitry Andric ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps, 16750b57cec5SDimitry Andric DominatorTree &DT, MemorySSAUpdater *MSSAU) { 16760b57cec5SDimitry Andric // Find all the dead clones, and remove them from their successors. 16770b57cec5SDimitry Andric SmallVector<BasicBlock *, 16> DeadBlocks; 16780b57cec5SDimitry Andric for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks)) 1679bdd1243dSDimitry Andric for (const auto &VMap : VMaps) 16800b57cec5SDimitry Andric if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB))) 16810b57cec5SDimitry Andric if (!DT.isReachableFromEntry(ClonedBB)) { 16820b57cec5SDimitry Andric for (BasicBlock *SuccBB : successors(ClonedBB)) 16830b57cec5SDimitry Andric SuccBB->removePredecessor(ClonedBB); 16840b57cec5SDimitry Andric DeadBlocks.push_back(ClonedBB); 16850b57cec5SDimitry Andric } 16860b57cec5SDimitry Andric 16870b57cec5SDimitry Andric // Remove all MemorySSA in the dead blocks 16880b57cec5SDimitry Andric if (MSSAU) { 16890b57cec5SDimitry Andric SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(), 16900b57cec5SDimitry Andric DeadBlocks.end()); 16910b57cec5SDimitry Andric MSSAU->removeBlocks(DeadBlockSet); 16920b57cec5SDimitry Andric } 16930b57cec5SDimitry Andric 16940b57cec5SDimitry Andric // Drop any remaining references to break cycles. 16950b57cec5SDimitry Andric for (BasicBlock *BB : DeadBlocks) 16960b57cec5SDimitry Andric BB->dropAllReferences(); 16970b57cec5SDimitry Andric // Erase them from the IR. 16980b57cec5SDimitry Andric for (BasicBlock *BB : DeadBlocks) 16990b57cec5SDimitry Andric BB->eraseFromParent(); 17000b57cec5SDimitry Andric } 17010b57cec5SDimitry Andric 17025f757f3fSDimitry Andric static void deleteDeadBlocksFromLoop(Loop &L, 17030b57cec5SDimitry Andric SmallVectorImpl<BasicBlock *> &ExitBlocks, 17040b57cec5SDimitry Andric DominatorTree &DT, LoopInfo &LI, 17058c6f6c0cSDimitry Andric MemorySSAUpdater *MSSAU, 1706bdd1243dSDimitry Andric ScalarEvolution *SE, 17075f757f3fSDimitry Andric LPMUpdater &LoopUpdater) { 17080b57cec5SDimitry Andric // Find all the dead blocks tied to this loop, and remove them from their 17090b57cec5SDimitry Andric // successors. 17100b57cec5SDimitry Andric SmallSetVector<BasicBlock *, 8> DeadBlockSet; 17110b57cec5SDimitry Andric 17120b57cec5SDimitry Andric // Start with loop/exit blocks and get a transitive closure of reachable dead 17130b57cec5SDimitry Andric // blocks. 17140b57cec5SDimitry Andric SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(), 17150b57cec5SDimitry Andric ExitBlocks.end()); 17160b57cec5SDimitry Andric DeathCandidates.append(L.blocks().begin(), L.blocks().end()); 17170b57cec5SDimitry Andric while (!DeathCandidates.empty()) { 17180b57cec5SDimitry Andric auto *BB = DeathCandidates.pop_back_val(); 17190b57cec5SDimitry Andric if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) { 17200b57cec5SDimitry Andric for (BasicBlock *SuccBB : successors(BB)) { 17210b57cec5SDimitry Andric SuccBB->removePredecessor(BB); 17220b57cec5SDimitry Andric DeathCandidates.push_back(SuccBB); 17230b57cec5SDimitry Andric } 17240b57cec5SDimitry Andric DeadBlockSet.insert(BB); 17250b57cec5SDimitry Andric } 17260b57cec5SDimitry Andric } 17270b57cec5SDimitry Andric 17280b57cec5SDimitry Andric // Remove all MemorySSA in the dead blocks 17290b57cec5SDimitry Andric if (MSSAU) 17300b57cec5SDimitry Andric MSSAU->removeBlocks(DeadBlockSet); 17310b57cec5SDimitry Andric 17320b57cec5SDimitry Andric // Filter out the dead blocks from the exit blocks list so that it can be 17330b57cec5SDimitry Andric // used in the caller. 17340b57cec5SDimitry Andric llvm::erase_if(ExitBlocks, 17350b57cec5SDimitry Andric [&](BasicBlock *BB) { return DeadBlockSet.count(BB); }); 17360b57cec5SDimitry Andric 17370b57cec5SDimitry Andric // Walk from this loop up through its parents removing all of the dead blocks. 17380b57cec5SDimitry Andric for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) { 17390b57cec5SDimitry Andric for (auto *BB : DeadBlockSet) 17400b57cec5SDimitry Andric ParentL->getBlocksSet().erase(BB); 17410b57cec5SDimitry Andric llvm::erase_if(ParentL->getBlocksVector(), 17420b57cec5SDimitry Andric [&](BasicBlock *BB) { return DeadBlockSet.count(BB); }); 17430b57cec5SDimitry Andric } 17440b57cec5SDimitry Andric 17450b57cec5SDimitry Andric // Now delete the dead child loops. This raw delete will clear them 17460b57cec5SDimitry Andric // recursively. 17470b57cec5SDimitry Andric llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) { 17480b57cec5SDimitry Andric if (!DeadBlockSet.count(ChildL->getHeader())) 17490b57cec5SDimitry Andric return false; 17500b57cec5SDimitry Andric 17510b57cec5SDimitry Andric assert(llvm::all_of(ChildL->blocks(), 17520b57cec5SDimitry Andric [&](BasicBlock *ChildBB) { 17530b57cec5SDimitry Andric return DeadBlockSet.count(ChildBB); 17540b57cec5SDimitry Andric }) && 17550b57cec5SDimitry Andric "If the child loop header is dead all blocks in the child loop must " 17560b57cec5SDimitry Andric "be dead as well!"); 17575f757f3fSDimitry Andric LoopUpdater.markLoopAsDeleted(*ChildL, ChildL->getName()); 1758bdd1243dSDimitry Andric if (SE) 1759bdd1243dSDimitry Andric SE->forgetBlockAndLoopDispositions(); 17600b57cec5SDimitry Andric LI.destroy(ChildL); 17610b57cec5SDimitry Andric return true; 17620b57cec5SDimitry Andric }); 17630b57cec5SDimitry Andric 17640b57cec5SDimitry Andric // Remove the loop mappings for the dead blocks and drop all the references 17650b57cec5SDimitry Andric // from these blocks to others to handle cyclic references as we start 17660b57cec5SDimitry Andric // deleting the blocks themselves. 17670b57cec5SDimitry Andric for (auto *BB : DeadBlockSet) { 17680b57cec5SDimitry Andric // Check that the dominator tree has already been updated. 17690b57cec5SDimitry Andric assert(!DT.getNode(BB) && "Should already have cleared domtree!"); 17700b57cec5SDimitry Andric LI.changeLoopFor(BB, nullptr); 17715ffd83dbSDimitry Andric // Drop all uses of the instructions to make sure we won't have dangling 17725ffd83dbSDimitry Andric // uses in other blocks. 17735ffd83dbSDimitry Andric for (auto &I : *BB) 17745ffd83dbSDimitry Andric if (!I.use_empty()) 177581ad6265SDimitry Andric I.replaceAllUsesWith(PoisonValue::get(I.getType())); 17760b57cec5SDimitry Andric BB->dropAllReferences(); 17770b57cec5SDimitry Andric } 17780b57cec5SDimitry Andric 17790b57cec5SDimitry Andric // Actually delete the blocks now that they've been fully unhooked from the 17800b57cec5SDimitry Andric // IR. 17810b57cec5SDimitry Andric for (auto *BB : DeadBlockSet) 17820b57cec5SDimitry Andric BB->eraseFromParent(); 17830b57cec5SDimitry Andric } 17840b57cec5SDimitry Andric 17850b57cec5SDimitry Andric /// Recompute the set of blocks in a loop after unswitching. 17860b57cec5SDimitry Andric /// 17870b57cec5SDimitry Andric /// This walks from the original headers predecessors to rebuild the loop. We 17880b57cec5SDimitry Andric /// take advantage of the fact that new blocks can't have been added, and so we 17890b57cec5SDimitry Andric /// filter by the original loop's blocks. This also handles potentially 17900b57cec5SDimitry Andric /// unreachable code that we don't want to explore but might be found examining 17910b57cec5SDimitry Andric /// the predecessors of the header. 17920b57cec5SDimitry Andric /// 17930b57cec5SDimitry Andric /// If the original loop is no longer a loop, this will return an empty set. If 17940b57cec5SDimitry Andric /// it remains a loop, all the blocks within it will be added to the set 17950b57cec5SDimitry Andric /// (including those blocks in inner loops). 17960b57cec5SDimitry Andric static SmallPtrSet<const BasicBlock *, 16> recomputeLoopBlockSet(Loop &L, 17970b57cec5SDimitry Andric LoopInfo &LI) { 17980b57cec5SDimitry Andric SmallPtrSet<const BasicBlock *, 16> LoopBlockSet; 17990b57cec5SDimitry Andric 18000b57cec5SDimitry Andric auto *PH = L.getLoopPreheader(); 18010b57cec5SDimitry Andric auto *Header = L.getHeader(); 18020b57cec5SDimitry Andric 18030b57cec5SDimitry Andric // A worklist to use while walking backwards from the header. 18040b57cec5SDimitry Andric SmallVector<BasicBlock *, 16> Worklist; 18050b57cec5SDimitry Andric 18060b57cec5SDimitry Andric // First walk the predecessors of the header to find the backedges. This will 18070b57cec5SDimitry Andric // form the basis of our walk. 18080b57cec5SDimitry Andric for (auto *Pred : predecessors(Header)) { 18090b57cec5SDimitry Andric // Skip the preheader. 18100b57cec5SDimitry Andric if (Pred == PH) 18110b57cec5SDimitry Andric continue; 18120b57cec5SDimitry Andric 18130b57cec5SDimitry Andric // Because the loop was in simplified form, the only non-loop predecessor 18140b57cec5SDimitry Andric // is the preheader. 18150b57cec5SDimitry Andric assert(L.contains(Pred) && "Found a predecessor of the loop header other " 18160b57cec5SDimitry Andric "than the preheader that is not part of the " 18170b57cec5SDimitry Andric "loop!"); 18180b57cec5SDimitry Andric 18190b57cec5SDimitry Andric // Insert this block into the loop set and on the first visit and, if it 18200b57cec5SDimitry Andric // isn't the header we're currently walking, put it into the worklist to 18210b57cec5SDimitry Andric // recurse through. 18220b57cec5SDimitry Andric if (LoopBlockSet.insert(Pred).second && Pred != Header) 18230b57cec5SDimitry Andric Worklist.push_back(Pred); 18240b57cec5SDimitry Andric } 18250b57cec5SDimitry Andric 18260b57cec5SDimitry Andric // If no backedges were found, we're done. 18270b57cec5SDimitry Andric if (LoopBlockSet.empty()) 18280b57cec5SDimitry Andric return LoopBlockSet; 18290b57cec5SDimitry Andric 18300b57cec5SDimitry Andric // We found backedges, recurse through them to identify the loop blocks. 18310b57cec5SDimitry Andric while (!Worklist.empty()) { 18320b57cec5SDimitry Andric BasicBlock *BB = Worklist.pop_back_val(); 18330b57cec5SDimitry Andric assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!"); 18340b57cec5SDimitry Andric 18350b57cec5SDimitry Andric // No need to walk past the header. 18360b57cec5SDimitry Andric if (BB == Header) 18370b57cec5SDimitry Andric continue; 18380b57cec5SDimitry Andric 18390b57cec5SDimitry Andric // Because we know the inner loop structure remains valid we can use the 18400b57cec5SDimitry Andric // loop structure to jump immediately across the entire nested loop. 18410b57cec5SDimitry Andric // Further, because it is in loop simplified form, we can directly jump 18420b57cec5SDimitry Andric // to its preheader afterward. 18430b57cec5SDimitry Andric if (Loop *InnerL = LI.getLoopFor(BB)) 18440b57cec5SDimitry Andric if (InnerL != &L) { 18450b57cec5SDimitry Andric assert(L.contains(InnerL) && 18460b57cec5SDimitry Andric "Should not reach a loop *outside* this loop!"); 18470b57cec5SDimitry Andric // The preheader is the only possible predecessor of the loop so 18480b57cec5SDimitry Andric // insert it into the set and check whether it was already handled. 18490b57cec5SDimitry Andric auto *InnerPH = InnerL->getLoopPreheader(); 18500b57cec5SDimitry Andric assert(L.contains(InnerPH) && "Cannot contain an inner loop block " 18510b57cec5SDimitry Andric "but not contain the inner loop " 18520b57cec5SDimitry Andric "preheader!"); 18530b57cec5SDimitry Andric if (!LoopBlockSet.insert(InnerPH).second) 18540b57cec5SDimitry Andric // The only way to reach the preheader is through the loop body 18550b57cec5SDimitry Andric // itself so if it has been visited the loop is already handled. 18560b57cec5SDimitry Andric continue; 18570b57cec5SDimitry Andric 18580b57cec5SDimitry Andric // Insert all of the blocks (other than those already present) into 18590b57cec5SDimitry Andric // the loop set. We expect at least the block that led us to find the 18600b57cec5SDimitry Andric // inner loop to be in the block set, but we may also have other loop 18610b57cec5SDimitry Andric // blocks if they were already enqueued as predecessors of some other 18620b57cec5SDimitry Andric // outer loop block. 18630b57cec5SDimitry Andric for (auto *InnerBB : InnerL->blocks()) { 18640b57cec5SDimitry Andric if (InnerBB == BB) { 18650b57cec5SDimitry Andric assert(LoopBlockSet.count(InnerBB) && 18660b57cec5SDimitry Andric "Block should already be in the set!"); 18670b57cec5SDimitry Andric continue; 18680b57cec5SDimitry Andric } 18690b57cec5SDimitry Andric 18700b57cec5SDimitry Andric LoopBlockSet.insert(InnerBB); 18710b57cec5SDimitry Andric } 18720b57cec5SDimitry Andric 18730b57cec5SDimitry Andric // Add the preheader to the worklist so we will continue past the 18740b57cec5SDimitry Andric // loop body. 18750b57cec5SDimitry Andric Worklist.push_back(InnerPH); 18760b57cec5SDimitry Andric continue; 18770b57cec5SDimitry Andric } 18780b57cec5SDimitry Andric 18790b57cec5SDimitry Andric // Insert any predecessors that were in the original loop into the new 18800b57cec5SDimitry Andric // set, and if the insert is successful, add them to the worklist. 18810b57cec5SDimitry Andric for (auto *Pred : predecessors(BB)) 18820b57cec5SDimitry Andric if (L.contains(Pred) && LoopBlockSet.insert(Pred).second) 18830b57cec5SDimitry Andric Worklist.push_back(Pred); 18840b57cec5SDimitry Andric } 18850b57cec5SDimitry Andric 18860b57cec5SDimitry Andric assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!"); 18870b57cec5SDimitry Andric 18880b57cec5SDimitry Andric // We've found all the blocks participating in the loop, return our completed 18890b57cec5SDimitry Andric // set. 18900b57cec5SDimitry Andric return LoopBlockSet; 18910b57cec5SDimitry Andric } 18920b57cec5SDimitry Andric 18930b57cec5SDimitry Andric /// Rebuild a loop after unswitching removes some subset of blocks and edges. 18940b57cec5SDimitry Andric /// 18950b57cec5SDimitry Andric /// The removal may have removed some child loops entirely but cannot have 18960b57cec5SDimitry Andric /// disturbed any remaining child loops. However, they may need to be hoisted 18970b57cec5SDimitry Andric /// to the parent loop (or to be top-level loops). The original loop may be 18980b57cec5SDimitry Andric /// completely removed. 18990b57cec5SDimitry Andric /// 19000b57cec5SDimitry Andric /// The sibling loops resulting from this update are returned. If the original 19010b57cec5SDimitry Andric /// loop remains a valid loop, it will be the first entry in this list with all 19020b57cec5SDimitry Andric /// of the newly sibling loops following it. 19030b57cec5SDimitry Andric /// 19040b57cec5SDimitry Andric /// Returns true if the loop remains a loop after unswitching, and false if it 19050b57cec5SDimitry Andric /// is no longer a loop after unswitching (and should not continue to be 19060b57cec5SDimitry Andric /// referenced). 19070b57cec5SDimitry Andric static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks, 19080b57cec5SDimitry Andric LoopInfo &LI, 1909bdd1243dSDimitry Andric SmallVectorImpl<Loop *> &HoistedLoops, 1910bdd1243dSDimitry Andric ScalarEvolution *SE) { 19110b57cec5SDimitry Andric auto *PH = L.getLoopPreheader(); 19120b57cec5SDimitry Andric 19130b57cec5SDimitry Andric // Compute the actual parent loop from the exit blocks. Because we may have 19140b57cec5SDimitry Andric // pruned some exits the loop may be different from the original parent. 19150b57cec5SDimitry Andric Loop *ParentL = nullptr; 19160b57cec5SDimitry Andric SmallVector<Loop *, 4> ExitLoops; 19170b57cec5SDimitry Andric SmallVector<BasicBlock *, 4> ExitsInLoops; 19180b57cec5SDimitry Andric ExitsInLoops.reserve(ExitBlocks.size()); 19190b57cec5SDimitry Andric for (auto *ExitBB : ExitBlocks) 19200b57cec5SDimitry Andric if (Loop *ExitL = LI.getLoopFor(ExitBB)) { 19210b57cec5SDimitry Andric ExitLoops.push_back(ExitL); 19220b57cec5SDimitry Andric ExitsInLoops.push_back(ExitBB); 19230b57cec5SDimitry Andric if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL))) 19240b57cec5SDimitry Andric ParentL = ExitL; 19250b57cec5SDimitry Andric } 19260b57cec5SDimitry Andric 19270b57cec5SDimitry Andric // Recompute the blocks participating in this loop. This may be empty if it 19280b57cec5SDimitry Andric // is no longer a loop. 19290b57cec5SDimitry Andric auto LoopBlockSet = recomputeLoopBlockSet(L, LI); 19300b57cec5SDimitry Andric 19310b57cec5SDimitry Andric // If we still have a loop, we need to re-set the loop's parent as the exit 19320b57cec5SDimitry Andric // block set changing may have moved it within the loop nest. Note that this 19330b57cec5SDimitry Andric // can only happen when this loop has a parent as it can only hoist the loop 19340b57cec5SDimitry Andric // *up* the nest. 19350b57cec5SDimitry Andric if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) { 19360b57cec5SDimitry Andric // Remove this loop's (original) blocks from all of the intervening loops. 19370b57cec5SDimitry Andric for (Loop *IL = L.getParentLoop(); IL != ParentL; 19380b57cec5SDimitry Andric IL = IL->getParentLoop()) { 19390b57cec5SDimitry Andric IL->getBlocksSet().erase(PH); 19400b57cec5SDimitry Andric for (auto *BB : L.blocks()) 19410b57cec5SDimitry Andric IL->getBlocksSet().erase(BB); 19420b57cec5SDimitry Andric llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) { 19430b57cec5SDimitry Andric return BB == PH || L.contains(BB); 19440b57cec5SDimitry Andric }); 19450b57cec5SDimitry Andric } 19460b57cec5SDimitry Andric 19470b57cec5SDimitry Andric LI.changeLoopFor(PH, ParentL); 19480b57cec5SDimitry Andric L.getParentLoop()->removeChildLoop(&L); 19490b57cec5SDimitry Andric if (ParentL) 19500b57cec5SDimitry Andric ParentL->addChildLoop(&L); 19510b57cec5SDimitry Andric else 19520b57cec5SDimitry Andric LI.addTopLevelLoop(&L); 19530b57cec5SDimitry Andric } 19540b57cec5SDimitry Andric 19550b57cec5SDimitry Andric // Now we update all the blocks which are no longer within the loop. 19560b57cec5SDimitry Andric auto &Blocks = L.getBlocksVector(); 19570b57cec5SDimitry Andric auto BlocksSplitI = 19580b57cec5SDimitry Andric LoopBlockSet.empty() 19590b57cec5SDimitry Andric ? Blocks.begin() 19600b57cec5SDimitry Andric : std::stable_partition( 19610b57cec5SDimitry Andric Blocks.begin(), Blocks.end(), 19620b57cec5SDimitry Andric [&](BasicBlock *BB) { return LoopBlockSet.count(BB); }); 19630b57cec5SDimitry Andric 19640b57cec5SDimitry Andric // Before we erase the list of unlooped blocks, build a set of them. 19650b57cec5SDimitry Andric SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end()); 19660b57cec5SDimitry Andric if (LoopBlockSet.empty()) 19670b57cec5SDimitry Andric UnloopedBlocks.insert(PH); 19680b57cec5SDimitry Andric 19690b57cec5SDimitry Andric // Now erase these blocks from the loop. 19700b57cec5SDimitry Andric for (auto *BB : make_range(BlocksSplitI, Blocks.end())) 19710b57cec5SDimitry Andric L.getBlocksSet().erase(BB); 19720b57cec5SDimitry Andric Blocks.erase(BlocksSplitI, Blocks.end()); 19730b57cec5SDimitry Andric 19740b57cec5SDimitry Andric // Sort the exits in ascending loop depth, we'll work backwards across these 19750b57cec5SDimitry Andric // to process them inside out. 19760b57cec5SDimitry Andric llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) { 19770b57cec5SDimitry Andric return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS); 19780b57cec5SDimitry Andric }); 19790b57cec5SDimitry Andric 19800b57cec5SDimitry Andric // We'll build up a set for each exit loop. 19810b57cec5SDimitry Andric SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks; 19820b57cec5SDimitry Andric Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop. 19830b57cec5SDimitry Andric 19840b57cec5SDimitry Andric auto RemoveUnloopedBlocksFromLoop = 19850b57cec5SDimitry Andric [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) { 19860b57cec5SDimitry Andric for (auto *BB : UnloopedBlocks) 19870b57cec5SDimitry Andric L.getBlocksSet().erase(BB); 19880b57cec5SDimitry Andric llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) { 19890b57cec5SDimitry Andric return UnloopedBlocks.count(BB); 19900b57cec5SDimitry Andric }); 19910b57cec5SDimitry Andric }; 19920b57cec5SDimitry Andric 19930b57cec5SDimitry Andric SmallVector<BasicBlock *, 16> Worklist; 19940b57cec5SDimitry Andric while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) { 19950b57cec5SDimitry Andric assert(Worklist.empty() && "Didn't clear worklist!"); 19960b57cec5SDimitry Andric assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!"); 19970b57cec5SDimitry Andric 19980b57cec5SDimitry Andric // Grab the next exit block, in decreasing loop depth order. 19990b57cec5SDimitry Andric BasicBlock *ExitBB = ExitsInLoops.pop_back_val(); 20000b57cec5SDimitry Andric Loop &ExitL = *LI.getLoopFor(ExitBB); 20010b57cec5SDimitry Andric assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!"); 20020b57cec5SDimitry Andric 20030b57cec5SDimitry Andric // Erase all of the unlooped blocks from the loops between the previous 20040b57cec5SDimitry Andric // exit loop and this exit loop. This works because the ExitInLoops list is 20050b57cec5SDimitry Andric // sorted in increasing order of loop depth and thus we visit loops in 20060b57cec5SDimitry Andric // decreasing order of loop depth. 20070b57cec5SDimitry Andric for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop()) 20080b57cec5SDimitry Andric RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks); 20090b57cec5SDimitry Andric 20100b57cec5SDimitry Andric // Walk the CFG back until we hit the cloned PH adding everything reachable 20110b57cec5SDimitry Andric // and in the unlooped set to this exit block's loop. 20120b57cec5SDimitry Andric Worklist.push_back(ExitBB); 20130b57cec5SDimitry Andric do { 20140b57cec5SDimitry Andric BasicBlock *BB = Worklist.pop_back_val(); 20150b57cec5SDimitry Andric // We can stop recursing at the cloned preheader (if we get there). 20160b57cec5SDimitry Andric if (BB == PH) 20170b57cec5SDimitry Andric continue; 20180b57cec5SDimitry Andric 20190b57cec5SDimitry Andric for (BasicBlock *PredBB : predecessors(BB)) { 20200b57cec5SDimitry Andric // If this pred has already been moved to our set or is part of some 20210b57cec5SDimitry Andric // (inner) loop, no update needed. 20220b57cec5SDimitry Andric if (!UnloopedBlocks.erase(PredBB)) { 20230b57cec5SDimitry Andric assert((NewExitLoopBlocks.count(PredBB) || 20240b57cec5SDimitry Andric ExitL.contains(LI.getLoopFor(PredBB))) && 20250b57cec5SDimitry Andric "Predecessor not in a nested loop (or already visited)!"); 20260b57cec5SDimitry Andric continue; 20270b57cec5SDimitry Andric } 20280b57cec5SDimitry Andric 20290b57cec5SDimitry Andric // We just insert into the loop set here. We'll add these blocks to the 20300b57cec5SDimitry Andric // exit loop after we build up the set in a deterministic order rather 20310b57cec5SDimitry Andric // than the predecessor-influenced visit order. 20320b57cec5SDimitry Andric bool Inserted = NewExitLoopBlocks.insert(PredBB).second; 20330b57cec5SDimitry Andric (void)Inserted; 20340b57cec5SDimitry Andric assert(Inserted && "Should only visit an unlooped block once!"); 20350b57cec5SDimitry Andric 20360b57cec5SDimitry Andric // And recurse through to its predecessors. 20370b57cec5SDimitry Andric Worklist.push_back(PredBB); 20380b57cec5SDimitry Andric } 20390b57cec5SDimitry Andric } while (!Worklist.empty()); 20400b57cec5SDimitry Andric 20410b57cec5SDimitry Andric // If blocks in this exit loop were directly part of the original loop (as 20420b57cec5SDimitry Andric // opposed to a child loop) update the map to point to this exit loop. This 20430b57cec5SDimitry Andric // just updates a map and so the fact that the order is unstable is fine. 20440b57cec5SDimitry Andric for (auto *BB : NewExitLoopBlocks) 20450b57cec5SDimitry Andric if (Loop *BBL = LI.getLoopFor(BB)) 20460b57cec5SDimitry Andric if (BBL == &L || !L.contains(BBL)) 20470b57cec5SDimitry Andric LI.changeLoopFor(BB, &ExitL); 20480b57cec5SDimitry Andric 20490b57cec5SDimitry Andric // We will remove the remaining unlooped blocks from this loop in the next 20500b57cec5SDimitry Andric // iteration or below. 20510b57cec5SDimitry Andric NewExitLoopBlocks.clear(); 20520b57cec5SDimitry Andric } 20530b57cec5SDimitry Andric 20540b57cec5SDimitry Andric // Any remaining unlooped blocks are no longer part of any loop unless they 20550b57cec5SDimitry Andric // are part of some child loop. 20560b57cec5SDimitry Andric for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop()) 20570b57cec5SDimitry Andric RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks); 20580b57cec5SDimitry Andric for (auto *BB : UnloopedBlocks) 20590b57cec5SDimitry Andric if (Loop *BBL = LI.getLoopFor(BB)) 20600b57cec5SDimitry Andric if (BBL == &L || !L.contains(BBL)) 20610b57cec5SDimitry Andric LI.changeLoopFor(BB, nullptr); 20620b57cec5SDimitry Andric 20630b57cec5SDimitry Andric // Sink all the child loops whose headers are no longer in the loop set to 20640b57cec5SDimitry Andric // the parent (or to be top level loops). We reach into the loop and directly 20650b57cec5SDimitry Andric // update its subloop vector to make this batch update efficient. 20660b57cec5SDimitry Andric auto &SubLoops = L.getSubLoopsVector(); 20670b57cec5SDimitry Andric auto SubLoopsSplitI = 20680b57cec5SDimitry Andric LoopBlockSet.empty() 20690b57cec5SDimitry Andric ? SubLoops.begin() 20700b57cec5SDimitry Andric : std::stable_partition( 20710b57cec5SDimitry Andric SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) { 20720b57cec5SDimitry Andric return LoopBlockSet.count(SubL->getHeader()); 20730b57cec5SDimitry Andric }); 20740b57cec5SDimitry Andric for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) { 20750b57cec5SDimitry Andric HoistedLoops.push_back(HoistedL); 20760b57cec5SDimitry Andric HoistedL->setParentLoop(nullptr); 20770b57cec5SDimitry Andric 20780b57cec5SDimitry Andric // To compute the new parent of this hoisted loop we look at where we 20790b57cec5SDimitry Andric // placed the preheader above. We can't lookup the header itself because we 20800b57cec5SDimitry Andric // retained the mapping from the header to the hoisted loop. But the 20810b57cec5SDimitry Andric // preheader and header should have the exact same new parent computed 20820b57cec5SDimitry Andric // based on the set of exit blocks from the original loop as the preheader 20830b57cec5SDimitry Andric // is a predecessor of the header and so reached in the reverse walk. And 20840b57cec5SDimitry Andric // because the loops were all in simplified form the preheader of the 20850b57cec5SDimitry Andric // hoisted loop can't be part of some *other* loop. 20860b57cec5SDimitry Andric if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader())) 20870b57cec5SDimitry Andric NewParentL->addChildLoop(HoistedL); 20880b57cec5SDimitry Andric else 20890b57cec5SDimitry Andric LI.addTopLevelLoop(HoistedL); 20900b57cec5SDimitry Andric } 20910b57cec5SDimitry Andric SubLoops.erase(SubLoopsSplitI, SubLoops.end()); 20920b57cec5SDimitry Andric 20930b57cec5SDimitry Andric // Actually delete the loop if nothing remained within it. 20940b57cec5SDimitry Andric if (Blocks.empty()) { 20950b57cec5SDimitry Andric assert(SubLoops.empty() && 20960b57cec5SDimitry Andric "Failed to remove all subloops from the original loop!"); 20970b57cec5SDimitry Andric if (Loop *ParentL = L.getParentLoop()) 20980b57cec5SDimitry Andric ParentL->removeChildLoop(llvm::find(*ParentL, &L)); 20990b57cec5SDimitry Andric else 21000b57cec5SDimitry Andric LI.removeLoop(llvm::find(LI, &L)); 21015f757f3fSDimitry Andric // markLoopAsDeleted for L should be triggered by the caller (it is 21025f757f3fSDimitry Andric // typically done within postUnswitch). 2103bdd1243dSDimitry Andric if (SE) 2104bdd1243dSDimitry Andric SE->forgetBlockAndLoopDispositions(); 21050b57cec5SDimitry Andric LI.destroy(&L); 21060b57cec5SDimitry Andric return false; 21070b57cec5SDimitry Andric } 21080b57cec5SDimitry Andric 21090b57cec5SDimitry Andric return true; 21100b57cec5SDimitry Andric } 21110b57cec5SDimitry Andric 21120b57cec5SDimitry Andric /// Helper to visit a dominator subtree, invoking a callable on each node. 21130b57cec5SDimitry Andric /// 21140b57cec5SDimitry Andric /// Returning false at any point will stop walking past that node of the tree. 21150b57cec5SDimitry Andric template <typename CallableT> 21160b57cec5SDimitry Andric void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) { 21170b57cec5SDimitry Andric SmallVector<DomTreeNode *, 4> DomWorklist; 21180b57cec5SDimitry Andric DomWorklist.push_back(DT[BB]); 21190b57cec5SDimitry Andric #ifndef NDEBUG 21200b57cec5SDimitry Andric SmallPtrSet<DomTreeNode *, 4> Visited; 21210b57cec5SDimitry Andric Visited.insert(DT[BB]); 21220b57cec5SDimitry Andric #endif 21230b57cec5SDimitry Andric do { 21240b57cec5SDimitry Andric DomTreeNode *N = DomWorklist.pop_back_val(); 21250b57cec5SDimitry Andric 21260b57cec5SDimitry Andric // Visit this node. 21270b57cec5SDimitry Andric if (!Callable(N->getBlock())) 21280b57cec5SDimitry Andric continue; 21290b57cec5SDimitry Andric 21300b57cec5SDimitry Andric // Accumulate the child nodes. 21310b57cec5SDimitry Andric for (DomTreeNode *ChildN : *N) { 21320b57cec5SDimitry Andric assert(Visited.insert(ChildN).second && 21330b57cec5SDimitry Andric "Cannot visit a node twice when walking a tree!"); 21340b57cec5SDimitry Andric DomWorklist.push_back(ChildN); 21350b57cec5SDimitry Andric } 21360b57cec5SDimitry Andric } while (!DomWorklist.empty()); 21370b57cec5SDimitry Andric } 21380b57cec5SDimitry Andric 21395f757f3fSDimitry Andric void postUnswitch(Loop &L, LPMUpdater &U, StringRef LoopName, 21405f757f3fSDimitry Andric bool CurrentLoopValid, bool PartiallyInvariant, 21415f757f3fSDimitry Andric bool InjectedCondition, ArrayRef<Loop *> NewLoops) { 21425f757f3fSDimitry Andric // If we did a non-trivial unswitch, we have added new (cloned) loops. 21435f757f3fSDimitry Andric if (!NewLoops.empty()) 21445f757f3fSDimitry Andric U.addSiblingLoops(NewLoops); 21455f757f3fSDimitry Andric 21465f757f3fSDimitry Andric // If the current loop remains valid, we should revisit it to catch any 21475f757f3fSDimitry Andric // other unswitch opportunities. Otherwise, we need to mark it as deleted. 21485f757f3fSDimitry Andric if (CurrentLoopValid) { 21495f757f3fSDimitry Andric if (PartiallyInvariant) { 21505f757f3fSDimitry Andric // Mark the new loop as partially unswitched, to avoid unswitching on 21515f757f3fSDimitry Andric // the same condition again. 21525f757f3fSDimitry Andric auto &Context = L.getHeader()->getContext(); 21535f757f3fSDimitry Andric MDNode *DisableUnswitchMD = MDNode::get( 21545f757f3fSDimitry Andric Context, 21555f757f3fSDimitry Andric MDString::get(Context, "llvm.loop.unswitch.partial.disable")); 21565f757f3fSDimitry Andric MDNode *NewLoopID = makePostTransformationMetadata( 21575f757f3fSDimitry Andric Context, L.getLoopID(), {"llvm.loop.unswitch.partial"}, 21585f757f3fSDimitry Andric {DisableUnswitchMD}); 21595f757f3fSDimitry Andric L.setLoopID(NewLoopID); 21605f757f3fSDimitry Andric } else if (InjectedCondition) { 21615f757f3fSDimitry Andric // Do the same for injection of invariant conditions. 21625f757f3fSDimitry Andric auto &Context = L.getHeader()->getContext(); 21635f757f3fSDimitry Andric MDNode *DisableUnswitchMD = MDNode::get( 21645f757f3fSDimitry Andric Context, 21655f757f3fSDimitry Andric MDString::get(Context, "llvm.loop.unswitch.injection.disable")); 21665f757f3fSDimitry Andric MDNode *NewLoopID = makePostTransformationMetadata( 21675f757f3fSDimitry Andric Context, L.getLoopID(), {"llvm.loop.unswitch.injection"}, 21685f757f3fSDimitry Andric {DisableUnswitchMD}); 21695f757f3fSDimitry Andric L.setLoopID(NewLoopID); 21705f757f3fSDimitry Andric } else 21715f757f3fSDimitry Andric U.revisitCurrentLoop(); 21725f757f3fSDimitry Andric } else 21735f757f3fSDimitry Andric U.markLoopAsDeleted(L, LoopName); 21745f757f3fSDimitry Andric } 21755f757f3fSDimitry Andric 21760b57cec5SDimitry Andric static void unswitchNontrivialInvariants( 21770b57cec5SDimitry Andric Loop &L, Instruction &TI, ArrayRef<Value *> Invariants, 2178bdd1243dSDimitry Andric IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, 21795f757f3fSDimitry Andric AssumptionCache &AC, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, 21805f757f3fSDimitry Andric LPMUpdater &LoopUpdater, bool InsertFreeze, bool InjectedCondition) { 21810b57cec5SDimitry Andric auto *ParentBB = TI.getParent(); 21820b57cec5SDimitry Andric BranchInst *BI = dyn_cast<BranchInst>(&TI); 21830b57cec5SDimitry Andric SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI); 21840b57cec5SDimitry Andric 21855f757f3fSDimitry Andric // Save the current loop name in a variable so that we can report it even 21865f757f3fSDimitry Andric // after it has been deleted. 21875f757f3fSDimitry Andric std::string LoopName(L.getName()); 21885f757f3fSDimitry Andric 21890b57cec5SDimitry Andric // We can only unswitch switches, conditional branches with an invariant 2190fe6060f1SDimitry Andric // condition, or combining invariant conditions with an instruction or 2191fe6060f1SDimitry Andric // partially invariant instructions. 21928bcb0991SDimitry Andric assert((SI || (BI && BI->isConditional())) && 21930b57cec5SDimitry Andric "Can only unswitch switches and conditional branch!"); 2194fe6060f1SDimitry Andric bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty(); 2195fe6060f1SDimitry Andric bool FullUnswitch = 219681ad6265SDimitry Andric SI || (skipTrivialSelect(BI->getCondition()) == Invariants[0] && 219781ad6265SDimitry Andric !PartiallyInvariant); 21980b57cec5SDimitry Andric if (FullUnswitch) 21990b57cec5SDimitry Andric assert(Invariants.size() == 1 && 22000b57cec5SDimitry Andric "Cannot have other invariants with full unswitching!"); 22010b57cec5SDimitry Andric else 220281ad6265SDimitry Andric assert(isa<Instruction>(skipTrivialSelect(BI->getCondition())) && 22030b57cec5SDimitry Andric "Partial unswitching requires an instruction as the condition!"); 22040b57cec5SDimitry Andric 22050b57cec5SDimitry Andric if (MSSAU && VerifyMemorySSA) 22060b57cec5SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA(); 22070b57cec5SDimitry Andric 22080b57cec5SDimitry Andric // Constant and BBs tracking the cloned and continuing successor. When we are 22090b57cec5SDimitry Andric // unswitching the entire condition, this can just be trivially chosen to 22100b57cec5SDimitry Andric // unswitch towards `true`. However, when we are unswitching a set of 2211fe6060f1SDimitry Andric // invariants combined with `and` or `or` or partially invariant instructions, 2212fe6060f1SDimitry Andric // the combining operation determines the best direction to unswitch: we want 2213fe6060f1SDimitry Andric // to unswitch the direction that will collapse the branch. 22140b57cec5SDimitry Andric bool Direction = true; 22150b57cec5SDimitry Andric int ClonedSucc = 0; 22160b57cec5SDimitry Andric if (!FullUnswitch) { 221781ad6265SDimitry Andric Value *Cond = skipTrivialSelect(BI->getCondition()); 2218fe6060f1SDimitry Andric (void)Cond; 2219fe6060f1SDimitry Andric assert(((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) || 2220fe6060f1SDimitry Andric PartiallyInvariant) && 2221fe6060f1SDimitry Andric "Only `or`, `and`, an `select`, partially invariant instructions " 2222fe6060f1SDimitry Andric "can combine invariants being unswitched."); 222381ad6265SDimitry Andric if (!match(Cond, m_LogicalOr())) { 222481ad6265SDimitry Andric if (match(Cond, m_LogicalAnd()) || 2225fe6060f1SDimitry Andric (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) { 22260b57cec5SDimitry Andric Direction = false; 22270b57cec5SDimitry Andric ClonedSucc = 1; 22280b57cec5SDimitry Andric } 22290b57cec5SDimitry Andric } 2230fe6060f1SDimitry Andric } 22310b57cec5SDimitry Andric 22320b57cec5SDimitry Andric BasicBlock *RetainedSuccBB = 22330b57cec5SDimitry Andric BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest(); 22340b57cec5SDimitry Andric SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs; 22350b57cec5SDimitry Andric if (BI) 22360b57cec5SDimitry Andric UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc)); 22370b57cec5SDimitry Andric else 22380b57cec5SDimitry Andric for (auto Case : SI->cases()) 22390b57cec5SDimitry Andric if (Case.getCaseSuccessor() != RetainedSuccBB) 22400b57cec5SDimitry Andric UnswitchedSuccBBs.insert(Case.getCaseSuccessor()); 22410b57cec5SDimitry Andric 22420b57cec5SDimitry Andric assert(!UnswitchedSuccBBs.count(RetainedSuccBB) && 22430b57cec5SDimitry Andric "Should not unswitch the same successor we are retaining!"); 22440b57cec5SDimitry Andric 22450b57cec5SDimitry Andric // The branch should be in this exact loop. Any inner loop's invariant branch 22460b57cec5SDimitry Andric // should be handled by unswitching that inner loop. The caller of this 22470b57cec5SDimitry Andric // routine should filter out any candidates that remain (but were skipped for 22480b57cec5SDimitry Andric // whatever reason). 22490b57cec5SDimitry Andric assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!"); 22500b57cec5SDimitry Andric 22510b57cec5SDimitry Andric // Compute the parent loop now before we start hacking on things. 22520b57cec5SDimitry Andric Loop *ParentL = L.getParentLoop(); 22530b57cec5SDimitry Andric // Get blocks in RPO order for MSSA update, before changing the CFG. 22540b57cec5SDimitry Andric LoopBlocksRPO LBRPO(&L); 22550b57cec5SDimitry Andric if (MSSAU) 22560b57cec5SDimitry Andric LBRPO.perform(&LI); 22570b57cec5SDimitry Andric 22580b57cec5SDimitry Andric // Compute the outer-most loop containing one of our exit blocks. This is the 22590b57cec5SDimitry Andric // furthest up our loopnest which can be mutated, which we will use below to 22600b57cec5SDimitry Andric // update things. 22610b57cec5SDimitry Andric Loop *OuterExitL = &L; 2262bdd1243dSDimitry Andric SmallVector<BasicBlock *, 4> ExitBlocks; 2263bdd1243dSDimitry Andric L.getUniqueExitBlocks(ExitBlocks); 22640b57cec5SDimitry Andric for (auto *ExitBB : ExitBlocks) { 226506c3fb27SDimitry Andric // ExitBB can be an exit block for several levels in the loop nest. Make 226606c3fb27SDimitry Andric // sure we find the top most. 226706c3fb27SDimitry Andric Loop *NewOuterExitL = getTopMostExitingLoop(ExitBB, LI); 22680b57cec5SDimitry Andric if (!NewOuterExitL) { 22690b57cec5SDimitry Andric // We exited the entire nest with this block, so we're done. 22700b57cec5SDimitry Andric OuterExitL = nullptr; 22710b57cec5SDimitry Andric break; 22720b57cec5SDimitry Andric } 22730b57cec5SDimitry Andric if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL)) 22740b57cec5SDimitry Andric OuterExitL = NewOuterExitL; 22750b57cec5SDimitry Andric } 22760b57cec5SDimitry Andric 22770b57cec5SDimitry Andric // At this point, we're definitely going to unswitch something so invalidate 22780b57cec5SDimitry Andric // any cached information in ScalarEvolution for the outer most loop 22790b57cec5SDimitry Andric // containing an exit block and all nested loops. 22800b57cec5SDimitry Andric if (SE) { 22810b57cec5SDimitry Andric if (OuterExitL) 22820b57cec5SDimitry Andric SE->forgetLoop(OuterExitL); 22830b57cec5SDimitry Andric else 22840b57cec5SDimitry Andric SE->forgetTopmostLoop(&L); 2285bdd1243dSDimitry Andric SE->forgetBlockAndLoopDispositions(); 22860b57cec5SDimitry Andric } 22870b57cec5SDimitry Andric 22880b57cec5SDimitry Andric // If the edge from this terminator to a successor dominates that successor, 22890b57cec5SDimitry Andric // store a map from each block in its dominator subtree to it. This lets us 22900b57cec5SDimitry Andric // tell when cloning for a particular successor if a block is dominated by 22910b57cec5SDimitry Andric // some *other* successor with a single data structure. We use this to 22920b57cec5SDimitry Andric // significantly reduce cloning. 22930b57cec5SDimitry Andric SmallDenseMap<BasicBlock *, BasicBlock *, 16> DominatingSucc; 2294bdd1243dSDimitry Andric for (auto *SuccBB : llvm::concat<BasicBlock *const>(ArrayRef(RetainedSuccBB), 2295bdd1243dSDimitry Andric UnswitchedSuccBBs)) 22960b57cec5SDimitry Andric if (SuccBB->getUniquePredecessor() || 22970b57cec5SDimitry Andric llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) { 22980b57cec5SDimitry Andric return PredBB == ParentBB || DT.dominates(SuccBB, PredBB); 22990b57cec5SDimitry Andric })) 23000b57cec5SDimitry Andric visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) { 23010b57cec5SDimitry Andric DominatingSucc[BB] = SuccBB; 23020b57cec5SDimitry Andric return true; 23030b57cec5SDimitry Andric }); 23040b57cec5SDimitry Andric 23050b57cec5SDimitry Andric // Split the preheader, so that we know that there is a safe place to insert 23060b57cec5SDimitry Andric // the conditional branch. We will change the preheader to have a conditional 23070b57cec5SDimitry Andric // branch on LoopCond. The original preheader will become the split point 23080b57cec5SDimitry Andric // between the unswitched versions, and we will have a new preheader for the 23090b57cec5SDimitry Andric // original loop. 23100b57cec5SDimitry Andric BasicBlock *SplitBB = L.getLoopPreheader(); 23110b57cec5SDimitry Andric BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU); 23120b57cec5SDimitry Andric 23130b57cec5SDimitry Andric // Keep track of the dominator tree updates needed. 23140b57cec5SDimitry Andric SmallVector<DominatorTree::UpdateType, 4> DTUpdates; 23150b57cec5SDimitry Andric 23160b57cec5SDimitry Andric // Clone the loop for each unswitched successor. 23170b57cec5SDimitry Andric SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps; 23180b57cec5SDimitry Andric VMaps.reserve(UnswitchedSuccBBs.size()); 23190b57cec5SDimitry Andric SmallDenseMap<BasicBlock *, BasicBlock *, 4> ClonedPHs; 23200b57cec5SDimitry Andric for (auto *SuccBB : UnswitchedSuccBBs) { 23210b57cec5SDimitry Andric VMaps.emplace_back(new ValueToValueMapTy()); 23220b57cec5SDimitry Andric ClonedPHs[SuccBB] = buildClonedLoopBlocks( 23230b57cec5SDimitry Andric L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB, 2324bdd1243dSDimitry Andric DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU, SE); 23250b57cec5SDimitry Andric } 23260b57cec5SDimitry Andric 2327e8d8bef9SDimitry Andric // Drop metadata if we may break its semantics by moving this instr into the 2328e8d8bef9SDimitry Andric // split block. 2329e8d8bef9SDimitry Andric if (TI.getMetadata(LLVMContext::MD_make_implicit)) { 2330e8d8bef9SDimitry Andric if (DropNonTrivialImplicitNullChecks) 2331e8d8bef9SDimitry Andric // Do not spend time trying to understand if we can keep it, just drop it 2332e8d8bef9SDimitry Andric // to save compile time. 2333e8d8bef9SDimitry Andric TI.setMetadata(LLVMContext::MD_make_implicit, nullptr); 2334e8d8bef9SDimitry Andric else { 2335e8d8bef9SDimitry Andric // It is only legal to preserve make.implicit metadata if we are 2336e8d8bef9SDimitry Andric // guaranteed no reach implicit null check after following this branch. 2337e8d8bef9SDimitry Andric ICFLoopSafetyInfo SafetyInfo; 2338e8d8bef9SDimitry Andric SafetyInfo.computeLoopSafetyInfo(&L); 2339e8d8bef9SDimitry Andric if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L)) 2340e8d8bef9SDimitry Andric TI.setMetadata(LLVMContext::MD_make_implicit, nullptr); 2341e8d8bef9SDimitry Andric } 2342e8d8bef9SDimitry Andric } 2343e8d8bef9SDimitry Andric 23440b57cec5SDimitry Andric // The stitching of the branched code back together depends on whether we're 23450b57cec5SDimitry Andric // doing full unswitching or not with the exception that we always want to 23460b57cec5SDimitry Andric // nuke the initial terminator placed in the split block. 23470b57cec5SDimitry Andric SplitBB->getTerminator()->eraseFromParent(); 23480b57cec5SDimitry Andric if (FullUnswitch) { 23490b57cec5SDimitry Andric // Keep a clone of the terminator for MSSA updates. 23500b57cec5SDimitry Andric Instruction *NewTI = TI.clone(); 2351bdd1243dSDimitry Andric NewTI->insertInto(ParentBB, ParentBB->end()); 23520b57cec5SDimitry Andric 23530fca6ea1SDimitry Andric // Splice the terminator from the original loop and rewrite its 23540fca6ea1SDimitry Andric // successors. 23550fca6ea1SDimitry Andric TI.moveBefore(*SplitBB, SplitBB->end()); 23560fca6ea1SDimitry Andric TI.dropLocation(); 23570fca6ea1SDimitry Andric 23580b57cec5SDimitry Andric // First wire up the moved terminator to the preheaders. 23590b57cec5SDimitry Andric if (BI) { 23600b57cec5SDimitry Andric BasicBlock *ClonedPH = ClonedPHs.begin()->second; 23610b57cec5SDimitry Andric BI->setSuccessor(ClonedSucc, ClonedPH); 23620b57cec5SDimitry Andric BI->setSuccessor(1 - ClonedSucc, LoopPH); 236306c3fb27SDimitry Andric Value *Cond = skipTrivialSelect(BI->getCondition()); 23640fca6ea1SDimitry Andric if (InsertFreeze) { 23650fca6ea1SDimitry Andric // We don't give any debug location to the new freeze, because the 23660fca6ea1SDimitry Andric // BI (`dyn_cast<BranchInst>(TI)`) is an in-loop instruction hoisted 23670fca6ea1SDimitry Andric // out of the loop. 23680fca6ea1SDimitry Andric Cond = new FreezeInst(Cond, Cond->getName() + ".fr", BI->getIterator()); 23690fca6ea1SDimitry Andric } 237006c3fb27SDimitry Andric BI->setCondition(Cond); 23710b57cec5SDimitry Andric DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); 23720b57cec5SDimitry Andric } else { 23730b57cec5SDimitry Andric assert(SI && "Must either be a branch or switch!"); 23740b57cec5SDimitry Andric 23750b57cec5SDimitry Andric // Walk the cases and directly update their successors. 23760b57cec5SDimitry Andric assert(SI->getDefaultDest() == RetainedSuccBB && 23770b57cec5SDimitry Andric "Not retaining default successor!"); 23780b57cec5SDimitry Andric SI->setDefaultDest(LoopPH); 2379bdd1243dSDimitry Andric for (const auto &Case : SI->cases()) 23800b57cec5SDimitry Andric if (Case.getCaseSuccessor() == RetainedSuccBB) 23810b57cec5SDimitry Andric Case.setSuccessor(LoopPH); 23820b57cec5SDimitry Andric else 23830b57cec5SDimitry Andric Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second); 23840b57cec5SDimitry Andric 2385bdd1243dSDimitry Andric if (InsertFreeze) 23860fca6ea1SDimitry Andric SI->setCondition(new FreezeInst(SI->getCondition(), 23870fca6ea1SDimitry Andric SI->getCondition()->getName() + ".fr", 23880fca6ea1SDimitry Andric SI->getIterator())); 2389bdd1243dSDimitry Andric 23900b57cec5SDimitry Andric // We need to use the set to populate domtree updates as even when there 23910b57cec5SDimitry Andric // are multiple cases pointing at the same successor we only want to 23920b57cec5SDimitry Andric // remove and insert one edge in the domtree. 23930b57cec5SDimitry Andric for (BasicBlock *SuccBB : UnswitchedSuccBBs) 23940b57cec5SDimitry Andric DTUpdates.push_back( 23950b57cec5SDimitry Andric {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second}); 23960b57cec5SDimitry Andric } 23970b57cec5SDimitry Andric 23980b57cec5SDimitry Andric if (MSSAU) { 23990b57cec5SDimitry Andric DT.applyUpdates(DTUpdates); 24000b57cec5SDimitry Andric DTUpdates.clear(); 24010b57cec5SDimitry Andric 24020b57cec5SDimitry Andric // Remove all but one edge to the retained block and all unswitched 24030b57cec5SDimitry Andric // blocks. This is to avoid having duplicate entries in the cloned Phis, 24040b57cec5SDimitry Andric // when we know we only keep a single edge for each case. 24050b57cec5SDimitry Andric MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB); 24060b57cec5SDimitry Andric for (BasicBlock *SuccBB : UnswitchedSuccBBs) 24070b57cec5SDimitry Andric MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB); 24080b57cec5SDimitry Andric 24090b57cec5SDimitry Andric for (auto &VMap : VMaps) 24100b57cec5SDimitry Andric MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap, 24110b57cec5SDimitry Andric /*IgnoreIncomingWithNoClones=*/true); 24120b57cec5SDimitry Andric MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT); 24130b57cec5SDimitry Andric 24140b57cec5SDimitry Andric // Remove all edges to unswitched blocks. 24150b57cec5SDimitry Andric for (BasicBlock *SuccBB : UnswitchedSuccBBs) 24160b57cec5SDimitry Andric MSSAU->removeEdge(ParentBB, SuccBB); 24170b57cec5SDimitry Andric } 24180b57cec5SDimitry Andric 24190b57cec5SDimitry Andric // Now unhook the successor relationship as we'll be replacing 24200b57cec5SDimitry Andric // the terminator with a direct branch. This is much simpler for branches 24210b57cec5SDimitry Andric // than switches so we handle those first. 24220b57cec5SDimitry Andric if (BI) { 24230b57cec5SDimitry Andric // Remove the parent as a predecessor of the unswitched successor. 24240b57cec5SDimitry Andric assert(UnswitchedSuccBBs.size() == 1 && 24250b57cec5SDimitry Andric "Only one possible unswitched block for a branch!"); 24260b57cec5SDimitry Andric BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin(); 24270b57cec5SDimitry Andric UnswitchedSuccBB->removePredecessor(ParentBB, 24280b57cec5SDimitry Andric /*KeepOneInputPHIs*/ true); 24290b57cec5SDimitry Andric DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB}); 24300b57cec5SDimitry Andric } else { 24310b57cec5SDimitry Andric // Note that we actually want to remove the parent block as a predecessor 24320b57cec5SDimitry Andric // of *every* case successor. The case successor is either unswitched, 24330b57cec5SDimitry Andric // completely eliminating an edge from the parent to that successor, or it 24340b57cec5SDimitry Andric // is a duplicate edge to the retained successor as the retained successor 24350b57cec5SDimitry Andric // is always the default successor and as we'll replace this with a direct 24360b57cec5SDimitry Andric // branch we no longer need the duplicate entries in the PHI nodes. 24370b57cec5SDimitry Andric SwitchInst *NewSI = cast<SwitchInst>(NewTI); 24380b57cec5SDimitry Andric assert(NewSI->getDefaultDest() == RetainedSuccBB && 24390b57cec5SDimitry Andric "Not retaining default successor!"); 2440bdd1243dSDimitry Andric for (const auto &Case : NewSI->cases()) 24410b57cec5SDimitry Andric Case.getCaseSuccessor()->removePredecessor( 24420b57cec5SDimitry Andric ParentBB, 24430b57cec5SDimitry Andric /*KeepOneInputPHIs*/ true); 24440b57cec5SDimitry Andric 24450b57cec5SDimitry Andric // We need to use the set to populate domtree updates as even when there 24460b57cec5SDimitry Andric // are multiple cases pointing at the same successor we only want to 24470b57cec5SDimitry Andric // remove and insert one edge in the domtree. 24480b57cec5SDimitry Andric for (BasicBlock *SuccBB : UnswitchedSuccBBs) 24490b57cec5SDimitry Andric DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB}); 24500b57cec5SDimitry Andric } 24510b57cec5SDimitry Andric 24520b57cec5SDimitry Andric // Create a new unconditional branch to the continuing block (as opposed to 24530b57cec5SDimitry Andric // the one cloned). 24540fca6ea1SDimitry Andric Instruction *NewBI = BranchInst::Create(RetainedSuccBB, ParentBB); 24550fca6ea1SDimitry Andric NewBI->setDebugLoc(NewTI->getDebugLoc()); 24560fca6ea1SDimitry Andric 24570fca6ea1SDimitry Andric // After MSSAU update, remove the cloned terminator instruction NewTI. 24580fca6ea1SDimitry Andric NewTI->eraseFromParent(); 24590b57cec5SDimitry Andric } else { 24600b57cec5SDimitry Andric assert(BI && "Only branches have partial unswitching."); 24610b57cec5SDimitry Andric assert(UnswitchedSuccBBs.size() == 1 && 24620b57cec5SDimitry Andric "Only one possible unswitched block for a branch!"); 24630b57cec5SDimitry Andric BasicBlock *ClonedPH = ClonedPHs.begin()->second; 24640b57cec5SDimitry Andric // When doing a partial unswitch, we have to do a bit more work to build up 24650b57cec5SDimitry Andric // the branch in the split block. 2466fe6060f1SDimitry Andric if (PartiallyInvariant) 2467fe6060f1SDimitry Andric buildPartialInvariantUnswitchConditionalBranch( 2468fe6060f1SDimitry Andric *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU); 246981ad6265SDimitry Andric else { 247081ad6265SDimitry Andric buildPartialUnswitchConditionalBranch( 247181ad6265SDimitry Andric *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, 247281ad6265SDimitry Andric FreezeLoopUnswitchCond, BI, &AC, DT); 247381ad6265SDimitry Andric } 24740b57cec5SDimitry Andric DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); 24758bcb0991SDimitry Andric 24768bcb0991SDimitry Andric if (MSSAU) { 24778bcb0991SDimitry Andric DT.applyUpdates(DTUpdates); 24788bcb0991SDimitry Andric DTUpdates.clear(); 24798bcb0991SDimitry Andric 24808bcb0991SDimitry Andric // Perform MSSA cloning updates. 24818bcb0991SDimitry Andric for (auto &VMap : VMaps) 24828bcb0991SDimitry Andric MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap, 24838bcb0991SDimitry Andric /*IgnoreIncomingWithNoClones=*/true); 24848bcb0991SDimitry Andric MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT); 24858bcb0991SDimitry Andric } 24860b57cec5SDimitry Andric } 24870b57cec5SDimitry Andric 24880b57cec5SDimitry Andric // Apply the updates accumulated above to get an up-to-date dominator tree. 24890b57cec5SDimitry Andric DT.applyUpdates(DTUpdates); 24900b57cec5SDimitry Andric 24910b57cec5SDimitry Andric // Now that we have an accurate dominator tree, first delete the dead cloned 24920b57cec5SDimitry Andric // blocks so that we can accurately build any cloned loops. It is important to 24930b57cec5SDimitry Andric // not delete the blocks from the original loop yet because we still want to 24940b57cec5SDimitry Andric // reference the original loop to understand the cloned loop's structure. 24950b57cec5SDimitry Andric deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU); 24960b57cec5SDimitry Andric 24970b57cec5SDimitry Andric // Build the cloned loop structure itself. This may be substantially 24980b57cec5SDimitry Andric // different from the original structure due to the simplified CFG. This also 24990b57cec5SDimitry Andric // handles inserting all the cloned blocks into the correct loops. 25000b57cec5SDimitry Andric SmallVector<Loop *, 4> NonChildClonedLoops; 25010b57cec5SDimitry Andric for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps) 25020b57cec5SDimitry Andric buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops); 25030b57cec5SDimitry Andric 25040b57cec5SDimitry Andric // Now that our cloned loops have been built, we can update the original loop. 25050b57cec5SDimitry Andric // First we delete the dead blocks from it and then we rebuild the loop 25060b57cec5SDimitry Andric // structure taking these deletions into account. 25075f757f3fSDimitry Andric deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, SE, LoopUpdater); 25080b57cec5SDimitry Andric 25090b57cec5SDimitry Andric if (MSSAU && VerifyMemorySSA) 25100b57cec5SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA(); 25110b57cec5SDimitry Andric 25120b57cec5SDimitry Andric SmallVector<Loop *, 4> HoistedLoops; 2513bdd1243dSDimitry Andric bool IsStillLoop = 2514bdd1243dSDimitry Andric rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops, SE); 25150b57cec5SDimitry Andric 25160b57cec5SDimitry Andric if (MSSAU && VerifyMemorySSA) 25170b57cec5SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA(); 25180b57cec5SDimitry Andric 25190b57cec5SDimitry Andric // This transformation has a high risk of corrupting the dominator tree, and 25200b57cec5SDimitry Andric // the below steps to rebuild loop structures will result in hard to debug 25210b57cec5SDimitry Andric // errors in that case so verify that the dominator tree is sane first. 25220b57cec5SDimitry Andric // FIXME: Remove this when the bugs stop showing up and rely on existing 25230b57cec5SDimitry Andric // verification steps. 25240b57cec5SDimitry Andric assert(DT.verify(DominatorTree::VerificationLevel::Fast)); 25250b57cec5SDimitry Andric 2526fe6060f1SDimitry Andric if (BI && !PartiallyInvariant) { 25270b57cec5SDimitry Andric // If we unswitched a branch which collapses the condition to a known 25280b57cec5SDimitry Andric // constant we want to replace all the uses of the invariants within both 25290b57cec5SDimitry Andric // the original and cloned blocks. We do this here so that we can use the 25300b57cec5SDimitry Andric // now updated dominator tree to identify which side the users are on. 25310b57cec5SDimitry Andric assert(UnswitchedSuccBBs.size() == 1 && 25320b57cec5SDimitry Andric "Only one possible unswitched block for a branch!"); 25330b57cec5SDimitry Andric BasicBlock *ClonedPH = ClonedPHs.begin()->second; 25340b57cec5SDimitry Andric 25350b57cec5SDimitry Andric // When considering multiple partially-unswitched invariants 25360b57cec5SDimitry Andric // we cant just go replace them with constants in both branches. 25370b57cec5SDimitry Andric // 25380b57cec5SDimitry Andric // For 'AND' we infer that true branch ("continue") means true 25390b57cec5SDimitry Andric // for each invariant operand. 25400b57cec5SDimitry Andric // For 'OR' we can infer that false branch ("continue") means false 25410b57cec5SDimitry Andric // for each invariant operand. 25420b57cec5SDimitry Andric // So it happens that for multiple-partial case we dont replace 25430b57cec5SDimitry Andric // in the unswitched branch. 2544fe6060f1SDimitry Andric bool ReplaceUnswitched = 2545fe6060f1SDimitry Andric FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant; 25460b57cec5SDimitry Andric 25470b57cec5SDimitry Andric ConstantInt *UnswitchedReplacement = 25480b57cec5SDimitry Andric Direction ? ConstantInt::getTrue(BI->getContext()) 25490b57cec5SDimitry Andric : ConstantInt::getFalse(BI->getContext()); 25500b57cec5SDimitry Andric ConstantInt *ContinueReplacement = 25510b57cec5SDimitry Andric Direction ? ConstantInt::getFalse(BI->getContext()) 25520b57cec5SDimitry Andric : ConstantInt::getTrue(BI->getContext()); 2553349cc55cSDimitry Andric for (Value *Invariant : Invariants) { 2554349cc55cSDimitry Andric assert(!isa<Constant>(Invariant) && 2555349cc55cSDimitry Andric "Should not be replacing constant values!"); 2556fe6060f1SDimitry Andric // Use make_early_inc_range here as set invalidates the iterator. 2557fe6060f1SDimitry Andric for (Use &U : llvm::make_early_inc_range(Invariant->uses())) { 2558fe6060f1SDimitry Andric Instruction *UserI = dyn_cast<Instruction>(U.getUser()); 25590b57cec5SDimitry Andric if (!UserI) 25600b57cec5SDimitry Andric continue; 25610b57cec5SDimitry Andric 25620b57cec5SDimitry Andric // Replace it with the 'continue' side if in the main loop body, and the 25630b57cec5SDimitry Andric // unswitched if in the cloned blocks. 25640b57cec5SDimitry Andric if (DT.dominates(LoopPH, UserI->getParent())) 2565fe6060f1SDimitry Andric U.set(ContinueReplacement); 25660b57cec5SDimitry Andric else if (ReplaceUnswitched && 25670b57cec5SDimitry Andric DT.dominates(ClonedPH, UserI->getParent())) 2568fe6060f1SDimitry Andric U.set(UnswitchedReplacement); 25690b57cec5SDimitry Andric } 25700b57cec5SDimitry Andric } 2571349cc55cSDimitry Andric } 25720b57cec5SDimitry Andric 25730b57cec5SDimitry Andric // We can change which blocks are exit blocks of all the cloned sibling 25740b57cec5SDimitry Andric // loops, the current loop, and any parent loops which shared exit blocks 25750b57cec5SDimitry Andric // with the current loop. As a consequence, we need to re-form LCSSA for 25760b57cec5SDimitry Andric // them. But we shouldn't need to re-form LCSSA for any child loops. 25770b57cec5SDimitry Andric // FIXME: This could be made more efficient by tracking which exit blocks are 25780b57cec5SDimitry Andric // new, and focusing on them, but that isn't likely to be necessary. 25790b57cec5SDimitry Andric // 25800b57cec5SDimitry Andric // In order to reasonably rebuild LCSSA we need to walk inside-out across the 25810b57cec5SDimitry Andric // loop nest and update every loop that could have had its exits changed. We 25820b57cec5SDimitry Andric // also need to cover any intervening loops. We add all of these loops to 25830b57cec5SDimitry Andric // a list and sort them by loop depth to achieve this without updating 25840b57cec5SDimitry Andric // unnecessary loops. 25850b57cec5SDimitry Andric auto UpdateLoop = [&](Loop &UpdateL) { 25860b57cec5SDimitry Andric #ifndef NDEBUG 25870b57cec5SDimitry Andric UpdateL.verifyLoop(); 25880b57cec5SDimitry Andric for (Loop *ChildL : UpdateL) { 25890b57cec5SDimitry Andric ChildL->verifyLoop(); 25900b57cec5SDimitry Andric assert(ChildL->isRecursivelyLCSSAForm(DT, LI) && 25910b57cec5SDimitry Andric "Perturbed a child loop's LCSSA form!"); 25920b57cec5SDimitry Andric } 25930b57cec5SDimitry Andric #endif 25940b57cec5SDimitry Andric // First build LCSSA for this loop so that we can preserve it when 25950b57cec5SDimitry Andric // forming dedicated exits. We don't want to perturb some other loop's 25960b57cec5SDimitry Andric // LCSSA while doing that CFG edit. 2597480093f4SDimitry Andric formLCSSA(UpdateL, DT, &LI, SE); 25980b57cec5SDimitry Andric 25990b57cec5SDimitry Andric // For loops reached by this loop's original exit blocks we may 26000b57cec5SDimitry Andric // introduced new, non-dedicated exits. At least try to re-form dedicated 26010b57cec5SDimitry Andric // exits for these loops. This may fail if they couldn't have dedicated 26020b57cec5SDimitry Andric // exits to start with. 26030b57cec5SDimitry Andric formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true); 26040b57cec5SDimitry Andric }; 26050b57cec5SDimitry Andric 26060b57cec5SDimitry Andric // For non-child cloned loops and hoisted loops, we just need to update LCSSA 26070b57cec5SDimitry Andric // and we can do it in any order as they don't nest relative to each other. 26080b57cec5SDimitry Andric // 26090b57cec5SDimitry Andric // Also check if any of the loops we have updated have become top-level loops 26100b57cec5SDimitry Andric // as that will necessitate widening the outer loop scope. 26110b57cec5SDimitry Andric for (Loop *UpdatedL : 26120b57cec5SDimitry Andric llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) { 26130b57cec5SDimitry Andric UpdateLoop(*UpdatedL); 2614e8d8bef9SDimitry Andric if (UpdatedL->isOutermost()) 26150b57cec5SDimitry Andric OuterExitL = nullptr; 26160b57cec5SDimitry Andric } 26170b57cec5SDimitry Andric if (IsStillLoop) { 26180b57cec5SDimitry Andric UpdateLoop(L); 2619e8d8bef9SDimitry Andric if (L.isOutermost()) 26200b57cec5SDimitry Andric OuterExitL = nullptr; 26210b57cec5SDimitry Andric } 26220b57cec5SDimitry Andric 26230b57cec5SDimitry Andric // If the original loop had exit blocks, walk up through the outer most loop 26240b57cec5SDimitry Andric // of those exit blocks to update LCSSA and form updated dedicated exits. 26250b57cec5SDimitry Andric if (OuterExitL != &L) 26260b57cec5SDimitry Andric for (Loop *OuterL = ParentL; OuterL != OuterExitL; 26270b57cec5SDimitry Andric OuterL = OuterL->getParentLoop()) 26280b57cec5SDimitry Andric UpdateLoop(*OuterL); 26290b57cec5SDimitry Andric 26300b57cec5SDimitry Andric #ifndef NDEBUG 26310b57cec5SDimitry Andric // Verify the entire loop structure to catch any incorrect updates before we 26320b57cec5SDimitry Andric // progress in the pass pipeline. 26330b57cec5SDimitry Andric LI.verify(DT); 26340b57cec5SDimitry Andric #endif 26350b57cec5SDimitry Andric 26360b57cec5SDimitry Andric // Now that we've unswitched something, make callbacks to report the changes. 26370b57cec5SDimitry Andric // For that we need to merge together the updated loops and the cloned loops 26380b57cec5SDimitry Andric // and check whether the original loop survived. 26390b57cec5SDimitry Andric SmallVector<Loop *, 4> SibLoops; 26400b57cec5SDimitry Andric for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) 26410b57cec5SDimitry Andric if (UpdatedL->getParentLoop() == ParentL) 26420b57cec5SDimitry Andric SibLoops.push_back(UpdatedL); 26435f757f3fSDimitry Andric postUnswitch(L, LoopUpdater, LoopName, IsStillLoop, PartiallyInvariant, 26445f757f3fSDimitry Andric InjectedCondition, SibLoops); 26450b57cec5SDimitry Andric 26460b57cec5SDimitry Andric if (MSSAU && VerifyMemorySSA) 26470b57cec5SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA(); 26480b57cec5SDimitry Andric 26490b57cec5SDimitry Andric if (BI) 26500b57cec5SDimitry Andric ++NumBranches; 26510b57cec5SDimitry Andric else 26520b57cec5SDimitry Andric ++NumSwitches; 26530b57cec5SDimitry Andric } 26540b57cec5SDimitry Andric 26550b57cec5SDimitry Andric /// Recursively compute the cost of a dominator subtree based on the per-block 26560b57cec5SDimitry Andric /// cost map provided. 26570b57cec5SDimitry Andric /// 26580b57cec5SDimitry Andric /// The recursive computation is memozied into the provided DT-indexed cost map 26590b57cec5SDimitry Andric /// to allow querying it for most nodes in the domtree without it becoming 26600b57cec5SDimitry Andric /// quadratic. 2661fe6060f1SDimitry Andric static InstructionCost computeDomSubtreeCost( 2662fe6060f1SDimitry Andric DomTreeNode &N, 2663fe6060f1SDimitry Andric const SmallDenseMap<BasicBlock *, InstructionCost, 4> &BBCostMap, 2664fe6060f1SDimitry Andric SmallDenseMap<DomTreeNode *, InstructionCost, 4> &DTCostMap) { 26650b57cec5SDimitry Andric // Don't accumulate cost (or recurse through) blocks not in our block cost 26660b57cec5SDimitry Andric // map and thus not part of the duplication cost being considered. 26670b57cec5SDimitry Andric auto BBCostIt = BBCostMap.find(N.getBlock()); 26680b57cec5SDimitry Andric if (BBCostIt == BBCostMap.end()) 26690b57cec5SDimitry Andric return 0; 26700b57cec5SDimitry Andric 26710b57cec5SDimitry Andric // Lookup this node to see if we already computed its cost. 26720b57cec5SDimitry Andric auto DTCostIt = DTCostMap.find(&N); 26730b57cec5SDimitry Andric if (DTCostIt != DTCostMap.end()) 26740b57cec5SDimitry Andric return DTCostIt->second; 26750b57cec5SDimitry Andric 26760b57cec5SDimitry Andric // If not, we have to compute it. We can't use insert above and update 26770b57cec5SDimitry Andric // because computing the cost may insert more things into the map. 2678fe6060f1SDimitry Andric InstructionCost Cost = std::accumulate( 2679fe6060f1SDimitry Andric N.begin(), N.end(), BBCostIt->second, 2680fe6060f1SDimitry Andric [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost { 26810b57cec5SDimitry Andric return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap); 26820b57cec5SDimitry Andric }); 26830b57cec5SDimitry Andric bool Inserted = DTCostMap.insert({&N, Cost}).second; 26840b57cec5SDimitry Andric (void)Inserted; 26850b57cec5SDimitry Andric assert(Inserted && "Should not insert a node while visiting children!"); 26860b57cec5SDimitry Andric return Cost; 26870b57cec5SDimitry Andric } 26880b57cec5SDimitry Andric 268906c3fb27SDimitry Andric /// Turns a select instruction into implicit control flow branch, 269006c3fb27SDimitry Andric /// making the following replacement: 269106c3fb27SDimitry Andric /// 269206c3fb27SDimitry Andric /// head: 269306c3fb27SDimitry Andric /// --code before select-- 269406c3fb27SDimitry Andric /// select %cond, %trueval, %falseval 269506c3fb27SDimitry Andric /// --code after select-- 269606c3fb27SDimitry Andric /// 269706c3fb27SDimitry Andric /// into 269806c3fb27SDimitry Andric /// 269906c3fb27SDimitry Andric /// head: 270006c3fb27SDimitry Andric /// --code before select-- 270106c3fb27SDimitry Andric /// br i1 %cond, label %then, label %tail 270206c3fb27SDimitry Andric /// 270306c3fb27SDimitry Andric /// then: 270406c3fb27SDimitry Andric /// br %tail 270506c3fb27SDimitry Andric /// 270606c3fb27SDimitry Andric /// tail: 270706c3fb27SDimitry Andric /// phi [ %trueval, %then ], [ %falseval, %head] 270806c3fb27SDimitry Andric /// unreachable 270906c3fb27SDimitry Andric /// 271006c3fb27SDimitry Andric /// It also makes all relevant DT and LI updates, so that all structures are in 271106c3fb27SDimitry Andric /// valid state after this transform. 271206c3fb27SDimitry Andric static BranchInst *turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT, 271306c3fb27SDimitry Andric LoopInfo &LI, MemorySSAUpdater *MSSAU, 271406c3fb27SDimitry Andric AssumptionCache *AC) { 271506c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "Turning " << *SI << " into a branch.\n"); 271606c3fb27SDimitry Andric BasicBlock *HeadBB = SI->getParent(); 271706c3fb27SDimitry Andric 271806c3fb27SDimitry Andric DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); 271906c3fb27SDimitry Andric SplitBlockAndInsertIfThen(SI->getCondition(), SI, false, 272006c3fb27SDimitry Andric SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI); 272106c3fb27SDimitry Andric auto *CondBr = cast<BranchInst>(HeadBB->getTerminator()); 272206c3fb27SDimitry Andric BasicBlock *ThenBB = CondBr->getSuccessor(0), 272306c3fb27SDimitry Andric *TailBB = CondBr->getSuccessor(1); 272406c3fb27SDimitry Andric if (MSSAU) 272506c3fb27SDimitry Andric MSSAU->moveAllAfterSpliceBlocks(HeadBB, TailBB, SI); 272606c3fb27SDimitry Andric 27270fca6ea1SDimitry Andric PHINode *Phi = 27280fca6ea1SDimitry Andric PHINode::Create(SI->getType(), 2, "unswitched.select", SI->getIterator()); 272906c3fb27SDimitry Andric Phi->addIncoming(SI->getTrueValue(), ThenBB); 273006c3fb27SDimitry Andric Phi->addIncoming(SI->getFalseValue(), HeadBB); 27310fca6ea1SDimitry Andric Phi->setDebugLoc(SI->getDebugLoc()); 273206c3fb27SDimitry Andric SI->replaceAllUsesWith(Phi); 273306c3fb27SDimitry Andric SI->eraseFromParent(); 273406c3fb27SDimitry Andric 273506c3fb27SDimitry Andric if (MSSAU && VerifyMemorySSA) 273606c3fb27SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA(); 273706c3fb27SDimitry Andric 273806c3fb27SDimitry Andric ++NumSelects; 273906c3fb27SDimitry Andric return CondBr; 274006c3fb27SDimitry Andric } 274106c3fb27SDimitry Andric 27420b57cec5SDimitry Andric /// Turns a llvm.experimental.guard intrinsic into implicit control flow branch, 27430b57cec5SDimitry Andric /// making the following replacement: 27440b57cec5SDimitry Andric /// 27450b57cec5SDimitry Andric /// --code before guard-- 27460b57cec5SDimitry Andric /// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ] 27470b57cec5SDimitry Andric /// --code after guard-- 27480b57cec5SDimitry Andric /// 27490b57cec5SDimitry Andric /// into 27500b57cec5SDimitry Andric /// 27510b57cec5SDimitry Andric /// --code before guard-- 27520b57cec5SDimitry Andric /// br i1 %cond, label %guarded, label %deopt 27530b57cec5SDimitry Andric /// 27540b57cec5SDimitry Andric /// guarded: 27550b57cec5SDimitry Andric /// --code after guard-- 27560b57cec5SDimitry Andric /// 27570b57cec5SDimitry Andric /// deopt: 27580b57cec5SDimitry Andric /// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ] 27590b57cec5SDimitry Andric /// unreachable 27600b57cec5SDimitry Andric /// 27610b57cec5SDimitry Andric /// It also makes all relevant DT and LI updates, so that all structures are in 27620b57cec5SDimitry Andric /// valid state after this transform. 2763bdd1243dSDimitry Andric static BranchInst *turnGuardIntoBranch(IntrinsicInst *GI, Loop &L, 2764bdd1243dSDimitry Andric DominatorTree &DT, LoopInfo &LI, 2765bdd1243dSDimitry Andric MemorySSAUpdater *MSSAU) { 27660b57cec5SDimitry Andric SmallVector<DominatorTree::UpdateType, 4> DTUpdates; 27670b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n"); 27680b57cec5SDimitry Andric BasicBlock *CheckBB = GI->getParent(); 27690b57cec5SDimitry Andric 27700b57cec5SDimitry Andric if (MSSAU && VerifyMemorySSA) 27710b57cec5SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA(); 27720b57cec5SDimitry Andric 277306c3fb27SDimitry Andric DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); 27740b57cec5SDimitry Andric Instruction *DeoptBlockTerm = 277506c3fb27SDimitry Andric SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true, 277606c3fb27SDimitry Andric GI->getMetadata(LLVMContext::MD_prof), &DTU, &LI); 27770b57cec5SDimitry Andric BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator()); 27780b57cec5SDimitry Andric // SplitBlockAndInsertIfThen inserts control flow that branches to 27790b57cec5SDimitry Andric // DeoptBlockTerm if the condition is true. We want the opposite. 27800b57cec5SDimitry Andric CheckBI->swapSuccessors(); 27810b57cec5SDimitry Andric 27820b57cec5SDimitry Andric BasicBlock *GuardedBlock = CheckBI->getSuccessor(0); 27830b57cec5SDimitry Andric GuardedBlock->setName("guarded"); 27840b57cec5SDimitry Andric CheckBI->getSuccessor(1)->setName("deopt"); 27850b57cec5SDimitry Andric BasicBlock *DeoptBlock = CheckBI->getSuccessor(1); 27860b57cec5SDimitry Andric 27870b57cec5SDimitry Andric if (MSSAU) 27880b57cec5SDimitry Andric MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI); 27890b57cec5SDimitry Andric 27900b57cec5SDimitry Andric GI->moveBefore(DeoptBlockTerm); 27910b57cec5SDimitry Andric GI->setArgOperand(0, ConstantInt::getFalse(GI->getContext())); 27920b57cec5SDimitry Andric 27930b57cec5SDimitry Andric if (MSSAU) { 27940b57cec5SDimitry Andric MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI)); 2795480093f4SDimitry Andric MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator); 27960b57cec5SDimitry Andric if (VerifyMemorySSA) 27970b57cec5SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA(); 27980b57cec5SDimitry Andric } 27990b57cec5SDimitry Andric 280006c3fb27SDimitry Andric if (VerifyLoopInfo) 280106c3fb27SDimitry Andric LI.verify(DT); 28020b57cec5SDimitry Andric ++NumGuards; 28030b57cec5SDimitry Andric return CheckBI; 28040b57cec5SDimitry Andric } 28050b57cec5SDimitry Andric 28060b57cec5SDimitry Andric /// Cost multiplier is a way to limit potentially exponential behavior 28070b57cec5SDimitry Andric /// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch 28080b57cec5SDimitry Andric /// candidates available. Also accounting for the number of "sibling" loops with 28090b57cec5SDimitry Andric /// the idea to account for previous unswitches that already happened on this 28100b57cec5SDimitry Andric /// cluster of loops. There was an attempt to keep this formula simple, 28110b57cec5SDimitry Andric /// just enough to limit the worst case behavior. Even if it is not that simple 28120b57cec5SDimitry Andric /// now it is still not an attempt to provide a detailed heuristic size 28130b57cec5SDimitry Andric /// prediction. 28140b57cec5SDimitry Andric /// 28150b57cec5SDimitry Andric /// TODO: Make a proper accounting of "explosion" effect for all kinds of 28160b57cec5SDimitry Andric /// unswitch candidates, making adequate predictions instead of wild guesses. 28170b57cec5SDimitry Andric /// That requires knowing not just the number of "remaining" candidates but 28180b57cec5SDimitry Andric /// also costs of unswitching for each of these candidates. 28195ffd83dbSDimitry Andric static int CalculateUnswitchCostMultiplier( 2820bdd1243dSDimitry Andric const Instruction &TI, const Loop &L, const LoopInfo &LI, 2821bdd1243dSDimitry Andric const DominatorTree &DT, 2822bdd1243dSDimitry Andric ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates) { 28230b57cec5SDimitry Andric 28240b57cec5SDimitry Andric // Guards and other exiting conditions do not contribute to exponential 28250b57cec5SDimitry Andric // explosion as soon as they dominate the latch (otherwise there might be 28260b57cec5SDimitry Andric // another path to the latch remaining that does not allow to eliminate the 28270b57cec5SDimitry Andric // loop copy on unswitch). 2828bdd1243dSDimitry Andric const BasicBlock *Latch = L.getLoopLatch(); 2829bdd1243dSDimitry Andric const BasicBlock *CondBlock = TI.getParent(); 28300b57cec5SDimitry Andric if (DT.dominates(CondBlock, Latch) && 28310b57cec5SDimitry Andric (isGuard(&TI) || 283206c3fb27SDimitry Andric (TI.isTerminator() && 2833bdd1243dSDimitry Andric llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) { 28340b57cec5SDimitry Andric return L.contains(SuccBB); 283506c3fb27SDimitry Andric }) <= 1))) { 28360b57cec5SDimitry Andric NumCostMultiplierSkipped++; 28370b57cec5SDimitry Andric return 1; 28380b57cec5SDimitry Andric } 28390b57cec5SDimitry Andric 28400b57cec5SDimitry Andric auto *ParentL = L.getParentLoop(); 28410b57cec5SDimitry Andric int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size() 28420b57cec5SDimitry Andric : std::distance(LI.begin(), LI.end())); 28430b57cec5SDimitry Andric // Count amount of clones that all the candidates might cause during 284406c3fb27SDimitry Andric // unswitching. Branch/guard/select counts as 1, switch counts as log2 of its 284506c3fb27SDimitry Andric // cases. 28460b57cec5SDimitry Andric int UnswitchedClones = 0; 284706c3fb27SDimitry Andric for (const auto &Candidate : UnswitchCandidates) { 2848bdd1243dSDimitry Andric const Instruction *CI = Candidate.TI; 2849bdd1243dSDimitry Andric const BasicBlock *CondBlock = CI->getParent(); 28500b57cec5SDimitry Andric bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch); 285106c3fb27SDimitry Andric if (isa<SelectInst>(CI)) { 285206c3fb27SDimitry Andric UnswitchedClones++; 285306c3fb27SDimitry Andric continue; 285406c3fb27SDimitry Andric } 28550b57cec5SDimitry Andric if (isGuard(CI)) { 28560b57cec5SDimitry Andric if (!SkipExitingSuccessors) 28570b57cec5SDimitry Andric UnswitchedClones++; 28580b57cec5SDimitry Andric continue; 28590b57cec5SDimitry Andric } 2860bdd1243dSDimitry Andric int NonExitingSuccessors = 2861bdd1243dSDimitry Andric llvm::count_if(successors(CondBlock), 2862bdd1243dSDimitry Andric [SkipExitingSuccessors, &L](const BasicBlock *SuccBB) { 28630b57cec5SDimitry Andric return !SkipExitingSuccessors || L.contains(SuccBB); 28640b57cec5SDimitry Andric }); 28650b57cec5SDimitry Andric UnswitchedClones += Log2_32(NonExitingSuccessors); 28660b57cec5SDimitry Andric } 28670b57cec5SDimitry Andric 28680b57cec5SDimitry Andric // Ignore up to the "unscaled candidates" number of unswitch candidates 28690b57cec5SDimitry Andric // when calculating the power-of-two scaling of the cost. The main idea 28700b57cec5SDimitry Andric // with this control is to allow a small number of unswitches to happen 28710b57cec5SDimitry Andric // and rely more on siblings multiplier (see below) when the number 28720b57cec5SDimitry Andric // of candidates is small. 28730b57cec5SDimitry Andric unsigned ClonesPower = 28740b57cec5SDimitry Andric std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0); 28750b57cec5SDimitry Andric 28760b57cec5SDimitry Andric // Allowing top-level loops to spread a bit more than nested ones. 28770b57cec5SDimitry Andric int SiblingsMultiplier = 28780b57cec5SDimitry Andric std::max((ParentL ? SiblingsCount 28790b57cec5SDimitry Andric : SiblingsCount / (int)UnswitchSiblingsToplevelDiv), 28800b57cec5SDimitry Andric 1); 28810b57cec5SDimitry Andric // Compute the cost multiplier in a way that won't overflow by saturating 28820b57cec5SDimitry Andric // at an upper bound. 28830b57cec5SDimitry Andric int CostMultiplier; 28840b57cec5SDimitry Andric if (ClonesPower > Log2_32(UnswitchThreshold) || 28850b57cec5SDimitry Andric SiblingsMultiplier > UnswitchThreshold) 28860b57cec5SDimitry Andric CostMultiplier = UnswitchThreshold; 28870b57cec5SDimitry Andric else 28880b57cec5SDimitry Andric CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower), 28890b57cec5SDimitry Andric (int)UnswitchThreshold); 28900b57cec5SDimitry Andric 28910b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplier 28920b57cec5SDimitry Andric << " (siblings " << SiblingsMultiplier << " * clones " 28930b57cec5SDimitry Andric << (1 << ClonesPower) << ")" 28940b57cec5SDimitry Andric << " for unswitch candidate: " << TI << "\n"); 28950b57cec5SDimitry Andric return CostMultiplier; 28960b57cec5SDimitry Andric } 28970b57cec5SDimitry Andric 2898bdd1243dSDimitry Andric static bool collectUnswitchCandidates( 2899bdd1243dSDimitry Andric SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, 2900bdd1243dSDimitry Andric IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, 2901bdd1243dSDimitry Andric const Loop &L, const LoopInfo &LI, AAResults &AA, 2902bdd1243dSDimitry Andric const MemorySSAUpdater *MSSAU) { 2903bdd1243dSDimitry Andric assert(UnswitchCandidates.empty() && "Should be!"); 290406c3fb27SDimitry Andric 290506c3fb27SDimitry Andric auto AddUnswitchCandidatesForInst = [&](Instruction *I, Value *Cond) { 290606c3fb27SDimitry Andric Cond = skipTrivialSelect(Cond); 290706c3fb27SDimitry Andric if (isa<Constant>(Cond)) 290806c3fb27SDimitry Andric return; 290906c3fb27SDimitry Andric if (L.isLoopInvariant(Cond)) { 291006c3fb27SDimitry Andric UnswitchCandidates.push_back({I, {Cond}}); 291106c3fb27SDimitry Andric return; 291206c3fb27SDimitry Andric } 291306c3fb27SDimitry Andric if (match(Cond, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) { 291406c3fb27SDimitry Andric TinyPtrVector<Value *> Invariants = 291506c3fb27SDimitry Andric collectHomogenousInstGraphLoopInvariants( 291606c3fb27SDimitry Andric L, *static_cast<Instruction *>(Cond), LI); 291706c3fb27SDimitry Andric if (!Invariants.empty()) 291806c3fb27SDimitry Andric UnswitchCandidates.push_back({I, std::move(Invariants)}); 291906c3fb27SDimitry Andric } 292006c3fb27SDimitry Andric }; 292106c3fb27SDimitry Andric 29220b57cec5SDimitry Andric // Whether or not we should also collect guards in the loop. 29230b57cec5SDimitry Andric bool CollectGuards = false; 29240b57cec5SDimitry Andric if (UnswitchGuards) { 29250b57cec5SDimitry Andric auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction( 29260b57cec5SDimitry Andric Intrinsic::getName(Intrinsic::experimental_guard)); 29270b57cec5SDimitry Andric if (GuardDecl && !GuardDecl->use_empty()) 29280b57cec5SDimitry Andric CollectGuards = true; 29290b57cec5SDimitry Andric } 29300b57cec5SDimitry Andric 29310b57cec5SDimitry Andric for (auto *BB : L.blocks()) { 29320b57cec5SDimitry Andric if (LI.getLoopFor(BB) != &L) 29330b57cec5SDimitry Andric continue; 29340b57cec5SDimitry Andric 293506c3fb27SDimitry Andric for (auto &I : *BB) { 293606c3fb27SDimitry Andric if (auto *SI = dyn_cast<SelectInst>(&I)) { 293706c3fb27SDimitry Andric auto *Cond = SI->getCondition(); 293806c3fb27SDimitry Andric // Do not unswitch vector selects and logical and/or selects 293906c3fb27SDimitry Andric if (Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1)) 294006c3fb27SDimitry Andric AddUnswitchCandidatesForInst(SI, Cond); 294106c3fb27SDimitry Andric } else if (CollectGuards && isGuard(&I)) { 2942bdd1243dSDimitry Andric auto *Cond = 2943bdd1243dSDimitry Andric skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0)); 29440b57cec5SDimitry Andric // TODO: Support AND, OR conditions and partial unswitching. 29450b57cec5SDimitry Andric if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond)) 29460b57cec5SDimitry Andric UnswitchCandidates.push_back({&I, {Cond}}); 29470b57cec5SDimitry Andric } 294806c3fb27SDimitry Andric } 29490b57cec5SDimitry Andric 29500b57cec5SDimitry Andric if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { 29510b57cec5SDimitry Andric // We can only consider fully loop-invariant switch conditions as we need 29520b57cec5SDimitry Andric // to completely eliminate the switch after unswitching. 29530b57cec5SDimitry Andric if (!isa<Constant>(SI->getCondition()) && 29540b57cec5SDimitry Andric L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor()) 29550b57cec5SDimitry Andric UnswitchCandidates.push_back({SI, {SI->getCondition()}}); 29560b57cec5SDimitry Andric continue; 29570b57cec5SDimitry Andric } 29580b57cec5SDimitry Andric 29590b57cec5SDimitry Andric auto *BI = dyn_cast<BranchInst>(BB->getTerminator()); 296006c3fb27SDimitry Andric if (!BI || !BI->isConditional() || 29610b57cec5SDimitry Andric BI->getSuccessor(0) == BI->getSuccessor(1)) 29620b57cec5SDimitry Andric continue; 29630b57cec5SDimitry Andric 296406c3fb27SDimitry Andric AddUnswitchCandidatesForInst(BI, BI->getCondition()); 2965fe6060f1SDimitry Andric } 2966fe6060f1SDimitry Andric 2967fe6060f1SDimitry Andric if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") && 2968fe6060f1SDimitry Andric !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) { 2969bdd1243dSDimitry Andric return TerminatorAndInvariants.TI == L.getHeader()->getTerminator(); 2970fe6060f1SDimitry Andric })) { 2971fe6060f1SDimitry Andric MemorySSA *MSSA = MSSAU->getMemorySSA(); 2972fe6060f1SDimitry Andric if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) { 2973fe6060f1SDimitry Andric LLVM_DEBUG( 2974fe6060f1SDimitry Andric dbgs() << "simple-loop-unswitch: Found partially invariant condition " 2975fe6060f1SDimitry Andric << *Info->InstToDuplicate[0] << "\n"); 2976fe6060f1SDimitry Andric PartialIVInfo = *Info; 2977fe6060f1SDimitry Andric PartialIVCondBranch = L.getHeader()->getTerminator(); 2978fe6060f1SDimitry Andric TinyPtrVector<Value *> ValsToDuplicate; 297981ad6265SDimitry Andric llvm::append_range(ValsToDuplicate, Info->InstToDuplicate); 2980fe6060f1SDimitry Andric UnswitchCandidates.push_back( 2981fe6060f1SDimitry Andric {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)}); 2982fe6060f1SDimitry Andric } 29830b57cec5SDimitry Andric } 2984bdd1243dSDimitry Andric return !UnswitchCandidates.empty(); 2985bdd1243dSDimitry Andric } 29860b57cec5SDimitry Andric 298706c3fb27SDimitry Andric /// Tries to canonicalize condition described by: 298806c3fb27SDimitry Andric /// 298906c3fb27SDimitry Andric /// br (LHS pred RHS), label IfTrue, label IfFalse 299006c3fb27SDimitry Andric /// 299106c3fb27SDimitry Andric /// into its equivalent where `Pred` is something that we support for injected 299206c3fb27SDimitry Andric /// invariants (so far it is limited to ult), LHS in canonicalized form is 299306c3fb27SDimitry Andric /// non-invariant and RHS is an invariant. 299406c3fb27SDimitry Andric static void canonicalizeForInvariantConditionInjection( 299506c3fb27SDimitry Andric ICmpInst::Predicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue, 299606c3fb27SDimitry Andric BasicBlock *&IfFalse, const Loop &L) { 299706c3fb27SDimitry Andric if (!L.contains(IfTrue)) { 299806c3fb27SDimitry Andric Pred = ICmpInst::getInversePredicate(Pred); 299906c3fb27SDimitry Andric std::swap(IfTrue, IfFalse); 300006c3fb27SDimitry Andric } 300106c3fb27SDimitry Andric 300206c3fb27SDimitry Andric // Move loop-invariant argument to RHS position. 300306c3fb27SDimitry Andric if (L.isLoopInvariant(LHS)) { 300406c3fb27SDimitry Andric Pred = ICmpInst::getSwappedPredicate(Pred); 300506c3fb27SDimitry Andric std::swap(LHS, RHS); 300606c3fb27SDimitry Andric } 300706c3fb27SDimitry Andric 300806c3fb27SDimitry Andric if (Pred == ICmpInst::ICMP_SGE && match(RHS, m_Zero())) { 300906c3fb27SDimitry Andric // Turn "x >=s 0" into "x <u UMIN_INT" 301006c3fb27SDimitry Andric Pred = ICmpInst::ICMP_ULT; 301106c3fb27SDimitry Andric RHS = ConstantInt::get( 301206c3fb27SDimitry Andric RHS->getContext(), 301306c3fb27SDimitry Andric APInt::getSignedMinValue(RHS->getType()->getIntegerBitWidth())); 301406c3fb27SDimitry Andric } 301506c3fb27SDimitry Andric } 301606c3fb27SDimitry Andric 301706c3fb27SDimitry Andric /// Returns true, if predicate described by ( \p Pred, \p LHS, \p RHS ) 301806c3fb27SDimitry Andric /// succeeding into blocks ( \p IfTrue, \p IfFalse) can be optimized by 301906c3fb27SDimitry Andric /// injecting a loop-invariant condition. 302006c3fb27SDimitry Andric static bool shouldTryInjectInvariantCondition( 302106c3fb27SDimitry Andric const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS, 302206c3fb27SDimitry Andric const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L) { 302306c3fb27SDimitry Andric if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS)) 302406c3fb27SDimitry Andric return false; 302506c3fb27SDimitry Andric // TODO: Support other predicates. 302606c3fb27SDimitry Andric if (Pred != ICmpInst::ICMP_ULT) 302706c3fb27SDimitry Andric return false; 302806c3fb27SDimitry Andric // TODO: Support non-loop-exiting branches? 302906c3fb27SDimitry Andric if (!L.contains(IfTrue) || L.contains(IfFalse)) 303006c3fb27SDimitry Andric return false; 303106c3fb27SDimitry Andric // FIXME: For some reason this causes problems with MSSA updates, need to 303206c3fb27SDimitry Andric // investigate why. So far, just don't unswitch latch. 303306c3fb27SDimitry Andric if (L.getHeader() == IfTrue) 303406c3fb27SDimitry Andric return false; 303506c3fb27SDimitry Andric return true; 303606c3fb27SDimitry Andric } 303706c3fb27SDimitry Andric 303806c3fb27SDimitry Andric /// Returns true, if metadata on \p BI allows us to optimize branching into \p 303906c3fb27SDimitry Andric /// TakenSucc via injection of invariant conditions. The branch should be not 304006c3fb27SDimitry Andric /// enough and not previously unswitched, the information about this comes from 304106c3fb27SDimitry Andric /// the metadata. 304206c3fb27SDimitry Andric bool shouldTryInjectBasingOnMetadata(const BranchInst *BI, 304306c3fb27SDimitry Andric const BasicBlock *TakenSucc) { 304406c3fb27SDimitry Andric SmallVector<uint32_t> Weights; 304506c3fb27SDimitry Andric if (!extractBranchWeights(*BI, Weights)) 304606c3fb27SDimitry Andric return false; 304706c3fb27SDimitry Andric unsigned T = InjectInvariantConditionHotnesThreshold; 304806c3fb27SDimitry Andric BranchProbability LikelyTaken(T - 1, T); 304906c3fb27SDimitry Andric 305006c3fb27SDimitry Andric assert(Weights.size() == 2 && "Unexpected profile data!"); 305106c3fb27SDimitry Andric size_t Idx = BI->getSuccessor(0) == TakenSucc ? 0 : 1; 305206c3fb27SDimitry Andric auto Num = Weights[Idx]; 305306c3fb27SDimitry Andric auto Denom = Weights[0] + Weights[1]; 305406c3fb27SDimitry Andric // Degenerate or overflowed metadata. 305506c3fb27SDimitry Andric if (Denom == 0 || Num > Denom) 305606c3fb27SDimitry Andric return false; 305706c3fb27SDimitry Andric BranchProbability ActualTaken(Num, Denom); 305806c3fb27SDimitry Andric if (LikelyTaken > ActualTaken) 305906c3fb27SDimitry Andric return false; 306006c3fb27SDimitry Andric return true; 306106c3fb27SDimitry Andric } 306206c3fb27SDimitry Andric 306306c3fb27SDimitry Andric /// Materialize pending invariant condition of the given candidate into IR. The 306406c3fb27SDimitry Andric /// injected loop-invariant condition implies the original loop-variant branch 306506c3fb27SDimitry Andric /// condition, so the materialization turns 306606c3fb27SDimitry Andric /// 306706c3fb27SDimitry Andric /// loop_block: 306806c3fb27SDimitry Andric /// ... 306906c3fb27SDimitry Andric /// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc 307006c3fb27SDimitry Andric /// 307106c3fb27SDimitry Andric /// into 307206c3fb27SDimitry Andric /// 307306c3fb27SDimitry Andric /// preheader: 307406c3fb27SDimitry Andric /// %invariant_cond = LHS pred RHS 307506c3fb27SDimitry Andric /// ... 307606c3fb27SDimitry Andric /// loop_block: 307706c3fb27SDimitry Andric /// br i1 %invariant_cond, label InLoopSucc, label OriginalCheck 307806c3fb27SDimitry Andric /// OriginalCheck: 307906c3fb27SDimitry Andric /// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc 308006c3fb27SDimitry Andric /// ... 308106c3fb27SDimitry Andric static NonTrivialUnswitchCandidate 308206c3fb27SDimitry Andric injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, 308306c3fb27SDimitry Andric DominatorTree &DT, LoopInfo &LI, 308406c3fb27SDimitry Andric AssumptionCache &AC, MemorySSAUpdater *MSSAU) { 308506c3fb27SDimitry Andric assert(Candidate.hasPendingInjection() && "Nothing to inject!"); 308606c3fb27SDimitry Andric BasicBlock *Preheader = L.getLoopPreheader(); 308706c3fb27SDimitry Andric assert(Preheader && "Loop is not in simplified form?"); 308806c3fb27SDimitry Andric assert(LI.getLoopFor(Candidate.TI->getParent()) == &L && 308906c3fb27SDimitry Andric "Unswitching branch of inner loop!"); 309006c3fb27SDimitry Andric 309106c3fb27SDimitry Andric auto Pred = Candidate.PendingInjection->Pred; 309206c3fb27SDimitry Andric auto *LHS = Candidate.PendingInjection->LHS; 309306c3fb27SDimitry Andric auto *RHS = Candidate.PendingInjection->RHS; 309406c3fb27SDimitry Andric auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc; 309506c3fb27SDimitry Andric auto *TI = cast<BranchInst>(Candidate.TI); 309606c3fb27SDimitry Andric auto *BB = Candidate.TI->getParent(); 309706c3fb27SDimitry Andric auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1) 309806c3fb27SDimitry Andric : TI->getSuccessor(0); 309906c3fb27SDimitry Andric // FIXME: Remove this once limitation on successors is lifted. 310006c3fb27SDimitry Andric assert(L.contains(InLoopSucc) && "Not supported yet!"); 310106c3fb27SDimitry Andric assert(!L.contains(OutOfLoopSucc) && "Not supported yet!"); 310206c3fb27SDimitry Andric auto &Ctx = BB->getContext(); 310306c3fb27SDimitry Andric 310406c3fb27SDimitry Andric IRBuilder<> Builder(Preheader->getTerminator()); 310506c3fb27SDimitry Andric assert(ICmpInst::isUnsigned(Pred) && "Not supported yet!"); 310606c3fb27SDimitry Andric if (LHS->getType() != RHS->getType()) { 310706c3fb27SDimitry Andric if (LHS->getType()->getIntegerBitWidth() < 310806c3fb27SDimitry Andric RHS->getType()->getIntegerBitWidth()) 310906c3fb27SDimitry Andric LHS = Builder.CreateZExt(LHS, RHS->getType(), LHS->getName() + ".wide"); 311006c3fb27SDimitry Andric else 311106c3fb27SDimitry Andric RHS = Builder.CreateZExt(RHS, LHS->getType(), RHS->getName() + ".wide"); 311206c3fb27SDimitry Andric } 311306c3fb27SDimitry Andric // Do not use builder here: CreateICmp may simplify this into a constant and 311406c3fb27SDimitry Andric // unswitching will break. Better optimize it away later. 311506c3fb27SDimitry Andric auto *InjectedCond = 311606c3fb27SDimitry Andric ICmpInst::Create(Instruction::ICmp, Pred, LHS, RHS, "injected.cond", 31170fca6ea1SDimitry Andric Preheader->getTerminator()->getIterator()); 311806c3fb27SDimitry Andric 311906c3fb27SDimitry Andric BasicBlock *CheckBlock = BasicBlock::Create(Ctx, BB->getName() + ".check", 312006c3fb27SDimitry Andric BB->getParent(), InLoopSucc); 312106c3fb27SDimitry Andric Builder.SetInsertPoint(TI); 312206c3fb27SDimitry Andric auto *InvariantBr = 312306c3fb27SDimitry Andric Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock); 312406c3fb27SDimitry Andric 312506c3fb27SDimitry Andric Builder.SetInsertPoint(CheckBlock); 31264542f901SDimitry Andric Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0), 31274542f901SDimitry Andric TI->getSuccessor(1)); 312806c3fb27SDimitry Andric TI->eraseFromParent(); 312906c3fb27SDimitry Andric 313006c3fb27SDimitry Andric // Fixup phis. 313106c3fb27SDimitry Andric for (auto &I : *InLoopSucc) { 313206c3fb27SDimitry Andric auto *PN = dyn_cast<PHINode>(&I); 313306c3fb27SDimitry Andric if (!PN) 313406c3fb27SDimitry Andric break; 313506c3fb27SDimitry Andric auto *Inc = PN->getIncomingValueForBlock(BB); 313606c3fb27SDimitry Andric PN->addIncoming(Inc, CheckBlock); 313706c3fb27SDimitry Andric } 313806c3fb27SDimitry Andric OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock); 313906c3fb27SDimitry Andric 314006c3fb27SDimitry Andric SmallVector<DominatorTree::UpdateType, 4> DTUpdates = { 314106c3fb27SDimitry Andric { DominatorTree::Insert, BB, CheckBlock }, 314206c3fb27SDimitry Andric { DominatorTree::Insert, CheckBlock, InLoopSucc }, 314306c3fb27SDimitry Andric { DominatorTree::Insert, CheckBlock, OutOfLoopSucc }, 314406c3fb27SDimitry Andric { DominatorTree::Delete, BB, OutOfLoopSucc } 314506c3fb27SDimitry Andric }; 314606c3fb27SDimitry Andric 314706c3fb27SDimitry Andric DT.applyUpdates(DTUpdates); 314806c3fb27SDimitry Andric if (MSSAU) 314906c3fb27SDimitry Andric MSSAU->applyUpdates(DTUpdates, DT); 315006c3fb27SDimitry Andric L.addBasicBlockToLoop(CheckBlock, LI); 315106c3fb27SDimitry Andric 315206c3fb27SDimitry Andric #ifndef NDEBUG 315306c3fb27SDimitry Andric DT.verify(); 315406c3fb27SDimitry Andric LI.verify(DT); 315506c3fb27SDimitry Andric if (MSSAU && VerifyMemorySSA) 315606c3fb27SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA(); 315706c3fb27SDimitry Andric #endif 315806c3fb27SDimitry Andric 315906c3fb27SDimitry Andric // TODO: In fact, cost of unswitching a new invariant candidate is *slightly* 316006c3fb27SDimitry Andric // higher because we have just inserted a new block. Need to think how to 316106c3fb27SDimitry Andric // adjust the cost of injected candidates when it was first computed. 316206c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "Injected a new loop-invariant branch " << *InvariantBr 316306c3fb27SDimitry Andric << " and considering it for unswitching."); 316406c3fb27SDimitry Andric ++NumInvariantConditionsInjected; 316506c3fb27SDimitry Andric return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond }, 316606c3fb27SDimitry Andric Candidate.Cost); 316706c3fb27SDimitry Andric } 316806c3fb27SDimitry Andric 316906c3fb27SDimitry Andric /// Given chain of loop branch conditions looking like: 317006c3fb27SDimitry Andric /// br (Variant < Invariant1) 317106c3fb27SDimitry Andric /// br (Variant < Invariant2) 317206c3fb27SDimitry Andric /// br (Variant < Invariant3) 317306c3fb27SDimitry Andric /// ... 317406c3fb27SDimitry Andric /// collect set of invariant conditions on which we want to unswitch, which 317506c3fb27SDimitry Andric /// look like: 317606c3fb27SDimitry Andric /// Invariant1 <= Invariant2 317706c3fb27SDimitry Andric /// Invariant2 <= Invariant3 317806c3fb27SDimitry Andric /// ... 317906c3fb27SDimitry Andric /// Though they might not immediately exist in the IR, we can still inject them. 318006c3fb27SDimitry Andric static bool insertCandidatesWithPendingInjections( 318106c3fb27SDimitry Andric SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, Loop &L, 318206c3fb27SDimitry Andric ICmpInst::Predicate Pred, ArrayRef<CompareDesc> Compares, 318306c3fb27SDimitry Andric const DominatorTree &DT) { 318406c3fb27SDimitry Andric 318506c3fb27SDimitry Andric assert(ICmpInst::isRelational(Pred)); 318606c3fb27SDimitry Andric assert(ICmpInst::isStrictPredicate(Pred)); 318706c3fb27SDimitry Andric if (Compares.size() < 2) 318806c3fb27SDimitry Andric return false; 318906c3fb27SDimitry Andric ICmpInst::Predicate NonStrictPred = ICmpInst::getNonStrictPredicate(Pred); 319006c3fb27SDimitry Andric for (auto Prev = Compares.begin(), Next = Compares.begin() + 1; 319106c3fb27SDimitry Andric Next != Compares.end(); ++Prev, ++Next) { 319206c3fb27SDimitry Andric Value *LHS = Next->Invariant; 319306c3fb27SDimitry Andric Value *RHS = Prev->Invariant; 319406c3fb27SDimitry Andric BasicBlock *InLoopSucc = Prev->InLoopSucc; 319506c3fb27SDimitry Andric InjectedInvariant ToInject(NonStrictPred, LHS, RHS, InLoopSucc); 319606c3fb27SDimitry Andric NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS }, 319706c3fb27SDimitry Andric std::nullopt, std::move(ToInject)); 319806c3fb27SDimitry Andric UnswitchCandidates.push_back(std::move(Candidate)); 319906c3fb27SDimitry Andric } 320006c3fb27SDimitry Andric return true; 320106c3fb27SDimitry Andric } 320206c3fb27SDimitry Andric 320306c3fb27SDimitry Andric /// Collect unswitch candidates by invariant conditions that are not immediately 320406c3fb27SDimitry Andric /// present in the loop. However, they can be injected into the code if we 320506c3fb27SDimitry Andric /// decide it's profitable. 320606c3fb27SDimitry Andric /// An example of such conditions is following: 320706c3fb27SDimitry Andric /// 320806c3fb27SDimitry Andric /// for (...) { 320906c3fb27SDimitry Andric /// x = load ... 321006c3fb27SDimitry Andric /// if (! x <u C1) break; 321106c3fb27SDimitry Andric /// if (! x <u C2) break; 321206c3fb27SDimitry Andric /// <do something> 321306c3fb27SDimitry Andric /// } 321406c3fb27SDimitry Andric /// 321506c3fb27SDimitry Andric /// We can unswitch by condition "C1 <=u C2". If that is true, then "x <u C1 <= 321606c3fb27SDimitry Andric /// C2" automatically implies "x <u C2", so we can get rid of one of 321706c3fb27SDimitry Andric /// loop-variant checks in unswitched loop version. 321806c3fb27SDimitry Andric static bool collectUnswitchCandidatesWithInjections( 321906c3fb27SDimitry Andric SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, 322006c3fb27SDimitry Andric IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L, 322106c3fb27SDimitry Andric const DominatorTree &DT, const LoopInfo &LI, AAResults &AA, 322206c3fb27SDimitry Andric const MemorySSAUpdater *MSSAU) { 322306c3fb27SDimitry Andric if (!InjectInvariantConditions) 322406c3fb27SDimitry Andric return false; 322506c3fb27SDimitry Andric 322606c3fb27SDimitry Andric if (!DT.isReachableFromEntry(L.getHeader())) 322706c3fb27SDimitry Andric return false; 322806c3fb27SDimitry Andric auto *Latch = L.getLoopLatch(); 322906c3fb27SDimitry Andric // Need to have a single latch and a preheader. 323006c3fb27SDimitry Andric if (!Latch) 323106c3fb27SDimitry Andric return false; 323206c3fb27SDimitry Andric assert(L.getLoopPreheader() && "Must have a preheader!"); 323306c3fb27SDimitry Andric 323406c3fb27SDimitry Andric DenseMap<Value *, SmallVector<CompareDesc, 4> > CandidatesULT; 323506c3fb27SDimitry Andric // Traverse the conditions that dominate latch (and therefore dominate each 323606c3fb27SDimitry Andric // other). 323706c3fb27SDimitry Andric for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock()); 323806c3fb27SDimitry Andric DTN = DTN->getIDom()) { 323906c3fb27SDimitry Andric ICmpInst::Predicate Pred; 324006c3fb27SDimitry Andric Value *LHS = nullptr, *RHS = nullptr; 324106c3fb27SDimitry Andric BasicBlock *IfTrue = nullptr, *IfFalse = nullptr; 324206c3fb27SDimitry Andric auto *BB = DTN->getBlock(); 324306c3fb27SDimitry Andric // Ignore inner loops. 324406c3fb27SDimitry Andric if (LI.getLoopFor(BB) != &L) 324506c3fb27SDimitry Andric continue; 324606c3fb27SDimitry Andric auto *Term = BB->getTerminator(); 324706c3fb27SDimitry Andric if (!match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), 324806c3fb27SDimitry Andric m_BasicBlock(IfTrue), m_BasicBlock(IfFalse)))) 324906c3fb27SDimitry Andric continue; 325006c3fb27SDimitry Andric if (!LHS->getType()->isIntegerTy()) 325106c3fb27SDimitry Andric continue; 325206c3fb27SDimitry Andric canonicalizeForInvariantConditionInjection(Pred, LHS, RHS, IfTrue, IfFalse, 325306c3fb27SDimitry Andric L); 325406c3fb27SDimitry Andric if (!shouldTryInjectInvariantCondition(Pred, LHS, RHS, IfTrue, IfFalse, L)) 325506c3fb27SDimitry Andric continue; 325606c3fb27SDimitry Andric if (!shouldTryInjectBasingOnMetadata(cast<BranchInst>(Term), IfTrue)) 325706c3fb27SDimitry Andric continue; 325806c3fb27SDimitry Andric // Strip ZEXT for unsigned predicate. 325906c3fb27SDimitry Andric // TODO: once signed predicates are supported, also strip SEXT. 326006c3fb27SDimitry Andric CompareDesc Desc(cast<BranchInst>(Term), RHS, IfTrue); 326106c3fb27SDimitry Andric while (auto *Zext = dyn_cast<ZExtInst>(LHS)) 326206c3fb27SDimitry Andric LHS = Zext->getOperand(0); 326306c3fb27SDimitry Andric CandidatesULT[LHS].push_back(Desc); 326406c3fb27SDimitry Andric } 326506c3fb27SDimitry Andric 326606c3fb27SDimitry Andric bool Found = false; 326706c3fb27SDimitry Andric for (auto &It : CandidatesULT) 326806c3fb27SDimitry Andric Found |= insertCandidatesWithPendingInjections( 326906c3fb27SDimitry Andric UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT); 327006c3fb27SDimitry Andric return Found; 327106c3fb27SDimitry Andric } 327206c3fb27SDimitry Andric 3273bdd1243dSDimitry Andric static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI) { 3274bdd1243dSDimitry Andric if (!L.isSafeToClone()) 32750b57cec5SDimitry Andric return false; 3276bdd1243dSDimitry Andric for (auto *BB : L.blocks()) 3277bdd1243dSDimitry Andric for (auto &I : *BB) { 3278bdd1243dSDimitry Andric if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB)) 3279bdd1243dSDimitry Andric return false; 3280bdd1243dSDimitry Andric if (auto *CB = dyn_cast<CallBase>(&I)) { 3281bdd1243dSDimitry Andric assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone()."); 3282bdd1243dSDimitry Andric if (CB->isConvergent()) 3283bdd1243dSDimitry Andric return false; 3284bdd1243dSDimitry Andric } 3285bdd1243dSDimitry Andric } 32860b57cec5SDimitry Andric 32870b57cec5SDimitry Andric // Check if there are irreducible CFG cycles in this loop. If so, we cannot 32880b57cec5SDimitry Andric // easily unswitch non-trivial edges out of the loop. Doing so might turn the 32890b57cec5SDimitry Andric // irreducible control flow into reducible control flow and introduce new 32900b57cec5SDimitry Andric // loops "out of thin air". If we ever discover important use cases for doing 32910b57cec5SDimitry Andric // this, we can add support to loop unswitch, but it is a lot of complexity 32920b57cec5SDimitry Andric // for what seems little or no real world benefit. 32930b57cec5SDimitry Andric LoopBlocksRPO RPOT(&L); 32940b57cec5SDimitry Andric RPOT.perform(&LI); 32950b57cec5SDimitry Andric if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI)) 32960b57cec5SDimitry Andric return false; 32970b57cec5SDimitry Andric 32980b57cec5SDimitry Andric SmallVector<BasicBlock *, 4> ExitBlocks; 32990b57cec5SDimitry Andric L.getUniqueExitBlocks(ExitBlocks); 3300fe6060f1SDimitry Andric // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch 3301fe6060f1SDimitry Andric // instruction as we don't know how to split those exit blocks. 33020b57cec5SDimitry Andric // FIXME: We should teach SplitBlock to handle this and remove this 33030b57cec5SDimitry Andric // restriction. 3304fe6060f1SDimitry Andric for (auto *ExitBB : ExitBlocks) { 3305fe6060f1SDimitry Andric auto *I = ExitBB->getFirstNonPHI(); 3306fe6060f1SDimitry Andric if (isa<CleanupPadInst>(I) || isa<CatchSwitchInst>(I)) { 3307fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch " 3308fe6060f1SDimitry Andric "in exit block\n"); 33090b57cec5SDimitry Andric return false; 33100b57cec5SDimitry Andric } 3311fe6060f1SDimitry Andric } 33120b57cec5SDimitry Andric 3313bdd1243dSDimitry Andric return true; 3314bdd1243dSDimitry Andric } 33150b57cec5SDimitry Andric 3316bdd1243dSDimitry Andric static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate( 3317bdd1243dSDimitry Andric ArrayRef<NonTrivialUnswitchCandidate> UnswitchCandidates, const Loop &L, 3318bdd1243dSDimitry Andric const DominatorTree &DT, const LoopInfo &LI, AssumptionCache &AC, 3319bdd1243dSDimitry Andric const TargetTransformInfo &TTI, const IVConditionInfo &PartialIVInfo) { 33200b57cec5SDimitry Andric // Given that unswitching these terminators will require duplicating parts of 33210b57cec5SDimitry Andric // the loop, so we need to be able to model that cost. Compute the ephemeral 33220b57cec5SDimitry Andric // values and set up a data structure to hold per-BB costs. We cache each 33230b57cec5SDimitry Andric // block's cost so that we don't recompute this when considering different 33240b57cec5SDimitry Andric // subsets of the loop for duplication during unswitching. 33250b57cec5SDimitry Andric SmallPtrSet<const Value *, 4> EphValues; 33260b57cec5SDimitry Andric CodeMetrics::collectEphemeralValues(&L, &AC, EphValues); 3327fe6060f1SDimitry Andric SmallDenseMap<BasicBlock *, InstructionCost, 4> BBCostMap; 33280b57cec5SDimitry Andric 33290b57cec5SDimitry Andric // Compute the cost of each block, as well as the total loop cost. Also, bail 33300b57cec5SDimitry Andric // out if we see instructions which are incompatible with loop unswitching 33310b57cec5SDimitry Andric // (convergent, noduplicate, or cross-basic-block tokens). 33320b57cec5SDimitry Andric // FIXME: We might be able to safely handle some of these in non-duplicated 33330b57cec5SDimitry Andric // regions. 3334e8d8bef9SDimitry Andric TargetTransformInfo::TargetCostKind CostKind = 3335e8d8bef9SDimitry Andric L.getHeader()->getParent()->hasMinSize() 3336e8d8bef9SDimitry Andric ? TargetTransformInfo::TCK_CodeSize 3337e8d8bef9SDimitry Andric : TargetTransformInfo::TCK_SizeAndLatency; 3338fe6060f1SDimitry Andric InstructionCost LoopCost = 0; 33390b57cec5SDimitry Andric for (auto *BB : L.blocks()) { 3340fe6060f1SDimitry Andric InstructionCost Cost = 0; 33410b57cec5SDimitry Andric for (auto &I : *BB) { 33420b57cec5SDimitry Andric if (EphValues.count(&I)) 33430b57cec5SDimitry Andric continue; 3344bdd1243dSDimitry Andric Cost += TTI.getInstructionCost(&I, CostKind); 33450b57cec5SDimitry Andric } 33460b57cec5SDimitry Andric assert(Cost >= 0 && "Must not have negative costs!"); 33470b57cec5SDimitry Andric LoopCost += Cost; 33480b57cec5SDimitry Andric assert(LoopCost >= 0 && "Must not have negative loop costs!"); 33490b57cec5SDimitry Andric BBCostMap[BB] = Cost; 33500b57cec5SDimitry Andric } 33510b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n"); 33520b57cec5SDimitry Andric 33530b57cec5SDimitry Andric // Now we find the best candidate by searching for the one with the following 33540b57cec5SDimitry Andric // properties in order: 33550b57cec5SDimitry Andric // 33560b57cec5SDimitry Andric // 1) An unswitching cost below the threshold 33570b57cec5SDimitry Andric // 2) The smallest number of duplicated unswitch candidates (to avoid 33580b57cec5SDimitry Andric // creating redundant subsequent unswitching) 33590b57cec5SDimitry Andric // 3) The smallest cost after unswitching. 33600b57cec5SDimitry Andric // 33610b57cec5SDimitry Andric // We prioritize reducing fanout of unswitch candidates provided the cost 33620b57cec5SDimitry Andric // remains below the threshold because this has a multiplicative effect. 33630b57cec5SDimitry Andric // 33640b57cec5SDimitry Andric // This requires memoizing each dominator subtree to avoid redundant work. 33650b57cec5SDimitry Andric // 33660b57cec5SDimitry Andric // FIXME: Need to actually do the number of candidates part above. 3367fe6060f1SDimitry Andric SmallDenseMap<DomTreeNode *, InstructionCost, 4> DTCostMap; 33680b57cec5SDimitry Andric // Given a terminator which might be unswitched, computes the non-duplicated 33690b57cec5SDimitry Andric // cost for that terminator. 3370fe6060f1SDimitry Andric auto ComputeUnswitchedCost = [&](Instruction &TI, 3371fe6060f1SDimitry Andric bool FullUnswitch) -> InstructionCost { 337206c3fb27SDimitry Andric // Unswitching selects unswitches the entire loop. 337306c3fb27SDimitry Andric if (isa<SelectInst>(TI)) 337406c3fb27SDimitry Andric return LoopCost; 337506c3fb27SDimitry Andric 33760b57cec5SDimitry Andric BasicBlock &BB = *TI.getParent(); 33770b57cec5SDimitry Andric SmallPtrSet<BasicBlock *, 4> Visited; 33780b57cec5SDimitry Andric 3379fe6060f1SDimitry Andric InstructionCost Cost = 0; 33800b57cec5SDimitry Andric for (BasicBlock *SuccBB : successors(&BB)) { 33810b57cec5SDimitry Andric // Don't count successors more than once. 33820b57cec5SDimitry Andric if (!Visited.insert(SuccBB).second) 33830b57cec5SDimitry Andric continue; 33840b57cec5SDimitry Andric 33850b57cec5SDimitry Andric // If this is a partial unswitch candidate, then it must be a conditional 3386fe6060f1SDimitry Andric // branch with a condition of either `or`, `and`, their corresponding 3387fe6060f1SDimitry Andric // select forms or partially invariant instructions. In that case, one of 33880b57cec5SDimitry Andric // the successors is necessarily duplicated, so don't even try to remove 33890b57cec5SDimitry Andric // its cost. 33900b57cec5SDimitry Andric if (!FullUnswitch) { 33910b57cec5SDimitry Andric auto &BI = cast<BranchInst>(TI); 339281ad6265SDimitry Andric Value *Cond = skipTrivialSelect(BI.getCondition()); 339381ad6265SDimitry Andric if (match(Cond, m_LogicalAnd())) { 33940b57cec5SDimitry Andric if (SuccBB == BI.getSuccessor(1)) 33950b57cec5SDimitry Andric continue; 339681ad6265SDimitry Andric } else if (match(Cond, m_LogicalOr())) { 33970b57cec5SDimitry Andric if (SuccBB == BI.getSuccessor(0)) 33980b57cec5SDimitry Andric continue; 3399fe6060f1SDimitry Andric } else if ((PartialIVInfo.KnownValue->isOneValue() && 3400fe6060f1SDimitry Andric SuccBB == BI.getSuccessor(0)) || 3401fe6060f1SDimitry Andric (!PartialIVInfo.KnownValue->isOneValue() && 3402fe6060f1SDimitry Andric SuccBB == BI.getSuccessor(1))) 3403fe6060f1SDimitry Andric continue; 34040b57cec5SDimitry Andric } 34050b57cec5SDimitry Andric 34060b57cec5SDimitry Andric // This successor's domtree will not need to be duplicated after 34070b57cec5SDimitry Andric // unswitching if the edge to the successor dominates it (and thus the 34080b57cec5SDimitry Andric // entire tree). This essentially means there is no other path into this 34090b57cec5SDimitry Andric // subtree and so it will end up live in only one clone of the loop. 34100b57cec5SDimitry Andric if (SuccBB->getUniquePredecessor() || 34110b57cec5SDimitry Andric llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) { 34120b57cec5SDimitry Andric return PredBB == &BB || DT.dominates(SuccBB, PredBB); 34130b57cec5SDimitry Andric })) { 3414fe6060f1SDimitry Andric Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap); 3415fe6060f1SDimitry Andric assert(Cost <= LoopCost && 34160b57cec5SDimitry Andric "Non-duplicated cost should never exceed total loop cost!"); 34170b57cec5SDimitry Andric } 34180b57cec5SDimitry Andric } 34190b57cec5SDimitry Andric 34200b57cec5SDimitry Andric // Now scale the cost by the number of unique successors minus one. We 34210b57cec5SDimitry Andric // subtract one because there is already at least one copy of the entire 34220b57cec5SDimitry Andric // loop. This is computing the new cost of unswitching a condition. 34230b57cec5SDimitry Andric // Note that guards always have 2 unique successors that are implicit and 34240b57cec5SDimitry Andric // will be materialized if we decide to unswitch it. 34250b57cec5SDimitry Andric int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size(); 34260b57cec5SDimitry Andric assert(SuccessorsCount > 1 && 34270b57cec5SDimitry Andric "Cannot unswitch a condition without multiple distinct successors!"); 3428fe6060f1SDimitry Andric return (LoopCost - Cost) * (SuccessorsCount - 1); 34290b57cec5SDimitry Andric }; 3430bdd1243dSDimitry Andric 3431bdd1243dSDimitry Andric std::optional<NonTrivialUnswitchCandidate> Best; 3432bdd1243dSDimitry Andric for (auto &Candidate : UnswitchCandidates) { 3433bdd1243dSDimitry Andric Instruction &TI = *Candidate.TI; 3434bdd1243dSDimitry Andric ArrayRef<Value *> Invariants = Candidate.Invariants; 34350b57cec5SDimitry Andric BranchInst *BI = dyn_cast<BranchInst>(&TI); 343606c3fb27SDimitry Andric bool FullUnswitch = 343706c3fb27SDimitry Andric !BI || Candidate.hasPendingInjection() || 343881ad6265SDimitry Andric (Invariants.size() == 1 && 343906c3fb27SDimitry Andric Invariants[0] == skipTrivialSelect(BI->getCondition())); 344006c3fb27SDimitry Andric InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch); 34410b57cec5SDimitry Andric // Calculate cost multiplier which is a tool to limit potentially 34420b57cec5SDimitry Andric // exponential behavior of loop-unswitch. 34430b57cec5SDimitry Andric if (EnableUnswitchCostMultiplier) { 34440b57cec5SDimitry Andric int CostMultiplier = 34455ffd83dbSDimitry Andric CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates); 34460b57cec5SDimitry Andric assert( 34470b57cec5SDimitry Andric (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) && 34480b57cec5SDimitry Andric "cost multiplier needs to be in the range of 1..UnswitchThreshold"); 34490b57cec5SDimitry Andric CandidateCost *= CostMultiplier; 34500b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost 34510b57cec5SDimitry Andric << " (multiplier: " << CostMultiplier << ")" 34520b57cec5SDimitry Andric << " for unswitch candidate: " << TI << "\n"); 34530b57cec5SDimitry Andric } else { 34540b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost 34550b57cec5SDimitry Andric << " for unswitch candidate: " << TI << "\n"); 34560b57cec5SDimitry Andric } 34570b57cec5SDimitry Andric 3458bdd1243dSDimitry Andric if (!Best || CandidateCost < Best->Cost) { 3459bdd1243dSDimitry Andric Best = Candidate; 3460bdd1243dSDimitry Andric Best->Cost = CandidateCost; 34610b57cec5SDimitry Andric } 34620b57cec5SDimitry Andric } 3463bdd1243dSDimitry Andric assert(Best && "Must be!"); 3464bdd1243dSDimitry Andric return *Best; 3465bdd1243dSDimitry Andric } 34660b57cec5SDimitry Andric 346706c3fb27SDimitry Andric // Insert a freeze on an unswitched branch if all is true: 346806c3fb27SDimitry Andric // 1. freeze-loop-unswitch-cond option is true 346906c3fb27SDimitry Andric // 2. The branch may not execute in the loop pre-transformation. If a branch may 347006c3fb27SDimitry Andric // not execute and could cause UB, it would always cause UB if it is hoisted outside 347106c3fb27SDimitry Andric // of the loop. Insert a freeze to prevent this case. 347206c3fb27SDimitry Andric // 3. The branch condition may be poison or undef 347306c3fb27SDimitry Andric static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, 347406c3fb27SDimitry Andric AssumptionCache &AC) { 347506c3fb27SDimitry Andric assert(isa<BranchInst>(TI) || isa<SwitchInst>(TI)); 347606c3fb27SDimitry Andric if (!FreezeLoopUnswitchCond) 347706c3fb27SDimitry Andric return false; 347806c3fb27SDimitry Andric 347906c3fb27SDimitry Andric ICFLoopSafetyInfo SafetyInfo; 348006c3fb27SDimitry Andric SafetyInfo.computeLoopSafetyInfo(&L); 348106c3fb27SDimitry Andric if (SafetyInfo.isGuaranteedToExecute(TI, &DT, &L)) 348206c3fb27SDimitry Andric return false; 348306c3fb27SDimitry Andric 348406c3fb27SDimitry Andric Value *Cond; 348506c3fb27SDimitry Andric if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) 348606c3fb27SDimitry Andric Cond = skipTrivialSelect(BI->getCondition()); 348706c3fb27SDimitry Andric else 348806c3fb27SDimitry Andric Cond = skipTrivialSelect(cast<SwitchInst>(&TI)->getCondition()); 348906c3fb27SDimitry Andric return !isGuaranteedNotToBeUndefOrPoison( 349006c3fb27SDimitry Andric Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT); 349106c3fb27SDimitry Andric } 349206c3fb27SDimitry Andric 34935f757f3fSDimitry Andric static bool unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI, 34945f757f3fSDimitry Andric AssumptionCache &AC, AAResults &AA, 34955f757f3fSDimitry Andric TargetTransformInfo &TTI, ScalarEvolution *SE, 34965f757f3fSDimitry Andric MemorySSAUpdater *MSSAU, 34975f757f3fSDimitry Andric LPMUpdater &LoopUpdater) { 3498bdd1243dSDimitry Andric // Collect all invariant conditions within this loop (as opposed to an inner 3499bdd1243dSDimitry Andric // loop which would be handled when visiting that inner loop). 3500bdd1243dSDimitry Andric SmallVector<NonTrivialUnswitchCandidate, 4> UnswitchCandidates; 3501bdd1243dSDimitry Andric IVConditionInfo PartialIVInfo; 3502bdd1243dSDimitry Andric Instruction *PartialIVCondBranch = nullptr; 350306c3fb27SDimitry Andric collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo, 350406c3fb27SDimitry Andric PartialIVCondBranch, L, LI, AA, MSSAU); 35054542f901SDimitry Andric if (!findOptionMDForLoop(&L, "llvm.loop.unswitch.injection.disable")) 350606c3fb27SDimitry Andric collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo, 350706c3fb27SDimitry Andric PartialIVCondBranch, L, DT, LI, AA, 350806c3fb27SDimitry Andric MSSAU); 3509bdd1243dSDimitry Andric // If we didn't find any candidates, we're done. 351006c3fb27SDimitry Andric if (UnswitchCandidates.empty()) 3511bdd1243dSDimitry Andric return false; 3512bdd1243dSDimitry Andric 3513bdd1243dSDimitry Andric LLVM_DEBUG( 3514bdd1243dSDimitry Andric dbgs() << "Considering " << UnswitchCandidates.size() 3515bdd1243dSDimitry Andric << " non-trivial loop invariant conditions for unswitching.\n"); 3516bdd1243dSDimitry Andric 3517bdd1243dSDimitry Andric NonTrivialUnswitchCandidate Best = findBestNonTrivialUnswitchCandidate( 3518bdd1243dSDimitry Andric UnswitchCandidates, L, DT, LI, AC, TTI, PartialIVInfo); 3519bdd1243dSDimitry Andric 3520bdd1243dSDimitry Andric assert(Best.TI && "Failed to find loop unswitch candidate"); 3521bdd1243dSDimitry Andric assert(Best.Cost && "Failed to compute cost"); 3522bdd1243dSDimitry Andric 3523bdd1243dSDimitry Andric if (*Best.Cost >= UnswitchThreshold) { 3524bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: " << *Best.Cost 3525bdd1243dSDimitry Andric << "\n"); 35260b57cec5SDimitry Andric return false; 35270b57cec5SDimitry Andric } 35280b57cec5SDimitry Andric 35294542f901SDimitry Andric bool InjectedCondition = false; 35304542f901SDimitry Andric if (Best.hasPendingInjection()) { 353106c3fb27SDimitry Andric Best = injectPendingInvariantConditions(Best, L, DT, LI, AC, MSSAU); 35324542f901SDimitry Andric InjectedCondition = true; 35334542f901SDimitry Andric } 353406c3fb27SDimitry Andric assert(!Best.hasPendingInjection() && 353506c3fb27SDimitry Andric "All injections should have been done by now!"); 353606c3fb27SDimitry Andric 3537bdd1243dSDimitry Andric if (Best.TI != PartialIVCondBranch) 3538fe6060f1SDimitry Andric PartialIVInfo.InstToDuplicate.clear(); 3539fe6060f1SDimitry Andric 354006c3fb27SDimitry Andric bool InsertFreeze; 354106c3fb27SDimitry Andric if (auto *SI = dyn_cast<SelectInst>(Best.TI)) { 354206c3fb27SDimitry Andric // If the best candidate is a select, turn it into a branch. Select 354306c3fb27SDimitry Andric // instructions with a poison conditional do not propagate poison, but 354406c3fb27SDimitry Andric // branching on poison causes UB. Insert a freeze on the select 354506c3fb27SDimitry Andric // conditional to prevent UB after turning the select into a branch. 354606c3fb27SDimitry Andric InsertFreeze = !isGuaranteedNotToBeUndefOrPoison( 354706c3fb27SDimitry Andric SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT); 354806c3fb27SDimitry Andric Best.TI = turnSelectIntoBranch(SI, DT, LI, MSSAU, &AC); 354906c3fb27SDimitry Andric } else { 35500b57cec5SDimitry Andric // If the best candidate is a guard, turn it into a branch. 3551bdd1243dSDimitry Andric if (isGuard(Best.TI)) 3552bdd1243dSDimitry Andric Best.TI = 3553bdd1243dSDimitry Andric turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU); 355406c3fb27SDimitry Andric InsertFreeze = shouldInsertFreeze(L, *Best.TI, DT, AC); 355506c3fb27SDimitry Andric } 35560b57cec5SDimitry Andric 3557bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Cost 3558bdd1243dSDimitry Andric << ") terminator: " << *Best.TI << "\n"); 3559bdd1243dSDimitry Andric unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT, 35605f757f3fSDimitry Andric LI, AC, SE, MSSAU, LoopUpdater, InsertFreeze, 35615f757f3fSDimitry Andric InjectedCondition); 35620b57cec5SDimitry Andric return true; 35630b57cec5SDimitry Andric } 35640b57cec5SDimitry Andric 35650b57cec5SDimitry Andric /// Unswitch control flow predicated on loop invariant conditions. 35660b57cec5SDimitry Andric /// 35670b57cec5SDimitry Andric /// This first hoists all branches or switches which are trivial (IE, do not 35680b57cec5SDimitry Andric /// require duplicating any part of the loop) out of the loop body. It then 35690b57cec5SDimitry Andric /// looks at other loop invariant control flows and tries to unswitch those as 35700b57cec5SDimitry Andric /// well by cloning the loop if the result is small enough. 35710b57cec5SDimitry Andric /// 3572fe6060f1SDimitry Andric /// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are 3573fe6060f1SDimitry Andric /// also updated based on the unswitch. The `MSSA` analysis is also updated if 3574fe6060f1SDimitry Andric /// valid (i.e. its use is enabled). 35750b57cec5SDimitry Andric /// 35760b57cec5SDimitry Andric /// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is 35770b57cec5SDimitry Andric /// true, we will attempt to do non-trivial unswitching as well as trivial 35780b57cec5SDimitry Andric /// unswitching. 35790b57cec5SDimitry Andric /// 35805f757f3fSDimitry Andric /// The `postUnswitch` function will be run after unswitching is complete 35815f757f3fSDimitry Andric /// with information on whether or not the provided loop remains a loop and 35825f757f3fSDimitry Andric /// a list of new sibling loops created. 35830b57cec5SDimitry Andric /// 35840b57cec5SDimitry Andric /// If `SE` is non-null, we will update that analysis based on the unswitching 35850b57cec5SDimitry Andric /// done. 35865f757f3fSDimitry Andric static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, 35875f757f3fSDimitry Andric AssumptionCache &AC, AAResults &AA, 35885f757f3fSDimitry Andric TargetTransformInfo &TTI, bool Trivial, 35895f757f3fSDimitry Andric bool NonTrivial, ScalarEvolution *SE, 35905f757f3fSDimitry Andric MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI, 35915f757f3fSDimitry Andric BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater) { 35920b57cec5SDimitry Andric assert(L.isRecursivelyLCSSAForm(DT, LI) && 35930b57cec5SDimitry Andric "Loops must be in LCSSA form before unswitching."); 35940b57cec5SDimitry Andric 35950b57cec5SDimitry Andric // Must be in loop simplified form: we need a preheader and dedicated exits. 35960b57cec5SDimitry Andric if (!L.isLoopSimplifyForm()) 35970b57cec5SDimitry Andric return false; 35980b57cec5SDimitry Andric 35990b57cec5SDimitry Andric // Try trivial unswitch first before loop over other basic blocks in the loop. 3600fe6060f1SDimitry Andric if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) { 36010b57cec5SDimitry Andric // If we unswitched successfully we will want to clean up the loop before 36020b57cec5SDimitry Andric // processing it further so just mark it as unswitched and return. 36035f757f3fSDimitry Andric postUnswitch(L, LoopUpdater, L.getName(), 36045f757f3fSDimitry Andric /*CurrentLoopValid*/ true, /*PartiallyInvariant*/ false, 36054542f901SDimitry Andric /*InjectedCondition*/ false, {}); 36060b57cec5SDimitry Andric return true; 36070b57cec5SDimitry Andric } 36080b57cec5SDimitry Andric 360906c3fb27SDimitry Andric const Function *F = L.getHeader()->getParent(); 361006c3fb27SDimitry Andric 3611fe6060f1SDimitry Andric // Check whether we should continue with non-trivial conditions. 3612fe6060f1SDimitry Andric // EnableNonTrivialUnswitch: Global variable that forces non-trivial 3613fe6060f1SDimitry Andric // unswitching for testing and debugging. 3614fe6060f1SDimitry Andric // NonTrivial: Parameter that enables non-trivial unswitching for this 3615fe6060f1SDimitry Andric // invocation of the transform. But this should be allowed only 3616fe6060f1SDimitry Andric // for targets without branch divergence. 3617fe6060f1SDimitry Andric // 3618fe6060f1SDimitry Andric // FIXME: If divergence analysis becomes available to a loop 3619fe6060f1SDimitry Andric // transform, we should allow unswitching for non-trivial uniform 3620fe6060f1SDimitry Andric // branches even on targets that have divergence. 3621fe6060f1SDimitry Andric // https://bugs.llvm.org/show_bug.cgi?id=48819 3622fe6060f1SDimitry Andric bool ContinueWithNonTrivial = 362306c3fb27SDimitry Andric EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence(F)); 3624fe6060f1SDimitry Andric if (!ContinueWithNonTrivial) 36250b57cec5SDimitry Andric return false; 36260b57cec5SDimitry Andric 3627e8d8bef9SDimitry Andric // Skip non-trivial unswitching for optsize functions. 362806c3fb27SDimitry Andric if (F->hasOptSize()) 3629e8d8bef9SDimitry Andric return false; 3630e8d8bef9SDimitry Andric 363106c3fb27SDimitry Andric // Returns true if Loop L's loop nest is cold, i.e. if the headers of L, 363206c3fb27SDimitry Andric // of the loops L is nested in, and of the loops nested in L are all cold. 363306c3fb27SDimitry Andric auto IsLoopNestCold = [&](const Loop *L) { 363406c3fb27SDimitry Andric // Check L and all of its parent loops. 363506c3fb27SDimitry Andric auto *Parent = L; 363606c3fb27SDimitry Andric while (Parent) { 363706c3fb27SDimitry Andric if (!PSI->isColdBlock(Parent->getHeader(), BFI)) 363806c3fb27SDimitry Andric return false; 363906c3fb27SDimitry Andric Parent = Parent->getParentLoop(); 364006c3fb27SDimitry Andric } 364106c3fb27SDimitry Andric // Next check all loops nested within L. 364206c3fb27SDimitry Andric SmallVector<const Loop *, 4> Worklist; 364306c3fb27SDimitry Andric Worklist.insert(Worklist.end(), L->getSubLoops().begin(), 364406c3fb27SDimitry Andric L->getSubLoops().end()); 364506c3fb27SDimitry Andric while (!Worklist.empty()) { 364606c3fb27SDimitry Andric auto *CurLoop = Worklist.pop_back_val(); 364706c3fb27SDimitry Andric if (!PSI->isColdBlock(CurLoop->getHeader(), BFI)) 364806c3fb27SDimitry Andric return false; 364906c3fb27SDimitry Andric Worklist.insert(Worklist.end(), CurLoop->getSubLoops().begin(), 365006c3fb27SDimitry Andric CurLoop->getSubLoops().end()); 365106c3fb27SDimitry Andric } 365206c3fb27SDimitry Andric return true; 365306c3fb27SDimitry Andric }; 365406c3fb27SDimitry Andric 365506c3fb27SDimitry Andric // Skip cold loops in cold loop nests, as unswitching them brings little 365606c3fb27SDimitry Andric // benefit but increases the code size 365706c3fb27SDimitry Andric if (PSI && PSI->hasProfileSummary() && BFI && IsLoopNestCold(&L)) { 3658bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n"); 3659bdd1243dSDimitry Andric return false; 3660bdd1243dSDimitry Andric } 3661bdd1243dSDimitry Andric 3662bdd1243dSDimitry Andric // Perform legality checks. 3663bdd1243dSDimitry Andric if (!isSafeForNoNTrivialUnswitching(L, LI)) 3664fe6060f1SDimitry Andric return false; 3665fe6060f1SDimitry Andric 36660b57cec5SDimitry Andric // For non-trivial unswitching, because it often creates new loops, we rely on 36670b57cec5SDimitry Andric // the pass manager to iterate on the loops rather than trying to immediately 36680b57cec5SDimitry Andric // reach a fixed point. There is no substantial advantage to iterating 36690b57cec5SDimitry Andric // internally, and if any of the new loops are simplified enough to contain 36700b57cec5SDimitry Andric // trivial unswitching we want to prefer those. 36710b57cec5SDimitry Andric 36720b57cec5SDimitry Andric // Try to unswitch the best invariant condition. We prefer this full unswitch to 36730b57cec5SDimitry Andric // a partial unswitch when possible below the threshold. 36745f757f3fSDimitry Andric if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, SE, MSSAU, LoopUpdater)) 36750b57cec5SDimitry Andric return true; 36760b57cec5SDimitry Andric 36770b57cec5SDimitry Andric // No other opportunities to unswitch. 3678e8d8bef9SDimitry Andric return false; 36790b57cec5SDimitry Andric } 36800b57cec5SDimitry Andric 36810b57cec5SDimitry Andric PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, 36820b57cec5SDimitry Andric LoopStandardAnalysisResults &AR, 36830b57cec5SDimitry Andric LPMUpdater &U) { 36840b57cec5SDimitry Andric Function &F = *L.getHeader()->getParent(); 36850b57cec5SDimitry Andric (void)F; 3686bdd1243dSDimitry Andric ProfileSummaryInfo *PSI = nullptr; 3687bdd1243dSDimitry Andric if (auto OuterProxy = 3688bdd1243dSDimitry Andric AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR) 3689bdd1243dSDimitry Andric .getCachedResult<ModuleAnalysisManagerFunctionProxy>(F)) 3690bdd1243dSDimitry Andric PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 36910b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L 36920b57cec5SDimitry Andric << "\n"); 36930b57cec5SDimitry Andric 3694bdd1243dSDimitry Andric std::optional<MemorySSAUpdater> MSSAU; 36950b57cec5SDimitry Andric if (AR.MSSA) { 36960b57cec5SDimitry Andric MSSAU = MemorySSAUpdater(AR.MSSA); 36970b57cec5SDimitry Andric if (VerifyMemorySSA) 36980b57cec5SDimitry Andric AR.MSSA->verifyMemorySSA(); 36990b57cec5SDimitry Andric } 3700fe6060f1SDimitry Andric if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial, 37015f757f3fSDimitry Andric &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI, U)) 37020b57cec5SDimitry Andric return PreservedAnalyses::all(); 37030b57cec5SDimitry Andric 37040b57cec5SDimitry Andric if (AR.MSSA && VerifyMemorySSA) 37050b57cec5SDimitry Andric AR.MSSA->verifyMemorySSA(); 37060b57cec5SDimitry Andric 37070b57cec5SDimitry Andric // Historically this pass has had issues with the dominator tree so verify it 37080b57cec5SDimitry Andric // in asserts builds. 37090b57cec5SDimitry Andric assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast)); 37100b57cec5SDimitry Andric 37110b57cec5SDimitry Andric auto PA = getLoopPassPreservedAnalyses(); 37128bcb0991SDimitry Andric if (AR.MSSA) 37130b57cec5SDimitry Andric PA.preserve<MemorySSAAnalysis>(); 37140b57cec5SDimitry Andric return PA; 37150b57cec5SDimitry Andric } 37160b57cec5SDimitry Andric 3717349cc55cSDimitry Andric void SimpleLoopUnswitchPass::printPipeline( 3718349cc55cSDimitry Andric raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { 3719349cc55cSDimitry Andric static_cast<PassInfoMixin<SimpleLoopUnswitchPass> *>(this)->printPipeline( 3720349cc55cSDimitry Andric OS, MapClassName2PassName); 3721349cc55cSDimitry Andric 372206c3fb27SDimitry Andric OS << '<'; 3723349cc55cSDimitry Andric OS << (NonTrivial ? "" : "no-") << "nontrivial;"; 3724349cc55cSDimitry Andric OS << (Trivial ? "" : "no-") << "trivial"; 372506c3fb27SDimitry Andric OS << '>'; 3726349cc55cSDimitry Andric } 3727