10b57cec5SDimitry Andric //===----------------- LoopRotationUtils.cpp -----------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file provides utilities to convert a loop into a loop with bottom test. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopRotationUtils.h" 140b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 150b57cec5SDimitry Andric #include "llvm/Analysis/AssumptionCache.h" 160b57cec5SDimitry Andric #include "llvm/Analysis/CodeMetrics.h" 170b57cec5SDimitry Andric #include "llvm/Analysis/DomTreeUpdater.h" 180b57cec5SDimitry Andric #include "llvm/Analysis/InstructionSimplify.h" 1981ad6265SDimitry Andric #include "llvm/Analysis/LoopInfo.h" 200b57cec5SDimitry Andric #include "llvm/Analysis/MemorySSA.h" 210b57cec5SDimitry Andric #include "llvm/Analysis/MemorySSAUpdater.h" 220b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h" 230b57cec5SDimitry Andric #include "llvm/Analysis/ValueTracking.h" 240b57cec5SDimitry Andric #include "llvm/IR/CFG.h" 25fe6060f1SDimitry Andric #include "llvm/IR/DebugInfo.h" 260b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 270b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 285f757f3fSDimitry Andric #include "llvm/IR/MDBuilder.h" 295f757f3fSDimitry Andric #include "llvm/IR/ProfDataUtils.h" 300b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 310b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 320b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 330b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h" 34e8d8bef9SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h" 350b57cec5SDimitry Andric #include "llvm/Transforms/Utils/Local.h" 360b57cec5SDimitry Andric #include "llvm/Transforms/Utils/SSAUpdater.h" 370b57cec5SDimitry Andric #include "llvm/Transforms/Utils/ValueMapper.h" 380b57cec5SDimitry Andric using namespace llvm; 390b57cec5SDimitry Andric 400b57cec5SDimitry Andric #define DEBUG_TYPE "loop-rotate" 410b57cec5SDimitry Andric 42e8d8bef9SDimitry Andric STATISTIC(NumNotRotatedDueToHeaderSize, 43e8d8bef9SDimitry Andric "Number of loops not rotated due to the header size"); 44fe6060f1SDimitry Andric STATISTIC(NumInstrsHoisted, 45fe6060f1SDimitry Andric "Number of instructions hoisted into loop preheader"); 46fe6060f1SDimitry Andric STATISTIC(NumInstrsDuplicated, 47fe6060f1SDimitry Andric "Number of instructions cloned into loop preheader"); 480b57cec5SDimitry Andric STATISTIC(NumRotated, "Number of loops rotated"); 490b57cec5SDimitry Andric 505ffd83dbSDimitry Andric static cl::opt<bool> 515ffd83dbSDimitry Andric MultiRotate("loop-rotate-multi", cl::init(false), cl::Hidden, 525ffd83dbSDimitry Andric cl::desc("Allow loop rotation multiple times in order to reach " 535ffd83dbSDimitry Andric "a better latch exit")); 545ffd83dbSDimitry Andric 555f757f3fSDimitry Andric // Probability that a rotated loop has zero trip count / is never entered. 565f757f3fSDimitry Andric static constexpr uint32_t ZeroTripCountWeights[] = {1, 127}; 575f757f3fSDimitry Andric 580b57cec5SDimitry Andric namespace { 590b57cec5SDimitry Andric /// A simple loop rotation transformation. 600b57cec5SDimitry Andric class LoopRotate { 610b57cec5SDimitry Andric const unsigned MaxHeaderSize; 620b57cec5SDimitry Andric LoopInfo *LI; 630b57cec5SDimitry Andric const TargetTransformInfo *TTI; 640b57cec5SDimitry Andric AssumptionCache *AC; 650b57cec5SDimitry Andric DominatorTree *DT; 660b57cec5SDimitry Andric ScalarEvolution *SE; 670b57cec5SDimitry Andric MemorySSAUpdater *MSSAU; 680b57cec5SDimitry Andric const SimplifyQuery &SQ; 690b57cec5SDimitry Andric bool RotationOnly; 700b57cec5SDimitry Andric bool IsUtilMode; 71e8d8bef9SDimitry Andric bool PrepareForLTO; 720b57cec5SDimitry Andric 730b57cec5SDimitry Andric public: 740b57cec5SDimitry Andric LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI, 750b57cec5SDimitry Andric const TargetTransformInfo *TTI, AssumptionCache *AC, 760b57cec5SDimitry Andric DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, 77e8d8bef9SDimitry Andric const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode, 78e8d8bef9SDimitry Andric bool PrepareForLTO) 790b57cec5SDimitry Andric : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE), 800b57cec5SDimitry Andric MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly), 81e8d8bef9SDimitry Andric IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {} 820b57cec5SDimitry Andric bool processLoop(Loop *L); 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric private: 850b57cec5SDimitry Andric bool rotateLoop(Loop *L, bool SimplifiedLatch); 860b57cec5SDimitry Andric bool simplifyLoopLatch(Loop *L); 870b57cec5SDimitry Andric }; 880b57cec5SDimitry Andric } // end anonymous namespace 890b57cec5SDimitry Andric 90480093f4SDimitry Andric /// Insert (K, V) pair into the ValueToValueMap, and verify the key did not 91480093f4SDimitry Andric /// previously exist in the map, and the value was inserted. 92480093f4SDimitry Andric static void InsertNewValueIntoMap(ValueToValueMapTy &VM, Value *K, Value *V) { 93480093f4SDimitry Andric bool Inserted = VM.insert({K, V}).second; 94480093f4SDimitry Andric assert(Inserted); 95480093f4SDimitry Andric (void)Inserted; 96480093f4SDimitry Andric } 970b57cec5SDimitry Andric /// RewriteUsesOfClonedInstructions - We just cloned the instructions from the 980b57cec5SDimitry Andric /// old header into the preheader. If there were uses of the values produced by 990b57cec5SDimitry Andric /// these instruction that were outside of the loop, we have to insert PHI nodes 1000b57cec5SDimitry Andric /// to merge the two values. Do this now. 1010b57cec5SDimitry Andric static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, 1020b57cec5SDimitry Andric BasicBlock *OrigPreheader, 1030b57cec5SDimitry Andric ValueToValueMapTy &ValueMap, 104349cc55cSDimitry Andric ScalarEvolution *SE, 1050b57cec5SDimitry Andric SmallVectorImpl<PHINode*> *InsertedPHIs) { 1060b57cec5SDimitry Andric // Remove PHI node entries that are no longer live. 1070b57cec5SDimitry Andric BasicBlock::iterator I, E = OrigHeader->end(); 1080b57cec5SDimitry Andric for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) 1090b57cec5SDimitry Andric PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader)); 1100b57cec5SDimitry Andric 1110b57cec5SDimitry Andric // Now fix up users of the instructions in OrigHeader, inserting PHI nodes 1120b57cec5SDimitry Andric // as necessary. 1130b57cec5SDimitry Andric SSAUpdater SSA(InsertedPHIs); 1140b57cec5SDimitry Andric for (I = OrigHeader->begin(); I != E; ++I) { 1150b57cec5SDimitry Andric Value *OrigHeaderVal = &*I; 1160b57cec5SDimitry Andric 1170b57cec5SDimitry Andric // If there are no uses of the value (e.g. because it returns void), there 1180b57cec5SDimitry Andric // is nothing to rewrite. 1190b57cec5SDimitry Andric if (OrigHeaderVal->use_empty()) 1200b57cec5SDimitry Andric continue; 1210b57cec5SDimitry Andric 1220b57cec5SDimitry Andric Value *OrigPreHeaderVal = ValueMap.lookup(OrigHeaderVal); 1230b57cec5SDimitry Andric 1240b57cec5SDimitry Andric // The value now exits in two versions: the initial value in the preheader 1250b57cec5SDimitry Andric // and the loop "next" value in the original header. 1260b57cec5SDimitry Andric SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName()); 127349cc55cSDimitry Andric // Force re-computation of OrigHeaderVal, as some users now need to use the 128349cc55cSDimitry Andric // new PHI node. 129349cc55cSDimitry Andric if (SE) 130349cc55cSDimitry Andric SE->forgetValue(OrigHeaderVal); 1310b57cec5SDimitry Andric SSA.AddAvailableValue(OrigHeader, OrigHeaderVal); 1320b57cec5SDimitry Andric SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal); 1330b57cec5SDimitry Andric 1340b57cec5SDimitry Andric // Visit each use of the OrigHeader instruction. 135349cc55cSDimitry Andric for (Use &U : llvm::make_early_inc_range(OrigHeaderVal->uses())) { 1360b57cec5SDimitry Andric // SSAUpdater can't handle a non-PHI use in the same block as an 1370b57cec5SDimitry Andric // earlier def. We can easily handle those cases manually. 1380b57cec5SDimitry Andric Instruction *UserInst = cast<Instruction>(U.getUser()); 1390b57cec5SDimitry Andric if (!isa<PHINode>(UserInst)) { 1400b57cec5SDimitry Andric BasicBlock *UserBB = UserInst->getParent(); 1410b57cec5SDimitry Andric 1420b57cec5SDimitry Andric // The original users in the OrigHeader are already using the 1430b57cec5SDimitry Andric // original definitions. 1440b57cec5SDimitry Andric if (UserBB == OrigHeader) 1450b57cec5SDimitry Andric continue; 1460b57cec5SDimitry Andric 1470b57cec5SDimitry Andric // Users in the OrigPreHeader need to use the value to which the 1480b57cec5SDimitry Andric // original definitions are mapped. 1490b57cec5SDimitry Andric if (UserBB == OrigPreheader) { 1500b57cec5SDimitry Andric U = OrigPreHeaderVal; 1510b57cec5SDimitry Andric continue; 1520b57cec5SDimitry Andric } 1530b57cec5SDimitry Andric } 1540b57cec5SDimitry Andric 1550b57cec5SDimitry Andric // Anything else can be handled by SSAUpdater. 1560b57cec5SDimitry Andric SSA.RewriteUse(U); 1570b57cec5SDimitry Andric } 1580b57cec5SDimitry Andric 1590b57cec5SDimitry Andric // Replace MetadataAsValue(ValueAsMetadata(OrigHeaderVal)) uses in debug 1600b57cec5SDimitry Andric // intrinsics. 1610b57cec5SDimitry Andric SmallVector<DbgValueInst *, 1> DbgValues; 162*0fca6ea1SDimitry Andric SmallVector<DbgVariableRecord *, 1> DbgVariableRecords; 163*0fca6ea1SDimitry Andric llvm::findDbgValues(DbgValues, OrigHeaderVal, &DbgVariableRecords); 1640b57cec5SDimitry Andric for (auto &DbgValue : DbgValues) { 1650b57cec5SDimitry Andric // The original users in the OrigHeader are already using the original 1660b57cec5SDimitry Andric // definitions. 1670b57cec5SDimitry Andric BasicBlock *UserBB = DbgValue->getParent(); 1680b57cec5SDimitry Andric if (UserBB == OrigHeader) 1690b57cec5SDimitry Andric continue; 1700b57cec5SDimitry Andric 1710b57cec5SDimitry Andric // Users in the OrigPreHeader need to use the value to which the 1720b57cec5SDimitry Andric // original definitions are mapped and anything else can be handled by 1730b57cec5SDimitry Andric // the SSAUpdater. To avoid adding PHINodes, check if the value is 1740b57cec5SDimitry Andric // available in UserBB, if not substitute undef. 1750b57cec5SDimitry Andric Value *NewVal; 1760b57cec5SDimitry Andric if (UserBB == OrigPreheader) 1770b57cec5SDimitry Andric NewVal = OrigPreHeaderVal; 1780b57cec5SDimitry Andric else if (SSA.HasValueForBlock(UserBB)) 1790b57cec5SDimitry Andric NewVal = SSA.GetValueInMiddleOfBlock(UserBB); 1800b57cec5SDimitry Andric else 1810b57cec5SDimitry Andric NewVal = UndefValue::get(OrigHeaderVal->getType()); 182fe6060f1SDimitry Andric DbgValue->replaceVariableLocationOp(OrigHeaderVal, NewVal); 1830b57cec5SDimitry Andric } 1845f757f3fSDimitry Andric 1855f757f3fSDimitry Andric // RemoveDIs: duplicate implementation for non-instruction debug-info 186*0fca6ea1SDimitry Andric // storage in DbgVariableRecords. 187*0fca6ea1SDimitry Andric for (DbgVariableRecord *DVR : DbgVariableRecords) { 1885f757f3fSDimitry Andric // The original users in the OrigHeader are already using the original 1895f757f3fSDimitry Andric // definitions. 190*0fca6ea1SDimitry Andric BasicBlock *UserBB = DVR->getMarker()->getParent(); 1915f757f3fSDimitry Andric if (UserBB == OrigHeader) 1925f757f3fSDimitry Andric continue; 1935f757f3fSDimitry Andric 1945f757f3fSDimitry Andric // Users in the OrigPreHeader need to use the value to which the 1955f757f3fSDimitry Andric // original definitions are mapped and anything else can be handled by 1965f757f3fSDimitry Andric // the SSAUpdater. To avoid adding PHINodes, check if the value is 1975f757f3fSDimitry Andric // available in UserBB, if not substitute undef. 1985f757f3fSDimitry Andric Value *NewVal; 1995f757f3fSDimitry Andric if (UserBB == OrigPreheader) 2005f757f3fSDimitry Andric NewVal = OrigPreHeaderVal; 2015f757f3fSDimitry Andric else if (SSA.HasValueForBlock(UserBB)) 2025f757f3fSDimitry Andric NewVal = SSA.GetValueInMiddleOfBlock(UserBB); 2035f757f3fSDimitry Andric else 2045f757f3fSDimitry Andric NewVal = UndefValue::get(OrigHeaderVal->getType()); 205*0fca6ea1SDimitry Andric DVR->replaceVariableLocationOp(OrigHeaderVal, NewVal); 2065f757f3fSDimitry Andric } 2070b57cec5SDimitry Andric } 2080b57cec5SDimitry Andric } 2090b57cec5SDimitry Andric 2105ffd83dbSDimitry Andric // Assuming both header and latch are exiting, look for a phi which is only 2115ffd83dbSDimitry Andric // used outside the loop (via a LCSSA phi) in the exit from the header. 2125ffd83dbSDimitry Andric // This means that rotating the loop can remove the phi. 2135ffd83dbSDimitry Andric static bool profitableToRotateLoopExitingLatch(Loop *L) { 2140b57cec5SDimitry Andric BasicBlock *Header = L->getHeader(); 2155ffd83dbSDimitry Andric BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator()); 2165ffd83dbSDimitry Andric assert(BI && BI->isConditional() && "need header with conditional exit"); 2175ffd83dbSDimitry Andric BasicBlock *HeaderExit = BI->getSuccessor(0); 2180b57cec5SDimitry Andric if (L->contains(HeaderExit)) 2195ffd83dbSDimitry Andric HeaderExit = BI->getSuccessor(1); 2200b57cec5SDimitry Andric 2210b57cec5SDimitry Andric for (auto &Phi : Header->phis()) { 2220b57cec5SDimitry Andric // Look for uses of this phi in the loop/via exits other than the header. 2230b57cec5SDimitry Andric if (llvm::any_of(Phi.users(), [HeaderExit](const User *U) { 2240b57cec5SDimitry Andric return cast<Instruction>(U)->getParent() != HeaderExit; 2250b57cec5SDimitry Andric })) 2260b57cec5SDimitry Andric continue; 2270b57cec5SDimitry Andric return true; 2280b57cec5SDimitry Andric } 2295ffd83dbSDimitry Andric return false; 2305ffd83dbSDimitry Andric } 2310b57cec5SDimitry Andric 2325ffd83dbSDimitry Andric // Check that latch exit is deoptimizing (which means - very unlikely to happen) 2335ffd83dbSDimitry Andric // and there is another exit from the loop which is non-deoptimizing. 2345ffd83dbSDimitry Andric // If we rotate latch to that exit our loop has a better chance of being fully 2355ffd83dbSDimitry Andric // canonical. 2365ffd83dbSDimitry Andric // 2375ffd83dbSDimitry Andric // It can give false positives in some rare cases. 2385ffd83dbSDimitry Andric static bool canRotateDeoptimizingLatchExit(Loop *L) { 2395ffd83dbSDimitry Andric BasicBlock *Latch = L->getLoopLatch(); 2405ffd83dbSDimitry Andric assert(Latch && "need latch"); 2415ffd83dbSDimitry Andric BranchInst *BI = dyn_cast<BranchInst>(Latch->getTerminator()); 2425ffd83dbSDimitry Andric // Need normal exiting latch. 2435ffd83dbSDimitry Andric if (!BI || !BI->isConditional()) 2445ffd83dbSDimitry Andric return false; 2455ffd83dbSDimitry Andric 2465ffd83dbSDimitry Andric BasicBlock *Exit = BI->getSuccessor(1); 2475ffd83dbSDimitry Andric if (L->contains(Exit)) 2485ffd83dbSDimitry Andric Exit = BI->getSuccessor(0); 2495ffd83dbSDimitry Andric 2505ffd83dbSDimitry Andric // Latch exit is non-deoptimizing, no need to rotate. 2515ffd83dbSDimitry Andric if (!Exit->getPostdominatingDeoptimizeCall()) 2525ffd83dbSDimitry Andric return false; 2535ffd83dbSDimitry Andric 2545ffd83dbSDimitry Andric SmallVector<BasicBlock *, 4> Exits; 2555ffd83dbSDimitry Andric L->getUniqueExitBlocks(Exits); 2565ffd83dbSDimitry Andric if (!Exits.empty()) { 2575ffd83dbSDimitry Andric // There is at least one non-deoptimizing exit. 2585ffd83dbSDimitry Andric // 2595ffd83dbSDimitry Andric // Note, that BasicBlock::getPostdominatingDeoptimizeCall is not exact, 2605ffd83dbSDimitry Andric // as it can conservatively return false for deoptimizing exits with 2615ffd83dbSDimitry Andric // complex enough control flow down to deoptimize call. 2625ffd83dbSDimitry Andric // 2635ffd83dbSDimitry Andric // That means here we can report success for a case where 2645ffd83dbSDimitry Andric // all exits are deoptimizing but one of them has complex enough 2655ffd83dbSDimitry Andric // control flow (e.g. with loops). 2665ffd83dbSDimitry Andric // 2675ffd83dbSDimitry Andric // That should be a very rare case and false positives for this function 2685ffd83dbSDimitry Andric // have compile-time effect only. 2695ffd83dbSDimitry Andric return any_of(Exits, [](const BasicBlock *BB) { 2705ffd83dbSDimitry Andric return !BB->getPostdominatingDeoptimizeCall(); 2715ffd83dbSDimitry Andric }); 2725ffd83dbSDimitry Andric } 2730b57cec5SDimitry Andric return false; 2740b57cec5SDimitry Andric } 2750b57cec5SDimitry Andric 2765f757f3fSDimitry Andric static void updateBranchWeights(BranchInst &PreHeaderBI, BranchInst &LoopBI, 2775f757f3fSDimitry Andric bool HasConditionalPreHeader, 2785f757f3fSDimitry Andric bool SuccsSwapped) { 2795f757f3fSDimitry Andric MDNode *WeightMD = getBranchWeightMDNode(PreHeaderBI); 2805f757f3fSDimitry Andric if (WeightMD == nullptr) 2815f757f3fSDimitry Andric return; 2825f757f3fSDimitry Andric 2835f757f3fSDimitry Andric // LoopBI should currently be a clone of PreHeaderBI with the same 2845f757f3fSDimitry Andric // metadata. But we double check to make sure we don't have a degenerate case 2855f757f3fSDimitry Andric // where instsimplify changed the instructions. 2865f757f3fSDimitry Andric if (WeightMD != getBranchWeightMDNode(LoopBI)) 2875f757f3fSDimitry Andric return; 2885f757f3fSDimitry Andric 2895f757f3fSDimitry Andric SmallVector<uint32_t, 2> Weights; 290*0fca6ea1SDimitry Andric extractFromBranchWeightMD32(WeightMD, Weights); 2915f757f3fSDimitry Andric if (Weights.size() != 2) 2925f757f3fSDimitry Andric return; 2935f757f3fSDimitry Andric uint32_t OrigLoopExitWeight = Weights[0]; 2945f757f3fSDimitry Andric uint32_t OrigLoopBackedgeWeight = Weights[1]; 2955f757f3fSDimitry Andric 2965f757f3fSDimitry Andric if (SuccsSwapped) 2975f757f3fSDimitry Andric std::swap(OrigLoopExitWeight, OrigLoopBackedgeWeight); 2985f757f3fSDimitry Andric 2995f757f3fSDimitry Andric // Update branch weights. Consider the following edge-counts: 3005f757f3fSDimitry Andric // 3015f757f3fSDimitry Andric // | |-------- | 3025f757f3fSDimitry Andric // V V | V 3035f757f3fSDimitry Andric // Br i1 ... | Br i1 ... 3045f757f3fSDimitry Andric // | | | | | 3055f757f3fSDimitry Andric // x| y| | becomes: | y0| |----- 3065f757f3fSDimitry Andric // V V | | V V | 3075f757f3fSDimitry Andric // Exit Loop | | Loop | 3085f757f3fSDimitry Andric // | | | Br i1 ... | 3095f757f3fSDimitry Andric // ----- | | | | 3105f757f3fSDimitry Andric // x0| x1| y1 | | 3115f757f3fSDimitry Andric // V V ---- 3125f757f3fSDimitry Andric // Exit 3135f757f3fSDimitry Andric // 3145f757f3fSDimitry Andric // The following must hold: 3155f757f3fSDimitry Andric // - x == x0 + x1 # counts to "exit" must stay the same. 3165f757f3fSDimitry Andric // - y0 == x - x0 == x1 # how often loop was entered at all. 3175f757f3fSDimitry Andric // - y1 == y - y0 # How often loop was repeated (after first iter.). 3185f757f3fSDimitry Andric // 3195f757f3fSDimitry Andric // We cannot generally deduce how often we had a zero-trip count loop so we 3205f757f3fSDimitry Andric // have to make a guess for how to distribute x among the new x0 and x1. 3215f757f3fSDimitry Andric 3225f757f3fSDimitry Andric uint32_t ExitWeight0; // aka x0 3235f757f3fSDimitry Andric uint32_t ExitWeight1; // aka x1 3245f757f3fSDimitry Andric uint32_t EnterWeight; // aka y0 3255f757f3fSDimitry Andric uint32_t LoopBackWeight; // aka y1 3265f757f3fSDimitry Andric if (OrigLoopExitWeight > 0 && OrigLoopBackedgeWeight > 0) { 3275f757f3fSDimitry Andric ExitWeight0 = 0; 3285f757f3fSDimitry Andric if (HasConditionalPreHeader) { 3295f757f3fSDimitry Andric // Here we cannot know how many 0-trip count loops we have, so we guess: 3305f757f3fSDimitry Andric if (OrigLoopBackedgeWeight >= OrigLoopExitWeight) { 3315f757f3fSDimitry Andric // If the loop count is bigger than the exit count then we set 3325f757f3fSDimitry Andric // probabilities as if 0-trip count nearly never happens. 3335f757f3fSDimitry Andric ExitWeight0 = ZeroTripCountWeights[0]; 3345f757f3fSDimitry Andric // Scale up counts if necessary so we can match `ZeroTripCountWeights` 3355f757f3fSDimitry Andric // for the `ExitWeight0`:`ExitWeight1` (aka `x0`:`x1` ratio`) ratio. 3365f757f3fSDimitry Andric while (OrigLoopExitWeight < ZeroTripCountWeights[1] + ExitWeight0) { 3375f757f3fSDimitry Andric // ... but don't overflow. 3385f757f3fSDimitry Andric uint32_t const HighBit = uint32_t{1} << (sizeof(uint32_t) * 8 - 1); 3395f757f3fSDimitry Andric if ((OrigLoopBackedgeWeight & HighBit) != 0 || 3405f757f3fSDimitry Andric (OrigLoopExitWeight & HighBit) != 0) 3415f757f3fSDimitry Andric break; 3425f757f3fSDimitry Andric OrigLoopBackedgeWeight <<= 1; 3435f757f3fSDimitry Andric OrigLoopExitWeight <<= 1; 3445f757f3fSDimitry Andric } 3455f757f3fSDimitry Andric } else { 3465f757f3fSDimitry Andric // If there's a higher exit-count than backedge-count then we set 3475f757f3fSDimitry Andric // probabilities as if there are only 0-trip and 1-trip cases. 3485f757f3fSDimitry Andric ExitWeight0 = OrigLoopExitWeight - OrigLoopBackedgeWeight; 3495f757f3fSDimitry Andric } 350*0fca6ea1SDimitry Andric } else { 351*0fca6ea1SDimitry Andric // Theoretically, if the loop body must be executed at least once, the 352*0fca6ea1SDimitry Andric // backedge count must be not less than exit count. However the branch 353*0fca6ea1SDimitry Andric // weight collected by sampling-based PGO may be not very accurate due to 354*0fca6ea1SDimitry Andric // sampling. Therefore this workaround is required here to avoid underflow 355*0fca6ea1SDimitry Andric // of unsigned in following update of branch weight. 356*0fca6ea1SDimitry Andric if (OrigLoopExitWeight > OrigLoopBackedgeWeight) 357*0fca6ea1SDimitry Andric OrigLoopBackedgeWeight = OrigLoopExitWeight; 3585f757f3fSDimitry Andric } 359*0fca6ea1SDimitry Andric assert(OrigLoopExitWeight >= ExitWeight0 && "Bad branch weight"); 3605f757f3fSDimitry Andric ExitWeight1 = OrigLoopExitWeight - ExitWeight0; 3615f757f3fSDimitry Andric EnterWeight = ExitWeight1; 362*0fca6ea1SDimitry Andric assert(OrigLoopBackedgeWeight >= EnterWeight && "Bad branch weight"); 3635f757f3fSDimitry Andric LoopBackWeight = OrigLoopBackedgeWeight - EnterWeight; 3645f757f3fSDimitry Andric } else if (OrigLoopExitWeight == 0) { 3655f757f3fSDimitry Andric if (OrigLoopBackedgeWeight == 0) { 3665f757f3fSDimitry Andric // degenerate case... keep everything zero... 3675f757f3fSDimitry Andric ExitWeight0 = 0; 3685f757f3fSDimitry Andric ExitWeight1 = 0; 3695f757f3fSDimitry Andric EnterWeight = 0; 3705f757f3fSDimitry Andric LoopBackWeight = 0; 3715f757f3fSDimitry Andric } else { 3725f757f3fSDimitry Andric // Special case "LoopExitWeight == 0" weights which behaves like an 3735f757f3fSDimitry Andric // endless where we don't want loop-enttry (y0) to be the same as 3745f757f3fSDimitry Andric // loop-exit (x1). 3755f757f3fSDimitry Andric ExitWeight0 = 0; 3765f757f3fSDimitry Andric ExitWeight1 = 0; 3775f757f3fSDimitry Andric EnterWeight = 1; 3785f757f3fSDimitry Andric LoopBackWeight = OrigLoopBackedgeWeight; 3795f757f3fSDimitry Andric } 3805f757f3fSDimitry Andric } else { 3815f757f3fSDimitry Andric // loop is never entered. 3825f757f3fSDimitry Andric assert(OrigLoopBackedgeWeight == 0 && "remaining case is backedge zero"); 3835f757f3fSDimitry Andric ExitWeight0 = 1; 3845f757f3fSDimitry Andric ExitWeight1 = 1; 3855f757f3fSDimitry Andric EnterWeight = 0; 3865f757f3fSDimitry Andric LoopBackWeight = 0; 3875f757f3fSDimitry Andric } 3885f757f3fSDimitry Andric 3895f757f3fSDimitry Andric const uint32_t LoopBIWeights[] = { 3905f757f3fSDimitry Andric SuccsSwapped ? LoopBackWeight : ExitWeight1, 3915f757f3fSDimitry Andric SuccsSwapped ? ExitWeight1 : LoopBackWeight, 3925f757f3fSDimitry Andric }; 393*0fca6ea1SDimitry Andric setBranchWeights(LoopBI, LoopBIWeights, /*IsExpected=*/false); 3945f757f3fSDimitry Andric if (HasConditionalPreHeader) { 3955f757f3fSDimitry Andric const uint32_t PreHeaderBIWeights[] = { 3965f757f3fSDimitry Andric SuccsSwapped ? EnterWeight : ExitWeight0, 3975f757f3fSDimitry Andric SuccsSwapped ? ExitWeight0 : EnterWeight, 3985f757f3fSDimitry Andric }; 399*0fca6ea1SDimitry Andric setBranchWeights(PreHeaderBI, PreHeaderBIWeights, /*IsExpected=*/false); 4005f757f3fSDimitry Andric } 4015f757f3fSDimitry Andric } 4025f757f3fSDimitry Andric 4030b57cec5SDimitry Andric /// Rotate loop LP. Return true if the loop is rotated. 4040b57cec5SDimitry Andric /// 4050b57cec5SDimitry Andric /// \param SimplifiedLatch is true if the latch was just folded into the final 4060b57cec5SDimitry Andric /// loop exit. In this case we may want to rotate even though the new latch is 4070b57cec5SDimitry Andric /// now an exiting branch. This rotation would have happened had the latch not 4080b57cec5SDimitry Andric /// been simplified. However, if SimplifiedLatch is false, then we avoid 4090b57cec5SDimitry Andric /// rotating loops in which the latch exits to avoid excessive or endless 4100b57cec5SDimitry Andric /// rotation. LoopRotate should be repeatable and converge to a canonical 4110b57cec5SDimitry Andric /// form. This property is satisfied because simplifying the loop latch can only 4120b57cec5SDimitry Andric /// happen once across multiple invocations of the LoopRotate pass. 4135ffd83dbSDimitry Andric /// 4145ffd83dbSDimitry Andric /// If -loop-rotate-multi is enabled we can do multiple rotations in one go 4155ffd83dbSDimitry Andric /// so to reach a suitable (non-deoptimizing) exit. 4160b57cec5SDimitry Andric bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { 4170b57cec5SDimitry Andric // If the loop has only one block then there is not much to rotate. 4180b57cec5SDimitry Andric if (L->getBlocks().size() == 1) 4190b57cec5SDimitry Andric return false; 4200b57cec5SDimitry Andric 4215ffd83dbSDimitry Andric bool Rotated = false; 4225ffd83dbSDimitry Andric do { 4230b57cec5SDimitry Andric BasicBlock *OrigHeader = L->getHeader(); 4240b57cec5SDimitry Andric BasicBlock *OrigLatch = L->getLoopLatch(); 4250b57cec5SDimitry Andric 4260b57cec5SDimitry Andric BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator()); 4270b57cec5SDimitry Andric if (!BI || BI->isUnconditional()) 4285ffd83dbSDimitry Andric return Rotated; 4290b57cec5SDimitry Andric 4300b57cec5SDimitry Andric // If the loop header is not one of the loop exiting blocks then 4310b57cec5SDimitry Andric // either this loop is already rotated or it is not 4320b57cec5SDimitry Andric // suitable for loop rotation transformations. 4330b57cec5SDimitry Andric if (!L->isLoopExiting(OrigHeader)) 4345ffd83dbSDimitry Andric return Rotated; 4350b57cec5SDimitry Andric 4360b57cec5SDimitry Andric // If the loop latch already contains a branch that leaves the loop then the 4370b57cec5SDimitry Andric // loop is already rotated. 4380b57cec5SDimitry Andric if (!OrigLatch) 4395ffd83dbSDimitry Andric return Rotated; 4400b57cec5SDimitry Andric 4410b57cec5SDimitry Andric // Rotate if either the loop latch does *not* exit the loop, or if the loop 4420b57cec5SDimitry Andric // latch was just simplified. Or if we think it will be profitable. 4430b57cec5SDimitry Andric if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false && 4445ffd83dbSDimitry Andric !profitableToRotateLoopExitingLatch(L) && 4455ffd83dbSDimitry Andric !canRotateDeoptimizingLatchExit(L)) 4465ffd83dbSDimitry Andric return Rotated; 4470b57cec5SDimitry Andric 4480b57cec5SDimitry Andric // Check size of original header and reject loop if it is very big or we can't 4490b57cec5SDimitry Andric // duplicate blocks inside it. 4500b57cec5SDimitry Andric { 4510b57cec5SDimitry Andric SmallPtrSet<const Value *, 32> EphValues; 4520b57cec5SDimitry Andric CodeMetrics::collectEphemeralValues(L, AC, EphValues); 4530b57cec5SDimitry Andric 4540b57cec5SDimitry Andric CodeMetrics Metrics; 455e8d8bef9SDimitry Andric Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues, PrepareForLTO); 4560b57cec5SDimitry Andric if (Metrics.notDuplicatable) { 4570b57cec5SDimitry Andric LLVM_DEBUG( 4580b57cec5SDimitry Andric dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable" 4590b57cec5SDimitry Andric << " instructions: "; 4600b57cec5SDimitry Andric L->dump()); 4615ffd83dbSDimitry Andric return Rotated; 4620b57cec5SDimitry Andric } 463*0fca6ea1SDimitry Andric if (Metrics.Convergence != ConvergenceKind::None) { 4640b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent " 4650b57cec5SDimitry Andric "instructions: "; 4660b57cec5SDimitry Andric L->dump()); 4675ffd83dbSDimitry Andric return Rotated; 4680b57cec5SDimitry Andric } 46981ad6265SDimitry Andric if (!Metrics.NumInsts.isValid()) { 47081ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains instructions" 47181ad6265SDimitry Andric " with invalid cost: "; 47281ad6265SDimitry Andric L->dump()); 47381ad6265SDimitry Andric return Rotated; 47481ad6265SDimitry Andric } 475bdd1243dSDimitry Andric if (Metrics.NumInsts > MaxHeaderSize) { 4765ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains " 4775ffd83dbSDimitry Andric << Metrics.NumInsts 4785ffd83dbSDimitry Andric << " instructions, which is more than the threshold (" 4795ffd83dbSDimitry Andric << MaxHeaderSize << " instructions): "; 4805ffd83dbSDimitry Andric L->dump()); 481e8d8bef9SDimitry Andric ++NumNotRotatedDueToHeaderSize; 4825ffd83dbSDimitry Andric return Rotated; 4835ffd83dbSDimitry Andric } 484e8d8bef9SDimitry Andric 485e8d8bef9SDimitry Andric // When preparing for LTO, avoid rotating loops with calls that could be 486e8d8bef9SDimitry Andric // inlined during the LTO stage. 487e8d8bef9SDimitry Andric if (PrepareForLTO && Metrics.NumInlineCandidates > 0) 488e8d8bef9SDimitry Andric return Rotated; 4890b57cec5SDimitry Andric } 4900b57cec5SDimitry Andric 4910b57cec5SDimitry Andric // Now, this loop is suitable for rotation. 4920b57cec5SDimitry Andric BasicBlock *OrigPreheader = L->getLoopPreheader(); 4930b57cec5SDimitry Andric 4940b57cec5SDimitry Andric // If the loop could not be converted to canonical form, it must have an 4950b57cec5SDimitry Andric // indirectbr in it, just give up. 4960b57cec5SDimitry Andric if (!OrigPreheader || !L->hasDedicatedExits()) 4975ffd83dbSDimitry Andric return Rotated; 4980b57cec5SDimitry Andric 4990b57cec5SDimitry Andric // Anything ScalarEvolution may know about this loop or the PHI nodes 5000b57cec5SDimitry Andric // in its header will soon be invalidated. We should also invalidate 5010b57cec5SDimitry Andric // all outer loops because insertion and deletion of blocks that happens 5020b57cec5SDimitry Andric // during the rotation may violate invariants related to backedge taken 5030b57cec5SDimitry Andric // infos in them. 504bdd1243dSDimitry Andric if (SE) { 5050b57cec5SDimitry Andric SE->forgetTopmostLoop(L); 506bdd1243dSDimitry Andric // We may hoist some instructions out of loop. In case if they were cached 507bdd1243dSDimitry Andric // as "loop variant" or "loop computable", these caches must be dropped. 508bdd1243dSDimitry Andric // We also may fold basic blocks, so cached block dispositions also need 509bdd1243dSDimitry Andric // to be dropped. 510bdd1243dSDimitry Andric SE->forgetBlockAndLoopDispositions(); 511bdd1243dSDimitry Andric } 5120b57cec5SDimitry Andric 5130b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LoopRotation: rotating "; L->dump()); 5140b57cec5SDimitry Andric if (MSSAU && VerifyMemorySSA) 5150b57cec5SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA(); 5160b57cec5SDimitry Andric 5170b57cec5SDimitry Andric // Find new Loop header. NewHeader is a Header's one and only successor 5180b57cec5SDimitry Andric // that is inside loop. Header's other successor is outside the 5190b57cec5SDimitry Andric // loop. Otherwise loop is not suitable for rotation. 5200b57cec5SDimitry Andric BasicBlock *Exit = BI->getSuccessor(0); 5210b57cec5SDimitry Andric BasicBlock *NewHeader = BI->getSuccessor(1); 5225f757f3fSDimitry Andric bool BISuccsSwapped = L->contains(Exit); 5235f757f3fSDimitry Andric if (BISuccsSwapped) 5240b57cec5SDimitry Andric std::swap(Exit, NewHeader); 5250b57cec5SDimitry Andric assert(NewHeader && "Unable to determine new loop header"); 5260b57cec5SDimitry Andric assert(L->contains(NewHeader) && !L->contains(Exit) && 5270b57cec5SDimitry Andric "Unable to determine loop header and exit blocks"); 5280b57cec5SDimitry Andric 5290b57cec5SDimitry Andric // This code assumes that the new header has exactly one predecessor. 5300b57cec5SDimitry Andric // Remove any single-entry PHI nodes in it. 5310b57cec5SDimitry Andric assert(NewHeader->getSinglePredecessor() && 5320b57cec5SDimitry Andric "New header doesn't have one pred!"); 5330b57cec5SDimitry Andric FoldSingleEntryPHINodes(NewHeader); 5340b57cec5SDimitry Andric 5350b57cec5SDimitry Andric // Begin by walking OrigHeader and populating ValueMap with an entry for 5360b57cec5SDimitry Andric // each Instruction. 5370b57cec5SDimitry Andric BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end(); 5380b57cec5SDimitry Andric ValueToValueMapTy ValueMap, ValueMapMSSA; 5390b57cec5SDimitry Andric 5400b57cec5SDimitry Andric // For PHI nodes, the value available in OldPreHeader is just the 5410b57cec5SDimitry Andric // incoming value from OldPreHeader. 5420b57cec5SDimitry Andric for (; PHINode *PN = dyn_cast<PHINode>(I); ++I) 543480093f4SDimitry Andric InsertNewValueIntoMap(ValueMap, PN, 544480093f4SDimitry Andric PN->getIncomingValueForBlock(OrigPreheader)); 5450b57cec5SDimitry Andric 5460b57cec5SDimitry Andric // For the rest of the instructions, either hoist to the OrigPreheader if 5470b57cec5SDimitry Andric // possible or create a clone in the OldPreHeader if not. 5480b57cec5SDimitry Andric Instruction *LoopEntryBranch = OrigPreheader->getTerminator(); 5490b57cec5SDimitry Andric 550fe6060f1SDimitry Andric // Record all debug intrinsics preceding LoopEntryBranch to avoid 551fe6060f1SDimitry Andric // duplication. 5520b57cec5SDimitry Andric using DbgIntrinsicHash = 553fe6060f1SDimitry Andric std::pair<std::pair<hash_code, DILocalVariable *>, DIExpression *>; 5545f757f3fSDimitry Andric auto makeHash = [](auto *D) -> DbgIntrinsicHash { 555fe6060f1SDimitry Andric auto VarLocOps = D->location_ops(); 556fe6060f1SDimitry Andric return {{hash_combine_range(VarLocOps.begin(), VarLocOps.end()), 557fe6060f1SDimitry Andric D->getVariable()}, 558fe6060f1SDimitry Andric D->getExpression()}; 5590b57cec5SDimitry Andric }; 5605f757f3fSDimitry Andric 5610b57cec5SDimitry Andric SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics; 562349cc55cSDimitry Andric for (Instruction &I : llvm::drop_begin(llvm::reverse(*OrigPreheader))) { 5635f757f3fSDimitry Andric if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I)) { 5640b57cec5SDimitry Andric DbgIntrinsics.insert(makeHash(DII)); 565*0fca6ea1SDimitry Andric // Until RemoveDIs supports dbg.declares in DbgVariableRecord format, 566*0fca6ea1SDimitry Andric // we'll need to collect DbgVariableRecords attached to any other debug 567*0fca6ea1SDimitry Andric // intrinsics. 568*0fca6ea1SDimitry Andric for (const DbgVariableRecord &DVR : 569*0fca6ea1SDimitry Andric filterDbgVars(DII->getDbgRecordRange())) 570*0fca6ea1SDimitry Andric DbgIntrinsics.insert(makeHash(&DVR)); 5715f757f3fSDimitry Andric } else { 5720b57cec5SDimitry Andric break; 5730b57cec5SDimitry Andric } 5745f757f3fSDimitry Andric } 5755f757f3fSDimitry Andric 576*0fca6ea1SDimitry Andric // Build DbgVariableRecord hashes for DbgVariableRecords attached to the 577*0fca6ea1SDimitry Andric // terminator, which isn't considered in the loop above. 578*0fca6ea1SDimitry Andric for (const DbgVariableRecord &DVR : 579*0fca6ea1SDimitry Andric filterDbgVars(OrigPreheader->getTerminator()->getDbgRecordRange())) 580*0fca6ea1SDimitry Andric DbgIntrinsics.insert(makeHash(&DVR)); 5810b57cec5SDimitry Andric 582e8d8bef9SDimitry Andric // Remember the local noalias scope declarations in the header. After the 583e8d8bef9SDimitry Andric // rotation, they must be duplicated and the scope must be cloned. This 584e8d8bef9SDimitry Andric // avoids unwanted interaction across iterations. 585e8d8bef9SDimitry Andric SmallVector<NoAliasScopeDeclInst *, 6> NoAliasDeclInstructions; 586e8d8bef9SDimitry Andric for (Instruction &I : *OrigHeader) 587e8d8bef9SDimitry Andric if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) 588e8d8bef9SDimitry Andric NoAliasDeclInstructions.push_back(Decl); 589e8d8bef9SDimitry Andric 5905f757f3fSDimitry Andric Module *M = OrigHeader->getModule(); 5915f757f3fSDimitry Andric 592*0fca6ea1SDimitry Andric // Track the next DbgRecord to clone. If we have a sequence where an 5935f757f3fSDimitry Andric // instruction is hoisted instead of being cloned: 594*0fca6ea1SDimitry Andric // DbgRecord blah 5955f757f3fSDimitry Andric // %foo = add i32 0, 0 596*0fca6ea1SDimitry Andric // DbgRecord xyzzy 5975f757f3fSDimitry Andric // %bar = call i32 @foobar() 598*0fca6ea1SDimitry Andric // where %foo is hoisted, then the DbgRecord "blah" will be seen twice, once 5995f757f3fSDimitry Andric // attached to %foo, then when %foo his hoisted it will "fall down" onto the 6005f757f3fSDimitry Andric // function call: 601*0fca6ea1SDimitry Andric // DbgRecord blah 602*0fca6ea1SDimitry Andric // DbgRecord xyzzy 6035f757f3fSDimitry Andric // %bar = call i32 @foobar() 6045f757f3fSDimitry Andric // causing it to appear attached to the call too. 6055f757f3fSDimitry Andric // 6065f757f3fSDimitry Andric // To avoid this, cloneDebugInfoFrom takes an optional "start cloning from 607*0fca6ea1SDimitry Andric // here" position to account for this behaviour. We point it at any 608*0fca6ea1SDimitry Andric // DbgRecords on the next instruction, here labelled xyzzy, before we hoist 609*0fca6ea1SDimitry Andric // %foo. Later, we only only clone DbgRecords from that position (xyzzy) 610*0fca6ea1SDimitry Andric // onwards, which avoids cloning DbgRecord "blah" multiple times. (Stored as 611*0fca6ea1SDimitry Andric // a range because it gives us a natural way of testing whether 612*0fca6ea1SDimitry Andric // there were DbgRecords on the next instruction before we hoisted things). 613*0fca6ea1SDimitry Andric iterator_range<DbgRecord::self_iterator> NextDbgInsts = 614*0fca6ea1SDimitry Andric (I != E) ? I->getDbgRecordRange() : DbgMarker::getEmptyDbgRecordRange(); 6155f757f3fSDimitry Andric 6160b57cec5SDimitry Andric while (I != E) { 6170b57cec5SDimitry Andric Instruction *Inst = &*I++; 6180b57cec5SDimitry Andric 6190b57cec5SDimitry Andric // If the instruction's operands are invariant and it doesn't read or write 6200b57cec5SDimitry Andric // memory, then it is safe to hoist. Doing this doesn't change the order of 6210b57cec5SDimitry Andric // execution in the preheader, but does prevent the instruction from 6220b57cec5SDimitry Andric // executing in each iteration of the loop. This means it is safe to hoist 6230b57cec5SDimitry Andric // something that might trap, but isn't safe to hoist something that reads 6240b57cec5SDimitry Andric // memory (without proving that the loop doesn't write). 6250b57cec5SDimitry Andric if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() && 6260b57cec5SDimitry Andric !Inst->mayWriteToMemory() && !Inst->isTerminator() && 627*0fca6ea1SDimitry Andric !isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst) && 628*0fca6ea1SDimitry Andric // It is not safe to hoist the value of these instructions in 629*0fca6ea1SDimitry Andric // coroutines, as the addresses of otherwise eligible variables (e.g. 630*0fca6ea1SDimitry Andric // thread-local variables and errno) may change if the coroutine is 631*0fca6ea1SDimitry Andric // resumed in a different thread.Therefore, we disable this 632*0fca6ea1SDimitry Andric // optimization for correctness. However, this may block other correct 633*0fca6ea1SDimitry Andric // optimizations. 634*0fca6ea1SDimitry Andric // FIXME: This should be reverted once we have a better model for 635*0fca6ea1SDimitry Andric // memory access in coroutines. 636*0fca6ea1SDimitry Andric !Inst->getFunction()->isPresplitCoroutine()) { 6375f757f3fSDimitry Andric 638*0fca6ea1SDimitry Andric if (LoopEntryBranch->getParent()->IsNewDbgInfoFormat && 639*0fca6ea1SDimitry Andric !NextDbgInsts.empty()) { 6405f757f3fSDimitry Andric auto DbgValueRange = 641*0fca6ea1SDimitry Andric LoopEntryBranch->cloneDebugInfoFrom(Inst, NextDbgInsts.begin()); 642*0fca6ea1SDimitry Andric RemapDbgRecordRange(M, DbgValueRange, ValueMap, 6435f757f3fSDimitry Andric RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 6445f757f3fSDimitry Andric // Erase anything we've seen before. 645*0fca6ea1SDimitry Andric for (DbgVariableRecord &DVR : 646*0fca6ea1SDimitry Andric make_early_inc_range(filterDbgVars(DbgValueRange))) 647*0fca6ea1SDimitry Andric if (DbgIntrinsics.count(makeHash(&DVR))) 648*0fca6ea1SDimitry Andric DVR.eraseFromParent(); 6495f757f3fSDimitry Andric } 6505f757f3fSDimitry Andric 651*0fca6ea1SDimitry Andric NextDbgInsts = I->getDbgRecordRange(); 652*0fca6ea1SDimitry Andric 6530b57cec5SDimitry Andric Inst->moveBefore(LoopEntryBranch); 6545f757f3fSDimitry Andric 655fe6060f1SDimitry Andric ++NumInstrsHoisted; 6560b57cec5SDimitry Andric continue; 6570b57cec5SDimitry Andric } 6580b57cec5SDimitry Andric 6590b57cec5SDimitry Andric // Otherwise, create a duplicate of the instruction. 6600b57cec5SDimitry Andric Instruction *C = Inst->clone(); 66106c3fb27SDimitry Andric C->insertBefore(LoopEntryBranch); 66206c3fb27SDimitry Andric 663fe6060f1SDimitry Andric ++NumInstrsDuplicated; 6640b57cec5SDimitry Andric 665*0fca6ea1SDimitry Andric if (LoopEntryBranch->getParent()->IsNewDbgInfoFormat && 666*0fca6ea1SDimitry Andric !NextDbgInsts.empty()) { 667*0fca6ea1SDimitry Andric auto Range = C->cloneDebugInfoFrom(Inst, NextDbgInsts.begin()); 668*0fca6ea1SDimitry Andric RemapDbgRecordRange(M, Range, ValueMap, 6695f757f3fSDimitry Andric RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 670*0fca6ea1SDimitry Andric NextDbgInsts = DbgMarker::getEmptyDbgRecordRange(); 6715f757f3fSDimitry Andric // Erase anything we've seen before. 672*0fca6ea1SDimitry Andric for (DbgVariableRecord &DVR : 673*0fca6ea1SDimitry Andric make_early_inc_range(filterDbgVars(Range))) 674*0fca6ea1SDimitry Andric if (DbgIntrinsics.count(makeHash(&DVR))) 675*0fca6ea1SDimitry Andric DVR.eraseFromParent(); 6765f757f3fSDimitry Andric } 6775f757f3fSDimitry Andric 6780b57cec5SDimitry Andric // Eagerly remap the operands of the instruction. 6790b57cec5SDimitry Andric RemapInstruction(C, ValueMap, 6800b57cec5SDimitry Andric RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 6810b57cec5SDimitry Andric 6820b57cec5SDimitry Andric // Avoid inserting the same intrinsic twice. 6830b57cec5SDimitry Andric if (auto *DII = dyn_cast<DbgVariableIntrinsic>(C)) 6840b57cec5SDimitry Andric if (DbgIntrinsics.count(makeHash(DII))) { 68506c3fb27SDimitry Andric C->eraseFromParent(); 6860b57cec5SDimitry Andric continue; 6870b57cec5SDimitry Andric } 6880b57cec5SDimitry Andric 6890b57cec5SDimitry Andric // With the operands remapped, see if the instruction constant folds or is 6900b57cec5SDimitry Andric // otherwise simplifyable. This commonly occurs because the entry from PHI 6910b57cec5SDimitry Andric // nodes allows icmps and other instructions to fold. 69281ad6265SDimitry Andric Value *V = simplifyInstruction(C, SQ); 6930b57cec5SDimitry Andric if (V && LI->replacementPreservesLCSSAForm(C, V)) { 6940b57cec5SDimitry Andric // If so, then delete the temporary instruction and stick the folded value 6950b57cec5SDimitry Andric // in the map. 696480093f4SDimitry Andric InsertNewValueIntoMap(ValueMap, Inst, V); 6970b57cec5SDimitry Andric if (!C->mayHaveSideEffects()) { 69806c3fb27SDimitry Andric C->eraseFromParent(); 6990b57cec5SDimitry Andric C = nullptr; 7000b57cec5SDimitry Andric } 7010b57cec5SDimitry Andric } else { 702480093f4SDimitry Andric InsertNewValueIntoMap(ValueMap, Inst, C); 7030b57cec5SDimitry Andric } 7040b57cec5SDimitry Andric if (C) { 7050b57cec5SDimitry Andric // Otherwise, stick the new instruction into the new block! 7060b57cec5SDimitry Andric C->setName(Inst->getName()); 7070b57cec5SDimitry Andric 708fe6060f1SDimitry Andric if (auto *II = dyn_cast<AssumeInst>(C)) 7090b57cec5SDimitry Andric AC->registerAssumption(II); 7100b57cec5SDimitry Andric // MemorySSA cares whether the cloned instruction was inserted or not, and 7110b57cec5SDimitry Andric // not whether it can be remapped to a simplified value. 712480093f4SDimitry Andric if (MSSAU) 713480093f4SDimitry Andric InsertNewValueIntoMap(ValueMapMSSA, Inst, C); 7140b57cec5SDimitry Andric } 7150b57cec5SDimitry Andric } 7160b57cec5SDimitry Andric 717e8d8bef9SDimitry Andric if (!NoAliasDeclInstructions.empty()) { 718e8d8bef9SDimitry Andric // There are noalias scope declarations: 719e8d8bef9SDimitry Andric // (general): 720e8d8bef9SDimitry Andric // Original: OrigPre { OrigHeader NewHeader ... Latch } 721e8d8bef9SDimitry Andric // after: (OrigPre+OrigHeader') { NewHeader ... Latch OrigHeader } 722e8d8bef9SDimitry Andric // 723e8d8bef9SDimitry Andric // with D: llvm.experimental.noalias.scope.decl, 724e8d8bef9SDimitry Andric // U: !noalias or !alias.scope depending on D 725e8d8bef9SDimitry Andric // ... { D U1 U2 } can transform into: 726e8d8bef9SDimitry Andric // (0) : ... { D U1 U2 } // no relevant rotation for this part 727e8d8bef9SDimitry Andric // (1) : ... D' { U1 U2 D } // D is part of OrigHeader 728e8d8bef9SDimitry Andric // (2) : ... D' U1' { U2 D U1 } // D, U1 are part of OrigHeader 729e8d8bef9SDimitry Andric // 730e8d8bef9SDimitry Andric // We now want to transform: 731e8d8bef9SDimitry Andric // (1) -> : ... D' { D U1 U2 D'' } 732e8d8bef9SDimitry Andric // (2) -> : ... D' U1' { D U2 D'' U1'' } 733e8d8bef9SDimitry Andric // D: original llvm.experimental.noalias.scope.decl 734e8d8bef9SDimitry Andric // D', U1': duplicate with replaced scopes 735e8d8bef9SDimitry Andric // D'', U1'': different duplicate with replaced scopes 736e8d8bef9SDimitry Andric // This ensures a safe fallback to 'may_alias' introduced by the rotate, 737e8d8bef9SDimitry Andric // as U1'' and U1' scopes will not be compatible wrt to the local restrict 738e8d8bef9SDimitry Andric 739e8d8bef9SDimitry Andric // Clone the llvm.experimental.noalias.decl again for the NewHeader. 7405f757f3fSDimitry Andric BasicBlock::iterator NewHeaderInsertionPoint = 7415f757f3fSDimitry Andric NewHeader->getFirstNonPHIIt(); 742e8d8bef9SDimitry Andric for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions) { 743e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << " Cloning llvm.experimental.noalias.scope.decl:" 744e8d8bef9SDimitry Andric << *NAD << "\n"); 745e8d8bef9SDimitry Andric Instruction *NewNAD = NAD->clone(); 7465f757f3fSDimitry Andric NewNAD->insertBefore(*NewHeader, NewHeaderInsertionPoint); 747e8d8bef9SDimitry Andric } 748e8d8bef9SDimitry Andric 749e8d8bef9SDimitry Andric // Scopes must now be duplicated, once for OrigHeader and once for 750e8d8bef9SDimitry Andric // OrigPreHeader'. 751e8d8bef9SDimitry Andric { 752e8d8bef9SDimitry Andric auto &Context = NewHeader->getContext(); 753e8d8bef9SDimitry Andric 754e8d8bef9SDimitry Andric SmallVector<MDNode *, 8> NoAliasDeclScopes; 755e8d8bef9SDimitry Andric for (NoAliasScopeDeclInst *NAD : NoAliasDeclInstructions) 756e8d8bef9SDimitry Andric NoAliasDeclScopes.push_back(NAD->getScopeList()); 757e8d8bef9SDimitry Andric 758e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << " Updating OrigHeader scopes\n"); 759e8d8bef9SDimitry Andric cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, {OrigHeader}, Context, 760e8d8bef9SDimitry Andric "h.rot"); 761e8d8bef9SDimitry Andric LLVM_DEBUG(OrigHeader->dump()); 762e8d8bef9SDimitry Andric 763e8d8bef9SDimitry Andric // Keep the compile time impact low by only adapting the inserted block 764e8d8bef9SDimitry Andric // of instructions in the OrigPreHeader. This might result in slightly 765e8d8bef9SDimitry Andric // more aliasing between these instructions and those that were already 766e8d8bef9SDimitry Andric // present, but it will be much faster when the original PreHeader is 767e8d8bef9SDimitry Andric // large. 768e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << " Updating part of OrigPreheader scopes\n"); 769e8d8bef9SDimitry Andric auto *FirstDecl = 770e8d8bef9SDimitry Andric cast<Instruction>(ValueMap[*NoAliasDeclInstructions.begin()]); 771e8d8bef9SDimitry Andric auto *LastInst = &OrigPreheader->back(); 772e8d8bef9SDimitry Andric cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, FirstDecl, LastInst, 773e8d8bef9SDimitry Andric Context, "pre.rot"); 774e8d8bef9SDimitry Andric LLVM_DEBUG(OrigPreheader->dump()); 775e8d8bef9SDimitry Andric 776e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << " Updated NewHeader:\n"); 777e8d8bef9SDimitry Andric LLVM_DEBUG(NewHeader->dump()); 778e8d8bef9SDimitry Andric } 779e8d8bef9SDimitry Andric } 780e8d8bef9SDimitry Andric 7810b57cec5SDimitry Andric // Along with all the other instructions, we just cloned OrigHeader's 7820b57cec5SDimitry Andric // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's 7830b57cec5SDimitry Andric // successors by duplicating their incoming values for OrigHeader. 7840b57cec5SDimitry Andric for (BasicBlock *SuccBB : successors(OrigHeader)) 7850b57cec5SDimitry Andric for (BasicBlock::iterator BI = SuccBB->begin(); 7860b57cec5SDimitry Andric PHINode *PN = dyn_cast<PHINode>(BI); ++BI) 7870b57cec5SDimitry Andric PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader); 7880b57cec5SDimitry Andric 7890b57cec5SDimitry Andric // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove 7900b57cec5SDimitry Andric // OrigPreHeader's old terminator (the original branch into the loop), and 7910b57cec5SDimitry Andric // remove the corresponding incoming values from the PHI nodes in OrigHeader. 7920b57cec5SDimitry Andric LoopEntryBranch->eraseFromParent(); 793*0fca6ea1SDimitry Andric OrigPreheader->flushTerminatorDbgRecords(); 7940b57cec5SDimitry Andric 7950b57cec5SDimitry Andric // Update MemorySSA before the rewrite call below changes the 1:1 7960b57cec5SDimitry Andric // instruction:cloned_instruction_or_value mapping. 7970b57cec5SDimitry Andric if (MSSAU) { 798480093f4SDimitry Andric InsertNewValueIntoMap(ValueMapMSSA, OrigHeader, OrigPreheader); 7990b57cec5SDimitry Andric MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader, 8000b57cec5SDimitry Andric ValueMapMSSA); 8010b57cec5SDimitry Andric } 8020b57cec5SDimitry Andric 8030b57cec5SDimitry Andric SmallVector<PHINode*, 2> InsertedPHIs; 8040b57cec5SDimitry Andric // If there were any uses of instructions in the duplicated block outside the 8050b57cec5SDimitry Andric // loop, update them, inserting PHI nodes as required 806349cc55cSDimitry Andric RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap, SE, 8070b57cec5SDimitry Andric &InsertedPHIs); 8080b57cec5SDimitry Andric 8090b57cec5SDimitry Andric // Attach dbg.value intrinsics to the new phis if that phi uses a value that 8100b57cec5SDimitry Andric // previously had debug metadata attached. This keeps the debug info 8110b57cec5SDimitry Andric // up-to-date in the loop body. 8120b57cec5SDimitry Andric if (!InsertedPHIs.empty()) 8130b57cec5SDimitry Andric insertDebugValuesForPHIs(OrigHeader, InsertedPHIs); 8140b57cec5SDimitry Andric 8150b57cec5SDimitry Andric // NewHeader is now the header of the loop. 8160b57cec5SDimitry Andric L->moveToHeader(NewHeader); 8170b57cec5SDimitry Andric assert(L->getHeader() == NewHeader && "Latch block is our new header"); 8180b57cec5SDimitry Andric 8190b57cec5SDimitry Andric // Inform DT about changes to the CFG. 8200b57cec5SDimitry Andric if (DT) { 8210b57cec5SDimitry Andric // The OrigPreheader branches to the NewHeader and Exit now. Then, inform 8220b57cec5SDimitry Andric // the DT about the removed edge to the OrigHeader (that got removed). 8230b57cec5SDimitry Andric SmallVector<DominatorTree::UpdateType, 3> Updates; 8240b57cec5SDimitry Andric Updates.push_back({DominatorTree::Insert, OrigPreheader, Exit}); 8250b57cec5SDimitry Andric Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader}); 8260b57cec5SDimitry Andric Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader}); 8270b57cec5SDimitry Andric 8280b57cec5SDimitry Andric if (MSSAU) { 829e8d8bef9SDimitry Andric MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true); 8300b57cec5SDimitry Andric if (VerifyMemorySSA) 8310b57cec5SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA(); 832e8d8bef9SDimitry Andric } else { 833e8d8bef9SDimitry Andric DT->applyUpdates(Updates); 8340b57cec5SDimitry Andric } 8350b57cec5SDimitry Andric } 8360b57cec5SDimitry Andric 8370b57cec5SDimitry Andric // At this point, we've finished our major CFG changes. As part of cloning 8380b57cec5SDimitry Andric // the loop into the preheader we've simplified instructions and the 8390b57cec5SDimitry Andric // duplicated conditional branch may now be branching on a constant. If it is 8400b57cec5SDimitry Andric // branching on a constant and if that constant means that we enter the loop, 8410b57cec5SDimitry Andric // then we fold away the cond branch to an uncond branch. This simplifies the 8420b57cec5SDimitry Andric // loop in cases important for nested loops, and it also means we don't have 8430b57cec5SDimitry Andric // to split as many edges. 8440b57cec5SDimitry Andric BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator()); 8450b57cec5SDimitry Andric assert(PHBI->isConditional() && "Should be clone of BI condbr!"); 8465f757f3fSDimitry Andric const Value *Cond = PHBI->getCondition(); 8475f757f3fSDimitry Andric const bool HasConditionalPreHeader = 8485f757f3fSDimitry Andric !isa<ConstantInt>(Cond) || 8495f757f3fSDimitry Andric PHBI->getSuccessor(cast<ConstantInt>(Cond)->isZero()) != NewHeader; 8505f757f3fSDimitry Andric 8515f757f3fSDimitry Andric updateBranchWeights(*PHBI, *BI, HasConditionalPreHeader, BISuccsSwapped); 8525f757f3fSDimitry Andric 8535f757f3fSDimitry Andric if (HasConditionalPreHeader) { 8540b57cec5SDimitry Andric // The conditional branch can't be folded, handle the general case. 8550b57cec5SDimitry Andric // Split edges as necessary to preserve LoopSimplify form. 8560b57cec5SDimitry Andric 8570b57cec5SDimitry Andric // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and 8580b57cec5SDimitry Andric // thus is not a preheader anymore. 8590b57cec5SDimitry Andric // Split the edge to form a real preheader. 8600b57cec5SDimitry Andric BasicBlock *NewPH = SplitCriticalEdge( 8610b57cec5SDimitry Andric OrigPreheader, NewHeader, 8620b57cec5SDimitry Andric CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA()); 8630b57cec5SDimitry Andric NewPH->setName(NewHeader->getName() + ".lr.ph"); 8640b57cec5SDimitry Andric 8650b57cec5SDimitry Andric // Preserve canonical loop form, which means that 'Exit' should have only 8660b57cec5SDimitry Andric // one predecessor. Note that Exit could be an exit block for multiple 8670b57cec5SDimitry Andric // nested loops, causing both of the edges to now be critical and need to 8680b57cec5SDimitry Andric // be split. 869349cc55cSDimitry Andric SmallVector<BasicBlock *, 4> ExitPreds(predecessors(Exit)); 8700b57cec5SDimitry Andric bool SplitLatchEdge = false; 8710b57cec5SDimitry Andric for (BasicBlock *ExitPred : ExitPreds) { 8720b57cec5SDimitry Andric // We only need to split loop exit edges. 8730b57cec5SDimitry Andric Loop *PredLoop = LI->getLoopFor(ExitPred); 8740b57cec5SDimitry Andric if (!PredLoop || PredLoop->contains(Exit) || 875fcaf7f86SDimitry Andric isa<IndirectBrInst>(ExitPred->getTerminator())) 8760b57cec5SDimitry Andric continue; 8770b57cec5SDimitry Andric SplitLatchEdge |= L->getLoopLatch() == ExitPred; 8780b57cec5SDimitry Andric BasicBlock *ExitSplit = SplitCriticalEdge( 8790b57cec5SDimitry Andric ExitPred, Exit, 8800b57cec5SDimitry Andric CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA()); 8810b57cec5SDimitry Andric ExitSplit->moveBefore(Exit); 8820b57cec5SDimitry Andric } 8830b57cec5SDimitry Andric assert(SplitLatchEdge && 8840b57cec5SDimitry Andric "Despite splitting all preds, failed to split latch exit?"); 885fe6060f1SDimitry Andric (void)SplitLatchEdge; 8860b57cec5SDimitry Andric } else { 8870b57cec5SDimitry Andric // We can fold the conditional branch in the preheader, this makes things 8880b57cec5SDimitry Andric // simpler. The first step is to remove the extra edge to the Exit block. 8890b57cec5SDimitry Andric Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/); 890*0fca6ea1SDimitry Andric BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI->getIterator()); 8910b57cec5SDimitry Andric NewBI->setDebugLoc(PHBI->getDebugLoc()); 8920b57cec5SDimitry Andric PHBI->eraseFromParent(); 8930b57cec5SDimitry Andric 8940b57cec5SDimitry Andric // With our CFG finalized, update DomTree if it is available. 8950b57cec5SDimitry Andric if (DT) DT->deleteEdge(OrigPreheader, Exit); 8960b57cec5SDimitry Andric 8970b57cec5SDimitry Andric // Update MSSA too, if available. 8980b57cec5SDimitry Andric if (MSSAU) 8990b57cec5SDimitry Andric MSSAU->removeEdge(OrigPreheader, Exit); 9000b57cec5SDimitry Andric } 9010b57cec5SDimitry Andric 9020b57cec5SDimitry Andric assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation"); 9030b57cec5SDimitry Andric assert(L->getLoopLatch() && "Invalid loop latch after loop rotation"); 9040b57cec5SDimitry Andric 9050b57cec5SDimitry Andric if (MSSAU && VerifyMemorySSA) 9060b57cec5SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA(); 9070b57cec5SDimitry Andric 9080b57cec5SDimitry Andric // Now that the CFG and DomTree are in a consistent state again, try to merge 9090b57cec5SDimitry Andric // the OrigHeader block into OrigLatch. This will succeed if they are 9100b57cec5SDimitry Andric // connected by an unconditional branch. This is just a cleanup so the 9110b57cec5SDimitry Andric // emitted code isn't too gross in this common case. 9120b57cec5SDimitry Andric DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); 913e8d8bef9SDimitry Andric BasicBlock *PredBB = OrigHeader->getUniquePredecessor(); 914e8d8bef9SDimitry Andric bool DidMerge = MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU); 915e8d8bef9SDimitry Andric if (DidMerge) 916e8d8bef9SDimitry Andric RemoveRedundantDbgInstrs(PredBB); 9170b57cec5SDimitry Andric 9180b57cec5SDimitry Andric if (MSSAU && VerifyMemorySSA) 9190b57cec5SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA(); 9200b57cec5SDimitry Andric 9210b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LoopRotation: into "; L->dump()); 9220b57cec5SDimitry Andric 9230b57cec5SDimitry Andric ++NumRotated; 9245ffd83dbSDimitry Andric 9255ffd83dbSDimitry Andric Rotated = true; 9265ffd83dbSDimitry Andric SimplifiedLatch = false; 9275ffd83dbSDimitry Andric 9285ffd83dbSDimitry Andric // Check that new latch is a deoptimizing exit and then repeat rotation if possible. 9295ffd83dbSDimitry Andric // Deoptimizing latch exit is not a generally typical case, so we just loop over. 9305ffd83dbSDimitry Andric // TODO: if it becomes a performance bottleneck extend rotation algorithm 9315ffd83dbSDimitry Andric // to handle multiple rotations in one go. 9325ffd83dbSDimitry Andric } while (MultiRotate && canRotateDeoptimizingLatchExit(L)); 9335ffd83dbSDimitry Andric 9345ffd83dbSDimitry Andric 9350b57cec5SDimitry Andric return true; 9360b57cec5SDimitry Andric } 9370b57cec5SDimitry Andric 9380b57cec5SDimitry Andric /// Determine whether the instructions in this range may be safely and cheaply 9390b57cec5SDimitry Andric /// speculated. This is not an important enough situation to develop complex 9400b57cec5SDimitry Andric /// heuristics. We handle a single arithmetic instruction along with any type 9410b57cec5SDimitry Andric /// conversions. 9420b57cec5SDimitry Andric static bool shouldSpeculateInstrs(BasicBlock::iterator Begin, 9430b57cec5SDimitry Andric BasicBlock::iterator End, Loop *L) { 9440b57cec5SDimitry Andric bool seenIncrement = false; 9450b57cec5SDimitry Andric bool MultiExitLoop = false; 9460b57cec5SDimitry Andric 9470b57cec5SDimitry Andric if (!L->getExitingBlock()) 9480b57cec5SDimitry Andric MultiExitLoop = true; 9490b57cec5SDimitry Andric 9500b57cec5SDimitry Andric for (BasicBlock::iterator I = Begin; I != End; ++I) { 9510b57cec5SDimitry Andric 9520b57cec5SDimitry Andric if (!isSafeToSpeculativelyExecute(&*I)) 9530b57cec5SDimitry Andric return false; 9540b57cec5SDimitry Andric 9550b57cec5SDimitry Andric if (isa<DbgInfoIntrinsic>(I)) 9560b57cec5SDimitry Andric continue; 9570b57cec5SDimitry Andric 9580b57cec5SDimitry Andric switch (I->getOpcode()) { 9590b57cec5SDimitry Andric default: 9600b57cec5SDimitry Andric return false; 9610b57cec5SDimitry Andric case Instruction::GetElementPtr: 9620b57cec5SDimitry Andric // GEPs are cheap if all indices are constant. 9630b57cec5SDimitry Andric if (!cast<GEPOperator>(I)->hasAllConstantIndices()) 9640b57cec5SDimitry Andric return false; 9650b57cec5SDimitry Andric // fall-thru to increment case 966bdd1243dSDimitry Andric [[fallthrough]]; 9670b57cec5SDimitry Andric case Instruction::Add: 9680b57cec5SDimitry Andric case Instruction::Sub: 9690b57cec5SDimitry Andric case Instruction::And: 9700b57cec5SDimitry Andric case Instruction::Or: 9710b57cec5SDimitry Andric case Instruction::Xor: 9720b57cec5SDimitry Andric case Instruction::Shl: 9730b57cec5SDimitry Andric case Instruction::LShr: 9740b57cec5SDimitry Andric case Instruction::AShr: { 9750b57cec5SDimitry Andric Value *IVOpnd = 9760b57cec5SDimitry Andric !isa<Constant>(I->getOperand(0)) 9770b57cec5SDimitry Andric ? I->getOperand(0) 9780b57cec5SDimitry Andric : !isa<Constant>(I->getOperand(1)) ? I->getOperand(1) : nullptr; 9790b57cec5SDimitry Andric if (!IVOpnd) 9800b57cec5SDimitry Andric return false; 9810b57cec5SDimitry Andric 9820b57cec5SDimitry Andric // If increment operand is used outside of the loop, this speculation 9830b57cec5SDimitry Andric // could cause extra live range interference. 9840b57cec5SDimitry Andric if (MultiExitLoop) { 9850b57cec5SDimitry Andric for (User *UseI : IVOpnd->users()) { 9860b57cec5SDimitry Andric auto *UserInst = cast<Instruction>(UseI); 9870b57cec5SDimitry Andric if (!L->contains(UserInst)) 9880b57cec5SDimitry Andric return false; 9890b57cec5SDimitry Andric } 9900b57cec5SDimitry Andric } 9910b57cec5SDimitry Andric 9920b57cec5SDimitry Andric if (seenIncrement) 9930b57cec5SDimitry Andric return false; 9940b57cec5SDimitry Andric seenIncrement = true; 9950b57cec5SDimitry Andric break; 9960b57cec5SDimitry Andric } 9970b57cec5SDimitry Andric case Instruction::Trunc: 9980b57cec5SDimitry Andric case Instruction::ZExt: 9990b57cec5SDimitry Andric case Instruction::SExt: 10000b57cec5SDimitry Andric // ignore type conversions 10010b57cec5SDimitry Andric break; 10020b57cec5SDimitry Andric } 10030b57cec5SDimitry Andric } 10040b57cec5SDimitry Andric return true; 10050b57cec5SDimitry Andric } 10060b57cec5SDimitry Andric 10070b57cec5SDimitry Andric /// Fold the loop tail into the loop exit by speculating the loop tail 10080b57cec5SDimitry Andric /// instructions. Typically, this is a single post-increment. In the case of a 10090b57cec5SDimitry Andric /// simple 2-block loop, hoisting the increment can be much better than 10100b57cec5SDimitry Andric /// duplicating the entire loop header. In the case of loops with early exits, 10110b57cec5SDimitry Andric /// rotation will not work anyway, but simplifyLoopLatch will put the loop in 10120b57cec5SDimitry Andric /// canonical form so downstream passes can handle it. 10130b57cec5SDimitry Andric /// 10140b57cec5SDimitry Andric /// I don't believe this invalidates SCEV. 10150b57cec5SDimitry Andric bool LoopRotate::simplifyLoopLatch(Loop *L) { 10160b57cec5SDimitry Andric BasicBlock *Latch = L->getLoopLatch(); 10170b57cec5SDimitry Andric if (!Latch || Latch->hasAddressTaken()) 10180b57cec5SDimitry Andric return false; 10190b57cec5SDimitry Andric 10200b57cec5SDimitry Andric BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator()); 10210b57cec5SDimitry Andric if (!Jmp || !Jmp->isUnconditional()) 10220b57cec5SDimitry Andric return false; 10230b57cec5SDimitry Andric 10240b57cec5SDimitry Andric BasicBlock *LastExit = Latch->getSinglePredecessor(); 10250b57cec5SDimitry Andric if (!LastExit || !L->isLoopExiting(LastExit)) 10260b57cec5SDimitry Andric return false; 10270b57cec5SDimitry Andric 10280b57cec5SDimitry Andric BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator()); 10290b57cec5SDimitry Andric if (!BI) 10300b57cec5SDimitry Andric return false; 10310b57cec5SDimitry Andric 10320b57cec5SDimitry Andric if (!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L)) 10330b57cec5SDimitry Andric return false; 10340b57cec5SDimitry Andric 10350b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into " 10360b57cec5SDimitry Andric << LastExit->getName() << "\n"); 10370b57cec5SDimitry Andric 10388bcb0991SDimitry Andric DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); 10398bcb0991SDimitry Andric MergeBlockIntoPredecessor(Latch, &DTU, LI, MSSAU, nullptr, 10408bcb0991SDimitry Andric /*PredecessorWithTwoSuccessors=*/true); 10410b57cec5SDimitry Andric 1042bdd1243dSDimitry Andric if (SE) { 1043bdd1243dSDimitry Andric // Merging blocks may remove blocks reference in the block disposition cache. Clear the cache. 1044bdd1243dSDimitry Andric SE->forgetBlockAndLoopDispositions(); 1045bdd1243dSDimitry Andric } 1046bdd1243dSDimitry Andric 10470b57cec5SDimitry Andric if (MSSAU && VerifyMemorySSA) 10480b57cec5SDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA(); 10490b57cec5SDimitry Andric 10500b57cec5SDimitry Andric return true; 10510b57cec5SDimitry Andric } 10520b57cec5SDimitry Andric 10530b57cec5SDimitry Andric /// Rotate \c L, and return true if any modification was made. 10540b57cec5SDimitry Andric bool LoopRotate::processLoop(Loop *L) { 10550b57cec5SDimitry Andric // Save the loop metadata. 10560b57cec5SDimitry Andric MDNode *LoopMD = L->getLoopID(); 10570b57cec5SDimitry Andric 10580b57cec5SDimitry Andric bool SimplifiedLatch = false; 10590b57cec5SDimitry Andric 10600b57cec5SDimitry Andric // Simplify the loop latch before attempting to rotate the header 10610b57cec5SDimitry Andric // upward. Rotation may not be needed if the loop tail can be folded into the 10620b57cec5SDimitry Andric // loop exit. 10630b57cec5SDimitry Andric if (!RotationOnly) 10640b57cec5SDimitry Andric SimplifiedLatch = simplifyLoopLatch(L); 10650b57cec5SDimitry Andric 10660b57cec5SDimitry Andric bool MadeChange = rotateLoop(L, SimplifiedLatch); 10670b57cec5SDimitry Andric assert((!MadeChange || L->isLoopExiting(L->getLoopLatch())) && 10680b57cec5SDimitry Andric "Loop latch should be exiting after loop-rotate."); 10690b57cec5SDimitry Andric 10700b57cec5SDimitry Andric // Restore the loop metadata. 10710b57cec5SDimitry Andric // NB! We presume LoopRotation DOESN'T ADD its own metadata. 10720b57cec5SDimitry Andric if ((MadeChange || SimplifiedLatch) && LoopMD) 10730b57cec5SDimitry Andric L->setLoopID(LoopMD); 10740b57cec5SDimitry Andric 10750b57cec5SDimitry Andric return MadeChange || SimplifiedLatch; 10760b57cec5SDimitry Andric } 10770b57cec5SDimitry Andric 10780b57cec5SDimitry Andric 10790b57cec5SDimitry Andric /// The utility to convert a loop into a loop with bottom test. 10800b57cec5SDimitry Andric bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, 10810b57cec5SDimitry Andric AssumptionCache *AC, DominatorTree *DT, 10820b57cec5SDimitry Andric ScalarEvolution *SE, MemorySSAUpdater *MSSAU, 10830b57cec5SDimitry Andric const SimplifyQuery &SQ, bool RotationOnly = true, 10840b57cec5SDimitry Andric unsigned Threshold = unsigned(-1), 1085e8d8bef9SDimitry Andric bool IsUtilMode = true, bool PrepareForLTO) { 10860b57cec5SDimitry Andric LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly, 1087e8d8bef9SDimitry Andric IsUtilMode, PrepareForLTO); 10880b57cec5SDimitry Andric return LR.processLoop(L); 10890b57cec5SDimitry Andric } 1090