10b57cec5SDimitry Andric //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This transformation analyzes and transforms the induction variables (and 100b57cec5SDimitry Andric // computations derived from them) into simpler forms suitable for subsequent 110b57cec5SDimitry Andric // analysis and transformation. 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric // If the trip count of a loop is computable, this pass also makes the following 140b57cec5SDimitry Andric // changes: 150b57cec5SDimitry Andric // 1. The exit condition for the loop is canonicalized to compare the 160b57cec5SDimitry Andric // induction value against the exit value. This turns loops like: 170b57cec5SDimitry Andric // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)' 180b57cec5SDimitry Andric // 2. Any use outside of the loop of an expression derived from the indvar 190b57cec5SDimitry Andric // is changed to compute the derived value outside of the loop, eliminating 200b57cec5SDimitry Andric // the dependence on the exit value of the induction variable. If the only 210b57cec5SDimitry Andric // purpose of the loop is to compute the exit value of some derived 220b57cec5SDimitry Andric // expression, this transformation will make the loop dead. 230b57cec5SDimitry Andric // 240b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/IndVarSimplify.h" 270b57cec5SDimitry Andric #include "llvm/ADT/APFloat.h" 280b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h" 290b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h" 300b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 31480093f4SDimitry Andric #include "llvm/ADT/SmallSet.h" 320b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 330b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 340b57cec5SDimitry Andric #include "llvm/ADT/iterator_range.h" 350b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h" 360b57cec5SDimitry Andric #include "llvm/Analysis/LoopPass.h" 375ffd83dbSDimitry Andric #include "llvm/Analysis/MemorySSA.h" 385ffd83dbSDimitry Andric #include "llvm/Analysis/MemorySSAUpdater.h" 390b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h" 400b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpressions.h" 410b57cec5SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h" 420b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 430b57cec5SDimitry Andric #include "llvm/Analysis/ValueTracking.h" 440b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h" 450b57cec5SDimitry Andric #include "llvm/IR/Constant.h" 460b57cec5SDimitry Andric #include "llvm/IR/ConstantRange.h" 470b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 480b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h" 490b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h" 500b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 510b57cec5SDimitry Andric #include "llvm/IR/Function.h" 520b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h" 530b57cec5SDimitry Andric #include "llvm/IR/InstrTypes.h" 540b57cec5SDimitry Andric #include "llvm/IR/Instruction.h" 550b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 560b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 570b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h" 580b57cec5SDimitry Andric #include "llvm/IR/Module.h" 590b57cec5SDimitry Andric #include "llvm/IR/Operator.h" 600b57cec5SDimitry Andric #include "llvm/IR/PassManager.h" 610b57cec5SDimitry Andric #include "llvm/IR/PatternMatch.h" 620b57cec5SDimitry Andric #include "llvm/IR/Type.h" 630b57cec5SDimitry Andric #include "llvm/IR/Use.h" 640b57cec5SDimitry Andric #include "llvm/IR/User.h" 650b57cec5SDimitry Andric #include "llvm/IR/Value.h" 660b57cec5SDimitry Andric #include "llvm/IR/ValueHandle.h" 670b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 680b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 690b57cec5SDimitry Andric #include "llvm/Support/Compiler.h" 700b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 710b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h" 720b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 73*0fca6ea1SDimitry Andric #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" 740b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h" 75480093f4SDimitry Andric #include "llvm/Transforms/Utils/Local.h" 760b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h" 775ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 780b57cec5SDimitry Andric #include "llvm/Transforms/Utils/SimplifyIndVar.h" 790b57cec5SDimitry Andric #include <cassert> 800b57cec5SDimitry Andric #include <cstdint> 810b57cec5SDimitry Andric #include <utility> 820b57cec5SDimitry Andric 830b57cec5SDimitry Andric using namespace llvm; 84349cc55cSDimitry Andric using namespace PatternMatch; 850b57cec5SDimitry Andric 860b57cec5SDimitry Andric #define DEBUG_TYPE "indvars" 870b57cec5SDimitry Andric 880b57cec5SDimitry Andric STATISTIC(NumWidened , "Number of indvars widened"); 890b57cec5SDimitry Andric STATISTIC(NumReplaced , "Number of exit values replaced"); 900b57cec5SDimitry Andric STATISTIC(NumLFTR , "Number of loop exit tests replaced"); 910b57cec5SDimitry Andric STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); 920b57cec5SDimitry Andric STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); 930b57cec5SDimitry Andric 940b57cec5SDimitry Andric static cl::opt<ReplaceExitVal> ReplaceExitValue( 950b57cec5SDimitry Andric "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), 960b57cec5SDimitry Andric cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), 97753f127fSDimitry Andric cl::values( 98753f127fSDimitry Andric clEnumValN(NeverRepl, "never", "never replace exit value"), 990b57cec5SDimitry Andric clEnumValN(OnlyCheapRepl, "cheap", 1000b57cec5SDimitry Andric "only replace exit value when the cost is cheap"), 101753f127fSDimitry Andric clEnumValN( 102753f127fSDimitry Andric UnusedIndVarInLoop, "unusedindvarinloop", 103753f127fSDimitry Andric "only replace exit value when it is an unused " 104753f127fSDimitry Andric "induction variable in the loop and has cheap replacement cost"), 1050b57cec5SDimitry Andric clEnumValN(NoHardUse, "noharduse", 1060b57cec5SDimitry Andric "only replace exit values when loop def likely dead"), 1070b57cec5SDimitry Andric clEnumValN(AlwaysRepl, "always", 1080b57cec5SDimitry Andric "always replace exit value whenever possible"))); 1090b57cec5SDimitry Andric 1100b57cec5SDimitry Andric static cl::opt<bool> UsePostIncrementRanges( 1110b57cec5SDimitry Andric "indvars-post-increment-ranges", cl::Hidden, 1120b57cec5SDimitry Andric cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), 1130b57cec5SDimitry Andric cl::init(true)); 1140b57cec5SDimitry Andric 1150b57cec5SDimitry Andric static cl::opt<bool> 1160b57cec5SDimitry Andric DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), 1170b57cec5SDimitry Andric cl::desc("Disable Linear Function Test Replace optimization")); 1180b57cec5SDimitry Andric 1198bcb0991SDimitry Andric static cl::opt<bool> 120480093f4SDimitry Andric LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), 1218bcb0991SDimitry Andric cl::desc("Predicate conditions in read only loops")); 1228bcb0991SDimitry Andric 123e8d8bef9SDimitry Andric static cl::opt<bool> 124e8d8bef9SDimitry Andric AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), 125e8d8bef9SDimitry Andric cl::desc("Allow widening of indvars to eliminate s/zext")); 126e8d8bef9SDimitry Andric 1270b57cec5SDimitry Andric namespace { 1280b57cec5SDimitry Andric 1290b57cec5SDimitry Andric class IndVarSimplify { 1300b57cec5SDimitry Andric LoopInfo *LI; 1310b57cec5SDimitry Andric ScalarEvolution *SE; 1320b57cec5SDimitry Andric DominatorTree *DT; 1330b57cec5SDimitry Andric const DataLayout &DL; 1340b57cec5SDimitry Andric TargetLibraryInfo *TLI; 1350b57cec5SDimitry Andric const TargetTransformInfo *TTI; 1365ffd83dbSDimitry Andric std::unique_ptr<MemorySSAUpdater> MSSAU; 1370b57cec5SDimitry Andric 1380b57cec5SDimitry Andric SmallVector<WeakTrackingVH, 16> DeadInsts; 139e8d8bef9SDimitry Andric bool WidenIndVars; 1400b57cec5SDimitry Andric 141*0fca6ea1SDimitry Andric bool RunUnswitching = false; 142*0fca6ea1SDimitry Andric 1430b57cec5SDimitry Andric bool handleFloatingPointIV(Loop *L, PHINode *PH); 1440b57cec5SDimitry Andric bool rewriteNonIntegerIVs(Loop *L); 1450b57cec5SDimitry Andric 1460b57cec5SDimitry Andric bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); 147349cc55cSDimitry Andric /// Try to improve our exit conditions by converting condition from signed 148349cc55cSDimitry Andric /// to unsigned or rotating computation out of the loop. 149349cc55cSDimitry Andric /// (See inline comment about why this is duplicated from simplifyAndExtend) 150349cc55cSDimitry Andric bool canonicalizeExitCondition(Loop *L); 1518bcb0991SDimitry Andric /// Try to eliminate loop exits based on analyzeable exit counts 1528bcb0991SDimitry Andric bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter); 1538bcb0991SDimitry Andric /// Try to form loop invariant tests for loop exits by changing how many 1548bcb0991SDimitry Andric /// iterations of the loop run when that is unobservable. 1558bcb0991SDimitry Andric bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter); 1560b57cec5SDimitry Andric 1570b57cec5SDimitry Andric bool rewriteFirstIterationLoopExitValues(Loop *L); 1580b57cec5SDimitry Andric 1590b57cec5SDimitry Andric bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 1600b57cec5SDimitry Andric const SCEV *ExitCount, 1610b57cec5SDimitry Andric PHINode *IndVar, SCEVExpander &Rewriter); 1620b57cec5SDimitry Andric 1630b57cec5SDimitry Andric bool sinkUnusedInvariants(Loop *L); 1640b57cec5SDimitry Andric 1650b57cec5SDimitry Andric public: 1660b57cec5SDimitry Andric IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, 1670b57cec5SDimitry Andric const DataLayout &DL, TargetLibraryInfo *TLI, 168e8d8bef9SDimitry Andric TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars) 169e8d8bef9SDimitry Andric : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI), 170e8d8bef9SDimitry Andric WidenIndVars(WidenIndVars) { 1715ffd83dbSDimitry Andric if (MSSA) 1725ffd83dbSDimitry Andric MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); 1735ffd83dbSDimitry Andric } 1740b57cec5SDimitry Andric 1750b57cec5SDimitry Andric bool run(Loop *L); 176*0fca6ea1SDimitry Andric 177*0fca6ea1SDimitry Andric bool runUnswitching() const { return RunUnswitching; } 1780b57cec5SDimitry Andric }; 1790b57cec5SDimitry Andric 1800b57cec5SDimitry Andric } // end anonymous namespace 1810b57cec5SDimitry Andric 1820b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 1830b57cec5SDimitry Andric // rewriteNonIntegerIVs and helpers. Prefer integer IVs. 1840b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 1850b57cec5SDimitry Andric 1860b57cec5SDimitry Andric /// Convert APF to an integer, if possible. 1870b57cec5SDimitry Andric static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) { 1880b57cec5SDimitry Andric bool isExact = false; 1890b57cec5SDimitry Andric // See if we can convert this to an int64_t 1900b57cec5SDimitry Andric uint64_t UIntVal; 191bdd1243dSDimitry Andric if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true, 1920b57cec5SDimitry Andric APFloat::rmTowardZero, &isExact) != APFloat::opOK || 1930b57cec5SDimitry Andric !isExact) 1940b57cec5SDimitry Andric return false; 1950b57cec5SDimitry Andric IntVal = UIntVal; 1960b57cec5SDimitry Andric return true; 1970b57cec5SDimitry Andric } 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric /// If the loop has floating induction variable then insert corresponding 2000b57cec5SDimitry Andric /// integer induction variable if possible. 2010b57cec5SDimitry Andric /// For example, 2020b57cec5SDimitry Andric /// for(double i = 0; i < 10000; ++i) 2030b57cec5SDimitry Andric /// bar(i) 2040b57cec5SDimitry Andric /// is converted into 2050b57cec5SDimitry Andric /// for(int i = 0; i < 10000; ++i) 2060b57cec5SDimitry Andric /// bar((double)i); 2070b57cec5SDimitry Andric bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { 2080b57cec5SDimitry Andric unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); 2090b57cec5SDimitry Andric unsigned BackEdge = IncomingEdge^1; 2100b57cec5SDimitry Andric 2110b57cec5SDimitry Andric // Check incoming value. 2120b57cec5SDimitry Andric auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge)); 2130b57cec5SDimitry Andric 2140b57cec5SDimitry Andric int64_t InitValue; 2150b57cec5SDimitry Andric if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue)) 2160b57cec5SDimitry Andric return false; 2170b57cec5SDimitry Andric 2180b57cec5SDimitry Andric // Check IV increment. Reject this PN if increment operation is not 2190b57cec5SDimitry Andric // an add or increment value can not be represented by an integer. 2200b57cec5SDimitry Andric auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); 2210b57cec5SDimitry Andric if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false; 2220b57cec5SDimitry Andric 2230b57cec5SDimitry Andric // If this is not an add of the PHI with a constantfp, or if the constant fp 2240b57cec5SDimitry Andric // is not an integer, bail out. 2250b57cec5SDimitry Andric ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); 2260b57cec5SDimitry Andric int64_t IncValue; 2270b57cec5SDimitry Andric if (IncValueVal == nullptr || Incr->getOperand(0) != PN || 2280b57cec5SDimitry Andric !ConvertToSInt(IncValueVal->getValueAPF(), IncValue)) 2290b57cec5SDimitry Andric return false; 2300b57cec5SDimitry Andric 2310b57cec5SDimitry Andric // Check Incr uses. One user is PN and the other user is an exit condition 2320b57cec5SDimitry Andric // used by the conditional terminator. 2330b57cec5SDimitry Andric Value::user_iterator IncrUse = Incr->user_begin(); 2340b57cec5SDimitry Andric Instruction *U1 = cast<Instruction>(*IncrUse++); 2350b57cec5SDimitry Andric if (IncrUse == Incr->user_end()) return false; 2360b57cec5SDimitry Andric Instruction *U2 = cast<Instruction>(*IncrUse++); 2370b57cec5SDimitry Andric if (IncrUse != Incr->user_end()) return false; 2380b57cec5SDimitry Andric 2390b57cec5SDimitry Andric // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't 2400b57cec5SDimitry Andric // only used by a branch, we can't transform it. 2410b57cec5SDimitry Andric FCmpInst *Compare = dyn_cast<FCmpInst>(U1); 2420b57cec5SDimitry Andric if (!Compare) 2430b57cec5SDimitry Andric Compare = dyn_cast<FCmpInst>(U2); 2440b57cec5SDimitry Andric if (!Compare || !Compare->hasOneUse() || 2450b57cec5SDimitry Andric !isa<BranchInst>(Compare->user_back())) 2460b57cec5SDimitry Andric return false; 2470b57cec5SDimitry Andric 2480b57cec5SDimitry Andric BranchInst *TheBr = cast<BranchInst>(Compare->user_back()); 2490b57cec5SDimitry Andric 2500b57cec5SDimitry Andric // We need to verify that the branch actually controls the iteration count 2510b57cec5SDimitry Andric // of the loop. If not, the new IV can overflow and no one will notice. 2520b57cec5SDimitry Andric // The branch block must be in the loop and one of the successors must be out 2530b57cec5SDimitry Andric // of the loop. 2540b57cec5SDimitry Andric assert(TheBr->isConditional() && "Can't use fcmp if not conditional"); 2550b57cec5SDimitry Andric if (!L->contains(TheBr->getParent()) || 2560b57cec5SDimitry Andric (L->contains(TheBr->getSuccessor(0)) && 2570b57cec5SDimitry Andric L->contains(TheBr->getSuccessor(1)))) 2580b57cec5SDimitry Andric return false; 2590b57cec5SDimitry Andric 2600b57cec5SDimitry Andric // If it isn't a comparison with an integer-as-fp (the exit value), we can't 2610b57cec5SDimitry Andric // transform it. 2620b57cec5SDimitry Andric ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); 2630b57cec5SDimitry Andric int64_t ExitValue; 2640b57cec5SDimitry Andric if (ExitValueVal == nullptr || 2650b57cec5SDimitry Andric !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) 2660b57cec5SDimitry Andric return false; 2670b57cec5SDimitry Andric 2680b57cec5SDimitry Andric // Find new predicate for integer comparison. 2690b57cec5SDimitry Andric CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; 2700b57cec5SDimitry Andric switch (Compare->getPredicate()) { 2710b57cec5SDimitry Andric default: return false; // Unknown comparison. 2720b57cec5SDimitry Andric case CmpInst::FCMP_OEQ: 2730b57cec5SDimitry Andric case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; 2740b57cec5SDimitry Andric case CmpInst::FCMP_ONE: 2750b57cec5SDimitry Andric case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break; 2760b57cec5SDimitry Andric case CmpInst::FCMP_OGT: 2770b57cec5SDimitry Andric case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break; 2780b57cec5SDimitry Andric case CmpInst::FCMP_OGE: 2790b57cec5SDimitry Andric case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break; 2800b57cec5SDimitry Andric case CmpInst::FCMP_OLT: 2810b57cec5SDimitry Andric case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break; 2820b57cec5SDimitry Andric case CmpInst::FCMP_OLE: 2830b57cec5SDimitry Andric case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; 2840b57cec5SDimitry Andric } 2850b57cec5SDimitry Andric 2860b57cec5SDimitry Andric // We convert the floating point induction variable to a signed i32 value if 2870b57cec5SDimitry Andric // we can. This is only safe if the comparison will not overflow in a way 2880b57cec5SDimitry Andric // that won't be trapped by the integer equivalent operations. Check for this 2890b57cec5SDimitry Andric // now. 2900b57cec5SDimitry Andric // TODO: We could use i64 if it is native and the range requires it. 2910b57cec5SDimitry Andric 2920b57cec5SDimitry Andric // The start/stride/exit values must all fit in signed i32. 2930b57cec5SDimitry Andric if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) 2940b57cec5SDimitry Andric return false; 2950b57cec5SDimitry Andric 2960b57cec5SDimitry Andric // If not actually striding (add x, 0.0), avoid touching the code. 2970b57cec5SDimitry Andric if (IncValue == 0) 2980b57cec5SDimitry Andric return false; 2990b57cec5SDimitry Andric 3000b57cec5SDimitry Andric // Positive and negative strides have different safety conditions. 3010b57cec5SDimitry Andric if (IncValue > 0) { 3020b57cec5SDimitry Andric // If we have a positive stride, we require the init to be less than the 3030b57cec5SDimitry Andric // exit value. 3040b57cec5SDimitry Andric if (InitValue >= ExitValue) 3050b57cec5SDimitry Andric return false; 3060b57cec5SDimitry Andric 3070b57cec5SDimitry Andric uint32_t Range = uint32_t(ExitValue-InitValue); 3080b57cec5SDimitry Andric // Check for infinite loop, either: 3090b57cec5SDimitry Andric // while (i <= Exit) or until (i > Exit) 3100b57cec5SDimitry Andric if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) { 3110b57cec5SDimitry Andric if (++Range == 0) return false; // Range overflows. 3120b57cec5SDimitry Andric } 3130b57cec5SDimitry Andric 3140b57cec5SDimitry Andric unsigned Leftover = Range % uint32_t(IncValue); 3150b57cec5SDimitry Andric 3160b57cec5SDimitry Andric // If this is an equality comparison, we require that the strided value 3170b57cec5SDimitry Andric // exactly land on the exit value, otherwise the IV condition will wrap 3180b57cec5SDimitry Andric // around and do things the fp IV wouldn't. 3190b57cec5SDimitry Andric if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 3200b57cec5SDimitry Andric Leftover != 0) 3210b57cec5SDimitry Andric return false; 3220b57cec5SDimitry Andric 3230b57cec5SDimitry Andric // If the stride would wrap around the i32 before exiting, we can't 3240b57cec5SDimitry Andric // transform the IV. 3250b57cec5SDimitry Andric if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) 3260b57cec5SDimitry Andric return false; 3270b57cec5SDimitry Andric } else { 3280b57cec5SDimitry Andric // If we have a negative stride, we require the init to be greater than the 3290b57cec5SDimitry Andric // exit value. 3300b57cec5SDimitry Andric if (InitValue <= ExitValue) 3310b57cec5SDimitry Andric return false; 3320b57cec5SDimitry Andric 3330b57cec5SDimitry Andric uint32_t Range = uint32_t(InitValue-ExitValue); 3340b57cec5SDimitry Andric // Check for infinite loop, either: 3350b57cec5SDimitry Andric // while (i >= Exit) or until (i < Exit) 3360b57cec5SDimitry Andric if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) { 3370b57cec5SDimitry Andric if (++Range == 0) return false; // Range overflows. 3380b57cec5SDimitry Andric } 3390b57cec5SDimitry Andric 3400b57cec5SDimitry Andric unsigned Leftover = Range % uint32_t(-IncValue); 3410b57cec5SDimitry Andric 3420b57cec5SDimitry Andric // If this is an equality comparison, we require that the strided value 3430b57cec5SDimitry Andric // exactly land on the exit value, otherwise the IV condition will wrap 3440b57cec5SDimitry Andric // around and do things the fp IV wouldn't. 3450b57cec5SDimitry Andric if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 3460b57cec5SDimitry Andric Leftover != 0) 3470b57cec5SDimitry Andric return false; 3480b57cec5SDimitry Andric 3490b57cec5SDimitry Andric // If the stride would wrap around the i32 before exiting, we can't 3500b57cec5SDimitry Andric // transform the IV. 3510b57cec5SDimitry Andric if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) 3520b57cec5SDimitry Andric return false; 3530b57cec5SDimitry Andric } 3540b57cec5SDimitry Andric 3550b57cec5SDimitry Andric IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); 3560b57cec5SDimitry Andric 3570b57cec5SDimitry Andric // Insert new integer induction variable. 358*0fca6ea1SDimitry Andric PHINode *NewPHI = 359*0fca6ea1SDimitry Andric PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator()); 3600b57cec5SDimitry Andric NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), 3610b57cec5SDimitry Andric PN->getIncomingBlock(IncomingEdge)); 362*0fca6ea1SDimitry Andric NewPHI->setDebugLoc(PN->getDebugLoc()); 3630b57cec5SDimitry Andric 364*0fca6ea1SDimitry Andric Instruction *NewAdd = 3650b57cec5SDimitry Andric BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue), 366*0fca6ea1SDimitry Andric Incr->getName() + ".int", Incr->getIterator()); 367*0fca6ea1SDimitry Andric NewAdd->setDebugLoc(Incr->getDebugLoc()); 3680b57cec5SDimitry Andric NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge)); 3690b57cec5SDimitry Andric 370*0fca6ea1SDimitry Andric ICmpInst *NewCompare = 371*0fca6ea1SDimitry Andric new ICmpInst(TheBr->getIterator(), NewPred, NewAdd, 372*0fca6ea1SDimitry Andric ConstantInt::get(Int32Ty, ExitValue), Compare->getName()); 373*0fca6ea1SDimitry Andric NewCompare->setDebugLoc(Compare->getDebugLoc()); 3740b57cec5SDimitry Andric 3750b57cec5SDimitry Andric // In the following deletions, PN may become dead and may be deleted. 3760b57cec5SDimitry Andric // Use a WeakTrackingVH to observe whether this happens. 3770b57cec5SDimitry Andric WeakTrackingVH WeakPH = PN; 3780b57cec5SDimitry Andric 3790b57cec5SDimitry Andric // Delete the old floating point exit comparison. The branch starts using the 3800b57cec5SDimitry Andric // new comparison. 3810b57cec5SDimitry Andric NewCompare->takeName(Compare); 3820b57cec5SDimitry Andric Compare->replaceAllUsesWith(NewCompare); 3835ffd83dbSDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get()); 3840b57cec5SDimitry Andric 3850b57cec5SDimitry Andric // Delete the old floating point increment. 38681ad6265SDimitry Andric Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType())); 3875ffd83dbSDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get()); 3880b57cec5SDimitry Andric 3890b57cec5SDimitry Andric // If the FP induction variable still has uses, this is because something else 3900b57cec5SDimitry Andric // in the loop uses its value. In order to canonicalize the induction 3910b57cec5SDimitry Andric // variable, we chose to eliminate the IV and rewrite it in terms of an 3920b57cec5SDimitry Andric // int->fp cast. 3930b57cec5SDimitry Andric // 3940b57cec5SDimitry Andric // We give preference to sitofp over uitofp because it is faster on most 3950b57cec5SDimitry Andric // platforms. 3960b57cec5SDimitry Andric if (WeakPH) { 397*0fca6ea1SDimitry Andric Instruction *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", 398*0fca6ea1SDimitry Andric PN->getParent()->getFirstInsertionPt()); 399*0fca6ea1SDimitry Andric Conv->setDebugLoc(PN->getDebugLoc()); 4000b57cec5SDimitry Andric PN->replaceAllUsesWith(Conv); 4015ffd83dbSDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get()); 4020b57cec5SDimitry Andric } 4030b57cec5SDimitry Andric return true; 4040b57cec5SDimitry Andric } 4050b57cec5SDimitry Andric 4060b57cec5SDimitry Andric bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) { 4070b57cec5SDimitry Andric // First step. Check to see if there are any floating-point recurrences. 4080b57cec5SDimitry Andric // If there are, change them into integer recurrences, permitting analysis by 4090b57cec5SDimitry Andric // the SCEV routines. 4100b57cec5SDimitry Andric BasicBlock *Header = L->getHeader(); 4110b57cec5SDimitry Andric 4120b57cec5SDimitry Andric SmallVector<WeakTrackingVH, 8> PHIs; 4130b57cec5SDimitry Andric for (PHINode &PN : Header->phis()) 4140b57cec5SDimitry Andric PHIs.push_back(&PN); 4150b57cec5SDimitry Andric 4160b57cec5SDimitry Andric bool Changed = false; 41706c3fb27SDimitry Andric for (WeakTrackingVH &PHI : PHIs) 41806c3fb27SDimitry Andric if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHI)) 4190b57cec5SDimitry Andric Changed |= handleFloatingPointIV(L, PN); 4200b57cec5SDimitry Andric 4210b57cec5SDimitry Andric // If the loop previously had floating-point IV, ScalarEvolution 4220b57cec5SDimitry Andric // may not have been able to compute a trip count. Now that we've done some 4230b57cec5SDimitry Andric // re-writing, the trip count may be computable. 4240b57cec5SDimitry Andric if (Changed) 4250b57cec5SDimitry Andric SE->forgetLoop(L); 4260b57cec5SDimitry Andric return Changed; 4270b57cec5SDimitry Andric } 4280b57cec5SDimitry Andric 4290b57cec5SDimitry Andric //===---------------------------------------------------------------------===// 4300b57cec5SDimitry Andric // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know 4310b57cec5SDimitry Andric // they will exit at the first iteration. 4320b57cec5SDimitry Andric //===---------------------------------------------------------------------===// 4330b57cec5SDimitry Andric 4340b57cec5SDimitry Andric /// Check to see if this loop has loop invariant conditions which lead to loop 4350b57cec5SDimitry Andric /// exits. If so, we know that if the exit path is taken, it is at the first 4360b57cec5SDimitry Andric /// loop iteration. This lets us predict exit values of PHI nodes that live in 4370b57cec5SDimitry Andric /// loop header. 4380b57cec5SDimitry Andric bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) { 4390b57cec5SDimitry Andric // Verify the input to the pass is already in LCSSA form. 4400b57cec5SDimitry Andric assert(L->isLCSSAForm(*DT)); 4410b57cec5SDimitry Andric 4420b57cec5SDimitry Andric SmallVector<BasicBlock *, 8> ExitBlocks; 4430b57cec5SDimitry Andric L->getUniqueExitBlocks(ExitBlocks); 4440b57cec5SDimitry Andric 4450b57cec5SDimitry Andric bool MadeAnyChanges = false; 4460b57cec5SDimitry Andric for (auto *ExitBB : ExitBlocks) { 4470b57cec5SDimitry Andric // If there are no more PHI nodes in this exit block, then no more 4480b57cec5SDimitry Andric // values defined inside the loop are used on this path. 4490b57cec5SDimitry Andric for (PHINode &PN : ExitBB->phis()) { 4500b57cec5SDimitry Andric for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues(); 4510b57cec5SDimitry Andric IncomingValIdx != E; ++IncomingValIdx) { 4520b57cec5SDimitry Andric auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx); 4530b57cec5SDimitry Andric 4540b57cec5SDimitry Andric // Can we prove that the exit must run on the first iteration if it 4550b57cec5SDimitry Andric // runs at all? (i.e. early exits are fine for our purposes, but 4560b57cec5SDimitry Andric // traces which lead to this exit being taken on the 2nd iteration 4570b57cec5SDimitry Andric // aren't.) Note that this is about whether the exit branch is 4580b57cec5SDimitry Andric // executed, not about whether it is taken. 4590b57cec5SDimitry Andric if (!L->getLoopLatch() || 4600b57cec5SDimitry Andric !DT->dominates(IncomingBB, L->getLoopLatch())) 4610b57cec5SDimitry Andric continue; 4620b57cec5SDimitry Andric 4630b57cec5SDimitry Andric // Get condition that leads to the exit path. 4640b57cec5SDimitry Andric auto *TermInst = IncomingBB->getTerminator(); 4650b57cec5SDimitry Andric 4660b57cec5SDimitry Andric Value *Cond = nullptr; 4670b57cec5SDimitry Andric if (auto *BI = dyn_cast<BranchInst>(TermInst)) { 4680b57cec5SDimitry Andric // Must be a conditional branch, otherwise the block 4690b57cec5SDimitry Andric // should not be in the loop. 4700b57cec5SDimitry Andric Cond = BI->getCondition(); 4710b57cec5SDimitry Andric } else if (auto *SI = dyn_cast<SwitchInst>(TermInst)) 4720b57cec5SDimitry Andric Cond = SI->getCondition(); 4730b57cec5SDimitry Andric else 4740b57cec5SDimitry Andric continue; 4750b57cec5SDimitry Andric 4760b57cec5SDimitry Andric if (!L->isLoopInvariant(Cond)) 4770b57cec5SDimitry Andric continue; 4780b57cec5SDimitry Andric 4790b57cec5SDimitry Andric auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx)); 4800b57cec5SDimitry Andric 4810b57cec5SDimitry Andric // Only deal with PHIs in the loop header. 4820b57cec5SDimitry Andric if (!ExitVal || ExitVal->getParent() != L->getHeader()) 4830b57cec5SDimitry Andric continue; 4840b57cec5SDimitry Andric 4850b57cec5SDimitry Andric // If ExitVal is a PHI on the loop header, then we know its 4860b57cec5SDimitry Andric // value along this exit because the exit can only be taken 4870b57cec5SDimitry Andric // on the first iteration. 4880b57cec5SDimitry Andric auto *LoopPreheader = L->getLoopPreheader(); 4890b57cec5SDimitry Andric assert(LoopPreheader && "Invalid loop"); 4900b57cec5SDimitry Andric int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader); 4910b57cec5SDimitry Andric if (PreheaderIdx != -1) { 4920b57cec5SDimitry Andric assert(ExitVal->getParent() == L->getHeader() && 4930b57cec5SDimitry Andric "ExitVal must be in loop header"); 4940b57cec5SDimitry Andric MadeAnyChanges = true; 4950b57cec5SDimitry Andric PN.setIncomingValue(IncomingValIdx, 4960b57cec5SDimitry Andric ExitVal->getIncomingValue(PreheaderIdx)); 497349cc55cSDimitry Andric SE->forgetValue(&PN); 4980b57cec5SDimitry Andric } 4990b57cec5SDimitry Andric } 5000b57cec5SDimitry Andric } 5010b57cec5SDimitry Andric } 5020b57cec5SDimitry Andric return MadeAnyChanges; 5030b57cec5SDimitry Andric } 5040b57cec5SDimitry Andric 5050b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 5060b57cec5SDimitry Andric // IV Widening - Extend the width of an IV to cover its widest uses. 5070b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 5080b57cec5SDimitry Andric 5090b57cec5SDimitry Andric /// Update information about the induction variable that is extended by this 5100b57cec5SDimitry Andric /// sign or zero extend operation. This is used to determine the final width of 5110b57cec5SDimitry Andric /// the IV before actually widening it. 512e8d8bef9SDimitry Andric static void visitIVCast(CastInst *Cast, WideIVInfo &WI, 513e8d8bef9SDimitry Andric ScalarEvolution *SE, 5140b57cec5SDimitry Andric const TargetTransformInfo *TTI) { 5150b57cec5SDimitry Andric bool IsSigned = Cast->getOpcode() == Instruction::SExt; 5160b57cec5SDimitry Andric if (!IsSigned && Cast->getOpcode() != Instruction::ZExt) 5170b57cec5SDimitry Andric return; 5180b57cec5SDimitry Andric 5190b57cec5SDimitry Andric Type *Ty = Cast->getType(); 5200b57cec5SDimitry Andric uint64_t Width = SE->getTypeSizeInBits(Ty); 521*0fca6ea1SDimitry Andric if (!Cast->getDataLayout().isLegalInteger(Width)) 5220b57cec5SDimitry Andric return; 5230b57cec5SDimitry Andric 5240b57cec5SDimitry Andric // Check that `Cast` actually extends the induction variable (we rely on this 5250b57cec5SDimitry Andric // later). This takes care of cases where `Cast` is extending a truncation of 5260b57cec5SDimitry Andric // the narrow induction variable, and thus can end up being narrower than the 5270b57cec5SDimitry Andric // "narrow" induction variable. 5280b57cec5SDimitry Andric uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType()); 5290b57cec5SDimitry Andric if (NarrowIVWidth >= Width) 5300b57cec5SDimitry Andric return; 5310b57cec5SDimitry Andric 5320b57cec5SDimitry Andric // Cast is either an sext or zext up to this point. 5330b57cec5SDimitry Andric // We should not widen an indvar if arithmetics on the wider indvar are more 5340b57cec5SDimitry Andric // expensive than those on the narrower indvar. We check only the cost of ADD 5350b57cec5SDimitry Andric // because at least an ADD is required to increment the induction variable. We 5360b57cec5SDimitry Andric // could compute more comprehensively the cost of all instructions on the 5370b57cec5SDimitry Andric // induction variable when necessary. 5380b57cec5SDimitry Andric if (TTI && 5390b57cec5SDimitry Andric TTI->getArithmeticInstrCost(Instruction::Add, Ty) > 5400b57cec5SDimitry Andric TTI->getArithmeticInstrCost(Instruction::Add, 5410b57cec5SDimitry Andric Cast->getOperand(0)->getType())) { 5420b57cec5SDimitry Andric return; 5430b57cec5SDimitry Andric } 5440b57cec5SDimitry Andric 545349cc55cSDimitry Andric if (!WI.WidestNativeType || 546349cc55cSDimitry Andric Width > SE->getTypeSizeInBits(WI.WidestNativeType)) { 5470b57cec5SDimitry Andric WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); 5480b57cec5SDimitry Andric WI.IsSigned = IsSigned; 5490b57cec5SDimitry Andric return; 5500b57cec5SDimitry Andric } 5510b57cec5SDimitry Andric 552349cc55cSDimitry Andric // We extend the IV to satisfy the sign of its user(s), or 'signed' 553349cc55cSDimitry Andric // if there are multiple users with both sign- and zero extensions, 554349cc55cSDimitry Andric // in order not to introduce nondeterministic behaviour based on the 555349cc55cSDimitry Andric // unspecified order of a PHI nodes' users-iterator. 556349cc55cSDimitry Andric WI.IsSigned |= IsSigned; 5570b57cec5SDimitry Andric } 5580b57cec5SDimitry Andric 5590b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 5600b57cec5SDimitry Andric // Live IV Reduction - Minimize IVs live across the loop. 5610b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 5620b57cec5SDimitry Andric 5630b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 5640b57cec5SDimitry Andric // Simplification of IV users based on SCEV evaluation. 5650b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 5660b57cec5SDimitry Andric 5670b57cec5SDimitry Andric namespace { 5680b57cec5SDimitry Andric 5690b57cec5SDimitry Andric class IndVarSimplifyVisitor : public IVVisitor { 5700b57cec5SDimitry Andric ScalarEvolution *SE; 5710b57cec5SDimitry Andric const TargetTransformInfo *TTI; 5720b57cec5SDimitry Andric PHINode *IVPhi; 5730b57cec5SDimitry Andric 5740b57cec5SDimitry Andric public: 5750b57cec5SDimitry Andric WideIVInfo WI; 5760b57cec5SDimitry Andric 5770b57cec5SDimitry Andric IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, 5780b57cec5SDimitry Andric const TargetTransformInfo *TTI, 5790b57cec5SDimitry Andric const DominatorTree *DTree) 5800b57cec5SDimitry Andric : SE(SCEV), TTI(TTI), IVPhi(IV) { 5810b57cec5SDimitry Andric DT = DTree; 5820b57cec5SDimitry Andric WI.NarrowIV = IVPhi; 5830b57cec5SDimitry Andric } 5840b57cec5SDimitry Andric 5850b57cec5SDimitry Andric // Implement the interface used by simplifyUsersOfIV. 5860b57cec5SDimitry Andric void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } 5870b57cec5SDimitry Andric }; 5880b57cec5SDimitry Andric 5890b57cec5SDimitry Andric } // end anonymous namespace 5900b57cec5SDimitry Andric 5910b57cec5SDimitry Andric /// Iteratively perform simplification on a worklist of IV users. Each 5920b57cec5SDimitry Andric /// successive simplification may push more users which may themselves be 5930b57cec5SDimitry Andric /// candidates for simplification. 5940b57cec5SDimitry Andric /// 5950b57cec5SDimitry Andric /// Sign/Zero extend elimination is interleaved with IV simplification. 5960b57cec5SDimitry Andric bool IndVarSimplify::simplifyAndExtend(Loop *L, 5970b57cec5SDimitry Andric SCEVExpander &Rewriter, 5980b57cec5SDimitry Andric LoopInfo *LI) { 5990b57cec5SDimitry Andric SmallVector<WideIVInfo, 8> WideIVs; 6000b57cec5SDimitry Andric 6010b57cec5SDimitry Andric auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction( 6020b57cec5SDimitry Andric Intrinsic::getName(Intrinsic::experimental_guard)); 6030b57cec5SDimitry Andric bool HasGuards = GuardDecl && !GuardDecl->use_empty(); 6040b57cec5SDimitry Andric 6050b57cec5SDimitry Andric SmallVector<PHINode *, 8> LoopPhis; 60681ad6265SDimitry Andric for (PHINode &PN : L->getHeader()->phis()) 60781ad6265SDimitry Andric LoopPhis.push_back(&PN); 60881ad6265SDimitry Andric 6090b57cec5SDimitry Andric // Each round of simplification iterates through the SimplifyIVUsers worklist 6100b57cec5SDimitry Andric // for all current phis, then determines whether any IVs can be 6110b57cec5SDimitry Andric // widened. Widening adds new phis to LoopPhis, inducing another round of 6120b57cec5SDimitry Andric // simplification on the wide IVs. 6130b57cec5SDimitry Andric bool Changed = false; 6140b57cec5SDimitry Andric while (!LoopPhis.empty()) { 6150b57cec5SDimitry Andric // Evaluate as many IV expressions as possible before widening any IVs. This 6160b57cec5SDimitry Andric // forces SCEV to set no-wrap flags before evaluating sign/zero 6170b57cec5SDimitry Andric // extension. The first time SCEV attempts to normalize sign/zero extension, 6180b57cec5SDimitry Andric // the result becomes final. So for the most predictable results, we delay 6190b57cec5SDimitry Andric // evaluation of sign/zero extend evaluation until needed, and avoid running 6200b57cec5SDimitry Andric // other SCEV based analysis prior to simplifyAndExtend. 6210b57cec5SDimitry Andric do { 6220b57cec5SDimitry Andric PHINode *CurrIV = LoopPhis.pop_back_val(); 6230b57cec5SDimitry Andric 6240b57cec5SDimitry Andric // Information about sign/zero extensions of CurrIV. 6250b57cec5SDimitry Andric IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); 6260b57cec5SDimitry Andric 627*0fca6ea1SDimitry Andric const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, 628*0fca6ea1SDimitry Andric Rewriter, &Visitor); 6290b57cec5SDimitry Andric 630*0fca6ea1SDimitry Andric Changed |= C; 631*0fca6ea1SDimitry Andric RunUnswitching |= U; 6320b57cec5SDimitry Andric if (Visitor.WI.WidestNativeType) { 6330b57cec5SDimitry Andric WideIVs.push_back(Visitor.WI); 6340b57cec5SDimitry Andric } 6350b57cec5SDimitry Andric } while(!LoopPhis.empty()); 6360b57cec5SDimitry Andric 637e8d8bef9SDimitry Andric // Continue if we disallowed widening. 638e8d8bef9SDimitry Andric if (!WidenIndVars) 639e8d8bef9SDimitry Andric continue; 640e8d8bef9SDimitry Andric 6410b57cec5SDimitry Andric for (; !WideIVs.empty(); WideIVs.pop_back()) { 642e8d8bef9SDimitry Andric unsigned ElimExt; 643e8d8bef9SDimitry Andric unsigned Widened; 644e8d8bef9SDimitry Andric if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter, 645e8d8bef9SDimitry Andric DT, DeadInsts, ElimExt, Widened, 646e8d8bef9SDimitry Andric HasGuards, UsePostIncrementRanges)) { 647e8d8bef9SDimitry Andric NumElimExt += ElimExt; 648e8d8bef9SDimitry Andric NumWidened += Widened; 6490b57cec5SDimitry Andric Changed = true; 6500b57cec5SDimitry Andric LoopPhis.push_back(WidePhi); 6510b57cec5SDimitry Andric } 6520b57cec5SDimitry Andric } 6530b57cec5SDimitry Andric } 6540b57cec5SDimitry Andric return Changed; 6550b57cec5SDimitry Andric } 6560b57cec5SDimitry Andric 6570b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 6580b57cec5SDimitry Andric // linearFunctionTestReplace and its kin. Rewrite the loop exit condition. 6590b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 6600b57cec5SDimitry Andric 6610b57cec5SDimitry Andric /// Given an Value which is hoped to be part of an add recurance in the given 6620b57cec5SDimitry Andric /// loop, return the associated Phi node if so. Otherwise, return null. Note 6630b57cec5SDimitry Andric /// that this is less general than SCEVs AddRec checking. 6640b57cec5SDimitry Andric static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) { 6650b57cec5SDimitry Andric Instruction *IncI = dyn_cast<Instruction>(IncV); 6660b57cec5SDimitry Andric if (!IncI) 6670b57cec5SDimitry Andric return nullptr; 6680b57cec5SDimitry Andric 6690b57cec5SDimitry Andric switch (IncI->getOpcode()) { 6700b57cec5SDimitry Andric case Instruction::Add: 6710b57cec5SDimitry Andric case Instruction::Sub: 6720b57cec5SDimitry Andric break; 6730b57cec5SDimitry Andric case Instruction::GetElementPtr: 6740b57cec5SDimitry Andric // An IV counter must preserve its type. 6750b57cec5SDimitry Andric if (IncI->getNumOperands() == 2) 6760b57cec5SDimitry Andric break; 677bdd1243dSDimitry Andric [[fallthrough]]; 6780b57cec5SDimitry Andric default: 6790b57cec5SDimitry Andric return nullptr; 6800b57cec5SDimitry Andric } 6810b57cec5SDimitry Andric 6820b57cec5SDimitry Andric PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0)); 6830b57cec5SDimitry Andric if (Phi && Phi->getParent() == L->getHeader()) { 6840b57cec5SDimitry Andric if (L->isLoopInvariant(IncI->getOperand(1))) 6850b57cec5SDimitry Andric return Phi; 6860b57cec5SDimitry Andric return nullptr; 6870b57cec5SDimitry Andric } 6880b57cec5SDimitry Andric if (IncI->getOpcode() == Instruction::GetElementPtr) 6890b57cec5SDimitry Andric return nullptr; 6900b57cec5SDimitry Andric 6910b57cec5SDimitry Andric // Allow add/sub to be commuted. 6920b57cec5SDimitry Andric Phi = dyn_cast<PHINode>(IncI->getOperand(1)); 6930b57cec5SDimitry Andric if (Phi && Phi->getParent() == L->getHeader()) { 6940b57cec5SDimitry Andric if (L->isLoopInvariant(IncI->getOperand(0))) 6950b57cec5SDimitry Andric return Phi; 6960b57cec5SDimitry Andric } 6970b57cec5SDimitry Andric return nullptr; 6980b57cec5SDimitry Andric } 6990b57cec5SDimitry Andric 7000b57cec5SDimitry Andric /// Whether the current loop exit test is based on this value. Currently this 7010b57cec5SDimitry Andric /// is limited to a direct use in the loop condition. 7020b57cec5SDimitry Andric static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) { 7030b57cec5SDimitry Andric BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 7040b57cec5SDimitry Andric ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition()); 7050b57cec5SDimitry Andric // TODO: Allow non-icmp loop test. 7060b57cec5SDimitry Andric if (!ICmp) 7070b57cec5SDimitry Andric return false; 7080b57cec5SDimitry Andric 7090b57cec5SDimitry Andric // TODO: Allow indirect use. 7100b57cec5SDimitry Andric return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V; 7110b57cec5SDimitry Andric } 7120b57cec5SDimitry Andric 7130b57cec5SDimitry Andric /// linearFunctionTestReplace policy. Return true unless we can show that the 7140b57cec5SDimitry Andric /// current exit test is already sufficiently canonical. 7150b57cec5SDimitry Andric static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) { 7160b57cec5SDimitry Andric assert(L->getLoopLatch() && "Must be in simplified form"); 7170b57cec5SDimitry Andric 7180b57cec5SDimitry Andric // Avoid converting a constant or loop invariant test back to a runtime 7190b57cec5SDimitry Andric // test. This is critical for when SCEV's cached ExitCount is less precise 7200b57cec5SDimitry Andric // than the current IR (such as after we've proven a particular exit is 7210b57cec5SDimitry Andric // actually dead and thus the BE count never reaches our ExitCount.) 7220b57cec5SDimitry Andric BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 7230b57cec5SDimitry Andric if (L->isLoopInvariant(BI->getCondition())) 7240b57cec5SDimitry Andric return false; 7250b57cec5SDimitry Andric 7260b57cec5SDimitry Andric // Do LFTR to simplify the exit condition to an ICMP. 7270b57cec5SDimitry Andric ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); 7280b57cec5SDimitry Andric if (!Cond) 7290b57cec5SDimitry Andric return true; 7300b57cec5SDimitry Andric 7310b57cec5SDimitry Andric // Do LFTR to simplify the exit ICMP to EQ/NE 7320b57cec5SDimitry Andric ICmpInst::Predicate Pred = Cond->getPredicate(); 7330b57cec5SDimitry Andric if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ) 7340b57cec5SDimitry Andric return true; 7350b57cec5SDimitry Andric 7360b57cec5SDimitry Andric // Look for a loop invariant RHS 7370b57cec5SDimitry Andric Value *LHS = Cond->getOperand(0); 7380b57cec5SDimitry Andric Value *RHS = Cond->getOperand(1); 7390b57cec5SDimitry Andric if (!L->isLoopInvariant(RHS)) { 7400b57cec5SDimitry Andric if (!L->isLoopInvariant(LHS)) 7410b57cec5SDimitry Andric return true; 7420b57cec5SDimitry Andric std::swap(LHS, RHS); 7430b57cec5SDimitry Andric } 7440b57cec5SDimitry Andric // Look for a simple IV counter LHS 7450b57cec5SDimitry Andric PHINode *Phi = dyn_cast<PHINode>(LHS); 7460b57cec5SDimitry Andric if (!Phi) 7470b57cec5SDimitry Andric Phi = getLoopPhiForCounter(LHS, L); 7480b57cec5SDimitry Andric 7490b57cec5SDimitry Andric if (!Phi) 7500b57cec5SDimitry Andric return true; 7510b57cec5SDimitry Andric 7520b57cec5SDimitry Andric // Do LFTR if PHI node is defined in the loop, but is *not* a counter. 7530b57cec5SDimitry Andric int Idx = Phi->getBasicBlockIndex(L->getLoopLatch()); 7540b57cec5SDimitry Andric if (Idx < 0) 7550b57cec5SDimitry Andric return true; 7560b57cec5SDimitry Andric 7570b57cec5SDimitry Andric // Do LFTR if the exit condition's IV is *not* a simple counter. 7580b57cec5SDimitry Andric Value *IncV = Phi->getIncomingValue(Idx); 7590b57cec5SDimitry Andric return Phi != getLoopPhiForCounter(IncV, L); 7600b57cec5SDimitry Andric } 7610b57cec5SDimitry Andric 7620b57cec5SDimitry Andric /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils 7630b57cec5SDimitry Andric /// down to checking that all operands are constant and listing instructions 7640b57cec5SDimitry Andric /// that may hide undef. 7650b57cec5SDimitry Andric static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited, 7660b57cec5SDimitry Andric unsigned Depth) { 7670b57cec5SDimitry Andric if (isa<Constant>(V)) 7680b57cec5SDimitry Andric return !isa<UndefValue>(V); 7690b57cec5SDimitry Andric 7700b57cec5SDimitry Andric if (Depth >= 6) 7710b57cec5SDimitry Andric return false; 7720b57cec5SDimitry Andric 7730b57cec5SDimitry Andric // Conservatively handle non-constant non-instructions. For example, Arguments 7740b57cec5SDimitry Andric // may be undef. 7750b57cec5SDimitry Andric Instruction *I = dyn_cast<Instruction>(V); 7760b57cec5SDimitry Andric if (!I) 7770b57cec5SDimitry Andric return false; 7780b57cec5SDimitry Andric 7790b57cec5SDimitry Andric // Load and return values may be undef. 7800b57cec5SDimitry Andric if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I)) 7810b57cec5SDimitry Andric return false; 7820b57cec5SDimitry Andric 7830b57cec5SDimitry Andric // Optimistically handle other instructions. 7840b57cec5SDimitry Andric for (Value *Op : I->operands()) { 7850b57cec5SDimitry Andric if (!Visited.insert(Op).second) 7860b57cec5SDimitry Andric continue; 7870b57cec5SDimitry Andric if (!hasConcreteDefImpl(Op, Visited, Depth+1)) 7880b57cec5SDimitry Andric return false; 7890b57cec5SDimitry Andric } 7900b57cec5SDimitry Andric return true; 7910b57cec5SDimitry Andric } 7920b57cec5SDimitry Andric 7930b57cec5SDimitry Andric /// Return true if the given value is concrete. We must prove that undef can 7940b57cec5SDimitry Andric /// never reach it. 7950b57cec5SDimitry Andric /// 7960b57cec5SDimitry Andric /// TODO: If we decide that this is a good approach to checking for undef, we 7970b57cec5SDimitry Andric /// may factor it into a common location. 7980b57cec5SDimitry Andric static bool hasConcreteDef(Value *V) { 7990b57cec5SDimitry Andric SmallPtrSet<Value*, 8> Visited; 8000b57cec5SDimitry Andric Visited.insert(V); 8010b57cec5SDimitry Andric return hasConcreteDefImpl(V, Visited, 0); 8020b57cec5SDimitry Andric } 8030b57cec5SDimitry Andric 8040b57cec5SDimitry Andric /// Return true if the given phi is a "counter" in L. A counter is an 8050b57cec5SDimitry Andric /// add recurance (of integer or pointer type) with an arbitrary start, and a 8060b57cec5SDimitry Andric /// step of 1. Note that L must have exactly one latch. 8070b57cec5SDimitry Andric static bool isLoopCounter(PHINode* Phi, Loop *L, 8080b57cec5SDimitry Andric ScalarEvolution *SE) { 8090b57cec5SDimitry Andric assert(Phi->getParent() == L->getHeader()); 8100b57cec5SDimitry Andric assert(L->getLoopLatch()); 8110b57cec5SDimitry Andric 8120b57cec5SDimitry Andric if (!SE->isSCEVable(Phi->getType())) 8130b57cec5SDimitry Andric return false; 8140b57cec5SDimitry Andric 8150b57cec5SDimitry Andric const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi)); 8160b57cec5SDimitry Andric if (!AR || AR->getLoop() != L || !AR->isAffine()) 8170b57cec5SDimitry Andric return false; 8180b57cec5SDimitry Andric 8190b57cec5SDimitry Andric const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)); 8200b57cec5SDimitry Andric if (!Step || !Step->isOne()) 8210b57cec5SDimitry Andric return false; 8220b57cec5SDimitry Andric 8230b57cec5SDimitry Andric int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch()); 8240b57cec5SDimitry Andric Value *IncV = Phi->getIncomingValue(LatchIdx); 825fe6060f1SDimitry Andric return (getLoopPhiForCounter(IncV, L) == Phi && 826fe6060f1SDimitry Andric isa<SCEVAddRecExpr>(SE->getSCEV(IncV))); 8270b57cec5SDimitry Andric } 8280b57cec5SDimitry Andric 8290b57cec5SDimitry Andric /// Search the loop header for a loop counter (anadd rec w/step of one) 8300b57cec5SDimitry Andric /// suitable for use by LFTR. If multiple counters are available, select the 8310b57cec5SDimitry Andric /// "best" one based profitable heuristics. 8320b57cec5SDimitry Andric /// 8330b57cec5SDimitry Andric /// BECount may be an i8* pointer type. The pointer difference is already 8340b57cec5SDimitry Andric /// valid count without scaling the address stride, so it remains a pointer 8350b57cec5SDimitry Andric /// expression as far as SCEV is concerned. 8360b57cec5SDimitry Andric static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB, 8370b57cec5SDimitry Andric const SCEV *BECount, 8380b57cec5SDimitry Andric ScalarEvolution *SE, DominatorTree *DT) { 8390b57cec5SDimitry Andric uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType()); 8400b57cec5SDimitry Andric 8410b57cec5SDimitry Andric Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition(); 8420b57cec5SDimitry Andric 8430b57cec5SDimitry Andric // Loop over all of the PHI nodes, looking for a simple counter. 8440b57cec5SDimitry Andric PHINode *BestPhi = nullptr; 8450b57cec5SDimitry Andric const SCEV *BestInit = nullptr; 8460b57cec5SDimitry Andric BasicBlock *LatchBlock = L->getLoopLatch(); 8470b57cec5SDimitry Andric assert(LatchBlock && "Must be in simplified form"); 848*0fca6ea1SDimitry Andric const DataLayout &DL = L->getHeader()->getDataLayout(); 8490b57cec5SDimitry Andric 8500b57cec5SDimitry Andric for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 8510b57cec5SDimitry Andric PHINode *Phi = cast<PHINode>(I); 8520b57cec5SDimitry Andric if (!isLoopCounter(Phi, L, SE)) 8530b57cec5SDimitry Andric continue; 8540b57cec5SDimitry Andric 8558bcb0991SDimitry Andric const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi)); 8560b57cec5SDimitry Andric 8570b57cec5SDimitry Andric // AR may be a pointer type, while BECount is an integer type. 8580b57cec5SDimitry Andric // AR may be wider than BECount. With eq/ne tests overflow is immaterial. 8590b57cec5SDimitry Andric // AR may not be a narrower type, or we may never exit. 8600b57cec5SDimitry Andric uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType()); 8610b57cec5SDimitry Andric if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth)) 8620b57cec5SDimitry Andric continue; 8630b57cec5SDimitry Andric 8640b57cec5SDimitry Andric // Avoid reusing a potentially undef value to compute other values that may 8650b57cec5SDimitry Andric // have originally had a concrete definition. 8660b57cec5SDimitry Andric if (!hasConcreteDef(Phi)) { 8670b57cec5SDimitry Andric // We explicitly allow unknown phis as long as they are already used by 8680b57cec5SDimitry Andric // the loop exit test. This is legal since performing LFTR could not 8690b57cec5SDimitry Andric // increase the number of undef users. 8700b57cec5SDimitry Andric Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock); 8710b57cec5SDimitry Andric if (!isLoopExitTestBasedOn(Phi, ExitingBB) && 8720b57cec5SDimitry Andric !isLoopExitTestBasedOn(IncPhi, ExitingBB)) 8730b57cec5SDimitry Andric continue; 8740b57cec5SDimitry Andric } 8750b57cec5SDimitry Andric 8760b57cec5SDimitry Andric // Avoid introducing undefined behavior due to poison which didn't exist in 8770b57cec5SDimitry Andric // the original program. (Annoyingly, the rules for poison and undef 8780b57cec5SDimitry Andric // propagation are distinct, so this does NOT cover the undef case above.) 8790b57cec5SDimitry Andric // We have to ensure that we don't introduce UB by introducing a use on an 8800b57cec5SDimitry Andric // iteration where said IV produces poison. Our strategy here differs for 8810b57cec5SDimitry Andric // pointers and integer IVs. For integers, we strip and reinfer as needed, 8820b57cec5SDimitry Andric // see code in linearFunctionTestReplace. For pointers, we restrict 8830b57cec5SDimitry Andric // transforms as there is no good way to reinfer inbounds once lost. 8840b57cec5SDimitry Andric if (!Phi->getType()->isIntegerTy() && 8850b57cec5SDimitry Andric !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) 8860b57cec5SDimitry Andric continue; 8870b57cec5SDimitry Andric 8880b57cec5SDimitry Andric const SCEV *Init = AR->getStart(); 8890b57cec5SDimitry Andric 89006c3fb27SDimitry Andric if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) { 8910b57cec5SDimitry Andric // Don't force a live loop counter if another IV can be used. 89206c3fb27SDimitry Andric if (isAlmostDeadIV(Phi, LatchBlock, Cond)) 8930b57cec5SDimitry Andric continue; 8940b57cec5SDimitry Andric 8950b57cec5SDimitry Andric // Prefer to count-from-zero. This is a more "canonical" counter form. It 8960b57cec5SDimitry Andric // also prefers integer to pointer IVs. 8970b57cec5SDimitry Andric if (BestInit->isZero() != Init->isZero()) { 8980b57cec5SDimitry Andric if (BestInit->isZero()) 8990b57cec5SDimitry Andric continue; 9000b57cec5SDimitry Andric } 9010b57cec5SDimitry Andric // If two IVs both count from zero or both count from nonzero then the 9020b57cec5SDimitry Andric // narrower is likely a dead phi that has been widened. Use the wider phi 9030b57cec5SDimitry Andric // to allow the other to be eliminated. 9040b57cec5SDimitry Andric else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) 9050b57cec5SDimitry Andric continue; 9060b57cec5SDimitry Andric } 9070b57cec5SDimitry Andric BestPhi = Phi; 9080b57cec5SDimitry Andric BestInit = Init; 9090b57cec5SDimitry Andric } 9100b57cec5SDimitry Andric return BestPhi; 9110b57cec5SDimitry Andric } 9120b57cec5SDimitry Andric 9130b57cec5SDimitry Andric /// Insert an IR expression which computes the value held by the IV IndVar 9140b57cec5SDimitry Andric /// (which must be an loop counter w/unit stride) after the backedge of loop L 9150b57cec5SDimitry Andric /// is taken ExitCount times. 9160b57cec5SDimitry Andric static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, 9170b57cec5SDimitry Andric const SCEV *ExitCount, bool UsePostInc, Loop *L, 9180b57cec5SDimitry Andric SCEVExpander &Rewriter, ScalarEvolution *SE) { 9190b57cec5SDimitry Andric assert(isLoopCounter(IndVar, L, SE)); 92006c3fb27SDimitry Andric assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer"); 9210b57cec5SDimitry Andric const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 92204eeddc0SDimitry Andric assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 9230b57cec5SDimitry Andric 92406c3fb27SDimitry Andric // For integer IVs, truncate the IV before computing the limit unless we 92506c3fb27SDimitry Andric // know apriori that the limit must be a constant when evaluated in the 92606c3fb27SDimitry Andric // bitwidth of the IV. We prefer (potentially) keeping a truncate of the 92706c3fb27SDimitry Andric // IV in the loop over a (potentially) expensive expansion of the widened 92806c3fb27SDimitry Andric // exit count add(zext(add)) expression. 92906c3fb27SDimitry Andric if (IndVar->getType()->isIntegerTy() && 93006c3fb27SDimitry Andric SE->getTypeSizeInBits(AR->getType()) > 93106c3fb27SDimitry Andric SE->getTypeSizeInBits(ExitCount->getType())) { 93206c3fb27SDimitry Andric const SCEV *IVInit = AR->getStart(); 93306c3fb27SDimitry Andric if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount)) 93406c3fb27SDimitry Andric AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType())); 9350b57cec5SDimitry Andric } 9360b57cec5SDimitry Andric 93706c3fb27SDimitry Andric const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR; 93806c3fb27SDimitry Andric const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE); 9390b57cec5SDimitry Andric assert(SE->isLoopInvariant(IVLimit, L) && 9400b57cec5SDimitry Andric "Computed iteration count is not loop invariant!"); 94106c3fb27SDimitry Andric return Rewriter.expandCodeFor(IVLimit, ARBase->getType(), 94206c3fb27SDimitry Andric ExitingBB->getTerminator()); 9430b57cec5SDimitry Andric } 9440b57cec5SDimitry Andric 9450b57cec5SDimitry Andric /// This method rewrites the exit condition of the loop to be a canonical != 9460b57cec5SDimitry Andric /// comparison against the incremented loop induction variable. This pass is 9470b57cec5SDimitry Andric /// able to rewrite the exit tests of any loop where the SCEV analysis can 9480b57cec5SDimitry Andric /// determine a loop-invariant trip count of the loop, which is actually a much 9490b57cec5SDimitry Andric /// broader range than just linear tests. 9500b57cec5SDimitry Andric bool IndVarSimplify:: 9510b57cec5SDimitry Andric linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 9520b57cec5SDimitry Andric const SCEV *ExitCount, 9530b57cec5SDimitry Andric PHINode *IndVar, SCEVExpander &Rewriter) { 9540b57cec5SDimitry Andric assert(L->getLoopLatch() && "Loop no longer in simplified form?"); 9550b57cec5SDimitry Andric assert(isLoopCounter(IndVar, L, SE)); 9560b57cec5SDimitry Andric Instruction * const IncVar = 9570b57cec5SDimitry Andric cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch())); 9580b57cec5SDimitry Andric 9590b57cec5SDimitry Andric // Initialize CmpIndVar to the preincremented IV. 9600b57cec5SDimitry Andric Value *CmpIndVar = IndVar; 9610b57cec5SDimitry Andric bool UsePostInc = false; 9620b57cec5SDimitry Andric 9630b57cec5SDimitry Andric // If the exiting block is the same as the backedge block, we prefer to 9640b57cec5SDimitry Andric // compare against the post-incremented value, otherwise we must compare 9650b57cec5SDimitry Andric // against the preincremented value. 9660b57cec5SDimitry Andric if (ExitingBB == L->getLoopLatch()) { 9670b57cec5SDimitry Andric // For pointer IVs, we chose to not strip inbounds which requires us not 9680b57cec5SDimitry Andric // to add a potentially UB introducing use. We need to either a) show 9690b57cec5SDimitry Andric // the loop test we're modifying is already in post-inc form, or b) show 9700b57cec5SDimitry Andric // that adding a use must not introduce UB. 9710b57cec5SDimitry Andric bool SafeToPostInc = 9720b57cec5SDimitry Andric IndVar->getType()->isIntegerTy() || 9730b57cec5SDimitry Andric isLoopExitTestBasedOn(IncVar, ExitingBB) || 9740b57cec5SDimitry Andric mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT); 9750b57cec5SDimitry Andric if (SafeToPostInc) { 9760b57cec5SDimitry Andric UsePostInc = true; 9770b57cec5SDimitry Andric CmpIndVar = IncVar; 9780b57cec5SDimitry Andric } 9790b57cec5SDimitry Andric } 9800b57cec5SDimitry Andric 9810b57cec5SDimitry Andric // It may be necessary to drop nowrap flags on the incrementing instruction 9820b57cec5SDimitry Andric // if either LFTR moves from a pre-inc check to a post-inc check (in which 9830b57cec5SDimitry Andric // case the increment might have previously been poison on the last iteration 9840b57cec5SDimitry Andric // only) or if LFTR switches to a different IV that was previously dynamically 9850b57cec5SDimitry Andric // dead (and as such may be arbitrarily poison). We remove any nowrap flags 9860b57cec5SDimitry Andric // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc 9870b57cec5SDimitry Andric // check), because the pre-inc addrec flags may be adopted from the original 9880b57cec5SDimitry Andric // instruction, while SCEV has to explicitly prove the post-inc nowrap flags. 9890b57cec5SDimitry Andric // TODO: This handling is inaccurate for one case: If we switch to a 9900b57cec5SDimitry Andric // dynamically dead IV that wraps on the first loop iteration only, which is 9910b57cec5SDimitry Andric // not covered by the post-inc addrec. (If the new IV was not dynamically 9920b57cec5SDimitry Andric // dead, it could not be poison on the first iteration in the first place.) 9930b57cec5SDimitry Andric if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) { 9940b57cec5SDimitry Andric const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar)); 9950b57cec5SDimitry Andric if (BO->hasNoUnsignedWrap()) 9960b57cec5SDimitry Andric BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap()); 9970b57cec5SDimitry Andric if (BO->hasNoSignedWrap()) 9980b57cec5SDimitry Andric BO->setHasNoSignedWrap(AR->hasNoSignedWrap()); 9990b57cec5SDimitry Andric } 10000b57cec5SDimitry Andric 10010b57cec5SDimitry Andric Value *ExitCnt = genLoopLimit( 10020b57cec5SDimitry Andric IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE); 10030b57cec5SDimitry Andric assert(ExitCnt->getType()->isPointerTy() == 10040b57cec5SDimitry Andric IndVar->getType()->isPointerTy() && 10050b57cec5SDimitry Andric "genLoopLimit missed a cast"); 10060b57cec5SDimitry Andric 10070b57cec5SDimitry Andric // Insert a new icmp_ne or icmp_eq instruction before the branch. 10080b57cec5SDimitry Andric BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 10090b57cec5SDimitry Andric ICmpInst::Predicate P; 10100b57cec5SDimitry Andric if (L->contains(BI->getSuccessor(0))) 10110b57cec5SDimitry Andric P = ICmpInst::ICMP_NE; 10120b57cec5SDimitry Andric else 10130b57cec5SDimitry Andric P = ICmpInst::ICMP_EQ; 10140b57cec5SDimitry Andric 10150b57cec5SDimitry Andric IRBuilder<> Builder(BI); 10160b57cec5SDimitry Andric 10170b57cec5SDimitry Andric // The new loop exit condition should reuse the debug location of the 10180b57cec5SDimitry Andric // original loop exit condition. 10190b57cec5SDimitry Andric if (auto *Cond = dyn_cast<Instruction>(BI->getCondition())) 10200b57cec5SDimitry Andric Builder.SetCurrentDebugLocation(Cond->getDebugLoc()); 10210b57cec5SDimitry Andric 10220b57cec5SDimitry Andric // For integer IVs, if we evaluated the limit in the narrower bitwidth to 10230b57cec5SDimitry Andric // avoid the expensive expansion of the limit expression in the wider type, 10240b57cec5SDimitry Andric // emit a truncate to narrow the IV to the ExitCount type. This is safe 10250b57cec5SDimitry Andric // since we know (from the exit count bitwidth), that we can't self-wrap in 10260b57cec5SDimitry Andric // the narrower type. 10270b57cec5SDimitry Andric unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); 10280b57cec5SDimitry Andric unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); 10290b57cec5SDimitry Andric if (CmpIndVarSize > ExitCntSize) { 10300b57cec5SDimitry Andric assert(!CmpIndVar->getType()->isPointerTy() && 10310b57cec5SDimitry Andric !ExitCnt->getType()->isPointerTy()); 10320b57cec5SDimitry Andric 10330b57cec5SDimitry Andric // Before resorting to actually inserting the truncate, use the same 10340b57cec5SDimitry Andric // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend 10350b57cec5SDimitry Andric // the other side of the comparison instead. We still evaluate the limit 10360b57cec5SDimitry Andric // in the narrower bitwidth, we just prefer a zext/sext outside the loop to 10370b57cec5SDimitry Andric // a truncate within in. 10380b57cec5SDimitry Andric bool Extended = false; 10390b57cec5SDimitry Andric const SCEV *IV = SE->getSCEV(CmpIndVar); 104006c3fb27SDimitry Andric const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType()); 10410b57cec5SDimitry Andric const SCEV *ZExtTrunc = 10420b57cec5SDimitry Andric SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType()); 10430b57cec5SDimitry Andric 10440b57cec5SDimitry Andric if (ZExtTrunc == IV) { 10450b57cec5SDimitry Andric Extended = true; 10460b57cec5SDimitry Andric ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(), 10470b57cec5SDimitry Andric "wide.trip.count"); 10480b57cec5SDimitry Andric } else { 10490b57cec5SDimitry Andric const SCEV *SExtTrunc = 10500b57cec5SDimitry Andric SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType()); 10510b57cec5SDimitry Andric if (SExtTrunc == IV) { 10520b57cec5SDimitry Andric Extended = true; 10530b57cec5SDimitry Andric ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(), 10540b57cec5SDimitry Andric "wide.trip.count"); 10550b57cec5SDimitry Andric } 10560b57cec5SDimitry Andric } 10570b57cec5SDimitry Andric 10580b57cec5SDimitry Andric if (Extended) { 10590b57cec5SDimitry Andric bool Discard; 10600b57cec5SDimitry Andric L->makeLoopInvariant(ExitCnt, Discard); 10610b57cec5SDimitry Andric } else 10620b57cec5SDimitry Andric CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), 10630b57cec5SDimitry Andric "lftr.wideiv"); 10640b57cec5SDimitry Andric } 10650b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 10660b57cec5SDimitry Andric << " LHS:" << *CmpIndVar << '\n' 10670b57cec5SDimitry Andric << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==") 10680b57cec5SDimitry Andric << "\n" 10690b57cec5SDimitry Andric << " RHS:\t" << *ExitCnt << "\n" 10700b57cec5SDimitry Andric << "ExitCount:\t" << *ExitCount << "\n" 10710b57cec5SDimitry Andric << " was: " << *BI->getCondition() << "\n"); 10720b57cec5SDimitry Andric 10730b57cec5SDimitry Andric Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); 10740b57cec5SDimitry Andric Value *OrigCond = BI->getCondition(); 10750b57cec5SDimitry Andric // It's tempting to use replaceAllUsesWith here to fully replace the old 10760b57cec5SDimitry Andric // comparison, but that's not immediately safe, since users of the old 10770b57cec5SDimitry Andric // comparison may not be dominated by the new comparison. Instead, just 10780b57cec5SDimitry Andric // update the branch to use the new comparison; in the common case this 10790b57cec5SDimitry Andric // will make old comparison dead. 10800b57cec5SDimitry Andric BI->setCondition(Cond); 10815ffd83dbSDimitry Andric DeadInsts.emplace_back(OrigCond); 10820b57cec5SDimitry Andric 10830b57cec5SDimitry Andric ++NumLFTR; 10840b57cec5SDimitry Andric return true; 10850b57cec5SDimitry Andric } 10860b57cec5SDimitry Andric 10870b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 10880b57cec5SDimitry Andric // sinkUnusedInvariants. A late subpass to cleanup loop preheaders. 10890b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 10900b57cec5SDimitry Andric 10910b57cec5SDimitry Andric /// If there's a single exit block, sink any loop-invariant values that 10920b57cec5SDimitry Andric /// were defined in the preheader but not used inside the loop into the 10930b57cec5SDimitry Andric /// exit block to reduce register pressure in the loop. 10940b57cec5SDimitry Andric bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { 10950b57cec5SDimitry Andric BasicBlock *ExitBlock = L->getExitBlock(); 10960b57cec5SDimitry Andric if (!ExitBlock) return false; 10970b57cec5SDimitry Andric 10980b57cec5SDimitry Andric BasicBlock *Preheader = L->getLoopPreheader(); 10990b57cec5SDimitry Andric if (!Preheader) return false; 11000b57cec5SDimitry Andric 11010b57cec5SDimitry Andric bool MadeAnyChanges = false; 11020b57cec5SDimitry Andric BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); 11030b57cec5SDimitry Andric BasicBlock::iterator I(Preheader->getTerminator()); 11040b57cec5SDimitry Andric while (I != Preheader->begin()) { 11050b57cec5SDimitry Andric --I; 11060b57cec5SDimitry Andric // New instructions were inserted at the end of the preheader. 11070b57cec5SDimitry Andric if (isa<PHINode>(I)) 11080b57cec5SDimitry Andric break; 11090b57cec5SDimitry Andric 11100b57cec5SDimitry Andric // Don't move instructions which might have side effects, since the side 11110b57cec5SDimitry Andric // effects need to complete before instructions inside the loop. Also don't 11120b57cec5SDimitry Andric // move instructions which might read memory, since the loop may modify 11130b57cec5SDimitry Andric // memory. Note that it's okay if the instruction might have undefined 11140b57cec5SDimitry Andric // behavior: LoopSimplify guarantees that the preheader dominates the exit 11150b57cec5SDimitry Andric // block. 11160b57cec5SDimitry Andric if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 11170b57cec5SDimitry Andric continue; 11180b57cec5SDimitry Andric 11190b57cec5SDimitry Andric // Skip debug info intrinsics. 11200b57cec5SDimitry Andric if (isa<DbgInfoIntrinsic>(I)) 11210b57cec5SDimitry Andric continue; 11220b57cec5SDimitry Andric 11230b57cec5SDimitry Andric // Skip eh pad instructions. 11240b57cec5SDimitry Andric if (I->isEHPad()) 11250b57cec5SDimitry Andric continue; 11260b57cec5SDimitry Andric 11270b57cec5SDimitry Andric // Don't sink alloca: we never want to sink static alloca's out of the 11280b57cec5SDimitry Andric // entry block, and correctly sinking dynamic alloca's requires 11290b57cec5SDimitry Andric // checks for stacksave/stackrestore intrinsics. 11300b57cec5SDimitry Andric // FIXME: Refactor this check somehow? 11310b57cec5SDimitry Andric if (isa<AllocaInst>(I)) 11320b57cec5SDimitry Andric continue; 11330b57cec5SDimitry Andric 11340b57cec5SDimitry Andric // Determine if there is a use in or before the loop (direct or 11350b57cec5SDimitry Andric // otherwise). 11360b57cec5SDimitry Andric bool UsedInLoop = false; 11370b57cec5SDimitry Andric for (Use &U : I->uses()) { 11380b57cec5SDimitry Andric Instruction *User = cast<Instruction>(U.getUser()); 11390b57cec5SDimitry Andric BasicBlock *UseBB = User->getParent(); 11400b57cec5SDimitry Andric if (PHINode *P = dyn_cast<PHINode>(User)) { 11410b57cec5SDimitry Andric unsigned i = 11420b57cec5SDimitry Andric PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 11430b57cec5SDimitry Andric UseBB = P->getIncomingBlock(i); 11440b57cec5SDimitry Andric } 11450b57cec5SDimitry Andric if (UseBB == Preheader || L->contains(UseBB)) { 11460b57cec5SDimitry Andric UsedInLoop = true; 11470b57cec5SDimitry Andric break; 11480b57cec5SDimitry Andric } 11490b57cec5SDimitry Andric } 11500b57cec5SDimitry Andric 11510b57cec5SDimitry Andric // If there is, the def must remain in the preheader. 11520b57cec5SDimitry Andric if (UsedInLoop) 11530b57cec5SDimitry Andric continue; 11540b57cec5SDimitry Andric 11550b57cec5SDimitry Andric // Otherwise, sink it to the exit block. 11560b57cec5SDimitry Andric Instruction *ToMove = &*I; 11570b57cec5SDimitry Andric bool Done = false; 11580b57cec5SDimitry Andric 11590b57cec5SDimitry Andric if (I != Preheader->begin()) { 11600b57cec5SDimitry Andric // Skip debug info intrinsics. 11610b57cec5SDimitry Andric do { 11620b57cec5SDimitry Andric --I; 1163349cc55cSDimitry Andric } while (I->isDebugOrPseudoInst() && I != Preheader->begin()); 11640b57cec5SDimitry Andric 1165349cc55cSDimitry Andric if (I->isDebugOrPseudoInst() && I == Preheader->begin()) 11660b57cec5SDimitry Andric Done = true; 11670b57cec5SDimitry Andric } else { 11680b57cec5SDimitry Andric Done = true; 11690b57cec5SDimitry Andric } 11700b57cec5SDimitry Andric 11710b57cec5SDimitry Andric MadeAnyChanges = true; 11720b57cec5SDimitry Andric ToMove->moveBefore(*ExitBlock, InsertPt); 1173bdd1243dSDimitry Andric SE->forgetValue(ToMove); 11740b57cec5SDimitry Andric if (Done) break; 11750b57cec5SDimitry Andric InsertPt = ToMove->getIterator(); 11760b57cec5SDimitry Andric } 11770b57cec5SDimitry Andric 11780b57cec5SDimitry Andric return MadeAnyChanges; 11790b57cec5SDimitry Andric } 11800b57cec5SDimitry Andric 1181e8d8bef9SDimitry Andric static void replaceExitCond(BranchInst *BI, Value *NewCond, 1182e8d8bef9SDimitry Andric SmallVectorImpl<WeakTrackingVH> &DeadInsts) { 1183e8d8bef9SDimitry Andric auto *OldCond = BI->getCondition(); 1184bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI 1185bdd1243dSDimitry Andric << " with " << *NewCond << "\n"); 1186e8d8bef9SDimitry Andric BI->setCondition(NewCond); 1187e8d8bef9SDimitry Andric if (OldCond->use_empty()) 1188e8d8bef9SDimitry Andric DeadInsts.emplace_back(OldCond); 1189e8d8bef9SDimitry Andric } 11900b57cec5SDimitry Andric 1191bdd1243dSDimitry Andric static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB, 1192bdd1243dSDimitry Andric bool IsTaken) { 1193e8d8bef9SDimitry Andric BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1194e8d8bef9SDimitry Andric bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); 1195e8d8bef9SDimitry Andric auto *OldCond = BI->getCondition(); 1196bdd1243dSDimitry Andric return ConstantInt::get(OldCond->getType(), 1197bdd1243dSDimitry Andric IsTaken ? ExitIfTrue : !ExitIfTrue); 1198bdd1243dSDimitry Andric } 1199bdd1243dSDimitry Andric 1200bdd1243dSDimitry Andric static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken, 1201bdd1243dSDimitry Andric SmallVectorImpl<WeakTrackingVH> &DeadInsts) { 1202bdd1243dSDimitry Andric BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1203bdd1243dSDimitry Andric auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken); 1204e8d8bef9SDimitry Andric replaceExitCond(BI, NewCond, DeadInsts); 12050b57cec5SDimitry Andric } 1206e8d8bef9SDimitry Andric 1207349cc55cSDimitry Andric static void replaceLoopPHINodesWithPreheaderValues( 1208bdd1243dSDimitry Andric LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts, 1209bdd1243dSDimitry Andric ScalarEvolution &SE) { 1210349cc55cSDimitry Andric assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!"); 1211349cc55cSDimitry Andric auto *LoopPreheader = L->getLoopPreheader(); 1212349cc55cSDimitry Andric auto *LoopHeader = L->getHeader(); 1213753f127fSDimitry Andric SmallVector<Instruction *> Worklist; 1214349cc55cSDimitry Andric for (auto &PN : LoopHeader->phis()) { 1215349cc55cSDimitry Andric auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader); 1216753f127fSDimitry Andric for (User *U : PN.users()) 1217753f127fSDimitry Andric Worklist.push_back(cast<Instruction>(U)); 1218bdd1243dSDimitry Andric SE.forgetValue(&PN); 1219349cc55cSDimitry Andric PN.replaceAllUsesWith(PreheaderIncoming); 1220349cc55cSDimitry Andric DeadInsts.emplace_back(&PN); 1221349cc55cSDimitry Andric } 1222753f127fSDimitry Andric 1223753f127fSDimitry Andric // Replacing with the preheader value will often allow IV users to simplify 1224753f127fSDimitry Andric // (especially if the preheader value is a constant). 1225753f127fSDimitry Andric SmallPtrSet<Instruction *, 16> Visited; 1226753f127fSDimitry Andric while (!Worklist.empty()) { 1227753f127fSDimitry Andric auto *I = cast<Instruction>(Worklist.pop_back_val()); 1228753f127fSDimitry Andric if (!Visited.insert(I).second) 1229753f127fSDimitry Andric continue; 1230753f127fSDimitry Andric 1231753f127fSDimitry Andric // Don't simplify instructions outside the loop. 1232753f127fSDimitry Andric if (!L->contains(I)) 1233753f127fSDimitry Andric continue; 1234753f127fSDimitry Andric 1235*0fca6ea1SDimitry Andric Value *Res = simplifyInstruction(I, I->getDataLayout()); 1236753f127fSDimitry Andric if (Res && LI->replacementPreservesLCSSAForm(I, Res)) { 1237753f127fSDimitry Andric for (User *U : I->users()) 1238753f127fSDimitry Andric Worklist.push_back(cast<Instruction>(U)); 1239753f127fSDimitry Andric I->replaceAllUsesWith(Res); 1240753f127fSDimitry Andric DeadInsts.emplace_back(I); 1241753f127fSDimitry Andric } 1242753f127fSDimitry Andric } 1243349cc55cSDimitry Andric } 1244349cc55cSDimitry Andric 1245bdd1243dSDimitry Andric static Value * 1246bdd1243dSDimitry Andric createInvariantCond(const Loop *L, BasicBlock *ExitingBB, 1247bdd1243dSDimitry Andric const ScalarEvolution::LoopInvariantPredicate &LIP, 1248bdd1243dSDimitry Andric SCEVExpander &Rewriter) { 1249bdd1243dSDimitry Andric ICmpInst::Predicate InvariantPred = LIP.Pred; 125006c3fb27SDimitry Andric BasicBlock *Preheader = L->getLoopPreheader(); 125106c3fb27SDimitry Andric assert(Preheader && "Preheader doesn't exist"); 125206c3fb27SDimitry Andric Rewriter.setInsertPoint(Preheader->getTerminator()); 1253bdd1243dSDimitry Andric auto *LHSV = Rewriter.expandCodeFor(LIP.LHS); 1254bdd1243dSDimitry Andric auto *RHSV = Rewriter.expandCodeFor(LIP.RHS); 1255e8d8bef9SDimitry Andric bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); 1256e8d8bef9SDimitry Andric if (ExitIfTrue) 1257e8d8bef9SDimitry Andric InvariantPred = ICmpInst::getInversePredicate(InvariantPred); 125806c3fb27SDimitry Andric IRBuilder<> Builder(Preheader->getTerminator()); 125906c3fb27SDimitry Andric BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1260bdd1243dSDimitry Andric return Builder.CreateICmp(InvariantPred, LHSV, RHSV, 1261e8d8bef9SDimitry Andric BI->getCondition()->getName()); 12620b57cec5SDimitry Andric } 1263e8d8bef9SDimitry Andric 1264bdd1243dSDimitry Andric static std::optional<Value *> 1265bdd1243dSDimitry Andric createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB, 1266e8d8bef9SDimitry Andric const SCEV *MaxIter, bool Inverted, bool SkipLastIter, 1267bdd1243dSDimitry Andric ScalarEvolution *SE, SCEVExpander &Rewriter) { 1268bdd1243dSDimitry Andric ICmpInst::Predicate Pred = ICmp->getPredicate(); 1269bdd1243dSDimitry Andric Value *LHS = ICmp->getOperand(0); 1270bdd1243dSDimitry Andric Value *RHS = ICmp->getOperand(1); 1271e8d8bef9SDimitry Andric 1272e8d8bef9SDimitry Andric // 'LHS pred RHS' should now mean that we stay in loop. 1273bdd1243dSDimitry Andric auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1274e8d8bef9SDimitry Andric if (Inverted) 1275e8d8bef9SDimitry Andric Pred = CmpInst::getInversePredicate(Pred); 1276e8d8bef9SDimitry Andric 1277e8d8bef9SDimitry Andric const SCEV *LHSS = SE->getSCEVAtScope(LHS, L); 1278e8d8bef9SDimitry Andric const SCEV *RHSS = SE->getSCEVAtScope(RHS, L); 1279bdd1243dSDimitry Andric // Can we prove it to be trivially true or false? 1280bdd1243dSDimitry Andric if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI)) 1281bdd1243dSDimitry Andric return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV); 1282e8d8bef9SDimitry Andric 1283e8d8bef9SDimitry Andric auto *ARTy = LHSS->getType(); 1284e8d8bef9SDimitry Andric auto *MaxIterTy = MaxIter->getType(); 1285e8d8bef9SDimitry Andric // If possible, adjust types. 1286e8d8bef9SDimitry Andric if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy)) 1287e8d8bef9SDimitry Andric MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy); 1288e8d8bef9SDimitry Andric else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) { 1289e8d8bef9SDimitry Andric const SCEV *MinusOne = SE->getMinusOne(ARTy); 1290e8d8bef9SDimitry Andric auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy); 1291e8d8bef9SDimitry Andric if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI)) 1292e8d8bef9SDimitry Andric MaxIter = SE->getTruncateExpr(MaxIter, ARTy); 1293e8d8bef9SDimitry Andric } 1294e8d8bef9SDimitry Andric 1295e8d8bef9SDimitry Andric if (SkipLastIter) { 1296bdd1243dSDimitry Andric // Semantically skip last iter is "subtract 1, do not bother about unsigned 1297bdd1243dSDimitry Andric // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal 1298bdd1243dSDimitry Andric // with umin in a smart way, but umin(a, b) - 1 will likely not simplify. 1299bdd1243dSDimitry Andric // So we manually construct umin(a - 1, b - 1). 1300bdd1243dSDimitry Andric SmallVector<const SCEV *, 4> Elements; 1301bdd1243dSDimitry Andric if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) { 1302bdd1243dSDimitry Andric for (auto *Op : UMin->operands()) 1303bdd1243dSDimitry Andric Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType()))); 1304bdd1243dSDimitry Andric MaxIter = SE->getUMinFromMismatchedTypes(Elements); 1305bdd1243dSDimitry Andric } else 1306bdd1243dSDimitry Andric MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType())); 1307e8d8bef9SDimitry Andric } 1308e8d8bef9SDimitry Andric 1309e8d8bef9SDimitry Andric // Check if there is a loop-invariant predicate equivalent to our check. 1310e8d8bef9SDimitry Andric auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS, 1311e8d8bef9SDimitry Andric L, BI, MaxIter); 1312e8d8bef9SDimitry Andric if (!LIP) 1313bdd1243dSDimitry Andric return std::nullopt; 1314e8d8bef9SDimitry Andric 1315e8d8bef9SDimitry Andric // Can we prove it to be trivially true? 1316e8d8bef9SDimitry Andric if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI)) 1317bdd1243dSDimitry Andric return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false); 1318e8d8bef9SDimitry Andric else 1319bdd1243dSDimitry Andric return createInvariantCond(L, ExitingBB, *LIP, Rewriter); 1320bdd1243dSDimitry Andric } 1321e8d8bef9SDimitry Andric 1322bdd1243dSDimitry Andric static bool optimizeLoopExitWithUnknownExitCount( 1323bdd1243dSDimitry Andric const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter, 1324bdd1243dSDimitry Andric bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter, 1325bdd1243dSDimitry Andric SmallVectorImpl<WeakTrackingVH> &DeadInsts) { 1326bdd1243dSDimitry Andric assert( 1327bdd1243dSDimitry Andric (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) && 1328bdd1243dSDimitry Andric "Not a loop exit!"); 1329bdd1243dSDimitry Andric 1330bdd1243dSDimitry Andric // For branch that stays in loop by TRUE condition, go through AND. For branch 1331bdd1243dSDimitry Andric // that stays in loop by FALSE condition, go through OR. Both gives the 1332bdd1243dSDimitry Andric // similar logic: "stay in loop iff all conditions are true(false)". 1333bdd1243dSDimitry Andric bool Inverted = L->contains(BI->getSuccessor(1)); 1334bdd1243dSDimitry Andric SmallVector<ICmpInst *, 4> LeafConditions; 1335bdd1243dSDimitry Andric SmallVector<Value *, 4> Worklist; 1336bdd1243dSDimitry Andric SmallPtrSet<Value *, 4> Visited; 1337bdd1243dSDimitry Andric Value *OldCond = BI->getCondition(); 1338bdd1243dSDimitry Andric Visited.insert(OldCond); 1339bdd1243dSDimitry Andric Worklist.push_back(OldCond); 1340bdd1243dSDimitry Andric 1341bdd1243dSDimitry Andric auto GoThrough = [&](Value *V) { 1342bdd1243dSDimitry Andric Value *LHS = nullptr, *RHS = nullptr; 1343bdd1243dSDimitry Andric if (Inverted) { 1344bdd1243dSDimitry Andric if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS)))) 1345bdd1243dSDimitry Andric return false; 1346bdd1243dSDimitry Andric } else { 1347bdd1243dSDimitry Andric if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS)))) 1348bdd1243dSDimitry Andric return false; 1349bdd1243dSDimitry Andric } 1350bdd1243dSDimitry Andric if (Visited.insert(LHS).second) 1351bdd1243dSDimitry Andric Worklist.push_back(LHS); 1352bdd1243dSDimitry Andric if (Visited.insert(RHS).second) 1353bdd1243dSDimitry Andric Worklist.push_back(RHS); 1354e8d8bef9SDimitry Andric return true; 1355bdd1243dSDimitry Andric }; 1356bdd1243dSDimitry Andric 1357bdd1243dSDimitry Andric do { 1358bdd1243dSDimitry Andric Value *Curr = Worklist.pop_back_val(); 1359bdd1243dSDimitry Andric // Go through AND/OR conditions. Collect leaf ICMPs. We only care about 1360bdd1243dSDimitry Andric // those with one use, to avoid instruction duplication. 1361bdd1243dSDimitry Andric if (Curr->hasOneUse()) 1362bdd1243dSDimitry Andric if (!GoThrough(Curr)) 1363bdd1243dSDimitry Andric if (auto *ICmp = dyn_cast<ICmpInst>(Curr)) 1364bdd1243dSDimitry Andric LeafConditions.push_back(ICmp); 1365bdd1243dSDimitry Andric } while (!Worklist.empty()); 1366bdd1243dSDimitry Andric 1367bdd1243dSDimitry Andric // If the current basic block has the same exit count as the whole loop, and 1368bdd1243dSDimitry Andric // it consists of multiple icmp's, try to collect all icmp's that give exact 1369bdd1243dSDimitry Andric // same exit count. For all other icmp's, we could use one less iteration, 1370bdd1243dSDimitry Andric // because their value on the last iteration doesn't really matter. 1371bdd1243dSDimitry Andric SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter; 1372bdd1243dSDimitry Andric if (!SkipLastIter && LeafConditions.size() > 1 && 1373bdd1243dSDimitry Andric SE->getExitCount(L, ExitingBB, 1374bdd1243dSDimitry Andric ScalarEvolution::ExitCountKind::SymbolicMaximum) == 1375bdd1243dSDimitry Andric MaxIter) 1376bdd1243dSDimitry Andric for (auto *ICmp : LeafConditions) { 1377bdd1243dSDimitry Andric auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted, 1378bdd1243dSDimitry Andric /*ControlsExit*/ false); 1379bdd1243dSDimitry Andric auto *ExitMax = EL.SymbolicMaxNotTaken; 1380bdd1243dSDimitry Andric if (isa<SCEVCouldNotCompute>(ExitMax)) 1381bdd1243dSDimitry Andric continue; 1382bdd1243dSDimitry Andric // They could be of different types (specifically this happens after 1383bdd1243dSDimitry Andric // IV widening). 1384bdd1243dSDimitry Andric auto *WiderType = 1385bdd1243dSDimitry Andric SE->getWiderType(ExitMax->getType(), MaxIter->getType()); 1386bdd1243dSDimitry Andric auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType); 1387bdd1243dSDimitry Andric auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType); 1388bdd1243dSDimitry Andric if (WideExitMax == WideMaxIter) 1389bdd1243dSDimitry Andric ICmpsFailingOnLastIter.insert(ICmp); 1390bdd1243dSDimitry Andric } 1391bdd1243dSDimitry Andric 1392bdd1243dSDimitry Andric bool Changed = false; 1393bdd1243dSDimitry Andric for (auto *OldCond : LeafConditions) { 1394bdd1243dSDimitry Andric // Skip last iteration for this icmp under one of two conditions: 1395bdd1243dSDimitry Andric // - We do it for all conditions; 1396bdd1243dSDimitry Andric // - There is another ICmp that would fail on last iter, so this one doesn't 1397bdd1243dSDimitry Andric // really matter. 1398bdd1243dSDimitry Andric bool OptimisticSkipLastIter = SkipLastIter; 1399bdd1243dSDimitry Andric if (!OptimisticSkipLastIter) { 1400bdd1243dSDimitry Andric if (ICmpsFailingOnLastIter.size() > 1) 1401bdd1243dSDimitry Andric OptimisticSkipLastIter = true; 1402bdd1243dSDimitry Andric else if (ICmpsFailingOnLastIter.size() == 1) 1403bdd1243dSDimitry Andric OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond); 1404bdd1243dSDimitry Andric } 1405bdd1243dSDimitry Andric if (auto Replaced = 1406bdd1243dSDimitry Andric createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted, 1407bdd1243dSDimitry Andric OptimisticSkipLastIter, SE, Rewriter)) { 1408bdd1243dSDimitry Andric Changed = true; 1409bdd1243dSDimitry Andric auto *NewCond = *Replaced; 1410bdd1243dSDimitry Andric if (auto *NCI = dyn_cast<Instruction>(NewCond)) { 1411bdd1243dSDimitry Andric NCI->setName(OldCond->getName() + ".first_iter"); 1412bdd1243dSDimitry Andric } 1413bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond 1414bdd1243dSDimitry Andric << " with " << *NewCond << "\n"); 1415bdd1243dSDimitry Andric assert(OldCond->hasOneUse() && "Must be!"); 1416bdd1243dSDimitry Andric OldCond->replaceAllUsesWith(NewCond); 1417bdd1243dSDimitry Andric DeadInsts.push_back(OldCond); 1418bdd1243dSDimitry Andric // Make sure we no longer consider this condition as failing on last 1419bdd1243dSDimitry Andric // iteration. 1420bdd1243dSDimitry Andric ICmpsFailingOnLastIter.erase(OldCond); 1421bdd1243dSDimitry Andric } 1422bdd1243dSDimitry Andric } 1423bdd1243dSDimitry Andric return Changed; 14248bcb0991SDimitry Andric } 14250b57cec5SDimitry Andric 1426349cc55cSDimitry Andric bool IndVarSimplify::canonicalizeExitCondition(Loop *L) { 1427349cc55cSDimitry Andric // Note: This is duplicating a particular part on SimplifyIndVars reasoning. 1428349cc55cSDimitry Andric // We need to duplicate it because given icmp zext(small-iv), C, IVUsers 1429349cc55cSDimitry Andric // never reaches the icmp since the zext doesn't fold to an AddRec unless 1430349cc55cSDimitry Andric // it already has flags. The alternative to this would be to extending the 1431349cc55cSDimitry Andric // set of "interesting" IV users to include the icmp, but doing that 1432349cc55cSDimitry Andric // regresses results in practice by querying SCEVs before trip counts which 1433349cc55cSDimitry Andric // rely on them which results in SCEV caching sub-optimal answers. The 1434349cc55cSDimitry Andric // concern about caching sub-optimal results is why we only query SCEVs of 1435349cc55cSDimitry Andric // the loop invariant RHS here. 1436349cc55cSDimitry Andric SmallVector<BasicBlock*, 16> ExitingBlocks; 1437349cc55cSDimitry Andric L->getExitingBlocks(ExitingBlocks); 1438349cc55cSDimitry Andric bool Changed = false; 1439349cc55cSDimitry Andric for (auto *ExitingBB : ExitingBlocks) { 1440349cc55cSDimitry Andric auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 1441349cc55cSDimitry Andric if (!BI) 1442349cc55cSDimitry Andric continue; 1443349cc55cSDimitry Andric assert(BI->isConditional() && "exit branch must be conditional"); 1444349cc55cSDimitry Andric 1445349cc55cSDimitry Andric auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition()); 1446349cc55cSDimitry Andric if (!ICmp || !ICmp->hasOneUse()) 1447349cc55cSDimitry Andric continue; 1448349cc55cSDimitry Andric 1449349cc55cSDimitry Andric auto *LHS = ICmp->getOperand(0); 1450349cc55cSDimitry Andric auto *RHS = ICmp->getOperand(1); 1451349cc55cSDimitry Andric // For the range reasoning, avoid computing SCEVs in the loop to avoid 1452349cc55cSDimitry Andric // poisoning cache with sub-optimal results. For the must-execute case, 1453349cc55cSDimitry Andric // this is a neccessary precondition for correctness. 1454349cc55cSDimitry Andric if (!L->isLoopInvariant(RHS)) { 1455349cc55cSDimitry Andric if (!L->isLoopInvariant(LHS)) 1456349cc55cSDimitry Andric continue; 1457349cc55cSDimitry Andric // Same logic applies for the inverse case 1458349cc55cSDimitry Andric std::swap(LHS, RHS); 1459349cc55cSDimitry Andric } 1460349cc55cSDimitry Andric 1461349cc55cSDimitry Andric // Match (icmp signed-cond zext, RHS) 1462349cc55cSDimitry Andric Value *LHSOp = nullptr; 1463349cc55cSDimitry Andric if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned()) 1464349cc55cSDimitry Andric continue; 1465349cc55cSDimitry Andric 1466*0fca6ea1SDimitry Andric const DataLayout &DL = ExitingBB->getDataLayout(); 1467349cc55cSDimitry Andric const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType()); 1468349cc55cSDimitry Andric const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType()); 1469349cc55cSDimitry Andric auto FullCR = ConstantRange::getFull(InnerBitWidth); 1470349cc55cSDimitry Andric FullCR = FullCR.zeroExtend(OuterBitWidth); 1471349cc55cSDimitry Andric auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L)); 1472349cc55cSDimitry Andric if (FullCR.contains(RHSCR)) { 1473349cc55cSDimitry Andric // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus 1474349cc55cSDimitry Andric // replace the signed condition with the unsigned version. 1475349cc55cSDimitry Andric ICmp->setPredicate(ICmp->getUnsignedPredicate()); 1476349cc55cSDimitry Andric Changed = true; 1477349cc55cSDimitry Andric // Note: No SCEV invalidation needed. We've changed the predicate, but 1478349cc55cSDimitry Andric // have not changed exit counts, or the values produced by the compare. 1479349cc55cSDimitry Andric continue; 1480349cc55cSDimitry Andric } 1481349cc55cSDimitry Andric } 1482349cc55cSDimitry Andric 1483349cc55cSDimitry Andric // Now that we've canonicalized the condition to match the extend, 1484349cc55cSDimitry Andric // see if we can rotate the extend out of the loop. 1485349cc55cSDimitry Andric for (auto *ExitingBB : ExitingBlocks) { 1486349cc55cSDimitry Andric auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 1487349cc55cSDimitry Andric if (!BI) 1488349cc55cSDimitry Andric continue; 1489349cc55cSDimitry Andric assert(BI->isConditional() && "exit branch must be conditional"); 1490349cc55cSDimitry Andric 1491349cc55cSDimitry Andric auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition()); 1492349cc55cSDimitry Andric if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned()) 1493349cc55cSDimitry Andric continue; 1494349cc55cSDimitry Andric 1495349cc55cSDimitry Andric bool Swapped = false; 1496349cc55cSDimitry Andric auto *LHS = ICmp->getOperand(0); 1497349cc55cSDimitry Andric auto *RHS = ICmp->getOperand(1); 1498349cc55cSDimitry Andric if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS)) 1499349cc55cSDimitry Andric // Nothing to rotate 1500349cc55cSDimitry Andric continue; 1501349cc55cSDimitry Andric if (L->isLoopInvariant(LHS)) { 1502349cc55cSDimitry Andric // Same logic applies for the inverse case until we actually pick 1503349cc55cSDimitry Andric // which operand of the compare to update. 1504349cc55cSDimitry Andric Swapped = true; 1505349cc55cSDimitry Andric std::swap(LHS, RHS); 1506349cc55cSDimitry Andric } 1507349cc55cSDimitry Andric assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS)); 1508349cc55cSDimitry Andric 1509349cc55cSDimitry Andric // Match (icmp unsigned-cond zext, RHS) 1510349cc55cSDimitry Andric // TODO: Extend to handle corresponding sext/signed-cmp case 1511349cc55cSDimitry Andric // TODO: Extend to other invertible functions 1512349cc55cSDimitry Andric Value *LHSOp = nullptr; 1513349cc55cSDimitry Andric if (!match(LHS, m_ZExt(m_Value(LHSOp)))) 1514349cc55cSDimitry Andric continue; 1515349cc55cSDimitry Andric 1516349cc55cSDimitry Andric // In general, we only rotate if we can do so without increasing the number 1517349cc55cSDimitry Andric // of instructions. The exception is when we have an zext(add-rec). The 1518349cc55cSDimitry Andric // reason for allowing this exception is that we know we need to get rid 1519349cc55cSDimitry Andric // of the zext for SCEV to be able to compute a trip count for said loops; 1520349cc55cSDimitry Andric // we consider the new trip count valuable enough to increase instruction 1521349cc55cSDimitry Andric // count by one. 1522349cc55cSDimitry Andric if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp))) 1523349cc55cSDimitry Andric continue; 1524349cc55cSDimitry Andric 1525349cc55cSDimitry Andric // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS 1526349cc55cSDimitry Andric // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS) 1527349cc55cSDimitry Andric // when zext is loop varying and RHS is loop invariant. This converts 1528349cc55cSDimitry Andric // loop varying work to loop-invariant work. 1529349cc55cSDimitry Andric auto doRotateTransform = [&]() { 1530349cc55cSDimitry Andric assert(ICmp->isUnsigned() && "must have proven unsigned already"); 1531*0fca6ea1SDimitry Andric auto *NewRHS = CastInst::Create( 1532*0fca6ea1SDimitry Andric Instruction::Trunc, RHS, LHSOp->getType(), "", 1533*0fca6ea1SDimitry Andric L->getLoopPreheader()->getTerminator()->getIterator()); 1534349cc55cSDimitry Andric ICmp->setOperand(Swapped ? 1 : 0, LHSOp); 1535349cc55cSDimitry Andric ICmp->setOperand(Swapped ? 0 : 1, NewRHS); 1536349cc55cSDimitry Andric if (LHS->use_empty()) 1537349cc55cSDimitry Andric DeadInsts.push_back(LHS); 1538349cc55cSDimitry Andric }; 1539349cc55cSDimitry Andric 1540349cc55cSDimitry Andric 1541*0fca6ea1SDimitry Andric const DataLayout &DL = ExitingBB->getDataLayout(); 1542349cc55cSDimitry Andric const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType()); 1543349cc55cSDimitry Andric const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType()); 1544349cc55cSDimitry Andric auto FullCR = ConstantRange::getFull(InnerBitWidth); 1545349cc55cSDimitry Andric FullCR = FullCR.zeroExtend(OuterBitWidth); 1546349cc55cSDimitry Andric auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L)); 1547349cc55cSDimitry Andric if (FullCR.contains(RHSCR)) { 1548349cc55cSDimitry Andric doRotateTransform(); 1549349cc55cSDimitry Andric Changed = true; 1550349cc55cSDimitry Andric // Note, we are leaving SCEV in an unfortunately imprecise case here 1551349cc55cSDimitry Andric // as rotation tends to reveal information about trip counts not 1552349cc55cSDimitry Andric // previously visible. 1553349cc55cSDimitry Andric continue; 1554349cc55cSDimitry Andric } 1555349cc55cSDimitry Andric } 1556349cc55cSDimitry Andric 1557349cc55cSDimitry Andric return Changed; 1558349cc55cSDimitry Andric } 1559349cc55cSDimitry Andric 15608bcb0991SDimitry Andric bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { 15618bcb0991SDimitry Andric SmallVector<BasicBlock*, 16> ExitingBlocks; 15628bcb0991SDimitry Andric L->getExitingBlocks(ExitingBlocks); 15638bcb0991SDimitry Andric 1564e8d8bef9SDimitry Andric // Remove all exits which aren't both rewriteable and execute on every 1565e8d8bef9SDimitry Andric // iteration. 1566e8d8bef9SDimitry Andric llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) { 15670b57cec5SDimitry Andric // If our exitting block exits multiple loops, we can only rewrite the 15680b57cec5SDimitry Andric // innermost one. Otherwise, we're changing how many times the innermost 15690b57cec5SDimitry Andric // loop runs before it exits. 15700b57cec5SDimitry Andric if (LI->getLoopFor(ExitingBB) != L) 15718bcb0991SDimitry Andric return true; 15720b57cec5SDimitry Andric 15730b57cec5SDimitry Andric // Can't rewrite non-branch yet. 15740b57cec5SDimitry Andric BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 15750b57cec5SDimitry Andric if (!BI) 15768bcb0991SDimitry Andric return true; 15770b57cec5SDimitry Andric 1578e8d8bef9SDimitry Andric // Likewise, the loop latch must be dominated by the exiting BB. 1579e8d8bef9SDimitry Andric if (!DT->dominates(ExitingBB, L->getLoopLatch())) 15808bcb0991SDimitry Andric return true; 1581e8d8bef9SDimitry Andric 1582753f127fSDimitry Andric if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { 1583753f127fSDimitry Andric // If already constant, nothing to do. However, if this is an 1584753f127fSDimitry Andric // unconditional exit, we can still replace header phis with their 1585753f127fSDimitry Andric // preheader value. 1586753f127fSDimitry Andric if (!L->contains(BI->getSuccessor(CI->isNullValue()))) 1587bdd1243dSDimitry Andric replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE); 1588753f127fSDimitry Andric return true; 1589753f127fSDimitry Andric } 1590753f127fSDimitry Andric 15918bcb0991SDimitry Andric return false; 15928bcb0991SDimitry Andric }); 15938bcb0991SDimitry Andric 15948bcb0991SDimitry Andric if (ExitingBlocks.empty()) 15958bcb0991SDimitry Andric return false; 15968bcb0991SDimitry Andric 15978bcb0991SDimitry Andric // Get a symbolic upper bound on the loop backedge taken count. 1598bdd1243dSDimitry Andric const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L); 1599bdd1243dSDimitry Andric if (isa<SCEVCouldNotCompute>(MaxBECount)) 16008bcb0991SDimitry Andric return false; 16018bcb0991SDimitry Andric 16028bcb0991SDimitry Andric // Visit our exit blocks in order of dominance. We know from the fact that 1603e8d8bef9SDimitry Andric // all exits must dominate the latch, so there is a total dominance order 1604e8d8bef9SDimitry Andric // between them. 1605e8d8bef9SDimitry Andric llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) { 16068bcb0991SDimitry Andric // std::sort sorts in ascending order, so we want the inverse of 16078bcb0991SDimitry Andric // the normal dominance relation. 16085ffd83dbSDimitry Andric if (A == B) return false; 1609e8d8bef9SDimitry Andric if (DT->properlyDominates(A, B)) 1610e8d8bef9SDimitry Andric return true; 1611e8d8bef9SDimitry Andric else { 1612e8d8bef9SDimitry Andric assert(DT->properlyDominates(B, A) && 1613e8d8bef9SDimitry Andric "expected total dominance order!"); 1614e8d8bef9SDimitry Andric return false; 1615e8d8bef9SDimitry Andric } 16168bcb0991SDimitry Andric }); 16178bcb0991SDimitry Andric #ifdef ASSERT 16188bcb0991SDimitry Andric for (unsigned i = 1; i < ExitingBlocks.size(); i++) { 16198bcb0991SDimitry Andric assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])); 16208bcb0991SDimitry Andric } 16218bcb0991SDimitry Andric #endif 16228bcb0991SDimitry Andric 16238bcb0991SDimitry Andric bool Changed = false; 1624e8d8bef9SDimitry Andric bool SkipLastIter = false; 1625bdd1243dSDimitry Andric const SCEV *CurrMaxExit = SE->getCouldNotCompute(); 1626bdd1243dSDimitry Andric auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) { 1627bdd1243dSDimitry Andric if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount)) 1628bdd1243dSDimitry Andric return; 1629bdd1243dSDimitry Andric if (isa<SCEVCouldNotCompute>(CurrMaxExit)) 1630bdd1243dSDimitry Andric CurrMaxExit = MaxExitCount; 1631bdd1243dSDimitry Andric else 1632bdd1243dSDimitry Andric CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount); 1633bdd1243dSDimitry Andric // If the loop has more than 1 iteration, all further checks will be 1634bdd1243dSDimitry Andric // executed 1 iteration less. 1635bdd1243dSDimitry Andric if (CurrMaxExit == MaxBECount) 1636bdd1243dSDimitry Andric SkipLastIter = true; 1637bdd1243dSDimitry Andric }; 1638bdd1243dSDimitry Andric SmallSet<const SCEV *, 8> DominatingExactExitCounts; 16398bcb0991SDimitry Andric for (BasicBlock *ExitingBB : ExitingBlocks) { 1640bdd1243dSDimitry Andric const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB); 1641bdd1243dSDimitry Andric const SCEV *MaxExitCount = SE->getExitCount( 1642bdd1243dSDimitry Andric L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum); 1643bdd1243dSDimitry Andric if (isa<SCEVCouldNotCompute>(ExactExitCount)) { 1644e8d8bef9SDimitry Andric // Okay, we do not know the exit count here. Can we at least prove that it 1645e8d8bef9SDimitry Andric // will remain the same within iteration space? 1646e8d8bef9SDimitry Andric auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1647bdd1243dSDimitry Andric auto OptimizeCond = [&](bool SkipLastIter) { 1648bdd1243dSDimitry Andric return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB, 1649bdd1243dSDimitry Andric MaxBECount, SkipLastIter, 1650bdd1243dSDimitry Andric SE, Rewriter, DeadInsts); 1651e8d8bef9SDimitry Andric }; 1652e8d8bef9SDimitry Andric 1653e8d8bef9SDimitry Andric // TODO: We might have proved that we can skip the last iteration for 1654e8d8bef9SDimitry Andric // this check. In this case, we only want to check the condition on the 1655bdd1243dSDimitry Andric // pre-last iteration (MaxBECount - 1). However, there is a nasty 1656e8d8bef9SDimitry Andric // corner case: 1657e8d8bef9SDimitry Andric // 1658e8d8bef9SDimitry Andric // for (i = len; i != 0; i--) { ... check (i ult X) ... } 1659e8d8bef9SDimitry Andric // 1660e8d8bef9SDimitry Andric // If we could not prove that len != 0, then we also could not prove that 1661e8d8bef9SDimitry Andric // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then 1662e8d8bef9SDimitry Andric // OptimizeCond will likely not prove anything for it, even if it could 1663e8d8bef9SDimitry Andric // prove the same fact for len. 1664e8d8bef9SDimitry Andric // 1665e8d8bef9SDimitry Andric // As a temporary solution, we query both last and pre-last iterations in 1666e8d8bef9SDimitry Andric // hope that we will be able to prove triviality for at least one of 1667bdd1243dSDimitry Andric // them. We can stop querying MaxBECount for this case once SCEV 1668bdd1243dSDimitry Andric // understands that (MaxBECount - 1) will not overflow here. 1669bdd1243dSDimitry Andric if (OptimizeCond(false)) 1670e8d8bef9SDimitry Andric Changed = true; 1671bdd1243dSDimitry Andric else if (SkipLastIter && OptimizeCond(true)) 1672e8d8bef9SDimitry Andric Changed = true; 1673bdd1243dSDimitry Andric UpdateSkipLastIter(MaxExitCount); 1674e8d8bef9SDimitry Andric continue; 1675e8d8bef9SDimitry Andric } 1676e8d8bef9SDimitry Andric 1677bdd1243dSDimitry Andric UpdateSkipLastIter(ExactExitCount); 16780b57cec5SDimitry Andric 16790b57cec5SDimitry Andric // If we know we'd exit on the first iteration, rewrite the exit to 16800b57cec5SDimitry Andric // reflect this. This does not imply the loop must exit through this 16810b57cec5SDimitry Andric // exit; there may be an earlier one taken on the first iteration. 1682349cc55cSDimitry Andric // We know that the backedge can't be taken, so we replace all 1683349cc55cSDimitry Andric // the header PHIs with values coming from the preheader. 1684bdd1243dSDimitry Andric if (ExactExitCount->isZero()) { 1685e8d8bef9SDimitry Andric foldExit(L, ExitingBB, true, DeadInsts); 1686bdd1243dSDimitry Andric replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE); 16870b57cec5SDimitry Andric Changed = true; 16880b57cec5SDimitry Andric continue; 16890b57cec5SDimitry Andric } 16900b57cec5SDimitry Andric 1691bdd1243dSDimitry Andric assert(ExactExitCount->getType()->isIntegerTy() && 1692bdd1243dSDimitry Andric MaxBECount->getType()->isIntegerTy() && 1693349cc55cSDimitry Andric "Exit counts must be integers"); 16940b57cec5SDimitry Andric 16950b57cec5SDimitry Andric Type *WiderType = 1696bdd1243dSDimitry Andric SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType()); 1697bdd1243dSDimitry Andric ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType); 1698bdd1243dSDimitry Andric MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType); 1699bdd1243dSDimitry Andric assert(MaxBECount->getType() == ExactExitCount->getType()); 17000b57cec5SDimitry Andric 17010b57cec5SDimitry Andric // Can we prove that some other exit must be taken strictly before this 17028bcb0991SDimitry Andric // one? 1703bdd1243dSDimitry Andric if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount, 1704bdd1243dSDimitry Andric ExactExitCount)) { 1705e8d8bef9SDimitry Andric foldExit(L, ExitingBB, false, DeadInsts); 17060b57cec5SDimitry Andric Changed = true; 17070b57cec5SDimitry Andric continue; 17080b57cec5SDimitry Andric } 17090b57cec5SDimitry Andric 17108bcb0991SDimitry Andric // As we run, keep track of which exit counts we've encountered. If we 17118bcb0991SDimitry Andric // find a duplicate, we've found an exit which would have exited on the 17128bcb0991SDimitry Andric // exiting iteration, but (from the visit order) strictly follows another 17138bcb0991SDimitry Andric // which does the same and is thus dead. 1714bdd1243dSDimitry Andric if (!DominatingExactExitCounts.insert(ExactExitCount).second) { 1715e8d8bef9SDimitry Andric foldExit(L, ExitingBB, false, DeadInsts); 17168bcb0991SDimitry Andric Changed = true; 17178bcb0991SDimitry Andric continue; 17180b57cec5SDimitry Andric } 17198bcb0991SDimitry Andric 17208bcb0991SDimitry Andric // TODO: There might be another oppurtunity to leverage SCEV's reasoning 17218bcb0991SDimitry Andric // here. If we kept track of the min of dominanting exits so far, we could 17228bcb0991SDimitry Andric // discharge exits with EC >= MDEC. This is less powerful than the existing 17238bcb0991SDimitry Andric // transform (since later exits aren't considered), but potentially more 17248bcb0991SDimitry Andric // powerful for any case where SCEV can prove a >=u b, but neither a == b 17258bcb0991SDimitry Andric // or a >u b. Such a case is not currently known. 17268bcb0991SDimitry Andric } 17278bcb0991SDimitry Andric return Changed; 17288bcb0991SDimitry Andric } 17298bcb0991SDimitry Andric 17308bcb0991SDimitry Andric bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { 17318bcb0991SDimitry Andric SmallVector<BasicBlock*, 16> ExitingBlocks; 17328bcb0991SDimitry Andric L->getExitingBlocks(ExitingBlocks); 17338bcb0991SDimitry Andric 17348bcb0991SDimitry Andric // Finally, see if we can rewrite our exit conditions into a loop invariant 17358bcb0991SDimitry Andric // form. If we have a read-only loop, and we can tell that we must exit down 17368bcb0991SDimitry Andric // a path which does not need any of the values computed within the loop, we 17378bcb0991SDimitry Andric // can rewrite the loop to exit on the first iteration. Note that this 17388bcb0991SDimitry Andric // doesn't either a) tell us the loop exits on the first iteration (unless 17398bcb0991SDimitry Andric // *all* exits are predicateable) or b) tell us *which* exit might be taken. 17408bcb0991SDimitry Andric // This transformation looks a lot like a restricted form of dead loop 17418bcb0991SDimitry Andric // elimination, but restricted to read-only loops and without neccesssarily 17428bcb0991SDimitry Andric // needing to kill the loop entirely. 17438bcb0991SDimitry Andric if (!LoopPredication) 17445ffd83dbSDimitry Andric return false; 17458bcb0991SDimitry Andric 17468bcb0991SDimitry Andric // Note: ExactBTC is the exact backedge taken count *iff* the loop exits 17478bcb0991SDimitry Andric // through *explicit* control flow. We have to eliminate the possibility of 17488bcb0991SDimitry Andric // implicit exits (see below) before we know it's truly exact. 17498bcb0991SDimitry Andric const SCEV *ExactBTC = SE->getBackedgeTakenCount(L); 1750fcaf7f86SDimitry Andric if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC)) 17515ffd83dbSDimitry Andric return false; 17528bcb0991SDimitry Andric 1753349cc55cSDimitry Andric assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant"); 1754349cc55cSDimitry Andric assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer"); 1755480093f4SDimitry Andric 17568bcb0991SDimitry Andric auto BadExit = [&](BasicBlock *ExitingBB) { 17578bcb0991SDimitry Andric // If our exiting block exits multiple loops, we can only rewrite the 17588bcb0991SDimitry Andric // innermost one. Otherwise, we're changing how many times the innermost 17598bcb0991SDimitry Andric // loop runs before it exits. 17608bcb0991SDimitry Andric if (LI->getLoopFor(ExitingBB) != L) 17618bcb0991SDimitry Andric return true; 17628bcb0991SDimitry Andric 17638bcb0991SDimitry Andric // Can't rewrite non-branch yet. 17648bcb0991SDimitry Andric BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 17658bcb0991SDimitry Andric if (!BI) 17668bcb0991SDimitry Andric return true; 17678bcb0991SDimitry Andric 17688bcb0991SDimitry Andric // If already constant, nothing to do. 17698bcb0991SDimitry Andric if (isa<Constant>(BI->getCondition())) 17708bcb0991SDimitry Andric return true; 17718bcb0991SDimitry Andric 17728bcb0991SDimitry Andric // If the exit block has phis, we need to be able to compute the values 17738bcb0991SDimitry Andric // within the loop which contains them. This assumes trivially lcssa phis 17748bcb0991SDimitry Andric // have already been removed; TODO: generalize 17758bcb0991SDimitry Andric BasicBlock *ExitBlock = 17768bcb0991SDimitry Andric BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0); 17778bcb0991SDimitry Andric if (!ExitBlock->phis().empty()) 17788bcb0991SDimitry Andric return true; 17798bcb0991SDimitry Andric 17808bcb0991SDimitry Andric const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 1781fcaf7f86SDimitry Andric if (isa<SCEVCouldNotCompute>(ExitCount) || 1782fcaf7f86SDimitry Andric !Rewriter.isSafeToExpand(ExitCount)) 17838bcb0991SDimitry Andric return true; 17848bcb0991SDimitry Andric 1785349cc55cSDimitry Andric assert(SE->isLoopInvariant(ExitCount, L) && 1786349cc55cSDimitry Andric "Exit count must be loop invariant"); 1787349cc55cSDimitry Andric assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer"); 17888bcb0991SDimitry Andric return false; 17898bcb0991SDimitry Andric }; 17908bcb0991SDimitry Andric 17918bcb0991SDimitry Andric // If we have any exits which can't be predicated themselves, than we can't 17928bcb0991SDimitry Andric // predicate any exit which isn't guaranteed to execute before it. Consider 17938bcb0991SDimitry Andric // two exits (a) and (b) which would both exit on the same iteration. If we 17948bcb0991SDimitry Andric // can predicate (b), but not (a), and (a) preceeds (b) along some path, then 17958bcb0991SDimitry Andric // we could convert a loop from exiting through (a) to one exiting through 17968bcb0991SDimitry Andric // (b). Note that this problem exists only for exits with the same exit 17978bcb0991SDimitry Andric // count, and we could be more aggressive when exit counts are known inequal. 17988bcb0991SDimitry Andric llvm::sort(ExitingBlocks, 17998bcb0991SDimitry Andric [&](BasicBlock *A, BasicBlock *B) { 18008bcb0991SDimitry Andric // std::sort sorts in ascending order, so we want the inverse of 18018bcb0991SDimitry Andric // the normal dominance relation, plus a tie breaker for blocks 18028bcb0991SDimitry Andric // unordered by dominance. 18038bcb0991SDimitry Andric if (DT->properlyDominates(A, B)) return true; 18048bcb0991SDimitry Andric if (DT->properlyDominates(B, A)) return false; 18058bcb0991SDimitry Andric return A->getName() < B->getName(); 18068bcb0991SDimitry Andric }); 18078bcb0991SDimitry Andric // Check to see if our exit blocks are a total order (i.e. a linear chain of 18088bcb0991SDimitry Andric // exits before the backedge). If they aren't, reasoning about reachability 18098bcb0991SDimitry Andric // is complicated and we choose not to for now. 18108bcb0991SDimitry Andric for (unsigned i = 1; i < ExitingBlocks.size(); i++) 18118bcb0991SDimitry Andric if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])) 18125ffd83dbSDimitry Andric return false; 18138bcb0991SDimitry Andric 18148bcb0991SDimitry Andric // Given our sorted total order, we know that exit[j] must be evaluated 18158bcb0991SDimitry Andric // after all exit[i] such j > i. 18168bcb0991SDimitry Andric for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++) 18178bcb0991SDimitry Andric if (BadExit(ExitingBlocks[i])) { 18188bcb0991SDimitry Andric ExitingBlocks.resize(i); 18198bcb0991SDimitry Andric break; 18208bcb0991SDimitry Andric } 18218bcb0991SDimitry Andric 18228bcb0991SDimitry Andric if (ExitingBlocks.empty()) 18235ffd83dbSDimitry Andric return false; 18248bcb0991SDimitry Andric 18258bcb0991SDimitry Andric // We rely on not being able to reach an exiting block on a later iteration 18268bcb0991SDimitry Andric // then it's statically compute exit count. The implementaton of 18278bcb0991SDimitry Andric // getExitCount currently has this invariant, but assert it here so that 18288bcb0991SDimitry Andric // breakage is obvious if this ever changes.. 18298bcb0991SDimitry Andric assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) { 18308bcb0991SDimitry Andric return DT->dominates(ExitingBB, L->getLoopLatch()); 18318bcb0991SDimitry Andric })); 18328bcb0991SDimitry Andric 18338bcb0991SDimitry Andric // At this point, ExitingBlocks consists of only those blocks which are 18348bcb0991SDimitry Andric // predicatable. Given that, we know we have at least one exit we can 18358bcb0991SDimitry Andric // predicate if the loop is doesn't have side effects and doesn't have any 18368bcb0991SDimitry Andric // implicit exits (because then our exact BTC isn't actually exact). 18378bcb0991SDimitry Andric // @Reviewers - As structured, this is O(I^2) for loop nests. Any 18388bcb0991SDimitry Andric // suggestions on how to improve this? I can obviously bail out for outer 18398bcb0991SDimitry Andric // loops, but that seems less than ideal. MemorySSA can find memory writes, 18408bcb0991SDimitry Andric // is that enough for *all* side effects? 18418bcb0991SDimitry Andric for (BasicBlock *BB : L->blocks()) 18428bcb0991SDimitry Andric for (auto &I : *BB) 18438bcb0991SDimitry Andric // TODO:isGuaranteedToTransfer 1844fe6060f1SDimitry Andric if (I.mayHaveSideEffects()) 18455ffd83dbSDimitry Andric return false; 18468bcb0991SDimitry Andric 18475ffd83dbSDimitry Andric bool Changed = false; 18488bcb0991SDimitry Andric // Finally, do the actual predication for all predicatable blocks. A couple 18498bcb0991SDimitry Andric // of notes here: 18508bcb0991SDimitry Andric // 1) We don't bother to constant fold dominated exits with identical exit 18518bcb0991SDimitry Andric // counts; that's simply a form of CSE/equality propagation and we leave 18528bcb0991SDimitry Andric // it for dedicated passes. 18538bcb0991SDimitry Andric // 2) We insert the comparison at the branch. Hoisting introduces additional 18548bcb0991SDimitry Andric // legality constraints and we leave that to dedicated logic. We want to 18558bcb0991SDimitry Andric // predicate even if we can't insert a loop invariant expression as 18568bcb0991SDimitry Andric // peeling or unrolling will likely reduce the cost of the otherwise loop 18578bcb0991SDimitry Andric // varying check. 18588bcb0991SDimitry Andric Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator()); 18598bcb0991SDimitry Andric IRBuilder<> B(L->getLoopPreheader()->getTerminator()); 1860480093f4SDimitry Andric Value *ExactBTCV = nullptr; // Lazily generated if needed. 18618bcb0991SDimitry Andric for (BasicBlock *ExitingBB : ExitingBlocks) { 18628bcb0991SDimitry Andric const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 18638bcb0991SDimitry Andric 18648bcb0991SDimitry Andric auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); 18658bcb0991SDimitry Andric Value *NewCond; 18668bcb0991SDimitry Andric if (ExitCount == ExactBTC) { 18678bcb0991SDimitry Andric NewCond = L->contains(BI->getSuccessor(0)) ? 18688bcb0991SDimitry Andric B.getFalse() : B.getTrue(); 18698bcb0991SDimitry Andric } else { 18708bcb0991SDimitry Andric Value *ECV = Rewriter.expandCodeFor(ExitCount); 18718bcb0991SDimitry Andric if (!ExactBTCV) 18728bcb0991SDimitry Andric ExactBTCV = Rewriter.expandCodeFor(ExactBTC); 18738bcb0991SDimitry Andric Value *RHS = ExactBTCV; 18748bcb0991SDimitry Andric if (ECV->getType() != RHS->getType()) { 18758bcb0991SDimitry Andric Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType()); 18768bcb0991SDimitry Andric ECV = B.CreateZExt(ECV, WiderTy); 18778bcb0991SDimitry Andric RHS = B.CreateZExt(RHS, WiderTy); 18788bcb0991SDimitry Andric } 18798bcb0991SDimitry Andric auto Pred = L->contains(BI->getSuccessor(0)) ? 18808bcb0991SDimitry Andric ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; 18818bcb0991SDimitry Andric NewCond = B.CreateICmp(Pred, ECV, RHS); 18828bcb0991SDimitry Andric } 18838bcb0991SDimitry Andric Value *OldCond = BI->getCondition(); 18848bcb0991SDimitry Andric BI->setCondition(NewCond); 18858bcb0991SDimitry Andric if (OldCond->use_empty()) 18865ffd83dbSDimitry Andric DeadInsts.emplace_back(OldCond); 18878bcb0991SDimitry Andric Changed = true; 1888*0fca6ea1SDimitry Andric RunUnswitching = true; 18898bcb0991SDimitry Andric } 18908bcb0991SDimitry Andric 18910b57cec5SDimitry Andric return Changed; 18920b57cec5SDimitry Andric } 18930b57cec5SDimitry Andric 18940b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 18950b57cec5SDimitry Andric // IndVarSimplify driver. Manage several subpasses of IV simplification. 18960b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 18970b57cec5SDimitry Andric 18980b57cec5SDimitry Andric bool IndVarSimplify::run(Loop *L) { 18990b57cec5SDimitry Andric // We need (and expect!) the incoming loop to be in LCSSA. 19000b57cec5SDimitry Andric assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 19010b57cec5SDimitry Andric "LCSSA required to run indvars!"); 19020b57cec5SDimitry Andric 19030b57cec5SDimitry Andric // If LoopSimplify form is not available, stay out of trouble. Some notes: 19040b57cec5SDimitry Andric // - LSR currently only supports LoopSimplify-form loops. Indvars' 19050b57cec5SDimitry Andric // canonicalization can be a pessimization without LSR to "clean up" 19060b57cec5SDimitry Andric // afterwards. 19070b57cec5SDimitry Andric // - We depend on having a preheader; in particular, 19080b57cec5SDimitry Andric // Loop::getCanonicalInductionVariable only supports loops with preheaders, 19090b57cec5SDimitry Andric // and we're in trouble if we can't find the induction variable even when 19100b57cec5SDimitry Andric // we've manually inserted one. 19110b57cec5SDimitry Andric // - LFTR relies on having a single backedge. 19120b57cec5SDimitry Andric if (!L->isLoopSimplifyForm()) 19130b57cec5SDimitry Andric return false; 19140b57cec5SDimitry Andric 19155ffd83dbSDimitry Andric bool Changed = false; 1916480093f4SDimitry Andric // If there are any floating-point recurrences, attempt to 1917480093f4SDimitry Andric // transform them to use integer recurrences. 1918480093f4SDimitry Andric Changed |= rewriteNonIntegerIVs(L); 1919480093f4SDimitry Andric 19200b57cec5SDimitry Andric // Create a rewriter object which we'll use to transform the code with. 19210b57cec5SDimitry Andric SCEVExpander Rewriter(*SE, DL, "indvars"); 19220b57cec5SDimitry Andric #ifndef NDEBUG 19230b57cec5SDimitry Andric Rewriter.setDebugType(DEBUG_TYPE); 19240b57cec5SDimitry Andric #endif 19250b57cec5SDimitry Andric 19260b57cec5SDimitry Andric // Eliminate redundant IV users. 19270b57cec5SDimitry Andric // 19280b57cec5SDimitry Andric // Simplification works best when run before other consumers of SCEV. We 19290b57cec5SDimitry Andric // attempt to avoid evaluating SCEVs for sign/zero extend operations until 19300b57cec5SDimitry Andric // other expressions involving loop IVs have been evaluated. This helps SCEV 19310b57cec5SDimitry Andric // set no-wrap flags before normalizing sign/zero extension. 19320b57cec5SDimitry Andric Rewriter.disableCanonicalMode(); 19330b57cec5SDimitry Andric Changed |= simplifyAndExtend(L, Rewriter, LI); 19340b57cec5SDimitry Andric 19358bcb0991SDimitry Andric // Check to see if we can compute the final value of any expressions 19360b57cec5SDimitry Andric // that are recurrent in the loop, and substitute the exit values from the 19378bcb0991SDimitry Andric // loop into any instructions outside of the loop that use the final values 19388bcb0991SDimitry Andric // of the current expressions. 19395ffd83dbSDimitry Andric if (ReplaceExitValue != NeverRepl) { 19405ffd83dbSDimitry Andric if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT, 19415ffd83dbSDimitry Andric ReplaceExitValue, DeadInsts)) { 19425ffd83dbSDimitry Andric NumReplaced += Rewrites; 19435ffd83dbSDimitry Andric Changed = true; 19445ffd83dbSDimitry Andric } 19455ffd83dbSDimitry Andric } 19460b57cec5SDimitry Andric 19470b57cec5SDimitry Andric // Eliminate redundant IV cycles. 1948349cc55cSDimitry Andric NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI); 1949349cc55cSDimitry Andric 1950349cc55cSDimitry Andric // Try to convert exit conditions to unsigned and rotate computation 1951349cc55cSDimitry Andric // out of the loop. Note: Handles invalidation internally if needed. 1952349cc55cSDimitry Andric Changed |= canonicalizeExitCondition(L); 19530b57cec5SDimitry Andric 19548bcb0991SDimitry Andric // Try to eliminate loop exits based on analyzeable exit counts 1955480093f4SDimitry Andric if (optimizeLoopExits(L, Rewriter)) { 1956480093f4SDimitry Andric Changed = true; 1957480093f4SDimitry Andric // Given we've changed exit counts, notify SCEV 1958e8d8bef9SDimitry Andric // Some nested loops may share same folded exit basic block, 1959e8d8bef9SDimitry Andric // thus we need to notify top most loop. 1960e8d8bef9SDimitry Andric SE->forgetTopmostLoop(L); 1961480093f4SDimitry Andric } 19628bcb0991SDimitry Andric 19638bcb0991SDimitry Andric // Try to form loop invariant tests for loop exits by changing how many 19648bcb0991SDimitry Andric // iterations of the loop run when that is unobservable. 1965480093f4SDimitry Andric if (predicateLoopExits(L, Rewriter)) { 1966480093f4SDimitry Andric Changed = true; 1967480093f4SDimitry Andric // Given we've changed exit counts, notify SCEV 1968480093f4SDimitry Andric SE->forgetLoop(L); 1969480093f4SDimitry Andric } 19700b57cec5SDimitry Andric 19710b57cec5SDimitry Andric // If we have a trip count expression, rewrite the loop's exit condition 19720b57cec5SDimitry Andric // using it. 19730b57cec5SDimitry Andric if (!DisableLFTR) { 19745ffd83dbSDimitry Andric BasicBlock *PreHeader = L->getLoopPreheader(); 19755ffd83dbSDimitry Andric 19760b57cec5SDimitry Andric SmallVector<BasicBlock*, 16> ExitingBlocks; 19770b57cec5SDimitry Andric L->getExitingBlocks(ExitingBlocks); 19780b57cec5SDimitry Andric for (BasicBlock *ExitingBB : ExitingBlocks) { 19790b57cec5SDimitry Andric // Can't rewrite non-branch yet. 19800b57cec5SDimitry Andric if (!isa<BranchInst>(ExitingBB->getTerminator())) 19810b57cec5SDimitry Andric continue; 19820b57cec5SDimitry Andric 19830b57cec5SDimitry Andric // If our exitting block exits multiple loops, we can only rewrite the 19840b57cec5SDimitry Andric // innermost one. Otherwise, we're changing how many times the innermost 19850b57cec5SDimitry Andric // loop runs before it exits. 19860b57cec5SDimitry Andric if (LI->getLoopFor(ExitingBB) != L) 19870b57cec5SDimitry Andric continue; 19880b57cec5SDimitry Andric 19890b57cec5SDimitry Andric if (!needsLFTR(L, ExitingBB)) 19900b57cec5SDimitry Andric continue; 19910b57cec5SDimitry Andric 19920b57cec5SDimitry Andric const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 19930b57cec5SDimitry Andric if (isa<SCEVCouldNotCompute>(ExitCount)) 19940b57cec5SDimitry Andric continue; 19950b57cec5SDimitry Andric 19960b57cec5SDimitry Andric // This was handled above, but as we form SCEVs, we can sometimes refine 19970b57cec5SDimitry Andric // existing ones; this allows exit counts to be folded to zero which 19980b57cec5SDimitry Andric // weren't when optimizeLoopExits saw them. Arguably, we should iterate 19990b57cec5SDimitry Andric // until stable to handle cases like this better. 20000b57cec5SDimitry Andric if (ExitCount->isZero()) 20010b57cec5SDimitry Andric continue; 20020b57cec5SDimitry Andric 20030b57cec5SDimitry Andric PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT); 20040b57cec5SDimitry Andric if (!IndVar) 20050b57cec5SDimitry Andric continue; 20060b57cec5SDimitry Andric 20070b57cec5SDimitry Andric // Avoid high cost expansions. Note: This heuristic is questionable in 20080b57cec5SDimitry Andric // that our definition of "high cost" is not exactly principled. 20095ffd83dbSDimitry Andric if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget, 20104824e7fdSDimitry Andric TTI, PreHeader->getTerminator())) 20110b57cec5SDimitry Andric continue; 20120b57cec5SDimitry Andric 20135f757f3fSDimitry Andric if (!Rewriter.isSafeToExpand(ExitCount)) 20145f757f3fSDimitry Andric continue; 20155f757f3fSDimitry Andric 20160b57cec5SDimitry Andric Changed |= linearFunctionTestReplace(L, ExitingBB, 20170b57cec5SDimitry Andric ExitCount, IndVar, 20180b57cec5SDimitry Andric Rewriter); 20190b57cec5SDimitry Andric } 20200b57cec5SDimitry Andric } 20210b57cec5SDimitry Andric // Clear the rewriter cache, because values that are in the rewriter's cache 20220b57cec5SDimitry Andric // can be deleted in the loop below, causing the AssertingVH in the cache to 20230b57cec5SDimitry Andric // trigger. 20240b57cec5SDimitry Andric Rewriter.clear(); 20250b57cec5SDimitry Andric 20260b57cec5SDimitry Andric // Now that we're done iterating through lists, clean up any instructions 20270b57cec5SDimitry Andric // which are now dead. 2028e8d8bef9SDimitry Andric while (!DeadInsts.empty()) { 2029e8d8bef9SDimitry Andric Value *V = DeadInsts.pop_back_val(); 2030e8d8bef9SDimitry Andric 2031e8d8bef9SDimitry Andric if (PHINode *PHI = dyn_cast_or_null<PHINode>(V)) 2032e8d8bef9SDimitry Andric Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get()); 2033e8d8bef9SDimitry Andric else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V)) 20345ffd83dbSDimitry Andric Changed |= 20355ffd83dbSDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get()); 2036e8d8bef9SDimitry Andric } 20370b57cec5SDimitry Andric 20380b57cec5SDimitry Andric // The Rewriter may not be used from this point on. 20390b57cec5SDimitry Andric 20400b57cec5SDimitry Andric // Loop-invariant instructions in the preheader that aren't used in the 20410b57cec5SDimitry Andric // loop may be sunk below the loop to reduce register pressure. 20420b57cec5SDimitry Andric Changed |= sinkUnusedInvariants(L); 20430b57cec5SDimitry Andric 20440b57cec5SDimitry Andric // rewriteFirstIterationLoopExitValues does not rely on the computation of 20450b57cec5SDimitry Andric // trip count and therefore can further simplify exit values in addition to 20460b57cec5SDimitry Andric // rewriteLoopExitValues. 20470b57cec5SDimitry Andric Changed |= rewriteFirstIterationLoopExitValues(L); 20480b57cec5SDimitry Andric 20490b57cec5SDimitry Andric // Clean up dead instructions. 20505ffd83dbSDimitry Andric Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get()); 20510b57cec5SDimitry Andric 20520b57cec5SDimitry Andric // Check a post-condition. 20530b57cec5SDimitry Andric assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 20540b57cec5SDimitry Andric "Indvars did not preserve LCSSA!"); 20555ffd83dbSDimitry Andric if (VerifyMemorySSA && MSSAU) 20565ffd83dbSDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA(); 20570b57cec5SDimitry Andric 20580b57cec5SDimitry Andric return Changed; 20590b57cec5SDimitry Andric } 20600b57cec5SDimitry Andric 20610b57cec5SDimitry Andric PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, 20620b57cec5SDimitry Andric LoopStandardAnalysisResults &AR, 20630b57cec5SDimitry Andric LPMUpdater &) { 20640b57cec5SDimitry Andric Function *F = L.getHeader()->getParent(); 2065*0fca6ea1SDimitry Andric const DataLayout &DL = F->getDataLayout(); 20660b57cec5SDimitry Andric 2067e8d8bef9SDimitry Andric IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA, 2068e8d8bef9SDimitry Andric WidenIndVars && AllowIVWidening); 20690b57cec5SDimitry Andric if (!IVS.run(&L)) 20700b57cec5SDimitry Andric return PreservedAnalyses::all(); 20710b57cec5SDimitry Andric 20720b57cec5SDimitry Andric auto PA = getLoopPassPreservedAnalyses(); 20730b57cec5SDimitry Andric PA.preserveSet<CFGAnalyses>(); 2074*0fca6ea1SDimitry Andric if (IVS.runUnswitching()) { 2075*0fca6ea1SDimitry Andric AM.getResult<ShouldRunExtraSimpleLoopUnswitch>(L, AR); 2076*0fca6ea1SDimitry Andric PA.preserve<ShouldRunExtraSimpleLoopUnswitch>(); 2077*0fca6ea1SDimitry Andric } 2078*0fca6ea1SDimitry Andric 20795ffd83dbSDimitry Andric if (AR.MSSA) 20805ffd83dbSDimitry Andric PA.preserve<MemorySSAAnalysis>(); 20810b57cec5SDimitry Andric return PA; 20820b57cec5SDimitry Andric } 2083