1*5f757f3fSDimitry Andric #include "llvm/Transforms/Utils/LoopConstrainer.h" 2*5f757f3fSDimitry Andric #include "llvm/Analysis/LoopInfo.h" 3*5f757f3fSDimitry Andric #include "llvm/Analysis/ScalarEvolution.h" 4*5f757f3fSDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpressions.h" 5*5f757f3fSDimitry Andric #include "llvm/IR/Dominators.h" 6*5f757f3fSDimitry Andric #include "llvm/Transforms/Utils/Cloning.h" 7*5f757f3fSDimitry Andric #include "llvm/Transforms/Utils/LoopSimplify.h" 8*5f757f3fSDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h" 9*5f757f3fSDimitry Andric #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 10*5f757f3fSDimitry Andric 11*5f757f3fSDimitry Andric using namespace llvm; 12*5f757f3fSDimitry Andric 13*5f757f3fSDimitry Andric static const char *ClonedLoopTag = "loop_constrainer.loop.clone"; 14*5f757f3fSDimitry Andric 15*5f757f3fSDimitry Andric #define DEBUG_TYPE "loop-constrainer" 16*5f757f3fSDimitry Andric 17*5f757f3fSDimitry Andric /// Given a loop with an deccreasing induction variable, is it possible to 18*5f757f3fSDimitry Andric /// safely calculate the bounds of a new loop using the given Predicate. 19*5f757f3fSDimitry Andric static bool isSafeDecreasingBound(const SCEV *Start, const SCEV *BoundSCEV, 20*5f757f3fSDimitry Andric const SCEV *Step, ICmpInst::Predicate Pred, 21*5f757f3fSDimitry Andric unsigned LatchBrExitIdx, Loop *L, 22*5f757f3fSDimitry Andric ScalarEvolution &SE) { 23*5f757f3fSDimitry Andric if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_SGT && 24*5f757f3fSDimitry Andric Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_UGT) 25*5f757f3fSDimitry Andric return false; 26*5f757f3fSDimitry Andric 27*5f757f3fSDimitry Andric if (!SE.isAvailableAtLoopEntry(BoundSCEV, L)) 28*5f757f3fSDimitry Andric return false; 29*5f757f3fSDimitry Andric 30*5f757f3fSDimitry Andric assert(SE.isKnownNegative(Step) && "expecting negative step"); 31*5f757f3fSDimitry Andric 32*5f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "isSafeDecreasingBound with:\n"); 33*5f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "Start: " << *Start << "\n"); 34*5f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "Step: " << *Step << "\n"); 35*5f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "BoundSCEV: " << *BoundSCEV << "\n"); 36*5f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "Pred: " << Pred << "\n"); 37*5f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "LatchExitBrIdx: " << LatchBrExitIdx << "\n"); 38*5f757f3fSDimitry Andric 39*5f757f3fSDimitry Andric bool IsSigned = ICmpInst::isSigned(Pred); 40*5f757f3fSDimitry Andric // The predicate that we need to check that the induction variable lies 41*5f757f3fSDimitry Andric // within bounds. 42*5f757f3fSDimitry Andric ICmpInst::Predicate BoundPred = 43*5f757f3fSDimitry Andric IsSigned ? CmpInst::ICMP_SGT : CmpInst::ICMP_UGT; 44*5f757f3fSDimitry Andric 45*5f757f3fSDimitry Andric if (LatchBrExitIdx == 1) 46*5f757f3fSDimitry Andric return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, BoundSCEV); 47*5f757f3fSDimitry Andric 48*5f757f3fSDimitry Andric assert(LatchBrExitIdx == 0 && "LatchBrExitIdx should be either 0 or 1"); 49*5f757f3fSDimitry Andric 50*5f757f3fSDimitry Andric const SCEV *StepPlusOne = SE.getAddExpr(Step, SE.getOne(Step->getType())); 51*5f757f3fSDimitry Andric unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth(); 52*5f757f3fSDimitry Andric APInt Min = IsSigned ? APInt::getSignedMinValue(BitWidth) 53*5f757f3fSDimitry Andric : APInt::getMinValue(BitWidth); 54*5f757f3fSDimitry Andric const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Min), StepPlusOne); 55*5f757f3fSDimitry Andric 56*5f757f3fSDimitry Andric const SCEV *MinusOne = 57*5f757f3fSDimitry Andric SE.getMinusSCEV(BoundSCEV, SE.getOne(BoundSCEV->getType())); 58*5f757f3fSDimitry Andric 59*5f757f3fSDimitry Andric return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, MinusOne) && 60*5f757f3fSDimitry Andric SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit); 61*5f757f3fSDimitry Andric } 62*5f757f3fSDimitry Andric 63*5f757f3fSDimitry Andric /// Given a loop with an increasing induction variable, is it possible to 64*5f757f3fSDimitry Andric /// safely calculate the bounds of a new loop using the given Predicate. 65*5f757f3fSDimitry Andric static bool isSafeIncreasingBound(const SCEV *Start, const SCEV *BoundSCEV, 66*5f757f3fSDimitry Andric const SCEV *Step, ICmpInst::Predicate Pred, 67*5f757f3fSDimitry Andric unsigned LatchBrExitIdx, Loop *L, 68*5f757f3fSDimitry Andric ScalarEvolution &SE) { 69*5f757f3fSDimitry Andric if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_SGT && 70*5f757f3fSDimitry Andric Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_UGT) 71*5f757f3fSDimitry Andric return false; 72*5f757f3fSDimitry Andric 73*5f757f3fSDimitry Andric if (!SE.isAvailableAtLoopEntry(BoundSCEV, L)) 74*5f757f3fSDimitry Andric return false; 75*5f757f3fSDimitry Andric 76*5f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "isSafeIncreasingBound with:\n"); 77*5f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "Start: " << *Start << "\n"); 78*5f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "Step: " << *Step << "\n"); 79*5f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "BoundSCEV: " << *BoundSCEV << "\n"); 80*5f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "Pred: " << Pred << "\n"); 81*5f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "LatchExitBrIdx: " << LatchBrExitIdx << "\n"); 82*5f757f3fSDimitry Andric 83*5f757f3fSDimitry Andric bool IsSigned = ICmpInst::isSigned(Pred); 84*5f757f3fSDimitry Andric // The predicate that we need to check that the induction variable lies 85*5f757f3fSDimitry Andric // within bounds. 86*5f757f3fSDimitry Andric ICmpInst::Predicate BoundPred = 87*5f757f3fSDimitry Andric IsSigned ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT; 88*5f757f3fSDimitry Andric 89*5f757f3fSDimitry Andric if (LatchBrExitIdx == 1) 90*5f757f3fSDimitry Andric return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, BoundSCEV); 91*5f757f3fSDimitry Andric 92*5f757f3fSDimitry Andric assert(LatchBrExitIdx == 0 && "LatchBrExitIdx should be 0 or 1"); 93*5f757f3fSDimitry Andric 94*5f757f3fSDimitry Andric const SCEV *StepMinusOne = SE.getMinusSCEV(Step, SE.getOne(Step->getType())); 95*5f757f3fSDimitry Andric unsigned BitWidth = cast<IntegerType>(BoundSCEV->getType())->getBitWidth(); 96*5f757f3fSDimitry Andric APInt Max = IsSigned ? APInt::getSignedMaxValue(BitWidth) 97*5f757f3fSDimitry Andric : APInt::getMaxValue(BitWidth); 98*5f757f3fSDimitry Andric const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Max), StepMinusOne); 99*5f757f3fSDimitry Andric 100*5f757f3fSDimitry Andric return (SE.isLoopEntryGuardedByCond(L, BoundPred, Start, 101*5f757f3fSDimitry Andric SE.getAddExpr(BoundSCEV, Step)) && 102*5f757f3fSDimitry Andric SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit)); 103*5f757f3fSDimitry Andric } 104*5f757f3fSDimitry Andric 105*5f757f3fSDimitry Andric /// Returns estimate for max latch taken count of the loop of the narrowest 106*5f757f3fSDimitry Andric /// available type. If the latch block has such estimate, it is returned. 107*5f757f3fSDimitry Andric /// Otherwise, we use max exit count of whole loop (that is potentially of wider 108*5f757f3fSDimitry Andric /// type than latch check itself), which is still better than no estimate. 109*5f757f3fSDimitry Andric static const SCEV *getNarrowestLatchMaxTakenCountEstimate(ScalarEvolution &SE, 110*5f757f3fSDimitry Andric const Loop &L) { 111*5f757f3fSDimitry Andric const SCEV *FromBlock = 112*5f757f3fSDimitry Andric SE.getExitCount(&L, L.getLoopLatch(), ScalarEvolution::SymbolicMaximum); 113*5f757f3fSDimitry Andric if (isa<SCEVCouldNotCompute>(FromBlock)) 114*5f757f3fSDimitry Andric return SE.getSymbolicMaxBackedgeTakenCount(&L); 115*5f757f3fSDimitry Andric return FromBlock; 116*5f757f3fSDimitry Andric } 117*5f757f3fSDimitry Andric 118*5f757f3fSDimitry Andric std::optional<LoopStructure> 119*5f757f3fSDimitry Andric LoopStructure::parseLoopStructure(ScalarEvolution &SE, Loop &L, 120*5f757f3fSDimitry Andric bool AllowUnsignedLatchCond, 121*5f757f3fSDimitry Andric const char *&FailureReason) { 122*5f757f3fSDimitry Andric if (!L.isLoopSimplifyForm()) { 123*5f757f3fSDimitry Andric FailureReason = "loop not in LoopSimplify form"; 124*5f757f3fSDimitry Andric return std::nullopt; 125*5f757f3fSDimitry Andric } 126*5f757f3fSDimitry Andric 127*5f757f3fSDimitry Andric BasicBlock *Latch = L.getLoopLatch(); 128*5f757f3fSDimitry Andric assert(Latch && "Simplified loops only have one latch!"); 129*5f757f3fSDimitry Andric 130*5f757f3fSDimitry Andric if (Latch->getTerminator()->getMetadata(ClonedLoopTag)) { 131*5f757f3fSDimitry Andric FailureReason = "loop has already been cloned"; 132*5f757f3fSDimitry Andric return std::nullopt; 133*5f757f3fSDimitry Andric } 134*5f757f3fSDimitry Andric 135*5f757f3fSDimitry Andric if (!L.isLoopExiting(Latch)) { 136*5f757f3fSDimitry Andric FailureReason = "no loop latch"; 137*5f757f3fSDimitry Andric return std::nullopt; 138*5f757f3fSDimitry Andric } 139*5f757f3fSDimitry Andric 140*5f757f3fSDimitry Andric BasicBlock *Header = L.getHeader(); 141*5f757f3fSDimitry Andric BasicBlock *Preheader = L.getLoopPreheader(); 142*5f757f3fSDimitry Andric if (!Preheader) { 143*5f757f3fSDimitry Andric FailureReason = "no preheader"; 144*5f757f3fSDimitry Andric return std::nullopt; 145*5f757f3fSDimitry Andric } 146*5f757f3fSDimitry Andric 147*5f757f3fSDimitry Andric BranchInst *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator()); 148*5f757f3fSDimitry Andric if (!LatchBr || LatchBr->isUnconditional()) { 149*5f757f3fSDimitry Andric FailureReason = "latch terminator not conditional branch"; 150*5f757f3fSDimitry Andric return std::nullopt; 151*5f757f3fSDimitry Andric } 152*5f757f3fSDimitry Andric 153*5f757f3fSDimitry Andric unsigned LatchBrExitIdx = LatchBr->getSuccessor(0) == Header ? 1 : 0; 154*5f757f3fSDimitry Andric 155*5f757f3fSDimitry Andric ICmpInst *ICI = dyn_cast<ICmpInst>(LatchBr->getCondition()); 156*5f757f3fSDimitry Andric if (!ICI || !isa<IntegerType>(ICI->getOperand(0)->getType())) { 157*5f757f3fSDimitry Andric FailureReason = "latch terminator branch not conditional on integral icmp"; 158*5f757f3fSDimitry Andric return std::nullopt; 159*5f757f3fSDimitry Andric } 160*5f757f3fSDimitry Andric 161*5f757f3fSDimitry Andric const SCEV *MaxBETakenCount = getNarrowestLatchMaxTakenCountEstimate(SE, L); 162*5f757f3fSDimitry Andric if (isa<SCEVCouldNotCompute>(MaxBETakenCount)) { 163*5f757f3fSDimitry Andric FailureReason = "could not compute latch count"; 164*5f757f3fSDimitry Andric return std::nullopt; 165*5f757f3fSDimitry Andric } 166*5f757f3fSDimitry Andric assert(SE.getLoopDisposition(MaxBETakenCount, &L) == 167*5f757f3fSDimitry Andric ScalarEvolution::LoopInvariant && 168*5f757f3fSDimitry Andric "loop variant exit count doesn't make sense!"); 169*5f757f3fSDimitry Andric 170*5f757f3fSDimitry Andric ICmpInst::Predicate Pred = ICI->getPredicate(); 171*5f757f3fSDimitry Andric Value *LeftValue = ICI->getOperand(0); 172*5f757f3fSDimitry Andric const SCEV *LeftSCEV = SE.getSCEV(LeftValue); 173*5f757f3fSDimitry Andric IntegerType *IndVarTy = cast<IntegerType>(LeftValue->getType()); 174*5f757f3fSDimitry Andric 175*5f757f3fSDimitry Andric Value *RightValue = ICI->getOperand(1); 176*5f757f3fSDimitry Andric const SCEV *RightSCEV = SE.getSCEV(RightValue); 177*5f757f3fSDimitry Andric 178*5f757f3fSDimitry Andric // We canonicalize `ICI` such that `LeftSCEV` is an add recurrence. 179*5f757f3fSDimitry Andric if (!isa<SCEVAddRecExpr>(LeftSCEV)) { 180*5f757f3fSDimitry Andric if (isa<SCEVAddRecExpr>(RightSCEV)) { 181*5f757f3fSDimitry Andric std::swap(LeftSCEV, RightSCEV); 182*5f757f3fSDimitry Andric std::swap(LeftValue, RightValue); 183*5f757f3fSDimitry Andric Pred = ICmpInst::getSwappedPredicate(Pred); 184*5f757f3fSDimitry Andric } else { 185*5f757f3fSDimitry Andric FailureReason = "no add recurrences in the icmp"; 186*5f757f3fSDimitry Andric return std::nullopt; 187*5f757f3fSDimitry Andric } 188*5f757f3fSDimitry Andric } 189*5f757f3fSDimitry Andric 190*5f757f3fSDimitry Andric auto HasNoSignedWrap = [&](const SCEVAddRecExpr *AR) { 191*5f757f3fSDimitry Andric if (AR->getNoWrapFlags(SCEV::FlagNSW)) 192*5f757f3fSDimitry Andric return true; 193*5f757f3fSDimitry Andric 194*5f757f3fSDimitry Andric IntegerType *Ty = cast<IntegerType>(AR->getType()); 195*5f757f3fSDimitry Andric IntegerType *WideTy = 196*5f757f3fSDimitry Andric IntegerType::get(Ty->getContext(), Ty->getBitWidth() * 2); 197*5f757f3fSDimitry Andric 198*5f757f3fSDimitry Andric const SCEVAddRecExpr *ExtendAfterOp = 199*5f757f3fSDimitry Andric dyn_cast<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy)); 200*5f757f3fSDimitry Andric if (ExtendAfterOp) { 201*5f757f3fSDimitry Andric const SCEV *ExtendedStart = SE.getSignExtendExpr(AR->getStart(), WideTy); 202*5f757f3fSDimitry Andric const SCEV *ExtendedStep = 203*5f757f3fSDimitry Andric SE.getSignExtendExpr(AR->getStepRecurrence(SE), WideTy); 204*5f757f3fSDimitry Andric 205*5f757f3fSDimitry Andric bool NoSignedWrap = ExtendAfterOp->getStart() == ExtendedStart && 206*5f757f3fSDimitry Andric ExtendAfterOp->getStepRecurrence(SE) == ExtendedStep; 207*5f757f3fSDimitry Andric 208*5f757f3fSDimitry Andric if (NoSignedWrap) 209*5f757f3fSDimitry Andric return true; 210*5f757f3fSDimitry Andric } 211*5f757f3fSDimitry Andric 212*5f757f3fSDimitry Andric // We may have proved this when computing the sign extension above. 213*5f757f3fSDimitry Andric return AR->getNoWrapFlags(SCEV::FlagNSW) != SCEV::FlagAnyWrap; 214*5f757f3fSDimitry Andric }; 215*5f757f3fSDimitry Andric 216*5f757f3fSDimitry Andric // `ICI` is interpreted as taking the backedge if the *next* value of the 217*5f757f3fSDimitry Andric // induction variable satisfies some constraint. 218*5f757f3fSDimitry Andric 219*5f757f3fSDimitry Andric const SCEVAddRecExpr *IndVarBase = cast<SCEVAddRecExpr>(LeftSCEV); 220*5f757f3fSDimitry Andric if (IndVarBase->getLoop() != &L) { 221*5f757f3fSDimitry Andric FailureReason = "LHS in cmp is not an AddRec for this loop"; 222*5f757f3fSDimitry Andric return std::nullopt; 223*5f757f3fSDimitry Andric } 224*5f757f3fSDimitry Andric if (!IndVarBase->isAffine()) { 225*5f757f3fSDimitry Andric FailureReason = "LHS in icmp not induction variable"; 226*5f757f3fSDimitry Andric return std::nullopt; 227*5f757f3fSDimitry Andric } 228*5f757f3fSDimitry Andric const SCEV *StepRec = IndVarBase->getStepRecurrence(SE); 229*5f757f3fSDimitry Andric if (!isa<SCEVConstant>(StepRec)) { 230*5f757f3fSDimitry Andric FailureReason = "LHS in icmp not induction variable"; 231*5f757f3fSDimitry Andric return std::nullopt; 232*5f757f3fSDimitry Andric } 233*5f757f3fSDimitry Andric ConstantInt *StepCI = cast<SCEVConstant>(StepRec)->getValue(); 234*5f757f3fSDimitry Andric 235*5f757f3fSDimitry Andric if (ICI->isEquality() && !HasNoSignedWrap(IndVarBase)) { 236*5f757f3fSDimitry Andric FailureReason = "LHS in icmp needs nsw for equality predicates"; 237*5f757f3fSDimitry Andric return std::nullopt; 238*5f757f3fSDimitry Andric } 239*5f757f3fSDimitry Andric 240*5f757f3fSDimitry Andric assert(!StepCI->isZero() && "Zero step?"); 241*5f757f3fSDimitry Andric bool IsIncreasing = !StepCI->isNegative(); 242*5f757f3fSDimitry Andric bool IsSignedPredicate; 243*5f757f3fSDimitry Andric const SCEV *StartNext = IndVarBase->getStart(); 244*5f757f3fSDimitry Andric const SCEV *Addend = SE.getNegativeSCEV(IndVarBase->getStepRecurrence(SE)); 245*5f757f3fSDimitry Andric const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend); 246*5f757f3fSDimitry Andric const SCEV *Step = SE.getSCEV(StepCI); 247*5f757f3fSDimitry Andric 248*5f757f3fSDimitry Andric const SCEV *FixedRightSCEV = nullptr; 249*5f757f3fSDimitry Andric 250*5f757f3fSDimitry Andric // If RightValue resides within loop (but still being loop invariant), 251*5f757f3fSDimitry Andric // regenerate it as preheader. 252*5f757f3fSDimitry Andric if (auto *I = dyn_cast<Instruction>(RightValue)) 253*5f757f3fSDimitry Andric if (L.contains(I->getParent())) 254*5f757f3fSDimitry Andric FixedRightSCEV = RightSCEV; 255*5f757f3fSDimitry Andric 256*5f757f3fSDimitry Andric if (IsIncreasing) { 257*5f757f3fSDimitry Andric bool DecreasedRightValueByOne = false; 258*5f757f3fSDimitry Andric if (StepCI->isOne()) { 259*5f757f3fSDimitry Andric // Try to turn eq/ne predicates to those we can work with. 260*5f757f3fSDimitry Andric if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1) 261*5f757f3fSDimitry Andric // while (++i != len) { while (++i < len) { 262*5f757f3fSDimitry Andric // ... ---> ... 263*5f757f3fSDimitry Andric // } } 264*5f757f3fSDimitry Andric // If both parts are known non-negative, it is profitable to use 265*5f757f3fSDimitry Andric // unsigned comparison in increasing loop. This allows us to make the 266*5f757f3fSDimitry Andric // comparison check against "RightSCEV + 1" more optimistic. 267*5f757f3fSDimitry Andric if (isKnownNonNegativeInLoop(IndVarStart, &L, SE) && 268*5f757f3fSDimitry Andric isKnownNonNegativeInLoop(RightSCEV, &L, SE)) 269*5f757f3fSDimitry Andric Pred = ICmpInst::ICMP_ULT; 270*5f757f3fSDimitry Andric else 271*5f757f3fSDimitry Andric Pred = ICmpInst::ICMP_SLT; 272*5f757f3fSDimitry Andric else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0) { 273*5f757f3fSDimitry Andric // while (true) { while (true) { 274*5f757f3fSDimitry Andric // if (++i == len) ---> if (++i > len - 1) 275*5f757f3fSDimitry Andric // break; break; 276*5f757f3fSDimitry Andric // ... ... 277*5f757f3fSDimitry Andric // } } 278*5f757f3fSDimitry Andric if (IndVarBase->getNoWrapFlags(SCEV::FlagNUW) && 279*5f757f3fSDimitry Andric cannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/ false)) { 280*5f757f3fSDimitry Andric Pred = ICmpInst::ICMP_UGT; 281*5f757f3fSDimitry Andric RightSCEV = 282*5f757f3fSDimitry Andric SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType())); 283*5f757f3fSDimitry Andric DecreasedRightValueByOne = true; 284*5f757f3fSDimitry Andric } else if (cannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/ true)) { 285*5f757f3fSDimitry Andric Pred = ICmpInst::ICMP_SGT; 286*5f757f3fSDimitry Andric RightSCEV = 287*5f757f3fSDimitry Andric SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType())); 288*5f757f3fSDimitry Andric DecreasedRightValueByOne = true; 289*5f757f3fSDimitry Andric } 290*5f757f3fSDimitry Andric } 291*5f757f3fSDimitry Andric } 292*5f757f3fSDimitry Andric 293*5f757f3fSDimitry Andric bool LTPred = (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT); 294*5f757f3fSDimitry Andric bool GTPred = (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT); 295*5f757f3fSDimitry Andric bool FoundExpectedPred = 296*5f757f3fSDimitry Andric (LTPred && LatchBrExitIdx == 1) || (GTPred && LatchBrExitIdx == 0); 297*5f757f3fSDimitry Andric 298*5f757f3fSDimitry Andric if (!FoundExpectedPred) { 299*5f757f3fSDimitry Andric FailureReason = "expected icmp slt semantically, found something else"; 300*5f757f3fSDimitry Andric return std::nullopt; 301*5f757f3fSDimitry Andric } 302*5f757f3fSDimitry Andric 303*5f757f3fSDimitry Andric IsSignedPredicate = ICmpInst::isSigned(Pred); 304*5f757f3fSDimitry Andric if (!IsSignedPredicate && !AllowUnsignedLatchCond) { 305*5f757f3fSDimitry Andric FailureReason = "unsigned latch conditions are explicitly prohibited"; 306*5f757f3fSDimitry Andric return std::nullopt; 307*5f757f3fSDimitry Andric } 308*5f757f3fSDimitry Andric 309*5f757f3fSDimitry Andric if (!isSafeIncreasingBound(IndVarStart, RightSCEV, Step, Pred, 310*5f757f3fSDimitry Andric LatchBrExitIdx, &L, SE)) { 311*5f757f3fSDimitry Andric FailureReason = "Unsafe loop bounds"; 312*5f757f3fSDimitry Andric return std::nullopt; 313*5f757f3fSDimitry Andric } 314*5f757f3fSDimitry Andric if (LatchBrExitIdx == 0) { 315*5f757f3fSDimitry Andric // We need to increase the right value unless we have already decreased 316*5f757f3fSDimitry Andric // it virtually when we replaced EQ with SGT. 317*5f757f3fSDimitry Andric if (!DecreasedRightValueByOne) 318*5f757f3fSDimitry Andric FixedRightSCEV = 319*5f757f3fSDimitry Andric SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType())); 320*5f757f3fSDimitry Andric } else { 321*5f757f3fSDimitry Andric assert(!DecreasedRightValueByOne && 322*5f757f3fSDimitry Andric "Right value can be decreased only for LatchBrExitIdx == 0!"); 323*5f757f3fSDimitry Andric } 324*5f757f3fSDimitry Andric } else { 325*5f757f3fSDimitry Andric bool IncreasedRightValueByOne = false; 326*5f757f3fSDimitry Andric if (StepCI->isMinusOne()) { 327*5f757f3fSDimitry Andric // Try to turn eq/ne predicates to those we can work with. 328*5f757f3fSDimitry Andric if (Pred == ICmpInst::ICMP_NE && LatchBrExitIdx == 1) 329*5f757f3fSDimitry Andric // while (--i != len) { while (--i > len) { 330*5f757f3fSDimitry Andric // ... ---> ... 331*5f757f3fSDimitry Andric // } } 332*5f757f3fSDimitry Andric // We intentionally don't turn the predicate into UGT even if we know 333*5f757f3fSDimitry Andric // that both operands are non-negative, because it will only pessimize 334*5f757f3fSDimitry Andric // our check against "RightSCEV - 1". 335*5f757f3fSDimitry Andric Pred = ICmpInst::ICMP_SGT; 336*5f757f3fSDimitry Andric else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0) { 337*5f757f3fSDimitry Andric // while (true) { while (true) { 338*5f757f3fSDimitry Andric // if (--i == len) ---> if (--i < len + 1) 339*5f757f3fSDimitry Andric // break; break; 340*5f757f3fSDimitry Andric // ... ... 341*5f757f3fSDimitry Andric // } } 342*5f757f3fSDimitry Andric if (IndVarBase->getNoWrapFlags(SCEV::FlagNUW) && 343*5f757f3fSDimitry Andric cannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ false)) { 344*5f757f3fSDimitry Andric Pred = ICmpInst::ICMP_ULT; 345*5f757f3fSDimitry Andric RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType())); 346*5f757f3fSDimitry Andric IncreasedRightValueByOne = true; 347*5f757f3fSDimitry Andric } else if (cannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ true)) { 348*5f757f3fSDimitry Andric Pred = ICmpInst::ICMP_SLT; 349*5f757f3fSDimitry Andric RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType())); 350*5f757f3fSDimitry Andric IncreasedRightValueByOne = true; 351*5f757f3fSDimitry Andric } 352*5f757f3fSDimitry Andric } 353*5f757f3fSDimitry Andric } 354*5f757f3fSDimitry Andric 355*5f757f3fSDimitry Andric bool LTPred = (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT); 356*5f757f3fSDimitry Andric bool GTPred = (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT); 357*5f757f3fSDimitry Andric 358*5f757f3fSDimitry Andric bool FoundExpectedPred = 359*5f757f3fSDimitry Andric (GTPred && LatchBrExitIdx == 1) || (LTPred && LatchBrExitIdx == 0); 360*5f757f3fSDimitry Andric 361*5f757f3fSDimitry Andric if (!FoundExpectedPred) { 362*5f757f3fSDimitry Andric FailureReason = "expected icmp sgt semantically, found something else"; 363*5f757f3fSDimitry Andric return std::nullopt; 364*5f757f3fSDimitry Andric } 365*5f757f3fSDimitry Andric 366*5f757f3fSDimitry Andric IsSignedPredicate = 367*5f757f3fSDimitry Andric Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGT; 368*5f757f3fSDimitry Andric 369*5f757f3fSDimitry Andric if (!IsSignedPredicate && !AllowUnsignedLatchCond) { 370*5f757f3fSDimitry Andric FailureReason = "unsigned latch conditions are explicitly prohibited"; 371*5f757f3fSDimitry Andric return std::nullopt; 372*5f757f3fSDimitry Andric } 373*5f757f3fSDimitry Andric 374*5f757f3fSDimitry Andric if (!isSafeDecreasingBound(IndVarStart, RightSCEV, Step, Pred, 375*5f757f3fSDimitry Andric LatchBrExitIdx, &L, SE)) { 376*5f757f3fSDimitry Andric FailureReason = "Unsafe bounds"; 377*5f757f3fSDimitry Andric return std::nullopt; 378*5f757f3fSDimitry Andric } 379*5f757f3fSDimitry Andric 380*5f757f3fSDimitry Andric if (LatchBrExitIdx == 0) { 381*5f757f3fSDimitry Andric // We need to decrease the right value unless we have already increased 382*5f757f3fSDimitry Andric // it virtually when we replaced EQ with SLT. 383*5f757f3fSDimitry Andric if (!IncreasedRightValueByOne) 384*5f757f3fSDimitry Andric FixedRightSCEV = 385*5f757f3fSDimitry Andric SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType())); 386*5f757f3fSDimitry Andric } else { 387*5f757f3fSDimitry Andric assert(!IncreasedRightValueByOne && 388*5f757f3fSDimitry Andric "Right value can be increased only for LatchBrExitIdx == 0!"); 389*5f757f3fSDimitry Andric } 390*5f757f3fSDimitry Andric } 391*5f757f3fSDimitry Andric BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx); 392*5f757f3fSDimitry Andric 393*5f757f3fSDimitry Andric assert(!L.contains(LatchExit) && "expected an exit block!"); 394*5f757f3fSDimitry Andric const DataLayout &DL = Preheader->getModule()->getDataLayout(); 395*5f757f3fSDimitry Andric SCEVExpander Expander(SE, DL, "loop-constrainer"); 396*5f757f3fSDimitry Andric Instruction *Ins = Preheader->getTerminator(); 397*5f757f3fSDimitry Andric 398*5f757f3fSDimitry Andric if (FixedRightSCEV) 399*5f757f3fSDimitry Andric RightValue = 400*5f757f3fSDimitry Andric Expander.expandCodeFor(FixedRightSCEV, FixedRightSCEV->getType(), Ins); 401*5f757f3fSDimitry Andric 402*5f757f3fSDimitry Andric Value *IndVarStartV = Expander.expandCodeFor(IndVarStart, IndVarTy, Ins); 403*5f757f3fSDimitry Andric IndVarStartV->setName("indvar.start"); 404*5f757f3fSDimitry Andric 405*5f757f3fSDimitry Andric LoopStructure Result; 406*5f757f3fSDimitry Andric 407*5f757f3fSDimitry Andric Result.Tag = "main"; 408*5f757f3fSDimitry Andric Result.Header = Header; 409*5f757f3fSDimitry Andric Result.Latch = Latch; 410*5f757f3fSDimitry Andric Result.LatchBr = LatchBr; 411*5f757f3fSDimitry Andric Result.LatchExit = LatchExit; 412*5f757f3fSDimitry Andric Result.LatchBrExitIdx = LatchBrExitIdx; 413*5f757f3fSDimitry Andric Result.IndVarStart = IndVarStartV; 414*5f757f3fSDimitry Andric Result.IndVarStep = StepCI; 415*5f757f3fSDimitry Andric Result.IndVarBase = LeftValue; 416*5f757f3fSDimitry Andric Result.IndVarIncreasing = IsIncreasing; 417*5f757f3fSDimitry Andric Result.LoopExitAt = RightValue; 418*5f757f3fSDimitry Andric Result.IsSignedPredicate = IsSignedPredicate; 419*5f757f3fSDimitry Andric Result.ExitCountTy = cast<IntegerType>(MaxBETakenCount->getType()); 420*5f757f3fSDimitry Andric 421*5f757f3fSDimitry Andric FailureReason = nullptr; 422*5f757f3fSDimitry Andric 423*5f757f3fSDimitry Andric return Result; 424*5f757f3fSDimitry Andric } 425*5f757f3fSDimitry Andric 426*5f757f3fSDimitry Andric // Add metadata to the loop L to disable loop optimizations. Callers need to 427*5f757f3fSDimitry Andric // confirm that optimizing loop L is not beneficial. 428*5f757f3fSDimitry Andric static void DisableAllLoopOptsOnLoop(Loop &L) { 429*5f757f3fSDimitry Andric // We do not care about any existing loopID related metadata for L, since we 430*5f757f3fSDimitry Andric // are setting all loop metadata to false. 431*5f757f3fSDimitry Andric LLVMContext &Context = L.getHeader()->getContext(); 432*5f757f3fSDimitry Andric // Reserve first location for self reference to the LoopID metadata node. 433*5f757f3fSDimitry Andric MDNode *Dummy = MDNode::get(Context, {}); 434*5f757f3fSDimitry Andric MDNode *DisableUnroll = MDNode::get( 435*5f757f3fSDimitry Andric Context, {MDString::get(Context, "llvm.loop.unroll.disable")}); 436*5f757f3fSDimitry Andric Metadata *FalseVal = 437*5f757f3fSDimitry Andric ConstantAsMetadata::get(ConstantInt::get(Type::getInt1Ty(Context), 0)); 438*5f757f3fSDimitry Andric MDNode *DisableVectorize = MDNode::get( 439*5f757f3fSDimitry Andric Context, 440*5f757f3fSDimitry Andric {MDString::get(Context, "llvm.loop.vectorize.enable"), FalseVal}); 441*5f757f3fSDimitry Andric MDNode *DisableLICMVersioning = MDNode::get( 442*5f757f3fSDimitry Andric Context, {MDString::get(Context, "llvm.loop.licm_versioning.disable")}); 443*5f757f3fSDimitry Andric MDNode *DisableDistribution = MDNode::get( 444*5f757f3fSDimitry Andric Context, 445*5f757f3fSDimitry Andric {MDString::get(Context, "llvm.loop.distribute.enable"), FalseVal}); 446*5f757f3fSDimitry Andric MDNode *NewLoopID = 447*5f757f3fSDimitry Andric MDNode::get(Context, {Dummy, DisableUnroll, DisableVectorize, 448*5f757f3fSDimitry Andric DisableLICMVersioning, DisableDistribution}); 449*5f757f3fSDimitry Andric // Set operand 0 to refer to the loop id itself. 450*5f757f3fSDimitry Andric NewLoopID->replaceOperandWith(0, NewLoopID); 451*5f757f3fSDimitry Andric L.setLoopID(NewLoopID); 452*5f757f3fSDimitry Andric } 453*5f757f3fSDimitry Andric 454*5f757f3fSDimitry Andric LoopConstrainer::LoopConstrainer(Loop &L, LoopInfo &LI, 455*5f757f3fSDimitry Andric function_ref<void(Loop *, bool)> LPMAddNewLoop, 456*5f757f3fSDimitry Andric const LoopStructure &LS, ScalarEvolution &SE, 457*5f757f3fSDimitry Andric DominatorTree &DT, Type *T, SubRanges SR) 458*5f757f3fSDimitry Andric : F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()), SE(SE), 459*5f757f3fSDimitry Andric DT(DT), LI(LI), LPMAddNewLoop(LPMAddNewLoop), OriginalLoop(L), RangeTy(T), 460*5f757f3fSDimitry Andric MainLoopStructure(LS), SR(SR) {} 461*5f757f3fSDimitry Andric 462*5f757f3fSDimitry Andric void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result, 463*5f757f3fSDimitry Andric const char *Tag) const { 464*5f757f3fSDimitry Andric for (BasicBlock *BB : OriginalLoop.getBlocks()) { 465*5f757f3fSDimitry Andric BasicBlock *Clone = CloneBasicBlock(BB, Result.Map, Twine(".") + Tag, &F); 466*5f757f3fSDimitry Andric Result.Blocks.push_back(Clone); 467*5f757f3fSDimitry Andric Result.Map[BB] = Clone; 468*5f757f3fSDimitry Andric } 469*5f757f3fSDimitry Andric 470*5f757f3fSDimitry Andric auto GetClonedValue = [&Result](Value *V) { 471*5f757f3fSDimitry Andric assert(V && "null values not in domain!"); 472*5f757f3fSDimitry Andric auto It = Result.Map.find(V); 473*5f757f3fSDimitry Andric if (It == Result.Map.end()) 474*5f757f3fSDimitry Andric return V; 475*5f757f3fSDimitry Andric return static_cast<Value *>(It->second); 476*5f757f3fSDimitry Andric }; 477*5f757f3fSDimitry Andric 478*5f757f3fSDimitry Andric auto *ClonedLatch = 479*5f757f3fSDimitry Andric cast<BasicBlock>(GetClonedValue(OriginalLoop.getLoopLatch())); 480*5f757f3fSDimitry Andric ClonedLatch->getTerminator()->setMetadata(ClonedLoopTag, 481*5f757f3fSDimitry Andric MDNode::get(Ctx, {})); 482*5f757f3fSDimitry Andric 483*5f757f3fSDimitry Andric Result.Structure = MainLoopStructure.map(GetClonedValue); 484*5f757f3fSDimitry Andric Result.Structure.Tag = Tag; 485*5f757f3fSDimitry Andric 486*5f757f3fSDimitry Andric for (unsigned i = 0, e = Result.Blocks.size(); i != e; ++i) { 487*5f757f3fSDimitry Andric BasicBlock *ClonedBB = Result.Blocks[i]; 488*5f757f3fSDimitry Andric BasicBlock *OriginalBB = OriginalLoop.getBlocks()[i]; 489*5f757f3fSDimitry Andric 490*5f757f3fSDimitry Andric assert(Result.Map[OriginalBB] == ClonedBB && "invariant!"); 491*5f757f3fSDimitry Andric 492*5f757f3fSDimitry Andric for (Instruction &I : *ClonedBB) 493*5f757f3fSDimitry Andric RemapInstruction(&I, Result.Map, 494*5f757f3fSDimitry Andric RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 495*5f757f3fSDimitry Andric 496*5f757f3fSDimitry Andric // Exit blocks will now have one more predecessor and their PHI nodes need 497*5f757f3fSDimitry Andric // to be edited to reflect that. No phi nodes need to be introduced because 498*5f757f3fSDimitry Andric // the loop is in LCSSA. 499*5f757f3fSDimitry Andric 500*5f757f3fSDimitry Andric for (auto *SBB : successors(OriginalBB)) { 501*5f757f3fSDimitry Andric if (OriginalLoop.contains(SBB)) 502*5f757f3fSDimitry Andric continue; // not an exit block 503*5f757f3fSDimitry Andric 504*5f757f3fSDimitry Andric for (PHINode &PN : SBB->phis()) { 505*5f757f3fSDimitry Andric Value *OldIncoming = PN.getIncomingValueForBlock(OriginalBB); 506*5f757f3fSDimitry Andric PN.addIncoming(GetClonedValue(OldIncoming), ClonedBB); 507*5f757f3fSDimitry Andric SE.forgetValue(&PN); 508*5f757f3fSDimitry Andric } 509*5f757f3fSDimitry Andric } 510*5f757f3fSDimitry Andric } 511*5f757f3fSDimitry Andric } 512*5f757f3fSDimitry Andric 513*5f757f3fSDimitry Andric LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd( 514*5f757f3fSDimitry Andric const LoopStructure &LS, BasicBlock *Preheader, Value *ExitSubloopAt, 515*5f757f3fSDimitry Andric BasicBlock *ContinuationBlock) const { 516*5f757f3fSDimitry Andric // We start with a loop with a single latch: 517*5f757f3fSDimitry Andric // 518*5f757f3fSDimitry Andric // +--------------------+ 519*5f757f3fSDimitry Andric // | | 520*5f757f3fSDimitry Andric // | preheader | 521*5f757f3fSDimitry Andric // | | 522*5f757f3fSDimitry Andric // +--------+-----------+ 523*5f757f3fSDimitry Andric // | ----------------\ 524*5f757f3fSDimitry Andric // | / | 525*5f757f3fSDimitry Andric // +--------v----v------+ | 526*5f757f3fSDimitry Andric // | | | 527*5f757f3fSDimitry Andric // | header | | 528*5f757f3fSDimitry Andric // | | | 529*5f757f3fSDimitry Andric // +--------------------+ | 530*5f757f3fSDimitry Andric // | 531*5f757f3fSDimitry Andric // ..... | 532*5f757f3fSDimitry Andric // | 533*5f757f3fSDimitry Andric // +--------------------+ | 534*5f757f3fSDimitry Andric // | | | 535*5f757f3fSDimitry Andric // | latch >----------/ 536*5f757f3fSDimitry Andric // | | 537*5f757f3fSDimitry Andric // +-------v------------+ 538*5f757f3fSDimitry Andric // | 539*5f757f3fSDimitry Andric // | 540*5f757f3fSDimitry Andric // | +--------------------+ 541*5f757f3fSDimitry Andric // | | | 542*5f757f3fSDimitry Andric // +---> original exit | 543*5f757f3fSDimitry Andric // | | 544*5f757f3fSDimitry Andric // +--------------------+ 545*5f757f3fSDimitry Andric // 546*5f757f3fSDimitry Andric // We change the control flow to look like 547*5f757f3fSDimitry Andric // 548*5f757f3fSDimitry Andric // 549*5f757f3fSDimitry Andric // +--------------------+ 550*5f757f3fSDimitry Andric // | | 551*5f757f3fSDimitry Andric // | preheader >-------------------------+ 552*5f757f3fSDimitry Andric // | | | 553*5f757f3fSDimitry Andric // +--------v-----------+ | 554*5f757f3fSDimitry Andric // | /-------------+ | 555*5f757f3fSDimitry Andric // | / | | 556*5f757f3fSDimitry Andric // +--------v--v--------+ | | 557*5f757f3fSDimitry Andric // | | | | 558*5f757f3fSDimitry Andric // | header | | +--------+ | 559*5f757f3fSDimitry Andric // | | | | | | 560*5f757f3fSDimitry Andric // +--------------------+ | | +-----v-----v-----------+ 561*5f757f3fSDimitry Andric // | | | | 562*5f757f3fSDimitry Andric // | | | .pseudo.exit | 563*5f757f3fSDimitry Andric // | | | | 564*5f757f3fSDimitry Andric // | | +-----------v-----------+ 565*5f757f3fSDimitry Andric // | | | 566*5f757f3fSDimitry Andric // ..... | | | 567*5f757f3fSDimitry Andric // | | +--------v-------------+ 568*5f757f3fSDimitry Andric // +--------------------+ | | | | 569*5f757f3fSDimitry Andric // | | | | | ContinuationBlock | 570*5f757f3fSDimitry Andric // | latch >------+ | | | 571*5f757f3fSDimitry Andric // | | | +----------------------+ 572*5f757f3fSDimitry Andric // +---------v----------+ | 573*5f757f3fSDimitry Andric // | | 574*5f757f3fSDimitry Andric // | | 575*5f757f3fSDimitry Andric // | +---------------^-----+ 576*5f757f3fSDimitry Andric // | | | 577*5f757f3fSDimitry Andric // +-----> .exit.selector | 578*5f757f3fSDimitry Andric // | | 579*5f757f3fSDimitry Andric // +----------v----------+ 580*5f757f3fSDimitry Andric // | 581*5f757f3fSDimitry Andric // +--------------------+ | 582*5f757f3fSDimitry Andric // | | | 583*5f757f3fSDimitry Andric // | original exit <----+ 584*5f757f3fSDimitry Andric // | | 585*5f757f3fSDimitry Andric // +--------------------+ 586*5f757f3fSDimitry Andric 587*5f757f3fSDimitry Andric RewrittenRangeInfo RRI; 588*5f757f3fSDimitry Andric 589*5f757f3fSDimitry Andric BasicBlock *BBInsertLocation = LS.Latch->getNextNode(); 590*5f757f3fSDimitry Andric RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector", 591*5f757f3fSDimitry Andric &F, BBInsertLocation); 592*5f757f3fSDimitry Andric RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F, 593*5f757f3fSDimitry Andric BBInsertLocation); 594*5f757f3fSDimitry Andric 595*5f757f3fSDimitry Andric BranchInst *PreheaderJump = cast<BranchInst>(Preheader->getTerminator()); 596*5f757f3fSDimitry Andric bool Increasing = LS.IndVarIncreasing; 597*5f757f3fSDimitry Andric bool IsSignedPredicate = LS.IsSignedPredicate; 598*5f757f3fSDimitry Andric 599*5f757f3fSDimitry Andric IRBuilder<> B(PreheaderJump); 600*5f757f3fSDimitry Andric auto NoopOrExt = [&](Value *V) { 601*5f757f3fSDimitry Andric if (V->getType() == RangeTy) 602*5f757f3fSDimitry Andric return V; 603*5f757f3fSDimitry Andric return IsSignedPredicate ? B.CreateSExt(V, RangeTy, "wide." + V->getName()) 604*5f757f3fSDimitry Andric : B.CreateZExt(V, RangeTy, "wide." + V->getName()); 605*5f757f3fSDimitry Andric }; 606*5f757f3fSDimitry Andric 607*5f757f3fSDimitry Andric // EnterLoopCond - is it okay to start executing this `LS'? 608*5f757f3fSDimitry Andric Value *EnterLoopCond = nullptr; 609*5f757f3fSDimitry Andric auto Pred = 610*5f757f3fSDimitry Andric Increasing 611*5f757f3fSDimitry Andric ? (IsSignedPredicate ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT) 612*5f757f3fSDimitry Andric : (IsSignedPredicate ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT); 613*5f757f3fSDimitry Andric Value *IndVarStart = NoopOrExt(LS.IndVarStart); 614*5f757f3fSDimitry Andric EnterLoopCond = B.CreateICmp(Pred, IndVarStart, ExitSubloopAt); 615*5f757f3fSDimitry Andric 616*5f757f3fSDimitry Andric B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit); 617*5f757f3fSDimitry Andric PreheaderJump->eraseFromParent(); 618*5f757f3fSDimitry Andric 619*5f757f3fSDimitry Andric LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector); 620*5f757f3fSDimitry Andric B.SetInsertPoint(LS.LatchBr); 621*5f757f3fSDimitry Andric Value *IndVarBase = NoopOrExt(LS.IndVarBase); 622*5f757f3fSDimitry Andric Value *TakeBackedgeLoopCond = B.CreateICmp(Pred, IndVarBase, ExitSubloopAt); 623*5f757f3fSDimitry Andric 624*5f757f3fSDimitry Andric Value *CondForBranch = LS.LatchBrExitIdx == 1 625*5f757f3fSDimitry Andric ? TakeBackedgeLoopCond 626*5f757f3fSDimitry Andric : B.CreateNot(TakeBackedgeLoopCond); 627*5f757f3fSDimitry Andric 628*5f757f3fSDimitry Andric LS.LatchBr->setCondition(CondForBranch); 629*5f757f3fSDimitry Andric 630*5f757f3fSDimitry Andric B.SetInsertPoint(RRI.ExitSelector); 631*5f757f3fSDimitry Andric 632*5f757f3fSDimitry Andric // IterationsLeft - are there any more iterations left, given the original 633*5f757f3fSDimitry Andric // upper bound on the induction variable? If not, we branch to the "real" 634*5f757f3fSDimitry Andric // exit. 635*5f757f3fSDimitry Andric Value *LoopExitAt = NoopOrExt(LS.LoopExitAt); 636*5f757f3fSDimitry Andric Value *IterationsLeft = B.CreateICmp(Pred, IndVarBase, LoopExitAt); 637*5f757f3fSDimitry Andric B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit); 638*5f757f3fSDimitry Andric 639*5f757f3fSDimitry Andric BranchInst *BranchToContinuation = 640*5f757f3fSDimitry Andric BranchInst::Create(ContinuationBlock, RRI.PseudoExit); 641*5f757f3fSDimitry Andric 642*5f757f3fSDimitry Andric // We emit PHI nodes into `RRI.PseudoExit' that compute the "latest" value of 643*5f757f3fSDimitry Andric // each of the PHI nodes in the loop header. This feeds into the initial 644*5f757f3fSDimitry Andric // value of the same PHI nodes if/when we continue execution. 645*5f757f3fSDimitry Andric for (PHINode &PN : LS.Header->phis()) { 646*5f757f3fSDimitry Andric PHINode *NewPHI = PHINode::Create(PN.getType(), 2, PN.getName() + ".copy", 647*5f757f3fSDimitry Andric BranchToContinuation); 648*5f757f3fSDimitry Andric 649*5f757f3fSDimitry Andric NewPHI->addIncoming(PN.getIncomingValueForBlock(Preheader), Preheader); 650*5f757f3fSDimitry Andric NewPHI->addIncoming(PN.getIncomingValueForBlock(LS.Latch), 651*5f757f3fSDimitry Andric RRI.ExitSelector); 652*5f757f3fSDimitry Andric RRI.PHIValuesAtPseudoExit.push_back(NewPHI); 653*5f757f3fSDimitry Andric } 654*5f757f3fSDimitry Andric 655*5f757f3fSDimitry Andric RRI.IndVarEnd = PHINode::Create(IndVarBase->getType(), 2, "indvar.end", 656*5f757f3fSDimitry Andric BranchToContinuation); 657*5f757f3fSDimitry Andric RRI.IndVarEnd->addIncoming(IndVarStart, Preheader); 658*5f757f3fSDimitry Andric RRI.IndVarEnd->addIncoming(IndVarBase, RRI.ExitSelector); 659*5f757f3fSDimitry Andric 660*5f757f3fSDimitry Andric // The latch exit now has a branch from `RRI.ExitSelector' instead of 661*5f757f3fSDimitry Andric // `LS.Latch'. The PHI nodes need to be updated to reflect that. 662*5f757f3fSDimitry Andric LS.LatchExit->replacePhiUsesWith(LS.Latch, RRI.ExitSelector); 663*5f757f3fSDimitry Andric 664*5f757f3fSDimitry Andric return RRI; 665*5f757f3fSDimitry Andric } 666*5f757f3fSDimitry Andric 667*5f757f3fSDimitry Andric void LoopConstrainer::rewriteIncomingValuesForPHIs( 668*5f757f3fSDimitry Andric LoopStructure &LS, BasicBlock *ContinuationBlock, 669*5f757f3fSDimitry Andric const LoopConstrainer::RewrittenRangeInfo &RRI) const { 670*5f757f3fSDimitry Andric unsigned PHIIndex = 0; 671*5f757f3fSDimitry Andric for (PHINode &PN : LS.Header->phis()) 672*5f757f3fSDimitry Andric PN.setIncomingValueForBlock(ContinuationBlock, 673*5f757f3fSDimitry Andric RRI.PHIValuesAtPseudoExit[PHIIndex++]); 674*5f757f3fSDimitry Andric 675*5f757f3fSDimitry Andric LS.IndVarStart = RRI.IndVarEnd; 676*5f757f3fSDimitry Andric } 677*5f757f3fSDimitry Andric 678*5f757f3fSDimitry Andric BasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS, 679*5f757f3fSDimitry Andric BasicBlock *OldPreheader, 680*5f757f3fSDimitry Andric const char *Tag) const { 681*5f757f3fSDimitry Andric BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, &F, LS.Header); 682*5f757f3fSDimitry Andric BranchInst::Create(LS.Header, Preheader); 683*5f757f3fSDimitry Andric 684*5f757f3fSDimitry Andric LS.Header->replacePhiUsesWith(OldPreheader, Preheader); 685*5f757f3fSDimitry Andric 686*5f757f3fSDimitry Andric return Preheader; 687*5f757f3fSDimitry Andric } 688*5f757f3fSDimitry Andric 689*5f757f3fSDimitry Andric void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs) { 690*5f757f3fSDimitry Andric Loop *ParentLoop = OriginalLoop.getParentLoop(); 691*5f757f3fSDimitry Andric if (!ParentLoop) 692*5f757f3fSDimitry Andric return; 693*5f757f3fSDimitry Andric 694*5f757f3fSDimitry Andric for (BasicBlock *BB : BBs) 695*5f757f3fSDimitry Andric ParentLoop->addBasicBlockToLoop(BB, LI); 696*5f757f3fSDimitry Andric } 697*5f757f3fSDimitry Andric 698*5f757f3fSDimitry Andric Loop *LoopConstrainer::createClonedLoopStructure(Loop *Original, Loop *Parent, 699*5f757f3fSDimitry Andric ValueToValueMapTy &VM, 700*5f757f3fSDimitry Andric bool IsSubloop) { 701*5f757f3fSDimitry Andric Loop &New = *LI.AllocateLoop(); 702*5f757f3fSDimitry Andric if (Parent) 703*5f757f3fSDimitry Andric Parent->addChildLoop(&New); 704*5f757f3fSDimitry Andric else 705*5f757f3fSDimitry Andric LI.addTopLevelLoop(&New); 706*5f757f3fSDimitry Andric LPMAddNewLoop(&New, IsSubloop); 707*5f757f3fSDimitry Andric 708*5f757f3fSDimitry Andric // Add all of the blocks in Original to the new loop. 709*5f757f3fSDimitry Andric for (auto *BB : Original->blocks()) 710*5f757f3fSDimitry Andric if (LI.getLoopFor(BB) == Original) 711*5f757f3fSDimitry Andric New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), LI); 712*5f757f3fSDimitry Andric 713*5f757f3fSDimitry Andric // Add all of the subloops to the new loop. 714*5f757f3fSDimitry Andric for (Loop *SubLoop : *Original) 715*5f757f3fSDimitry Andric createClonedLoopStructure(SubLoop, &New, VM, /* IsSubloop */ true); 716*5f757f3fSDimitry Andric 717*5f757f3fSDimitry Andric return &New; 718*5f757f3fSDimitry Andric } 719*5f757f3fSDimitry Andric 720*5f757f3fSDimitry Andric bool LoopConstrainer::run() { 721*5f757f3fSDimitry Andric BasicBlock *Preheader = OriginalLoop.getLoopPreheader(); 722*5f757f3fSDimitry Andric assert(Preheader != nullptr && "precondition!"); 723*5f757f3fSDimitry Andric 724*5f757f3fSDimitry Andric OriginalPreheader = Preheader; 725*5f757f3fSDimitry Andric MainLoopPreheader = Preheader; 726*5f757f3fSDimitry Andric bool IsSignedPredicate = MainLoopStructure.IsSignedPredicate; 727*5f757f3fSDimitry Andric bool Increasing = MainLoopStructure.IndVarIncreasing; 728*5f757f3fSDimitry Andric IntegerType *IVTy = cast<IntegerType>(RangeTy); 729*5f757f3fSDimitry Andric 730*5f757f3fSDimitry Andric SCEVExpander Expander(SE, F.getParent()->getDataLayout(), "loop-constrainer"); 731*5f757f3fSDimitry Andric Instruction *InsertPt = OriginalPreheader->getTerminator(); 732*5f757f3fSDimitry Andric 733*5f757f3fSDimitry Andric // It would have been better to make `PreLoop' and `PostLoop' 734*5f757f3fSDimitry Andric // `std::optional<ClonedLoop>'s, but `ValueToValueMapTy' does not have a copy 735*5f757f3fSDimitry Andric // constructor. 736*5f757f3fSDimitry Andric ClonedLoop PreLoop, PostLoop; 737*5f757f3fSDimitry Andric bool NeedsPreLoop = 738*5f757f3fSDimitry Andric Increasing ? SR.LowLimit.has_value() : SR.HighLimit.has_value(); 739*5f757f3fSDimitry Andric bool NeedsPostLoop = 740*5f757f3fSDimitry Andric Increasing ? SR.HighLimit.has_value() : SR.LowLimit.has_value(); 741*5f757f3fSDimitry Andric 742*5f757f3fSDimitry Andric Value *ExitPreLoopAt = nullptr; 743*5f757f3fSDimitry Andric Value *ExitMainLoopAt = nullptr; 744*5f757f3fSDimitry Andric const SCEVConstant *MinusOneS = 745*5f757f3fSDimitry Andric cast<SCEVConstant>(SE.getConstant(IVTy, -1, true /* isSigned */)); 746*5f757f3fSDimitry Andric 747*5f757f3fSDimitry Andric if (NeedsPreLoop) { 748*5f757f3fSDimitry Andric const SCEV *ExitPreLoopAtSCEV = nullptr; 749*5f757f3fSDimitry Andric 750*5f757f3fSDimitry Andric if (Increasing) 751*5f757f3fSDimitry Andric ExitPreLoopAtSCEV = *SR.LowLimit; 752*5f757f3fSDimitry Andric else if (cannotBeMinInLoop(*SR.HighLimit, &OriginalLoop, SE, 753*5f757f3fSDimitry Andric IsSignedPredicate)) 754*5f757f3fSDimitry Andric ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS); 755*5f757f3fSDimitry Andric else { 756*5f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "could not prove no-overflow when computing " 757*5f757f3fSDimitry Andric << "preloop exit limit. HighLimit = " 758*5f757f3fSDimitry Andric << *(*SR.HighLimit) << "\n"); 759*5f757f3fSDimitry Andric return false; 760*5f757f3fSDimitry Andric } 761*5f757f3fSDimitry Andric 762*5f757f3fSDimitry Andric if (!Expander.isSafeToExpandAt(ExitPreLoopAtSCEV, InsertPt)) { 763*5f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "could not prove that it is safe to expand the" 764*5f757f3fSDimitry Andric << " preloop exit limit " << *ExitPreLoopAtSCEV 765*5f757f3fSDimitry Andric << " at block " << InsertPt->getParent()->getName() 766*5f757f3fSDimitry Andric << "\n"); 767*5f757f3fSDimitry Andric return false; 768*5f757f3fSDimitry Andric } 769*5f757f3fSDimitry Andric 770*5f757f3fSDimitry Andric ExitPreLoopAt = Expander.expandCodeFor(ExitPreLoopAtSCEV, IVTy, InsertPt); 771*5f757f3fSDimitry Andric ExitPreLoopAt->setName("exit.preloop.at"); 772*5f757f3fSDimitry Andric } 773*5f757f3fSDimitry Andric 774*5f757f3fSDimitry Andric if (NeedsPostLoop) { 775*5f757f3fSDimitry Andric const SCEV *ExitMainLoopAtSCEV = nullptr; 776*5f757f3fSDimitry Andric 777*5f757f3fSDimitry Andric if (Increasing) 778*5f757f3fSDimitry Andric ExitMainLoopAtSCEV = *SR.HighLimit; 779*5f757f3fSDimitry Andric else if (cannotBeMinInLoop(*SR.LowLimit, &OriginalLoop, SE, 780*5f757f3fSDimitry Andric IsSignedPredicate)) 781*5f757f3fSDimitry Andric ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS); 782*5f757f3fSDimitry Andric else { 783*5f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "could not prove no-overflow when computing " 784*5f757f3fSDimitry Andric << "mainloop exit limit. LowLimit = " 785*5f757f3fSDimitry Andric << *(*SR.LowLimit) << "\n"); 786*5f757f3fSDimitry Andric return false; 787*5f757f3fSDimitry Andric } 788*5f757f3fSDimitry Andric 789*5f757f3fSDimitry Andric if (!Expander.isSafeToExpandAt(ExitMainLoopAtSCEV, InsertPt)) { 790*5f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "could not prove that it is safe to expand the" 791*5f757f3fSDimitry Andric << " main loop exit limit " << *ExitMainLoopAtSCEV 792*5f757f3fSDimitry Andric << " at block " << InsertPt->getParent()->getName() 793*5f757f3fSDimitry Andric << "\n"); 794*5f757f3fSDimitry Andric return false; 795*5f757f3fSDimitry Andric } 796*5f757f3fSDimitry Andric 797*5f757f3fSDimitry Andric ExitMainLoopAt = Expander.expandCodeFor(ExitMainLoopAtSCEV, IVTy, InsertPt); 798*5f757f3fSDimitry Andric ExitMainLoopAt->setName("exit.mainloop.at"); 799*5f757f3fSDimitry Andric } 800*5f757f3fSDimitry Andric 801*5f757f3fSDimitry Andric // We clone these ahead of time so that we don't have to deal with changing 802*5f757f3fSDimitry Andric // and temporarily invalid IR as we transform the loops. 803*5f757f3fSDimitry Andric if (NeedsPreLoop) 804*5f757f3fSDimitry Andric cloneLoop(PreLoop, "preloop"); 805*5f757f3fSDimitry Andric if (NeedsPostLoop) 806*5f757f3fSDimitry Andric cloneLoop(PostLoop, "postloop"); 807*5f757f3fSDimitry Andric 808*5f757f3fSDimitry Andric RewrittenRangeInfo PreLoopRRI; 809*5f757f3fSDimitry Andric 810*5f757f3fSDimitry Andric if (NeedsPreLoop) { 811*5f757f3fSDimitry Andric Preheader->getTerminator()->replaceUsesOfWith(MainLoopStructure.Header, 812*5f757f3fSDimitry Andric PreLoop.Structure.Header); 813*5f757f3fSDimitry Andric 814*5f757f3fSDimitry Andric MainLoopPreheader = 815*5f757f3fSDimitry Andric createPreheader(MainLoopStructure, Preheader, "mainloop"); 816*5f757f3fSDimitry Andric PreLoopRRI = changeIterationSpaceEnd(PreLoop.Structure, Preheader, 817*5f757f3fSDimitry Andric ExitPreLoopAt, MainLoopPreheader); 818*5f757f3fSDimitry Andric rewriteIncomingValuesForPHIs(MainLoopStructure, MainLoopPreheader, 819*5f757f3fSDimitry Andric PreLoopRRI); 820*5f757f3fSDimitry Andric } 821*5f757f3fSDimitry Andric 822*5f757f3fSDimitry Andric BasicBlock *PostLoopPreheader = nullptr; 823*5f757f3fSDimitry Andric RewrittenRangeInfo PostLoopRRI; 824*5f757f3fSDimitry Andric 825*5f757f3fSDimitry Andric if (NeedsPostLoop) { 826*5f757f3fSDimitry Andric PostLoopPreheader = 827*5f757f3fSDimitry Andric createPreheader(PostLoop.Structure, Preheader, "postloop"); 828*5f757f3fSDimitry Andric PostLoopRRI = changeIterationSpaceEnd(MainLoopStructure, MainLoopPreheader, 829*5f757f3fSDimitry Andric ExitMainLoopAt, PostLoopPreheader); 830*5f757f3fSDimitry Andric rewriteIncomingValuesForPHIs(PostLoop.Structure, PostLoopPreheader, 831*5f757f3fSDimitry Andric PostLoopRRI); 832*5f757f3fSDimitry Andric } 833*5f757f3fSDimitry Andric 834*5f757f3fSDimitry Andric BasicBlock *NewMainLoopPreheader = 835*5f757f3fSDimitry Andric MainLoopPreheader != Preheader ? MainLoopPreheader : nullptr; 836*5f757f3fSDimitry Andric BasicBlock *NewBlocks[] = {PostLoopPreheader, PreLoopRRI.PseudoExit, 837*5f757f3fSDimitry Andric PreLoopRRI.ExitSelector, PostLoopRRI.PseudoExit, 838*5f757f3fSDimitry Andric PostLoopRRI.ExitSelector, NewMainLoopPreheader}; 839*5f757f3fSDimitry Andric 840*5f757f3fSDimitry Andric // Some of the above may be nullptr, filter them out before passing to 841*5f757f3fSDimitry Andric // addToParentLoopIfNeeded. 842*5f757f3fSDimitry Andric auto NewBlocksEnd = 843*5f757f3fSDimitry Andric std::remove(std::begin(NewBlocks), std::end(NewBlocks), nullptr); 844*5f757f3fSDimitry Andric 845*5f757f3fSDimitry Andric addToParentLoopIfNeeded(ArrayRef(std::begin(NewBlocks), NewBlocksEnd)); 846*5f757f3fSDimitry Andric 847*5f757f3fSDimitry Andric DT.recalculate(F); 848*5f757f3fSDimitry Andric 849*5f757f3fSDimitry Andric // We need to first add all the pre and post loop blocks into the loop 850*5f757f3fSDimitry Andric // structures (as part of createClonedLoopStructure), and then update the 851*5f757f3fSDimitry Andric // LCSSA form and LoopSimplifyForm. This is necessary for correctly updating 852*5f757f3fSDimitry Andric // LI when LoopSimplifyForm is generated. 853*5f757f3fSDimitry Andric Loop *PreL = nullptr, *PostL = nullptr; 854*5f757f3fSDimitry Andric if (!PreLoop.Blocks.empty()) { 855*5f757f3fSDimitry Andric PreL = createClonedLoopStructure(&OriginalLoop, 856*5f757f3fSDimitry Andric OriginalLoop.getParentLoop(), PreLoop.Map, 857*5f757f3fSDimitry Andric /* IsSubLoop */ false); 858*5f757f3fSDimitry Andric } 859*5f757f3fSDimitry Andric 860*5f757f3fSDimitry Andric if (!PostLoop.Blocks.empty()) { 861*5f757f3fSDimitry Andric PostL = 862*5f757f3fSDimitry Andric createClonedLoopStructure(&OriginalLoop, OriginalLoop.getParentLoop(), 863*5f757f3fSDimitry Andric PostLoop.Map, /* IsSubLoop */ false); 864*5f757f3fSDimitry Andric } 865*5f757f3fSDimitry Andric 866*5f757f3fSDimitry Andric // This function canonicalizes the loop into Loop-Simplify and LCSSA forms. 867*5f757f3fSDimitry Andric auto CanonicalizeLoop = [&](Loop *L, bool IsOriginalLoop) { 868*5f757f3fSDimitry Andric formLCSSARecursively(*L, DT, &LI, &SE); 869*5f757f3fSDimitry Andric simplifyLoop(L, &DT, &LI, &SE, nullptr, nullptr, true); 870*5f757f3fSDimitry Andric // Pre/post loops are slow paths, we do not need to perform any loop 871*5f757f3fSDimitry Andric // optimizations on them. 872*5f757f3fSDimitry Andric if (!IsOriginalLoop) 873*5f757f3fSDimitry Andric DisableAllLoopOptsOnLoop(*L); 874*5f757f3fSDimitry Andric }; 875*5f757f3fSDimitry Andric if (PreL) 876*5f757f3fSDimitry Andric CanonicalizeLoop(PreL, false); 877*5f757f3fSDimitry Andric if (PostL) 878*5f757f3fSDimitry Andric CanonicalizeLoop(PostL, false); 879*5f757f3fSDimitry Andric CanonicalizeLoop(&OriginalLoop, true); 880*5f757f3fSDimitry Andric 881*5f757f3fSDimitry Andric /// At this point: 882*5f757f3fSDimitry Andric /// - We've broken a "main loop" out of the loop in a way that the "main loop" 883*5f757f3fSDimitry Andric /// runs with the induction variable in a subset of [Begin, End). 884*5f757f3fSDimitry Andric /// - There is no overflow when computing "main loop" exit limit. 885*5f757f3fSDimitry Andric /// - Max latch taken count of the loop is limited. 886*5f757f3fSDimitry Andric /// It guarantees that induction variable will not overflow iterating in the 887*5f757f3fSDimitry Andric /// "main loop". 888*5f757f3fSDimitry Andric if (isa<OverflowingBinaryOperator>(MainLoopStructure.IndVarBase)) 889*5f757f3fSDimitry Andric if (IsSignedPredicate) 890*5f757f3fSDimitry Andric cast<BinaryOperator>(MainLoopStructure.IndVarBase) 891*5f757f3fSDimitry Andric ->setHasNoSignedWrap(true); 892*5f757f3fSDimitry Andric /// TODO: support unsigned predicate. 893*5f757f3fSDimitry Andric /// To add NUW flag we need to prove that both operands of BO are 894*5f757f3fSDimitry Andric /// non-negative. E.g: 895*5f757f3fSDimitry Andric /// ... 896*5f757f3fSDimitry Andric /// %iv.next = add nsw i32 %iv, -1 897*5f757f3fSDimitry Andric /// %cmp = icmp ult i32 %iv.next, %n 898*5f757f3fSDimitry Andric /// br i1 %cmp, label %loopexit, label %loop 899*5f757f3fSDimitry Andric /// 900*5f757f3fSDimitry Andric /// -1 is MAX_UINT in terms of unsigned int. Adding anything but zero will 901*5f757f3fSDimitry Andric /// overflow, therefore NUW flag is not legal here. 902*5f757f3fSDimitry Andric 903*5f757f3fSDimitry Andric return true; 904*5f757f3fSDimitry Andric } 905