Lines Matching +full:row +full:- +full:stride

1 //===-- LoopPredication.cpp - Guard based loop predication pass -----------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
21 // guard(n - 1 < len);
26 // loop-unswitch can later unswitch the loop by this condition which basically
29 // if (n - 1 < len)
114 // X == guardLimit - 1 - guardStart
115 // (and guardLimit is non-zero, but we won't use this latter fact).
116 // If X == guardLimit - 1 - guardStart then the second half of the antecedent is
117 // latchStart + guardLimit - 1 - guardStart u< latchLimit
119 // latchStart + guardLimit - 1 - guardStart u>= latchLimit
122 // latchLimit u<= latchStart + guardLimit - 1 - guardStart
131 // latchStart + X u< latchStart + guardLimit - 1 - guardStart =>
134 // (latchStart + X) in [0, latchStart + guardLimit - 1 - guardStart) =>
136 // == forall X . X in [-guardStart, guardLimit - guardStart) &&
137 // X in [-latchStart, guardLimit - 1 - guardStart) =>
138 // X in [-guardStart - 1, guardLimit - guardStart - 1)
143 // latchStart + guardLimit - 1 - guardStart u>= latchLimit
146 // latchStart + guardLimit - 1 - guardStart u> latchLimit
149 // latchStart + guardLimit - 1 - guardStart s>= latchLimit
152 // latchStart + guardLimit - 1 - guardStart s> latchLimit
154 // When S = -1 (i.e. reverse iterating loop), the transformation is supported
159 // G(X) = X - 1 u< guardLimit
162 // forall X. X-1 u< guardLimit and X u> latchLimit => X-2 u< guardLimit
177 //===----------------------------------------------------------------------===//
206 #define DEBUG_TYPE "loop-predication"
213 static cl::opt<bool> EnableIVTruncation("loop-predication-enable-iv-truncation",
216 static cl::opt<bool> EnableCountDownLoop("loop-predication-enable-count-down-loop",
220 SkipProfitabilityChecks("loop-predication-skip-profitability-checks",
228 "loop-predication-latch-probability-scale", cl::Hidden, cl::init(2.0),
233 "loop-predication-predicate-widenable-branches-to-deopt", cl::Hidden,
239 "loop-predication-insert-assumes-of-predicated-guards-conditions",
350 auto Pred = ICI->getPredicate(); in parseLoopICmp()
351 auto *LHS = ICI->getOperand(0); in parseLoopICmp()
352 auto *RHS = ICI->getOperand(1); in parseLoopICmp()
354 const SCEV *LHSS = SE->getSCEV(LHS); in parseLoopICmp()
357 const SCEV *RHSS = SE->getSCEV(RHS); in parseLoopICmp()
361 // Canonicalize RHS to be loop invariant bound, LHS - a loop computable IV in parseLoopICmp()
362 if (SE->isLoopInvariant(LHSS, L)) { in parseLoopICmp()
369 if (!AR || AR->getLoop() != L) in parseLoopICmp()
379 Type *Ty = LHS->getType(); in expandCheck()
380 assert(Ty == RHS->getType() && "expandCheck operands have different types?"); in expandCheck()
382 if (SE->isLoopInvariant(LHS, L) && SE->isLoopInvariant(RHS, L)) { in expandCheck()
384 if (SE->isLoopEntryGuardedByCond(L, Pred, LHS, RHS)) in expandCheck()
386 if (SE->isLoopEntryGuardedByCond(L, ICmpInst::getInversePredicate(Pred), in expandCheck()
418 assert(DL.getTypeSizeInBits(LatchCheck.IV->getType()).getFixedValue() > in isSafeToTruncateWideIVType()
425 auto *Start = dyn_cast<SCEVConstant>(LatchCheck.IV->getStart()); in isSafeToTruncateWideIVType()
440 return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize && in isSafeToTruncateWideIVType()
441 Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize; in isSafeToTruncateWideIVType()
452 auto *LatchType = LatchCheck.IV->getType(); in generateLoopLatchCheck()
479 return Step->isOne() || (Step->isAllOnesValue() && EnableCountDownLoop); in isSupportedStep()
485 if (!L->isLoopInvariant(Op)) in findInsertPt()
487 return Preheader->getTerminator(); in findInsertPt()
497 if (!SE->isLoopInvariant(Op, L) || in findInsertPt()
498 !Expander.isSafeToExpandAt(Op, Preheader->getTerminator())) in findInsertPt()
500 return Preheader->getTerminator(); in findInsertPt()
507 // otherwise need us to iteration licm, loop-predication, and either in isLoopInvariantValue()
508 // loop-unswitch or loop-peeling to make progress on examples with lots of in isLoopInvariantValue()
509 // predicable range checks in a row. (Since, in the general case, we can't in isLoopInvariantValue()
523 if (SE->isLoopInvariant(S, L)) in isLoopInvariantValue()
532 if (const auto *LI = dyn_cast<LoadInst>(U->getValue())) in isLoopInvariantValue()
533 if (LI->isUnordered() && L->hasLoopInvariantOperands(LI)) in isLoopInvariantValue()
534 if (!isModSet(AA->getModRefInfoMask(LI->getOperand(0))) || in isLoopInvariantValue()
535 LI->hasMetadata(LLVMContext::MD_invariant_load)) in isLoopInvariantValue()
543 auto *Ty = RangeCheck.IV->getType(); in widenICmpRangeCheckIncrementingLoop()
546 // latchLimit <pred> guardLimit - 1 - guardStart + latchStart in widenICmpRangeCheckIncrementingLoop()
549 // guardLimit - guardStart + latchStart - 1 in widenICmpRangeCheckIncrementingLoop()
550 const SCEV *GuardStart = RangeCheck.IV->getStart(); in widenICmpRangeCheckIncrementingLoop()
552 const SCEV *LatchStart = LatchCheck.IV->getStart(); in widenICmpRangeCheckIncrementingLoop()
570 // guardLimit - guardStart + latchStart - 1 in widenICmpRangeCheckIncrementingLoop()
572 SE->getAddExpr(SE->getMinusSCEV(GuardLimit, GuardStart), in widenICmpRangeCheckIncrementingLoop()
573 SE->getMinusSCEV(LatchStart, SE->getOne(Ty))); in widenICmpRangeCheckIncrementingLoop()
593 auto *Ty = RangeCheck.IV->getType(); in widenICmpRangeCheckDecrementingLoop()
594 const SCEV *GuardStart = RangeCheck.IV->getStart(); in widenICmpRangeCheckDecrementingLoop()
596 const SCEV *LatchStart = LatchCheck.IV->getStart(); in widenICmpRangeCheckDecrementingLoop()
615 auto *PostDecLatchCheckIV = LatchCheck.IV->getPostIncExpr(*SE); in widenICmpRangeCheckDecrementingLoop()
633 SE->getOne(Ty)); in widenICmpRangeCheckDecrementingLoop()
644 RC.IV->getStepRecurrence(*SE)->isOne() && in normalizePredicate()
645 SE->isKnownPredicate(ICmpInst::ICMP_ULE, RC.IV->getStart(), RC.Limit)) in normalizePredicate()
657 LLVM_DEBUG(ICI->dump()); in widenICmpRangeCheck()
669 LLVM_DEBUG(RangeCheck->dump()); in widenICmpRangeCheck()
670 if (RangeCheck->Pred != ICmpInst::ICMP_ULT) { in widenICmpRangeCheck()
672 << RangeCheck->Pred << ")!\n"); in widenICmpRangeCheck()
675 auto *RangeCheckIV = RangeCheck->IV; in widenICmpRangeCheck()
676 if (!RangeCheckIV->isAffine()) { in widenICmpRangeCheck()
680 auto *Step = RangeCheckIV->getStepRecurrence(*SE); in widenICmpRangeCheck()
687 auto *Ty = RangeCheckIV->getType(); in widenICmpRangeCheck()
698 // not have the same value (we support both 1 and -1 steps). in widenICmpRangeCheck()
699 assert(Step->getType() == in widenICmpRangeCheck()
700 CurrLatchCheck.IV->getStepRecurrence(*SE)->getType() && in widenICmpRangeCheck()
702 if (Step != CurrLatchCheck.IV->getStepRecurrence(*SE)) { in widenICmpRangeCheck()
707 if (Step->isOne()) in widenICmpRangeCheck()
711 assert(Step->isAllOnesValue() && "Step should be -1!"); in widenICmpRangeCheck()
731 LLVM_DEBUG(Guard->dump()); in widenGuardConditions()
746 auto *OldCond = Guard->getOperand(0); in widenGuardConditions()
747 Guard->setOperand(0, AllChecks); in widenGuardConditions()
762 LLVM_DEBUG(BI->dump()); in widenWidenableBranchGuardConditions()
781 auto *OldCond = BI->getCondition(); in widenWidenableBranchGuardConditions()
782 BI->setCondition(AllChecks); in widenWidenableBranchGuardConditions()
784 BasicBlock *IfTrueBB = BI->getSuccessor(0); in widenWidenableBranchGuardConditions()
785 Builder.SetInsertPoint(IfTrueBB, IfTrueBB->getFirstInsertionPt()); in widenWidenableBranchGuardConditions()
790 if (!IfTrueBB->getUniquePredecessor()) { in widenWidenableBranchGuardConditions()
791 auto *GuardBB = BI->getParent(); in widenWidenableBranchGuardConditions()
792 auto *PN = Builder.CreatePHI(AssumeCond->getType(), pred_size(IfTrueBB), in widenWidenableBranchGuardConditions()
795 PN->addIncoming(Pred == GuardBB ? AssumeCond : Builder.getTrue(), Pred); in widenWidenableBranchGuardConditions()
811 BasicBlock *LoopLatch = L->getLoopLatch(); in parseLoopLatchICmp()
817 auto *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator()); in parseLoopLatchICmp()
818 if (!BI || !BI->isConditional()) { in parseLoopLatchICmp()
822 BasicBlock *TrueDest = BI->getSuccessor(0); in parseLoopLatchICmp()
824 (TrueDest == L->getHeader() || BI->getSuccessor(1) == L->getHeader()) && in parseLoopLatchICmp()
827 auto *ICI = dyn_cast<ICmpInst>(BI->getCondition()); in parseLoopLatchICmp()
838 if (TrueDest != L->getHeader()) in parseLoopLatchICmp()
839 Result->Pred = ICmpInst::getInversePredicate(Result->Pred); in parseLoopLatchICmp()
843 if (!Result->IV->isAffine()) { in parseLoopLatchICmp()
848 auto *Step = Result->IV->getStepRecurrence(*SE); in parseLoopLatchICmp()
850 LLVM_DEBUG(dbgs() << "Unsupported loop stride(" << *Step << ")!\n"); in parseLoopLatchICmp()
855 if (Step->isOne()) { in parseLoopLatchICmp()
859 assert(Step->isAllOnesValue() && "Step should be -1!"); in parseLoopLatchICmp()
866 if (IsUnsupportedPredicate(Step, Result->Pred)) { in parseLoopLatchICmp()
867 LLVM_DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred in parseLoopLatchICmp()
880 L->getExitEdges(ExitEdges); in isLoopProfitableToPredicate()
891 auto *LatchBlock = L->getLoopLatch(); in isLoopProfitableToPredicate()
893 auto *LatchTerm = LatchBlock->getTerminator(); in isLoopProfitableToPredicate()
894 assert(LatchTerm->getNumSuccessors() == 2 && in isLoopProfitableToPredicate()
897 LatchTerm->getSuccessor(0) == L->getHeader() ? 1 : 0; in isLoopProfitableToPredicate()
905 auto *LatchExitBlock = LatchTerm->getSuccessor(LatchBrExitIdx); in isLoopProfitableToPredicate()
907 LatchExitBlock->getTerminatingDeoptimizeCall()) in isLoopProfitableToPredicate()
917 const BasicBlock *ExitBlock) -> BranchProbability { in isLoopProfitableToPredicate()
918 auto *Term = ExitingBlock->getTerminator(); in isLoopProfitableToPredicate()
919 unsigned NumSucc = Term->getNumSuccessors(); in isLoopProfitableToPredicate()
925 if (Term->getSuccessor(i) == ExitBlock) in isLoopProfitableToPredicate()
950 << "Ignored user setting for loop-predication-latch-probability-scale: " in isLoopProfitableToPredicate()
981 BasicBlock *BB = L->getLoopPreheader(); in FindWidenableTerminatorAboveLoop()
985 if (BasicBlock *Pred = BB->getSinglePredecessor()) in FindWidenableTerminatorAboveLoop()
986 if (BB == Pred->getSingleSuccessor()) { in FindWidenableTerminatorAboveLoop()
993 if (BasicBlock *Pred = BB->getSinglePredecessor()) { in FindWidenableTerminatorAboveLoop()
994 if (auto *BI = dyn_cast<BranchInst>(Pred->getTerminator())) in FindWidenableTerminatorAboveLoop()
995 if (BI->getSuccessor(0) == BB && isWidenableBranch(BI)) in FindWidenableTerminatorAboveLoop()
1008 L->getExitingBlocks(ExitingBlocks); in getMinAnalyzeableBackedgeTakenCount()
1015 assert(DT.dominates(ExitingBB, L->getLoopLatch()) && in getMinAnalyzeableBackedgeTakenCount()
1039 // which restrict the applicability of the non-WC based version of this in predicateLoopExits()
1042 // NOTE ON POISON/UNDEF - We're hoisting an expression above guards which may in predicateLoopExits()
1051 L->getExitingBlocks(ExitingBlocks); in predicateLoopExits()
1056 auto *Latch = L->getLoopLatch(); in predicateLoopExits()
1064 const SCEV *LatchEC = SE->getExitCount(L, Latch); in predicateLoopExits()
1066 return false; // profitability - want hot exit in analyzeable set in predicateLoopExits()
1077 if (LI->getLoopFor(ExitingBB) != L) in predicateLoopExits()
1080 auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); in predicateLoopExits()
1085 if (L->contains(BI->getSuccessor(0))) { in predicateLoopExits()
1086 assert(WC->hasOneUse() && "Not appropriate widenable branch!"); in predicateLoopExits()
1087 WC->user_back()->replaceUsesOfWith( in predicateLoopExits()
1088 WC, ConstantInt::getTrue(BI->getContext())); in predicateLoopExits()
1093 SE->forgetLoop(L); in predicateLoopExits()
1097 // change a loop-invariant condition to a loop-varying one. in predicateLoopExits()
1098 auto *IP = cast<Instruction>(WidenableBR->getCondition()); in predicateLoopExits()
1107 if (isa<SCEVCouldNotCompute>(MinEC) || MinEC->getType()->isPointerTy() || in predicateLoopExits()
1108 !SE->isLoopInvariant(MinEC, L) || in predicateLoopExits()
1121 if (LI->getLoopFor(ExitingBB) != L) in predicateLoopExits()
1124 // Can't rewrite non-branch yet. in predicateLoopExits()
1125 auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); in predicateLoopExits()
1130 if (isa<Constant>(BI->getCondition())) in predicateLoopExits()
1133 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); in predicateLoopExits()
1135 ExitCount->getType()->isPointerTy() || in predicateLoopExits()
1139 const bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); in predicateLoopExits()
1140 BasicBlock *ExitBB = BI->getSuccessor(ExitIfTrue ? 0 : 1); in predicateLoopExits()
1141 if (!ExitBB->getPostdominatingDeoptimizeCall()) in predicateLoopExits()
1150 // 2) fold the branch to untaken - avoids infinite looping in predicateLoopExits()
1156 if (ECV->getType() != RHS->getType()) { in predicateLoopExits()
1157 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType()); in predicateLoopExits()
1161 assert(!Latch || DT->dominates(ExitingBB, Latch)); in predicateLoopExits()
1170 Value *OldCond = BI->getCondition(); in predicateLoopExits()
1171 BI->setCondition(ConstantInt::get(OldCond->getType(), !ExitIfTrue)); in predicateLoopExits()
1180 SE->forgetLoop(L); in predicateLoopExits()
1190 LLVM_DEBUG(L->dump()); in runOnLoop()
1192 Module *M = L->getHeader()->getModule(); in runOnLoop()
1196 M->getFunction(Intrinsic::getName(Intrinsic::experimental_guard)); in runOnLoop()
1197 bool HasIntrinsicGuards = GuardDecl && !GuardDecl->use_empty(); in runOnLoop()
1198 auto *WCDecl = M->getFunction( in runOnLoop()
1201 PredicateWidenableBranchGuards && WCDecl && !WCDecl->use_empty(); in runOnLoop()
1205 DL = &M->getDataLayout(); in runOnLoop()
1207 Preheader = L->getLoopPreheader(); in runOnLoop()
1227 for (const auto BB : L->blocks()) { in runOnLoop()
1232 isGuardAsWidenableBranch(BB->getTerminator())) in runOnLoop()
1234 cast<BranchInst>(BB->getTerminator())); in runOnLoop()
1237 SCEVExpander Expander(*SE, *DL, "loop-predication"); in runOnLoop()
1246 MSSAU->getMemorySSA()->verifyMemorySSA(); in runOnLoop()