Lines Matching +full:dt +full:- +full:node
1 //===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements some loop unrolling utilities for loops with run-time
10 // trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
14 // run-time trip count modulo the unroll factor is not 0. When this is the
17 // The current strategy generates an if-then-else sequence prior to the
21 //===----------------------------------------------------------------------===//
47 #define DEBUG_TYPE "loop-unroll"
50 "Number of loops unrolled with run-time trip counts");
52 "unroll-runtime-multi-exit", cl::init(false), cl::Hidden,
56 "unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden,
72 /// 'extra' iterations if the run-time trip count modulo the
73 /// unroll count is non-zero.
76 /// - Create PHI nodes at prolog end block to combine values
78 /// - Add a PHI operand to a PHI node at the loop exit block
80 /// - Branch around the original loop if the trip count is less
87 ValueToValueMapTy &VMap, DominatorTree *DT,
101 BasicBlock *Latch = L->getLoopLatch();
105 // Create a PHI node for each outgoing value from the original loop
107 // The new PHI node is inserted in the prolog end basic block.
108 // The new PHI node value is added as an operand of a PHI node in either
111 for (PHINode &PN : Succ->phis()) {
112 // Add a new PHI node to the prolog end block and add the
116 // PrologLatch. When supporting multiple-exiting block loops, we can have
120 NewPN->insertBefore(PrologExit->getFirstNonPHIIt());
121 // Adding a value to the new PHI node from the original loop preheader.
123 if (L->contains(&PN)) {
125 NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader),
129 NewPN->addIncoming(PoisonValue::get(PN.getType()), PreHeader);
134 if (L->contains(I)) {
138 // Adding a value to the new PHI node from the last prolog block
140 NewPN->addIncoming(V, PrologLatch);
142 // Update the existing PHI node operand with the value from the
143 // new PHI node. How this is done depends on if the existing
144 // PHI node is in the original loop block, or the exit block.
145 if (L->contains(&PN))
155 Loop *PrologLoop = LI->getLoopFor(PrologLatch);
158 if (PrologLoop->contains(PredBB))
161 SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI,
167 Instruction *InsertPt = PrologExit->getTerminator();
172 // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
174 // loop were executed by the prologue. Note that if BECount <u (Count - 1)
175 // then (BECount + 1) cannot unsigned-overflow.
177 B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
180 SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
184 if (hasBranchWeightMD(*Latch->getTerminator())) {
191 InsertPt->eraseFromParent();
192 if (DT) {
193 auto *NewDom = DT->findNearestCommonDominator(OriginalLoopLatchExit,
195 DT->changeImmediateDominator(OriginalLoopLatchExit, NewDom);
201 /// 'extra' iterations if the run-time trip count modulo the
202 /// unroll count is non-zero.
205 /// - Update PHI nodes at the unrolling loop exit and epilog loop exit
206 /// - Create PHI nodes at the unrolling loop exit to combine
208 /// - Update PHI operands in the epilog loop by the new PHI nodes
209 /// - Branch around the epilog loop if extra iters (ModVal) is zero.
214 ValueToValueMapTy &VMap, DominatorTree *DT,
217 BasicBlock *Latch = L->getLoopLatch();
236 for (PHINode &PN : NewExit->phis()) {
246 // Exits from non-latch blocks point to the original exit block and the
252 PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser());
253 assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
261 if (I && L->contains(I))
266 EpilogPN->addIncoming(V, EpilogLatch);
268 assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
271 EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
285 if (!L->contains(Succ))
287 for (PHINode &PN : Succ->phis()) {
291 NewPN->insertBefore(NewExit->getFirstNonPHIIt());
292 // Adding a value to the new PHI node from the unrolling loop preheader.
293 NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
294 // Adding a value to the new PHI node from the unrolling loop latch.
295 NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch);
297 // Update the existing PHI node operand with the value from the new PHI
298 // node. Corresponding instruction in epilog loop should be PHI.
300 VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN);
304 Instruction *InsertPt = NewExit->getTerminator();
310 SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
314 if (hasBranchWeightMD(*Latch->getTerminator())) {
317 BranchWeights = MDB.createBranchWeights(1, Count - 1);
320 InsertPt->eraseFromParent();
321 if (DT) {
322 auto *NewDom = DT->findNearestCommonDominator(Exit, NewExit);
323 DT->changeImmediateDominator(Exit, NewDom);
328 SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr,
345 DominatorTree *DT, LoopInfo *LI, unsigned Count) {
347 BasicBlock *Header = L->getHeader();
348 BasicBlock *Latch = L->getLoopLatch();
349 Function *F = Header->getParent();
352 Loop *ParentLoop = L->getParentLoop();
368 InsertTop->getTerminator()->setSuccessor(0, NewBB);
371 if (DT) {
374 DT->addNewBlock(NewBB, InsertTop);
377 BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
378 DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
384 VMap.erase((*BB)->getTerminator());
385 // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.
387 // thus we must compare the post-increment (wrapping) value.
389 BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
392 PHINode::Create(NewIter->getType(), 2, suffix + ".iter");
393 NewIdx->insertBefore(FirstLoopBB->getFirstNonPHIIt());
394 auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
395 auto *One = ConstantInt::get(NewIdx->getType(), 1);
397 Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
398 Value *IdxCmp = Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp");
404 // Note: We do not enter this loop for zero-remainders. The check
408 BackEdgeWeight = (Count - 2) / 2;
419 NewIdx->addIncoming(Zero, InsertTop);
420 NewIdx->addIncoming(IdxNext, NewBB);
421 LatchBR->eraseFromParent();
427 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
429 unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
430 NewPHI->setIncomingBlock(idx, InsertTop);
432 idx = NewPHI->getBasicBlockIndex(Latch);
433 Value *InVal = NewPHI->getIncomingValue(idx);
434 NewPHI->setIncomingBlock(idx, NewLatch);
436 NewPHI->setIncomingValue(idx, V);
441 MDNode *LoopID = NewLoop->getLoopID();
451 NewLoop->setLoopID(*NewLoopID);
459 NewLoop->setLoopAlreadyUnrolled();
463 /// Returns true if we can profitably unroll the multi-exit loop L. Currently,
473 // The main pain point with multi-exit loop unrolling is that once unrolled,
489 L->getExitingBlocks(ExitingBlocks);
505 OtherExits[0]->getPostdominatingDeoptimizeCall()));
506 // TODO: These can be fine-tuned further to consider code size or deopt states
530 return B.CreateAnd(TripCount, Count - 1, "xtraiter");
534 Constant *CountC = ConstantInt::get(BECount->getType(), Count);
537 ConstantInt::get(ModValTmp->getType(), 1));
545 /// run-time trip-count.
550 /// We assume also that the loop unroll factor is a power-of-two. So, after
552 /// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch
561 /// extraiters -= 1 // Omitted if unroll factor is 2.
571 /// unroll_iters = tripcount - extraiters
573 /// unroll_iter -= 1
578 /// extraiters -= 1 // Omitted if unroll factor is 2.
585 LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
588 LLVM_DEBUG(L->dump());
593 if (!L->isLoopSimplifyForm()) {
599 BasicBlock *Latch = L->getLoopLatch();
600 BasicBlock *Header = L->getHeader();
602 BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
604 if (!LatchBR || LatchBR->isUnconditional()) {
605 // The loop-rotate pass can be helpful to avoid this in many cases.
612 unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
613 BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
615 if (L->contains(LatchExit)) {
626 L->getUniqueNonLatchExitBlocks(OtherExits);
627 // Support only single exit and exiting block unless multi-exit loop
629 if (!L->getExitingBlock() || OtherExits.size()) {
631 // (Note that only an off-by-default mode of the old PM disables PreserveLCCA.)
639 << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
654 const SCEV *BECountSC = SE->getExitCount(L, Latch);
660 unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
665 SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
671 BasicBlock *PreHeader = L->getLoopPreheader();
672 BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
673 const DataLayout &DL = Header->getDataLayout();
674 SCEVExpander Expander(*SE, DL, "loop-unroll");
708 NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
709 NewPreHeader->setName(PreHeader->getName() + ".new");
711 NewExit = SplitBlockPredecessors(LatchExit, {Latch}, ".unr-lcssa", DT, LI,
716 auto *NewExitTerminator = NewExit->getTerminator();
717 NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());
719 EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
720 EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
728 if (auto *ParentL = L->getParentLoop())
729 if (LI->getLoopFor(LatchExit) != ParentL) {
730 LI->removeBlock(NewExit);
731 ParentL->addBasicBlockToLoop(NewExit, *LI);
732 LI->removeBlock(EpilogPreHeader);
733 ParentL->addBasicBlockToLoop(EpilogPreHeader, *LI);
739 PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
740 PrologPreHeader->setName(Header->getName() + ".prol.preheader");
741 PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
742 DT, LI);
743 PrologExit->setName(Header->getName() + ".prol.loopexit");
745 NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
746 NewPreHeader->setName(PreHeader->getName() + ".new");
763 // extra iterations = run-time trip count % loop unroll factor
764 PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
766 Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
775 if ((!OtherExits.empty() || !SE->loopHasNoAbnormalExits(L)) &&
776 !isGuaranteedNotToBeUndefOrPoison(TripCount, AC, PreHeaderBR, DT)) {
779 B.CreateAdd(TripCount, Constant::getAllOnesValue(TripCount->getType()));
784 Expander.expandCodeFor(BECountSC, BECountSC->getType(), PreHeaderBR);
791 ConstantInt::get(BECount->getType(),
792 Count - 1)) :
798 if (hasBranchWeightMD(*Latch->getTerminator())) {
804 PreHeaderBR->eraseFromParent();
805 if (DT) {
807 DT->changeImmediateDominator(NewExit, PreHeader);
809 DT->changeImmediateDominator(PrologExit, PreHeader);
811 Function *F = Header->getParent();
832 NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI, Count);
835 F->splice(InsertBot->getIterator(), F, NewBlocks[0]->getIterator(), F->end());
845 for (PHINode &PN : BB->phis()) {
848 // node.
854 if (!L->contains(PredBB))
861 if (L->contains(I))
880 if (DT && !L->getExitingBlock()) {
886 for (auto *BB : L->blocks()) {
887 auto *DomNodeBB = DT->getNode(BB);
888 for (auto *DomChild : DomNodeBB->children()) {
889 auto *DomChildBB = DomChild->getBlock();
890 if (!L->contains(LI->getLoopFor(DomChildBB)))
895 DT->changeImmediateDominator(BB, PreHeader);
916 Module *M = BB->getModule();
929 NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count);
932 // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.
934 // thus we must compare the post-increment (wrapping) value.
935 IRBuilder<> B2(NewPreHeader->getTerminator());
937 BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
938 PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter");
939 NewIdx->insertBefore(Header->getFirstNonPHIIt());
941 auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
942 auto *One = ConstantInt::get(NewIdx->getType(), 1);
943 Value *IdxNext = B2.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
944 auto Pred = LatchBR->getSuccessor(0) == Header ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
945 Value *IdxCmp = B2.CreateICmp(Pred, IdxNext, TestVal, NewIdx->getName() + ".ncmp");
946 NewIdx->addIncoming(Zero, NewPreHeader);
947 NewIdx->addIncoming(IdxNext, Latch);
948 LatchBR->setCondition(IdxCmp);
953 NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE);
958 SE->forgetTopmostLoop(L);
962 if (DT) {
963 assert(DT->verify(DominatorTree::VerificationLevel::Full));
964 LI->verify(*DT);
969 if (Count == 2 && DT && LI && SE) {
971 // (e.g. breakLoopBackedgeAndSimplify) and reused in loop-deletion.
972 BasicBlock *RemainderLatch = remainderLoop->getLoopLatch();
974 SmallVector<BasicBlock*> RemainderBlocks(remainderLoop->getBlocks().begin(),
975 remainderLoop->getBlocks().end());
976 breakLoopBackedge(remainderLoop, *DT, *SE, *LI, nullptr);
980 const DataLayout &DL = L->getHeader()->getDataLayout();
984 if (Value *V = simplifyInstruction(&Inst, {DL, nullptr, DT, AC}))
985 if (LI->replacementPreservesLCSSAForm(&Inst, V))
997 auto *ExitBB = RemainderLatch->getSingleSuccessor();
999 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
1009 formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA);
1013 formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA);
1020 ULO.Count = Count - 1;
1028 UnrollResult = UnrollLoop(remainderLoop, ULO, LI, SE, DT, AC, TTI,