xref: /llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp (revision a7938c74f16379704fbd38a3d82dfcb9345651ab)
1 //===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements some loop unrolling utilities for loops with run-time
10 // trip counts.  See LoopUnroll.cpp for unrolling loops with compile-time
11 // trip counts.
12 //
13 // The functions in this file are used to generate extra code when the
14 // run-time trip count modulo the unroll factor is not 0.  When this is the
15 // case, we need to generate code to execute these 'left over' iterations.
16 //
17 // The current strategy generates an if-then-else sequence prior to the
18 // unrolled loop to execute the 'left over' iterations before or after the
19 // unrolled loop.
20 //
21 //===----------------------------------------------------------------------===//
22 
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/DomTreeUpdater.h"
25 #include "llvm/Analysis/InstructionSimplify.h"
26 #include "llvm/Analysis/LoopIterator.h"
27 #include "llvm/Analysis/ScalarEvolution.h"
28 #include "llvm/Analysis/ValueTracking.h"
29 #include "llvm/IR/BasicBlock.h"
30 #include "llvm/IR/Dominators.h"
31 #include "llvm/IR/MDBuilder.h"
32 #include "llvm/IR/Module.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
37 #include "llvm/Transforms/Utils/Cloning.h"
38 #include "llvm/Transforms/Utils/Local.h"
39 #include "llvm/Transforms/Utils/LoopUtils.h"
40 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
41 #include "llvm/Transforms/Utils/UnrollLoop.h"
42 #include <algorithm>
43 
44 using namespace llvm;
45 
46 #define DEBUG_TYPE "loop-unroll"
47 
48 STATISTIC(NumRuntimeUnrolled,
49           "Number of loops unrolled with run-time trip counts");
50 static cl::opt<bool> UnrollRuntimeMultiExit(
51     "unroll-runtime-multi-exit", cl::init(false), cl::Hidden,
52     cl::desc("Allow runtime unrolling for loops with multiple exits, when "
53              "epilog is generated"));
54 static cl::opt<bool> UnrollRuntimeOtherExitPredictable(
55     "unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden,
56     cl::desc("Assume the non latch exit block to be predictable"));
57 
58 /// Connect the unrolling prolog code to the original loop.
59 /// The unrolling prolog code contains code to execute the
60 /// 'extra' iterations if the run-time trip count modulo the
61 /// unroll count is non-zero.
62 ///
63 /// This function performs the following:
64 /// - Create PHI nodes at prolog end block to combine values
65 ///   that exit the prolog code and jump around the prolog.
66 /// - Add a PHI operand to a PHI node at the loop exit block
67 ///   for values that exit the prolog and go around the loop.
68 /// - Branch around the original loop if the trip count is less
69 ///   than the unroll factor.
70 ///
71 static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
72                           BasicBlock *PrologExit,
73                           BasicBlock *OriginalLoopLatchExit,
74                           BasicBlock *PreHeader, BasicBlock *NewPreHeader,
75                           ValueToValueMapTy &VMap, DominatorTree *DT,
76                           LoopInfo *LI, bool PreserveLCSSA) {
77   // Loop structure should be the following:
78   // Preheader
79   //  PrologHeader
80   //  ...
81   //  PrologLatch
82   //  PrologExit
83   //   NewPreheader
84   //    Header
85   //    ...
86   //    Latch
87   //      LatchExit
88   BasicBlock *Latch = L->getLoopLatch();
89   assert(Latch && "Loop must have a latch");
90   BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
91 
92   // Create a PHI node for each outgoing value from the original loop
93   // (which means it is an outgoing value from the prolog code too).
94   // The new PHI node is inserted in the prolog end basic block.
95   // The new PHI node value is added as an operand of a PHI node in either
96   // the loop header or the loop exit block.
97   for (BasicBlock *Succ : successors(Latch)) {
98     for (PHINode &PN : Succ->phis()) {
99       // Add a new PHI node to the prolog end block and add the
100       // appropriate incoming values.
101       // TODO: This code assumes that the PrologExit (or the LatchExit block for
102       // prolog loop) contains only one predecessor from the loop, i.e. the
103       // PrologLatch. When supporting multiple-exiting block loops, we can have
104       // two or more blocks that have the LatchExit as the target in the
105       // original loop.
106       PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
107                                        PrologExit->getFirstNonPHI());
108       // Adding a value to the new PHI node from the original loop preheader.
109       // This is the value that skips all the prolog code.
110       if (L->contains(&PN)) {
111         // Succ is loop header.
112         NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader),
113                            PreHeader);
114       } else {
115         // Succ is LatchExit.
116         NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader);
117       }
118 
119       Value *V = PN.getIncomingValueForBlock(Latch);
120       if (Instruction *I = dyn_cast<Instruction>(V)) {
121         if (L->contains(I)) {
122           V = VMap.lookup(I);
123         }
124       }
125       // Adding a value to the new PHI node from the last prolog block
126       // that was created.
127       NewPN->addIncoming(V, PrologLatch);
128 
129       // Update the existing PHI node operand with the value from the
130       // new PHI node.  How this is done depends on if the existing
131       // PHI node is in the original loop block, or the exit block.
132       if (L->contains(&PN))
133         PN.setIncomingValueForBlock(NewPreHeader, NewPN);
134       else
135         PN.addIncoming(NewPN, PrologExit);
136     }
137   }
138 
139   // Make sure that created prolog loop is in simplified form
140   SmallVector<BasicBlock *, 4> PrologExitPreds;
141   Loop *PrologLoop = LI->getLoopFor(PrologLatch);
142   if (PrologLoop) {
143     for (BasicBlock *PredBB : predecessors(PrologExit))
144       if (PrologLoop->contains(PredBB))
145         PrologExitPreds.push_back(PredBB);
146 
147     SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI,
148                            nullptr, PreserveLCSSA);
149   }
150 
151   // Create a branch around the original loop, which is taken if there are no
152   // iterations remaining to be executed after running the prologue.
153   Instruction *InsertPt = PrologExit->getTerminator();
154   IRBuilder<> B(InsertPt);
155 
156   assert(Count != 0 && "nonsensical Count!");
157 
158   // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
159   // This means %xtraiter is (BECount + 1) and all of the iterations of this
160   // loop were executed by the prologue.  Note that if BECount <u (Count - 1)
161   // then (BECount + 1) cannot unsigned-overflow.
162   Value *BrLoopExit =
163       B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
164   // Split the exit to maintain loop canonicalization guarantees
165   SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit));
166   SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
167                          nullptr, PreserveLCSSA);
168   // Add the branch to the exit block (around the unrolled loop)
169   B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
170   InsertPt->eraseFromParent();
171   if (DT) {
172     auto *NewDom = DT->findNearestCommonDominator(OriginalLoopLatchExit,
173                                                   PrologExit);
174     DT->changeImmediateDominator(OriginalLoopLatchExit, NewDom);
175   }
176 }
177 
178 /// Connect the unrolling epilog code to the original loop.
179 /// The unrolling epilog code contains code to execute the
180 /// 'extra' iterations if the run-time trip count modulo the
181 /// unroll count is non-zero.
182 ///
183 /// This function performs the following:
184 /// - Update PHI nodes at the unrolling loop exit and epilog loop exit
185 /// - Create PHI nodes at the unrolling loop exit to combine
186 ///   values that exit the unrolling loop code and jump around it.
187 /// - Update PHI operands in the epilog loop by the new PHI nodes
188 /// - Branch around the epilog loop if extra iters (ModVal) is zero.
189 ///
190 static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
191                           BasicBlock *Exit, BasicBlock *PreHeader,
192                           BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
193                           ValueToValueMapTy &VMap, DominatorTree *DT,
194                           LoopInfo *LI, bool PreserveLCSSA)  {
195   BasicBlock *Latch = L->getLoopLatch();
196   assert(Latch && "Loop must have a latch");
197   BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
198 
199   // Loop structure should be the following:
200   //
201   // PreHeader
202   // NewPreHeader
203   //   Header
204   //   ...
205   //   Latch
206   // NewExit (PN)
207   // EpilogPreHeader
208   //   EpilogHeader
209   //   ...
210   //   EpilogLatch
211   // Exit (EpilogPN)
212 
213   // Update PHI nodes at NewExit and Exit.
214   for (PHINode &PN : NewExit->phis()) {
215     // PN should be used in another PHI located in Exit block as
216     // Exit was split by SplitBlockPredecessors into Exit and NewExit
217     // Basicaly it should look like:
218     // NewExit:
219     //   PN = PHI [I, Latch]
220     // ...
221     // Exit:
222     //   EpilogPN = PHI [PN, EpilogPreHeader], [X, Exit2], [Y, Exit2.epil]
223     //
224     // Exits from non-latch blocks point to the original exit block and the
225     // epilogue edges have already been added.
226     //
227     // There is EpilogPreHeader incoming block instead of NewExit as
228     // NewExit was spilt 1 more time to get EpilogPreHeader.
229     assert(PN.hasOneUse() && "The phi should have 1 use");
230     PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser());
231     assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
232 
233     // Add incoming PreHeader from branch around the Loop
234     PN.addIncoming(UndefValue::get(PN.getType()), PreHeader);
235 
236     Value *V = PN.getIncomingValueForBlock(Latch);
237     Instruction *I = dyn_cast<Instruction>(V);
238     if (I && L->contains(I))
239       // If value comes from an instruction in the loop add VMap value.
240       V = VMap.lookup(I);
241     // For the instruction out of the loop, constant or undefined value
242     // insert value itself.
243     EpilogPN->addIncoming(V, EpilogLatch);
244 
245     assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
246           "EpilogPN should have EpilogPreHeader incoming block");
247     // Change EpilogPreHeader incoming block to NewExit.
248     EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
249                                NewExit);
250     // Now PHIs should look like:
251     // NewExit:
252     //   PN = PHI [I, Latch], [undef, PreHeader]
253     // ...
254     // Exit:
255     //   EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
256   }
257 
258   // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
259   // Update corresponding PHI nodes in epilog loop.
260   for (BasicBlock *Succ : successors(Latch)) {
261     // Skip this as we already updated phis in exit blocks.
262     if (!L->contains(Succ))
263       continue;
264     for (PHINode &PN : Succ->phis()) {
265       // Add new PHI nodes to the loop exit block and update epilog
266       // PHIs with the new PHI values.
267       PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
268                                        NewExit->getFirstNonPHI());
269       // Adding a value to the new PHI node from the unrolling loop preheader.
270       NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
271       // Adding a value to the new PHI node from the unrolling loop latch.
272       NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch);
273 
274       // Update the existing PHI node operand with the value from the new PHI
275       // node.  Corresponding instruction in epilog loop should be PHI.
276       PHINode *VPN = cast<PHINode>(VMap[&PN]);
277       VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN);
278     }
279   }
280 
281   Instruction *InsertPt = NewExit->getTerminator();
282   IRBuilder<> B(InsertPt);
283   Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
284   assert(Exit && "Loop must have a single exit block only");
285   // Split the epilogue exit to maintain loop canonicalization guarantees
286   SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
287   SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
288                          PreserveLCSSA);
289   // Add the branch to the exit block (around the unrolling loop)
290   B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
291   InsertPt->eraseFromParent();
292   if (DT) {
293     auto *NewDom = DT->findNearestCommonDominator(Exit, NewExit);
294     DT->changeImmediateDominator(Exit, NewDom);
295   }
296 
297   // Split the main loop exit to maintain canonicalization guarantees.
298   SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
299   SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr,
300                          PreserveLCSSA);
301 }
302 
303 /// Create a clone of the blocks in a loop and connect them together. A new
304 /// loop will be created including all cloned blocks, and the iterator of the
305 /// new loop switched to count NewIter down to 0.
306 /// The cloned blocks should be inserted between InsertTop and InsertBot.
307 /// InsertTop should be new preheader, InsertBot new loop exit.
308 /// Returns the new cloned loop that is created.
309 static Loop *
310 CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
311                 const bool UnrollRemainder,
312                 BasicBlock *InsertTop,
313                 BasicBlock *InsertBot, BasicBlock *Preheader,
314                 std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
315                 ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
316   StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
317   BasicBlock *Header = L->getHeader();
318   BasicBlock *Latch = L->getLoopLatch();
319   Function *F = Header->getParent();
320   LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
321   LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
322   Loop *ParentLoop = L->getParentLoop();
323   NewLoopsMap NewLoops;
324   NewLoops[ParentLoop] = ParentLoop;
325 
326   // For each block in the original loop, create a new copy,
327   // and update the value map with the newly created values.
328   for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
329     BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
330     NewBlocks.push_back(NewBB);
331 
332     addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
333 
334     VMap[*BB] = NewBB;
335     if (Header == *BB) {
336       // For the first block, add a CFG connection to this newly
337       // created block.
338       InsertTop->getTerminator()->setSuccessor(0, NewBB);
339     }
340 
341     if (DT) {
342       if (Header == *BB) {
343         // The header is dominated by the preheader.
344         DT->addNewBlock(NewBB, InsertTop);
345       } else {
346         // Copy information from original loop to unrolled loop.
347         BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
348         DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
349       }
350     }
351 
352     if (Latch == *BB) {
353       // For the last block, create a loop back to cloned head.
354       VMap.erase((*BB)->getTerminator());
355       // Use an incrementing IV.  Pre-incr/post-incr is backedge/trip count.
356       // Subtle: NewIter can be 0 if we wrapped when computing the trip count,
357       // thus we must compare the post-increment (wrapping) value.
358       BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
359       BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
360       IRBuilder<> Builder(LatchBR);
361       PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
362                                         suffix + ".iter",
363                                         FirstLoopBB->getFirstNonPHI());
364       auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
365       auto *One = ConstantInt::get(NewIdx->getType(), 1);
366       Value *IdxNext = Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
367       Value *IdxCmp = Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp");
368       Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
369       NewIdx->addIncoming(Zero, InsertTop);
370       NewIdx->addIncoming(IdxNext, NewBB);
371       LatchBR->eraseFromParent();
372     }
373   }
374 
375   // Change the incoming values to the ones defined in the preheader or
376   // cloned loop.
377   for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
378     PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
379     unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
380     NewPHI->setIncomingBlock(idx, InsertTop);
381     BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
382     idx = NewPHI->getBasicBlockIndex(Latch);
383     Value *InVal = NewPHI->getIncomingValue(idx);
384     NewPHI->setIncomingBlock(idx, NewLatch);
385     if (Value *V = VMap.lookup(InVal))
386       NewPHI->setIncomingValue(idx, V);
387   }
388 
389   Loop *NewLoop = NewLoops[L];
390   assert(NewLoop && "L should have been cloned");
391   MDNode *LoopID = NewLoop->getLoopID();
392 
393   // Only add loop metadata if the loop is not going to be completely
394   // unrolled.
395   if (UnrollRemainder)
396     return NewLoop;
397 
398   Optional<MDNode *> NewLoopID = makeFollowupLoopID(
399       LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder});
400   if (NewLoopID) {
401     NewLoop->setLoopID(NewLoopID.getValue());
402 
403     // Do not setLoopAlreadyUnrolled if loop attributes have been defined
404     // explicitly.
405     return NewLoop;
406   }
407 
408   // Add unroll disable metadata to disable future unrolling for this loop.
409   NewLoop->setLoopAlreadyUnrolled();
410   return NewLoop;
411 }
412 
413 /// Returns true if we can profitably unroll the multi-exit loop L. Currently,
414 /// we return true only if UnrollRuntimeMultiExit is set to true.
415 static bool canProfitablyUnrollMultiExitLoop(
416     Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
417     bool UseEpilogRemainder) {
418 
419   // Priority goes to UnrollRuntimeMultiExit if it's supplied.
420   if (UnrollRuntimeMultiExit.getNumOccurrences())
421     return UnrollRuntimeMultiExit;
422 
423   // The main pain point with multi-exit loop unrolling is that once unrolled,
424   // we will not be able to merge all blocks into a straight line code.
425   // There are branches within the unrolled loop that go to the OtherExits.
426   // The second point is the increase in code size, but this is true
427   // irrespective of multiple exits.
428 
429   // Note: Both the heuristics below are coarse grained. We are essentially
430   // enabling unrolling of loops that have a single side exit other than the
431   // normal LatchExit (i.e. exiting into a deoptimize block).
432   // The heuristics considered are:
433   // 1. low number of branches in the unrolled version.
434   // 2. high predictability of these extra branches.
435   // We avoid unrolling loops that have more than two exiting blocks. This
436   // limits the total number of branches in the unrolled loop to be atmost
437   // the unroll factor (since one of the exiting blocks is the latch block).
438   SmallVector<BasicBlock*, 4> ExitingBlocks;
439   L->getExitingBlocks(ExitingBlocks);
440   if (ExitingBlocks.size() > 2)
441     return false;
442 
443   // Allow unrolling of loops with no non latch exit blocks.
444   if (OtherExits.size() == 0)
445     return true;
446 
447   // The second heuristic is that L has one exit other than the latchexit and
448   // that exit is a deoptimize block. We know that deoptimize blocks are rarely
449   // taken, which also implies the branch leading to the deoptimize block is
450   // highly predictable. When UnrollRuntimeOtherExitPredictable is specified, we
451   // assume the other exit branch is predictable even if it has no deoptimize
452   // call.
453   return (OtherExits.size() == 1 &&
454           (UnrollRuntimeOtherExitPredictable ||
455            OtherExits[0]->getTerminatingDeoptimizeCall()));
456   // TODO: These can be fine-tuned further to consider code size or deopt states
457   // that are captured by the deoptimize exit block.
458   // Also, we can extend this to support more cases, if we actually
459   // know of kinds of multiexit loops that would benefit from unrolling.
460 }
461 
462 // Assign the maximum possible trip count as the back edge weight for the
463 // remainder loop if the original loop comes with a branch weight.
464 static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop,
465                                                      Loop *RemainderLoop,
466                                                      uint64_t UnrollFactor) {
467   uint64_t TrueWeight, FalseWeight;
468   BranchInst *LatchBR =
469       cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator());
470   if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight))
471     return;
472   uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader()
473                             ? FalseWeight
474                             : TrueWeight;
475   assert(UnrollFactor > 1);
476   uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight;
477   BasicBlock *Header = RemainderLoop->getHeader();
478   BasicBlock *Latch = RemainderLoop->getLoopLatch();
479   auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator());
480   unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1);
481   MDBuilder MDB(RemainderLatchBR->getContext());
482   MDNode *WeightNode =
483     HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
484                 : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
485   RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
486 }
487 
488 /// Calculate ModVal = (BECount + 1) % Count on the abstract integer domain
489 /// accounting for the possibility of unsigned overflow in the 2s complement
490 /// domain. Preconditions:
491 /// 1) TripCount = BECount + 1 (allowing overflow)
492 /// 2) Log2(Count) <= BitWidth(BECount)
493 static Value *CreateTripRemainder(IRBuilder<> &B, Value *BECount,
494                                   Value *TripCount, unsigned Count) {
495   // Note that TripCount is BECount + 1.
496   if (isPowerOf2_32(Count))
497     // If the expression is zero, then either:
498     //  1. There are no iterations to be run in the prolog/epilog loop.
499     // OR
500     //  2. The addition computing TripCount overflowed.
501     //
502     // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
503     // the number of iterations that remain to be run in the original loop is a
504     // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (a
505     // precondition of this method).
506     return B.CreateAnd(TripCount, Count - 1, "xtraiter");
507 
508   // As (BECount + 1) can potentially unsigned overflow we count
509   // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
510   Constant *CountC = ConstantInt::get(BECount->getType(), Count);
511   Value *ModValTmp = B.CreateURem(BECount, CountC);
512   Value *ModValAdd = B.CreateAdd(ModValTmp,
513                                  ConstantInt::get(ModValTmp->getType(), 1));
514   // At that point (BECount % Count) + 1 could be equal to Count.
515   // To handle this case we need to take mod by Count one more time.
516   return B.CreateURem(ModValAdd, CountC, "xtraiter");
517 }
518 
519 
520 /// Insert code in the prolog/epilog code when unrolling a loop with a
521 /// run-time trip-count.
522 ///
523 /// This method assumes that the loop unroll factor is total number
524 /// of loop bodies in the loop after unrolling. (Some folks refer
525 /// to the unroll factor as the number of *extra* copies added).
526 /// We assume also that the loop unroll factor is a power-of-two. So, after
527 /// unrolling the loop, the number of loop bodies executed is 2,
528 /// 4, 8, etc.  Note - LLVM converts the if-then-sequence to a switch
529 /// instruction in SimplifyCFG.cpp.  Then, the backend decides how code for
530 /// the switch instruction is generated.
531 ///
532 /// ***Prolog case***
533 ///        extraiters = tripcount % loopfactor
534 ///        if (extraiters == 0) jump Loop:
535 ///        else jump Prol:
536 /// Prol:  LoopBody;
537 ///        extraiters -= 1                 // Omitted if unroll factor is 2.
538 ///        if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.
539 ///        if (tripcount < loopfactor) jump End:
540 /// Loop:
541 /// ...
542 /// End:
543 ///
544 /// ***Epilog case***
545 ///        extraiters = tripcount % loopfactor
546 ///        if (tripcount < loopfactor) jump LoopExit:
547 ///        unroll_iters = tripcount - extraiters
548 /// Loop:  LoopBody; (executes unroll_iter times);
549 ///        unroll_iter -= 1
550 ///        if (unroll_iter != 0) jump Loop:
551 /// LoopExit:
552 ///        if (extraiters == 0) jump EpilExit:
553 /// Epil:  LoopBody; (executes extraiters times)
554 ///        extraiters -= 1                 // Omitted if unroll factor is 2.
555 ///        if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
556 /// EpilExit:
557 
558 bool llvm::UnrollRuntimeLoopRemainder(
559     Loop *L, unsigned Count, bool AllowExpensiveTripCount,
560     bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV,
561     LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
562     const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop) {
563   LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
564   LLVM_DEBUG(L->dump());
565   LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
566                                 : dbgs() << "Using prolog remainder.\n");
567 
568   // Make sure the loop is in canonical form.
569   if (!L->isLoopSimplifyForm()) {
570     LLVM_DEBUG(dbgs() << "Not in simplify form!\n");
571     return false;
572   }
573 
574   // Guaranteed by LoopSimplifyForm.
575   BasicBlock *Latch = L->getLoopLatch();
576   BasicBlock *Header = L->getHeader();
577 
578   BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
579 
580   if (!LatchBR || LatchBR->isUnconditional()) {
581     // The loop-rotate pass can be helpful to avoid this in many cases.
582     LLVM_DEBUG(
583         dbgs()
584         << "Loop latch not terminated by a conditional branch.\n");
585     return false;
586   }
587 
588   unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
589   BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
590 
591   if (L->contains(LatchExit)) {
592     // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
593     // targets of the Latch be an exit block out of the loop.
594     LLVM_DEBUG(
595         dbgs()
596         << "One of the loop latch successors must be the exit block.\n");
597     return false;
598   }
599 
600   // These are exit blocks other than the target of the latch exiting block.
601   SmallVector<BasicBlock *, 4> OtherExits;
602   L->getUniqueNonLatchExitBlocks(OtherExits);
603   // Support only single exit and exiting block unless multi-exit loop
604   // unrolling is enabled.
605   if (!L->getExitingBlock() || OtherExits.size()) {
606     // We rely on LCSSA form being preserved when the exit blocks are transformed.
607     // (Note that only an off-by-default mode of the old PM disables PreserveLCCA.)
608     if (!PreserveLCSSA)
609       return false;
610 
611     if (!canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit,
612                                           UseEpilogRemainder)) {
613       LLVM_DEBUG(
614           dbgs()
615           << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
616              "enabled!\n");
617       return false;
618     }
619   }
620   // Use Scalar Evolution to compute the trip count. This allows more loops to
621   // be unrolled than relying on induction var simplification.
622   if (!SE)
623     return false;
624 
625   // Only unroll loops with a computable trip count.
626   // We calculate the backedge count by using getExitCount on the Latch block,
627   // which is proven to be the only exiting block in this loop. This is same as
628   // calculating getBackedgeTakenCount on the loop (which computes SCEV for all
629   // exiting blocks).
630   const SCEV *BECountSC = SE->getExitCount(L, Latch);
631   if (isa<SCEVCouldNotCompute>(BECountSC)) {
632     LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n");
633     return false;
634   }
635 
636   unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
637 
638   // Add 1 since the backedge count doesn't include the first loop iteration.
639   // (Note that overflow can occur, this is handled explicitly below)
640   const SCEV *TripCountSC =
641       SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
642   if (isa<SCEVCouldNotCompute>(TripCountSC)) {
643     LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
644     return false;
645   }
646 
647   BasicBlock *PreHeader = L->getLoopPreheader();
648   BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
649   const DataLayout &DL = Header->getModule()->getDataLayout();
650   SCEVExpander Expander(*SE, DL, "loop-unroll");
651   if (!AllowExpensiveTripCount &&
652       Expander.isHighCostExpansion(TripCountSC, L, SCEVCheapExpansionBudget,
653                                    TTI, PreHeaderBR)) {
654     LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
655     return false;
656   }
657 
658   // This constraint lets us deal with an overflowing trip count easily; see the
659   // comment on ModVal below.
660   if (Log2_32(Count) > BEWidth) {
661     LLVM_DEBUG(
662         dbgs()
663         << "Count failed constraint on overflow trip count calculation.\n");
664     return false;
665   }
666 
667   // Loop structure is the following:
668   //
669   // PreHeader
670   //   Header
671   //   ...
672   //   Latch
673   // LatchExit
674 
675   BasicBlock *NewPreHeader;
676   BasicBlock *NewExit = nullptr;
677   BasicBlock *PrologExit = nullptr;
678   BasicBlock *EpilogPreHeader = nullptr;
679   BasicBlock *PrologPreHeader = nullptr;
680 
681   if (UseEpilogRemainder) {
682     // If epilog remainder
683     // Split PreHeader to insert a branch around loop for unrolling.
684     NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
685     NewPreHeader->setName(PreHeader->getName() + ".new");
686     // Split LatchExit to create phi nodes from branch above.
687     NewExit = SplitBlockPredecessors(LatchExit, {Latch}, ".unr-lcssa", DT, LI,
688                                      nullptr, PreserveLCSSA);
689     // NewExit gets its DebugLoc from LatchExit, which is not part of the
690     // original Loop.
691     // Fix this by setting Loop's DebugLoc to NewExit.
692     auto *NewExitTerminator = NewExit->getTerminator();
693     NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());
694     // Split NewExit to insert epilog remainder loop.
695     EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
696     EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
697 
698     // If the latch exits from multiple level of nested loops, then
699     // by assumption there must be another loop exit which branches to the
700     // outer loop and we must adjust the loop for the newly inserted blocks
701     // to account for the fact that our epilogue is still in the same outer
702     // loop. Note that this leaves loopinfo temporarily out of sync with the
703     // CFG until the actual epilogue loop is inserted.
704     if (auto *ParentL = L->getParentLoop())
705       if (LI->getLoopFor(LatchExit) != ParentL) {
706         LI->removeBlock(NewExit);
707         ParentL->addBasicBlockToLoop(NewExit, *LI);
708         LI->removeBlock(EpilogPreHeader);
709         ParentL->addBasicBlockToLoop(EpilogPreHeader, *LI);
710       }
711 
712   } else {
713     // If prolog remainder
714     // Split the original preheader twice to insert prolog remainder loop
715     PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
716     PrologPreHeader->setName(Header->getName() + ".prol.preheader");
717     PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
718                             DT, LI);
719     PrologExit->setName(Header->getName() + ".prol.loopexit");
720     // Split PrologExit to get NewPreHeader.
721     NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
722     NewPreHeader->setName(PreHeader->getName() + ".new");
723   }
724   // Loop structure should be the following:
725   //  Epilog             Prolog
726   //
727   // PreHeader         PreHeader
728   // *NewPreHeader     *PrologPreHeader
729   //   Header          *PrologExit
730   //   ...             *NewPreHeader
731   //   Latch             Header
732   // *NewExit            ...
733   // *EpilogPreHeader    Latch
734   // LatchExit              LatchExit
735 
736   // Calculate conditions for branch around loop for unrolling
737   // in epilog case and around prolog remainder loop in prolog case.
738   // Compute the number of extra iterations required, which is:
739   //  extra iterations = run-time trip count % loop unroll factor
740   PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
741   IRBuilder<> B(PreHeaderBR);
742   Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
743                                             PreHeaderBR);
744   Value *BECount;
745   // If there are other exits before the latch, that may cause the latch exit
746   // branch to never be executed, and the latch exit count may be poison.
747   // In this case, freeze the TripCount and base BECount on the frozen
748   // TripCount. We will introduce two branches using these values, and it's
749   // important that they see a consistent value (which would not be guaranteed
750   // if were frozen independently.)
751   if ((!OtherExits.empty() || !SE->loopHasNoAbnormalExits(L)) &&
752       !isGuaranteedNotToBeUndefOrPoison(TripCount, AC, PreHeaderBR, DT)) {
753     TripCount = B.CreateFreeze(TripCount);
754     BECount =
755         B.CreateAdd(TripCount, ConstantInt::get(TripCount->getType(), -1));
756   } else {
757     // If we don't need to freeze, use SCEVExpander for BECount as well, to
758     // allow slightly better value reuse.
759     BECount =
760         Expander.expandCodeFor(BECountSC, BECountSC->getType(), PreHeaderBR);
761   }
762 
763   Value * const ModVal = CreateTripRemainder(B, BECount, TripCount, Count);
764 
765   Value *BranchVal =
766       UseEpilogRemainder ? B.CreateICmpULT(BECount,
767                                            ConstantInt::get(BECount->getType(),
768                                                             Count - 1)) :
769                            B.CreateIsNotNull(ModVal, "lcmp.mod");
770   BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
771   BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
772   // Branch to either remainder (extra iterations) loop or unrolling loop.
773   B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
774   PreHeaderBR->eraseFromParent();
775   if (DT) {
776     if (UseEpilogRemainder)
777       DT->changeImmediateDominator(NewExit, PreHeader);
778     else
779       DT->changeImmediateDominator(PrologExit, PreHeader);
780   }
781   Function *F = Header->getParent();
782   // Get an ordered list of blocks in the loop to help with the ordering of the
783   // cloned blocks in the prolog/epilog code
784   LoopBlocksDFS LoopBlocks(L);
785   LoopBlocks.perform(LI);
786 
787   //
788   // For each extra loop iteration, create a copy of the loop's basic blocks
789   // and generate a condition that branches to the copy depending on the
790   // number of 'left over' iterations.
791   //
792   std::vector<BasicBlock *> NewBlocks;
793   ValueToValueMapTy VMap;
794 
795   // Clone all the basic blocks in the loop. If Count is 2, we don't clone
796   // the loop, otherwise we create a cloned loop to execute the extra
797   // iterations. This function adds the appropriate CFG connections.
798   BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
799   BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
800   Loop *remainderLoop = CloneLoopBlocks(
801       L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot,
802       NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
803 
804   // Assign the maximum possible trip count as the back edge weight for the
805   // remainder loop if the original loop comes with a branch weight.
806   if (remainderLoop && !UnrollRemainder)
807     updateLatchBranchWeightsForRemainderLoop(L, remainderLoop, Count);
808 
809   // Insert the cloned blocks into the function.
810   F->getBasicBlockList().splice(InsertBot->getIterator(),
811                                 F->getBasicBlockList(),
812                                 NewBlocks[0]->getIterator(),
813                                 F->end());
814 
815   // Now the loop blocks are cloned and the other exiting blocks from the
816   // remainder are connected to the original Loop's exit blocks. The remaining
817   // work is to update the phi nodes in the original loop, and take in the
818   // values from the cloned region.
819   for (auto *BB : OtherExits) {
820     // Given we preserve LCSSA form, we know that the values used outside the
821     // loop will be used through these phi nodes at the exit blocks that are
822     // transformed below.
823     for (PHINode &PN : BB->phis()) {
824      unsigned oldNumOperands = PN.getNumIncomingValues();
825      // Add the incoming values from the remainder code to the end of the phi
826      // node.
827      for (unsigned i = 0; i < oldNumOperands; i++){
828        auto *PredBB =PN.getIncomingBlock(i);
829        if (PredBB == Latch)
830          // The latch exit is handled seperately, see connectX
831          continue;
832        if (!L->contains(PredBB))
833          // Even if we had dedicated exits, the code above inserted an
834          // extra branch which can reach the latch exit.
835          continue;
836 
837        auto *V = PN.getIncomingValue(i);
838        if (Instruction *I = dyn_cast<Instruction>(V))
839          if (L->contains(I))
840            V = VMap.lookup(I);
841        PN.addIncoming(V, cast<BasicBlock>(VMap[PredBB]));
842      }
843    }
844 #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
845     for (BasicBlock *SuccBB : successors(BB)) {
846       assert(!(llvm::is_contained(OtherExits, SuccBB) || SuccBB == LatchExit) &&
847              "Breaks the definition of dedicated exits!");
848     }
849 #endif
850   }
851 
852   // Update the immediate dominator of the exit blocks and blocks that are
853   // reachable from the exit blocks. This is needed because we now have paths
854   // from both the original loop and the remainder code reaching the exit
855   // blocks. While the IDom of these exit blocks were from the original loop,
856   // now the IDom is the preheader (which decides whether the original loop or
857   // remainder code should run).
858   if (DT && !L->getExitingBlock()) {
859     SmallVector<BasicBlock *, 16> ChildrenToUpdate;
860     // NB! We have to examine the dom children of all loop blocks, not just
861     // those which are the IDom of the exit blocks. This is because blocks
862     // reachable from the exit blocks can have their IDom as the nearest common
863     // dominator of the exit blocks.
864     for (auto *BB : L->blocks()) {
865       auto *DomNodeBB = DT->getNode(BB);
866       for (auto *DomChild : DomNodeBB->children()) {
867         auto *DomChildBB = DomChild->getBlock();
868         if (!L->contains(LI->getLoopFor(DomChildBB)))
869           ChildrenToUpdate.push_back(DomChildBB);
870       }
871     }
872     for (auto *BB : ChildrenToUpdate)
873       DT->changeImmediateDominator(BB, PreHeader);
874   }
875 
876   // Loop structure should be the following:
877   //  Epilog             Prolog
878   //
879   // PreHeader         PreHeader
880   // NewPreHeader      PrologPreHeader
881   //   Header            PrologHeader
882   //   ...               ...
883   //   Latch             PrologLatch
884   // NewExit           PrologExit
885   // EpilogPreHeader   NewPreHeader
886   //   EpilogHeader      Header
887   //   ...               ...
888   //   EpilogLatch       Latch
889   // LatchExit              LatchExit
890 
891   // Rewrite the cloned instruction operands to use the values created when the
892   // clone is created.
893   for (BasicBlock *BB : NewBlocks) {
894     for (Instruction &I : *BB) {
895       RemapInstruction(&I, VMap,
896                        RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
897     }
898   }
899 
900   if (UseEpilogRemainder) {
901     // Connect the epilog code to the original loop and update the
902     // PHI functions.
903     ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader,
904                   EpilogPreHeader, NewPreHeader, VMap, DT, LI,
905                   PreserveLCSSA);
906 
907     // Update counter in loop for unrolling.
908     // Use an incrementing IV.  Pre-incr/post-incr is backedge/trip count.
909     // Subtle: TestVal can be 0 if we wrapped when computing the trip count,
910     // thus we must compare the post-increment (wrapping) value.
911     IRBuilder<> B2(NewPreHeader->getTerminator());
912     Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
913     BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
914     PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
915                                       Header->getFirstNonPHI());
916     B2.SetInsertPoint(LatchBR);
917     auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
918     auto *One = ConstantInt::get(NewIdx->getType(), 1);
919     Value *IdxNext = B2.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
920     auto Pred = LatchBR->getSuccessor(0) == Header ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
921     Value *IdxCmp = B2.CreateICmp(Pred, IdxNext, TestVal, NewIdx->getName() + ".ncmp");
922     NewIdx->addIncoming(Zero, NewPreHeader);
923     NewIdx->addIncoming(IdxNext, Latch);
924     LatchBR->setCondition(IdxCmp);
925   } else {
926     // Connect the prolog code to the original loop and update the
927     // PHI functions.
928     ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
929                   NewPreHeader, VMap, DT, LI, PreserveLCSSA);
930   }
931 
932   // If this loop is nested, then the loop unroller changes the code in the any
933   // of its parent loops, so the Scalar Evolution pass needs to be run again.
934   SE->forgetTopmostLoop(L);
935 
936   // Verify that the Dom Tree and Loop Info are correct.
937 #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
938   if (DT) {
939     assert(DT->verify(DominatorTree::VerificationLevel::Full));
940     LI->verify(*DT);
941   }
942 #endif
943 
944   // For unroll factor 2 remainder loop will have 1 iteration.
945   if (Count == 2 && DT && LI && SE) {
946     // TODO: This code could probably be pulled out into a helper function
947     // (e.g. breakLoopBackedgeAndSimplify) and reused in loop-deletion.
948     BasicBlock *RemainderLatch = remainderLoop->getLoopLatch();
949     assert(RemainderLatch);
950     SmallVector<BasicBlock*> RemainderBlocks(remainderLoop->getBlocks().begin(),
951                                              remainderLoop->getBlocks().end());
952     breakLoopBackedge(remainderLoop, *DT, *SE, *LI, nullptr);
953     remainderLoop = nullptr;
954 
955     // Simplify loop values after breaking the backedge
956     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
957     SmallVector<WeakTrackingVH, 16> DeadInsts;
958     for (BasicBlock *BB : RemainderBlocks) {
959       for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
960         if (Value *V = simplifyInstruction(&Inst, {DL, nullptr, DT, AC}))
961           if (LI->replacementPreservesLCSSAForm(&Inst, V))
962             Inst.replaceAllUsesWith(V);
963         if (isInstructionTriviallyDead(&Inst))
964           DeadInsts.emplace_back(&Inst);
965       }
966       // We can't do recursive deletion until we're done iterating, as we might
967       // have a phi which (potentially indirectly) uses instructions later in
968       // the block we're iterating through.
969       RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
970     }
971 
972     // Merge latch into exit block.
973     auto *ExitBB = RemainderLatch->getSingleSuccessor();
974     assert(ExitBB && "required after breaking cond br backedge");
975     DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
976     MergeBlockIntoPredecessor(ExitBB, &DTU, LI);
977   }
978 
979   // Canonicalize to LoopSimplifyForm both original and remainder loops. We
980   // cannot rely on the LoopUnrollPass to do this because it only does
981   // canonicalization for parent/subloops and not the sibling loops.
982   if (OtherExits.size() > 0) {
983     // Generate dedicated exit blocks for the original loop, to preserve
984     // LoopSimplifyForm.
985     formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA);
986     // Generate dedicated exit blocks for the remainder loop if one exists, to
987     // preserve LoopSimplifyForm.
988     if (remainderLoop)
989       formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA);
990   }
991 
992   auto UnrollResult = LoopUnrollResult::Unmodified;
993   if (remainderLoop && UnrollRemainder) {
994     LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
995     UnrollResult =
996         UnrollLoop(remainderLoop,
997                    {/*Count*/ Count - 1, /*Force*/ false, /*Runtime*/ false,
998                     /*AllowExpensiveTripCount*/ false,
999                     /*UnrollRemainder*/ false, ForgetAllSCEV},
1000                    LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA);
1001   }
1002 
1003   if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
1004     *ResultLoop = remainderLoop;
1005   NumRuntimeUnrolled++;
1006   return true;
1007 }
1008