1 //===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements some loop unrolling utilities for loops with run-time
10 // trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
11 // trip counts.
12 //
13 // The functions in this file are used to generate extra code when the
14 // run-time trip count modulo the unroll factor is not 0. When this is the
15 // case, we need to generate code to execute these 'left over' iterations.
16 //
17 // The current strategy generates an if-then-else sequence prior to the
18 // unrolled loop to execute the 'left over' iterations before or after the
19 // unrolled loop.
20 //
21 //===----------------------------------------------------------------------===//
22
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/Analysis/LoopIterator.h"
26 #include "llvm/Analysis/ScalarEvolution.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/Dominators.h"
29 #include "llvm/IR/MDBuilder.h"
30 #include "llvm/IR/Metadata.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include "llvm/Transforms/Utils.h"
36 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
37 #include "llvm/Transforms/Utils/Cloning.h"
38 #include "llvm/Transforms/Utils/LoopUtils.h"
39 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
40 #include "llvm/Transforms/Utils/UnrollLoop.h"
41 #include <algorithm>
42
43 using namespace llvm;
44
45 #define DEBUG_TYPE "loop-unroll"
46
47 STATISTIC(NumRuntimeUnrolled,
48 "Number of loops unrolled with run-time trip counts");
49 static cl::opt<bool> UnrollRuntimeMultiExit(
50 "unroll-runtime-multi-exit", cl::init(false), cl::Hidden,
51 cl::desc("Allow runtime unrolling for loops with multiple exits, when "
52 "epilog is generated"));
53 static cl::opt<bool> UnrollRuntimeOtherExitPredictable(
54 "unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden,
55 cl::desc("Assume the non latch exit block to be predictable"));
56
57 /// Connect the unrolling prolog code to the original loop.
58 /// The unrolling prolog code contains code to execute the
59 /// 'extra' iterations if the run-time trip count modulo the
60 /// unroll count is non-zero.
61 ///
62 /// This function performs the following:
63 /// - Create PHI nodes at prolog end block to combine values
64 /// that exit the prolog code and jump around the prolog.
65 /// - Add a PHI operand to a PHI node at the loop exit block
66 /// for values that exit the prolog and go around the loop.
67 /// - Branch around the original loop if the trip count is less
68 /// than the unroll factor.
69 ///
ConnectProlog(Loop * L,Value * BECount,unsigned Count,BasicBlock * PrologExit,BasicBlock * OriginalLoopLatchExit,BasicBlock * PreHeader,BasicBlock * NewPreHeader,ValueToValueMapTy & VMap,DominatorTree * DT,LoopInfo * LI,bool PreserveLCSSA)70 static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
71 BasicBlock *PrologExit,
72 BasicBlock *OriginalLoopLatchExit,
73 BasicBlock *PreHeader, BasicBlock *NewPreHeader,
74 ValueToValueMapTy &VMap, DominatorTree *DT,
75 LoopInfo *LI, bool PreserveLCSSA) {
76 // Loop structure should be the following:
77 // Preheader
78 // PrologHeader
79 // ...
80 // PrologLatch
81 // PrologExit
82 // NewPreheader
83 // Header
84 // ...
85 // Latch
86 // LatchExit
87 BasicBlock *Latch = L->getLoopLatch();
88 assert(Latch && "Loop must have a latch");
89 BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);
90
91 // Create a PHI node for each outgoing value from the original loop
92 // (which means it is an outgoing value from the prolog code too).
93 // The new PHI node is inserted in the prolog end basic block.
94 // The new PHI node value is added as an operand of a PHI node in either
95 // the loop header or the loop exit block.
96 for (BasicBlock *Succ : successors(Latch)) {
97 for (PHINode &PN : Succ->phis()) {
98 // Add a new PHI node to the prolog end block and add the
99 // appropriate incoming values.
100 // TODO: This code assumes that the PrologExit (or the LatchExit block for
101 // prolog loop) contains only one predecessor from the loop, i.e. the
102 // PrologLatch. When supporting multiple-exiting block loops, we can have
103 // two or more blocks that have the LatchExit as the target in the
104 // original loop.
105 PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
106 PrologExit->getFirstNonPHI());
107 // Adding a value to the new PHI node from the original loop preheader.
108 // This is the value that skips all the prolog code.
109 if (L->contains(&PN)) {
110 // Succ is loop header.
111 NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader),
112 PreHeader);
113 } else {
114 // Succ is LatchExit.
115 NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader);
116 }
117
118 Value *V = PN.getIncomingValueForBlock(Latch);
119 if (Instruction *I = dyn_cast<Instruction>(V)) {
120 if (L->contains(I)) {
121 V = VMap.lookup(I);
122 }
123 }
124 // Adding a value to the new PHI node from the last prolog block
125 // that was created.
126 NewPN->addIncoming(V, PrologLatch);
127
128 // Update the existing PHI node operand with the value from the
129 // new PHI node. How this is done depends on if the existing
130 // PHI node is in the original loop block, or the exit block.
131 if (L->contains(&PN))
132 PN.setIncomingValueForBlock(NewPreHeader, NewPN);
133 else
134 PN.addIncoming(NewPN, PrologExit);
135 }
136 }
137
138 // Make sure that created prolog loop is in simplified form
139 SmallVector<BasicBlock *, 4> PrologExitPreds;
140 Loop *PrologLoop = LI->getLoopFor(PrologLatch);
141 if (PrologLoop) {
142 for (BasicBlock *PredBB : predecessors(PrologExit))
143 if (PrologLoop->contains(PredBB))
144 PrologExitPreds.push_back(PredBB);
145
146 SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI,
147 nullptr, PreserveLCSSA);
148 }
149
150 // Create a branch around the original loop, which is taken if there are no
151 // iterations remaining to be executed after running the prologue.
152 Instruction *InsertPt = PrologExit->getTerminator();
153 IRBuilder<> B(InsertPt);
154
155 assert(Count != 0 && "nonsensical Count!");
156
157 // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
158 // This means %xtraiter is (BECount + 1) and all of the iterations of this
159 // loop were executed by the prologue. Note that if BECount <u (Count - 1)
160 // then (BECount + 1) cannot unsigned-overflow.
161 Value *BrLoopExit =
162 B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
163 // Split the exit to maintain loop canonicalization guarantees
164 SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit));
165 SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
166 nullptr, PreserveLCSSA);
167 // Add the branch to the exit block (around the unrolled loop)
168 B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
169 InsertPt->eraseFromParent();
170 if (DT)
171 DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit);
172 }
173
174 /// Connect the unrolling epilog code to the original loop.
175 /// The unrolling epilog code contains code to execute the
176 /// 'extra' iterations if the run-time trip count modulo the
177 /// unroll count is non-zero.
178 ///
179 /// This function performs the following:
180 /// - Update PHI nodes at the unrolling loop exit and epilog loop exit
181 /// - Create PHI nodes at the unrolling loop exit to combine
182 /// values that exit the unrolling loop code and jump around it.
183 /// - Update PHI operands in the epilog loop by the new PHI nodes
184 /// - Branch around the epilog loop if extra iters (ModVal) is zero.
185 ///
ConnectEpilog(Loop * L,Value * ModVal,BasicBlock * NewExit,BasicBlock * Exit,BasicBlock * PreHeader,BasicBlock * EpilogPreHeader,BasicBlock * NewPreHeader,ValueToValueMapTy & VMap,DominatorTree * DT,LoopInfo * LI,bool PreserveLCSSA)186 static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
187 BasicBlock *Exit, BasicBlock *PreHeader,
188 BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
189 ValueToValueMapTy &VMap, DominatorTree *DT,
190 LoopInfo *LI, bool PreserveLCSSA) {
191 BasicBlock *Latch = L->getLoopLatch();
192 assert(Latch && "Loop must have a latch");
193 BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);
194
195 // Loop structure should be the following:
196 //
197 // PreHeader
198 // NewPreHeader
199 // Header
200 // ...
201 // Latch
202 // NewExit (PN)
203 // EpilogPreHeader
204 // EpilogHeader
205 // ...
206 // EpilogLatch
207 // Exit (EpilogPN)
208
209 // Update PHI nodes at NewExit and Exit.
210 for (PHINode &PN : NewExit->phis()) {
211 // PN should be used in another PHI located in Exit block as
212 // Exit was split by SplitBlockPredecessors into Exit and NewExit
213 // Basicaly it should look like:
214 // NewExit:
215 // PN = PHI [I, Latch]
216 // ...
217 // Exit:
218 // EpilogPN = PHI [PN, EpilogPreHeader]
219 //
220 // There is EpilogPreHeader incoming block instead of NewExit as
221 // NewExit was spilt 1 more time to get EpilogPreHeader.
222 assert(PN.hasOneUse() && "The phi should have 1 use");
223 PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser());
224 assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");
225
226 // Add incoming PreHeader from branch around the Loop
227 PN.addIncoming(UndefValue::get(PN.getType()), PreHeader);
228
229 Value *V = PN.getIncomingValueForBlock(Latch);
230 Instruction *I = dyn_cast<Instruction>(V);
231 if (I && L->contains(I))
232 // If value comes from an instruction in the loop add VMap value.
233 V = VMap.lookup(I);
234 // For the instruction out of the loop, constant or undefined value
235 // insert value itself.
236 EpilogPN->addIncoming(V, EpilogLatch);
237
238 assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
239 "EpilogPN should have EpilogPreHeader incoming block");
240 // Change EpilogPreHeader incoming block to NewExit.
241 EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
242 NewExit);
243 // Now PHIs should look like:
244 // NewExit:
245 // PN = PHI [I, Latch], [undef, PreHeader]
246 // ...
247 // Exit:
248 // EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
249 }
250
251 // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
252 // Update corresponding PHI nodes in epilog loop.
253 for (BasicBlock *Succ : successors(Latch)) {
254 // Skip this as we already updated phis in exit blocks.
255 if (!L->contains(Succ))
256 continue;
257 for (PHINode &PN : Succ->phis()) {
258 // Add new PHI nodes to the loop exit block and update epilog
259 // PHIs with the new PHI values.
260 PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",
261 NewExit->getFirstNonPHI());
262 // Adding a value to the new PHI node from the unrolling loop preheader.
263 NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
264 // Adding a value to the new PHI node from the unrolling loop latch.
265 NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch);
266
267 // Update the existing PHI node operand with the value from the new PHI
268 // node. Corresponding instruction in epilog loop should be PHI.
269 PHINode *VPN = cast<PHINode>(VMap[&PN]);
270 VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN);
271 }
272 }
273
274 Instruction *InsertPt = NewExit->getTerminator();
275 IRBuilder<> B(InsertPt);
276 Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
277 assert(Exit && "Loop must have a single exit block only");
278 // Split the epilogue exit to maintain loop canonicalization guarantees
279 SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
280 SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
281 PreserveLCSSA);
282 // Add the branch to the exit block (around the unrolling loop)
283 B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
284 InsertPt->eraseFromParent();
285 if (DT)
286 DT->changeImmediateDominator(Exit, NewExit);
287
288 // Split the main loop exit to maintain canonicalization guarantees.
289 SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
290 SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr,
291 PreserveLCSSA);
292 }
293
294 /// Create a clone of the blocks in a loop and connect them together.
295 /// If CreateRemainderLoop is false, loop structure will not be cloned,
296 /// otherwise a new loop will be created including all cloned blocks, and the
297 /// iterator of it switches to count NewIter down to 0.
298 /// The cloned blocks should be inserted between InsertTop and InsertBot.
299 /// If loop structure is cloned InsertTop should be new preheader, InsertBot
300 /// new loop exit.
301 /// Return the new cloned loop that is created when CreateRemainderLoop is true.
302 static Loop *
CloneLoopBlocks(Loop * L,Value * NewIter,const bool CreateRemainderLoop,const bool UseEpilogRemainder,const bool UnrollRemainder,BasicBlock * InsertTop,BasicBlock * InsertBot,BasicBlock * Preheader,std::vector<BasicBlock * > & NewBlocks,LoopBlocksDFS & LoopBlocks,ValueToValueMapTy & VMap,DominatorTree * DT,LoopInfo * LI)303 CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
304 const bool UseEpilogRemainder, const bool UnrollRemainder,
305 BasicBlock *InsertTop,
306 BasicBlock *InsertBot, BasicBlock *Preheader,
307 std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
308 ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
309 StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
310 BasicBlock *Header = L->getHeader();
311 BasicBlock *Latch = L->getLoopLatch();
312 Function *F = Header->getParent();
313 LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
314 LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
315 Loop *ParentLoop = L->getParentLoop();
316 NewLoopsMap NewLoops;
317 NewLoops[ParentLoop] = ParentLoop;
318 if (!CreateRemainderLoop)
319 NewLoops[L] = ParentLoop;
320
321 // For each block in the original loop, create a new copy,
322 // and update the value map with the newly created values.
323 for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
324 BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
325 NewBlocks.push_back(NewBB);
326
327 // If we're unrolling the outermost loop, there's no remainder loop,
328 // and this block isn't in a nested loop, then the new block is not
329 // in any loop. Otherwise, add it to loopinfo.
330 if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop)
331 addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
332
333 VMap[*BB] = NewBB;
334 if (Header == *BB) {
335 // For the first block, add a CFG connection to this newly
336 // created block.
337 InsertTop->getTerminator()->setSuccessor(0, NewBB);
338 }
339
340 if (DT) {
341 if (Header == *BB) {
342 // The header is dominated by the preheader.
343 DT->addNewBlock(NewBB, InsertTop);
344 } else {
345 // Copy information from original loop to unrolled loop.
346 BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
347 DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
348 }
349 }
350
351 if (Latch == *BB) {
352 // For the last block, if CreateRemainderLoop is false, create a direct
353 // jump to InsertBot. If not, create a loop back to cloned head.
354 VMap.erase((*BB)->getTerminator());
355 BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
356 BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
357 IRBuilder<> Builder(LatchBR);
358 if (!CreateRemainderLoop) {
359 Builder.CreateBr(InsertBot);
360 } else {
361 PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
362 suffix + ".iter",
363 FirstLoopBB->getFirstNonPHI());
364 Value *IdxSub =
365 Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
366 NewIdx->getName() + ".sub");
367 Value *IdxCmp =
368 Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
369 Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
370 NewIdx->addIncoming(NewIter, InsertTop);
371 NewIdx->addIncoming(IdxSub, NewBB);
372 }
373 LatchBR->eraseFromParent();
374 }
375 }
376
377 // Change the incoming values to the ones defined in the preheader or
378 // cloned loop.
379 for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
380 PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
381 if (!CreateRemainderLoop) {
382 if (UseEpilogRemainder) {
383 unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
384 NewPHI->setIncomingBlock(idx, InsertTop);
385 NewPHI->removeIncomingValue(Latch, false);
386 } else {
387 VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
388 cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
389 }
390 } else {
391 unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
392 NewPHI->setIncomingBlock(idx, InsertTop);
393 BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
394 idx = NewPHI->getBasicBlockIndex(Latch);
395 Value *InVal = NewPHI->getIncomingValue(idx);
396 NewPHI->setIncomingBlock(idx, NewLatch);
397 if (Value *V = VMap.lookup(InVal))
398 NewPHI->setIncomingValue(idx, V);
399 }
400 }
401 if (CreateRemainderLoop) {
402 Loop *NewLoop = NewLoops[L];
403 assert(NewLoop && "L should have been cloned");
404 MDNode *LoopID = NewLoop->getLoopID();
405
406 // Only add loop metadata if the loop is not going to be completely
407 // unrolled.
408 if (UnrollRemainder)
409 return NewLoop;
410
411 Optional<MDNode *> NewLoopID = makeFollowupLoopID(
412 LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder});
413 if (NewLoopID.hasValue()) {
414 NewLoop->setLoopID(NewLoopID.getValue());
415
416 // Do not setLoopAlreadyUnrolled if loop attributes have been defined
417 // explicitly.
418 return NewLoop;
419 }
420
421 // Add unroll disable metadata to disable future unrolling for this loop.
422 NewLoop->setLoopAlreadyUnrolled();
423 return NewLoop;
424 }
425 else
426 return nullptr;
427 }
428
429 /// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits
430 /// is populated with all the loop exit blocks other than the LatchExit block.
canSafelyUnrollMultiExitLoop(Loop * L,BasicBlock * LatchExit,bool PreserveLCSSA,bool UseEpilogRemainder)431 static bool canSafelyUnrollMultiExitLoop(Loop *L, BasicBlock *LatchExit,
432 bool PreserveLCSSA,
433 bool UseEpilogRemainder) {
434
435 // We currently have some correctness constrains in unrolling a multi-exit
436 // loop. Check for these below.
437
438 // We rely on LCSSA form being preserved when the exit blocks are transformed.
439 if (!PreserveLCSSA)
440 return false;
441
442 // TODO: Support multiple exiting blocks jumping to the `LatchExit` when
443 // UnrollRuntimeMultiExit is true. This will need updating the logic in
444 // connectEpilog/connectProlog.
445 if (!LatchExit->getSinglePredecessor()) {
446 LLVM_DEBUG(
447 dbgs() << "Bailout for multi-exit handling when latch exit has >1 "
448 "predecessor.\n");
449 return false;
450 }
451 // FIXME: We bail out of multi-exit unrolling when epilog loop is generated
452 // and L is an inner loop. This is because in presence of multiple exits, the
453 // outer loop is incorrect: we do not add the EpilogPreheader and exit to the
454 // outer loop. This is automatically handled in the prolog case, so we do not
455 // have that bug in prolog generation.
456 if (UseEpilogRemainder && L->getParentLoop())
457 return false;
458
459 // All constraints have been satisfied.
460 return true;
461 }
462
463 /// Returns true if we can profitably unroll the multi-exit loop L. Currently,
464 /// we return true only if UnrollRuntimeMultiExit is set to true.
canProfitablyUnrollMultiExitLoop(Loop * L,SmallVectorImpl<BasicBlock * > & OtherExits,BasicBlock * LatchExit,bool PreserveLCSSA,bool UseEpilogRemainder)465 static bool canProfitablyUnrollMultiExitLoop(
466 Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
467 bool PreserveLCSSA, bool UseEpilogRemainder) {
468
469 #if !defined(NDEBUG)
470 assert(canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
471 UseEpilogRemainder) &&
472 "Should be safe to unroll before checking profitability!");
473 #endif
474
475 // Priority goes to UnrollRuntimeMultiExit if it's supplied.
476 if (UnrollRuntimeMultiExit.getNumOccurrences())
477 return UnrollRuntimeMultiExit;
478
479 // The main pain point with multi-exit loop unrolling is that once unrolled,
480 // we will not be able to merge all blocks into a straight line code.
481 // There are branches within the unrolled loop that go to the OtherExits.
482 // The second point is the increase in code size, but this is true
483 // irrespective of multiple exits.
484
485 // Note: Both the heuristics below are coarse grained. We are essentially
486 // enabling unrolling of loops that have a single side exit other than the
487 // normal LatchExit (i.e. exiting into a deoptimize block).
488 // The heuristics considered are:
489 // 1. low number of branches in the unrolled version.
490 // 2. high predictability of these extra branches.
491 // We avoid unrolling loops that have more than two exiting blocks. This
492 // limits the total number of branches in the unrolled loop to be atmost
493 // the unroll factor (since one of the exiting blocks is the latch block).
494 SmallVector<BasicBlock*, 4> ExitingBlocks;
495 L->getExitingBlocks(ExitingBlocks);
496 if (ExitingBlocks.size() > 2)
497 return false;
498
499 // Allow unrolling of loops with no non latch exit blocks.
500 if (OtherExits.size() == 0)
501 return true;
502
503 // The second heuristic is that L has one exit other than the latchexit and
504 // that exit is a deoptimize block. We know that deoptimize blocks are rarely
505 // taken, which also implies the branch leading to the deoptimize block is
506 // highly predictable. When UnrollRuntimeOtherExitPredictable is specified, we
507 // assume the other exit branch is predictable even if it has no deoptimize
508 // call.
509 return (OtherExits.size() == 1 &&
510 (UnrollRuntimeOtherExitPredictable ||
511 OtherExits[0]->getTerminatingDeoptimizeCall()));
512 // TODO: These can be fine-tuned further to consider code size or deopt states
513 // that are captured by the deoptimize exit block.
514 // Also, we can extend this to support more cases, if we actually
515 // know of kinds of multiexit loops that would benefit from unrolling.
516 }
517
518 // Assign the maximum possible trip count as the back edge weight for the
519 // remainder loop if the original loop comes with a branch weight.
updateLatchBranchWeightsForRemainderLoop(Loop * OrigLoop,Loop * RemainderLoop,uint64_t UnrollFactor)520 static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop,
521 Loop *RemainderLoop,
522 uint64_t UnrollFactor) {
523 uint64_t TrueWeight, FalseWeight;
524 BranchInst *LatchBR =
525 cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator());
526 if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
527 uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader()
528 ? FalseWeight
529 : TrueWeight;
530 assert(UnrollFactor > 1);
531 uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight;
532 BasicBlock *Header = RemainderLoop->getHeader();
533 BasicBlock *Latch = RemainderLoop->getLoopLatch();
534 auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator());
535 unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1);
536 MDBuilder MDB(RemainderLatchBR->getContext());
537 MDNode *WeightNode =
538 HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
539 : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
540 RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
541 }
542 }
543
544 /// Insert code in the prolog/epilog code when unrolling a loop with a
545 /// run-time trip-count.
546 ///
547 /// This method assumes that the loop unroll factor is total number
548 /// of loop bodies in the loop after unrolling. (Some folks refer
549 /// to the unroll factor as the number of *extra* copies added).
550 /// We assume also that the loop unroll factor is a power-of-two. So, after
551 /// unrolling the loop, the number of loop bodies executed is 2,
552 /// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch
553 /// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
554 /// the switch instruction is generated.
555 ///
556 /// ***Prolog case***
557 /// extraiters = tripcount % loopfactor
558 /// if (extraiters == 0) jump Loop:
559 /// else jump Prol:
560 /// Prol: LoopBody;
561 /// extraiters -= 1 // Omitted if unroll factor is 2.
562 /// if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.
563 /// if (tripcount < loopfactor) jump End:
564 /// Loop:
565 /// ...
566 /// End:
567 ///
568 /// ***Epilog case***
569 /// extraiters = tripcount % loopfactor
570 /// if (tripcount < loopfactor) jump LoopExit:
571 /// unroll_iters = tripcount - extraiters
572 /// Loop: LoopBody; (executes unroll_iter times);
573 /// unroll_iter -= 1
574 /// if (unroll_iter != 0) jump Loop:
575 /// LoopExit:
576 /// if (extraiters == 0) jump EpilExit:
577 /// Epil: LoopBody; (executes extraiters times)
578 /// extraiters -= 1 // Omitted if unroll factor is 2.
579 /// if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
580 /// EpilExit:
581
UnrollRuntimeLoopRemainder(Loop * L,unsigned Count,bool AllowExpensiveTripCount,bool UseEpilogRemainder,bool UnrollRemainder,bool ForgetAllSCEV,LoopInfo * LI,ScalarEvolution * SE,DominatorTree * DT,AssumptionCache * AC,const TargetTransformInfo * TTI,bool PreserveLCSSA,Loop ** ResultLoop)582 bool llvm::UnrollRuntimeLoopRemainder(
583 Loop *L, unsigned Count, bool AllowExpensiveTripCount,
584 bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV,
585 LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
586 const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop) {
587 LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
588 LLVM_DEBUG(L->dump());
589 LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
590 : dbgs() << "Using prolog remainder.\n");
591
592 // Make sure the loop is in canonical form.
593 if (!L->isLoopSimplifyForm()) {
594 LLVM_DEBUG(dbgs() << "Not in simplify form!\n");
595 return false;
596 }
597
598 // Guaranteed by LoopSimplifyForm.
599 BasicBlock *Latch = L->getLoopLatch();
600 BasicBlock *Header = L->getHeader();
601
602 BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
603
604 if (!LatchBR || LatchBR->isUnconditional()) {
605 // The loop-rotate pass can be helpful to avoid this in many cases.
606 LLVM_DEBUG(
607 dbgs()
608 << "Loop latch not terminated by a conditional branch.\n");
609 return false;
610 }
611
612 unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
613 BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
614
615 if (L->contains(LatchExit)) {
616 // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
617 // targets of the Latch be an exit block out of the loop.
618 LLVM_DEBUG(
619 dbgs()
620 << "One of the loop latch successors must be the exit block.\n");
621 return false;
622 }
623
624 // These are exit blocks other than the target of the latch exiting block.
625 SmallVector<BasicBlock *, 4> OtherExits;
626 L->getUniqueNonLatchExitBlocks(OtherExits);
627 bool isMultiExitUnrollingEnabled =
628 canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
629 UseEpilogRemainder) &&
630 canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
631 UseEpilogRemainder);
632 // Support only single exit and exiting block unless multi-exit loop unrolling is enabled.
633 if (!isMultiExitUnrollingEnabled &&
634 (!L->getExitingBlock() || OtherExits.size())) {
635 LLVM_DEBUG(
636 dbgs()
637 << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
638 "enabled!\n");
639 return false;
640 }
641 // Use Scalar Evolution to compute the trip count. This allows more loops to
642 // be unrolled than relying on induction var simplification.
643 if (!SE)
644 return false;
645
646 // Only unroll loops with a computable trip count, and the trip count needs
647 // to be an int value (allowing a pointer type is a TODO item).
648 // We calculate the backedge count by using getExitCount on the Latch block,
649 // which is proven to be the only exiting block in this loop. This is same as
650 // calculating getBackedgeTakenCount on the loop (which computes SCEV for all
651 // exiting blocks).
652 const SCEV *BECountSC = SE->getExitCount(L, Latch);
653 if (isa<SCEVCouldNotCompute>(BECountSC) ||
654 !BECountSC->getType()->isIntegerTy()) {
655 LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n");
656 return false;
657 }
658
659 unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
660
661 // Add 1 since the backedge count doesn't include the first loop iteration.
662 const SCEV *TripCountSC =
663 SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
664 if (isa<SCEVCouldNotCompute>(TripCountSC)) {
665 LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
666 return false;
667 }
668
669 BasicBlock *PreHeader = L->getLoopPreheader();
670 BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
671 const DataLayout &DL = Header->getModule()->getDataLayout();
672 SCEVExpander Expander(*SE, DL, "loop-unroll");
673 if (!AllowExpensiveTripCount &&
674 Expander.isHighCostExpansion(TripCountSC, L, SCEVCheapExpansionBudget,
675 TTI, PreHeaderBR)) {
676 LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
677 return false;
678 }
679
680 // This constraint lets us deal with an overflowing trip count easily; see the
681 // comment on ModVal below.
682 if (Log2_32(Count) > BEWidth) {
683 LLVM_DEBUG(
684 dbgs()
685 << "Count failed constraint on overflow trip count calculation.\n");
686 return false;
687 }
688
689 // Loop structure is the following:
690 //
691 // PreHeader
692 // Header
693 // ...
694 // Latch
695 // LatchExit
696
697 BasicBlock *NewPreHeader;
698 BasicBlock *NewExit = nullptr;
699 BasicBlock *PrologExit = nullptr;
700 BasicBlock *EpilogPreHeader = nullptr;
701 BasicBlock *PrologPreHeader = nullptr;
702
703 if (UseEpilogRemainder) {
704 // If epilog remainder
705 // Split PreHeader to insert a branch around loop for unrolling.
706 NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
707 NewPreHeader->setName(PreHeader->getName() + ".new");
708 // Split LatchExit to create phi nodes from branch above.
709 SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
710 NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI,
711 nullptr, PreserveLCSSA);
712 // NewExit gets its DebugLoc from LatchExit, which is not part of the
713 // original Loop.
714 // Fix this by setting Loop's DebugLoc to NewExit.
715 auto *NewExitTerminator = NewExit->getTerminator();
716 NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());
717 // Split NewExit to insert epilog remainder loop.
718 EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
719 EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
720 } else {
721 // If prolog remainder
722 // Split the original preheader twice to insert prolog remainder loop
723 PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
724 PrologPreHeader->setName(Header->getName() + ".prol.preheader");
725 PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
726 DT, LI);
727 PrologExit->setName(Header->getName() + ".prol.loopexit");
728 // Split PrologExit to get NewPreHeader.
729 NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
730 NewPreHeader->setName(PreHeader->getName() + ".new");
731 }
732 // Loop structure should be the following:
733 // Epilog Prolog
734 //
735 // PreHeader PreHeader
736 // *NewPreHeader *PrologPreHeader
737 // Header *PrologExit
738 // ... *NewPreHeader
739 // Latch Header
740 // *NewExit ...
741 // *EpilogPreHeader Latch
742 // LatchExit LatchExit
743
744 // Calculate conditions for branch around loop for unrolling
745 // in epilog case and around prolog remainder loop in prolog case.
746 // Compute the number of extra iterations required, which is:
747 // extra iterations = run-time trip count % loop unroll factor
748 PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
749 Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
750 PreHeaderBR);
751 Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
752 PreHeaderBR);
753 IRBuilder<> B(PreHeaderBR);
754 Value *ModVal;
755 // Calculate ModVal = (BECount + 1) % Count.
756 // Note that TripCount is BECount + 1.
757 if (isPowerOf2_32(Count)) {
758 // When Count is power of 2 we don't BECount for epilog case, however we'll
759 // need it for a branch around unrolling loop for prolog case.
760 ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
761 // 1. There are no iterations to be run in the prolog/epilog loop.
762 // OR
763 // 2. The addition computing TripCount overflowed.
764 //
765 // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
766 // the number of iterations that remain to be run in the original loop is a
767 // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
768 // explicitly check this above).
769 } else {
770 // As (BECount + 1) can potentially unsigned overflow we count
771 // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
772 Value *ModValTmp = B.CreateURem(BECount,
773 ConstantInt::get(BECount->getType(),
774 Count));
775 Value *ModValAdd = B.CreateAdd(ModValTmp,
776 ConstantInt::get(ModValTmp->getType(), 1));
777 // At that point (BECount % Count) + 1 could be equal to Count.
778 // To handle this case we need to take mod by Count one more time.
779 ModVal = B.CreateURem(ModValAdd,
780 ConstantInt::get(BECount->getType(), Count),
781 "xtraiter");
782 }
783 Value *BranchVal =
784 UseEpilogRemainder ? B.CreateICmpULT(BECount,
785 ConstantInt::get(BECount->getType(),
786 Count - 1)) :
787 B.CreateIsNotNull(ModVal, "lcmp.mod");
788 BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
789 BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
790 // Branch to either remainder (extra iterations) loop or unrolling loop.
791 B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
792 PreHeaderBR->eraseFromParent();
793 if (DT) {
794 if (UseEpilogRemainder)
795 DT->changeImmediateDominator(NewExit, PreHeader);
796 else
797 DT->changeImmediateDominator(PrologExit, PreHeader);
798 }
799 Function *F = Header->getParent();
800 // Get an ordered list of blocks in the loop to help with the ordering of the
801 // cloned blocks in the prolog/epilog code
802 LoopBlocksDFS LoopBlocks(L);
803 LoopBlocks.perform(LI);
804
805 //
806 // For each extra loop iteration, create a copy of the loop's basic blocks
807 // and generate a condition that branches to the copy depending on the
808 // number of 'left over' iterations.
809 //
810 std::vector<BasicBlock *> NewBlocks;
811 ValueToValueMapTy VMap;
812
813 // For unroll factor 2 remainder loop will have 1 iterations.
814 // Do not create 1 iteration loop.
815 bool CreateRemainderLoop = (Count != 2);
816
817 // Clone all the basic blocks in the loop. If Count is 2, we don't clone
818 // the loop, otherwise we create a cloned loop to execute the extra
819 // iterations. This function adds the appropriate CFG connections.
820 BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
821 BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
822 Loop *remainderLoop = CloneLoopBlocks(
823 L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder,
824 InsertTop, InsertBot,
825 NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
826
827 // Assign the maximum possible trip count as the back edge weight for the
828 // remainder loop if the original loop comes with a branch weight.
829 if (remainderLoop && !UnrollRemainder)
830 updateLatchBranchWeightsForRemainderLoop(L, remainderLoop, Count);
831
832 // Insert the cloned blocks into the function.
833 F->getBasicBlockList().splice(InsertBot->getIterator(),
834 F->getBasicBlockList(),
835 NewBlocks[0]->getIterator(),
836 F->end());
837
838 // Now the loop blocks are cloned and the other exiting blocks from the
839 // remainder are connected to the original Loop's exit blocks. The remaining
840 // work is to update the phi nodes in the original loop, and take in the
841 // values from the cloned region.
842 for (auto *BB : OtherExits) {
843 for (auto &II : *BB) {
844
845 // Given we preserve LCSSA form, we know that the values used outside the
846 // loop will be used through these phi nodes at the exit blocks that are
847 // transformed below.
848 if (!isa<PHINode>(II))
849 break;
850 PHINode *Phi = cast<PHINode>(&II);
851 unsigned oldNumOperands = Phi->getNumIncomingValues();
852 // Add the incoming values from the remainder code to the end of the phi
853 // node.
854 for (unsigned i =0; i < oldNumOperands; i++){
855 Value *newVal = VMap.lookup(Phi->getIncomingValue(i));
856 // newVal can be a constant or derived from values outside the loop, and
857 // hence need not have a VMap value. Also, since lookup already generated
858 // a default "null" VMap entry for this value, we need to populate that
859 // VMap entry correctly, with the mapped entry being itself.
860 if (!newVal) {
861 newVal = Phi->getIncomingValue(i);
862 VMap[Phi->getIncomingValue(i)] = Phi->getIncomingValue(i);
863 }
864 Phi->addIncoming(newVal,
865 cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)]));
866 }
867 }
868 #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
869 for (BasicBlock *SuccBB : successors(BB)) {
870 assert(!(any_of(OtherExits,
871 [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) ||
872 SuccBB == LatchExit) &&
873 "Breaks the definition of dedicated exits!");
874 }
875 #endif
876 }
877
878 // Update the immediate dominator of the exit blocks and blocks that are
879 // reachable from the exit blocks. This is needed because we now have paths
880 // from both the original loop and the remainder code reaching the exit
881 // blocks. While the IDom of these exit blocks were from the original loop,
882 // now the IDom is the preheader (which decides whether the original loop or
883 // remainder code should run).
884 if (DT && !L->getExitingBlock()) {
885 SmallVector<BasicBlock *, 16> ChildrenToUpdate;
886 // NB! We have to examine the dom children of all loop blocks, not just
887 // those which are the IDom of the exit blocks. This is because blocks
888 // reachable from the exit blocks can have their IDom as the nearest common
889 // dominator of the exit blocks.
890 for (auto *BB : L->blocks()) {
891 auto *DomNodeBB = DT->getNode(BB);
892 for (auto *DomChild : DomNodeBB->children()) {
893 auto *DomChildBB = DomChild->getBlock();
894 if (!L->contains(LI->getLoopFor(DomChildBB)))
895 ChildrenToUpdate.push_back(DomChildBB);
896 }
897 }
898 for (auto *BB : ChildrenToUpdate)
899 DT->changeImmediateDominator(BB, PreHeader);
900 }
901
902 // Loop structure should be the following:
903 // Epilog Prolog
904 //
905 // PreHeader PreHeader
906 // NewPreHeader PrologPreHeader
907 // Header PrologHeader
908 // ... ...
909 // Latch PrologLatch
910 // NewExit PrologExit
911 // EpilogPreHeader NewPreHeader
912 // EpilogHeader Header
913 // ... ...
914 // EpilogLatch Latch
915 // LatchExit LatchExit
916
917 // Rewrite the cloned instruction operands to use the values created when the
918 // clone is created.
919 for (BasicBlock *BB : NewBlocks) {
920 for (Instruction &I : *BB) {
921 RemapInstruction(&I, VMap,
922 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
923 }
924 }
925
926 if (UseEpilogRemainder) {
927 // Connect the epilog code to the original loop and update the
928 // PHI functions.
929 ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader,
930 EpilogPreHeader, NewPreHeader, VMap, DT, LI,
931 PreserveLCSSA);
932
933 // Update counter in loop for unrolling.
934 // I should be multiply of Count.
935 IRBuilder<> B2(NewPreHeader->getTerminator());
936 Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
937 BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
938 B2.SetInsertPoint(LatchBR);
939 PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
940 Header->getFirstNonPHI());
941 Value *IdxSub =
942 B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
943 NewIdx->getName() + ".nsub");
944 Value *IdxCmp;
945 if (LatchBR->getSuccessor(0) == Header)
946 IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
947 else
948 IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
949 NewIdx->addIncoming(TestVal, NewPreHeader);
950 NewIdx->addIncoming(IdxSub, Latch);
951 LatchBR->setCondition(IdxCmp);
952 } else {
953 // Connect the prolog code to the original loop and update the
954 // PHI functions.
955 ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
956 NewPreHeader, VMap, DT, LI, PreserveLCSSA);
957 }
958
959 // If this loop is nested, then the loop unroller changes the code in the any
960 // of its parent loops, so the Scalar Evolution pass needs to be run again.
961 SE->forgetTopmostLoop(L);
962
963 // Verify that the Dom Tree is correct.
964 #if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
965 if (DT)
966 assert(DT->verify(DominatorTree::VerificationLevel::Full));
967 #endif
968
969 // Canonicalize to LoopSimplifyForm both original and remainder loops. We
970 // cannot rely on the LoopUnrollPass to do this because it only does
971 // canonicalization for parent/subloops and not the sibling loops.
972 if (OtherExits.size() > 0) {
973 // Generate dedicated exit blocks for the original loop, to preserve
974 // LoopSimplifyForm.
975 formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA);
976 // Generate dedicated exit blocks for the remainder loop if one exists, to
977 // preserve LoopSimplifyForm.
978 if (remainderLoop)
979 formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA);
980 }
981
982 auto UnrollResult = LoopUnrollResult::Unmodified;
983 if (remainderLoop && UnrollRemainder) {
984 LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
985 UnrollResult =
986 UnrollLoop(remainderLoop,
987 {/*Count*/ Count - 1, /*TripCount*/ Count - 1,
988 /*Force*/ false, /*AllowRuntime*/ false,
989 /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
990 /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
991 /*PeelCount*/ 0, /*UnrollRemainder*/ false, ForgetAllSCEV},
992 LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA);
993 }
994
995 if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
996 *ResultLoop = remainderLoop;
997 NumRuntimeUnrolled++;
998 return true;
999 }
1000