1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 // 1. The exit condition for the loop is canonicalized to compare the
16 // induction value against the exit value. This turns loops like:
17 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 // 2. Any use outside of the loop of an expression derived from the indvar
19 // is changed to compute the derived value outside of the loop, eliminating
20 // the dependence on the exit value of the induction variable. If the only
21 // purpose of the loop is to compute the exit value of some derived
22 // expression, this transformation will make the loop dead.
23 //
24 //===----------------------------------------------------------------------===//
25
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallSet.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/ADT/iterator_range.h"
35 #include "llvm/Analysis/LoopInfo.h"
36 #include "llvm/Analysis/LoopPass.h"
37 #include "llvm/Analysis/MemorySSA.h"
38 #include "llvm/Analysis/MemorySSAUpdater.h"
39 #include "llvm/Analysis/ScalarEvolution.h"
40 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
41 #include "llvm/Analysis/TargetLibraryInfo.h"
42 #include "llvm/Analysis/TargetTransformInfo.h"
43 #include "llvm/Analysis/ValueTracking.h"
44 #include "llvm/IR/BasicBlock.h"
45 #include "llvm/IR/Constant.h"
46 #include "llvm/IR/ConstantRange.h"
47 #include "llvm/IR/Constants.h"
48 #include "llvm/IR/DataLayout.h"
49 #include "llvm/IR/DerivedTypes.h"
50 #include "llvm/IR/Dominators.h"
51 #include "llvm/IR/Function.h"
52 #include "llvm/IR/IRBuilder.h"
53 #include "llvm/IR/InstrTypes.h"
54 #include "llvm/IR/Instruction.h"
55 #include "llvm/IR/Instructions.h"
56 #include "llvm/IR/IntrinsicInst.h"
57 #include "llvm/IR/Intrinsics.h"
58 #include "llvm/IR/Module.h"
59 #include "llvm/IR/Operator.h"
60 #include "llvm/IR/PassManager.h"
61 #include "llvm/IR/PatternMatch.h"
62 #include "llvm/IR/Type.h"
63 #include "llvm/IR/Use.h"
64 #include "llvm/IR/User.h"
65 #include "llvm/IR/Value.h"
66 #include "llvm/IR/ValueHandle.h"
67 #include "llvm/InitializePasses.h"
68 #include "llvm/Pass.h"
69 #include "llvm/Support/Casting.h"
70 #include "llvm/Support/CommandLine.h"
71 #include "llvm/Support/Compiler.h"
72 #include "llvm/Support/Debug.h"
73 #include "llvm/Support/MathExtras.h"
74 #include "llvm/Support/raw_ostream.h"
75 #include "llvm/Transforms/Scalar.h"
76 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
77 #include "llvm/Transforms/Utils/Local.h"
78 #include "llvm/Transforms/Utils/LoopUtils.h"
79 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
80 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
81 #include <cassert>
82 #include <cstdint>
83 #include <utility>
84
85 using namespace llvm;
86 using namespace PatternMatch;
87
88 #define DEBUG_TYPE "indvars"
89
// Pass-wide statistics, emitted with -stats.
STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
             "effect in release builds. (Note: this adds additional SCEV "
             "queries potentially changing the analysis result)"));

// Policy for rewriting a loop-closed use of an IV into a computation of the
// exit value outside the loop (see the file header, item 2).
static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(
        clEnumValN(NeverRepl, "never", "never replace exit value"),
        clEnumValN(OnlyCheapRepl, "cheap",
                   "only replace exit value when the cost is cheap"),
        clEnumValN(
            UnusedIndVarInLoop, "unusedindvarinloop",
            "only replace exit value when it is an unused "
            "induction variable in the loop and has cheap replacement cost"),
        clEnumValN(NoHardUse, "noharduse",
                   "only replace exit values when loop def likely dead"),
        clEnumValN(AlwaysRepl, "always",
                   "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                cl::desc("Allow widening of indvars to eliminate s/zext"));
137
namespace {

/// Driver object for the indvars transformation on a single loop. Holds the
/// analyses the transform consults and the worklist of instructions queued
/// for deletion.
class IndVarSimplify {
  // Analyses supplied by the pass manager; none are owned here.
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  // Non-null only when the caller provided MemorySSA to keep updated.
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  // Instructions made dead by the transforms, deleted in bulk later.
  // WeakTrackingVH tolerates entries being RAUW'd or deleted underneath us.
  SmallVector<WeakTrackingVH, 16> DeadInsts;
  // Whether IV widening is permitted for this invocation.
  bool WidenIndVars;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to improve our exit conditions by converting condition from signed
  /// to unsigned or rotating computation out of the loop.
  /// (See inline comment about why this is duplicated from simplifyAndExtend)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzeable exit counts
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  /// Run all enabled transforms on \p L; returns true if IR was changed.
  bool run(Loop *L);
};

} // end anonymous namespace
188
189 //===----------------------------------------------------------------------===//
190 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
191 //===----------------------------------------------------------------------===//
192
193 /// Convert APF to an integer, if possible.
ConvertToSInt(const APFloat & APF,int64_t & IntVal)194 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
195 bool isExact = false;
196 // See if we can convert this to an int64_t
197 uint64_t UIntVal;
198 if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
199 APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
200 !isExact)
201 return false;
202 IntVal = UIntVal;
203 return true;
204 }
205
/// If the loop has floating induction variable then insert corresponding
/// integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
///
/// \p PN is a header phi with exactly two incoming edges (preheader and
/// backedge). Returns true if the IR was changed.
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  // If incoming block 0 is inside the loop it is the backedge, so the
  // preheader ("incoming") edge is index 1 -- the bool-to-unsigned
  // conversion picks the right index directly.
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if increment operation is not
  // an add or increment value can not be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator. Require exactly two users.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison. Ordered and unordered fp
  // predicates map to the same signed integer predicate; NaNs cannot occur
  // since init/step/exit are integral constants.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      // Inclusive bounds need one extra iteration's worth of range.
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      // Inclusive bounds need one extra iteration's worth of range.
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}
407
rewriteNonIntegerIVs(Loop * L)408 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
409 // First step. Check to see if there are any floating-point recurrences.
410 // If there are, change them into integer recurrences, permitting analysis by
411 // the SCEV routines.
412 BasicBlock *Header = L->getHeader();
413
414 SmallVector<WeakTrackingVH, 8> PHIs;
415 for (PHINode &PN : Header->phis())
416 PHIs.push_back(&PN);
417
418 bool Changed = false;
419 for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
420 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
421 Changed |= handleFloatingPointIV(L, PN);
422
423 // If the loop previously had floating-point IV, ScalarEvolution
424 // may not have been able to compute a trip count. Now that we've done some
425 // re-writing, the trip count may be computable.
426 if (Changed)
427 SE->forgetLoop(L);
428 return Changed;
429 }
430
431 //===---------------------------------------------------------------------===//
432 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
433 // they will exit at the first iteration.
434 //===---------------------------------------------------------------------===//
435
/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// loop header.
///
/// Returns true if any exit-block phi was rewritten.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        // A loop-variant condition could be taken on any iteration, so only
        // invariant conditions pin the exit to the first iteration.
        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          // On the first iteration the header phi holds its preheader value;
          // forward that value directly to the exit phi.
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}
506
507 //===----------------------------------------------------------------------===//
508 // IV Widening - Extend the width of an IV to cover its widest uses.
509 //===----------------------------------------------------------------------===//
510
511 /// Update information about the induction variable that is extended by this
512 /// sign or zero extend operation. This is used to determine the final width of
513 /// the IV before actually widening it.
visitIVCast(CastInst * Cast,WideIVInfo & WI,ScalarEvolution * SE,const TargetTransformInfo * TTI)514 static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
515 ScalarEvolution *SE,
516 const TargetTransformInfo *TTI) {
517 bool IsSigned = Cast->getOpcode() == Instruction::SExt;
518 if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
519 return;
520
521 Type *Ty = Cast->getType();
522 uint64_t Width = SE->getTypeSizeInBits(Ty);
523 if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
524 return;
525
526 // Check that `Cast` actually extends the induction variable (we rely on this
527 // later). This takes care of cases where `Cast` is extending a truncation of
528 // the narrow induction variable, and thus can end up being narrower than the
529 // "narrow" induction variable.
530 uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
531 if (NarrowIVWidth >= Width)
532 return;
533
534 // Cast is either an sext or zext up to this point.
535 // We should not widen an indvar if arithmetics on the wider indvar are more
536 // expensive than those on the narrower indvar. We check only the cost of ADD
537 // because at least an ADD is required to increment the induction variable. We
538 // could compute more comprehensively the cost of all instructions on the
539 // induction variable when necessary.
540 if (TTI &&
541 TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
542 TTI->getArithmeticInstrCost(Instruction::Add,
543 Cast->getOperand(0)->getType())) {
544 return;
545 }
546
547 if (!WI.WidestNativeType ||
548 Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
549 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
550 WI.IsSigned = IsSigned;
551 return;
552 }
553
554 // We extend the IV to satisfy the sign of its user(s), or 'signed'
555 // if there are multiple users with both sign- and zero extensions,
556 // in order not to introduce nondeterministic behaviour based on the
557 // unspecified order of a PHI nodes' users-iterator.
558 WI.IsSigned |= IsSigned;
559 }
560
561 //===----------------------------------------------------------------------===//
562 // Live IV Reduction - Minimize IVs live across the loop.
563 //===----------------------------------------------------------------------===//
564
565 //===----------------------------------------------------------------------===//
566 // Simplification of IV users based on SCEV evaluation.
567 //===----------------------------------------------------------------------===//
568
namespace {

/// IVVisitor implementation that records sign/zero-extension information
/// about a narrow IV phi while simplifyUsersOfIV walks its users; the
/// collected WideIVInfo later drives IV widening.
class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  // The narrow induction phi whose extensions are being surveyed.
  PHINode *IVPhi;

public:
  // Aggregated widening decision (widest type seen, signedness).
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
      : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace
592
/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
///
/// Returns true if any IR was changed.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  // Guard intrinsics are only relevant if the function actually uses them.
  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
          Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode *, 8> LoopPhis;
  for (PHINode &PN : L->getHeader()->phis())
    LoopPhis.push_back(&PN);

  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extend evaluation until needed, and avoid running
    // other SCEV based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
                                   &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while(!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        // The wide phi may itself be simplifiable; queue another round.
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}
656
657 //===----------------------------------------------------------------------===//
658 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
659 //===----------------------------------------------------------------------===//
660
661 /// Given an Value which is hoped to be part of an add recurance in the given
662 /// loop, return the associated Phi node if so. Otherwise, return null. Note
663 /// that this is less general than SCEVs AddRec checking.
getLoopPhiForCounter(Value * IncV,Loop * L)664 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
665 Instruction *IncI = dyn_cast<Instruction>(IncV);
666 if (!IncI)
667 return nullptr;
668
669 switch (IncI->getOpcode()) {
670 case Instruction::Add:
671 case Instruction::Sub:
672 break;
673 case Instruction::GetElementPtr:
674 // An IV counter must preserve its type.
675 if (IncI->getNumOperands() == 2)
676 break;
677 [[fallthrough]];
678 default:
679 return nullptr;
680 }
681
682 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
683 if (Phi && Phi->getParent() == L->getHeader()) {
684 if (L->isLoopInvariant(IncI->getOperand(1)))
685 return Phi;
686 return nullptr;
687 }
688 if (IncI->getOpcode() == Instruction::GetElementPtr)
689 return nullptr;
690
691 // Allow add/sub to be commuted.
692 Phi = dyn_cast<PHINode>(IncI->getOperand(1));
693 if (Phi && Phi->getParent() == L->getHeader()) {
694 if (L->isLoopInvariant(IncI->getOperand(0)))
695 return Phi;
696 }
697 return nullptr;
698 }
699
700 /// Whether the current loop exit test is based on this value. Currently this
701 /// is limited to a direct use in the loop condition.
isLoopExitTestBasedOn(Value * V,BasicBlock * ExitingBB)702 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
703 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
704 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
705 // TODO: Allow non-icmp loop test.
706 if (!ICmp)
707 return false;
708
709 // TODO: Allow indirect use.
710 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
711 }
712
713 /// linearFunctionTestReplace policy. Return true unless we can show that the
714 /// current exit test is already sufficiently canonical.
needsLFTR(Loop * L,BasicBlock * ExitingBB)715 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
716 assert(L->getLoopLatch() && "Must be in simplified form");
717
718 // Avoid converting a constant or loop invariant test back to a runtime
719 // test. This is critical for when SCEV's cached ExitCount is less precise
720 // than the current IR (such as after we've proven a particular exit is
721 // actually dead and thus the BE count never reaches our ExitCount.)
722 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
723 if (L->isLoopInvariant(BI->getCondition()))
724 return false;
725
726 // Do LFTR to simplify the exit condition to an ICMP.
727 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
728 if (!Cond)
729 return true;
730
731 // Do LFTR to simplify the exit ICMP to EQ/NE
732 ICmpInst::Predicate Pred = Cond->getPredicate();
733 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
734 return true;
735
736 // Look for a loop invariant RHS
737 Value *LHS = Cond->getOperand(0);
738 Value *RHS = Cond->getOperand(1);
739 if (!L->isLoopInvariant(RHS)) {
740 if (!L->isLoopInvariant(LHS))
741 return true;
742 std::swap(LHS, RHS);
743 }
744 // Look for a simple IV counter LHS
745 PHINode *Phi = dyn_cast<PHINode>(LHS);
746 if (!Phi)
747 Phi = getLoopPhiForCounter(LHS, L);
748
749 if (!Phi)
750 return true;
751
752 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
753 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
754 if (Idx < 0)
755 return true;
756
757 // Do LFTR if the exit condition's IV is *not* a simple counter.
758 Value *IncV = Phi->getIncomingValue(Idx);
759 return Phi != getLoopPhiForCounter(IncV, L);
760 }
761
/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result.  Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison.  This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist.  Note that a false result conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // Basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provable UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit)
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction*, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users.  Safe as false is a conservative result.
    // (Root itself is poison by assumption, so it needs no operand check.)
    if (I != Root && !any_of(I->operands(), [&KnownPoison](const Use &U) {
          return KnownPoison.contains(U) && propagatesPoison(U);
        }))
      continue;

    // First visit of I: its users are now also potentially poison.
    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // way to exiting bb.
  return false;
}
805
806 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
807 /// down to checking that all operands are constant and listing instructions
808 /// that may hide undef.
hasConcreteDefImpl(Value * V,SmallPtrSetImpl<Value * > & Visited,unsigned Depth)809 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
810 unsigned Depth) {
811 if (isa<Constant>(V))
812 return !isa<UndefValue>(V);
813
814 if (Depth >= 6)
815 return false;
816
817 // Conservatively handle non-constant non-instructions. For example, Arguments
818 // may be undef.
819 Instruction *I = dyn_cast<Instruction>(V);
820 if (!I)
821 return false;
822
823 // Load and return values may be undef.
824 if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
825 return false;
826
827 // Optimistically handle other instructions.
828 for (Value *Op : I->operands()) {
829 if (!Visited.insert(Op).second)
830 continue;
831 if (!hasConcreteDefImpl(Op, Visited, Depth+1))
832 return false;
833 }
834 return true;
835 }
836
837 /// Return true if the given value is concrete. We must prove that undef can
838 /// never reach it.
839 ///
840 /// TODO: If we decide that this is a good approach to checking for undef, we
841 /// may factor it into a common location.
hasConcreteDef(Value * V)842 static bool hasConcreteDef(Value *V) {
843 SmallPtrSet<Value*, 8> Visited;
844 Visited.insert(V);
845 return hasConcreteDefImpl(V, Visited, 0);
846 }
847
848 /// Return true if this IV has any uses other than the (soon to be rewritten)
849 /// loop exit test.
AlmostDeadIV(PHINode * Phi,BasicBlock * LatchBlock,Value * Cond)850 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
851 int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
852 Value *IncV = Phi->getIncomingValue(LatchIdx);
853
854 for (User *U : Phi->users())
855 if (U != Cond && U != IncV) return false;
856
857 for (User *U : IncV->users())
858 if (U != Cond && U != Phi) return false;
859 return true;
860 }
861
862 /// Return true if the given phi is a "counter" in L. A counter is an
863 /// add recurance (of integer or pointer type) with an arbitrary start, and a
864 /// step of 1. Note that L must have exactly one latch.
isLoopCounter(PHINode * Phi,Loop * L,ScalarEvolution * SE)865 static bool isLoopCounter(PHINode* Phi, Loop *L,
866 ScalarEvolution *SE) {
867 assert(Phi->getParent() == L->getHeader());
868 assert(L->getLoopLatch());
869
870 if (!SE->isSCEVable(Phi->getType()))
871 return false;
872
873 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
874 if (!AR || AR->getLoop() != L || !AR->isAffine())
875 return false;
876
877 const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
878 if (!Step || !Step->isOne())
879 return false;
880
881 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
882 Value *IncV = Phi->getIncomingValue(LatchIdx);
883 return (getLoopPhiForCounter(IncV, L) == Phi &&
884 isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
885 }
886
/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR.  If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison. Our strategy here differs for
    // pointers and integer IVs. For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace. For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    // Heuristic tie-breaking against the current best candidate.  A "best"
    // candidate that is already almost dead is always worth replacing.
    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}
974
/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/unit stride) after the backedge of loop L
/// is taken ExitCount times.  With UsePostInc the limit is computed for the
/// post-increment value instead (one more step).
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();
  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip count,
    // which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive unit stride of one. This means we don't
    // have to handle the case of negative offsets (yet) and just need to zero
    // extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in practice
    // SCEV will fold the pointer arithmetic away as such:
    // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know apriori that the limit must be a constant when evaluated
    // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType())
        > SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
      IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}
1055
/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable. This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction * const IncVar =
    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use.  We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  // Materialize the loop-invariant limit value to compare against.
  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead.  We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within in.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
      SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
        SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}
1198
//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to clean up loop preheaders.
//===----------------------------------------------------------------------===//
1202
/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  // Walk the preheader backwards from the terminator; instructions sunk so
  // far are stacked above InsertPt in the exit block, preserving their
  // original relative order.
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop.  Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      // A phi use counts at its incoming block, not at the phi's block.
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.  Remember the instruction before
    // moving it, because moving invalidates the current iterator position.
    Instruction *ToMove = &*I;
    bool Done = false;

    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

      // If only debug/pseudo instructions remain above, we're finished.
      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    SE->forgetValue(ToMove);
    if (Done) break;
    // Future sunk instructions go above this one to keep original order.
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}
1292
replaceExitCond(BranchInst * BI,Value * NewCond,SmallVectorImpl<WeakTrackingVH> & DeadInsts)1293 static void replaceExitCond(BranchInst *BI, Value *NewCond,
1294 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1295 auto *OldCond = BI->getCondition();
1296 LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1297 << " with " << *NewCond << "\n");
1298 BI->setCondition(NewCond);
1299 if (OldCond->use_empty())
1300 DeadInsts.emplace_back(OldCond);
1301 }
1302
createFoldedExitCond(const Loop * L,BasicBlock * ExitingBB,bool IsTaken)1303 static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1304 bool IsTaken) {
1305 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1306 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1307 auto *OldCond = BI->getCondition();
1308 return ConstantInt::get(OldCond->getType(),
1309 IsTaken ? ExitIfTrue : !ExitIfTrue);
1310 }
1311
foldExit(const Loop * L,BasicBlock * ExitingBB,bool IsTaken,SmallVectorImpl<WeakTrackingVH> & DeadInsts)1312 static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1313 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1314 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1315 auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1316 replaceExitCond(BI, NewCond, DeadInsts);
1317 }
1318
replaceLoopPHINodesWithPreheaderValues(LoopInfo * LI,Loop * L,SmallVectorImpl<WeakTrackingVH> & DeadInsts,ScalarEvolution & SE)1319 static void replaceLoopPHINodesWithPreheaderValues(
1320 LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1321 ScalarEvolution &SE) {
1322 assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1323 auto *LoopPreheader = L->getLoopPreheader();
1324 auto *LoopHeader = L->getHeader();
1325 SmallVector<Instruction *> Worklist;
1326 for (auto &PN : LoopHeader->phis()) {
1327 auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1328 for (User *U : PN.users())
1329 Worklist.push_back(cast<Instruction>(U));
1330 SE.forgetValue(&PN);
1331 PN.replaceAllUsesWith(PreheaderIncoming);
1332 DeadInsts.emplace_back(&PN);
1333 }
1334
1335 // Replacing with the preheader value will often allow IV users to simplify
1336 // (especially if the preheader value is a constant).
1337 SmallPtrSet<Instruction *, 16> Visited;
1338 while (!Worklist.empty()) {
1339 auto *I = cast<Instruction>(Worklist.pop_back_val());
1340 if (!Visited.insert(I).second)
1341 continue;
1342
1343 // Don't simplify instructions outside the loop.
1344 if (!L->contains(I))
1345 continue;
1346
1347 Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
1348 if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
1349 for (User *U : I->users())
1350 Worklist.push_back(cast<Instruction>(U));
1351 I->replaceAllUsesWith(Res);
1352 DeadInsts.emplace_back(I);
1353 }
1354 }
1355 }
1356
1357 static Value *
createInvariantCond(const Loop * L,BasicBlock * ExitingBB,const ScalarEvolution::LoopInvariantPredicate & LIP,SCEVExpander & Rewriter)1358 createInvariantCond(const Loop *L, BasicBlock *ExitingBB,
1359 const ScalarEvolution::LoopInvariantPredicate &LIP,
1360 SCEVExpander &Rewriter) {
1361 ICmpInst::Predicate InvariantPred = LIP.Pred;
1362 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1363 Rewriter.setInsertPoint(BI);
1364 auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
1365 auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
1366 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1367 if (ExitIfTrue)
1368 InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1369 IRBuilder<> Builder(BI);
1370 return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1371 BI->getCondition()->getName());
1372 }
1373
/// Try to replace the exit test \p ICmp with either a constant (when SCEV can
/// prove its outcome) or a condition that is loop-invariant during the first
/// MaxIter iterations.  \p Inverted means the predicate must be inverted so
/// that it describes staying in the loop; \p SkipLastIter means the value of
/// the condition on the last iteration does not matter.  Returns std::nullopt
/// when no replacement can be constructed.
static std::optional<Value *>
createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
                  const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
                  ScalarEvolution *SE, SCEVExpander &Rewriter) {
  ICmpInst::Predicate Pred = ICmp->getPredicate();
  Value *LHS = ICmp->getOperand(0);
  Value *RHS = ICmp->getOperand(1);

  // 'LHS pred RHS' should now mean that we stay in loop.
  auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true or false?
  if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types so MaxIter matches the addrec's type: widen
  // MaxIter with zext, or truncate it when provably small enough to fit.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    // Semantically skip last iter is "subtract 1, do not bother about unsigned
    // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
    // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
    // So we manually construct umin(a - 1, b - 1).
    SmallVector<const SCEV *, 4> Elements;
    if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
      for (auto *Op : UMin->operands())
        Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
      MaxIter = SE->getUMinFromMismatchedTypes(Elements);
    } else
      MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return std::nullopt;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
  else
    return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
}
1431
/// Try to simplify the (possibly compound) exit condition of \p ExitingBB by
/// replacing each one-use leaf icmp with a constant or loop-invariant form
/// via createReplacement().  Returns true if any condition was replaced.
static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
    bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  assert(
      (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
      "Not a loop exit!");

  // For branch that stays in loop by TRUE condition, go through AND. For branch
  // that stays in loop by FALSE condition, go through OR. Both gives the
  // similar logic: "stay in loop iff all conditions are true(false)".
  bool Inverted = L->contains(BI->getSuccessor(1));
  SmallVector<ICmpInst *, 4> LeafConditions;
  SmallVector<Value *, 4> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  Value *OldCond = BI->getCondition();
  Visited.insert(OldCond);
  Worklist.push_back(OldCond);

  // Descend through one level of logical and/or, enqueuing both operands.
  // Returns false if V is not the expected and/or form.
  auto GoThrough = [&](Value *V) {
    Value *LHS = nullptr, *RHS = nullptr;
    if (Inverted) {
      if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
        return false;
    } else {
      if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
        return false;
    }
    if (Visited.insert(LHS).second)
      Worklist.push_back(LHS);
    if (Visited.insert(RHS).second)
      Worklist.push_back(RHS);
    return true;
  };

  do {
    Value *Curr = Worklist.pop_back_val();
    // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
    // those with one use, to avoid instruction duplication.
    if (Curr->hasOneUse())
      if (!GoThrough(Curr))
        if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
          LeafConditions.push_back(ICmp);
  } while (!Worklist.empty());

  // If the current basic block has the same exit count as the whole loop, and
  // it consists of multiple icmp's, try to collect all icmp's that give exact
  // same exit count. For all other icmp's, we could use one less iteration,
  // because their value on the last iteration doesn't really matter.
  SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
  if (!SkipLastIter && LeafConditions.size() > 1 &&
      SE->getExitCount(L, ExitingBB,
                       ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
          MaxIter)
    for (auto *ICmp : LeafConditions) {
      auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
                                             /*ControlsExit*/ false);
      auto *ExitMax = EL.SymbolicMaxNotTaken;
      if (isa<SCEVCouldNotCompute>(ExitMax))
        continue;
      // They could be of different types (specifically this happens after
      // IV widening).
      auto *WiderType =
          SE->getWiderType(ExitMax->getType(), MaxIter->getType());
      auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
      auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
      if (WideExitMax == WideMaxIter)
        ICmpsFailingOnLastIter.insert(ICmp);
    }

  bool Changed = false;
  for (auto *OldCond : LeafConditions) {
    // Skip last iteration for this icmp under one of two conditions:
    // - We do it for all conditions;
    // - There is another ICmp that would fail on last iter, so this one doesn't
    // really matter.
    bool OptimisticSkipLastIter = SkipLastIter;
    if (!OptimisticSkipLastIter) {
      if (ICmpsFailingOnLastIter.size() > 1)
        OptimisticSkipLastIter = true;
      else if (ICmpsFailingOnLastIter.size() == 1)
        OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
    }
    if (auto Replaced =
            createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
                              OptimisticSkipLastIter, SE, Rewriter)) {
      Changed = true;
      auto *NewCond = *Replaced;
      if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
        NCI->setName(OldCond->getName() + ".first_iter");
        NCI->moveBefore(cast<Instruction>(OldCond));
      }
      LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
                        << " with " << *NewCond << "\n");
      assert(OldCond->hasOneUse() && "Must be!");
      OldCond->replaceAllUsesWith(NewCond);
      DeadInsts.push_back(OldCond);
      // Make sure we no longer consider this condition as failing on last
      // iteration.
      ICmpsFailingOnLastIter.erase(OldCond);
    }
  }
  return Changed;
}
1536
/// Canonicalize exit compares involving zero-extends: first prove signed
/// compares equivalent to their unsigned forms, then rotate the zext out of
/// the loop (compare in the narrow type against a truncated invariant RHS) so
/// SCEV can compute trip counts. Returns true if any compare was changed.
bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags. The alternative to this would be to extend the
  // set of "interesting" IV users to include the icmp, but doing that
  // regresses results in practice by querying SCEVs before trip counts which
  // rely on them which results in SCEV caching sub-optimal answers. The
  // concern about caching sub-optimal results is why we only query SCEVs of
  // the loop invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  // Pass 1: replace "icmp signed-pred zext(X), RHS" with the unsigned
  // predicate when RHS provably fits in the zero-extended range of X's type,
  // so the compare result cannot differ between the two interpretations.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    // Only touch compares whose sole use is the exit branch; other users
    // might rely on the original signed predicate.
    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning cache with sub-optimal results. For the must-execute case,
    // this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    // FullCR is the set of wide values reachable as zext of an inner value;
    // if RHS's unsigned range is inside it, signedness cannot matter.
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed. We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the number
    // of instructions. The exception is when we have a zext(add-rec). The
    // reason for allowing this exception is that we know we need to get rid
    // of the zext for SCEV to be able to compute a trip count for said loops;
    // we consider the new trip count valuable enough to increase instruction
    // count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
    // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when zext is loop varying and RHS is loop invariant. This converts
    // loop varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      // The trunc is loop invariant, so it is materialized once in the
      // preheader rather than recomputed each iteration.
      auto *NewRHS =
          CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
                           L->getLoopPreheader()->getTerminator());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };


    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // The range check above proves trunc(RHS) round-trips (zext(trunc(RHS))
      // == RHS), which is exactly what makes the rotate legal.
      doRotateTransform();
      Changed = true;
      // Note, we are leaving SCEV in an unfortunately imprecise case here
      // as rotation tends to reveal information about trip counts not
      // previously visible.
      continue;
    }
  }

  return Changed;
}
1670
optimizeLoopExits(Loop * L,SCEVExpander & Rewriter)1671 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1672 SmallVector<BasicBlock*, 16> ExitingBlocks;
1673 L->getExitingBlocks(ExitingBlocks);
1674
1675 // Remove all exits which aren't both rewriteable and execute on every
1676 // iteration.
1677 llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1678 // If our exitting block exits multiple loops, we can only rewrite the
1679 // innermost one. Otherwise, we're changing how many times the innermost
1680 // loop runs before it exits.
1681 if (LI->getLoopFor(ExitingBB) != L)
1682 return true;
1683
1684 // Can't rewrite non-branch yet.
1685 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1686 if (!BI)
1687 return true;
1688
1689 // Likewise, the loop latch must be dominated by the exiting BB.
1690 if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1691 return true;
1692
1693 if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1694 // If already constant, nothing to do. However, if this is an
1695 // unconditional exit, we can still replace header phis with their
1696 // preheader value.
1697 if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1698 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1699 return true;
1700 }
1701
1702 return false;
1703 });
1704
1705 if (ExitingBlocks.empty())
1706 return false;
1707
1708 // Get a symbolic upper bound on the loop backedge taken count.
1709 const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1710 if (isa<SCEVCouldNotCompute>(MaxBECount))
1711 return false;
1712
1713 // Visit our exit blocks in order of dominance. We know from the fact that
1714 // all exits must dominate the latch, so there is a total dominance order
1715 // between them.
1716 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1717 // std::sort sorts in ascending order, so we want the inverse of
1718 // the normal dominance relation.
1719 if (A == B) return false;
1720 if (DT->properlyDominates(A, B))
1721 return true;
1722 else {
1723 assert(DT->properlyDominates(B, A) &&
1724 "expected total dominance order!");
1725 return false;
1726 }
1727 });
1728 #ifdef ASSERT
1729 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1730 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1731 }
1732 #endif
1733
1734 bool Changed = false;
1735 bool SkipLastIter = false;
1736 const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1737 auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1738 if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
1739 return;
1740 if (isa<SCEVCouldNotCompute>(CurrMaxExit))
1741 CurrMaxExit = MaxExitCount;
1742 else
1743 CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
1744 // If the loop has more than 1 iteration, all further checks will be
1745 // executed 1 iteration less.
1746 if (CurrMaxExit == MaxBECount)
1747 SkipLastIter = true;
1748 };
1749 SmallSet<const SCEV *, 8> DominatingExactExitCounts;
1750 for (BasicBlock *ExitingBB : ExitingBlocks) {
1751 const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1752 const SCEV *MaxExitCount = SE->getExitCount(
1753 L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1754 if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1755 // Okay, we do not know the exit count here. Can we at least prove that it
1756 // will remain the same within iteration space?
1757 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1758 auto OptimizeCond = [&](bool SkipLastIter) {
1759 return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1760 MaxBECount, SkipLastIter,
1761 SE, Rewriter, DeadInsts);
1762 };
1763
1764 // TODO: We might have proved that we can skip the last iteration for
1765 // this check. In this case, we only want to check the condition on the
1766 // pre-last iteration (MaxBECount - 1). However, there is a nasty
1767 // corner case:
1768 //
1769 // for (i = len; i != 0; i--) { ... check (i ult X) ... }
1770 //
1771 // If we could not prove that len != 0, then we also could not prove that
1772 // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1773 // OptimizeCond will likely not prove anything for it, even if it could
1774 // prove the same fact for len.
1775 //
1776 // As a temporary solution, we query both last and pre-last iterations in
1777 // hope that we will be able to prove triviality for at least one of
1778 // them. We can stop querying MaxBECount for this case once SCEV
1779 // understands that (MaxBECount - 1) will not overflow here.
1780 if (OptimizeCond(false))
1781 Changed = true;
1782 else if (SkipLastIter && OptimizeCond(true))
1783 Changed = true;
1784 UpdateSkipLastIter(MaxExitCount);
1785 continue;
1786 }
1787
1788 UpdateSkipLastIter(ExactExitCount);
1789
1790 // If we know we'd exit on the first iteration, rewrite the exit to
1791 // reflect this. This does not imply the loop must exit through this
1792 // exit; there may be an earlier one taken on the first iteration.
1793 // We know that the backedge can't be taken, so we replace all
1794 // the header PHIs with values coming from the preheader.
1795 if (ExactExitCount->isZero()) {
1796 foldExit(L, ExitingBB, true, DeadInsts);
1797 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1798 Changed = true;
1799 continue;
1800 }
1801
1802 assert(ExactExitCount->getType()->isIntegerTy() &&
1803 MaxBECount->getType()->isIntegerTy() &&
1804 "Exit counts must be integers");
1805
1806 Type *WiderType =
1807 SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1808 ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1809 MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1810 assert(MaxBECount->getType() == ExactExitCount->getType());
1811
1812 // Can we prove that some other exit must be taken strictly before this
1813 // one?
1814 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1815 ExactExitCount)) {
1816 foldExit(L, ExitingBB, false, DeadInsts);
1817 Changed = true;
1818 continue;
1819 }
1820
1821 // As we run, keep track of which exit counts we've encountered. If we
1822 // find a duplicate, we've found an exit which would have exited on the
1823 // exiting iteration, but (from the visit order) strictly follows another
1824 // which does the same and is thus dead.
1825 if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1826 foldExit(L, ExitingBB, false, DeadInsts);
1827 Changed = true;
1828 continue;
1829 }
1830
1831 // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1832 // here. If we kept track of the min of dominanting exits so far, we could
1833 // discharge exits with EC >= MDEC. This is less powerful than the existing
1834 // transform (since later exits aren't considered), but potentially more
1835 // powerful for any case where SCEV can prove a >=u b, but neither a == b
1836 // or a >u b. Such a case is not currently known.
1837 }
1838 return Changed;
1839 }
1840
/// For a read-only loop with a computable backedge-taken count, rewrite each
/// predicatable exit condition into a loop-invariant comparison of that
/// exit's count against the exact BTC. Returns true if any branch condition
/// was replaced.
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration. Note that this
  // doesn't either a) tell us the loop exits on the first iteration (unless
  // *all* exits are predicateable) or b) tell us *which* exit might be taken.
  // This transformation looks a lot like a restricted form of dead loop
  // elimination, but restricted to read-only loops and without necessarily
  // needing to kill the loop entirely.
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow. We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

  // Returns true when this exit cannot be rewritten by the transform below.
  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them. This assumes trivially lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
        BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) ||
        !Rewriter.isSafeToExpand(ExitCount))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it. Consider
  // two exits (a) and (b) which would both exit on the same iteration. If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b). Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known inequal.
  llvm::sort(ExitingBlocks,
            [&](BasicBlock *A, BasicBlock *B) {
              // std::sort sorts in ascending order, so we want the inverse of
              // the normal dominance relation, plus a tie breaker for blocks
              // unordered by dominance.
              if (DT->properlyDominates(A, B)) return true;
              if (DT->properlyDominates(B, A)) return false;
              return A->getName() < B->getName();
            });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge). If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      // Keep only the prefix of predicatable exits (see ordering note above).
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count. The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes..
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
        return DT->dominates(ExitingBB, L->getLoopLatch());
      }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable. Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests. Any
  // suggestions on how to improve this? I can obviously bail out for outer
  // loops, but that seems less than ideal. MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO:isGuaranteedToTransfer
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks. A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch. Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic. We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      // This exit's count equals the BTC, so it always fires on the final
      // iteration; fold the condition to "always exit".
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      // Replace the condition with "exit count == exact BTC", a
      // loop-invariant test, widening the operands to a common type first.
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
  }

  return Changed;
}
2003
2004 //===----------------------------------------------------------------------===//
2005 // IndVarSimplify driver. Manage several subpasses of IV simplification.
2006 //===----------------------------------------------------------------------===//
2007
/// Top-level driver: run the IV simplification subpasses on \p L in sequence
/// (FP-IV rewriting, simplify/extend, exit-value rewriting, congruent-IV
/// elimination, exit-condition canonicalization/optimization/predication,
/// LFTR) and clean up afterwards. Returns true if the IR changed.
bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //   - LSR currently only supports LoopSimplify-form loops. Indvars'
  //     canonicalization can be a pessimization without LSR to "clean up"
  //     afterwards.
  //   - We depend on having a preheader; in particular,
  //     Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //     and we're in trouble if we can't find the induction variable even when
  //     we've manually inserted one.
  //   - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only
  // Note: Since the result returned by ScalarEvolution may depend on the order
  // in which previous results are added to its cache, the call to
  // getBackedgeTakenCount() may change following SCEV queries.
  const SCEV *BackedgeTakenCount;
  if (VerifyIndvars)
    BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop. Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    // Some nested loops may share same folded exit basic block,
    // thus we need to notify top most loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one. Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them. Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions. Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeader->getTerminator()))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR, and any other change have not interfered with SCEV's
  // ability to compute trip count. We may have *changed* the exit count, but
  // only by reducing it.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    // Truncate the wider of the two counts so they can be compared directly.
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();
#endif

  return Changed;
}
2207
run(Loop & L,LoopAnalysisManager & AM,LoopStandardAnalysisResults & AR,LPMUpdater &)2208 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2209 LoopStandardAnalysisResults &AR,
2210 LPMUpdater &) {
2211 Function *F = L.getHeader()->getParent();
2212 const DataLayout &DL = F->getParent()->getDataLayout();
2213
2214 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2215 WidenIndVars && AllowIVWidening);
2216 if (!IVS.run(&L))
2217 return PreservedAnalyses::all();
2218
2219 auto PA = getLoopPassPreservedAnalyses();
2220 PA.preserveSet<CFGAnalyses>();
2221 if (AR.MSSA)
2222 PA.preserve<MemorySSAAnalysis>();
2223 return PA;
2224 }
2225
2226 namespace {
2227
2228 struct IndVarSimplifyLegacyPass : public LoopPass {
2229 static char ID; // Pass identification, replacement for typeid
2230
IndVarSimplifyLegacyPass__anonc289e1bb0d11::IndVarSimplifyLegacyPass2231 IndVarSimplifyLegacyPass() : LoopPass(ID) {
2232 initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2233 }
2234
runOnLoop__anonc289e1bb0d11::IndVarSimplifyLegacyPass2235 bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2236 if (skipLoop(L))
2237 return false;
2238
2239 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2240 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2241 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2242 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2243 auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2244 auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2245 auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2246 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2247 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2248 MemorySSA *MSSA = nullptr;
2249 if (MSSAAnalysis)
2250 MSSA = &MSSAAnalysis->getMSSA();
2251
2252 IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
2253 return IVS.run(L);
2254 }
2255
getAnalysisUsage__anonc289e1bb0d11::IndVarSimplifyLegacyPass2256 void getAnalysisUsage(AnalysisUsage &AU) const override {
2257 AU.setPreservesCFG();
2258 AU.addPreserved<MemorySSAWrapperPass>();
2259 getLoopAnalysisUsage(AU);
2260 }
2261 };
2262
2263 } // end anonymous namespace
2264
char IndVarSimplifyLegacyPass::ID = 0;

// Register the legacy pass under the "indvars" command-line name.
INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)
2272
/// Factory for the legacy pass manager; caller takes ownership of the pass.
Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}
2276