//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
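//      For instance (an illustrative sketch, not taken from a specific test):
//        for (i = 0; i != n; ++i) s = i * 4;
//        use(s);
//      can be rewritten to compute 's' as '(n - 1) * 4' after the loop,
//      leaving the loop itself without live-out users.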
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened, "Number of indvars widened");
STATISTIC(NumReplaced, "Number of exit values replaced");
STATISTIC(NumLFTR, "Number of loop exit tests replaced");
STATISTIC(NumElimExt, "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV, "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
             "effect in release builds. (Note: this adds additional SCEV "
             "queries potentially changing the analysis result)"));

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
    "indvars-post-increment-ranges", cl::Hidden,
    cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
    cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                cl::desc("Allow widening of indvars to eliminate s/zext"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool WidenIndVars;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to eliminate loop exits based on analyzable exit counts.
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t.
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
///   for(double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for(int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge ^ 1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find the exit condition, which is an fcmp. If it doesn't exist, or if it
  // isn't only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be
  // out of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find the new predicate for the integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false; // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue - InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false; // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue + IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than
    // the exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue - ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false; // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue + IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName() + ".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using
  // the new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something
  // else in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis
  // by the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
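///
/// For example (an illustrative sketch, not from a specific test): given
///   loop:
///     %iv = phi i32 [ %start, %preheader ], [ %iv.next, %latch ]
///     br i1 %invariant.cond, label %exit, label %latch
/// the exit can only be taken on the first iteration, so an exit PHI that
/// reads %iv along this edge can be rewritten to use %start directly.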
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all? (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.) Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable.
  // We could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

//===----------------------------------------------------------------------===//
// Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
      : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
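///
/// For example (a sketch, assuming i64 is a legal integer type on the
/// target): a loop whose i32 IV is repeatedly sign-extended for i64 address
/// arithmetic, e.g. '%idx = sext i32 %iv to i64', can instead be driven by a
/// single i64 IV, making the sext instructions dead.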
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode*, 8> LoopPhis;
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    LoopPhis.push_back(cast<PHINode>(I));
  }
  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs.
    // This forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extension until needed, and avoid running
    // other SCEV based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
                                   &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated Phi node if so. Otherwise, return null. Note
/// that this is less general than SCEV's AddRec checking.
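///
/// The matched pattern looks roughly like (a sketch):
///   %iv = phi i32 [ %start, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add i32 %iv, %invariant.step
/// where, given %iv.next as IncV, this returns %iv.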
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    LLVM_FALLTHROUGH;
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value. Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test. This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE.
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS.
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS.
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result. Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison. This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist. Note that a false result conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // The basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit).
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction*, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading to our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users. Safe as false is a conservative result.
    if (!propagatesPoison(cast<Operator>(I)) && I != Root)
      continue;

    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // the way to the exiting bb.
  return false;
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example,
  // Arguments may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value*, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if this IV has no uses other than the (soon to be rewritten)
/// loop exit test.
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
  Value *IncV = Phi->getIncomingValue(LatchIdx);

  for (User *U : Phi->users())
    if (U != Cond && U != IncV) return false;

  for (User *U : IncV->users())
    if (U != Cond && U != Phi) return false;
  return true;
}

/// Return true if the given phi is a "counter" in L. A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1. Note that L must have exactly one latch.
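///
/// In SCEV terms (a sketch), the phi must be the affine add recurrence
/// {Start,+,1}<L>: any Start is acceptable, but the step must be exactly one.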
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}

/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison. Our strategy here differs for
    // pointers and integer IVs. For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace. For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/unit stride) after the backedge of loop L
/// is taken ExitCount times.
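///
/// For instance (a sketch of the arithmetic): with Start = 5 and
/// ExitCount = 10, the pre-increment limit is 5 + 10 = 15 and the
/// post-increment limit is 5 + 10 + 1 = 16, computed with 2's complement
/// wrapping in the IV's type.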
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip
    // count, which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive unit stride of one. This means we don't
    // have to handle the case of negative offsets (yet) and just need to zero
    // extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    // We could handle pointer IVs other than i8*, but we need to compensate
    // for gep index scaling.
    assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
                             cast<PointerType>(IndVar->getType())
                                 ->getElementType())->isOne() &&
           "unit stride pointer IV must be i8*");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in
    // practice SCEV will fold the pointer arithmetic away as such:
    // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know a priori that the limit must be a constant when evaluated
    // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType())
        > SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
        IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}

/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable. This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
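///
/// For example (a sketch): a loop exiting on 'icmp slt i32 %iv, %n' with a
/// computable trip count can be rewritten to exit on
/// 'icmp ne i32 %iv.next, %limit', where %limit is expanded from
/// Start + ExitCount (plus one for the post-increment form).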
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction * const IncVar =
      cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use. We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type. This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead. We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within the loop.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
        SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
          SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}

//===----------------------------------------------------------------------===//
// sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
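///
/// For example (a sketch): a loop-invariant division computed in the
/// preheader but used only after the loop would otherwise tie up a register
/// across every iteration; moving it into the exit block releases that
/// pressure.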
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop. Also
    // don't move instructions which might read memory, since the loop may
    // modify memory. Note that it's okay if the instruction might have
    // undefined behavior: LoopSimplify guarantees that the preheader
    // dominates the exit block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
            PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());

      if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}

static void replaceExitCond(BranchInst *BI, Value *NewCond,
                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  auto *OldCond = BI->getCondition();
  BI->setCondition(NewCond);
  if (OldCond->use_empty())
    DeadInsts.emplace_back(OldCond);
}

static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  auto *OldCond = BI->getCondition();
  auto *NewCond =
      ConstantInt::get(OldCond->getType(), IsTaken ? ExitIfTrue : !ExitIfTrue);
  replaceExitCond(BI, NewCond, DeadInsts);
}

static void replaceWithInvariantCond(
    const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
    const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  Rewriter.setInsertPoint(BI);
  auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
  auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  if (ExitIfTrue)
    InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
  IRBuilder<> Builder(BI);
  auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
                                     BI->getCondition()->getName());
  replaceExitCond(BI, NewCond, DeadInsts);
}

static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
    const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
    ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  ICmpInst::Predicate Pred;
  Value *LHS, *RHS;
  using namespace PatternMatch;
  BasicBlock *TrueSucc, *FalseSucc;
  if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
                      m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
    return false;

  assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
         "Not a loop exit!");

  // 'LHS pred RHS' should now mean that we stay in loop.
  if (L->contains(FalseSucc))
    Pred = CmpInst::getInversePredicate(Pred);

  // If we are proving loop exit, invert the predicate.
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
    foldExit(L, ExitingBB, Inverted, DeadInsts);
    return true;
  }
  // Further logic works for non-inverted condition only.
  if (Inverted)
    return false;

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    const SCEV *One = SE->getOne(MaxIter->getType());
    MaxIter = SE->getMinusSCEV(MaxIter, One);
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return false;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    foldExit(L, ExitingBB, Inverted, DeadInsts);
  else
    replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
                             Rewriter, DeadInsts);

  return true;
}
1396
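// Walk the rewriteable exiting blocks of L in dominance order and fold each
// exit whose behavior on the exiting iteration can be decided from SCEV's
// exit counts: exits taken on the first iteration, exits that can never be
// taken because a dominating exit always fires strictly earlier, and exits
// made dead by a dominating exit with the same exit count.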
bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Remove all exits which either aren't rewriteable or don't execute on
  // every iteration.
  llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch terminators yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If the condition is already constant, there is nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // The exiting block must dominate the loop latch; otherwise the exit is
    // not taken on every iteration.
    if (!DT->dominates(ExitingBB, L->getLoopLatch()))
      return true;

    return false;
  });

  if (ExitingBlocks.empty())
    return false;

  // Get a symbolic upper bound on the loop backedge taken count.
  const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(MaxExitCount))
    return false;

  // Visit our exiting blocks in order of dominance. Since every exit must
  // dominate the latch, there is a total dominance order between them.
  llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
    // std::sort sorts in ascending order, so we want the inverse of
    // the normal dominance relation.
    if (A == B) return false;
    if (DT->properlyDominates(A, B))
      return true;
    assert(DT->properlyDominates(B, A) &&
           "expected total dominance order!");
    return false;
  });
#ifndef NDEBUG
  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
    assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
  }
#endif

  bool Changed = false;
  bool SkipLastIter = false;
  SmallSet<const SCEV*, 8> DominatingExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount)) {
      // Okay, we do not know the exit count here. Can we at least prove that
      // it will remain the same within the iteration space?
      auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
      auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
        return optimizeLoopExitWithUnknownExitCount(
            L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE,
            Rewriter, DeadInsts);
      };

      // TODO: We might have proved that we can skip the last iteration for
      // this check. In this case, we only want to check the condition on the
      // second-to-last iteration (MaxExitCount - 1). However, there is a
      // nasty corner case:
      //
      //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
      //
      // If we could not prove that len != 0, then we also could not prove
      // that (len - 1) is not UINT_MAX. If we simply query (len - 1), then
      // OptimizeCond will likely not prove anything for it, even if it could
      // prove the same fact for len.
      //
      // As a temporary solution, we query both the last and the second-to-last
      // iterations in the hope that we will be able to prove triviality for
      // at least one of them. We can stop querying MaxExitCount for this case
      // once SCEV understands that (MaxExitCount - 1) will not overflow here.
      if (OptimizeCond(false, false) || OptimizeCond(true, false))
        Changed = true;
      else if (SkipLastIter)
        if (OptimizeCond(false, true) || OptimizeCond(true, true))
          Changed = true;
      continue;
    }

    if (MaxExitCount == ExitCount)
      // If the loop has more than one iteration, all further checks will be
      // executed one iteration fewer.
      SkipLastIter = true;

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this. This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // TODO: Given we know the backedge can't be taken, we should go ahead
    // and break it. Or at least, kill all the header phis and simplify.
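    // For example (illustrative), in 'for (i = 0; i != n; ++i) { if (i == 0)
    // break; ... }' the 'i == 0' exit has an exit count of zero: it is taken
    // on the very first iteration, so its branch condition folds to a
    // constant.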
    if (ExitCount->isZero()) {
      foldExit(L, ExitingBB, true, DeadInsts);
      Changed = true;
      continue;
    }

    // If we end up with a pointer exit count, bail. Note that we can end up
    // with a pointer exit count for one exiting block, and not for another in
    // the same loop.
    if (!ExitCount->getType()->isIntegerTy() ||
        !MaxExitCount->getType()->isIntegerTy())
      continue;

    Type *WiderType =
        SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
    ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
    MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
    assert(MaxExitCount->getType() == ExitCount->getType());

    // Can we prove that some other exit must be taken strictly before this
    // one?
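    // For instance (illustrative), if this exit tests 'i == 20' while a
    // dominating exit tests 'i == 10', the symbolic max backedge-taken count
    // is at most 10; proving MaxExitCount <u 20 at loop entry shows this exit
    // can never be taken, so its condition folds to "stay in loop".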
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
                                     MaxExitCount, ExitCount)) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // As we run, keep track of which exit counts we've encountered. If we
    // find a duplicate, we've found an exit which would have exited on the
    // exiting iteration, but (from the visit order) strictly follows another
    // which does the same and is thus dead.
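    // For instance (illustrative), with two exits both guarded by 'i == n',
    // the dominating one is always reached first on the exiting iteration,
    // so the later duplicate is dead and can be folded to "stay in loop".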
    if (!DominatingExitCounts.insert(ExitCount).second) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // TODO: There might be another opportunity to leverage SCEV's reasoning
    // here. If we kept track of the minimum of dominating exit counts seen so
    // far, we could discharge exits with EC >= MDEC. This is less powerful
    // than the existing transform (since later exits aren't considered), but
    // potentially more powerful for any case where SCEV can prove a >=u b,
    // but neither a == b nor a >u b. Such a case is not currently known.
  }
  return Changed;
}

bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration. Note that this
  // neither a) tells us the loop exits on the first iteration (unless *all*
  // exits are predicateable) nor b) tells us *which* exit might be taken.
  // This transformation looks a lot like dead loop elimination, but
  // restricted to read-only loops and without necessarily needing to kill
  // the loop entirely.
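  // For example (illustrative), in a read-only search loop such as
  //   for (i = 0; i != len; i++) if (a[i] == key) break;
  // whose computed values are unused after the loop, each predicatable exit
  // test can be replaced by a loop-invariant comparison of that exit's exit
  // count against the exact backedge-taken count (see below).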
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow. We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) ||
      !SE->isLoopInvariant(ExactBTC, L) ||
      !isSafeToExpand(ExactBTC, *SE))
    return false;

  // If we end up with a pointer exit count, bail. It may be unsized.
  if (!ExactBTC->getType()->isIntegerTy())
    return false;

  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch terminators yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If the condition is already constant, there is nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them. This assumes trivial LCSSA phis
    // have already been removed; TODO: generalize.
    BasicBlock *ExitBlock =
        BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) ||
        !SE->isLoopInvariant(ExitCount, L) ||
        !isSafeToExpand(ExitCount, *SE))
      return true;

    // If we end up with a pointer exit count, bail. It may be unsized.
    if (!ExitCount->getType()->isIntegerTy())
      return true;

    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it. Consider
  // two exits (a) and (b) which would both exit on the same iteration. If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b). Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known to be
  // unequal.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse of
               // the normal dominance relation, plus a tie breaker for blocks
               // unordered by dominance.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               return A->getName() < B->getName();
             });
  // Check to see if our exiting blocks form a total order (i.e. a linear
  // chain of exits before the backedge). If they don't, reasoning about
  // reachability is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count. The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    return DT->dominates(ExitingBB, L->getLoopLatch());
  }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable. Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests. Any
  // suggestions on how to improve this? I can obviously bail out for outer
  // loops, but that seems less than ideal. MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO: isGuaranteedToTransfer
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks. A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch. Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic. We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
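  // Concretely, each rewritten exit compares its loop-invariant exit count
  // against the exact backedge-taken count: the new condition is
  // 'icmp ne %exit_count, %exact_btc' when the true successor stays in the
  // loop (and 'icmp eq' otherwise), so the branch no longer depends on any
  // loop-varying value.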
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with
  //    preheaders, and we're in trouble if we can't find the induction
  //    variable even when we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only.
  // Note: Since the result returned by ScalarEvolution may depend on the order
  // in which previous results are added to its cache, the call to
  // getBackedgeTakenCount() may change following SCEV queries.
  const SCEV *BackedgeTakenCount;
  if (VerifyIndvars)
    BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  // Try to eliminate loop exits based on analyzable exit counts.
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV. Some nested loops may
    // share the same folded exit basic block, so we need to notify the
    // topmost loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();
    BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch terminators yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one. Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them. Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions. Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeaderBR))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
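  // For example (illustrative), an invariant 'x = a + b' computed in the
  // preheader but used only after the loop can be moved into the exit block,
  // so x is no longer live across the loop body.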
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR and any other changes have not interfered with SCEV's
  // ability to compute trip count. We may have *changed* the exit count, but
  // only by reducing it.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
                     WidenIndVars && AllowIVWidening);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}

namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
    auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
    MemorySSA *MSSA = nullptr;
    if (MSSAAnalysis)
      MSSA = &MSSAAnalysis->getMSSA();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreserved<MemorySSAWrapperPass>();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}