//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
84 #include "llvm/Transforms/Utils/LoopUtils.h" 85 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 86 #include "llvm/Transforms/Utils/SimplifyIndVar.h" 87 #include <cassert> 88 #include <cstdint> 89 #include <utility> 90 91 using namespace llvm; 92 using namespace PatternMatch; 93 94 #define DEBUG_TYPE "indvars" 95 96 STATISTIC(NumWidened , "Number of indvars widened"); 97 STATISTIC(NumReplaced , "Number of exit values replaced"); 98 STATISTIC(NumLFTR , "Number of loop exit tests replaced"); 99 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); 100 STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); 101 102 // Trip count verification can be enabled by default under NDEBUG if we 103 // implement a strong expression equivalence checker in SCEV. Until then, we 104 // use the verify-indvars flag, which may assert in some cases. 105 static cl::opt<bool> VerifyIndvars( 106 "verify-indvars", cl::Hidden, 107 cl::desc("Verify the ScalarEvolution result after running indvars. Has no " 108 "effect in release builds. (Note: this adds additional SCEV " 109 "queries potentially changing the analysis result)")); 110 111 static cl::opt<ReplaceExitVal> ReplaceExitValue( 112 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), 113 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), 114 cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), 115 clEnumValN(OnlyCheapRepl, "cheap", 116 "only replace exit value when the cost is cheap"), 117 clEnumValN(NoHardUse, "noharduse", 118 "only replace exit values when loop def likely dead"), 119 clEnumValN(AlwaysRepl, "always", 120 "always replace exit value whenever possible"))); 121 122 static cl::opt<bool> UsePostIncrementRanges( 123 "indvars-post-increment-ranges", cl::Hidden, 124 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), 125 cl::init(true)); 126 127 static cl::opt<bool> 128 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), 129 cl::desc("Disable Linear Function Test Replace optimization")); 130 131 static cl::opt<bool> 132 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), 133 cl::desc("Predicate conditions in read only loops")); 134 135 static cl::opt<bool> 136 AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), 137 cl::desc("Allow widening of indvars to eliminate s/zext")); 138 139 namespace { 140 141 struct RewritePhi; 142 143 class IndVarSimplify { 144 LoopInfo *LI; 145 ScalarEvolution *SE; 146 DominatorTree *DT; 147 const DataLayout &DL; 148 TargetLibraryInfo *TLI; 149 const TargetTransformInfo *TTI; 150 std::unique_ptr<MemorySSAUpdater> MSSAU; 151 152 SmallVector<WeakTrackingVH, 16> DeadInsts; 153 bool WidenIndVars; 154 155 bool handleFloatingPointIV(Loop *L, PHINode *PH); 156 bool rewriteNonIntegerIVs(Loop *L); 157 158 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); 159 /// Try to improve our exit conditions by converting condition from signed 160 /// to unsigned or rotating computation out of the loop. 161 /// (See inline comment about why this is duplicated from simplifyAndExtend) 162 bool canonicalizeExitCondition(Loop *L); 163 /// Try to eliminate loop exits based on analyzeable exit counts 164 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter); 165 /// Try to form loop invariant tests for loop exits by changing how many 166 /// iterations of the loop run when that is unobservable. 
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t.
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
///   for(double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for(int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge ^ 1;

  // Check the incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a ConstantFP, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find the exit condition, which is an fcmp. If it doesn't exist, or if it
  // isn't only used by a branch, we can't transform it.
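  // E.g. in the pattern documented above, Incr's two users are the PHI itself
  // and an fcmp like 'fcmp olt double %i.next, 1.0e4' feeding the latch branch
  // (operands illustrative).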
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false; // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue - InitValue);
    // Check for infinite loop, either:
    //   while (i <= Exit)  or  until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false; // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
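    // E.g. with the hypothetical values ExitValue = INT32_MAX - 1 and
    // IncValue = 4, ExitValue + IncValue wraps to a negative i32, so the
    // check below rejects the transform for such non-exact strides.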
    if (Leftover != 0 && int32_t(ExitValue + IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue - ExitValue);
    // Check for infinite loop, either:
    //   while (i >= Exit)  or  until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false; // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue + IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName() + ".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something
  // else in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
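  // We collect the candidate PHIs as WeakTrackingVHs below because
  // handleFloatingPointIV may delete or RAUW header PHIs while we iterate, so
  // each handle is re-checked before use.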
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all? (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.) Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get the condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
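        // E.g. for a header phi '%iv = phi [ %init, %preheader ], [ %next, %latch ]',
        // the exit PHI's incoming value can be rewritten to %init (names are
        // illustrative).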
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable.
  // We could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

//===----------------------------------------------------------------------===//
// Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
      : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode *, 8> LoopPhis;
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    LoopPhis.push_back(cast<PHINode>(I));
  }
  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs.
    // This forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extension until needed, and avoid running other
    // SCEV-based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
                                   &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated Phi node if so. Otherwise, return null. Note
/// that this is less general than SCEV's AddRec checking.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    LLVM_FALLTHROUGH;
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value. Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test. This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE.
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS.
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS.
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Return true if undefined behavior would provably be executed on the path
/// to OnPathTo if Root produced a poison result. Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison. This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist. Note that a false result conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // The basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit).
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction *, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading to our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users. Safe as false is a conservative result.
    if (!propagatesPoison(cast<Operator>(I)) && I != Root)
      continue;

    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // the way to the exiting bb.
  return false;
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value *> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example,
  // Arguments may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Loads and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth + 1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value *, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if this IV has no uses other than the (soon to be rewritten)
/// loop exit test.
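/// That is, the only users of the phi and its increment are each other and
/// Cond.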
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
  Value *IncV = Phi->getIncomingValue(LatchIdx);

  for (User *U : Phi->users())
    if (U != Cond && U != IncV) return false;

  for (User *U : IncV->users())
    if (U != Cond && U != Phi) return false;
  return true;
}

/// Return true if the given phi is a "counter" in L. A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1. Note that L must have exactly one latch.
static bool isLoopCounter(PHINode *Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}

/// Search the loop header for a loop counter (an add rec w/ a step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
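      // Both the phi and its latch increment count as existing exit-test uses
      // here, hence the two checks below.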
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison. Our strategy here differs for
    // pointers and integer IVs. For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace. For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/ unit stride) after the backedge of loop L
/// is taken ExitCount times.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip
    // count, which is an unsigned value. FindLoopCounter only allows induction
    // variables with a positive stride of one, so we don't have to handle the
    // case of negative offsets (yet) and just need to zero extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    // We could handle pointer IVs other than i8*, but we need to compensate
    // for gep index scaling.
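    // E.g. an i32* IV advances by 4 bytes per index step; hypothetically
    // IVOffset could be scaled by the element size to support that, but
    // instead we simply assert below that the element size is one.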
    assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
                             cast<PointerType>(IndVar->getType())
                                 ->getElementType())->isOne() &&
           "unit stride pointer IV must be i8*");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in
    // practice SCEV will fold the pointer arithmetic away as such:
    //   BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know a priori that the limit must be a constant when evaluated
    // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType())
            > SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
        IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}

/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable. This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count, which is actually a much broader
/// range than just linear tests.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction *const IncVar =
      cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
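  // (It may be switched to the post-incremented value just below, when the
  // exiting block is also the latch.)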
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use. We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously
  // dynamically dead (and as such may be arbitrarily poison). We remove any
  // nowrap flags that SCEV didn't infer for the post-inc addrec (even if we
  // use a pre-inc check), because the pre-inc addrec flags may be adopted from
  // the original instruction, while SCEV has to explicitly prove the post-inc
  // nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type. This is safe
  // since we know (from the exit count bitwidth) that we can't self-wrap in
  // the narrower type.
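  // E.g. a widened i64 IV compared against an i32 limit becomes
  // 'icmp ne i32 (trunc i64 %iv), %limit' unless the zext/sext checks below
  // prove that extending the limit instead is equivalent (types illustrative).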
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead. We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
        SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
          SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make the old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}

//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
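    // Stop once we reach the preheader's leading PHI nodes (if any); only
    // non-PHI instructions are candidates for sinking.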
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop. Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink allocas: we never want to sink static allocas out of the
    // entry block, and correctly sinking dynamic allocas requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
            PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}

static void replaceExitCond(BranchInst *BI, Value *NewCond,
                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  auto *OldCond = BI->getCondition();
  BI->setCondition(NewCond);
  if (OldCond->use_empty())
    DeadInsts.emplace_back(OldCond);
}

static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  auto *OldCond = BI->getCondition();
  auto *NewCond =
      ConstantInt::get(OldCond->getType(), IsTaken ? ExitIfTrue : !ExitIfTrue);
  replaceExitCond(BI, NewCond, DeadInsts);
}

static void replaceLoopPHINodesWithPreheaderValues(
    Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
  auto *LoopPreheader = L->getLoopPreheader();
  auto *LoopHeader = L->getHeader();
  for (auto &PN : LoopHeader->phis()) {
    auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
    PN.replaceAllUsesWith(PreheaderIncoming);
    DeadInsts.emplace_back(&PN);
  }
}

static void replaceWithInvariantCond(
    const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
    const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  Rewriter.setInsertPoint(BI);
  auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
  auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  if (ExitIfTrue)
    InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
  IRBuilder<> Builder(BI);
  auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
                                     BI->getCondition()->getName());
  replaceExitCond(BI, NewCond, DeadInsts);
}

static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
    const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
    ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  ICmpInst::Predicate Pred;
  Value *LHS, *RHS;
  BasicBlock *TrueSucc, *FalseSucc;
  if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
                      m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
    return false;

  assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
         "Not a loop exit!");

  // 'LHS pred RHS' should now mean that we stay in the loop.
  if (L->contains(FalseSucc))
    Pred = CmpInst::getInversePredicate(Pred);

  // If we are proving the loop exit, invert the predicate.
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
    foldExit(L, ExitingBB, Inverted, DeadInsts);
    return true;
  }
  // Further logic works for the non-inverted condition only.
  if (Inverted)
    return false;

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    const SCEV *One = SE->getOne(MaxIter->getType());
    MaxIter = SE->getMinusSCEV(MaxIter, One);
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
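  // E.g. an in-loop test like 'iv u< len' that can execute at most MaxIter
  // times may fold to a single loop-invariant comparison between len and the
  // iteration bound (illustrative shape; the exact predicate comes from SCEV).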
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return false;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    foldExit(L, ExitingBB, Inverted, DeadInsts);
  else
    replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
                             Rewriter, DeadInsts);

  return true;
}

bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part of SimplifyIndVars reasoning.
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags.  The alternative would be extending the set of
  // "interesting" IV users to include the icmp, but doing that regresses
  // results in practice: it queries SCEVs before the trip counts which rely
  // on them, which causes SCEV to cache sub-optimal answers.  That concern
  // about caching sub-optimal results is why we only query SCEVs of the
  // loop invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning the cache with sub-optimal results.  For the must-execute
    // case, this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    if (FullCR.contains(SE->getUnsignedRange(SE->getSCEV(RHS)))) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed.  We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
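  // For example (illustrative IR; names hypothetical), when %n is loop
  // invariant and its unsigned range is known to fit in i32:
  //
  //   loop:
  //     %wide = zext i32 %iv to i64
  //     %c = icmp ult i64 %wide, %n
  //
  // can be rotated to
  //
  //   preheader:
  //     %n.trunc = trunc i64 %n to i32
  //   ...
  //   loop:
  //     %c = icmp ult i32 %iv, %n.trunc
  //
  // moving the extension work out of the loop body.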
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    if (!LHS->hasOneUse())
      // Can't rotate without increasing instruction count
      continue;

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // Given an icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS,
    // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when zext is loop varying and RHS is loop invariant.  This converts
    // loop varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      auto *NewRHS =
          CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
                           L->getLoopPreheader()->getTerminator());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };

    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    if (FullCR.contains(SE->getUnsignedRange(SE->getSCEV(RHS)))) {
      doRotateTransform();
      Changed = true;
      // Note, we are leaving SCEV in an unfortunately imprecise case here
      // as rotation tends to reveal information about trip counts not
      // previously visible.
      continue;
    }
  }

  return Changed;
}

bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Remove all exits which aren't both rewriteable and guaranteed to execute
  // on every iteration.
  llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // Likewise, the loop latch must be dominated by the exiting BB.
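    // For example (illustrative C), in:
    //
    //   for (i = 0; i != n; i++)
    //     if (flag(i))
    //       if (i > k)
    //         break;
    //
    // the inner exit only executes on iterations where flag(i) holds, so its
    // exit count does not bound the loop and rewriting it would be unsound.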
    if (!DT->dominates(ExitingBB, L->getLoopLatch()))
      return true;

    return false;
  });

  if (ExitingBlocks.empty())
    return false;

  // Get a symbolic upper bound on the loop backedge taken count.
  const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(MaxExitCount))
    return false;

  // Visit our exit blocks in order of dominance.  Because all exits must
  // dominate the latch, there is a total dominance order between them.
  llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
    // std::sort sorts in ascending order, so we want the inverse of
    // the normal dominance relation.
    if (A == B) return false;
    if (DT->properlyDominates(A, B))
      return true;
    else {
      assert(DT->properlyDominates(B, A) &&
             "expected total dominance order!");
      return false;
    }
  });
#ifndef NDEBUG
  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
    assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
  }
#endif

  bool Changed = false;
  bool SkipLastIter = false;
  SmallSet<const SCEV*, 8> DominatingExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount)) {
      // Okay, we do not know the exit count here.  Can we at least prove
      // that it will remain the same within the iteration space?
      auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
      auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
        return optimizeLoopExitWithUnknownExitCount(
            L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE,
            Rewriter, DeadInsts);
      };

      // TODO: We might have proved that we can skip the last iteration for
      // this check.  In this case, we only want to check the condition on
      // the pre-last iteration (MaxExitCount - 1).  However, there is a
      // nasty corner case:
      //
      //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
      //
      // If we could not prove that len != 0, then we also could not prove
      // that (len - 1) is not UINT_MAX.  If we simply query (len - 1), then
      // OptimizeCond will likely not prove anything for it, even if it could
      // prove the same fact for len.
      //
      // As a temporary solution, we query both the last and pre-last
      // iterations in the hope that we will be able to prove triviality for
      // at least one of them.  We can stop querying MaxExitCount for this
      // case once SCEV understands that (MaxExitCount - 1) will not overflow
      // here.
      if (OptimizeCond(false, false) || OptimizeCond(true, false))
        Changed = true;
      else if (SkipLastIter)
        if (OptimizeCond(false, true) || OptimizeCond(true, true))
          Changed = true;
      continue;
    }

    if (MaxExitCount == ExitCount)
      // If the loop has more than 1 iteration, all further checks will be
      // executed 1 iteration less.
      SkipLastIter = true;

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this.  This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // We know that the backedge can't be taken, so we replace all
    // the header PHIs with values coming from the preheader.
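    // For example (illustrative C), in:
    //
    //   for (i = 0; i != n; i++) {
    //     if (i >= 0) break;  // exit count is provably zero
    //     ...
    //   }
    //
    // the exit is taken before the backedge ever executes, so each header
    // PHI can be replaced with its incoming value from the preheader.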
    if (ExitCount->isZero()) {
      foldExit(L, ExitingBB, true, DeadInsts);
      replaceLoopPHINodesWithPreheaderValues(L, DeadInsts);
      Changed = true;
      continue;
    }

    assert(ExitCount->getType()->isIntegerTy() &&
           MaxExitCount->getType()->isIntegerTy() &&
           "Exit counts must be integers");

    Type *WiderType =
        SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
    ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
    MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
    assert(MaxExitCount->getType() == ExitCount->getType());

    // Can we prove that some other exit must be taken strictly before this
    // one?
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
                                     MaxExitCount, ExitCount)) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // As we run, keep track of which exit counts we've encountered.  If we
    // find a duplicate, we've found an exit which would have exited on the
    // exiting iteration, but (from the visit order) strictly follows another
    // which does the same and is thus dead.
    if (!DominatingExitCounts.insert(ExitCount).second) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // TODO: There might be another opportunity to leverage SCEV's reasoning
    // here.  If we kept track of the min of dominating exits so far, we
    // could discharge exits with EC >= MDEC.  This is less powerful than the
    // existing transform (since later exits aren't considered), but
    // potentially more powerful for any case where SCEV can prove a >=u b,
    // but neither a == b nor a >u b.  Such a case is not currently known.
  }
  return Changed;
}

bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form.  If we have a read-only loop, and we can tell that we must exit
  // down a path which does not need any of the values computed within the
  // loop, we can rewrite the loop to exit on the first iteration.  Note that
  // this neither a) tells us the loop exits on the first iteration (unless
  // *all* exits are predicatable) nor b) tells us *which* exit might be
  // taken.  This transformation looks a lot like a restricted form of dead
  // loop elimination, but restricted to read-only loops and without
  // necessarily needing to kill the loop entirely.
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow.  We have to eliminate the possibility
  // of implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
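    // For example (illustrative C), in:
    //
    //   for (i = 0; i < n; i++)
    //     for (j = 0; j < m; j++)
    //       if (cond(i, j))
    //         goto done;  // exits both loops
    //
    // predicating this exit against the outer loop's counts would also
    // change how many iterations of the inner loop execute.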
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them.  This assumes trivial lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
        BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it.  Consider
  // two exits (a) and (b) which would both exit on the same iteration.  If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path,
  // then we could convert a loop from exiting through (a) to one exiting
  // through (b).  Note that this problem exists only for exits with the same
  // exit count, and we could be more aggressive when exit counts are known
  // to be unequal.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse
               // of the normal dominance relation, plus a tie breaker for
               // blocks unordered by dominance.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               return A->getName() < B->getName();
             });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge).  If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count.  The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
           return DT->dominates(ExitingBB, L->getLoopLatch());
         }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable.  Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // FIXME: As structured, this is O(I^2) for loop nests.  We can obviously
  // bail out for outer loops, but that seems less than ideal.  MemorySSA can
  // find memory writes; is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO: isGuaranteedToTransfer
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks.  A
  // couple of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch.  Hoisting introduces
  //    additional legality constraints and we leave that to dedicated logic.
  //    We want to predicate even if we can't insert a loop invariant
  //    expression as peeling or unrolling will likely reduce the cost of the
  //    otherwise loop varying check.
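  // For example (illustrative IR; %ec and %btc hypothetical), an exit whose
  // exit count is %ec, in a loop whose exact backedge-taken count is %btc,
  // has its loop-varying condition replaced with an invariant test computed
  // in the preheader:
  //
  //   preheader:
  //     %take.exit = icmp eq iN %ec, %btc
  //   ...
  //   exiting:
  //     br i1 %take.exit, label %exit, label %body
  //
  // i.e. the exit fires exactly when it is the exit bounding the loop's
  // execution.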
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble.  Some notes:
  //   - LSR currently only supports LoopSimplify-form loops.  Indvars'
  //     canonicalization can be a pessimization without LSR to "clean up"
  //     afterwards.
  //   - We depend on having a preheader; in particular,
  //     Loop::getCanonicalInductionVariable only supports loops with
  //     preheaders, and we're in trouble if we can't find the induction
  //     variable even when we've manually inserted one.
  //   - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only.
  // Note: Since the result returned by ScalarEvolution may depend on the
  // order in which previous results are added to its cache, the call to
  // getBackedgeTakenCount() may change following SCEV queries.
  const SCEV *BackedgeTakenCount;
  if (VerifyIndvars)
    BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV.  We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated.  This helps
  // SCEV set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop.  Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzable exit counts.
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    // Some nested loops may share the same folded exit basic block,
    // thus we need to notify the topmost loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
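  // For example (illustrative C; names hypothetical), given a loop with a
  // computable trip count such as:
  //
  //   for (i = 0; i < n; i += 4) { ... }
  //
  // the exit test is replaced with an equality compare of the IV against the
  // exit value expanded from the trip count (roughly 'i != END' with
  // END = 4 * ceil(n / 4)), a canonical form that later analyses and
  // transforms handle more easily.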
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();
    BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the
      // innermost loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes
      // refine existing ones; this allows exit counts to be folded to zero
      // which weren't when optimizeLoopExits saw them.  Arguably, we should
      // iterate until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeaderBR))
        continue;

      // Check preconditions for proper SCEVExpander operation.  SCEV does
      // not express SCEVExpander's dependencies, such as LoopSimplify.
      // Instead any pass that uses the SCEVExpander must do it.  This does
      // not work well for loop passes because SCEVExpander makes assumptions
      // about all loops, while LoopPassManager only forces the current loop
      // to be simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV.  Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR and any other changes have not interfered with SCEV's
  // ability to compute trip count.  We may have *changed* the exit count,
  // but only by reducing it.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
                     WidenIndVars && AllowIVWidening);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}

namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
    auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
    MemorySSA *MSSA = nullptr;
    if (MSSAAnalysis)
      MSSA = &MSSAAnalysis->getMSSA();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreserved<MemorySSAWrapperPass>();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}