//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened, "Number of indvars widened");
STATISTIC(NumReplaced, "Number of exit values replaced");
STATISTIC(NumLFTR, "Number of loop exit tests replaced");
STATISTIC(NumElimExt, "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV, "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
             "effect in release builds. (Note: this adds additional SCEV "
             "queries potentially changing the analysis result)"));

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
    "indvars-post-increment-ranges", cl::Hidden,
    cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
    cl::init(true));

static cl::opt<bool>
    DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
                cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
    LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                    cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
    AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                    cl::desc("Allow widening of indvars to eliminate s/zext"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool WidenIndVars;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to improve our exit conditions by converting the condition from
  /// signed to unsigned or by rotating the computation out of the loop.
  /// (See the inline comment about why this is duplicated from
  /// simplifyAndExtend.)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzable exit counts.
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
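  /// For example (an illustrative sketch, not code from this file): in a
  /// read-only search loop such as
  ///   for (i = 0; i != len; i++)
  ///     if (a[i] == key) break;
  /// running a few extra iterations past the match is unobservable, so the
  /// data-dependent early exit may be replaced with a loop-invariant test
  /// when that can be proven safe.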
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t.
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
///   for(double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for(int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge^1;

  // Check the incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a ConstantFP, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find the exit condition, which is an fcmp. If it doesn't exist, or if it
  // isn't only used by a branch, we can't transform it.
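  // Illustrative shape of what we are looking for (a hand-written sketch, not
  // taken from a test case):
  //   %iv  = phi double [ 0.0, %preheader ], [ %inc, %backedge ]
  //   %inc = fadd double %iv, 1.0          ; users: %iv and %cmp only
  //   %cmp = fcmp olt double %inc, 1.0e4
  //   br i1 %cmp, label %loop, label %exit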
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be
  // out of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find the new predicate for the integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for
  // this now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in a signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for an infinite loop, either:
    //   while (i <= Exit)  or  until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
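    // For example (hypothetical numbers): with ExitValue == INT32_MAX - 1 and
    // IncValue == 3, ExitValue + IncValue wraps to a negative i32, so an
    // inequality test on the integer IV could step over the limit and keep
    // looping, which the fp IV would not have done.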
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for an infinite loop, either:
    //   while (i >= Exit)  or  until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert the new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using
  // the new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something
  // else in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis
  // by the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify that the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all? (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.) Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get the condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
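        // For example (an illustrative sketch): given a header phi
        //   %iv = phi i32 [ %start, %preheader ], [ %next, %latch ]
        // an exit taken on the first iteration must observe %iv == %start,
        // i.e. the value incoming from the preheader.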
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable.
  // We could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType ||
      Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its user(s), or 'signed'
  // if there are multiple users with both sign- and zero extensions,
  // in order not to introduce nondeterministic behaviour based on the
  // unspecified order of a PHI node's users-iterator.
  WI.IsSigned |= IsSigned;
}

//===----------------------------------------------------------------------===//
//  Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
      : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode*, 8> LoopPhis;
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    LoopPhis.push_back(cast<PHINode>(I));
  }
  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs.
    // This forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extension until needed, and avoid running
    // other SCEV-based analyses prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
                                   &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated Phi node if so. Otherwise, return null.
/// Note that this is less general than SCEV's AddRec checking.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    LLVM_FALLTHROUGH;
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value. Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test. This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE.
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS.
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS.
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if the PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result.
/// Note that this doesn't say anything about whether OnPathTo is actually
/// executed or whether Root is actually poison. This can be used to assess
/// whether a new use of Root can be added at a location which is control
/// equivalent with OnPathTo (such as immediately before it) without
/// introducing UB which didn't previously exist. Note that a false result
/// conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // The basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit).
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction*, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading to our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users. Safe as false is a conservative result.
    if (!propagatesPoison(cast<Operator>(I)) && I != Root)
      continue;

    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // the way to the exiting bb.
  return false;
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example,
  // Arguments may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value*, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if this IV has no uses other than the (soon to be rewritten)
/// loop exit test; that is, the IV is almost dead.
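/// Illustrative almost-dead shape (a hand-written sketch, not from a test):
///   %iv  = phi i32 [ %start, %ph ], [ %inc, %latch ]  ; users: %inc, %cmp
///   %inc = add i32 %iv, 1                             ; users: %iv,  %cmp
///   %cmp = icmp ne i32 %inc, %limit                   ; the exit test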
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
  Value *IncV = Phi->getIncomingValue(LatchIdx);

  for (User *U : Phi->users())
    if (U != Cond && U != IncV) return false;

  for (User *U : IncV->users())
    if (U != Cond && U != Phi) return false;
  return true;
}

/// Return true if the given phi is a "counter" in L. A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1. Note that L must have exactly one latch.
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}

/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison. Our strategy here differs for
    // pointers and integer IVs. For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace. For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/unit stride) after the backedge of loop L
/// is taken ExitCount times.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();
  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip
    // count, which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive unit stride of one. This means we don't
    // have to handle the case of negative offsets (yet) and just need to zero
    // extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in
    // practice SCEV will fold the pointer arithmetic away as such:
    //   BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know a priori that the limit must be a constant when evaluated
    // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType())
        > SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
        IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}

/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable. This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction * const IncVar =
      cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB-introducing use. We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type. This is safe
  // since we know (from the exit count bitwidth) that we can't self-wrap in
  // the narrower type.
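  // For example (an illustrative sketch): with an i64 IV but an i32 exit
  // count, we would rather compare trunc(IV) against the i32 limit inside the
  // loop, or, when SCEV can prove the extension lossless, zext/sext the limit
  // once outside the loop and compare the wide IV directly.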
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead. We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
        SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
          SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make the old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}

//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop. Also
    // don't move instructions which might read memory, since the loop may
    // modify memory. Note that it's okay if the instruction might have
    // undefined behavior: LoopSimplify guarantees that the preheader
    // dominates the exit block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
            PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}

static void replaceExitCond(BranchInst *BI, Value *NewCond,
                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  auto *OldCond = BI->getCondition();
  BI->setCondition(NewCond);
  if (OldCond->use_empty())
    DeadInsts.emplace_back(OldCond);
}

static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  auto *OldCond = BI->getCondition();
  auto *NewCond = ConstantInt::get(OldCond->getType(),
                                   IsTaken ? ExitIfTrue : !ExitIfTrue);
  replaceExitCond(BI, NewCond, DeadInsts);
}

static void replaceLoopPHINodesWithPreheaderValues(
    Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
  auto *LoopPreheader = L->getLoopPreheader();
  auto *LoopHeader = L->getHeader();
  for (auto &PN : LoopHeader->phis()) {
    auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
    PN.replaceAllUsesWith(PreheaderIncoming);
    DeadInsts.emplace_back(&PN);
  }
}

static void replaceWithInvariantCond(
    const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
    const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  Rewriter.setInsertPoint(BI);
  auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
  auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  if (ExitIfTrue)
    InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
  IRBuilder<> Builder(BI);
  auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
                                     BI->getCondition()->getName());
  replaceExitCond(BI, NewCond, DeadInsts);
}

static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
    const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
    ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  ICmpInst::Predicate Pred;
  Value *LHS, *RHS;
  BasicBlock *TrueSucc, *FalseSucc;
  if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
                      m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
    return false;

  assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
         "Not a loop exit!");

  // 'LHS pred RHS' should now mean that we stay in the loop.
  if (L->contains(FalseSucc))
    Pred = CmpInst::getInversePredicate(Pred);

  // If we are proving the loop exit, invert the predicate.
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
    foldExit(L, ExitingBB, Inverted, DeadInsts);
    return true;
  }
  // Further logic works for the non-inverted condition only.
  if (Inverted)
    return false;

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    const SCEV *One = SE->getOne(MaxIter->getType());
    MaxIter = SE->getMinusSCEV(MaxIter, One);
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
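  // For example (an illustrative sketch): for a check `i u< X` inside
  //   for (i = 0; i != len; i++) { ... if (i u< X) ... }
  // with MaxIter == len, the condition holds on all of the first MaxIter
  // iterations iff the invariant `len u<= X` holds (assuming SCEV can prove
  // the required facts), so the in-loop test may be replaced by it.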
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return false;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    foldExit(L, ExitingBB, Inverted, DeadInsts);
  else
    replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
                             Rewriter, DeadInsts);

  return true;
}

bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part of SimplifyIndVars'
  // reasoning. We need to duplicate it because given icmp zext(small-iv), C,
  // IVUsers never reaches the icmp since the zext doesn't fold to an AddRec
  // unless it already has flags. The alternative to this would be to extend
  // the set of "interesting" IV users to include the icmp, but doing that
  // regresses results in practice by querying SCEVs before the trip counts
  // which rely on them, which results in SCEV caching sub-optimal answers.
  // The concern about caching sub-optimal results is why we only query SCEVs
  // of the loop invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning the cache with sub-optimal results. For the must-execute
    // case, this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case.
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed. We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
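  // For example (an illustrative sketch of the rewrite below):
  //   %z = zext i32 %iv to i64
  //   %c = icmp ult i64 %z, %n          ; %n loop invariant
  // becomes, when %n is known to fit in 32 bits,
  //   %t = trunc i64 %n to i32          ; hoisted into the preheader
  //   %c = icmp ult i32 %iv, %t
  // turning the loop-varying zext into loop-invariant work.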
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the
    // number of instructions.  The exception is when we have a zext(add-rec):
    // we know we need to get rid of the zext for SCEV to be able to compute
    // a trip count for said loops; we consider the new trip count valuable
    // enough to increase instruction count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given an icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS,
    // replace it with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when the zext is loop varying and RHS is loop invariant.  This converts
    // loop-varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      auto *NewRHS =
          CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
                           L->getLoopPreheader()->getTerminator());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };

    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note, we are leaving SCEV in an unfortunately imprecise case here
      // as rotation tends to reveal information about trip counts not
      // previously visible.
      continue;
    }
  }

  return Changed;
}

bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Remove all exits which aren't both rewriteable and guaranteed to execute
  // on every iteration.
  llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
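    // (For example, a conditional branch inside an inner loop may jump
    // straight out of the enclosing loop as well; its condition belongs to
    // the inner loop's iteration space, not ours.)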
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // Likewise, the loop latch must be dominated by the exiting BB.
    if (!DT->dominates(ExitingBB, L->getLoopLatch()))
      return true;

    return false;
  });

  if (ExitingBlocks.empty())
    return false;

  // Get a symbolic upper bound on the loop backedge taken count.
  const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(MaxExitCount))
    return false;

  // Visit our exit blocks in order of dominance.  Since all exits must
  // dominate the latch, there is a total dominance order between them.
  llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
    // std::sort sorts in ascending order, so we want the inverse of
    // the normal dominance relation.
    if (A == B) return false;
    if (DT->properlyDominates(A, B))
      return true;
    else {
      assert(DT->properlyDominates(B, A) &&
             "expected total dominance order!");
      return false;
    }
  });
#ifndef NDEBUG
  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
    assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
  }
#endif

  bool Changed = false;
  bool SkipLastIter = false;
  SmallSet<const SCEV*, 8> DominatingExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount)) {
      // Okay, we do not know the exit count here.  Can we at least prove
      // that it will remain the same within the iteration space?
      auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
      auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
        return optimizeLoopExitWithUnknownExitCount(
            L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE,
            Rewriter, DeadInsts);
      };

      // TODO: We might have proved that we can skip the last iteration for
      // this check.  In this case, we only want to check the condition on the
      // pre-last iteration (MaxExitCount - 1).  However, there is a nasty
      // corner case:
      //
      //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
      //
      // If we could not prove that len != 0, then we also could not prove
      // that (len - 1) is not UINT_MAX.  If we simply query (len - 1), then
      // OptimizeCond will likely not prove anything for it, even if it could
      // prove the same fact for len.
      //
      // As a temporary solution, we query both last and pre-last iterations
      // in the hope that we will be able to prove triviality for at least one
      // of them.  We can stop querying MaxExitCount for this case once SCEV
      // understands that (MaxExitCount - 1) will not overflow here.
      if (OptimizeCond(false, false) || OptimizeCond(true, false))
        Changed = true;
      else if (SkipLastIter)
        if (OptimizeCond(false, true) || OptimizeCond(true, true))
          Changed = true;
      continue;
    }

    if (MaxExitCount == ExitCount)
      // If the loop has more than 1 iteration, all further checks will be
      // executed 1 iteration less.
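      // (E.g. if both counts equal N, this exit must be taken on iteration N,
      // so any exit later in the dominance order only ever executes on
      // iterations 0..N-1; it is therefore safe to skip the last iteration
      // when reasoning about those later exits.)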
      SkipLastIter = true;

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this.  This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // We know that the backedge can't be taken, so we replace all
    // the header PHIs with values coming from the preheader.
    if (ExitCount->isZero()) {
      foldExit(L, ExitingBB, true, DeadInsts);
      replaceLoopPHINodesWithPreheaderValues(L, DeadInsts);
      Changed = true;
      continue;
    }

    assert(ExitCount->getType()->isIntegerTy() &&
           MaxExitCount->getType()->isIntegerTy() &&
           "Exit counts must be integers");

    Type *WiderType =
        SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
    ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
    MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
    assert(MaxExitCount->getType() == ExitCount->getType());

    // Can we prove that some other exit must be taken strictly before this
    // one?
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
                                     MaxExitCount, ExitCount)) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // As we run, keep track of which exit counts we've encountered.  If we
    // find a duplicate, we've found an exit which would have exited on the
    // exiting iteration, but (from the visit order) strictly follows another
    // which does the same and is thus dead.
    if (!DominatingExitCounts.insert(ExitCount).second) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // TODO: There might be another opportunity to leverage SCEV's reasoning
    // here.  If we kept track of the min of dominating exits so far, we could
    // discharge exits with EC >= MDEC.  This is less powerful than the
    // existing transform (since later exits aren't considered), but
    // potentially more powerful for any case where SCEV can prove a >=u b,
    // but neither a == b nor a >u b.  Such a case is not currently known.
  }
  return Changed;
}

bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form.  If we have a read-only loop, and we can tell that we must exit
  // down a path which does not need any of the values computed within the
  // loop, we can rewrite the loop to exit on the first iteration.  Note that
  // this neither a) tells us the loop exits on the first iteration (unless
  // *all* exits are predicateable) nor b) tells us *which* exit might be
  // taken.  This transformation looks a lot like a restricted form of dead
  // loop elimination, but restricted to read-only loops and without
  // necessarily needing to kill the loop entirely.
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow.  We have to eliminate the possibility
  // of implicit exits (see below) before we know it's truly exact.
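  // (An "implicit exit" here is an instruction which may fail to transfer
  // control to its successor, e.g. a call that can throw or never return;
  // the conservative mayHaveSideEffects scan below rules such loops out.)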
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them.  This assumes trivially lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
        BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it.  Consider
  // two exits (a) and (b) which would both exit on the same iteration.  If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path,
  // then we could convert a loop from exiting through (a) to one exiting
  // through (b).  Note that this problem exists only for exits with the same
  // exit count, and we could be more aggressive when exit counts are known
  // to be unequal.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse of
               // the normal dominance relation, plus a tie breaker for blocks
               // unordered by dominance.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               return A->getName() < B->getName();
             });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge).  If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count.  The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
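  // (The predication below is only sound under this invariant: it replaces a
  // per-iteration test with a loop-invariant one, which is valid only if the
  // block cannot be reached on an iteration past its computed exit count.)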
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    return DT->dominates(ExitingBB, L->getLoopLatch());
  }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable.  Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
  // suggestions on how to improve this?  I can obviously bail out for outer
  // loops, but that seems less than ideal.  MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO: isGuaranteedToTransfer
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks.  A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch.  Hoisting introduces
  //    additional legality constraints and we leave that to dedicated logic.
  //    We want to predicate even if we can't insert a loop invariant
  //    expression as peeling or unrolling will likely reduce the cost of the
  //    otherwise loop varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble.  Some notes:
  //   - LSR currently only supports LoopSimplify-form loops.  Indvars'
  //     canonicalization can be a pessimization without LSR to "clean up"
  //     afterwards.
  //   - We depend on having a preheader; in particular,
  //     Loop::getCanonicalInductionVariable only supports loops with
  //     preheaders, and we're in trouble if we can't find the induction
  //     variable even when we've manually inserted one.
  //   - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only
  // Note: Since the result returned by ScalarEvolution may depend on the order
  // in which previous results are added to its cache, the call to
  // getBackedgeTakenCount() may change following SCEV queries.
  const SCEV *BackedgeTakenCount;
  if (VerifyIndvars)
    BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV.  We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated.  This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop.  Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts.
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    // Some nested loops may share the same folded exit basic block,
    // thus we need to notify the topmost loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
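      // (Exiting blocks may also terminate in a switch or an invoke; those
      // are conservatively skipped here.)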
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the
      // innermost loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeader->getTerminator()))
        continue;

      // Check preconditions for proper SCEVExpander operation.  SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify.  Instead,
      // any pass that uses the SCEVExpander must do it.  This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV.  Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
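  // (The rewrites above are expected to keep all out-of-loop uses routed
  // through LCSSA phis; the assert below makes any violation obvious.)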
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR and any other changes have not interfered with SCEV's
  // ability to compute trip count.  We may have *changed* the exit count, but
  // only by reducing it.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
                     WidenIndVars && AllowIVWidening);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}

namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ?
        &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
    auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
    MemorySSA *MSSA = nullptr;
    if (MSSAAnalysis)
      MSSA = &MSSAAnalysis->getMSSA();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreserved<MemorySSAWrapperPass>();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}
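// A minimal usage sketch (assuming the standard opt driver): the pass can be
// run on its own with
//   opt -passes=indvars input.ll -S
// or via the legacy pass manager with
//   opt -enable-new-pm=0 -indvars input.ll -S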