//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
             "effect in release builds. (Note: this adds additional SCEV "
             "queries potentially changing the analysis result)"));

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
    "indvars-post-increment-ranges", cl::Hidden,
    cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
    cl::init(true));

static cl::opt<bool>
    DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
                cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
    LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                    cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
    AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                    cl::desc("Allow widening of indvars to eliminate s/zext"));

namespace {

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool WidenIndVars;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to improve our exit conditions by converting condition from signed
  /// to unsigned or rotating computation out of the loop.
  /// (See inline comment about why this is duplicated from simplifyAndExtend)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzeable exit counts
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
///   for (double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for (int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
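  // For illustration, the pattern matched so far (a sketch; names are
  // hypothetical, not taken from any real input):
  //   %iv = phi double [ 0.0, %preheader ], [ %iv.next, %backedge ]
  //   %iv.next = fadd double %iv, 1.0
  //   %cmp = fcmp olt double %iv.next, 1.0e4
  //   br i1 %cmp, label %header, label %exit
  // The phi and the fcmp are the two users of %iv.next; either may appear
  // first in the user list, so both orders are checked below.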
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
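    // For example (hypothetical values): with ExitValue = INT32_MAX - 1 and
    // IncValue = 4, a non-exact final step would compute ExitValue + IncValue,
    // which wraps negative in i32; the check below rejects such cases.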
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
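  // For example (a sketch): a loop equivalent to
  //   for (double d = 0.0; d < 100.0; d += 1.0) ...
  // is rewritten by handleFloatingPointIV to run on an i32 counter, with a
  // sitofp materialized for any remaining floating-point uses of the old IV.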
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all? (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.) Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
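        // For instance (hypothetical IR): given the header phi
        //   %p = phi i32 [ %init, %preheader ], [ %next, %latch ]
        // an exit guarded by a loop-invariant condition can only be taken
        // on the first iteration, so the exit-block phi may use %init
        // directly.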
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than that on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType ||
      Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its user(s), or 'signed'
  // if there are multiple users with both sign- and zero extensions,
  // in order not to introduce nondeterministic behaviour based on the
  // unspecified order of a PHI node's users-iterator.
  WI.IsSigned |= IsSigned;
}

//===----------------------------------------------------------------------===//
//  Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
      : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode *, 8> LoopPhis;
  for (PHINode &PN : L->getHeader()->phis())
    LoopPhis.push_back(&PN);

  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extend evaluation until needed, and avoid running
    // other SCEV based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
                                   &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated Phi node if so. Otherwise, return null. Note
/// that this is less general than SCEV's AddRec checking.
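///
/// For illustration, the shape recognized here (names hypothetical):
///   %phi = phi i32 [ %start, %preheader ], [ %inc, %latch ]
///   %inc = add i32 %phi, %step        ; %step is loop-invariant
/// getLoopPhiForCounter(%inc, L) returns %phi.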
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    LLVM_FALLTHROUGH;
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value. Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test. This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result. Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison. This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist. Note that a false result conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // Basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provable UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit)
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction*, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading to our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users. Safe as false is a conservative result.
    if (!propagatesPoison(cast<Operator>(I)) && I != Root)
      continue;

    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // way to exiting bb.
  return false;
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example, Arguments
  // may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value*, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if this IV has any uses other than the (soon to be rewritten)
/// loop exit test.
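///
/// For example (a sketch): a phi %iv whose only users are its increment
///   %iv.next = add i32 %iv, 1
/// and the exit compare
///   %exitcond = icmp eq i32 %iv.next, %n
/// is almost dead: once LFTR rewrites the compare, nothing else needs it.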
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
  Value *IncV = Phi->getIncomingValue(LatchIdx);

  for (User *U : Phi->users())
    if (U != Cond && U != IncV) return false;

  for (User *U : IncV->users())
    if (U != Cond && U != Phi) return false;
  return true;
}

/// Return true if the given phi is a "counter" in L. A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1. Note that L must have exactly one latch.
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}

/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
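      // E.g. (hypothetical) a counter starting at 'undef' that already feeds
      // the exit icmp may still be selected, since LFTR then introduces no
      // new user of the undef value.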
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison. Our strategy here differs for
    // pointers and integer IVs. For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace. For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/unit stride) after the backedge of loop L
/// is taken ExitCount times.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();
  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip count,
    // which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive unit stride of one. This means we don't
    // have to handle the case of negative offsets (yet) and just need to zero
    // extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in practice
    // SCEV will fold the pointer arithmetic away as such:
    // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know a priori that the limit must be a constant when evaluated
    // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType())
        > SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
        IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}

/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable. This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction * const IncVar =
      cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB-introducing use. We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type. This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
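  // A sketch of the two possible rewrites (types are illustrative): for an
  // i64 IV tested against an i32 exit count we emit either
  //   %wide.trip.count = zext i32 %exitcnt to i64   ; compare stays in i64
  // (or the sext analogue), or failing that
  //   %lftr.wideiv = trunc i64 %iv to i32           ; compare moves to i32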
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead. We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
        SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
          SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}

//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop. Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink allocas: we never want to sink static allocas out of the
    // entry block, and correctly sinking dynamic allocas requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
            PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}

static void replaceExitCond(BranchInst *BI, Value *NewCond,
                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  auto *OldCond = BI->getCondition();
  BI->setCondition(NewCond);
  if (OldCond->use_empty())
    DeadInsts.emplace_back(OldCond);
}

static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  auto *OldCond = BI->getCondition();
  auto *NewCond =
      ConstantInt::get(OldCond->getType(), IsTaken ? ExitIfTrue : !ExitIfTrue);
  replaceExitCond(BI, NewCond, DeadInsts);
}

static void replaceLoopPHINodesWithPreheaderValues(
    Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
  auto *LoopPreheader = L->getLoopPreheader();
  auto *LoopHeader = L->getHeader();
  for (auto &PN : LoopHeader->phis()) {
    auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
    PN.replaceAllUsesWith(PreheaderIncoming);
    DeadInsts.emplace_back(&PN);
  }
}

static void replaceWithInvariantCond(
    const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
    const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  Rewriter.setInsertPoint(BI);
  auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
  auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  if (ExitIfTrue)
    InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
  IRBuilder<> Builder(BI);
  auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
                                     BI->getCondition()->getName());
  replaceExitCond(BI, NewCond, DeadInsts);
}

static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
    const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
    ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  ICmpInst::Predicate Pred;
  Value *LHS, *RHS;
  BasicBlock *TrueSucc, *FalseSucc;
  if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
                      m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
    return false;

  assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
         "Not a loop exit!");

  // 'LHS pred RHS' should now mean that we stay in the loop.
  if (L->contains(FalseSucc))
    Pred = CmpInst::getInversePredicate(Pred);

  // If we are proving loop exit, invert the predicate.
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
    foldExit(L, ExitingBB, Inverted, DeadInsts);
    return true;
  }
  // Further logic works for non-inverted condition only.
  if (Inverted)
    return false;

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    const SCEV *One = SE->getOne(MaxIter->getType());
    MaxIter = SE->getMinusSCEV(MaxIter, One);
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
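  // Roughly (an illustrative example): for an increasing unit-stride IV, a
  // varying check like 'iv < len' holds throughout the first MaxIter
  // iterations iff it holds on the last of them, so an invariant condition
  // of the shape 'start + MaxIter < len' can stand in for it.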
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return false;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    foldExit(L, ExitingBB, Inverted, DeadInsts);
  else
    replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
                             Rewriter, DeadInsts);

  return true;
}

bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part of SimplifyIndVars' reasoning.
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags. The alternative to this would be to extend the
  // set of "interesting" IV users to include the icmp, but doing that
  // regresses results in practice: it queries SCEVs before the trip counts
  // which rely on them, which makes SCEV cache sub-optimal answers. That
  // concern about caching sub-optimal results is why we only query SCEVs of
  // the loop invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning the cache with sub-optimal results. For the must-execute case,
    // this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed. We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
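  // The rotation below rewrites (a sketch, names hypothetical):
  //   %wide = zext i32 %iv to i64
  //   %cmp = icmp ult i64 %wide, %limit
  // into
  //   %limit.trunc = trunc i64 %limit to i32   ; hoisted to the preheader
  //   %cmp = icmp ult i32 %iv, %limit.trunc
  // turning per-iteration work (the zext) into loop-invariant work.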
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the number
    // of instructions. The exception is when we have a zext(add-rec). The
    // reason for allowing this exception is that we know we need to get rid
    // of the zext for SCEV to be able to compute a trip count for such loops;
    // we consider the new trip count valuable enough to increase instruction
    // count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given an icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS,
    // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when zext is loop varying and RHS is loop invariant. This converts
    // loop varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      auto *NewRHS =
          CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
                           L->getLoopPreheader()->getTerminator());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };

    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note, we are leaving SCEV in an unfortunately imprecise case here
      // as rotation tends to reveal information about trip counts not
      // previously visible.
      continue;
    }
  }

  return Changed;
}

bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Remove all exits which aren't both rewriteable and guaranteed to execute
  // on every iteration.
  llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
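    // (Illustrative example: a 'goto out' from the body of a doubly nested
    // loop produces an exiting block which exits both loops; rewriting its
    // condition while visiting the outer loop would change how many times
    // the inner loop runs.)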
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // The exiting block must dominate the loop latch so that the exit test
    // executes on every iteration.
    if (!DT->dominates(ExitingBB, L->getLoopLatch()))
      return true;

    return false;
  });

  if (ExitingBlocks.empty())
    return false;

  // Get a symbolic upper bound on the loop backedge taken count.
  const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(MaxExitCount))
    return false;

  // Visit our exit blocks in order of dominance. Since all exits must
  // dominate the latch, there is a total dominance order between them.
  llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
    // std::sort sorts in ascending order, so we want the inverse of
    // the normal dominance relation.
    if (A == B) return false;
    if (DT->properlyDominates(A, B))
      return true;
    else {
      assert(DT->properlyDominates(B, A) &&
             "expected total dominance order!");
      return false;
    }
  });
#ifndef NDEBUG
  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
    assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
  }
#endif

  bool Changed = false;
  bool SkipLastIter = false;
  SmallSet<const SCEV*, 8> DominatingExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount)) {
      // Okay, we do not know the exit count here. Can we at least prove that
      // it will remain the same within iteration space?
      auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
      auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
        return optimizeLoopExitWithUnknownExitCount(
            L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE,
            Rewriter, DeadInsts);
      };

      // TODO: We might have proved that we can skip the last iteration for
      // this check. In this case, we only want to check the condition on the
      // pre-last iteration (MaxExitCount - 1). However, there is a nasty
      // corner case:
      //
      //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
      //
      // If we could not prove that len != 0, then we also could not prove that
      // (len - 1) is not UINT_MAX. If we simply query (len - 1), then
      // OptimizeCond will likely not prove anything for it, even if it could
      // prove the same fact for len.
      //
      // As a temporary solution, we query both last and pre-last iterations in
      // the hope that we will be able to prove triviality for at least one of
      // them. We can stop querying MaxExitCount for this case once SCEV
      // understands that (MaxExitCount - 1) will not overflow here.
      if (OptimizeCond(false, false) || OptimizeCond(true, false))
        Changed = true;
      else if (SkipLastIter)
        if (OptimizeCond(false, true) || OptimizeCond(true, true))
          Changed = true;
      continue;
    }

    if (MaxExitCount == ExitCount)
      // If the loop has more than 1 iteration, all further checks will be
      // executed 1 iteration less.
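      // (Illustrative example: in 'for (i = 0; i != len; i++) { if (i u< X)
      // break; }' the latch test 'i != len' is evaluated for i = 0..len,
      // while the body check only runs for i = 0..len-1, so the body check
      // need only be proven for the first MaxExitCount - 1 iterations.)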
      SkipLastIter = true;

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this. This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // We know that the backedge can't be taken, so we replace all
    // the header PHIs with values coming from the preheader.
    if (ExitCount->isZero()) {
      foldExit(L, ExitingBB, true, DeadInsts);
      replaceLoopPHINodesWithPreheaderValues(L, DeadInsts);
      Changed = true;
      continue;
    }

    assert(ExitCount->getType()->isIntegerTy() &&
           MaxExitCount->getType()->isIntegerTy() &&
           "Exit counts must be integers");

    Type *WiderType =
        SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
    ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
    MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
    assert(MaxExitCount->getType() == ExitCount->getType());

    // Can we prove that some other exit must be taken strictly before this
    // one?
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
                                     MaxExitCount, ExitCount)) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // As we run, keep track of which exit counts we've encountered. If we
    // find a duplicate, we've found an exit which would have exited on the
    // exiting iteration, but (from the visit order) strictly follows another
    // which does the same and is thus dead.
    if (!DominatingExitCounts.insert(ExitCount).second) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // TODO: There might be another opportunity to leverage SCEV's reasoning
    // here. If we kept track of the min of dominating exits so far, we could
    // discharge exits with EC >= MDEC. This is less powerful than the existing
    // transform (since later exits aren't considered), but potentially more
    // powerful for any case where SCEV can prove a >=u b, but neither a == b
    // nor a >u b. Such a case is not currently known.
  }
  return Changed;
}

bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration. Note that this
  // neither a) tells us the loop exits on the first iteration (unless *all*
  // exits are predicateable) nor b) tells us *which* exit might be taken.
  // This transformation looks a lot like a restricted form of dead loop
  // elimination, but restricted to read-only loops and without necessarily
  // needing to kill the loop entirely.
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow. We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
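  // (Here an implicit exit is e.g. a call which may throw or never return;
  // the mayHaveSideEffects() scan below conservatively rejects loops which
  // may contain one.)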
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them. This assumes trivially lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
        BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it. Consider
  // two exits (a) and (b) which would both exit on the same iteration. If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b). Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known to be
  // unequal.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse of
               // the normal dominance relation, plus a tie breaker for blocks
               // unordered by dominance.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               return A->getName() < B->getName();
             });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge). If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count. The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    return DT->dominates(ExitingBB, L->getLoopLatch());
  }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable. Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests. Any
  // suggestions on how to improve this? I can obviously bail out for outer
  // loops, but that seems less than ideal. MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO: isGuaranteedToTransfer
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks. A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch. Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic. We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with
  //    preheaders, and we're in trouble if we can't find the induction
  //    variable even when we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only.
  // Note: Since the result returned by ScalarEvolution may depend on the order
  // in which previous results are added to its cache, the call to
  // getBackedgeTakenCount() may change following SCEV queries.
  const SCEV *BackedgeTakenCount;
  if (VerifyIndvars)
    BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop. Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    // Some nested loops may share the same folded exit basic block,
    // so we need to notify the topmost loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one. Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them. Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions. Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeader->getTerminator()))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR and any other changes have not interfered with SCEV's
  // ability to compute trip count. We may have *changed* the exit count, but
  // only by reducing it.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
                     WidenIndVars && AllowIVWidening);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}

namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
    auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
    MemorySSA *MSSA = nullptr;
    if (MSSAAnalysis)
      MSSA = &MSSAAnalysis->getMSSA();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreserved<MemorySSAWrapperPass>();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}