//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened, "Number of indvars widened");
STATISTIC(NumReplaced, "Number of exit values replaced");
STATISTIC(NumLFTR, "Number of loop exit tests replaced");
STATISTIC(NumElimExt, "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV, "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
             "effect in release builds. (Note: this adds additional SCEV "
             "queries potentially changing the analysis result)"));

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(
        clEnumValN(NeverRepl, "never", "never replace exit value"),
        clEnumValN(OnlyCheapRepl, "cheap",
                   "only replace exit value when the cost is cheap"),
        clEnumValN(
            UnusedIndVarInLoop, "unusedindvarinloop",
            "only replace exit value when it is an unused "
            "induction variable in the loop and has cheap replacement cost"),
        clEnumValN(NoHardUse, "noharduse",
                   "only replace exit values when loop def likely dead"),
        clEnumValN(AlwaysRepl, "always",
                   "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
    "indvars-post-increment-ranges", cl::Hidden,
    cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
    cl::init(true));

static cl::opt<bool>
    DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
                cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
    LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                    cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
    AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                    cl::desc("Allow widening of indvars to eliminate s/zext"));

namespace {

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool WidenIndVars;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to improve our exit conditions by converting the condition from
  /// signed to unsigned or by rotating computation out of the loop.
  /// (See inline comment about why this is duplicated from simplifyAndExtend.)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzable exit counts.
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
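  /// As an illustrative sketch (example loop made up for exposition): in a
  /// read-only loop such as
  ///   for (i = 0; i != n; i++)
  ///     if (a[i] == key) break;
  /// running a few extra side-effect-free iterations past the early exit is
  /// unobservable, so an exit test can be traded for a loop-invariant one.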
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t.
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
///   for (double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for (int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge ^ 1;

  // Check the incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a ConstantFP, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find the exit condition, which is an fcmp. If it doesn't exist, or if it
  // isn't only used by a branch, we can't transform it.
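  // Roughly, the pattern being matched is (illustrative IR, names made up):
  //   %incr = fadd double %iv, 1.000000e+00
  //   %cmp  = fcmp olt double %incr, 1.000000e+04
  //   br i1 %cmp, label %loop, label %exit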
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find the new predicate for the integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false; // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in a signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue - InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false; // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
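    // For example (illustrative numbers): with ExitValue = INT32_MAX - 1 and
    // IncValue = 4, ExitValue + IncValue wraps to a negative i32, so the
    // integer IV would not mirror the fp IV and we bail out below.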
    if (Leftover != 0 && int32_t(ExitValue + IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue - ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false; // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue + IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert the new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName() + ".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something
  // else in the loop uses its value. In order to canonicalize the induction
  // variable, we choose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis
  // by the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify that the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all? (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.) Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get the condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
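        // Illustrative sketch: for a header phi
        //   %iv = phi i32 [ %start, %preheader ], [ %iv.next, %latch ]
        // an exit that can only be taken on the first iteration always
        // observes %start, so the LCSSA phi in the exit block can use %start
        // directly.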
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than that on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable.
  // We could compute the cost of all instructions on the induction variable
  // more comprehensively when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType ||
      Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its user(s), or 'signed'
  // if there are multiple users with both sign- and zero extensions,
  // in order not to introduce nondeterministic behaviour based on the
  // unspecified order of a PHI node's users-iterator.
  WI.IsSigned |= IsSigned;
}

//===----------------------------------------------------------------------===//
//  Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
      : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode *, 8> LoopPhis;
  for (PHINode &PN : L->getHeader()->phis())
    LoopPhis.push_back(&PN);

  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs.
    // This forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extensions until needed, and avoid running
    // other SCEV based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
                                   &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated PHI node if so. Otherwise, return null. Note
/// that this is less general than SCEV's AddRec checking.
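/// For example (illustrative IR, names made up), given
///   %iv      = phi i64 [ 0, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add i64 %iv, 1
/// passing %iv.next as IncV returns the phi %iv.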
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    [[fallthrough]];
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value. Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test. This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE.
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS.
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS.
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if the PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result.
/// Note that this doesn't say anything about whether OnPathTo is actually
/// executed or whether Root is actually poison. This can be used to assess
/// whether a new use of Root can be added at a location which is control
/// equivalent with OnPathTo (such as immediately before it) without
/// introducing UB which didn't previously exist. Note that a false result
/// conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // The basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit).
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction *, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading to our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and its transitive users. This is safe, as false is a conservative
    // result.
    if (!propagatesPoison(cast<Operator>(I)) && I != Root)
      continue;

    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // the way to the exiting bb.
  return false;
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value *> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example,
  // Arguments may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Loads and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth + 1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value *, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if the only users of this IV are the (soon to be rewritten)
/// loop exit test and the IV increment itself.
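/// For example (illustrative IR): with
///   %iv      = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add i32 %iv, 1
///   %cmp     = icmp ne i32 %iv.next, %n
/// and %cmp being the exit test (Cond), the IV is almost dead: once LFTR
/// rewrites the exit test, the whole recurrence can be deleted.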
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
  Value *IncV = Phi->getIncomingValue(LatchIdx);

  for (User *U : Phi->users())
    if (U != Cond && U != IncV) return false;

  for (User *U : IncV->users())
    if (U != Cond && U != Phi) return false;
  return true;
}

/// Return true if the given phi is a "counter" in L. A counter is an add
/// recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1. Note that L must have exactly one latch.
static bool isLoopCounter(PHINode *Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}

/// Search the loop header for a loop counter (an add rec with a step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison. Our strategy here differs for
    // pointers and integer IVs. For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace. For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter with unit stride) after the backedge of loop
/// L is taken ExitCount times.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();
  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip
    // count, which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a unit stride of one. This means we don't have to
    // handle the case of negative offsets (yet) and just need to zero extend
    // ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in
    // practice SCEV will fold the pointer arithmetic away as such:
    //   BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know a priori that the limit must be a constant when evaluated
    // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType()) >
        SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
        IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}

/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable. This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction *const IncVar =
      cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds, which requires us not
    // to add a potentially UB introducing use. We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type. This is safe
  // since we know (from the exit count bitwidth) that we can't self-wrap in
  // the narrower type.
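  // For example (illustrative): with an i64 IV and an i32 exit count, the
  // limit is computed in i32; below we either extend that i32 limit outside
  // the loop (when the IV provably equals the extension of its truncation)
  // or fall back to truncating the IV for the compare.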
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead. We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
        SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
          SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make the old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}

//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to clean up loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop. Also
    // don't move instructions which might read memory, since the loop may
    // modify memory. Note that it's okay if the instruction might have
    // undefined behavior: LoopSimplify guarantees that the preheader
    // dominates the exit block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink allocas: we never want to sink static allocas out of the
    // entry block, and correctly sinking dynamic allocas requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
            PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    SE->forgetValue(ToMove);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}

static void replaceExitCond(BranchInst *BI, Value *NewCond,
                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  auto *OldCond = BI->getCondition();
  LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
                    << " with " << *NewCond << "\n");
  BI->setCondition(NewCond);
  if (OldCond->use_empty())
    DeadInsts.emplace_back(OldCond);
}

static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  auto *OldCond = BI->getCondition();
  auto *NewCond =
      ConstantInt::get(OldCond->getType(), IsTaken ?
          ExitIfTrue : !ExitIfTrue);
  replaceExitCond(BI, NewCond, DeadInsts);
}

static void replaceLoopPHINodesWithPreheaderValues(
    LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
    ScalarEvolution &SE) {
  assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
  auto *LoopPreheader = L->getLoopPreheader();
  auto *LoopHeader = L->getHeader();
  SmallVector<Instruction *> Worklist;
  for (auto &PN : LoopHeader->phis()) {
    auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
    for (User *U : PN.users())
      Worklist.push_back(cast<Instruction>(U));
    SE.forgetValue(&PN);
    PN.replaceAllUsesWith(PreheaderIncoming);
    DeadInsts.emplace_back(&PN);
  }

  // Replacing with the preheader value will often allow IV users to simplify
  // (especially if the preheader value is a constant).
  SmallPtrSet<Instruction *, 16> Visited;
  while (!Worklist.empty()) {
    auto *I = cast<Instruction>(Worklist.pop_back_val());
    if (!Visited.insert(I).second)
      continue;

    // Don't simplify instructions outside the loop.
    if (!L->contains(I))
      continue;

    Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
    if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
      for (User *U : I->users())
        Worklist.push_back(cast<Instruction>(U));
      I->replaceAllUsesWith(Res);
      DeadInsts.emplace_back(I);
    }
  }
}

static void replaceWithInvariantCond(
    const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
    const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  Rewriter.setInsertPoint(BI);
  auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
  auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  if (ExitIfTrue)
    InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
  IRBuilder<> Builder(BI);
  auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
                                     BI->getCondition()->getName());
  replaceExitCond(BI, NewCond, DeadInsts);
}

static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
    const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
    ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  ICmpInst::Predicate Pred;
  Value *LHS, *RHS;
  BasicBlock *TrueSucc, *FalseSucc;
  if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
                      m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
    return false;

  assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
         "Not a loop exit!");

  // 'LHS pred RHS' should now mean that we stay in the loop.
  if (L->contains(FalseSucc))
    Pred = CmpInst::getInversePredicate(Pred);

  // If we are proving loop exit, invert the predicate.
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
    foldExit(L, ExitingBB, Inverted, DeadInsts);
    return true;
  }
  // Further logic works for the non-inverted condition only.
  if (Inverted)
    return false;

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    const SCEV *One = SE->getOne(MaxIter->getType());
    MaxIter = SE->getMinusSCEV(MaxIter, One);
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return false;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    foldExit(L, ExitingBB, Inverted, DeadInsts);
  else
    replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
                             Rewriter, DeadInsts);

  return true;
}

bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This duplicates a particular part of SimplifyIndVars' reasoning.
  // We need to duplicate it because, given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp: the zext doesn't fold to an AddRec unless it
  // already has flags. The alternative would be to extend the set of
  // "interesting" IV users to include the icmp, but doing that regresses
  // results in practice: it queries SCEVs before the trip counts which rely
  // on them have been computed, causing SCEV to cache sub-optimal answers.
  // That concern about caching sub-optimal results is why we only query
  // SCEVs of the loop invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop so that we
    // don't poison the cache with sub-optimal results. For the must-execute
    // case, this is a necessary precondition for correctness.
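    // For instance (an illustrative sketch; `%iv` and `%n` are hypothetical):
    // given
    //   %w = zext i8 %iv to i32
    //   %c = icmp slt i32 %w, %n
    // if the unsigned range of %n is known to lie in [0, 256), i.e. within
    // the zero-extension of the full i8 range, then `slt` can be replaced
    // with `ult` without changing the result; that is the rewrite performed
    // below.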
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case.
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed. We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate.
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the
    // number of instructions. The exception is when we have a zext(add-rec):
    // we know we need to get rid of the zext for SCEV to be able to compute
    // a trip count for the loop, and we consider the new trip count valuable
    // enough to increase the instruction count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given an icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS,
    // replace it with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when the zext is loop varying and RHS is loop invariant. This converts
    // loop-varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      auto *NewRHS =
          CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
                           L->getLoopPreheader()->getTerminator());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };

    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note: we are leaving SCEV in an unfortunately imprecise state here,
      // as rotation tends to reveal information about trip counts that was
      // not previously visible.
      continue;
    }
  }

  return Changed;
}

bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Remove all exits which aren't both rewritable and executed on every
  // iteration.
  llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // Likewise, the loop latch must be dominated by the exiting BB.
    if (!DT->dominates(ExitingBB, L->getLoopLatch()))
      return true;

    if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
      // If already constant, nothing to do. However, if this is an
      // unconditional exit, we can still replace header phis with their
      // preheader value.
      if (!L->contains(BI->getSuccessor(CI->isNullValue())))
        replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
      return true;
    }

    return false;
  });

  if (ExitingBlocks.empty())
    return false;

  // Get a symbolic upper bound on the loop backedge taken count.
  const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(MaxExitCount))
    return false;

  // Visit our exit blocks in order of dominance. Since all exits must
  // dominate the latch, there is a total dominance order between them.
  llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
    // std::sort sorts in ascending order, so we want the inverse of
    // the normal dominance relation.
    if (A == B) return false;
    if (DT->properlyDominates(A, B))
      return true;
    assert(DT->properlyDominates(B, A) && "expected total dominance order!");
    return false;
  });
#ifndef NDEBUG
  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
    assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
  }
#endif

  bool Changed = false;
  bool SkipLastIter = false;
  SmallSet<const SCEV*, 8> DominatingExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount)) {
      // Okay, we do not know the exit count here. Can we at least prove
      // that it will remain the same within the iteration space?
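      // For example (sketch only; `n` is hypothetical): in
      //
      //   for (i = 0; i != n; ++i)
      //     if (i s< 0) break;
      //
      // the early exit has no computable exit count, but `i s>= 0` can be
      // proven for the whole iteration space, so the exit can be folded
      // away.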
      auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
      auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
        return optimizeLoopExitWithUnknownExitCount(
            L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE,
            Rewriter, DeadInsts);
      };

      // TODO: We might have proved that we can skip the last iteration for
      // this check. In this case, we only want to check the condition on the
      // pre-last iteration (MaxExitCount - 1). However, there is a nasty
      // corner case:
      //
      //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
      //
      // If we could not prove that len != 0, then we also could not prove
      // that (len - 1) is not UINT_MAX. If we simply query (len - 1), then
      // OptimizeCond will likely not prove anything for it, even if it could
      // prove the same fact for len.
      //
      // As a temporary solution, we query both the last and the pre-last
      // iterations in the hope that we will be able to prove triviality for
      // at least one of them. We can stop querying MaxExitCount for this
      // case once SCEV understands that (MaxExitCount - 1) will not overflow
      // here.
      if (OptimizeCond(false, false) || OptimizeCond(true, false))
        Changed = true;
      else if (SkipLastIter)
        if (OptimizeCond(false, true) || OptimizeCond(true, true))
          Changed = true;
      continue;
    }

    if (MaxExitCount == ExitCount)
      // If the loop has more than one iteration, all further exit checks
      // will be executed one iteration fewer.
      SkipLastIter = true;

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this. This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // We know that the backedge can't be taken, so we replace all
    // the header PHIs with values coming from the preheader.
    if (ExitCount->isZero()) {
      foldExit(L, ExitingBB, true, DeadInsts);
      replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
      Changed = true;
      continue;
    }

    assert(ExitCount->getType()->isIntegerTy() &&
           MaxExitCount->getType()->isIntegerTy() &&
           "Exit counts must be integers");

    Type *WiderType =
        SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
    ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
    MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
    assert(MaxExitCount->getType() == ExitCount->getType());

    // Can we prove that some other exit must be taken strictly before this
    // one?
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
                                     MaxExitCount, ExitCount)) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // As we run, keep track of which exit counts we've encountered. If we
    // find a duplicate, we've found an exit which would have exited on the
    // exiting iteration, but (from the visit order) strictly follows another
    // which does the same and is thus dead.
    if (!DominatingExitCounts.insert(ExitCount).second) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // TODO: There might be another opportunity to leverage SCEV's reasoning
    // here. If we kept track of the min of dominating exit counts seen so
    // far, we could discharge exits with EC >= MDEC.
    // This is less powerful than the existing transform (since later exits
    // aren't considered), but potentially more powerful for any case where
    // SCEV can prove a >=u b, but neither a == b nor a >u b. Such a case is
    // not currently known.
  }
  return Changed;
}

bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration. Note that this
  // neither a) tells us the loop exits on the first iteration (unless *all*
  // exits are predicatable) nor b) tells us *which* exit might be taken.
  // This transformation looks a lot like a restricted form of dead loop
  // elimination, but restricted to read-only loops and without necessarily
  // needing to kill the loop entirely.
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow. We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them. This assumes trivially lcssa phis
    // have already been removed; TODO: generalize.
    BasicBlock *ExitBlock =
        BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) ||
        !Rewriter.isSafeToExpand(ExitCount))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it. Consider
  // two exits (a) and (b) which would both exit on the same iteration. If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path,
  // then we could convert a loop from exiting through (a) to one exiting
  // through (b).
  // Note that this problem exists only for exits with the same exit count,
  // and we could be more aggressive when exit counts are known to differ.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse
               // of the normal dominance relation, plus a tie breaker for
               // blocks unordered by dominance.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               return A->getName() < B->getName();
             });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge). If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count. The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    return DT->dominates(ExitingBB, L->getLoopLatch());
  }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable. Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests. Any
  // suggestions on how to improve this? I can obviously bail out for outer
  // loops, but that seems less than ideal. MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO: isGuaranteedToTransfer
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks. A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch. Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic. We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      NewCond = L->contains(BI->getSuccessor(0)) ?
          B.getFalse() : B.getTrue();
    } else {
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
          ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with
  //    preheaders, and we're in trouble if we can't find the induction
  //    variable even when we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only.
  // Note: Since the result returned by ScalarEvolution may depend on the
  // order in which previous results are added to its cache, the result of
  // the getBackedgeTakenCount() call here may differ from what subsequent
  // SCEV queries would produce.
  const SCEV *BackedgeTakenCount;
  if (VerifyIndvars)
    BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
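  // For example (an illustrative sketch; `sum.init` names the hypothetical
  // value of `sum` on loop entry): with
  //
  //   for (i = 0; i != 10; ++i)
  //     sum += 2;
  //   use(sum);
  //
  // the final value of `sum` is computable as `sum.init + 20`, and the use
  // outside the loop can be rewritten to consume that expression directly,
  // removing its dependence on the loop's exit value.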
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop. Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzable exit counts.
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV. Some nested loops may
    // share the same folded exit basic block, so we need to notify the
    // topmost loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one. Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them. Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions. Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeader->getTerminator()))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
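      // In particular, expanding an add-recurrence is only known to be safe
      // here when its loop has a preheader to host the expanded code, which
      // is what the (conservative) guard below checks.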
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR and any other changes have not interfered with SCEV's
  // ability to compute trip count. We may have *changed* the exit count, but
  // only by reducing it.
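  // (For instance, predicateLoopExits may rewrite a read-only loop so that it
  // exits on its first iteration; that reduces, but never increases, the
  // backedge-taken count, so the check below remains valid.)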
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
                     WidenIndVars && AllowIVWidening);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}

namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
    auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
    MemorySSA *MSSA = nullptr;
    if (MSSAAnalysis)
      MSSA = &MSSAAnalysis->getMSSA();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreserved<MemorySSAWrapperPass>();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}
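// Example invocations (a sketch; the flags correspond to the cl::opts
// declared in this file and may evolve):
//
//   opt -passes=indvars -S input.ll
//   opt -passes=indvars -disable-lftr -S input.ll
//   opt -passes=indvars -replexitval=never -S input.ll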