1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This transformation analyzes and transforms the induction variables (and 10 // computations derived from them) into simpler forms suitable for subsequent 11 // analysis and transformation. 12 // 13 // If the trip count of a loop is computable, this pass also makes the following 14 // changes: 15 // 1. The exit condition for the loop is canonicalized to compare the 16 // induction value against the exit value. This turns loops like: 17 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)' 18 // 2. Any use outside of the loop of an expression derived from the indvar 19 // is changed to compute the derived value outside of the loop, eliminating 20 // the dependence on the exit value of the induction variable. If the only 21 // purpose of the loop is to compute the exit value of some derived 22 // expression, this transformation will make the loop dead. 23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "llvm/Transforms/Scalar/IndVarSimplify.h" 27 #include "llvm/ADT/APFloat.h" 28 #include "llvm/ADT/ArrayRef.h" 29 #include "llvm/ADT/STLExtras.h" 30 #include "llvm/ADT/SmallPtrSet.h" 31 #include "llvm/ADT/SmallSet.h" 32 #include "llvm/ADT/SmallVector.h" 33 #include "llvm/ADT/Statistic.h" 34 #include "llvm/ADT/iterator_range.h" 35 #include "llvm/Analysis/LoopInfo.h" 36 #include "llvm/Analysis/LoopPass.h" 37 #include "llvm/Analysis/MemorySSA.h" 38 #include "llvm/Analysis/MemorySSAUpdater.h" 39 #include "llvm/Analysis/ScalarEvolution.h" 40 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 41 #include "llvm/Analysis/TargetLibraryInfo.h" 42 #include "llvm/Analysis/TargetTransformInfo.h" 43 #include "llvm/Analysis/ValueTracking.h" 44 #include "llvm/IR/BasicBlock.h" 45 #include "llvm/IR/Constant.h" 46 #include "llvm/IR/ConstantRange.h" 47 #include "llvm/IR/Constants.h" 48 #include "llvm/IR/DataLayout.h" 49 #include "llvm/IR/DerivedTypes.h" 50 #include "llvm/IR/Dominators.h" 51 #include "llvm/IR/Function.h" 52 #include "llvm/IR/IRBuilder.h" 53 #include "llvm/IR/InstrTypes.h" 54 #include "llvm/IR/Instruction.h" 55 #include "llvm/IR/Instructions.h" 56 #include "llvm/IR/IntrinsicInst.h" 57 #include "llvm/IR/Intrinsics.h" 58 #include "llvm/IR/Module.h" 59 #include "llvm/IR/Operator.h" 60 #include "llvm/IR/PassManager.h" 61 #include "llvm/IR/PatternMatch.h" 62 #include "llvm/IR/Type.h" 63 #include "llvm/IR/Use.h" 64 #include "llvm/IR/User.h" 65 #include "llvm/IR/Value.h" 66 #include "llvm/IR/ValueHandle.h" 67 #include "llvm/InitializePasses.h" 68 #include "llvm/Pass.h" 69 #include "llvm/Support/Casting.h" 70 #include "llvm/Support/CommandLine.h" 71 #include "llvm/Support/Compiler.h" 72 #include "llvm/Support/Debug.h" 73 #include "llvm/Support/MathExtras.h" 74 #include "llvm/Support/raw_ostream.h" 75 #include "llvm/Transforms/Scalar.h" 76 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 77 #include "llvm/Transforms/Utils/Local.h" 78 #include "llvm/Transforms/Utils/LoopUtils.h" 79 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 80 #include "llvm/Transforms/Utils/SimplifyIndVar.h" 81 #include <cassert> 82 #include <cstdint> 83 #include <utility> 84 85 using namespace llvm; 86 using namespace PatternMatch; 87 88 #define DEBUG_TYPE "indvars" 89 90 STATISTIC(NumWidened , "Number of indvars widened"); 91 STATISTIC(NumReplaced , "Number of exit values replaced"); 92 STATISTIC(NumLFTR , "Number of loop exit tests replaced"); 93 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); 94 STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); 95 96 // Trip count verification can be enabled by default under NDEBUG if we 97 // implement a strong expression equivalence checker in SCEV. Until then, we 98 // use the verify-indvars flag, which may assert in some cases. 99 static cl::opt<bool> VerifyIndvars( 100 "verify-indvars", cl::Hidden, 101 cl::desc("Verify the ScalarEvolution result after running indvars. Has no " 102 "effect in release builds. (Note: this adds additional SCEV " 103 "queries potentially changing the analysis result)")); 104 105 static cl::opt<ReplaceExitVal> ReplaceExitValue( 106 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), 107 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), 108 cl::values( 109 clEnumValN(NeverRepl, "never", "never replace exit value"), 110 clEnumValN(OnlyCheapRepl, "cheap", 111 "only replace exit value when the cost is cheap"), 112 clEnumValN( 113 UnusedIndVarInLoop, "unusedindvarinloop", 114 "only replace exit value when it is an unused " 115 "induction variable in the loop and has cheap replacement cost"), 116 clEnumValN(NoHardUse, "noharduse", 117 "only replace exit values when loop def likely dead"), 118 clEnumValN(AlwaysRepl, "always", 119 "always replace exit value whenever possible"))); 120 121 static cl::opt<bool> UsePostIncrementRanges( 122 "indvars-post-increment-ranges", cl::Hidden, 123 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), 124 cl::init(true)); 125 126 static cl::opt<bool> 127 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), 128 cl::desc("Disable Linear Function Test Replace optimization")); 129 130 static cl::opt<bool> 131 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), 132 cl::desc("Predicate conditions in read only loops")); 133 134 static cl::opt<bool> 135 AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), 136 cl::desc("Allow widening of indvars to eliminate s/zext")); 137 138 namespace { 139 140 class IndVarSimplify { 141 LoopInfo *LI; 142 ScalarEvolution *SE; 143 DominatorTree *DT; 144 const DataLayout &DL; 145 TargetLibraryInfo *TLI; 146 const TargetTransformInfo *TTI; 147 std::unique_ptr<MemorySSAUpdater> MSSAU; 148 149 SmallVector<WeakTrackingVH, 16> DeadInsts; 150 bool WidenIndVars; 151 152 bool handleFloatingPointIV(Loop *L, PHINode *PH); 153 bool rewriteNonIntegerIVs(Loop *L); 154 155 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); 156 /// Try to improve our exit conditions by converting condition from signed 157 /// to unsigned or rotating computation out of the loop. 158 /// (See inline comment about why this is duplicated from simplifyAndExtend) 159 bool canonicalizeExitCondition(Loop *L); 160 /// Try to eliminate loop exits based on analyzeable exit counts 161 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter); 162 /// Try to form loop invariant tests for loop exits by changing how many 163 /// iterations of the loop run when that is unobservable. 164 bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter); 165 166 bool rewriteFirstIterationLoopExitValues(Loop *L); 167 168 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 169 const SCEV *ExitCount, 170 PHINode *IndVar, SCEVExpander &Rewriter); 171 172 bool sinkUnusedInvariants(Loop *L); 173 174 public: 175 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, 176 const DataLayout &DL, TargetLibraryInfo *TLI, 177 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars) 178 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI), 179 WidenIndVars(WidenIndVars) { 180 if (MSSA) 181 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); 182 } 183 184 bool run(Loop *L); 185 }; 186 187 } // end anonymous namespace 188 189 //===----------------------------------------------------------------------===// 190 // rewriteNonIntegerIVs and helpers. Prefer integer IVs. 191 //===----------------------------------------------------------------------===// 192 193 /// Convert APF to an integer, if possible. 194 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) { 195 bool isExact = false; 196 // See if we can convert this to an int64_t 197 uint64_t UIntVal; 198 if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true, 199 APFloat::rmTowardZero, &isExact) != APFloat::opOK || 200 !isExact) 201 return false; 202 IntVal = UIntVal; 203 return true; 204 } 205 206 /// If the loop has floating induction variable then insert corresponding 207 /// integer induction variable if possible. 208 /// For example, 209 /// for(double i = 0; i < 10000; ++i) 210 /// bar(i) 211 /// is converted into 212 /// for(int i = 0; i < 10000; ++i) 213 /// bar((double)i); 214 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { 215 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); 216 unsigned BackEdge = IncomingEdge^1; 217 218 // Check incoming value. 219 auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge)); 220 221 int64_t InitValue; 222 if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue)) 223 return false; 224 225 // Check IV increment. Reject this PN if increment operation is not 226 // an add or increment value can not be represented by an integer. 227 auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); 228 if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false; 229 230 // If this is not an add of the PHI with a constantfp, or if the constant fp 231 // is not an integer, bail out. 232 ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); 233 int64_t IncValue; 234 if (IncValueVal == nullptr || Incr->getOperand(0) != PN || 235 !ConvertToSInt(IncValueVal->getValueAPF(), IncValue)) 236 return false; 237 238 // Check Incr uses. One user is PN and the other user is an exit condition 239 // used by the conditional terminator. 240 Value::user_iterator IncrUse = Incr->user_begin(); 241 Instruction *U1 = cast<Instruction>(*IncrUse++); 242 if (IncrUse == Incr->user_end()) return false; 243 Instruction *U2 = cast<Instruction>(*IncrUse++); 244 if (IncrUse != Incr->user_end()) return false; 245 246 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't 247 // only used by a branch, we can't transform it. 248 FCmpInst *Compare = dyn_cast<FCmpInst>(U1); 249 if (!Compare) 250 Compare = dyn_cast<FCmpInst>(U2); 251 if (!Compare || !Compare->hasOneUse() || 252 !isa<BranchInst>(Compare->user_back())) 253 return false; 254 255 BranchInst *TheBr = cast<BranchInst>(Compare->user_back()); 256 257 // We need to verify that the branch actually controls the iteration count 258 // of the loop. If not, the new IV can overflow and no one will notice. 259 // The branch block must be in the loop and one of the successors must be out 260 // of the loop. 261 assert(TheBr->isConditional() && "Can't use fcmp if not conditional"); 262 if (!L->contains(TheBr->getParent()) || 263 (L->contains(TheBr->getSuccessor(0)) && 264 L->contains(TheBr->getSuccessor(1)))) 265 return false; 266 267 // If it isn't a comparison with an integer-as-fp (the exit value), we can't 268 // transform it. 269 ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); 270 int64_t ExitValue; 271 if (ExitValueVal == nullptr || 272 !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) 273 return false; 274 275 // Find new predicate for integer comparison. 276 CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; 277 switch (Compare->getPredicate()) { 278 default: return false; // Unknown comparison. 279 case CmpInst::FCMP_OEQ: 280 case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; 281 case CmpInst::FCMP_ONE: 282 case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break; 283 case CmpInst::FCMP_OGT: 284 case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break; 285 case CmpInst::FCMP_OGE: 286 case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break; 287 case CmpInst::FCMP_OLT: 288 case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break; 289 case CmpInst::FCMP_OLE: 290 case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; 291 } 292 293 // We convert the floating point induction variable to a signed i32 value if 294 // we can. This is only safe if the comparison will not overflow in a way 295 // that won't be trapped by the integer equivalent operations. Check for this 296 // now. 297 // TODO: We could use i64 if it is native and the range requires it. 298 299 // The start/stride/exit values must all fit in signed i32. 300 if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) 301 return false; 302 303 // If not actually striding (add x, 0.0), avoid touching the code. 304 if (IncValue == 0) 305 return false; 306 307 // Positive and negative strides have different safety conditions. 308 if (IncValue > 0) { 309 // If we have a positive stride, we require the init to be less than the 310 // exit value. 311 if (InitValue >= ExitValue) 312 return false; 313 314 uint32_t Range = uint32_t(ExitValue-InitValue); 315 // Check for infinite loop, either: 316 // while (i <= Exit) or until (i > Exit) 317 if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) { 318 if (++Range == 0) return false; // Range overflows. 319 } 320 321 unsigned Leftover = Range % uint32_t(IncValue); 322 323 // If this is an equality comparison, we require that the strided value 324 // exactly land on the exit value, otherwise the IV condition will wrap 325 // around and do things the fp IV wouldn't. 326 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 327 Leftover != 0) 328 return false; 329 330 // If the stride would wrap around the i32 before exiting, we can't 331 // transform the IV. 332 if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) 333 return false; 334 } else { 335 // If we have a negative stride, we require the init to be greater than the 336 // exit value. 337 if (InitValue <= ExitValue) 338 return false; 339 340 uint32_t Range = uint32_t(InitValue-ExitValue); 341 // Check for infinite loop, either: 342 // while (i >= Exit) or until (i < Exit) 343 if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) { 344 if (++Range == 0) return false; // Range overflows. 345 } 346 347 unsigned Leftover = Range % uint32_t(-IncValue); 348 349 // If this is an equality comparison, we require that the strided value 350 // exactly land on the exit value, otherwise the IV condition will wrap 351 // around and do things the fp IV wouldn't. 352 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 353 Leftover != 0) 354 return false; 355 356 // If the stride would wrap around the i32 before exiting, we can't 357 // transform the IV. 358 if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) 359 return false; 360 } 361 362 IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); 363 364 // Insert new integer induction variable. 365 PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); 366 NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), 367 PN->getIncomingBlock(IncomingEdge)); 368 369 Value *NewAdd = 370 BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue), 371 Incr->getName()+".int", Incr); 372 NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge)); 373 374 ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd, 375 ConstantInt::get(Int32Ty, ExitValue), 376 Compare->getName()); 377 378 // In the following deletions, PN may become dead and may be deleted. 379 // Use a WeakTrackingVH to observe whether this happens. 380 WeakTrackingVH WeakPH = PN; 381 382 // Delete the old floating point exit comparison. The branch starts using the 383 // new comparison. 384 NewCompare->takeName(Compare); 385 Compare->replaceAllUsesWith(NewCompare); 386 RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get()); 387 388 // Delete the old floating point increment. 389 Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType())); 390 RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get()); 391 392 // If the FP induction variable still has uses, this is because something else 393 // in the loop uses its value. In order to canonicalize the induction 394 // variable, we chose to eliminate the IV and rewrite it in terms of an 395 // int->fp cast. 396 // 397 // We give preference to sitofp over uitofp because it is faster on most 398 // platforms. 399 if (WeakPH) { 400 Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", 401 &*PN->getParent()->getFirstInsertionPt()); 402 PN->replaceAllUsesWith(Conv); 403 RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get()); 404 } 405 return true; 406 } 407 408 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) { 409 // First step. Check to see if there are any floating-point recurrences. 410 // If there are, change them into integer recurrences, permitting analysis by 411 // the SCEV routines. 412 BasicBlock *Header = L->getHeader(); 413 414 SmallVector<WeakTrackingVH, 8> PHIs; 415 for (PHINode &PN : Header->phis()) 416 PHIs.push_back(&PN); 417 418 bool Changed = false; 419 for (unsigned i = 0, e = PHIs.size(); i != e; ++i) 420 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i])) 421 Changed |= handleFloatingPointIV(L, PN); 422 423 // If the loop previously had floating-point IV, ScalarEvolution 424 // may not have been able to compute a trip count. Now that we've done some 425 // re-writing, the trip count may be computable. 426 if (Changed) 427 SE->forgetLoop(L); 428 return Changed; 429 } 430 431 //===---------------------------------------------------------------------===// 432 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know 433 // they will exit at the first iteration. 434 //===---------------------------------------------------------------------===// 435 436 /// Check to see if this loop has loop invariant conditions which lead to loop 437 /// exits. If so, we know that if the exit path is taken, it is at the first 438 /// loop iteration. This lets us predict exit values of PHI nodes that live in 439 /// loop header. 440 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) { 441 // Verify the input to the pass is already in LCSSA form. 442 assert(L->isLCSSAForm(*DT)); 443 444 SmallVector<BasicBlock *, 8> ExitBlocks; 445 L->getUniqueExitBlocks(ExitBlocks); 446 447 bool MadeAnyChanges = false; 448 for (auto *ExitBB : ExitBlocks) { 449 // If there are no more PHI nodes in this exit block, then no more 450 // values defined inside the loop are used on this path. 451 for (PHINode &PN : ExitBB->phis()) { 452 for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues(); 453 IncomingValIdx != E; ++IncomingValIdx) { 454 auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx); 455 456 // Can we prove that the exit must run on the first iteration if it 457 // runs at all? (i.e. early exits are fine for our purposes, but 458 // traces which lead to this exit being taken on the 2nd iteration 459 // aren't.) Note that this is about whether the exit branch is 460 // executed, not about whether it is taken. 461 if (!L->getLoopLatch() || 462 !DT->dominates(IncomingBB, L->getLoopLatch())) 463 continue; 464 465 // Get condition that leads to the exit path. 466 auto *TermInst = IncomingBB->getTerminator(); 467 468 Value *Cond = nullptr; 469 if (auto *BI = dyn_cast<BranchInst>(TermInst)) { 470 // Must be a conditional branch, otherwise the block 471 // should not be in the loop. 472 Cond = BI->getCondition(); 473 } else if (auto *SI = dyn_cast<SwitchInst>(TermInst)) 474 Cond = SI->getCondition(); 475 else 476 continue; 477 478 if (!L->isLoopInvariant(Cond)) 479 continue; 480 481 auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx)); 482 483 // Only deal with PHIs in the loop header. 484 if (!ExitVal || ExitVal->getParent() != L->getHeader()) 485 continue; 486 487 // If ExitVal is a PHI on the loop header, then we know its 488 // value along this exit because the exit can only be taken 489 // on the first iteration. 490 auto *LoopPreheader = L->getLoopPreheader(); 491 assert(LoopPreheader && "Invalid loop"); 492 int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader); 493 if (PreheaderIdx != -1) { 494 assert(ExitVal->getParent() == L->getHeader() && 495 "ExitVal must be in loop header"); 496 MadeAnyChanges = true; 497 PN.setIncomingValue(IncomingValIdx, 498 ExitVal->getIncomingValue(PreheaderIdx)); 499 SE->forgetValue(&PN); 500 } 501 } 502 } 503 } 504 return MadeAnyChanges; 505 } 506 507 //===----------------------------------------------------------------------===// 508 // IV Widening - Extend the width of an IV to cover its widest uses. 509 //===----------------------------------------------------------------------===// 510 511 /// Update information about the induction variable that is extended by this 512 /// sign or zero extend operation. This is used to determine the final width of 513 /// the IV before actually widening it. 514 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, 515 ScalarEvolution *SE, 516 const TargetTransformInfo *TTI) { 517 bool IsSigned = Cast->getOpcode() == Instruction::SExt; 518 if (!IsSigned && Cast->getOpcode() != Instruction::ZExt) 519 return; 520 521 Type *Ty = Cast->getType(); 522 uint64_t Width = SE->getTypeSizeInBits(Ty); 523 if (!Cast->getModule()->getDataLayout().isLegalInteger(Width)) 524 return; 525 526 // Check that `Cast` actually extends the induction variable (we rely on this 527 // later). This takes care of cases where `Cast` is extending a truncation of 528 // the narrow induction variable, and thus can end up being narrower than the 529 // "narrow" induction variable. 530 uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType()); 531 if (NarrowIVWidth >= Width) 532 return; 533 534 // Cast is either an sext or zext up to this point. 535 // We should not widen an indvar if arithmetics on the wider indvar are more 536 // expensive than those on the narrower indvar. We check only the cost of ADD 537 // because at least an ADD is required to increment the induction variable. We 538 // could compute more comprehensively the cost of all instructions on the 539 // induction variable when necessary. 540 if (TTI && 541 TTI->getArithmeticInstrCost(Instruction::Add, Ty) > 542 TTI->getArithmeticInstrCost(Instruction::Add, 543 Cast->getOperand(0)->getType())) { 544 return; 545 } 546 547 if (!WI.WidestNativeType || 548 Width > SE->getTypeSizeInBits(WI.WidestNativeType)) { 549 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); 550 WI.IsSigned = IsSigned; 551 return; 552 } 553 554 // We extend the IV to satisfy the sign of its user(s), or 'signed' 555 // if there are multiple users with both sign- and zero extensions, 556 // in order not to introduce nondeterministic behaviour based on the 557 // unspecified order of a PHI nodes' users-iterator. 558 WI.IsSigned |= IsSigned; 559 } 560 561 //===----------------------------------------------------------------------===// 562 // Live IV Reduction - Minimize IVs live across the loop. 563 //===----------------------------------------------------------------------===// 564 565 //===----------------------------------------------------------------------===// 566 // Simplification of IV users based on SCEV evaluation. 567 //===----------------------------------------------------------------------===// 568 569 namespace { 570 571 class IndVarSimplifyVisitor : public IVVisitor { 572 ScalarEvolution *SE; 573 const TargetTransformInfo *TTI; 574 PHINode *IVPhi; 575 576 public: 577 WideIVInfo WI; 578 579 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, 580 const TargetTransformInfo *TTI, 581 const DominatorTree *DTree) 582 : SE(SCEV), TTI(TTI), IVPhi(IV) { 583 DT = DTree; 584 WI.NarrowIV = IVPhi; 585 } 586 587 // Implement the interface used by simplifyUsersOfIV. 588 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } 589 }; 590 591 } // end anonymous namespace 592 593 /// Iteratively perform simplification on a worklist of IV users. Each 594 /// successive simplification may push more users which may themselves be 595 /// candidates for simplification. 596 /// 597 /// Sign/Zero extend elimination is interleaved with IV simplification. 598 bool IndVarSimplify::simplifyAndExtend(Loop *L, 599 SCEVExpander &Rewriter, 600 LoopInfo *LI) { 601 SmallVector<WideIVInfo, 8> WideIVs; 602 603 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction( 604 Intrinsic::getName(Intrinsic::experimental_guard)); 605 bool HasGuards = GuardDecl && !GuardDecl->use_empty(); 606 607 SmallVector<PHINode *, 8> LoopPhis; 608 for (PHINode &PN : L->getHeader()->phis()) 609 LoopPhis.push_back(&PN); 610 611 // Each round of simplification iterates through the SimplifyIVUsers worklist 612 // for all current phis, then determines whether any IVs can be 613 // widened. Widening adds new phis to LoopPhis, inducing another round of 614 // simplification on the wide IVs. 615 bool Changed = false; 616 while (!LoopPhis.empty()) { 617 // Evaluate as many IV expressions as possible before widening any IVs. This 618 // forces SCEV to set no-wrap flags before evaluating sign/zero 619 // extension. The first time SCEV attempts to normalize sign/zero extension, 620 // the result becomes final. So for the most predictable results, we delay 621 // evaluation of sign/zero extend evaluation until needed, and avoid running 622 // other SCEV based analysis prior to simplifyAndExtend. 623 do { 624 PHINode *CurrIV = LoopPhis.pop_back_val(); 625 626 // Information about sign/zero extensions of CurrIV. 627 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); 628 629 Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter, 630 &Visitor); 631 632 if (Visitor.WI.WidestNativeType) { 633 WideIVs.push_back(Visitor.WI); 634 } 635 } while(!LoopPhis.empty()); 636 637 // Continue if we disallowed widening. 638 if (!WidenIndVars) 639 continue; 640 641 for (; !WideIVs.empty(); WideIVs.pop_back()) { 642 unsigned ElimExt; 643 unsigned Widened; 644 if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter, 645 DT, DeadInsts, ElimExt, Widened, 646 HasGuards, UsePostIncrementRanges)) { 647 NumElimExt += ElimExt; 648 NumWidened += Widened; 649 Changed = true; 650 LoopPhis.push_back(WidePhi); 651 } 652 } 653 } 654 return Changed; 655 } 656 657 //===----------------------------------------------------------------------===// 658 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition. 659 //===----------------------------------------------------------------------===// 660 661 /// Given an Value which is hoped to be part of an add recurance in the given 662 /// loop, return the associated Phi node if so. Otherwise, return null. Note 663 /// that this is less general than SCEVs AddRec checking. 664 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) { 665 Instruction *IncI = dyn_cast<Instruction>(IncV); 666 if (!IncI) 667 return nullptr; 668 669 switch (IncI->getOpcode()) { 670 case Instruction::Add: 671 case Instruction::Sub: 672 break; 673 case Instruction::GetElementPtr: 674 // An IV counter must preserve its type. 675 if (IncI->getNumOperands() == 2) 676 break; 677 [[fallthrough]]; 678 default: 679 return nullptr; 680 } 681 682 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0)); 683 if (Phi && Phi->getParent() == L->getHeader()) { 684 if (L->isLoopInvariant(IncI->getOperand(1))) 685 return Phi; 686 return nullptr; 687 } 688 if (IncI->getOpcode() == Instruction::GetElementPtr) 689 return nullptr; 690 691 // Allow add/sub to be commuted. 692 Phi = dyn_cast<PHINode>(IncI->getOperand(1)); 693 if (Phi && Phi->getParent() == L->getHeader()) { 694 if (L->isLoopInvariant(IncI->getOperand(0))) 695 return Phi; 696 } 697 return nullptr; 698 } 699 700 /// Whether the current loop exit test is based on this value. Currently this 701 /// is limited to a direct use in the loop condition. 702 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) { 703 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 704 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition()); 705 // TODO: Allow non-icmp loop test. 706 if (!ICmp) 707 return false; 708 709 // TODO: Allow indirect use. 710 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V; 711 } 712 713 /// linearFunctionTestReplace policy. Return true unless we can show that the 714 /// current exit test is already sufficiently canonical. 715 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) { 716 assert(L->getLoopLatch() && "Must be in simplified form"); 717 718 // Avoid converting a constant or loop invariant test back to a runtime 719 // test. This is critical for when SCEV's cached ExitCount is less precise 720 // than the current IR (such as after we've proven a particular exit is 721 // actually dead and thus the BE count never reaches our ExitCount.) 722 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 723 if (L->isLoopInvariant(BI->getCondition())) 724 return false; 725 726 // Do LFTR to simplify the exit condition to an ICMP. 727 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); 728 if (!Cond) 729 return true; 730 731 // Do LFTR to simplify the exit ICMP to EQ/NE 732 ICmpInst::Predicate Pred = Cond->getPredicate(); 733 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ) 734 return true; 735 736 // Look for a loop invariant RHS 737 Value *LHS = Cond->getOperand(0); 738 Value *RHS = Cond->getOperand(1); 739 if (!L->isLoopInvariant(RHS)) { 740 if (!L->isLoopInvariant(LHS)) 741 return true; 742 std::swap(LHS, RHS); 743 } 744 // Look for a simple IV counter LHS 745 PHINode *Phi = dyn_cast<PHINode>(LHS); 746 if (!Phi) 747 Phi = getLoopPhiForCounter(LHS, L); 748 749 if (!Phi) 750 return true; 751 752 // Do LFTR if PHI node is defined in the loop, but is *not* a counter. 753 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch()); 754 if (Idx < 0) 755 return true; 756 757 // Do LFTR if the exit condition's IV is *not* a simple counter. 758 Value *IncV = Phi->getIncomingValue(Idx); 759 return Phi != getLoopPhiForCounter(IncV, L); 760 } 761 762 /// Return true if undefined behavior would provable be executed on the path to 763 /// OnPathTo if Root produced a posion result. Note that this doesn't say 764 /// anything about whether OnPathTo is actually executed or whether Root is 765 /// actually poison. This can be used to assess whether a new use of Root can 766 /// be added at a location which is control equivalent with OnPathTo (such as 767 /// immediately before it) without introducing UB which didn't previously 768 /// exist. Note that a false result conveys no information. 769 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, 770 Instruction *OnPathTo, 771 DominatorTree *DT) { 772 // Basic approach is to assume Root is poison, propagate poison forward 773 // through all users we can easily track, and then check whether any of those 774 // users are provable UB and must execute before out exiting block might 775 // exit. 776 777 // The set of all recursive users we've visited (which are assumed to all be 778 // poison because of said visit) 779 SmallSet<const Value *, 16> KnownPoison; 780 SmallVector<const Instruction*, 16> Worklist; 781 Worklist.push_back(Root); 782 while (!Worklist.empty()) { 783 const Instruction *I = Worklist.pop_back_val(); 784 785 // If we know this must trigger UB on a path leading our target. 786 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo)) 787 return true; 788 789 // If we can't analyze propagation through this instruction, just skip it 790 // and transitive users. Safe as false is a conservative result. 791 if (!propagatesPoison(cast<Operator>(I)) && I != Root) 792 continue; 793 794 if (KnownPoison.insert(I).second) 795 for (const User *User : I->users()) 796 Worklist.push_back(cast<Instruction>(User)); 797 } 798 799 // Might be non-UB, or might have a path we couldn't prove must execute on 800 // way to exiting bb. 801 return false; 802 } 803 804 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils 805 /// down to checking that all operands are constant and listing instructions 806 /// that may hide undef. 807 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited, 808 unsigned Depth) { 809 if (isa<Constant>(V)) 810 return !isa<UndefValue>(V); 811 812 if (Depth >= 6) 813 return false; 814 815 // Conservatively handle non-constant non-instructions. For example, Arguments 816 // may be undef. 817 Instruction *I = dyn_cast<Instruction>(V); 818 if (!I) 819 return false; 820 821 // Load and return values may be undef. 822 if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I)) 823 return false; 824 825 // Optimistically handle other instructions. 826 for (Value *Op : I->operands()) { 827 if (!Visited.insert(Op).second) 828 continue; 829 if (!hasConcreteDefImpl(Op, Visited, Depth+1)) 830 return false; 831 } 832 return true; 833 } 834 835 /// Return true if the given value is concrete. We must prove that undef can 836 /// never reach it. 837 /// 838 /// TODO: If we decide that this is a good approach to checking for undef, we 839 /// may factor it into a common location. 840 static bool hasConcreteDef(Value *V) { 841 SmallPtrSet<Value*, 8> Visited; 842 Visited.insert(V); 843 return hasConcreteDefImpl(V, Visited, 0); 844 } 845 846 /// Return true if this IV has any uses other than the (soon to be rewritten) 847 /// loop exit test. 848 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { 849 int LatchIdx = Phi->getBasicBlockIndex(LatchBlock); 850 Value *IncV = Phi->getIncomingValue(LatchIdx); 851 852 for (User *U : Phi->users()) 853 if (U != Cond && U != IncV) return false; 854 855 for (User *U : IncV->users()) 856 if (U != Cond && U != Phi) return false; 857 return true; 858 } 859 860 /// Return true if the given phi is a "counter" in L. A counter is an 861 /// add recurance (of integer or pointer type) with an arbitrary start, and a 862 /// step of 1. Note that L must have exactly one latch. 863 static bool isLoopCounter(PHINode* Phi, Loop *L, 864 ScalarEvolution *SE) { 865 assert(Phi->getParent() == L->getHeader()); 866 assert(L->getLoopLatch()); 867 868 if (!SE->isSCEVable(Phi->getType())) 869 return false; 870 871 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi)); 872 if (!AR || AR->getLoop() != L || !AR->isAffine()) 873 return false; 874 875 const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)); 876 if (!Step || !Step->isOne()) 877 return false; 878 879 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch()); 880 Value *IncV = Phi->getIncomingValue(LatchIdx); 881 return (getLoopPhiForCounter(IncV, L) == Phi && 882 isa<SCEVAddRecExpr>(SE->getSCEV(IncV))); 883 } 884 885 /// Search the loop header for a loop counter (anadd rec w/step of one) 886 /// suitable for use by LFTR. If multiple counters are available, select the 887 /// "best" one based profitable heuristics. 888 /// 889 /// BECount may be an i8* pointer type. The pointer difference is already 890 /// valid count without scaling the address stride, so it remains a pointer 891 /// expression as far as SCEV is concerned. 892 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB, 893 const SCEV *BECount, 894 ScalarEvolution *SE, DominatorTree *DT) { 895 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType()); 896 897 Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition(); 898 899 // Loop over all of the PHI nodes, looking for a simple counter. 900 PHINode *BestPhi = nullptr; 901 const SCEV *BestInit = nullptr; 902 BasicBlock *LatchBlock = L->getLoopLatch(); 903 assert(LatchBlock && "Must be in simplified form"); 904 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 905 906 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 907 PHINode *Phi = cast<PHINode>(I); 908 if (!isLoopCounter(Phi, L, SE)) 909 continue; 910 911 // Avoid comparing an integer IV against a pointer Limit. 912 if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy()) 913 continue; 914 915 const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi)); 916 917 // AR may be a pointer type, while BECount is an integer type. 918 // AR may be wider than BECount. With eq/ne tests overflow is immaterial. 919 // AR may not be a narrower type, or we may never exit. 920 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType()); 921 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth)) 922 continue; 923 924 // Avoid reusing a potentially undef value to compute other values that may 925 // have originally had a concrete definition. 926 if (!hasConcreteDef(Phi)) { 927 // We explicitly allow unknown phis as long as they are already used by 928 // the loop exit test. This is legal since performing LFTR could not 929 // increase the number of undef users. 930 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock); 931 if (!isLoopExitTestBasedOn(Phi, ExitingBB) && 932 !isLoopExitTestBasedOn(IncPhi, ExitingBB)) 933 continue; 934 } 935 936 // Avoid introducing undefined behavior due to poison which didn't exist in 937 // the original program. (Annoyingly, the rules for poison and undef 938 // propagation are distinct, so this does NOT cover the undef case above.) 939 // We have to ensure that we don't introduce UB by introducing a use on an 940 // iteration where said IV produces poison. Our strategy here differs for 941 // pointers and integer IVs. For integers, we strip and reinfer as needed, 942 // see code in linearFunctionTestReplace. For pointers, we restrict 943 // transforms as there is no good way to reinfer inbounds once lost. 944 if (!Phi->getType()->isIntegerTy() && 945 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) 946 continue; 947 948 const SCEV *Init = AR->getStart(); 949 950 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { 951 // Don't force a live loop counter if another IV can be used. 952 if (AlmostDeadIV(Phi, LatchBlock, Cond)) 953 continue; 954 955 // Prefer to count-from-zero. This is a more "canonical" counter form. It 956 // also prefers integer to pointer IVs. 957 if (BestInit->isZero() != Init->isZero()) { 958 if (BestInit->isZero()) 959 continue; 960 } 961 // If two IVs both count from zero or both count from nonzero then the 962 // narrower is likely a dead phi that has been widened. Use the wider phi 963 // to allow the other to be eliminated. 964 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) 965 continue; 966 } 967 BestPhi = Phi; 968 BestInit = Init; 969 } 970 return BestPhi; 971 } 972 973 /// Insert an IR expression which computes the value held by the IV IndVar 974 /// (which must be an loop counter w/unit stride) after the backedge of loop L 975 /// is taken ExitCount times. 976 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, 977 const SCEV *ExitCount, bool UsePostInc, Loop *L, 978 SCEVExpander &Rewriter, ScalarEvolution *SE) { 979 assert(isLoopCounter(IndVar, L, SE)); 980 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 981 const SCEV *IVInit = AR->getStart(); 982 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 983 984 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter 985 // finds a valid pointer IV. Sign extend ExitCount in order to materialize a 986 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing 987 // the existing GEPs whenever possible. 988 if (IndVar->getType()->isPointerTy() && 989 !ExitCount->getType()->isPointerTy()) { 990 // IVOffset will be the new GEP offset that is interpreted by GEP as a 991 // signed value. ExitCount on the other hand represents the loop trip count, 992 // which is an unsigned value. FindLoopCounter only allows induction 993 // variables that have a positive unit stride of one. This means we don't 994 // have to handle the case of negative offsets (yet) and just need to zero 995 // extend ExitCount. 996 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); 997 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy); 998 if (UsePostInc) 999 IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy)); 1000 1001 // Expand the code for the iteration count. 1002 assert(SE->isLoopInvariant(IVOffset, L) && 1003 "Computed iteration count is not loop invariant!"); 1004 1005 const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset); 1006 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1007 return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI); 1008 } else { 1009 // In any other case, convert both IVInit and ExitCount to integers before 1010 // comparing. This may result in SCEV expansion of pointers, but in practice 1011 // SCEV will fold the pointer arithmetic away as such: 1012 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). 1013 // 1014 // Valid Cases: (1) both integers is most common; (2) both may be pointers 1015 // for simple memset-style loops. 1016 // 1017 // IVInit integer and ExitCount pointer would only occur if a canonical IV 1018 // were generated on top of case #2, which is not expected. 1019 1020 // For unit stride, IVCount = Start + ExitCount with 2's complement 1021 // overflow. 1022 1023 // For integer IVs, truncate the IV before computing IVInit + BECount, 1024 // unless we know apriori that the limit must be a constant when evaluated 1025 // in the bitwidth of the IV. We prefer (potentially) keeping a truncate 1026 // of the IV in the loop over a (potentially) expensive expansion of the 1027 // widened exit count add(zext(add)) expression. 1028 if (SE->getTypeSizeInBits(IVInit->getType()) 1029 > SE->getTypeSizeInBits(ExitCount->getType())) { 1030 if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount)) 1031 ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType()); 1032 else 1033 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType()); 1034 } 1035 1036 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount); 1037 1038 if (UsePostInc) 1039 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); 1040 1041 // Expand the code for the iteration count. 1042 assert(SE->isLoopInvariant(IVLimit, L) && 1043 "Computed iteration count is not loop invariant!"); 1044 // Ensure that we generate the same type as IndVar, or a smaller integer 1045 // type. In the presence of null pointer values, we have an integer type 1046 // SCEV expression (IVInit) for a pointer type IV value (IndVar). 1047 Type *LimitTy = ExitCount->getType()->isPointerTy() ? 1048 IndVar->getType() : ExitCount->getType(); 1049 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1050 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); 1051 } 1052 } 1053 1054 /// This method rewrites the exit condition of the loop to be a canonical != 1055 /// comparison against the incremented loop induction variable. This pass is 1056 /// able to rewrite the exit tests of any loop where the SCEV analysis can 1057 /// determine a loop-invariant trip count of the loop, which is actually a much 1058 /// broader range than just linear tests. 1059 bool IndVarSimplify:: 1060 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 1061 const SCEV *ExitCount, 1062 PHINode *IndVar, SCEVExpander &Rewriter) { 1063 assert(L->getLoopLatch() && "Loop no longer in simplified form?"); 1064 assert(isLoopCounter(IndVar, L, SE)); 1065 Instruction * const IncVar = 1066 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch())); 1067 1068 // Initialize CmpIndVar to the preincremented IV. 1069 Value *CmpIndVar = IndVar; 1070 bool UsePostInc = false; 1071 1072 // If the exiting block is the same as the backedge block, we prefer to 1073 // compare against the post-incremented value, otherwise we must compare 1074 // against the preincremented value. 1075 if (ExitingBB == L->getLoopLatch()) { 1076 // For pointer IVs, we chose to not strip inbounds which requires us not 1077 // to add a potentially UB introducing use. We need to either a) show 1078 // the loop test we're modifying is already in post-inc form, or b) show 1079 // that adding a use must not introduce UB. 1080 bool SafeToPostInc = 1081 IndVar->getType()->isIntegerTy() || 1082 isLoopExitTestBasedOn(IncVar, ExitingBB) || 1083 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT); 1084 if (SafeToPostInc) { 1085 UsePostInc = true; 1086 CmpIndVar = IncVar; 1087 } 1088 } 1089 1090 // It may be necessary to drop nowrap flags on the incrementing instruction 1091 // if either LFTR moves from a pre-inc check to a post-inc check (in which 1092 // case the increment might have previously been poison on the last iteration 1093 // only) or if LFTR switches to a different IV that was previously dynamically 1094 // dead (and as such may be arbitrarily poison). We remove any nowrap flags 1095 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc 1096 // check), because the pre-inc addrec flags may be adopted from the original 1097 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags. 1098 // TODO: This handling is inaccurate for one case: If we switch to a 1099 // dynamically dead IV that wraps on the first loop iteration only, which is 1100 // not covered by the post-inc addrec. (If the new IV was not dynamically 1101 // dead, it could not be poison on the first iteration in the first place.) 1102 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) { 1103 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar)); 1104 if (BO->hasNoUnsignedWrap()) 1105 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap()); 1106 if (BO->hasNoSignedWrap()) 1107 BO->setHasNoSignedWrap(AR->hasNoSignedWrap()); 1108 } 1109 1110 Value *ExitCnt = genLoopLimit( 1111 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE); 1112 assert(ExitCnt->getType()->isPointerTy() == 1113 IndVar->getType()->isPointerTy() && 1114 "genLoopLimit missed a cast"); 1115 1116 // Insert a new icmp_ne or icmp_eq instruction before the branch. 1117 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1118 ICmpInst::Predicate P; 1119 if (L->contains(BI->getSuccessor(0))) 1120 P = ICmpInst::ICMP_NE; 1121 else 1122 P = ICmpInst::ICMP_EQ; 1123 1124 IRBuilder<> Builder(BI); 1125 1126 // The new loop exit condition should reuse the debug location of the 1127 // original loop exit condition. 1128 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition())) 1129 Builder.SetCurrentDebugLocation(Cond->getDebugLoc()); 1130 1131 // For integer IVs, if we evaluated the limit in the narrower bitwidth to 1132 // avoid the expensive expansion of the limit expression in the wider type, 1133 // emit a truncate to narrow the IV to the ExitCount type. This is safe 1134 // since we know (from the exit count bitwidth), that we can't self-wrap in 1135 // the narrower type. 1136 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); 1137 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); 1138 if (CmpIndVarSize > ExitCntSize) { 1139 assert(!CmpIndVar->getType()->isPointerTy() && 1140 !ExitCnt->getType()->isPointerTy()); 1141 1142 // Before resorting to actually inserting the truncate, use the same 1143 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend 1144 // the other side of the comparison instead. We still evaluate the limit 1145 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to 1146 // a truncate within in. 1147 bool Extended = false; 1148 const SCEV *IV = SE->getSCEV(CmpIndVar); 1149 const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar), 1150 ExitCnt->getType()); 1151 const SCEV *ZExtTrunc = 1152 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType()); 1153 1154 if (ZExtTrunc == IV) { 1155 Extended = true; 1156 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(), 1157 "wide.trip.count"); 1158 } else { 1159 const SCEV *SExtTrunc = 1160 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType()); 1161 if (SExtTrunc == IV) { 1162 Extended = true; 1163 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(), 1164 "wide.trip.count"); 1165 } 1166 } 1167 1168 if (Extended) { 1169 bool Discard; 1170 L->makeLoopInvariant(ExitCnt, Discard); 1171 } else 1172 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), 1173 "lftr.wideiv"); 1174 } 1175 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 1176 << " LHS:" << *CmpIndVar << '\n' 1177 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==") 1178 << "\n" 1179 << " RHS:\t" << *ExitCnt << "\n" 1180 << "ExitCount:\t" << *ExitCount << "\n" 1181 << " was: " << *BI->getCondition() << "\n"); 1182 1183 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); 1184 Value *OrigCond = BI->getCondition(); 1185 // It's tempting to use replaceAllUsesWith here to fully replace the old 1186 // comparison, but that's not immediately safe, since users of the old 1187 // comparison may not be dominated by the new comparison. Instead, just 1188 // update the branch to use the new comparison; in the common case this 1189 // will make old comparison dead. 1190 BI->setCondition(Cond); 1191 DeadInsts.emplace_back(OrigCond); 1192 1193 ++NumLFTR; 1194 return true; 1195 } 1196 1197 //===----------------------------------------------------------------------===// 1198 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders. 1199 //===----------------------------------------------------------------------===// 1200 1201 /// If there's a single exit block, sink any loop-invariant values that 1202 /// were defined in the preheader but not used inside the loop into the 1203 /// exit block to reduce register pressure in the loop. 1204 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { 1205 BasicBlock *ExitBlock = L->getExitBlock(); 1206 if (!ExitBlock) return false; 1207 1208 BasicBlock *Preheader = L->getLoopPreheader(); 1209 if (!Preheader) return false; 1210 1211 bool MadeAnyChanges = false; 1212 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); 1213 BasicBlock::iterator I(Preheader->getTerminator()); 1214 while (I != Preheader->begin()) { 1215 --I; 1216 // New instructions were inserted at the end of the preheader. 1217 if (isa<PHINode>(I)) 1218 break; 1219 1220 // Don't move instructions which might have side effects, since the side 1221 // effects need to complete before instructions inside the loop. Also don't 1222 // move instructions which might read memory, since the loop may modify 1223 // memory. Note that it's okay if the instruction might have undefined 1224 // behavior: LoopSimplify guarantees that the preheader dominates the exit 1225 // block. 1226 if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 1227 continue; 1228 1229 // Skip debug info intrinsics. 1230 if (isa<DbgInfoIntrinsic>(I)) 1231 continue; 1232 1233 // Skip eh pad instructions. 1234 if (I->isEHPad()) 1235 continue; 1236 1237 // Don't sink alloca: we never want to sink static alloca's out of the 1238 // entry block, and correctly sinking dynamic alloca's requires 1239 // checks for stacksave/stackrestore intrinsics. 1240 // FIXME: Refactor this check somehow? 1241 if (isa<AllocaInst>(I)) 1242 continue; 1243 1244 // Determine if there is a use in or before the loop (direct or 1245 // otherwise). 1246 bool UsedInLoop = false; 1247 for (Use &U : I->uses()) { 1248 Instruction *User = cast<Instruction>(U.getUser()); 1249 BasicBlock *UseBB = User->getParent(); 1250 if (PHINode *P = dyn_cast<PHINode>(User)) { 1251 unsigned i = 1252 PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 1253 UseBB = P->getIncomingBlock(i); 1254 } 1255 if (UseBB == Preheader || L->contains(UseBB)) { 1256 UsedInLoop = true; 1257 break; 1258 } 1259 } 1260 1261 // If there is, the def must remain in the preheader. 1262 if (UsedInLoop) 1263 continue; 1264 1265 // Otherwise, sink it to the exit block. 1266 Instruction *ToMove = &*I; 1267 bool Done = false; 1268 1269 if (I != Preheader->begin()) { 1270 // Skip debug info intrinsics. 1271 do { 1272 --I; 1273 } while (I->isDebugOrPseudoInst() && I != Preheader->begin()); 1274 1275 if (I->isDebugOrPseudoInst() && I == Preheader->begin()) 1276 Done = true; 1277 } else { 1278 Done = true; 1279 } 1280 1281 MadeAnyChanges = true; 1282 ToMove->moveBefore(*ExitBlock, InsertPt); 1283 SE->forgetValue(ToMove); 1284 if (Done) break; 1285 InsertPt = ToMove->getIterator(); 1286 } 1287 1288 return MadeAnyChanges; 1289 } 1290 1291 static void replaceExitCond(BranchInst *BI, Value *NewCond, 1292 SmallVectorImpl<WeakTrackingVH> &DeadInsts) { 1293 auto *OldCond = BI->getCondition(); 1294 LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI 1295 << " with " << *NewCond << "\n"); 1296 BI->setCondition(NewCond); 1297 if (OldCond->use_empty()) 1298 DeadInsts.emplace_back(OldCond); 1299 } 1300 1301 static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken, 1302 SmallVectorImpl<WeakTrackingVH> &DeadInsts) { 1303 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1304 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); 1305 auto *OldCond = BI->getCondition(); 1306 auto *NewCond = 1307 ConstantInt::get(OldCond->getType(), IsTaken ? ExitIfTrue : !ExitIfTrue); 1308 replaceExitCond(BI, NewCond, DeadInsts); 1309 } 1310 1311 static void replaceLoopPHINodesWithPreheaderValues( 1312 LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts, 1313 ScalarEvolution &SE) { 1314 assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!"); 1315 auto *LoopPreheader = L->getLoopPreheader(); 1316 auto *LoopHeader = L->getHeader(); 1317 SmallVector<Instruction *> Worklist; 1318 for (auto &PN : LoopHeader->phis()) { 1319 auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader); 1320 for (User *U : PN.users()) 1321 Worklist.push_back(cast<Instruction>(U)); 1322 SE.forgetValue(&PN); 1323 PN.replaceAllUsesWith(PreheaderIncoming); 1324 DeadInsts.emplace_back(&PN); 1325 } 1326 1327 // Replacing with the preheader value will often allow IV users to simplify 1328 // (especially if the preheader value is a constant). 1329 SmallPtrSet<Instruction *, 16> Visited; 1330 while (!Worklist.empty()) { 1331 auto *I = cast<Instruction>(Worklist.pop_back_val()); 1332 if (!Visited.insert(I).second) 1333 continue; 1334 1335 // Don't simplify instructions outside the loop. 1336 if (!L->contains(I)) 1337 continue; 1338 1339 Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout()); 1340 if (Res && LI->replacementPreservesLCSSAForm(I, Res)) { 1341 for (User *U : I->users()) 1342 Worklist.push_back(cast<Instruction>(U)); 1343 I->replaceAllUsesWith(Res); 1344 DeadInsts.emplace_back(I); 1345 } 1346 } 1347 } 1348 1349 static void replaceWithInvariantCond( 1350 const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred, 1351 const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter, 1352 SmallVectorImpl<WeakTrackingVH> &DeadInsts) { 1353 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1354 Rewriter.setInsertPoint(BI); 1355 auto *LHSV = Rewriter.expandCodeFor(InvariantLHS); 1356 auto *RHSV = Rewriter.expandCodeFor(InvariantRHS); 1357 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); 1358 if (ExitIfTrue) 1359 InvariantPred = ICmpInst::getInversePredicate(InvariantPred); 1360 IRBuilder<> Builder(BI); 1361 auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV, 1362 BI->getCondition()->getName()); 1363 replaceExitCond(BI, NewCond, DeadInsts); 1364 } 1365 1366 static bool optimizeLoopExitWithUnknownExitCount( 1367 const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, 1368 const SCEV *MaxIter, bool Inverted, bool SkipLastIter, 1369 ScalarEvolution *SE, SCEVExpander &Rewriter, 1370 SmallVectorImpl<WeakTrackingVH> &DeadInsts) { 1371 ICmpInst::Predicate Pred; 1372 Value *LHS, *RHS; 1373 BasicBlock *TrueSucc, *FalseSucc; 1374 if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), 1375 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) 1376 return false; 1377 1378 assert((L->contains(TrueSucc) != L->contains(FalseSucc)) && 1379 "Not a loop exit!"); 1380 1381 // 'LHS pred RHS' should now mean that we stay in loop. 1382 if (L->contains(FalseSucc)) 1383 Pred = CmpInst::getInversePredicate(Pred); 1384 1385 // If we are proving loop exit, invert the predicate. 1386 if (Inverted) 1387 Pred = CmpInst::getInversePredicate(Pred); 1388 1389 const SCEV *LHSS = SE->getSCEVAtScope(LHS, L); 1390 const SCEV *RHSS = SE->getSCEVAtScope(RHS, L); 1391 // Can we prove it to be trivially true? 1392 if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) { 1393 foldExit(L, ExitingBB, Inverted, DeadInsts); 1394 return true; 1395 } 1396 // Further logic works for non-inverted condition only. 1397 if (Inverted) 1398 return false; 1399 1400 auto *ARTy = LHSS->getType(); 1401 auto *MaxIterTy = MaxIter->getType(); 1402 // If possible, adjust types. 1403 if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy)) 1404 MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy); 1405 else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) { 1406 const SCEV *MinusOne = SE->getMinusOne(ARTy); 1407 auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy); 1408 if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI)) 1409 MaxIter = SE->getTruncateExpr(MaxIter, ARTy); 1410 } 1411 1412 if (SkipLastIter) { 1413 const SCEV *One = SE->getOne(MaxIter->getType()); 1414 MaxIter = SE->getMinusSCEV(MaxIter, One); 1415 } 1416 1417 // Check if there is a loop-invariant predicate equivalent to our check. 1418 auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS, 1419 L, BI, MaxIter); 1420 if (!LIP) 1421 return false; 1422 1423 // Can we prove it to be trivially true? 1424 if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI)) 1425 foldExit(L, ExitingBB, Inverted, DeadInsts); 1426 else 1427 replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS, 1428 Rewriter, DeadInsts); 1429 1430 return true; 1431 } 1432 1433 bool IndVarSimplify::canonicalizeExitCondition(Loop *L) { 1434 // Note: This is duplicating a particular part on SimplifyIndVars reasoning. 1435 // We need to duplicate it because given icmp zext(small-iv), C, IVUsers 1436 // never reaches the icmp since the zext doesn't fold to an AddRec unless 1437 // it already has flags. The alternative to this would be to extending the 1438 // set of "interesting" IV users to include the icmp, but doing that 1439 // regresses results in practice by querying SCEVs before trip counts which 1440 // rely on them which results in SCEV caching sub-optimal answers. The 1441 // concern about caching sub-optimal results is why we only query SCEVs of 1442 // the loop invariant RHS here. 1443 SmallVector<BasicBlock*, 16> ExitingBlocks; 1444 L->getExitingBlocks(ExitingBlocks); 1445 bool Changed = false; 1446 for (auto *ExitingBB : ExitingBlocks) { 1447 auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 1448 if (!BI) 1449 continue; 1450 assert(BI->isConditional() && "exit branch must be conditional"); 1451 1452 auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition()); 1453 if (!ICmp || !ICmp->hasOneUse()) 1454 continue; 1455 1456 auto *LHS = ICmp->getOperand(0); 1457 auto *RHS = ICmp->getOperand(1); 1458 // For the range reasoning, avoid computing SCEVs in the loop to avoid 1459 // poisoning cache with sub-optimal results. For the must-execute case, 1460 // this is a neccessary precondition for correctness. 1461 if (!L->isLoopInvariant(RHS)) { 1462 if (!L->isLoopInvariant(LHS)) 1463 continue; 1464 // Same logic applies for the inverse case 1465 std::swap(LHS, RHS); 1466 } 1467 1468 // Match (icmp signed-cond zext, RHS) 1469 Value *LHSOp = nullptr; 1470 if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned()) 1471 continue; 1472 1473 const DataLayout &DL = ExitingBB->getModule()->getDataLayout(); 1474 const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType()); 1475 const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType()); 1476 auto FullCR = ConstantRange::getFull(InnerBitWidth); 1477 FullCR = FullCR.zeroExtend(OuterBitWidth); 1478 auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L)); 1479 if (FullCR.contains(RHSCR)) { 1480 // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus 1481 // replace the signed condition with the unsigned version. 1482 ICmp->setPredicate(ICmp->getUnsignedPredicate()); 1483 Changed = true; 1484 // Note: No SCEV invalidation needed. We've changed the predicate, but 1485 // have not changed exit counts, or the values produced by the compare. 1486 continue; 1487 } 1488 } 1489 1490 // Now that we've canonicalized the condition to match the extend, 1491 // see if we can rotate the extend out of the loop. 1492 for (auto *ExitingBB : ExitingBlocks) { 1493 auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 1494 if (!BI) 1495 continue; 1496 assert(BI->isConditional() && "exit branch must be conditional"); 1497 1498 auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition()); 1499 if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned()) 1500 continue; 1501 1502 bool Swapped = false; 1503 auto *LHS = ICmp->getOperand(0); 1504 auto *RHS = ICmp->getOperand(1); 1505 if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS)) 1506 // Nothing to rotate 1507 continue; 1508 if (L->isLoopInvariant(LHS)) { 1509 // Same logic applies for the inverse case until we actually pick 1510 // which operand of the compare to update. 1511 Swapped = true; 1512 std::swap(LHS, RHS); 1513 } 1514 assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS)); 1515 1516 // Match (icmp unsigned-cond zext, RHS) 1517 // TODO: Extend to handle corresponding sext/signed-cmp case 1518 // TODO: Extend to other invertible functions 1519 Value *LHSOp = nullptr; 1520 if (!match(LHS, m_ZExt(m_Value(LHSOp)))) 1521 continue; 1522 1523 // In general, we only rotate if we can do so without increasing the number 1524 // of instructions. The exception is when we have an zext(add-rec). The 1525 // reason for allowing this exception is that we know we need to get rid 1526 // of the zext for SCEV to be able to compute a trip count for said loops; 1527 // we consider the new trip count valuable enough to increase instruction 1528 // count by one. 1529 if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp))) 1530 continue; 1531 1532 // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS 1533 // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS) 1534 // when zext is loop varying and RHS is loop invariant. This converts 1535 // loop varying work to loop-invariant work. 1536 auto doRotateTransform = [&]() { 1537 assert(ICmp->isUnsigned() && "must have proven unsigned already"); 1538 auto *NewRHS = 1539 CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "", 1540 L->getLoopPreheader()->getTerminator()); 1541 ICmp->setOperand(Swapped ? 1 : 0, LHSOp); 1542 ICmp->setOperand(Swapped ? 0 : 1, NewRHS); 1543 if (LHS->use_empty()) 1544 DeadInsts.push_back(LHS); 1545 }; 1546 1547 1548 const DataLayout &DL = ExitingBB->getModule()->getDataLayout(); 1549 const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType()); 1550 const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType()); 1551 auto FullCR = ConstantRange::getFull(InnerBitWidth); 1552 FullCR = FullCR.zeroExtend(OuterBitWidth); 1553 auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L)); 1554 if (FullCR.contains(RHSCR)) { 1555 doRotateTransform(); 1556 Changed = true; 1557 // Note, we are leaving SCEV in an unfortunately imprecise case here 1558 // as rotation tends to reveal information about trip counts not 1559 // previously visible. 1560 continue; 1561 } 1562 } 1563 1564 return Changed; 1565 } 1566 1567 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { 1568 SmallVector<BasicBlock*, 16> ExitingBlocks; 1569 L->getExitingBlocks(ExitingBlocks); 1570 1571 // Remove all exits which aren't both rewriteable and execute on every 1572 // iteration. 1573 llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) { 1574 // If our exitting block exits multiple loops, we can only rewrite the 1575 // innermost one. Otherwise, we're changing how many times the innermost 1576 // loop runs before it exits. 1577 if (LI->getLoopFor(ExitingBB) != L) 1578 return true; 1579 1580 // Can't rewrite non-branch yet. 1581 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 1582 if (!BI) 1583 return true; 1584 1585 // Likewise, the loop latch must be dominated by the exiting BB. 1586 if (!DT->dominates(ExitingBB, L->getLoopLatch())) 1587 return true; 1588 1589 if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { 1590 // If already constant, nothing to do. However, if this is an 1591 // unconditional exit, we can still replace header phis with their 1592 // preheader value. 1593 if (!L->contains(BI->getSuccessor(CI->isNullValue()))) 1594 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE); 1595 return true; 1596 } 1597 1598 return false; 1599 }); 1600 1601 if (ExitingBlocks.empty()) 1602 return false; 1603 1604 // Get a symbolic upper bound on the loop backedge taken count. 1605 const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L); 1606 if (isa<SCEVCouldNotCompute>(MaxBECount)) 1607 return false; 1608 1609 // Visit our exit blocks in order of dominance. We know from the fact that 1610 // all exits must dominate the latch, so there is a total dominance order 1611 // between them. 1612 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) { 1613 // std::sort sorts in ascending order, so we want the inverse of 1614 // the normal dominance relation. 1615 if (A == B) return false; 1616 if (DT->properlyDominates(A, B)) 1617 return true; 1618 else { 1619 assert(DT->properlyDominates(B, A) && 1620 "expected total dominance order!"); 1621 return false; 1622 } 1623 }); 1624 #ifdef ASSERT 1625 for (unsigned i = 1; i < ExitingBlocks.size(); i++) { 1626 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])); 1627 } 1628 #endif 1629 1630 bool Changed = false; 1631 bool SkipLastIter = false; 1632 SmallSet<const SCEV *, 8> DominatingExactExitCounts; 1633 for (BasicBlock *ExitingBB : ExitingBlocks) { 1634 const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB); 1635 const SCEV *MaxExitCount = SE->getExitCount( 1636 L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum); 1637 if (isa<SCEVCouldNotCompute>(ExactExitCount)) { 1638 // Okay, we do not know the exit count here. Can we at least prove that it 1639 // will remain the same within iteration space? 1640 auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1641 auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) { 1642 return optimizeLoopExitWithUnknownExitCount( 1643 L, BI, ExitingBB, MaxBECount, Inverted, SkipLastIter, SE, Rewriter, 1644 DeadInsts); 1645 }; 1646 1647 // TODO: We might have proved that we can skip the last iteration for 1648 // this check. In this case, we only want to check the condition on the 1649 // pre-last iteration (MaxBECount - 1). However, there is a nasty 1650 // corner case: 1651 // 1652 // for (i = len; i != 0; i--) { ... check (i ult X) ... } 1653 // 1654 // If we could not prove that len != 0, then we also could not prove that 1655 // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then 1656 // OptimizeCond will likely not prove anything for it, even if it could 1657 // prove the same fact for len. 1658 // 1659 // As a temporary solution, we query both last and pre-last iterations in 1660 // hope that we will be able to prove triviality for at least one of 1661 // them. We can stop querying MaxBECount for this case once SCEV 1662 // understands that (MaxBECount - 1) will not overflow here. 1663 if (OptimizeCond(false, false) || OptimizeCond(true, false)) 1664 Changed = true; 1665 else if (SkipLastIter) 1666 if (OptimizeCond(false, true) || OptimizeCond(true, true)) 1667 Changed = true; 1668 if (MaxBECount == MaxExitCount) 1669 // If the loop has more than 1 iteration, all further checks will be 1670 // executed 1 iteration less. 1671 SkipLastIter = true; 1672 continue; 1673 } 1674 1675 if (MaxBECount == MaxExitCount) 1676 // If the loop has more than 1 iteration, all further checks will be 1677 // executed 1 iteration less. 1678 SkipLastIter = true; 1679 1680 // If we know we'd exit on the first iteration, rewrite the exit to 1681 // reflect this. This does not imply the loop must exit through this 1682 // exit; there may be an earlier one taken on the first iteration. 1683 // We know that the backedge can't be taken, so we replace all 1684 // the header PHIs with values coming from the preheader. 1685 if (ExactExitCount->isZero()) { 1686 foldExit(L, ExitingBB, true, DeadInsts); 1687 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE); 1688 Changed = true; 1689 continue; 1690 } 1691 1692 assert(ExactExitCount->getType()->isIntegerTy() && 1693 MaxBECount->getType()->isIntegerTy() && 1694 "Exit counts must be integers"); 1695 1696 Type *WiderType = 1697 SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType()); 1698 ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType); 1699 MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType); 1700 assert(MaxBECount->getType() == ExactExitCount->getType()); 1701 1702 // Can we prove that some other exit must be taken strictly before this 1703 // one? 1704 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount, 1705 ExactExitCount)) { 1706 foldExit(L, ExitingBB, false, DeadInsts); 1707 Changed = true; 1708 continue; 1709 } 1710 1711 // As we run, keep track of which exit counts we've encountered. If we 1712 // find a duplicate, we've found an exit which would have exited on the 1713 // exiting iteration, but (from the visit order) strictly follows another 1714 // which does the same and is thus dead. 1715 if (!DominatingExactExitCounts.insert(ExactExitCount).second) { 1716 foldExit(L, ExitingBB, false, DeadInsts); 1717 Changed = true; 1718 continue; 1719 } 1720 1721 // TODO: There might be another oppurtunity to leverage SCEV's reasoning 1722 // here. If we kept track of the min of dominanting exits so far, we could 1723 // discharge exits with EC >= MDEC. This is less powerful than the existing 1724 // transform (since later exits aren't considered), but potentially more 1725 // powerful for any case where SCEV can prove a >=u b, but neither a == b 1726 // or a >u b. Such a case is not currently known. 1727 } 1728 return Changed; 1729 } 1730 1731 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { 1732 SmallVector<BasicBlock*, 16> ExitingBlocks; 1733 L->getExitingBlocks(ExitingBlocks); 1734 1735 // Finally, see if we can rewrite our exit conditions into a loop invariant 1736 // form. If we have a read-only loop, and we can tell that we must exit down 1737 // a path which does not need any of the values computed within the loop, we 1738 // can rewrite the loop to exit on the first iteration. Note that this 1739 // doesn't either a) tell us the loop exits on the first iteration (unless 1740 // *all* exits are predicateable) or b) tell us *which* exit might be taken. 1741 // This transformation looks a lot like a restricted form of dead loop 1742 // elimination, but restricted to read-only loops and without neccesssarily 1743 // needing to kill the loop entirely. 1744 if (!LoopPredication) 1745 return false; 1746 1747 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits 1748 // through *explicit* control flow. We have to eliminate the possibility of 1749 // implicit exits (see below) before we know it's truly exact. 1750 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L); 1751 if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC)) 1752 return false; 1753 1754 assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant"); 1755 assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer"); 1756 1757 auto BadExit = [&](BasicBlock *ExitingBB) { 1758 // If our exiting block exits multiple loops, we can only rewrite the 1759 // innermost one. Otherwise, we're changing how many times the innermost 1760 // loop runs before it exits. 1761 if (LI->getLoopFor(ExitingBB) != L) 1762 return true; 1763 1764 // Can't rewrite non-branch yet. 1765 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 1766 if (!BI) 1767 return true; 1768 1769 // If already constant, nothing to do. 1770 if (isa<Constant>(BI->getCondition())) 1771 return true; 1772 1773 // If the exit block has phis, we need to be able to compute the values 1774 // within the loop which contains them. This assumes trivially lcssa phis 1775 // have already been removed; TODO: generalize 1776 BasicBlock *ExitBlock = 1777 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0); 1778 if (!ExitBlock->phis().empty()) 1779 return true; 1780 1781 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 1782 if (isa<SCEVCouldNotCompute>(ExitCount) || 1783 !Rewriter.isSafeToExpand(ExitCount)) 1784 return true; 1785 1786 assert(SE->isLoopInvariant(ExitCount, L) && 1787 "Exit count must be loop invariant"); 1788 assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer"); 1789 return false; 1790 }; 1791 1792 // If we have any exits which can't be predicated themselves, than we can't 1793 // predicate any exit which isn't guaranteed to execute before it. Consider 1794 // two exits (a) and (b) which would both exit on the same iteration. If we 1795 // can predicate (b), but not (a), and (a) preceeds (b) along some path, then 1796 // we could convert a loop from exiting through (a) to one exiting through 1797 // (b). Note that this problem exists only for exits with the same exit 1798 // count, and we could be more aggressive when exit counts are known inequal. 1799 llvm::sort(ExitingBlocks, 1800 [&](BasicBlock *A, BasicBlock *B) { 1801 // std::sort sorts in ascending order, so we want the inverse of 1802 // the normal dominance relation, plus a tie breaker for blocks 1803 // unordered by dominance. 1804 if (DT->properlyDominates(A, B)) return true; 1805 if (DT->properlyDominates(B, A)) return false; 1806 return A->getName() < B->getName(); 1807 }); 1808 // Check to see if our exit blocks are a total order (i.e. a linear chain of 1809 // exits before the backedge). If they aren't, reasoning about reachability 1810 // is complicated and we choose not to for now. 1811 for (unsigned i = 1; i < ExitingBlocks.size(); i++) 1812 if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])) 1813 return false; 1814 1815 // Given our sorted total order, we know that exit[j] must be evaluated 1816 // after all exit[i] such j > i. 1817 for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++) 1818 if (BadExit(ExitingBlocks[i])) { 1819 ExitingBlocks.resize(i); 1820 break; 1821 } 1822 1823 if (ExitingBlocks.empty()) 1824 return false; 1825 1826 // We rely on not being able to reach an exiting block on a later iteration 1827 // then it's statically compute exit count. The implementaton of 1828 // getExitCount currently has this invariant, but assert it here so that 1829 // breakage is obvious if this ever changes.. 1830 assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) { 1831 return DT->dominates(ExitingBB, L->getLoopLatch()); 1832 })); 1833 1834 // At this point, ExitingBlocks consists of only those blocks which are 1835 // predicatable. Given that, we know we have at least one exit we can 1836 // predicate if the loop is doesn't have side effects and doesn't have any 1837 // implicit exits (because then our exact BTC isn't actually exact). 1838 // @Reviewers - As structured, this is O(I^2) for loop nests. Any 1839 // suggestions on how to improve this? I can obviously bail out for outer 1840 // loops, but that seems less than ideal. MemorySSA can find memory writes, 1841 // is that enough for *all* side effects? 1842 for (BasicBlock *BB : L->blocks()) 1843 for (auto &I : *BB) 1844 // TODO:isGuaranteedToTransfer 1845 if (I.mayHaveSideEffects()) 1846 return false; 1847 1848 bool Changed = false; 1849 // Finally, do the actual predication for all predicatable blocks. A couple 1850 // of notes here: 1851 // 1) We don't bother to constant fold dominated exits with identical exit 1852 // counts; that's simply a form of CSE/equality propagation and we leave 1853 // it for dedicated passes. 1854 // 2) We insert the comparison at the branch. Hoisting introduces additional 1855 // legality constraints and we leave that to dedicated logic. We want to 1856 // predicate even if we can't insert a loop invariant expression as 1857 // peeling or unrolling will likely reduce the cost of the otherwise loop 1858 // varying check. 1859 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator()); 1860 IRBuilder<> B(L->getLoopPreheader()->getTerminator()); 1861 Value *ExactBTCV = nullptr; // Lazily generated if needed. 1862 for (BasicBlock *ExitingBB : ExitingBlocks) { 1863 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 1864 1865 auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1866 Value *NewCond; 1867 if (ExitCount == ExactBTC) { 1868 NewCond = L->contains(BI->getSuccessor(0)) ? 1869 B.getFalse() : B.getTrue(); 1870 } else { 1871 Value *ECV = Rewriter.expandCodeFor(ExitCount); 1872 if (!ExactBTCV) 1873 ExactBTCV = Rewriter.expandCodeFor(ExactBTC); 1874 Value *RHS = ExactBTCV; 1875 if (ECV->getType() != RHS->getType()) { 1876 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType()); 1877 ECV = B.CreateZExt(ECV, WiderTy); 1878 RHS = B.CreateZExt(RHS, WiderTy); 1879 } 1880 auto Pred = L->contains(BI->getSuccessor(0)) ? 1881 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; 1882 NewCond = B.CreateICmp(Pred, ECV, RHS); 1883 } 1884 Value *OldCond = BI->getCondition(); 1885 BI->setCondition(NewCond); 1886 if (OldCond->use_empty()) 1887 DeadInsts.emplace_back(OldCond); 1888 Changed = true; 1889 } 1890 1891 return Changed; 1892 } 1893 1894 //===----------------------------------------------------------------------===// 1895 // IndVarSimplify driver. Manage several subpasses of IV simplification. 1896 //===----------------------------------------------------------------------===// 1897 1898 bool IndVarSimplify::run(Loop *L) { 1899 // We need (and expect!) the incoming loop to be in LCSSA. 1900 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 1901 "LCSSA required to run indvars!"); 1902 1903 // If LoopSimplify form is not available, stay out of trouble. Some notes: 1904 // - LSR currently only supports LoopSimplify-form loops. Indvars' 1905 // canonicalization can be a pessimization without LSR to "clean up" 1906 // afterwards. 1907 // - We depend on having a preheader; in particular, 1908 // Loop::getCanonicalInductionVariable only supports loops with preheaders, 1909 // and we're in trouble if we can't find the induction variable even when 1910 // we've manually inserted one. 1911 // - LFTR relies on having a single backedge. 1912 if (!L->isLoopSimplifyForm()) 1913 return false; 1914 1915 #ifndef NDEBUG 1916 // Used below for a consistency check only 1917 // Note: Since the result returned by ScalarEvolution may depend on the order 1918 // in which previous results are added to its cache, the call to 1919 // getBackedgeTakenCount() may change following SCEV queries. 1920 const SCEV *BackedgeTakenCount; 1921 if (VerifyIndvars) 1922 BackedgeTakenCount = SE->getBackedgeTakenCount(L); 1923 #endif 1924 1925 bool Changed = false; 1926 // If there are any floating-point recurrences, attempt to 1927 // transform them to use integer recurrences. 1928 Changed |= rewriteNonIntegerIVs(L); 1929 1930 // Create a rewriter object which we'll use to transform the code with. 1931 SCEVExpander Rewriter(*SE, DL, "indvars"); 1932 #ifndef NDEBUG 1933 Rewriter.setDebugType(DEBUG_TYPE); 1934 #endif 1935 1936 // Eliminate redundant IV users. 1937 // 1938 // Simplification works best when run before other consumers of SCEV. We 1939 // attempt to avoid evaluating SCEVs for sign/zero extend operations until 1940 // other expressions involving loop IVs have been evaluated. This helps SCEV 1941 // set no-wrap flags before normalizing sign/zero extension. 1942 Rewriter.disableCanonicalMode(); 1943 Changed |= simplifyAndExtend(L, Rewriter, LI); 1944 1945 // Check to see if we can compute the final value of any expressions 1946 // that are recurrent in the loop, and substitute the exit values from the 1947 // loop into any instructions outside of the loop that use the final values 1948 // of the current expressions. 1949 if (ReplaceExitValue != NeverRepl) { 1950 if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT, 1951 ReplaceExitValue, DeadInsts)) { 1952 NumReplaced += Rewrites; 1953 Changed = true; 1954 } 1955 } 1956 1957 // Eliminate redundant IV cycles. 1958 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI); 1959 1960 // Try to convert exit conditions to unsigned and rotate computation 1961 // out of the loop. Note: Handles invalidation internally if needed. 1962 Changed |= canonicalizeExitCondition(L); 1963 1964 // Try to eliminate loop exits based on analyzeable exit counts 1965 if (optimizeLoopExits(L, Rewriter)) { 1966 Changed = true; 1967 // Given we've changed exit counts, notify SCEV 1968 // Some nested loops may share same folded exit basic block, 1969 // thus we need to notify top most loop. 1970 SE->forgetTopmostLoop(L); 1971 } 1972 1973 // Try to form loop invariant tests for loop exits by changing how many 1974 // iterations of the loop run when that is unobservable. 1975 if (predicateLoopExits(L, Rewriter)) { 1976 Changed = true; 1977 // Given we've changed exit counts, notify SCEV 1978 SE->forgetLoop(L); 1979 } 1980 1981 // If we have a trip count expression, rewrite the loop's exit condition 1982 // using it. 1983 if (!DisableLFTR) { 1984 BasicBlock *PreHeader = L->getLoopPreheader(); 1985 1986 SmallVector<BasicBlock*, 16> ExitingBlocks; 1987 L->getExitingBlocks(ExitingBlocks); 1988 for (BasicBlock *ExitingBB : ExitingBlocks) { 1989 // Can't rewrite non-branch yet. 1990 if (!isa<BranchInst>(ExitingBB->getTerminator())) 1991 continue; 1992 1993 // If our exitting block exits multiple loops, we can only rewrite the 1994 // innermost one. Otherwise, we're changing how many times the innermost 1995 // loop runs before it exits. 1996 if (LI->getLoopFor(ExitingBB) != L) 1997 continue; 1998 1999 if (!needsLFTR(L, ExitingBB)) 2000 continue; 2001 2002 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2003 if (isa<SCEVCouldNotCompute>(ExitCount)) 2004 continue; 2005 2006 // This was handled above, but as we form SCEVs, we can sometimes refine 2007 // existing ones; this allows exit counts to be folded to zero which 2008 // weren't when optimizeLoopExits saw them. Arguably, we should iterate 2009 // until stable to handle cases like this better. 2010 if (ExitCount->isZero()) 2011 continue; 2012 2013 PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT); 2014 if (!IndVar) 2015 continue; 2016 2017 // Avoid high cost expansions. Note: This heuristic is questionable in 2018 // that our definition of "high cost" is not exactly principled. 2019 if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget, 2020 TTI, PreHeader->getTerminator())) 2021 continue; 2022 2023 // Check preconditions for proper SCEVExpander operation. SCEV does not 2024 // express SCEVExpander's dependencies, such as LoopSimplify. Instead 2025 // any pass that uses the SCEVExpander must do it. This does not work 2026 // well for loop passes because SCEVExpander makes assumptions about 2027 // all loops, while LoopPassManager only forces the current loop to be 2028 // simplified. 2029 // 2030 // FIXME: SCEV expansion has no way to bail out, so the caller must 2031 // explicitly check any assumptions made by SCEV. Brittle. 2032 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount); 2033 if (!AR || AR->getLoop()->getLoopPreheader()) 2034 Changed |= linearFunctionTestReplace(L, ExitingBB, 2035 ExitCount, IndVar, 2036 Rewriter); 2037 } 2038 } 2039 // Clear the rewriter cache, because values that are in the rewriter's cache 2040 // can be deleted in the loop below, causing the AssertingVH in the cache to 2041 // trigger. 2042 Rewriter.clear(); 2043 2044 // Now that we're done iterating through lists, clean up any instructions 2045 // which are now dead. 2046 while (!DeadInsts.empty()) { 2047 Value *V = DeadInsts.pop_back_val(); 2048 2049 if (PHINode *PHI = dyn_cast_or_null<PHINode>(V)) 2050 Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get()); 2051 else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V)) 2052 Changed |= 2053 RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get()); 2054 } 2055 2056 // The Rewriter may not be used from this point on. 2057 2058 // Loop-invariant instructions in the preheader that aren't used in the 2059 // loop may be sunk below the loop to reduce register pressure. 2060 Changed |= sinkUnusedInvariants(L); 2061 2062 // rewriteFirstIterationLoopExitValues does not rely on the computation of 2063 // trip count and therefore can further simplify exit values in addition to 2064 // rewriteLoopExitValues. 2065 Changed |= rewriteFirstIterationLoopExitValues(L); 2066 2067 // Clean up dead instructions. 2068 Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get()); 2069 2070 // Check a post-condition. 2071 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2072 "Indvars did not preserve LCSSA!"); 2073 2074 // Verify that LFTR, and any other change have not interfered with SCEV's 2075 // ability to compute trip count. We may have *changed* the exit count, but 2076 // only by reducing it. 2077 #ifndef NDEBUG 2078 if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { 2079 SE->forgetLoop(L); 2080 const SCEV *NewBECount = SE->getBackedgeTakenCount(L); 2081 if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) < 2082 SE->getTypeSizeInBits(NewBECount->getType())) 2083 NewBECount = SE->getTruncateOrNoop(NewBECount, 2084 BackedgeTakenCount->getType()); 2085 else 2086 BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, 2087 NewBECount->getType()); 2088 assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount, 2089 NewBECount) && "indvars must preserve SCEV"); 2090 } 2091 if (VerifyMemorySSA && MSSAU) 2092 MSSAU->getMemorySSA()->verifyMemorySSA(); 2093 #endif 2094 2095 return Changed; 2096 } 2097 2098 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, 2099 LoopStandardAnalysisResults &AR, 2100 LPMUpdater &) { 2101 Function *F = L.getHeader()->getParent(); 2102 const DataLayout &DL = F->getParent()->getDataLayout(); 2103 2104 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA, 2105 WidenIndVars && AllowIVWidening); 2106 if (!IVS.run(&L)) 2107 return PreservedAnalyses::all(); 2108 2109 auto PA = getLoopPassPreservedAnalyses(); 2110 PA.preserveSet<CFGAnalyses>(); 2111 if (AR.MSSA) 2112 PA.preserve<MemorySSAAnalysis>(); 2113 return PA; 2114 } 2115 2116 namespace { 2117 2118 struct IndVarSimplifyLegacyPass : public LoopPass { 2119 static char ID; // Pass identification, replacement for typeid 2120 2121 IndVarSimplifyLegacyPass() : LoopPass(ID) { 2122 initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry()); 2123 } 2124 2125 bool runOnLoop(Loop *L, LPPassManager &LPM) override { 2126 if (skipLoop(L)) 2127 return false; 2128 2129 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 2130 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 2131 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2132 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); 2133 auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr; 2134 auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); 2135 auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr; 2136 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 2137 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>(); 2138 MemorySSA *MSSA = nullptr; 2139 if (MSSAAnalysis) 2140 MSSA = &MSSAAnalysis->getMSSA(); 2141 2142 IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening); 2143 return IVS.run(L); 2144 } 2145 2146 void getAnalysisUsage(AnalysisUsage &AU) const override { 2147 AU.setPreservesCFG(); 2148 AU.addPreserved<MemorySSAWrapperPass>(); 2149 getLoopAnalysisUsage(AU); 2150 } 2151 }; 2152 2153 } // end anonymous namespace 2154 2155 char IndVarSimplifyLegacyPass::ID = 0; 2156 2157 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", 2158 "Induction Variable Simplification", false, false) 2159 INITIALIZE_PASS_DEPENDENCY(LoopPass) 2160 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars", 2161 "Induction Variable Simplification", false, false) 2162 2163 Pass *llvm::createIndVarSimplifyPass() { 2164 return new IndVarSimplifyLegacyPass(); 2165 } 2166