//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
             "effect in release builds. (Note: this adds additional SCEV "
             "queries potentially changing the analysis result)"));

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
    "indvars-post-increment-ranges", cl::Hidden,
    cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
    cl::init(true));

static cl::opt<bool>
    DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
                cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
    LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                    cl::desc("Predicate conditions in read only loops"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  SmallVector<WeakTrackingVH, 16> DeadInsts;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to eliminate loop exits based on analyzable exit counts.
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
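  /// This is only legal when the extra iterations have no observable side
  /// effects, which is why the transform is limited to read-only loops (see
  /// the indvars-predicate-loops option above).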
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);
};

} // end anonymous namespace

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, try to insert a
/// corresponding integer induction variable instead.
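/// The rewrite is only attempted when the initial value, the increment and the
/// exit value are all floating-point constants that ConvertToSInt can express
/// exactly as small integers; anything else is rejected below.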
/// For example,
///   for(double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for(int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if the increment operation is not
  // an add or the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
322 case CmpInst::FCMP_OEQ: 323 case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; 324 case CmpInst::FCMP_ONE: 325 case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break; 326 case CmpInst::FCMP_OGT: 327 case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break; 328 case CmpInst::FCMP_OGE: 329 case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break; 330 case CmpInst::FCMP_OLT: 331 case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break; 332 case CmpInst::FCMP_OLE: 333 case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; 334 } 335 336 // We convert the floating point induction variable to a signed i32 value if 337 // we can. This is only safe if the comparison will not overflow in a way 338 // that won't be trapped by the integer equivalent operations. Check for this 339 // now. 340 // TODO: We could use i64 if it is native and the range requires it. 341 342 // The start/stride/exit values must all fit in signed i32. 343 if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) 344 return false; 345 346 // If not actually striding (add x, 0.0), avoid touching the code. 347 if (IncValue == 0) 348 return false; 349 350 // Positive and negative strides have different safety conditions. 351 if (IncValue > 0) { 352 // If we have a positive stride, we require the init to be less than the 353 // exit value. 354 if (InitValue >= ExitValue) 355 return false; 356 357 uint32_t Range = uint32_t(ExitValue-InitValue); 358 // Check for infinite loop, either: 359 // while (i <= Exit) or until (i > Exit) 360 if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) { 361 if (++Range == 0) return false; // Range overflows. 362 } 363 364 unsigned Leftover = Range % uint32_t(IncValue); 365 366 // If this is an equality comparison, we require that the strided value 367 // exactly land on the exit value, otherwise the IV condition will wrap 368 // around and do things the fp IV wouldn't. 369 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 370 Leftover != 0) 371 return false; 372 373 // If the stride would wrap around the i32 before exiting, we can't 374 // transform the IV. 375 if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) 376 return false; 377 } else { 378 // If we have a negative stride, we require the init to be greater than the 379 // exit value. 380 if (InitValue <= ExitValue) 381 return false; 382 383 uint32_t Range = uint32_t(InitValue-ExitValue); 384 // Check for infinite loop, either: 385 // while (i >= Exit) or until (i < Exit) 386 if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) { 387 if (++Range == 0) return false; // Range overflows. 388 } 389 390 unsigned Leftover = Range % uint32_t(-IncValue); 391 392 // If this is an equality comparison, we require that the strided value 393 // exactly land on the exit value, otherwise the IV condition will wrap 394 // around and do things the fp IV wouldn't. 395 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 396 Leftover != 0) 397 return false; 398 399 // If the stride would wrap around the i32 before exiting, we can't 400 // transform the IV. 401 if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) 402 return false; 403 } 404 405 IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); 406 407 // Insert new integer induction variable. 
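  // Schematically, the integer replacement built below is (names are
  // illustrative, not taken from the input IR):
  //   %iv.int      = phi i32 [ InitValue, %preheader ], [ %iv.int.next, %latch ]
  //   %iv.int.next = add i32 %iv.int, IncValue
  //   %cmp         = icmp <NewPred> i32 %iv.int.next, ExitValue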
408 PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); 409 NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), 410 PN->getIncomingBlock(IncomingEdge)); 411 412 Value *NewAdd = 413 BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue), 414 Incr->getName()+".int", Incr); 415 NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge)); 416 417 ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd, 418 ConstantInt::get(Int32Ty, ExitValue), 419 Compare->getName()); 420 421 // In the following deletions, PN may become dead and may be deleted. 422 // Use a WeakTrackingVH to observe whether this happens. 423 WeakTrackingVH WeakPH = PN; 424 425 // Delete the old floating point exit comparison. The branch starts using the 426 // new comparison. 427 NewCompare->takeName(Compare); 428 Compare->replaceAllUsesWith(NewCompare); 429 RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get()); 430 431 // Delete the old floating point increment. 432 Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); 433 RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get()); 434 435 // If the FP induction variable still has uses, this is because something else 436 // in the loop uses its value. In order to canonicalize the induction 437 // variable, we chose to eliminate the IV and rewrite it in terms of an 438 // int->fp cast. 439 // 440 // We give preference to sitofp over uitofp because it is faster on most 441 // platforms. 442 if (WeakPH) { 443 Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", 444 &*PN->getParent()->getFirstInsertionPt()); 445 PN->replaceAllUsesWith(Conv); 446 RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get()); 447 } 448 return true; 449 } 450 451 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) { 452 // First step. Check to see if there are any floating-point recurrences. 453 // If there are, change them into integer recurrences, permitting analysis by 454 // the SCEV routines. 455 BasicBlock *Header = L->getHeader(); 456 457 SmallVector<WeakTrackingVH, 8> PHIs; 458 for (PHINode &PN : Header->phis()) 459 PHIs.push_back(&PN); 460 461 bool Changed = false; 462 for (unsigned i = 0, e = PHIs.size(); i != e; ++i) 463 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i])) 464 Changed |= handleFloatingPointIV(L, PN); 465 466 // If the loop previously had floating-point IV, ScalarEvolution 467 // may not have been able to compute a trip count. Now that we've done some 468 // re-writing, the trip count may be computable. 469 if (Changed) 470 SE->forgetLoop(L); 471 return Changed; 472 } 473 474 //===---------------------------------------------------------------------===// 475 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know 476 // they will exit at the first iteration. 477 //===---------------------------------------------------------------------===// 478 479 /// Check to see if this loop has loop invariant conditions which lead to loop 480 /// exits. If so, we know that if the exit path is taken, it is at the first 481 /// loop iteration. This lets us predict exit values of PHI nodes that live in 482 /// loop header. 483 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) { 484 // Verify the input to the pass is already in LCSSA form. 
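  // (LCSSA guarantees that every value defined in the loop and used outside it
  // is funneled through a PHI node in an exit block, which is what the
  // exit-block PHI rewriting below relies on.)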
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all? (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.) Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by visitIVCast and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
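/// For instance, if an i32 IV feeds both a "sext ... to i64" and a
/// "zext ... to i64", whichever cast is visited first fixes IsSigned; the
/// mismatched cast seen later is ignored rather than forcing a second
/// widening, so the IV is widened once, to i64, with the signedness of that
/// first extend.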
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def. This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative. Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
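  // Guard conditions contribute to the same control-dependent range analysis
  // as dominating branch conditions; see calculatePostIncRange.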
657 bool HasGuards; 658 659 // Result 660 PHINode *WidePhi = nullptr; 661 Instruction *WideInc = nullptr; 662 const SCEV *WideIncExpr = nullptr; 663 SmallVectorImpl<WeakTrackingVH> &DeadInsts; 664 665 SmallPtrSet<Instruction *,16> Widened; 666 SmallVector<NarrowIVDefUse, 8> NarrowIVUsers; 667 668 enum ExtendKind { ZeroExtended, SignExtended, Unknown }; 669 670 // A map tracking the kind of extension used to widen each narrow IV 671 // and narrow IV user. 672 // Key: pointer to a narrow IV or IV user. 673 // Value: the kind of extension used to widen this Instruction. 674 DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap; 675 676 using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>; 677 678 // A map with control-dependent ranges for post increment IV uses. The key is 679 // a pair of IV def and a use of this def denoting the context. The value is 680 // a ConstantRange representing possible values of the def at the given 681 // context. 682 DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos; 683 684 Optional<ConstantRange> getPostIncRangeInfo(Value *Def, 685 Instruction *UseI) { 686 DefUserPair Key(Def, UseI); 687 auto It = PostIncRangeInfos.find(Key); 688 return It == PostIncRangeInfos.end() 689 ? Optional<ConstantRange>(None) 690 : Optional<ConstantRange>(It->second); 691 } 692 693 void calculatePostIncRanges(PHINode *OrigPhi); 694 void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser); 695 696 void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) { 697 DefUserPair Key(Def, UseI); 698 auto It = PostIncRangeInfos.find(Key); 699 if (It == PostIncRangeInfos.end()) 700 PostIncRangeInfos.insert({Key, R}); 701 else 702 It->second = R.intersectWith(It->second); 703 } 704 705 public: 706 WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, 707 DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI, 708 bool HasGuards) 709 : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo), 710 L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), 711 HasGuards(HasGuards), DeadInsts(DI) { 712 assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); 713 ExtendKindMap[OrigPhi] = WI.IsSigned ? 
SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantUse(NarrowIVDefUse DU);
  void widenWithVariantUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// 0 for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ?
WideDef 806 : createExtendInst(NarrowUse->getOperand(1), WideType, 807 IsSigned, NarrowUse); 808 809 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 810 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 811 NarrowBO->getName()); 812 IRBuilder<> Builder(NarrowUse); 813 Builder.Insert(WideBO); 814 WideBO->copyIRFlags(NarrowBO); 815 return WideBO; 816 } 817 818 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU, 819 const SCEVAddRecExpr *WideAR) { 820 Instruction *NarrowUse = DU.NarrowUse; 821 Instruction *NarrowDef = DU.NarrowDef; 822 Instruction *WideDef = DU.WideDef; 823 824 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); 825 826 unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1; 827 828 // We're trying to find X such that 829 // 830 // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X 831 // 832 // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef), 833 // and check using SCEV if any of them are correct. 834 835 // Returns true if extending NonIVNarrowDef according to `SignExt` is a 836 // correct solution to X. 837 auto GuessNonIVOperand = [&](bool SignExt) { 838 const SCEV *WideLHS; 839 const SCEV *WideRHS; 840 841 auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) { 842 if (SignExt) 843 return SE->getSignExtendExpr(S, Ty); 844 return SE->getZeroExtendExpr(S, Ty); 845 }; 846 847 if (IVOpIdx == 0) { 848 WideLHS = SE->getSCEV(WideDef); 849 const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1)); 850 WideRHS = GetExtend(NarrowRHS, WideType); 851 } else { 852 const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0)); 853 WideLHS = GetExtend(NarrowLHS, WideType); 854 WideRHS = SE->getSCEV(WideDef); 855 } 856 857 // WideUse is "WideDef `op.wide` X" as described in the comment. 858 const SCEV *WideUse = nullptr; 859 860 switch (NarrowUse->getOpcode()) { 861 default: 862 llvm_unreachable("No other possibility!"); 863 864 case Instruction::Add: 865 WideUse = SE->getAddExpr(WideLHS, WideRHS); 866 break; 867 868 case Instruction::Mul: 869 WideUse = SE->getMulExpr(WideLHS, WideRHS); 870 break; 871 872 case Instruction::UDiv: 873 WideUse = SE->getUDivExpr(WideLHS, WideRHS); 874 break; 875 876 case Instruction::Sub: 877 WideUse = SE->getMinusSCEV(WideLHS, WideRHS); 878 break; 879 } 880 881 return WideUse == WideAR; 882 }; 883 884 bool SignExtend = getExtendKind(NarrowDef) == SignExtended; 885 if (!GuessNonIVOperand(SignExtend)) { 886 SignExtend = !SignExtend; 887 if (!GuessNonIVOperand(SignExtend)) 888 return nullptr; 889 } 890 891 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) 892 ? WideDef 893 : createExtendInst(NarrowUse->getOperand(0), WideType, 894 SignExtend, NarrowUse); 895 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) 896 ? 
WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the no-wrap
  // behavior depends on. Non-control-equivalent instructions can be mapped to
  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
  // semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification after
/// widening its type?
In other words, can the extend be safely hoisted out of 983 /// the loop with SCEV reducing the value to a recurrence on the same loop. If 984 /// so, return the extended recurrence and the kind of extension used. Otherwise 985 /// return {nullptr, Unknown}. 986 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) { 987 if (!SE->isSCEVable(DU.NarrowUse->getType())) 988 return {nullptr, Unknown}; 989 990 const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse); 991 if (SE->getTypeSizeInBits(NarrowExpr->getType()) >= 992 SE->getTypeSizeInBits(WideType)) { 993 // NarrowUse implicitly widens its operand. e.g. a gep with a narrow 994 // index. So don't follow this use. 995 return {nullptr, Unknown}; 996 } 997 998 const SCEV *WideExpr; 999 ExtendKind ExtKind; 1000 if (DU.NeverNegative) { 1001 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); 1002 if (isa<SCEVAddRecExpr>(WideExpr)) 1003 ExtKind = SignExtended; 1004 else { 1005 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); 1006 ExtKind = ZeroExtended; 1007 } 1008 } else if (getExtendKind(DU.NarrowDef) == SignExtended) { 1009 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); 1010 ExtKind = SignExtended; 1011 } else { 1012 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); 1013 ExtKind = ZeroExtended; 1014 } 1015 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); 1016 if (!AddRec || AddRec->getLoop() != L) 1017 return {nullptr, Unknown}; 1018 return {AddRec, ExtKind}; 1019 } 1020 1021 /// This IV user cannot be widened. Replace this use of the original narrow IV 1022 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV. 1023 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) { 1024 auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); 1025 if (!InsertPt) 1026 return; 1027 LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user " 1028 << *DU.NarrowUse << "\n"); 1029 IRBuilder<> Builder(InsertPt); 1030 Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); 1031 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); 1032 } 1033 1034 /// If the narrow use is a compare instruction, then widen the compare 1035 // (and possibly the other operand). The extend operation is hoisted into the 1036 // loop preheader as far as possible. 1037 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) { 1038 ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse); 1039 if (!Cmp) 1040 return false; 1041 1042 // We can legally widen the comparison in the following two cases: 1043 // 1044 // - The signedness of the IV extension and comparison match 1045 // 1046 // - The narrow IV is always positive (and thus its sign extension is equal 1047 // to its zero extension). For instance, let's say we're zero extending 1048 // %narrow for the following use 1049 // 1050 // icmp slt i32 %narrow, %val ... (A) 1051 // 1052 // and %narrow is always positive. Then 1053 // 1054 // (A) == icmp slt i32 sext(%narrow), sext(%val) 1055 // == icmp slt i32 zext(%narrow), sext(%val) 1056 bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended; 1057 if (!(DU.NeverNegative || IsSigned == Cmp->isSigned())) 1058 return false; 1059 1060 Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 
1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}

// widenIVUse avoids generating a trunc by evaluating the use as an AddRec; this
// will not work when:
//    1) SCEV traces back to an instruction inside the loop that SCEV can not
//       expand, eg. add %indvar, (load %addr)
//    2) SCEV finds a loop variant, eg. add %indvar, %loopvariant
// While SCEV fails to avoid trunc, we can still try to use instruction
// combining approach to prove trunc is not required. This can be further
// extended with other instruction combining checks, but for now we handle the
// following case (sub can be "add" and "mul", "nsw + sext" can be "nuw + zext")
//
// Src:
//   %c = sub nsw %b, %indvar
//   %d = sext %c to i64
// Dst:
//   %indvar.ext1 = sext %indvar to i64
//   %m = sext %b to i64
//   %d = sub nsw i64 %m, %indvar.ext1
// Therefore, as long as the result of add/sub/mul is extended to the wide type,
// no trunc is required regardless of how %b is generated. This pattern is
// common when calculating addresses on 64-bit architectures.
bool WidenIV::widenWithVariantUse(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return false;

  // The operand that is not defined by NarrowDef of DU. Let's call it the
  // other operand.
  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
         "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(NarrowUse);
  ExtendKind ExtKind = getExtendKind(NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return false;

  // Verifying that the defining operand is an AddRec
  const SCEV *Op1 = SE->getSCEV(WideDef);
  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
    return false;
  // Verifying that the other operand is an Extend.
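  // (SCEV may have folded the extended operand into something other than a
  // plain sext/zext expression; only that plain extend form is handled here,
  // so anything else is rejected.)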
1137 if (ExtKind == SignExtended) { 1138 if (!isa<SCEVSignExtendExpr>(ExtendOperExpr)) 1139 return false; 1140 } else { 1141 if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr)) 1142 return false; 1143 } 1144 1145 if (ExtKind == SignExtended) { 1146 for (Use &U : NarrowUse->uses()) { 1147 SExtInst *User = dyn_cast<SExtInst>(U.getUser()); 1148 if (!User || User->getType() != WideType) 1149 return false; 1150 } 1151 } else { // ExtKind == ZeroExtended 1152 for (Use &U : NarrowUse->uses()) { 1153 ZExtInst *User = dyn_cast<ZExtInst>(U.getUser()); 1154 if (!User || User->getType() != WideType) 1155 return false; 1156 } 1157 } 1158 1159 return true; 1160 } 1161 1162 /// Special Case for widening with loop variant (see 1163 /// WidenIV::widenWithVariant). This is the code generation part. 1164 void WidenIV::widenWithVariantUseCodegen(NarrowIVDefUse DU) { 1165 Instruction *NarrowUse = DU.NarrowUse; 1166 Instruction *NarrowDef = DU.NarrowDef; 1167 Instruction *WideDef = DU.WideDef; 1168 1169 ExtendKind ExtKind = getExtendKind(NarrowDef); 1170 1171 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); 1172 1173 // Generating a widening use instruction. 1174 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) 1175 ? WideDef 1176 : createExtendInst(NarrowUse->getOperand(0), WideType, 1177 ExtKind, NarrowUse); 1178 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) 1179 ? WideDef 1180 : createExtendInst(NarrowUse->getOperand(1), WideType, 1181 ExtKind, NarrowUse); 1182 1183 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 1184 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 1185 NarrowBO->getName()); 1186 IRBuilder<> Builder(NarrowUse); 1187 Builder.Insert(WideBO); 1188 WideBO->copyIRFlags(NarrowBO); 1189 1190 assert(ExtKind != Unknown && "Unknown ExtKind not handled"); 1191 1192 ExtendKindMap[NarrowUse] = ExtKind; 1193 1194 for (Use &U : NarrowUse->uses()) { 1195 Instruction *User = nullptr; 1196 if (ExtKind == SignExtended) 1197 User = dyn_cast<SExtInst>(U.getUser()); 1198 else 1199 User = dyn_cast<ZExtInst>(U.getUser()); 1200 if (User && User->getType() == WideType) { 1201 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " 1202 << *WideBO << "\n"); 1203 ++NumElimExt; 1204 User->replaceAllUsesWith(WideBO); 1205 DeadInsts.emplace_back(User); 1206 } 1207 } 1208 } 1209 1210 /// Determine whether an individual user of the narrow IV can be widened. If so, 1211 /// return the wide clone of the user. 1212 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { 1213 assert(ExtendKindMap.count(DU.NarrowDef) && 1214 "Should already know the kind of extension used to widen NarrowDef"); 1215 1216 // Stop traversing the def-use chain at inner-loop phis or post-loop phis. 1217 if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) { 1218 if (LI->getLoopFor(UsePhi->getParent()) != L) { 1219 // For LCSSA phis, sink the truncate outside the loop. 1220 // After SimplifyCFG most loop exit targets have a single predecessor. 1221 // Otherwise fall back to a truncate within the loop. 1222 if (UsePhi->getNumOperands() != 1) 1223 truncateIVUse(DU, DT, LI); 1224 else { 1225 // Widening the PHI requires us to insert a trunc. The logical place 1226 // for this trunc is in the same BB as the PHI. This is not possible if 1227 // the BB is terminated by a catchswitch. 
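        // (A catchswitch is an EH pad that must be the only non-PHI
        // instruction in its block, so there is no legal point at which to
        // insert the trunc.)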
        if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
          return nullptr;

        PHINode *WidePhi =
          PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
                          UsePhi);
        WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
        IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
        Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
        UsePhi->replaceAllUsesWith(Trunc);
        DeadInsts.emplace_back(UsePhi);
        LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
                          << *WidePhi << "\n");
      }
      return nullptr;
    }
  }

  // This narrow use can be widened by a sext if it's non-negative or its narrow
  // def was widened by a sext. Same for zext.
  auto canWidenBySExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
  };
  auto canWidenByZExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
  };

  // Our raison d'etre! Eliminate sign and zero extension.
  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
      (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
    Value *NewDef = DU.WideDef;
    if (DU.NarrowUse->getType() != WideType) {
      unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
      if (CastWidth < IVWidth) {
        // The cast isn't as wide as the IV, so insert a Trunc.
        IRBuilder<> Builder(DU.NarrowUse);
        NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
      }
      else {
        // A wider extend was hidden behind a narrower one. This may induce
        // another round of IV widening in which the intermediate IV becomes
        // dead. It should be very rare.
        LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
                          << " not wide enough to subsume " << *DU.NarrowUse
                          << "\n");
        DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
        NewDef = DU.NarrowUse;
      }
    }
    if (NewDef != DU.NarrowUse) {
      LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
                        << " replaced by " << *DU.WideDef << "\n");
      ++NumElimExt;
      DU.NarrowUse->replaceAllUsesWith(NewDef);
      DeadInsts.emplace_back(DU.NarrowUse);
    }
    // Now that the extend is gone, we want to expose its uses for potential
    // further simplification. We don't need to directly inform SimplifyIVUsers
    // of the new users, because their parent IV will be processed later as a
    // new loop phi. If we preserved IVUsers analysis, we would also want to
    // push the uses of WideDef here.

    // No further widening is needed. The deceased [sz]ext had done it for us.
    return nullptr;
  }

  // Does this user itself evaluate to a recurrence after widening?
  WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
  if (!WideAddRec.first)
    WideAddRec = getWideRecurrence(DU);

  assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
  if (!WideAddRec.first) {
    // If use is a loop condition, try to promote the condition instead of
    // truncating the IV first.
    if (widenLoopCompare(DU))
      return nullptr;

    // We are here about to generate a truncate instruction that may hurt
    // performance because the scalar evolution expression computed earlier
    // in WideAddRec.first does not indicate a polynomial induction expression.
    // In that case, look at the operands of the use instruction to determine
    // if we can still widen the use instead of truncating its operand.
    if (widenWithVariantUse(DU)) {
      widenWithVariantUseCodegen(DU);
      return nullptr;
    }

    // This user does not evaluate to a recurrence after widening, so don't
    // follow it. Instead insert a Trunc to kill off the original use,
    // eventually isolating the original narrow IV so it can be removed.
    truncateIVUse(DU, DT, LI);
    return nullptr;
  }
  // Assume block terminators cannot evaluate to a recurrence. We can't insert
  // a Trunc after a terminator if there happens to be a critical edge.
  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
         "SCEV is not expected to evaluate a block terminator");

  // Reuse the IV increment that SCEVExpander created as long as it dominates
  // NarrowUse.
  Instruction *WideUse = nullptr;
  if (WideAddRec.first == WideIncExpr &&
      Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
    WideUse = WideInc;
  else {
    WideUse = cloneIVUser(DU, WideAddRec.first);
    if (!WideUse)
      return nullptr;
  }
  // Evaluation of WideAddRec ensured that the narrow expression could be
  // extended outside the loop without overflow. This suggests that the wide use
  // evaluates to the same expression as the extended narrow use, but doesn't
  // absolutely guarantee it. Hence the following failsafe check. In rare cases
  // where it fails, we simply throw away the newly created wide use.
  if (WideAddRec.first != SE->getSCEV(WideUse)) {
    LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
                      << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
                      << "\n");
    DeadInsts.emplace_back(WideUse);
    return nullptr;
  }

  // If we reached this point then we are going to replace
  // DU.NarrowUse with WideUse. Reattach DbgValue then.
  replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);

  ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
  // Returning WideUse pushes it on the worklist.
  return WideUse;
}

/// Add eligible users of NarrowDef to NarrowIVUsers.
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
  bool NonNegativeDef =
      SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
                           SE->getConstant(NarrowSCEV->getType(), 0));
  for (User *U : NarrowDef->users()) {
    Instruction *NarrowUser = cast<Instruction>(U);

    // Handle data flow merges and bizarre phi cycles.
    if (!Widened.insert(NarrowUser).second)
      continue;

    bool NonNegativeUse = false;
    if (!NonNegativeDef) {
      // We might have control-dependent range information for this context.
      if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
        NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
    }

    NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
                               NonNegativeDef || NonNegativeUse);
  }
}

/// Process a single induction variable. First use the SCEVExpander to create a
/// wide induction variable that evaluates to the same recurrence as the
/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
/// def-use chain.
After widenIVUse has processed all interesting IV users, the 1390 /// narrow IV will be isolated for removal by DeleteDeadPHIs. 1391 /// 1392 /// It would be simpler to delete uses as they are processed, but we must avoid 1393 /// invalidating SCEV expressions. 1394 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { 1395 // Is this phi an induction variable? 1396 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); 1397 if (!AddRec) 1398 return nullptr; 1399 1400 // Widen the induction variable expression. 1401 const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended 1402 ? SE->getSignExtendExpr(AddRec, WideType) 1403 : SE->getZeroExtendExpr(AddRec, WideType); 1404 1405 assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && 1406 "Expect the new IV expression to preserve its type"); 1407 1408 // Can the IV be extended outside the loop without overflow? 1409 AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); 1410 if (!AddRec || AddRec->getLoop() != L) 1411 return nullptr; 1412 1413 // An AddRec must have loop-invariant operands. Since this AddRec is 1414 // materialized by a loop header phi, the expression cannot have any post-loop 1415 // operands, so they must dominate the loop header. 1416 assert( 1417 SE->properlyDominates(AddRec->getStart(), L->getHeader()) && 1418 SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) && 1419 "Loop header phi recurrence inputs do not dominate the loop"); 1420 1421 // Iterate over IV uses (including transitive ones) looking for IV increments 1422 // of the form 'add nsw %iv, <const>'. For each increment and each use of 1423 // the increment calculate control-dependent range information basing on 1424 // dominating conditions inside of the loop (e.g. a range check inside of the 1425 // loop). Calculated ranges are stored in PostIncRangeInfos map. 1426 // 1427 // Control-dependent range information is later used to prove that a narrow 1428 // definition is not negative (see pushNarrowIVUsers). It's difficult to do 1429 // this on demand because when pushNarrowIVUsers needs this information some 1430 // of the dominating conditions might be already widened. 1431 if (UsePostIncrementRanges) 1432 calculatePostIncRanges(OrigPhi); 1433 1434 // The rewriter provides a value for the desired IV expression. This may 1435 // either find an existing phi or materialize a new one. Either way, we 1436 // expect a well-formed cyclic phi-with-increments. i.e. any operand not part 1437 // of the phi-SCC dominates the loop entry. 1438 Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt(); 1439 WidePhi = dyn_cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt)); 1440 // If the wide phi is not a phi node, for example a cast node, like bitcast, 1441 // inttoptr, ptrtoint, just skip for now. 1442 if (!WidePhi) 1443 return nullptr; 1444 1445 // Remembering the WideIV increment generated by SCEVExpander allows 1446 // widenIVUse to reuse it when widening the narrow IV's increment. We don't 1447 // employ a general reuse mechanism because the call above is the only call to 1448 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. 1449 if (BasicBlock *LatchBlock = L->getLoopLatch()) { 1450 WideInc = 1451 cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); 1452 WideIncExpr = SE->getSCEV(WideInc); 1453 // Propagate the debug location associated with the original loop increment 1454 // to the new (widened) increment. 
1455 auto *OrigInc = 1456 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); 1457 WideInc->setDebugLoc(OrigInc->getDebugLoc()); 1458 } 1459 1460 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); 1461 ++NumWidened; 1462 1463 // Traverse the def-use chain using a worklist starting at the original IV. 1464 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); 1465 1466 Widened.insert(OrigPhi); 1467 pushNarrowIVUsers(OrigPhi, WidePhi); 1468 1469 while (!NarrowIVUsers.empty()) { 1470 NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); 1471 1472 // Process a def-use edge. This may replace the use, so don't hold a 1473 // use_iterator across it. 1474 Instruction *WideUse = widenIVUse(DU, Rewriter); 1475 1476 // Follow all def-use edges from the previous narrow use. 1477 if (WideUse) 1478 pushNarrowIVUsers(DU.NarrowUse, WideUse); 1479 1480 // widenIVUse may have removed the def-use edge. 1481 if (DU.NarrowDef->use_empty()) 1482 DeadInsts.emplace_back(DU.NarrowDef); 1483 } 1484 1485 // Attach any debug information to the new PHI. 1486 replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT); 1487 1488 return WidePhi; 1489 } 1490 1491 /// Calculates control-dependent range for the given def at the given context 1492 /// by looking at dominating conditions inside of the loop 1493 void WidenIV::calculatePostIncRange(Instruction *NarrowDef, 1494 Instruction *NarrowUser) { 1495 using namespace llvm::PatternMatch; 1496 1497 Value *NarrowDefLHS; 1498 const APInt *NarrowDefRHS; 1499 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), 1500 m_APInt(NarrowDefRHS))) || 1501 !NarrowDefRHS->isNonNegative()) 1502 return; 1503 1504 auto UpdateRangeFromCondition = [&] (Value *Condition, 1505 bool TrueDest) { 1506 CmpInst::Predicate Pred; 1507 Value *CmpRHS; 1508 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), 1509 m_Value(CmpRHS)))) 1510 return; 1511 1512 CmpInst::Predicate P = 1513 TrueDest ? Pred : CmpInst::getInversePredicate(Pred); 1514 1515 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); 1516 auto CmpConstrainedLHSRange = 1517 ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); 1518 auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( 1519 *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); 1520 1521 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); 1522 }; 1523 1524 auto UpdateRangeFromGuards = [&](Instruction *Ctx) { 1525 if (!HasGuards) 1526 return; 1527 1528 for (Instruction &I : make_range(Ctx->getIterator().getReverse(), 1529 Ctx->getParent()->rend())) { 1530 Value *C = nullptr; 1531 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) 1532 UpdateRangeFromCondition(C, /*TrueDest=*/true); 1533 } 1534 }; 1535 1536 UpdateRangeFromGuards(NarrowUser); 1537 1538 BasicBlock *NarrowUserBB = NarrowUser->getParent(); 1539 // If NarrowUserBB is statically unreachable asking dominator queries may 1540 // yield surprising results. (e.g. 
the block may not have a dom tree node) 1541 if (!DT->isReachableFromEntry(NarrowUserBB)) 1542 return; 1543 1544 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); 1545 L->contains(DTB->getBlock()); 1546 DTB = DTB->getIDom()) { 1547 auto *BB = DTB->getBlock(); 1548 auto *TI = BB->getTerminator(); 1549 UpdateRangeFromGuards(TI); 1550 1551 auto *BI = dyn_cast<BranchInst>(TI); 1552 if (!BI || !BI->isConditional()) 1553 continue; 1554 1555 auto *TrueSuccessor = BI->getSuccessor(0); 1556 auto *FalseSuccessor = BI->getSuccessor(1); 1557 1558 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { 1559 return BBE.isSingleEdge() && 1560 DT->dominates(BBE, NarrowUser->getParent()); 1561 }; 1562 1563 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) 1564 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); 1565 1566 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) 1567 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); 1568 } 1569 } 1570 1571 /// Calculates PostIncRangeInfos map for the given IV 1572 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { 1573 SmallPtrSet<Instruction *, 16> Visited; 1574 SmallVector<Instruction *, 6> Worklist; 1575 Worklist.push_back(OrigPhi); 1576 Visited.insert(OrigPhi); 1577 1578 while (!Worklist.empty()) { 1579 Instruction *NarrowDef = Worklist.pop_back_val(); 1580 1581 for (Use &U : NarrowDef->uses()) { 1582 auto *NarrowUser = cast<Instruction>(U.getUser()); 1583 1584 // Don't go looking outside the current loop. 1585 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; 1586 if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) 1587 continue; 1588 1589 if (!Visited.insert(NarrowUser).second) 1590 continue; 1591 1592 Worklist.push_back(NarrowUser); 1593 1594 calculatePostIncRange(NarrowDef, NarrowUser); 1595 } 1596 } 1597 } 1598 1599 //===----------------------------------------------------------------------===// 1600 // Live IV Reduction - Minimize IVs live across the loop. 1601 //===----------------------------------------------------------------------===// 1602 1603 //===----------------------------------------------------------------------===// 1604 // Simplification of IV users based on SCEV evaluation. 1605 //===----------------------------------------------------------------------===// 1606 1607 namespace { 1608 1609 class IndVarSimplifyVisitor : public IVVisitor { 1610 ScalarEvolution *SE; 1611 const TargetTransformInfo *TTI; 1612 PHINode *IVPhi; 1613 1614 public: 1615 WideIVInfo WI; 1616 1617 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, 1618 const TargetTransformInfo *TTI, 1619 const DominatorTree *DTree) 1620 : SE(SCEV), TTI(TTI), IVPhi(IV) { 1621 DT = DTree; 1622 WI.NarrowIV = IVPhi; 1623 } 1624 1625 // Implement the interface used by simplifyUsersOfIV. 1626 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } 1627 }; 1628 1629 } // end anonymous namespace 1630 1631 /// Iteratively perform simplification on a worklist of IV users. Each 1632 /// successive simplification may push more users which may themselves be 1633 /// candidates for simplification. 1634 /// 1635 /// Sign/Zero extend elimination is interleaved with IV simplification. 
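///
/// For example (an illustrative sketch, not a specific test case): a narrow
/// i32 IV whose interesting uses are all sign-extended,
///   %i     = phi i32 [ 0, %preheader ], [ %i.next, %latch ]
///   %i.ext = sext i32 %i to i64
/// can be widened to a single i64 recurrence; the sext users are rewritten to
/// use the wide IV and the i32 phi is typically left dead for later cleanup.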
1636 bool IndVarSimplify::simplifyAndExtend(Loop *L, 1637 SCEVExpander &Rewriter, 1638 LoopInfo *LI) { 1639 SmallVector<WideIVInfo, 8> WideIVs; 1640 1641 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction( 1642 Intrinsic::getName(Intrinsic::experimental_guard)); 1643 bool HasGuards = GuardDecl && !GuardDecl->use_empty(); 1644 1645 SmallVector<PHINode*, 8> LoopPhis; 1646 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 1647 LoopPhis.push_back(cast<PHINode>(I)); 1648 } 1649 // Each round of simplification iterates through the SimplifyIVUsers worklist 1650 // for all current phis, then determines whether any IVs can be 1651 // widened. Widening adds new phis to LoopPhis, inducing another round of 1652 // simplification on the wide IVs. 1653 bool Changed = false; 1654 while (!LoopPhis.empty()) { 1655 // Evaluate as many IV expressions as possible before widening any IVs. This 1656 // forces SCEV to set no-wrap flags before evaluating sign/zero 1657 // extension. The first time SCEV attempts to normalize sign/zero extension, 1658 // the result becomes final. So for the most predictable results, we delay 1659 // evaluation of sign/zero extend evaluation until needed, and avoid running 1660 // other SCEV based analysis prior to simplifyAndExtend. 1661 do { 1662 PHINode *CurrIV = LoopPhis.pop_back_val(); 1663 1664 // Information about sign/zero extensions of CurrIV. 1665 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); 1666 1667 Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter, 1668 &Visitor); 1669 1670 if (Visitor.WI.WidestNativeType) { 1671 WideIVs.push_back(Visitor.WI); 1672 } 1673 } while(!LoopPhis.empty()); 1674 1675 for (; !WideIVs.empty(); WideIVs.pop_back()) { 1676 WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards); 1677 if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) { 1678 Changed = true; 1679 LoopPhis.push_back(WidePhi); 1680 } 1681 } 1682 } 1683 return Changed; 1684 } 1685 1686 //===----------------------------------------------------------------------===// 1687 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition. 1688 //===----------------------------------------------------------------------===// 1689 1690 /// Given an Value which is hoped to be part of an add recurance in the given 1691 /// loop, return the associated Phi node if so. Otherwise, return null. Note 1692 /// that this is less general than SCEVs AddRec checking. 1693 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) { 1694 Instruction *IncI = dyn_cast<Instruction>(IncV); 1695 if (!IncI) 1696 return nullptr; 1697 1698 switch (IncI->getOpcode()) { 1699 case Instruction::Add: 1700 case Instruction::Sub: 1701 break; 1702 case Instruction::GetElementPtr: 1703 // An IV counter must preserve its type. 1704 if (IncI->getNumOperands() == 2) 1705 break; 1706 LLVM_FALLTHROUGH; 1707 default: 1708 return nullptr; 1709 } 1710 1711 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0)); 1712 if (Phi && Phi->getParent() == L->getHeader()) { 1713 if (L->isLoopInvariant(IncI->getOperand(1))) 1714 return Phi; 1715 return nullptr; 1716 } 1717 if (IncI->getOpcode() == Instruction::GetElementPtr) 1718 return nullptr; 1719 1720 // Allow add/sub to be commuted. 
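  // e.g. accept '%next = add i32 %invariant, %phi' as well as
  // '%next = add i32 %phi, %invariant' (operand names are illustrative only).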
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value. Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test. This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE.
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS.
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS.
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result. Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison. This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist. Note that a false result conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // Basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.
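  //
  // Illustrative sketch (values are hypothetical): if Root feeds the address
  // of a load that is guaranteed to execute before the exit,
  //   %p = getelementptr inbounds i8, i8* %base, i64 %root
  //   %v = load i8, i8* %p   ; immediate UB if %p is poison
  // then poison at Root provably reaches UB on the way to OnPathTo.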
1805 1806 // The set of all recursive users we've visited (which are assumed to all be 1807 // poison because of said visit) 1808 SmallSet<const Value *, 16> KnownPoison; 1809 SmallVector<const Instruction*, 16> Worklist; 1810 Worklist.push_back(Root); 1811 while (!Worklist.empty()) { 1812 const Instruction *I = Worklist.pop_back_val(); 1813 1814 // If we know this must trigger UB on a path leading our target. 1815 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo)) 1816 return true; 1817 1818 // If we can't analyze propagation through this instruction, just skip it 1819 // and transitive users. Safe as false is a conservative result. 1820 if (!propagatesPoison(I) && I != Root) 1821 continue; 1822 1823 if (KnownPoison.insert(I).second) 1824 for (const User *User : I->users()) 1825 Worklist.push_back(cast<Instruction>(User)); 1826 } 1827 1828 // Might be non-UB, or might have a path we couldn't prove must execute on 1829 // way to exiting bb. 1830 return false; 1831 } 1832 1833 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils 1834 /// down to checking that all operands are constant and listing instructions 1835 /// that may hide undef. 1836 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited, 1837 unsigned Depth) { 1838 if (isa<Constant>(V)) 1839 return !isa<UndefValue>(V); 1840 1841 if (Depth >= 6) 1842 return false; 1843 1844 // Conservatively handle non-constant non-instructions. For example, Arguments 1845 // may be undef. 1846 Instruction *I = dyn_cast<Instruction>(V); 1847 if (!I) 1848 return false; 1849 1850 // Load and return values may be undef. 1851 if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I)) 1852 return false; 1853 1854 // Optimistically handle other instructions. 1855 for (Value *Op : I->operands()) { 1856 if (!Visited.insert(Op).second) 1857 continue; 1858 if (!hasConcreteDefImpl(Op, Visited, Depth+1)) 1859 return false; 1860 } 1861 return true; 1862 } 1863 1864 /// Return true if the given value is concrete. We must prove that undef can 1865 /// never reach it. 1866 /// 1867 /// TODO: If we decide that this is a good approach to checking for undef, we 1868 /// may factor it into a common location. 1869 static bool hasConcreteDef(Value *V) { 1870 SmallPtrSet<Value*, 8> Visited; 1871 Visited.insert(V); 1872 return hasConcreteDefImpl(V, Visited, 0); 1873 } 1874 1875 /// Return true if this IV has any uses other than the (soon to be rewritten) 1876 /// loop exit test. 1877 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { 1878 int LatchIdx = Phi->getBasicBlockIndex(LatchBlock); 1879 Value *IncV = Phi->getIncomingValue(LatchIdx); 1880 1881 for (User *U : Phi->users()) 1882 if (U != Cond && U != IncV) return false; 1883 1884 for (User *U : IncV->users()) 1885 if (U != Cond && U != Phi) return false; 1886 return true; 1887 } 1888 1889 /// Return true if the given phi is a "counter" in L. A counter is an 1890 /// add recurance (of integer or pointer type) with an arbitrary start, and a 1891 /// step of 1. Note that L must have exactly one latch. 
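///
/// For example (illustrative only), the header phi of
///   for (i = Start; i != N; ++i)
/// qualifies: it is an affine add recurrence {Start,+,1}. A phi stepping by 2,
/// or by a loop-variant amount, does not.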
static bool isLoopCounter(PHINode *Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi);
}

/// Search the loop header for a loop counter (an add rec with a step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison. Our strategy here differs for
    // pointers and integer IVs.
For integers, we strip and reinfer as needed, 1970 // see code in linearFunctionTestReplace. For pointers, we restrict 1971 // transforms as there is no good way to reinfer inbounds once lost. 1972 if (!Phi->getType()->isIntegerTy() && 1973 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) 1974 continue; 1975 1976 const SCEV *Init = AR->getStart(); 1977 1978 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { 1979 // Don't force a live loop counter if another IV can be used. 1980 if (AlmostDeadIV(Phi, LatchBlock, Cond)) 1981 continue; 1982 1983 // Prefer to count-from-zero. This is a more "canonical" counter form. It 1984 // also prefers integer to pointer IVs. 1985 if (BestInit->isZero() != Init->isZero()) { 1986 if (BestInit->isZero()) 1987 continue; 1988 } 1989 // If two IVs both count from zero or both count from nonzero then the 1990 // narrower is likely a dead phi that has been widened. Use the wider phi 1991 // to allow the other to be eliminated. 1992 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) 1993 continue; 1994 } 1995 BestPhi = Phi; 1996 BestInit = Init; 1997 } 1998 return BestPhi; 1999 } 2000 2001 /// Insert an IR expression which computes the value held by the IV IndVar 2002 /// (which must be an loop counter w/unit stride) after the backedge of loop L 2003 /// is taken ExitCount times. 2004 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, 2005 const SCEV *ExitCount, bool UsePostInc, Loop *L, 2006 SCEVExpander &Rewriter, ScalarEvolution *SE) { 2007 assert(isLoopCounter(IndVar, L, SE)); 2008 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 2009 const SCEV *IVInit = AR->getStart(); 2010 2011 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter 2012 // finds a valid pointer IV. Sign extend ExitCount in order to materialize a 2013 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing 2014 // the existing GEPs whenever possible. 2015 if (IndVar->getType()->isPointerTy() && 2016 !ExitCount->getType()->isPointerTy()) { 2017 // IVOffset will be the new GEP offset that is interpreted by GEP as a 2018 // signed value. ExitCount on the other hand represents the loop trip count, 2019 // which is an unsigned value. FindLoopCounter only allows induction 2020 // variables that have a positive unit stride of one. This means we don't 2021 // have to handle the case of negative offsets (yet) and just need to zero 2022 // extend ExitCount. 2023 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); 2024 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy); 2025 if (UsePostInc) 2026 IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy)); 2027 2028 // Expand the code for the iteration count. 2029 assert(SE->isLoopInvariant(IVOffset, L) && 2030 "Computed iteration count is not loop invariant!"); 2031 2032 // We could handle pointer IVs other than i8*, but we need to compensate for 2033 // gep index scaling. 2034 assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), 2035 cast<PointerType>(IndVar->getType()) 2036 ->getElementType())->isOne() && 2037 "unit stride pointer IV must be i8*"); 2038 2039 const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset); 2040 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2041 return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI); 2042 } else { 2043 // In any other case, convert both IVInit and ExitCount to integers before 2044 // comparing. 
This may result in SCEV expansion of pointers, but in practice 2045 // SCEV will fold the pointer arithmetic away as such: 2046 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). 2047 // 2048 // Valid Cases: (1) both integers is most common; (2) both may be pointers 2049 // for simple memset-style loops. 2050 // 2051 // IVInit integer and ExitCount pointer would only occur if a canonical IV 2052 // were generated on top of case #2, which is not expected. 2053 2054 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 2055 // For unit stride, IVCount = Start + ExitCount with 2's complement 2056 // overflow. 2057 2058 // For integer IVs, truncate the IV before computing IVInit + BECount, 2059 // unless we know apriori that the limit must be a constant when evaluated 2060 // in the bitwidth of the IV. We prefer (potentially) keeping a truncate 2061 // of the IV in the loop over a (potentially) expensive expansion of the 2062 // widened exit count add(zext(add)) expression. 2063 if (SE->getTypeSizeInBits(IVInit->getType()) 2064 > SE->getTypeSizeInBits(ExitCount->getType())) { 2065 if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount)) 2066 ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType()); 2067 else 2068 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType()); 2069 } 2070 2071 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount); 2072 2073 if (UsePostInc) 2074 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); 2075 2076 // Expand the code for the iteration count. 2077 assert(SE->isLoopInvariant(IVLimit, L) && 2078 "Computed iteration count is not loop invariant!"); 2079 // Ensure that we generate the same type as IndVar, or a smaller integer 2080 // type. In the presence of null pointer values, we have an integer type 2081 // SCEV expression (IVInit) for a pointer type IV value (IndVar). 2082 Type *LimitTy = ExitCount->getType()->isPointerTy() ? 2083 IndVar->getType() : ExitCount->getType(); 2084 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2085 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); 2086 } 2087 } 2088 2089 /// This method rewrites the exit condition of the loop to be a canonical != 2090 /// comparison against the incremented loop induction variable. This pass is 2091 /// able to rewrite the exit tests of any loop where the SCEV analysis can 2092 /// determine a loop-invariant trip count of the loop, which is actually a much 2093 /// broader range than just linear tests. 2094 bool IndVarSimplify:: 2095 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 2096 const SCEV *ExitCount, 2097 PHINode *IndVar, SCEVExpander &Rewriter) { 2098 assert(L->getLoopLatch() && "Loop no longer in simplified form?"); 2099 assert(isLoopCounter(IndVar, L, SE)); 2100 Instruction * const IncVar = 2101 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch())); 2102 2103 // Initialize CmpIndVar to the preincremented IV. 2104 Value *CmpIndVar = IndVar; 2105 bool UsePostInc = false; 2106 2107 // If the exiting block is the same as the backedge block, we prefer to 2108 // compare against the post-incremented value, otherwise we must compare 2109 // against the preincremented value. 2110 if (ExitingBB == L->getLoopLatch()) { 2111 // For pointer IVs, we chose to not strip inbounds which requires us not 2112 // to add a potentially UB introducing use. 
We need to either a) show 2113 // the loop test we're modifying is already in post-inc form, or b) show 2114 // that adding a use must not introduce UB. 2115 bool SafeToPostInc = 2116 IndVar->getType()->isIntegerTy() || 2117 isLoopExitTestBasedOn(IncVar, ExitingBB) || 2118 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT); 2119 if (SafeToPostInc) { 2120 UsePostInc = true; 2121 CmpIndVar = IncVar; 2122 } 2123 } 2124 2125 // It may be necessary to drop nowrap flags on the incrementing instruction 2126 // if either LFTR moves from a pre-inc check to a post-inc check (in which 2127 // case the increment might have previously been poison on the last iteration 2128 // only) or if LFTR switches to a different IV that was previously dynamically 2129 // dead (and as such may be arbitrarily poison). We remove any nowrap flags 2130 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc 2131 // check), because the pre-inc addrec flags may be adopted from the original 2132 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags. 2133 // TODO: This handling is inaccurate for one case: If we switch to a 2134 // dynamically dead IV that wraps on the first loop iteration only, which is 2135 // not covered by the post-inc addrec. (If the new IV was not dynamically 2136 // dead, it could not be poison on the first iteration in the first place.) 2137 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) { 2138 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar)); 2139 if (BO->hasNoUnsignedWrap()) 2140 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap()); 2141 if (BO->hasNoSignedWrap()) 2142 BO->setHasNoSignedWrap(AR->hasNoSignedWrap()); 2143 } 2144 2145 Value *ExitCnt = genLoopLimit( 2146 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE); 2147 assert(ExitCnt->getType()->isPointerTy() == 2148 IndVar->getType()->isPointerTy() && 2149 "genLoopLimit missed a cast"); 2150 2151 // Insert a new icmp_ne or icmp_eq instruction before the branch. 2152 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2153 ICmpInst::Predicate P; 2154 if (L->contains(BI->getSuccessor(0))) 2155 P = ICmpInst::ICMP_NE; 2156 else 2157 P = ICmpInst::ICMP_EQ; 2158 2159 IRBuilder<> Builder(BI); 2160 2161 // The new loop exit condition should reuse the debug location of the 2162 // original loop exit condition. 2163 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition())) 2164 Builder.SetCurrentDebugLocation(Cond->getDebugLoc()); 2165 2166 // For integer IVs, if we evaluated the limit in the narrower bitwidth to 2167 // avoid the expensive expansion of the limit expression in the wider type, 2168 // emit a truncate to narrow the IV to the ExitCount type. This is safe 2169 // since we know (from the exit count bitwidth), that we can't self-wrap in 2170 // the narrower type. 2171 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); 2172 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); 2173 if (CmpIndVarSize > ExitCntSize) { 2174 assert(!CmpIndVar->getType()->isPointerTy() && 2175 !ExitCnt->getType()->isPointerTy()); 2176 2177 // Before resorting to actually inserting the truncate, use the same 2178 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend 2179 // the other side of the comparison instead. We still evaluate the limit 2180 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to 2181 // a truncate within in. 
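    //
    // Illustrative sketch (%iv and %exit.cnt are hypothetical names): with an
    // i64 IV and an i32 exit count, prefer extending the limit once outside
    // the loop,
    //   %wide.trip.count = zext i32 %exit.cnt to i64
    //   %exitcond = icmp ne i64 %iv, %wide.trip.count
    // over truncating the IV to i32 on every iteration.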
2182 bool Extended = false; 2183 const SCEV *IV = SE->getSCEV(CmpIndVar); 2184 const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar), 2185 ExitCnt->getType()); 2186 const SCEV *ZExtTrunc = 2187 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType()); 2188 2189 if (ZExtTrunc == IV) { 2190 Extended = true; 2191 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(), 2192 "wide.trip.count"); 2193 } else { 2194 const SCEV *SExtTrunc = 2195 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType()); 2196 if (SExtTrunc == IV) { 2197 Extended = true; 2198 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(), 2199 "wide.trip.count"); 2200 } 2201 } 2202 2203 if (Extended) { 2204 bool Discard; 2205 L->makeLoopInvariant(ExitCnt, Discard); 2206 } else 2207 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), 2208 "lftr.wideiv"); 2209 } 2210 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 2211 << " LHS:" << *CmpIndVar << '\n' 2212 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==") 2213 << "\n" 2214 << " RHS:\t" << *ExitCnt << "\n" 2215 << "ExitCount:\t" << *ExitCount << "\n" 2216 << " was: " << *BI->getCondition() << "\n"); 2217 2218 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); 2219 Value *OrigCond = BI->getCondition(); 2220 // It's tempting to use replaceAllUsesWith here to fully replace the old 2221 // comparison, but that's not immediately safe, since users of the old 2222 // comparison may not be dominated by the new comparison. Instead, just 2223 // update the branch to use the new comparison; in the common case this 2224 // will make old comparison dead. 2225 BI->setCondition(Cond); 2226 DeadInsts.push_back(OrigCond); 2227 2228 ++NumLFTR; 2229 return true; 2230 } 2231 2232 //===----------------------------------------------------------------------===// 2233 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders. 2234 //===----------------------------------------------------------------------===// 2235 2236 /// If there's a single exit block, sink any loop-invariant values that 2237 /// were defined in the preheader but not used inside the loop into the 2238 /// exit block to reduce register pressure in the loop. 2239 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { 2240 BasicBlock *ExitBlock = L->getExitBlock(); 2241 if (!ExitBlock) return false; 2242 2243 BasicBlock *Preheader = L->getLoopPreheader(); 2244 if (!Preheader) return false; 2245 2246 bool MadeAnyChanges = false; 2247 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); 2248 BasicBlock::iterator I(Preheader->getTerminator()); 2249 while (I != Preheader->begin()) { 2250 --I; 2251 // New instructions were inserted at the end of the preheader. 2252 if (isa<PHINode>(I)) 2253 break; 2254 2255 // Don't move instructions which might have side effects, since the side 2256 // effects need to complete before instructions inside the loop. Also don't 2257 // move instructions which might read memory, since the loop may modify 2258 // memory. Note that it's okay if the instruction might have undefined 2259 // behavior: LoopSimplify guarantees that the preheader dominates the exit 2260 // block. 2261 if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 2262 continue; 2263 2264 // Skip debug info intrinsics. 2265 if (isa<DbgInfoIntrinsic>(I)) 2266 continue; 2267 2268 // Skip eh pad instructions. 
2269 if (I->isEHPad()) 2270 continue; 2271 2272 // Don't sink alloca: we never want to sink static alloca's out of the 2273 // entry block, and correctly sinking dynamic alloca's requires 2274 // checks for stacksave/stackrestore intrinsics. 2275 // FIXME: Refactor this check somehow? 2276 if (isa<AllocaInst>(I)) 2277 continue; 2278 2279 // Determine if there is a use in or before the loop (direct or 2280 // otherwise). 2281 bool UsedInLoop = false; 2282 for (Use &U : I->uses()) { 2283 Instruction *User = cast<Instruction>(U.getUser()); 2284 BasicBlock *UseBB = User->getParent(); 2285 if (PHINode *P = dyn_cast<PHINode>(User)) { 2286 unsigned i = 2287 PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 2288 UseBB = P->getIncomingBlock(i); 2289 } 2290 if (UseBB == Preheader || L->contains(UseBB)) { 2291 UsedInLoop = true; 2292 break; 2293 } 2294 } 2295 2296 // If there is, the def must remain in the preheader. 2297 if (UsedInLoop) 2298 continue; 2299 2300 // Otherwise, sink it to the exit block. 2301 Instruction *ToMove = &*I; 2302 bool Done = false; 2303 2304 if (I != Preheader->begin()) { 2305 // Skip debug info intrinsics. 2306 do { 2307 --I; 2308 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); 2309 2310 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) 2311 Done = true; 2312 } else { 2313 Done = true; 2314 } 2315 2316 MadeAnyChanges = true; 2317 ToMove->moveBefore(*ExitBlock, InsertPt); 2318 if (Done) break; 2319 InsertPt = ToMove->getIterator(); 2320 } 2321 2322 return MadeAnyChanges; 2323 } 2324 2325 /// Return a symbolic upper bound for the backedge taken count of the loop. 2326 /// This is more general than getConstantMaxBackedgeTakenCount as it returns 2327 /// an arbitrary expression as opposed to only constants. 2328 /// TODO: Move into the ScalarEvolution class. 2329 static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE, 2330 DominatorTree &DT, Loop *L) { 2331 SmallVector<BasicBlock*, 16> ExitingBlocks; 2332 L->getExitingBlocks(ExitingBlocks); 2333 2334 // Form an expression for the maximum exit count possible for this loop. We 2335 // merge the max and exact information to approximate a version of 2336 // getConstantMaxBackedgeTakenCount which isn't restricted to just constants. 2337 SmallVector<const SCEV*, 4> ExitCounts; 2338 for (BasicBlock *ExitingBB : ExitingBlocks) { 2339 const SCEV *ExitCount = SE.getExitCount(L, ExitingBB); 2340 if (isa<SCEVCouldNotCompute>(ExitCount)) 2341 ExitCount = SE.getExitCount(L, ExitingBB, 2342 ScalarEvolution::ConstantMaximum); 2343 if (!isa<SCEVCouldNotCompute>(ExitCount)) { 2344 assert(DT.dominates(ExitingBB, L->getLoopLatch()) && 2345 "We should only have known counts for exiting blocks that " 2346 "dominate latch!"); 2347 ExitCounts.push_back(ExitCount); 2348 } 2349 } 2350 if (ExitCounts.empty()) 2351 return SE.getCouldNotCompute(); 2352 return SE.getUMinFromMismatchedTypes(ExitCounts); 2353 } 2354 2355 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { 2356 SmallVector<BasicBlock*, 16> ExitingBlocks; 2357 L->getExitingBlocks(ExitingBlocks); 2358 2359 // Remove all exits which aren't both rewriteable and analyzeable. 2360 auto NewEnd = llvm::remove_if(ExitingBlocks, [&](BasicBlock *ExitingBB) { 2361 // If our exitting block exits multiple loops, we can only rewrite the 2362 // innermost one. Otherwise, we're changing how many times the innermost 2363 // loop runs before it exits. 2364 if (LI->getLoopFor(ExitingBB) != L) 2365 return true; 2366 2367 // Can't rewrite non-branch yet. 
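    // (e.g. an exiting block terminated by a switch or invoke is skipped.)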
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount))
      return true;
    return false;
  });
  ExitingBlocks.erase(NewEnd, ExitingBlocks.end());

  if (ExitingBlocks.empty())
    return false;

  // Get a symbolic upper bound on the loop backedge taken count.
  const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
  if (isa<SCEVCouldNotCompute>(MaxExitCount))
    return false;

  // Visit our exit blocks in order of dominance. We know from the fact that
  // all exits (left) are analyzable that there must be a total dominance order
  // between them as each must dominate the latch. The visit order only
  // matters for the provably equal case.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse of
               // the normal dominance relation.
               if (A == B) return false;
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               llvm_unreachable("expected total dominance order!");
             });
#ifndef NDEBUG
  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
    assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
  }
#endif

  auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
    auto *OldCond = BI->getCondition();
    auto *NewCond = ConstantInt::get(OldCond->getType(),
                                     IsTaken ? ExitIfTrue : !ExitIfTrue);
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.push_back(OldCond);
  };

  bool Changed = false;
  SmallSet<const SCEV*, 8> DominatingExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this. This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // TODO: Given we know the backedge can't be taken, we should go ahead
    // and break it. Or at least, kill all the header phis and simplify.
    if (ExitCount->isZero()) {
      FoldExit(ExitingBB, true);
      Changed = true;
      continue;
    }

    // If we end up with a pointer exit count, bail. Note that we can end up
    // with a pointer exit count for one exiting block, and not for another in
    // the same loop.
    if (!ExitCount->getType()->isIntegerTy() ||
        !MaxExitCount->getType()->isIntegerTy())
      continue;

    Type *WiderType =
      SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
    ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
    MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
    assert(MaxExitCount->getType() == ExitCount->getType());

    // Can we prove that some other exit must be taken strictly before this
    // one?
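    // e.g. if loop entry is guarded by MaxExitCount <u ExitCount, some other
    // exit fires on a strictly earlier iteration, so this exit is never taken
    // and its branch can be folded to stay in the loop.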
2453 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, 2454 MaxExitCount, ExitCount)) { 2455 FoldExit(ExitingBB, false); 2456 Changed = true; 2457 continue; 2458 } 2459 2460 // As we run, keep track of which exit counts we've encountered. If we 2461 // find a duplicate, we've found an exit which would have exited on the 2462 // exiting iteration, but (from the visit order) strictly follows another 2463 // which does the same and is thus dead. 2464 if (!DominatingExitCounts.insert(ExitCount).second) { 2465 FoldExit(ExitingBB, false); 2466 Changed = true; 2467 continue; 2468 } 2469 2470 // TODO: There might be another oppurtunity to leverage SCEV's reasoning 2471 // here. If we kept track of the min of dominanting exits so far, we could 2472 // discharge exits with EC >= MDEC. This is less powerful than the existing 2473 // transform (since later exits aren't considered), but potentially more 2474 // powerful for any case where SCEV can prove a >=u b, but neither a == b 2475 // or a >u b. Such a case is not currently known. 2476 } 2477 return Changed; 2478 } 2479 2480 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { 2481 SmallVector<BasicBlock*, 16> ExitingBlocks; 2482 L->getExitingBlocks(ExitingBlocks); 2483 2484 // Finally, see if we can rewrite our exit conditions into a loop invariant 2485 // form. If we have a read-only loop, and we can tell that we must exit down 2486 // a path which does not need any of the values computed within the loop, we 2487 // can rewrite the loop to exit on the first iteration. Note that this 2488 // doesn't either a) tell us the loop exits on the first iteration (unless 2489 // *all* exits are predicateable) or b) tell us *which* exit might be taken. 2490 // This transformation looks a lot like a restricted form of dead loop 2491 // elimination, but restricted to read-only loops and without neccesssarily 2492 // needing to kill the loop entirely. 2493 if (!LoopPredication) 2494 return false; 2495 2496 if (!SE->hasLoopInvariantBackedgeTakenCount(L)) 2497 return false; 2498 2499 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits 2500 // through *explicit* control flow. We have to eliminate the possibility of 2501 // implicit exits (see below) before we know it's truly exact. 2502 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L); 2503 if (isa<SCEVCouldNotCompute>(ExactBTC) || 2504 !SE->isLoopInvariant(ExactBTC, L) || 2505 !isSafeToExpand(ExactBTC, *SE)) 2506 return false; 2507 2508 // If we end up with a pointer exit count, bail. It may be unsized. 2509 if (!ExactBTC->getType()->isIntegerTy()) 2510 return false; 2511 2512 auto BadExit = [&](BasicBlock *ExitingBB) { 2513 // If our exiting block exits multiple loops, we can only rewrite the 2514 // innermost one. Otherwise, we're changing how many times the innermost 2515 // loop runs before it exits. 2516 if (LI->getLoopFor(ExitingBB) != L) 2517 return true; 2518 2519 // Can't rewrite non-branch yet. 2520 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 2521 if (!BI) 2522 return true; 2523 2524 // If already constant, nothing to do. 2525 if (isa<Constant>(BI->getCondition())) 2526 return true; 2527 2528 // If the exit block has phis, we need to be able to compute the values 2529 // within the loop which contains them. This assumes trivially lcssa phis 2530 // have already been removed; TODO: generalize 2531 BasicBlock *ExitBlock = 2532 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 
        1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    assert(!isa<SCEVCouldNotCompute>(ExactBTC) &&
           "implied by having exact trip count");
    if (!SE->isLoopInvariant(ExitCount, L) ||
        !isSafeToExpand(ExitCount, *SE))
      return true;

    // If we end up with a pointer exit count, bail. It may be unsized.
    if (!ExitCount->getType()->isIntegerTy())
      return true;

    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it. Consider
  // two exits (a) and (b) which would both exit on the same iteration. If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b). Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known unequal.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse of
               // the normal dominance relation, plus a tie breaker for blocks
               // unordered by dominance.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               return A->getName() < B->getName();
             });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge). If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count. The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    return DT->dominates(ExitingBB, L->getLoopLatch());
  }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable. Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests. Any
  // suggestions on how to improve this? I can obviously bail out for outer
  // loops, but that seems less than ideal. MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO: isGuaranteedToTransfer
      if (I.mayHaveSideEffects() || I.mayThrow())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks.
A couple 2607 // of notes here: 2608 // 1) We don't bother to constant fold dominated exits with identical exit 2609 // counts; that's simply a form of CSE/equality propagation and we leave 2610 // it for dedicated passes. 2611 // 2) We insert the comparison at the branch. Hoisting introduces additional 2612 // legality constraints and we leave that to dedicated logic. We want to 2613 // predicate even if we can't insert a loop invariant expression as 2614 // peeling or unrolling will likely reduce the cost of the otherwise loop 2615 // varying check. 2616 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator()); 2617 IRBuilder<> B(L->getLoopPreheader()->getTerminator()); 2618 Value *ExactBTCV = nullptr; // Lazily generated if needed. 2619 for (BasicBlock *ExitingBB : ExitingBlocks) { 2620 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2621 2622 auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2623 Value *NewCond; 2624 if (ExitCount == ExactBTC) { 2625 NewCond = L->contains(BI->getSuccessor(0)) ? 2626 B.getFalse() : B.getTrue(); 2627 } else { 2628 Value *ECV = Rewriter.expandCodeFor(ExitCount); 2629 if (!ExactBTCV) 2630 ExactBTCV = Rewriter.expandCodeFor(ExactBTC); 2631 Value *RHS = ExactBTCV; 2632 if (ECV->getType() != RHS->getType()) { 2633 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType()); 2634 ECV = B.CreateZExt(ECV, WiderTy); 2635 RHS = B.CreateZExt(RHS, WiderTy); 2636 } 2637 auto Pred = L->contains(BI->getSuccessor(0)) ? 2638 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; 2639 NewCond = B.CreateICmp(Pred, ECV, RHS); 2640 } 2641 Value *OldCond = BI->getCondition(); 2642 BI->setCondition(NewCond); 2643 if (OldCond->use_empty()) 2644 DeadInsts.push_back(OldCond); 2645 Changed = true; 2646 } 2647 2648 return Changed; 2649 } 2650 2651 //===----------------------------------------------------------------------===// 2652 // IndVarSimplify driver. Manage several subpasses of IV simplification. 2653 //===----------------------------------------------------------------------===// 2654 2655 bool IndVarSimplify::run(Loop *L) { 2656 // We need (and expect!) the incoming loop to be in LCSSA. 2657 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2658 "LCSSA required to run indvars!"); 2659 2660 // If LoopSimplify form is not available, stay out of trouble. Some notes: 2661 // - LSR currently only supports LoopSimplify-form loops. Indvars' 2662 // canonicalization can be a pessimization without LSR to "clean up" 2663 // afterwards. 2664 // - We depend on having a preheader; in particular, 2665 // Loop::getCanonicalInductionVariable only supports loops with preheaders, 2666 // and we're in trouble if we can't find the induction variable even when 2667 // we've manually inserted one. 2668 // - LFTR relies on having a single backedge. 2669 if (!L->isLoopSimplifyForm()) 2670 return false; 2671 2672 #ifndef NDEBUG 2673 // Used below for a consistency check only 2674 // Note: Since the result returned by ScalarEvolution may depend on the order 2675 // in which previous results are added to its cache, the call to 2676 // getBackedgeTakenCount() may change following SCEV queries. 2677 const SCEV *BackedgeTakenCount; 2678 if (VerifyIndvars) 2679 BackedgeTakenCount = SE->getBackedgeTakenCount(L); 2680 #endif 2681 2682 bool Changed = false; 2683 // If there are any floating-point recurrences, attempt to 2684 // transform them to use integer recurrences. 2685 Changed |= rewriteNonIntegerIVs(L); 2686 2687 // Create a rewriter object which we'll use to transform the code with. 
2688 SCEVExpander Rewriter(*SE, DL, "indvars"); 2689 #ifndef NDEBUG 2690 Rewriter.setDebugType(DEBUG_TYPE); 2691 #endif 2692 2693 // Eliminate redundant IV users. 2694 // 2695 // Simplification works best when run before other consumers of SCEV. We 2696 // attempt to avoid evaluating SCEVs for sign/zero extend operations until 2697 // other expressions involving loop IVs have been evaluated. This helps SCEV 2698 // set no-wrap flags before normalizing sign/zero extension. 2699 Rewriter.disableCanonicalMode(); 2700 Changed |= simplifyAndExtend(L, Rewriter, LI); 2701 2702 // Check to see if we can compute the final value of any expressions 2703 // that are recurrent in the loop, and substitute the exit values from the 2704 // loop into any instructions outside of the loop that use the final values 2705 // of the current expressions. 2706 if (ReplaceExitValue != NeverRepl) { 2707 if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT, 2708 ReplaceExitValue, DeadInsts)) { 2709 NumReplaced += Rewrites; 2710 Changed = true; 2711 } 2712 } 2713 2714 // Eliminate redundant IV cycles. 2715 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); 2716 2717 // Try to eliminate loop exits based on analyzeable exit counts 2718 if (optimizeLoopExits(L, Rewriter)) { 2719 Changed = true; 2720 // Given we've changed exit counts, notify SCEV 2721 SE->forgetLoop(L); 2722 } 2723 2724 // Try to form loop invariant tests for loop exits by changing how many 2725 // iterations of the loop run when that is unobservable. 2726 if (predicateLoopExits(L, Rewriter)) { 2727 Changed = true; 2728 // Given we've changed exit counts, notify SCEV 2729 SE->forgetLoop(L); 2730 } 2731 2732 // If we have a trip count expression, rewrite the loop's exit condition 2733 // using it. 2734 if (!DisableLFTR) { 2735 BasicBlock *PreHeader = L->getLoopPreheader(); 2736 BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); 2737 2738 SmallVector<BasicBlock*, 16> ExitingBlocks; 2739 L->getExitingBlocks(ExitingBlocks); 2740 for (BasicBlock *ExitingBB : ExitingBlocks) { 2741 // Can't rewrite non-branch yet. 2742 if (!isa<BranchInst>(ExitingBB->getTerminator())) 2743 continue; 2744 2745 // If our exitting block exits multiple loops, we can only rewrite the 2746 // innermost one. Otherwise, we're changing how many times the innermost 2747 // loop runs before it exits. 2748 if (LI->getLoopFor(ExitingBB) != L) 2749 continue; 2750 2751 if (!needsLFTR(L, ExitingBB)) 2752 continue; 2753 2754 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2755 if (isa<SCEVCouldNotCompute>(ExitCount)) 2756 continue; 2757 2758 // This was handled above, but as we form SCEVs, we can sometimes refine 2759 // existing ones; this allows exit counts to be folded to zero which 2760 // weren't when optimizeLoopExits saw them. Arguably, we should iterate 2761 // until stable to handle cases like this better. 2762 if (ExitCount->isZero()) 2763 continue; 2764 2765 PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT); 2766 if (!IndVar) 2767 continue; 2768 2769 // Avoid high cost expansions. Note: This heuristic is questionable in 2770 // that our definition of "high cost" is not exactly principled. 2771 if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget, 2772 TTI, PreHeaderBR)) 2773 continue; 2774 2775 // Check preconditions for proper SCEVExpander operation. SCEV does not 2776 // express SCEVExpander's dependencies, such as LoopSimplify. 
Instead 2777 // any pass that uses the SCEVExpander must do it. This does not work 2778 // well for loop passes because SCEVExpander makes assumptions about 2779 // all loops, while LoopPassManager only forces the current loop to be 2780 // simplified. 2781 // 2782 // FIXME: SCEV expansion has no way to bail out, so the caller must 2783 // explicitly check any assumptions made by SCEV. Brittle. 2784 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount); 2785 if (!AR || AR->getLoop()->getLoopPreheader()) 2786 Changed |= linearFunctionTestReplace(L, ExitingBB, 2787 ExitCount, IndVar, 2788 Rewriter); 2789 } 2790 } 2791 // Clear the rewriter cache, because values that are in the rewriter's cache 2792 // can be deleted in the loop below, causing the AssertingVH in the cache to 2793 // trigger. 2794 Rewriter.clear(); 2795 2796 // Now that we're done iterating through lists, clean up any instructions 2797 // which are now dead. 2798 while (!DeadInsts.empty()) 2799 if (Instruction *Inst = 2800 dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val())) 2801 Changed |= 2802 RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get()); 2803 2804 // The Rewriter may not be used from this point on. 2805 2806 // Loop-invariant instructions in the preheader that aren't used in the 2807 // loop may be sunk below the loop to reduce register pressure. 2808 Changed |= sinkUnusedInvariants(L); 2809 2810 // rewriteFirstIterationLoopExitValues does not rely on the computation of 2811 // trip count and therefore can further simplify exit values in addition to 2812 // rewriteLoopExitValues. 2813 Changed |= rewriteFirstIterationLoopExitValues(L); 2814 2815 // Clean up dead instructions. 2816 Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get()); 2817 2818 // Check a post-condition. 2819 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2820 "Indvars did not preserve LCSSA!"); 2821 2822 // Verify that LFTR, and any other change have not interfered with SCEV's 2823 // ability to compute trip count. We may have *changed* the exit count, but 2824 // only by reducing it. 
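  // A sketch of the check below: both counts are first brought to a common
  // width, then we assert that SCEV cannot prove the old backedge-taken count
  // is unsigned-less-than the new one (i.e. the count did not grow).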
2825 #ifndef NDEBUG 2826 if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { 2827 SE->forgetLoop(L); 2828 const SCEV *NewBECount = SE->getBackedgeTakenCount(L); 2829 if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) < 2830 SE->getTypeSizeInBits(NewBECount->getType())) 2831 NewBECount = SE->getTruncateOrNoop(NewBECount, 2832 BackedgeTakenCount->getType()); 2833 else 2834 BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, 2835 NewBECount->getType()); 2836 assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount, 2837 NewBECount) && "indvars must preserve SCEV"); 2838 } 2839 if (VerifyMemorySSA && MSSAU) 2840 MSSAU->getMemorySSA()->verifyMemorySSA(); 2841 #endif 2842 2843 return Changed; 2844 } 2845 2846 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, 2847 LoopStandardAnalysisResults &AR, 2848 LPMUpdater &) { 2849 Function *F = L.getHeader()->getParent(); 2850 const DataLayout &DL = F->getParent()->getDataLayout(); 2851 2852 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA); 2853 if (!IVS.run(&L)) 2854 return PreservedAnalyses::all(); 2855 2856 auto PA = getLoopPassPreservedAnalyses(); 2857 PA.preserveSet<CFGAnalyses>(); 2858 if (AR.MSSA) 2859 PA.preserve<MemorySSAAnalysis>(); 2860 return PA; 2861 } 2862 2863 namespace { 2864 2865 struct IndVarSimplifyLegacyPass : public LoopPass { 2866 static char ID; // Pass identification, replacement for typeid 2867 2868 IndVarSimplifyLegacyPass() : LoopPass(ID) { 2869 initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry()); 2870 } 2871 2872 bool runOnLoop(Loop *L, LPPassManager &LPM) override { 2873 if (skipLoop(L)) 2874 return false; 2875 2876 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 2877 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 2878 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2879 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); 2880 auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr; 2881 auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); 2882 auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr; 2883 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 2884 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>(); 2885 MemorySSA *MSSA = nullptr; 2886 if (MSSAAnalysis) 2887 MSSA = &MSSAAnalysis->getMSSA(); 2888 2889 IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA); 2890 return IVS.run(L); 2891 } 2892 2893 void getAnalysisUsage(AnalysisUsage &AU) const override { 2894 AU.setPreservesCFG(); 2895 AU.addPreserved<MemorySSAWrapperPass>(); 2896 getLoopAnalysisUsage(AU); 2897 } 2898 }; 2899 2900 } // end anonymous namespace 2901 2902 char IndVarSimplifyLegacyPass::ID = 0; 2903 2904 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", 2905 "Induction Variable Simplification", false, false) 2906 INITIALIZE_PASS_DEPENDENCY(LoopPass) 2907 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars", 2908 "Induction Variable Simplification", false, false) 2909 2910 Pass *llvm::createIndVarSimplifyPass() { 2911 return new IndVarSimplifyLegacyPass(); 2912 } 2913