//===- IndVarSimplify.cpp - Induction Variable Elimination ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
"llvm/Transforms/Utils/BasicBlockUtils.h" 84 #include "llvm/Transforms/Utils/Local.h" 85 #include "llvm/Transforms/Utils/LoopUtils.h" 86 #include "llvm/Transforms/Utils/SimplifyIndVar.h" 87 #include <cassert> 88 #include <cstdint> 89 #include <utility> 90 91 using namespace llvm; 92 93 #define DEBUG_TYPE "indvars" 94 95 STATISTIC(NumWidened , "Number of indvars widened"); 96 STATISTIC(NumReplaced , "Number of exit values replaced"); 97 STATISTIC(NumLFTR , "Number of loop exit tests replaced"); 98 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); 99 STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); 100 101 // Trip count verification can be enabled by default under NDEBUG if we 102 // implement a strong expression equivalence checker in SCEV. Until then, we 103 // use the verify-indvars flag, which may assert in some cases. 104 static cl::opt<bool> VerifyIndvars( 105 "verify-indvars", cl::Hidden, 106 cl::desc("Verify the ScalarEvolution result after running indvars. Has no " 107 "effect in release builds. (Note: this adds additional SCEV " 108 "queries potentially changing the analysis result)")); 109 110 static cl::opt<ReplaceExitVal> ReplaceExitValue( 111 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), 112 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), 113 cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), 114 clEnumValN(OnlyCheapRepl, "cheap", 115 "only replace exit value when the cost is cheap"), 116 clEnumValN(NoHardUse, "noharduse", 117 "only replace exit values when loop def likely dead"), 118 clEnumValN(AlwaysRepl, "always", 119 "always replace exit value whenever possible"))); 120 121 static cl::opt<bool> UsePostIncrementRanges( 122 "indvars-post-increment-ranges", cl::Hidden, 123 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), 124 cl::init(true)); 125 126 static cl::opt<bool> 127 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), 128 cl::desc("Disable Linear Function Test Replace optimization")); 129 130 static cl::opt<bool> 131 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), 132 cl::desc("Predicate conditions in read only loops")); 133 134 namespace { 135 136 struct RewritePhi; 137 138 class IndVarSimplify { 139 LoopInfo *LI; 140 ScalarEvolution *SE; 141 DominatorTree *DT; 142 const DataLayout &DL; 143 TargetLibraryInfo *TLI; 144 const TargetTransformInfo *TTI; 145 std::unique_ptr<MemorySSAUpdater> MSSAU; 146 147 SmallVector<WeakTrackingVH, 16> DeadInsts; 148 149 bool handleFloatingPointIV(Loop *L, PHINode *PH); 150 bool rewriteNonIntegerIVs(Loop *L); 151 152 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); 153 /// Try to eliminate loop exits based on analyzeable exit counts 154 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter); 155 /// Try to form loop invariant tests for loop exits by changing how many 156 /// iterations of the loop run when that is unobservable. 
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);
};

} // end anonymous namespace

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t.
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
///   for(double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for(int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge ^ 1;

  // Check the incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find the exit condition, which is an fcmp. If it doesn't exist, or if it
  // isn't only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find the new predicate for the integer comparison.
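  // Note that the ordered and unordered variants of each FP predicate can map
  // to the same signed integer predicate here: the initial value, the step,
  // and the exit value have all been checked to be integer-valued constants,
  // so NaNs are not involved and, e.g., both 'fcmp olt' and 'fcmp ult' behave
  // like 'icmp slt' on the equivalent integer values.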
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue - InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue + IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue - ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue + IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

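  // At this point all of the safety checks above have passed: the initial,
  // step and exit values fit in a signed i32, the stride is non-zero, the loop
  // cannot be infinite under the new predicate, and the new integer IV will
  // not wrap around i32 before the exit test fires, so the i32 recurrence
  // built below reproduces the trip count of the FP induction variable.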
  // Insert the new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName() + ".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something
  // else in the loop uses its value. In order to canonicalize the induction
  // variable, we choose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
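///
/// For instance (an illustrative sketch, not a specific test case): if the
/// header has 'p = phi [init, preheader], [next, latch]' and an early exit
/// whose loop-invariant condition is tested in a block dominating the latch,
/// that exit can only ever be taken on the first iteration, so the LCSSA phi
/// in the exit block that receives 'p' can be rewritten to use 'init'.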
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all? (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.) Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get the condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by visitIVCast and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
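///
/// For instance, if a 32-bit IV is used by both a 'sext i32 ... to i64' and a
/// later 'zext i32 ... to i64', only the casts that match the signedness of
/// the first extension seen are considered, and WidestNativeType records the
/// widest legal integer type among those extends.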
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable.
  // We could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def. This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative. Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or
/// inserting truncs whenever we stop propagating the type.
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo *LI;
  Loop *L;
  ScalarEvolution *SE;
  DominatorTree *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *, 16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// nullptr for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>.
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification
/// after widening its type?
/// In other words, can the extend be safely hoisted out of the loop, with SCEV
/// reducing the value to a recurrence on the same loop? If so, return the
/// extended recurrence and the kind of extension used. Otherwise return
/// {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension). For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive. Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}

/// If the narrow use is an instruction whose two operands are the defining
/// instruction of DU and a load instruction, then we have the following:
/// if the load is hoisted outside the loop, then we do not reach this function
/// as scalar evolution analysis works fine in widenIVUse with variables
/// hoisted outside the loop and efficient code is subsequently generated by
/// not emitting truncate instructions. But when the load is not hoisted
/// (whether due to a limitation in alias analysis or a true legality
/// constraint), then scalar evolution cannot proceed with loop-variant values
/// and inefficient code is generated. This function handles the non-hoisted
/// load special case by making the optimization generate the same type of code
/// for hoisted and non-hoisted loads (widen the use and eliminate the sign
/// extend instruction). This special case is important especially when the
/// induction variables are affecting the addressing mode in code generation.
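///
/// As a hypothetical illustration (not taken from a particular test case):
///   %val = load i32, i32* %ptr        ; loop-variant, not hoisted
///   %idx = add nsw i32 %iv, %val
///   %ext = sext i32 %idx to i64
/// If every user of %idx is such a sext to the wide type, the add can be
/// redone on the widened IV with 'sext %val' as the other operand, and the
/// sign extend of %idx disappears from the loop body.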
bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  // Handle the common case of add<nsw/nuw>.
  const unsigned OpCode = NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return false;

  // The operand that is not defined by NarrowDef of DU. Let's call it the
  // other operand.
  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
         "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(NarrowUse);
  ExtendKind ExtKind = getExtendKind(NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return false;

  // We are interested in the other operand being a load instruction.
  // But, we should look into relaxing this restriction later on.
  auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
  if (I && I->getOpcode() != Instruction::Load)
    return false;

  // Verify that the defining operand is an AddRec.
  const SCEV *Op1 = SE->getSCEV(WideDef);
  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
    return false;
  // Verify that the other operand is an extend.
  if (ExtKind == SignExtended) {
    if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
      return false;
  } else {
    if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
      return false;
  }

  if (ExtKind == SignExtended) {
    for (Use &U : NarrowUse->uses()) {
      SExtInst *User = dyn_cast<SExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  } else { // ExtKind == ZeroExtended
    for (Use &U : NarrowUse->uses()) {
      ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  }

  return true;
}

/// Special case for widening with variant loads (see
/// WidenIV::widenWithVariantLoadUse). This is the code generation part.
void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  ExtendKind ExtKind = getExtendKind(NarrowDef);

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  // Generate the widened use instruction.
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      ExtKind, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      ExtKind, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);

  assert(ExtKind != Unknown && "Unknown ExtKind not handled");

  ExtendKindMap[NarrowUse] = ExtKind;

  for (Use &U : NarrowUse->uses()) {
    Instruction *User = nullptr;
    if (ExtKind == SignExtended)
      User = dyn_cast<SExtInst>(U.getUser());
    else
      User = dyn_cast<ZExtInst>(U.getUser());
    if (User && User->getType() == WideType) {
      LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
                        << *WideBO << "\n");
      ++NumElimExt;
      User->replaceAllUsesWith(WideBO);
      DeadInsts.emplace_back(User);
    }
  }
}

/// Determine whether an individual user of the narrow IV can be widened. If
/// so, return the wide clone of the user.
Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
  assert(ExtendKindMap.count(DU.NarrowDef) &&
         "Should already know the kind of extension used to widen NarrowDef");

  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
    if (LI->getLoopFor(UsePhi->getParent()) != L) {
      // For LCSSA phis, sink the truncate outside the loop.
      // After SimplifyCFG most loop exit targets have a single predecessor.
      // Otherwise fall back to a truncate within the loop.
      if (UsePhi->getNumOperands() != 1)
        truncateIVUse(DU, DT, LI);
      else {
        // Widening the PHI requires us to insert a trunc. The logical place
        // for this trunc is in the same BB as the PHI. This is not possible if
        // the BB is terminated by a catchswitch.
        if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
          return nullptr;

        PHINode *WidePhi =
            PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
                            UsePhi);
        WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
        IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
        Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
        UsePhi->replaceAllUsesWith(Trunc);
        DeadInsts.emplace_back(UsePhi);
        LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
                          << *WidePhi << "\n");
      }
      return nullptr;
    }
  }

  // This narrow use can be widened by a sext if it's non-negative or its
  // narrow def was widened by a sext. Same for zext.
  auto canWidenBySExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
  };
  auto canWidenByZExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
  };

  // Our raison d'etre! Eliminate sign and zero extension.
  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
      (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
    Value *NewDef = DU.WideDef;
    if (DU.NarrowUse->getType() != WideType) {
      unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
      if (CastWidth < IVWidth) {
        // The cast isn't as wide as the IV, so insert a Trunc.
        IRBuilder<> Builder(DU.NarrowUse);
        NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
      }
      else {
        // A wider extend was hidden behind a narrower one. This may induce
        // another round of IV widening in which the intermediate IV becomes
        // dead. It should be very rare.
        LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
                          << " not wide enough to subsume " << *DU.NarrowUse
                          << "\n");
        DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
        NewDef = DU.NarrowUse;
      }
    }
    if (NewDef != DU.NarrowUse) {
      LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
                        << " replaced by " << *DU.WideDef << "\n");
      ++NumElimExt;
      DU.NarrowUse->replaceAllUsesWith(NewDef);
      DeadInsts.emplace_back(DU.NarrowUse);
    }
    // Now that the extend is gone, we want to expose its uses for potential
    // further simplification. We don't need to directly inform SimplifyIVUsers
    // of the new users, because their parent IV will be processed later as a
    // new loop phi. If we preserved IVUsers analysis, we would also want to
    // push the uses of WideDef here.

    // No further widening is needed. The deceased [sz]ext had done it for us.
    return nullptr;
  }

  // Does this user itself evaluate to a recurrence after widening?
  WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
  if (!WideAddRec.first)
    WideAddRec = getWideRecurrence(DU);

  assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
  if (!WideAddRec.first) {
    // If the use is a loop condition, try to promote the condition instead of
    // truncating the IV first.
    if (widenLoopCompare(DU))
      return nullptr;

    // We are about to generate a truncate instruction that may hurt
    // performance because the scalar evolution expression computed earlier
    // in WideAddRec.first does not indicate a polynomial induction expression.
    // In that case, look at the operands of the use instruction to determine
    // if we can still widen the use instead of truncating its operand.
    if (widenWithVariantLoadUse(DU)) {
      widenWithVariantLoadUseCodegen(DU);
      return nullptr;
    }

    // This user does not evaluate to a recurrence after widening, so don't
    // follow it. Instead insert a Trunc to kill off the original use,
    // eventually isolating the original narrow IV so it can be removed.
    truncateIVUse(DU, DT, LI);
    return nullptr;
  }
  // Assume block terminators cannot evaluate to a recurrence. We can't insert
  // a Trunc after a terminator if there happens to be a critical edge.
  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
         "SCEV is not expected to evaluate a block terminator");

  // Reuse the IV increment that SCEVExpander created as long as it dominates
  // NarrowUse.
  Instruction *WideUse = nullptr;
  if (WideAddRec.first == WideIncExpr &&
      Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
    WideUse = WideInc;
  else {
    WideUse = cloneIVUser(DU, WideAddRec.first);
    if (!WideUse)
      return nullptr;
  }
  // Evaluation of WideAddRec ensured that the narrow expression could be
  // extended outside the loop without overflow. This suggests that the wide use
  // evaluates to the same expression as the extended narrow use, but doesn't
  // absolutely guarantee it. Hence the following failsafe check. In rare cases
  // where it fails, we simply throw away the newly created wide use.
  if (WideAddRec.first != SE->getSCEV(WideUse)) {
    LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
                      << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
                      << "\n");
    DeadInsts.emplace_back(WideUse);
    return nullptr;
  }

  // If we've reached this point, then we are going to replace
  // DU.NarrowUse with WideUse, so reattach DbgValues now.
  replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);

  ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
  // Returning WideUse pushes it on the worklist.
  return WideUse;
}

/// Add eligible users of NarrowDef to NarrowIVUsers.
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
  bool NonNegativeDef =
      SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
                           SE->getConstant(NarrowSCEV->getType(), 0));
  for (User *U : NarrowDef->users()) {
    Instruction *NarrowUser = cast<Instruction>(U);

    // Handle data flow merges and bizarre phi cycles.
    if (!Widened.insert(NarrowUser).second)
      continue;

    bool NonNegativeUse = false;
    if (!NonNegativeDef) {
      // We might have control-dependent range information for this context.
      if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
        NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
    }

    NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
                               NonNegativeDef || NonNegativeUse);
  }
}

/// Process a single induction variable. First use the SCEVExpander to create a
/// wide induction variable that evaluates to the same recurrence as the
/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
/// def-use chain. After widenIVUse has processed all interesting IV users, the
/// narrow IV will be isolated for removal by DeleteDeadPHIs.
///
/// It would be simpler to delete uses as they are processed, but we must avoid
/// invalidating SCEV expressions.
PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
  // Is this phi an induction variable?
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
  if (!AddRec)
    return nullptr;

  // Widen the induction variable expression.
  const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
                               ? SE->getSignExtendExpr(AddRec, WideType)
                               : SE->getZeroExtendExpr(AddRec, WideType);

  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
         "Expect the new IV expression to preserve its type");

  // Can the IV be extended outside the loop without overflow?
  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return nullptr;

  // An AddRec must have loop-invariant operands. Since this AddRec is
  // materialized by a loop header phi, the expression cannot have any post-loop
  // operands, so they must dominate the loop header.
  assert(
      SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
      SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
      "Loop header phi recurrence inputs do not dominate the loop");

  // Iterate over IV uses (including transitive ones) looking for IV increments
  // of the form 'add nsw %iv, <const>'. For each increment and each use of
  // the increment calculate control-dependent range information based on
  // dominating conditions inside of the loop (e.g. a range check inside of the
  // loop). Calculated ranges are stored in the PostIncRangeInfos map.
  //
  // Control-dependent range information is later used to prove that a narrow
  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
  // this on demand because when pushNarrowIVUsers needs this information some
  // of the dominating conditions might be already widened.
  if (UsePostIncrementRanges)
    calculatePostIncRanges(OrigPhi);

  // The rewriter provides a value for the desired IV expression. This may
  // either find an existing phi or materialize a new one. Either way, we
  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
  // of the phi-SCC dominates the loop entry.
  Instruction *InsertPt = &L->getHeader()->front();
  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));

  // Remembering the WideIV increment generated by SCEVExpander allows
  // widenIVUse to reuse it when widening the narrow IV's increment. We don't
  // employ a general reuse mechanism because the call above is the only call to
  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
    WideInc =
        cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
    WideIncExpr = SE->getSCEV(WideInc);
    // Propagate the debug location associated with the original loop increment
    // to the new (widened) increment.
1450 auto *OrigInc = 1451 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); 1452 WideInc->setDebugLoc(OrigInc->getDebugLoc()); 1453 } 1454 1455 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); 1456 ++NumWidened; 1457 1458 // Traverse the def-use chain using a worklist starting at the original IV. 1459 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); 1460 1461 Widened.insert(OrigPhi); 1462 pushNarrowIVUsers(OrigPhi, WidePhi); 1463 1464 while (!NarrowIVUsers.empty()) { 1465 NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); 1466 1467 // Process a def-use edge. This may replace the use, so don't hold a 1468 // use_iterator across it. 1469 Instruction *WideUse = widenIVUse(DU, Rewriter); 1470 1471 // Follow all def-use edges from the previous narrow use. 1472 if (WideUse) 1473 pushNarrowIVUsers(DU.NarrowUse, WideUse); 1474 1475 // widenIVUse may have removed the def-use edge. 1476 if (DU.NarrowDef->use_empty()) 1477 DeadInsts.emplace_back(DU.NarrowDef); 1478 } 1479 1480 // Attach any debug information to the new PHI. 1481 replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT); 1482 1483 return WidePhi; 1484 } 1485 1486 /// Calculates control-dependent range for the given def at the given context 1487 /// by looking at dominating conditions inside of the loop 1488 void WidenIV::calculatePostIncRange(Instruction *NarrowDef, 1489 Instruction *NarrowUser) { 1490 using namespace llvm::PatternMatch; 1491 1492 Value *NarrowDefLHS; 1493 const APInt *NarrowDefRHS; 1494 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), 1495 m_APInt(NarrowDefRHS))) || 1496 !NarrowDefRHS->isNonNegative()) 1497 return; 1498 1499 auto UpdateRangeFromCondition = [&] (Value *Condition, 1500 bool TrueDest) { 1501 CmpInst::Predicate Pred; 1502 Value *CmpRHS; 1503 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), 1504 m_Value(CmpRHS)))) 1505 return; 1506 1507 CmpInst::Predicate P = 1508 TrueDest ? Pred : CmpInst::getInversePredicate(Pred); 1509 1510 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); 1511 auto CmpConstrainedLHSRange = 1512 ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); 1513 auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( 1514 *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); 1515 1516 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); 1517 }; 1518 1519 auto UpdateRangeFromGuards = [&](Instruction *Ctx) { 1520 if (!HasGuards) 1521 return; 1522 1523 for (Instruction &I : make_range(Ctx->getIterator().getReverse(), 1524 Ctx->getParent()->rend())) { 1525 Value *C = nullptr; 1526 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) 1527 UpdateRangeFromCondition(C, /*TrueDest=*/true); 1528 } 1529 }; 1530 1531 UpdateRangeFromGuards(NarrowUser); 1532 1533 BasicBlock *NarrowUserBB = NarrowUser->getParent(); 1534 // If NarrowUserBB is statically unreachable asking dominator queries may 1535 // yield surprising results. (e.g. 
the block may not have a dom tree node) 1536 if (!DT->isReachableFromEntry(NarrowUserBB)) 1537 return; 1538 1539 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); 1540 L->contains(DTB->getBlock()); 1541 DTB = DTB->getIDom()) { 1542 auto *BB = DTB->getBlock(); 1543 auto *TI = BB->getTerminator(); 1544 UpdateRangeFromGuards(TI); 1545 1546 auto *BI = dyn_cast<BranchInst>(TI); 1547 if (!BI || !BI->isConditional()) 1548 continue; 1549 1550 auto *TrueSuccessor = BI->getSuccessor(0); 1551 auto *FalseSuccessor = BI->getSuccessor(1); 1552 1553 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { 1554 return BBE.isSingleEdge() && 1555 DT->dominates(BBE, NarrowUser->getParent()); 1556 }; 1557 1558 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) 1559 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); 1560 1561 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) 1562 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); 1563 } 1564 } 1565 1566 /// Calculates PostIncRangeInfos map for the given IV 1567 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { 1568 SmallPtrSet<Instruction *, 16> Visited; 1569 SmallVector<Instruction *, 6> Worklist; 1570 Worklist.push_back(OrigPhi); 1571 Visited.insert(OrigPhi); 1572 1573 while (!Worklist.empty()) { 1574 Instruction *NarrowDef = Worklist.pop_back_val(); 1575 1576 for (Use &U : NarrowDef->uses()) { 1577 auto *NarrowUser = cast<Instruction>(U.getUser()); 1578 1579 // Don't go looking outside the current loop. 1580 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; 1581 if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) 1582 continue; 1583 1584 if (!Visited.insert(NarrowUser).second) 1585 continue; 1586 1587 Worklist.push_back(NarrowUser); 1588 1589 calculatePostIncRange(NarrowDef, NarrowUser); 1590 } 1591 } 1592 } 1593 1594 //===----------------------------------------------------------------------===// 1595 // Live IV Reduction - Minimize IVs live across the loop. 1596 //===----------------------------------------------------------------------===// 1597 1598 //===----------------------------------------------------------------------===// 1599 // Simplification of IV users based on SCEV evaluation. 1600 //===----------------------------------------------------------------------===// 1601 1602 namespace { 1603 1604 class IndVarSimplifyVisitor : public IVVisitor { 1605 ScalarEvolution *SE; 1606 const TargetTransformInfo *TTI; 1607 PHINode *IVPhi; 1608 1609 public: 1610 WideIVInfo WI; 1611 1612 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, 1613 const TargetTransformInfo *TTI, 1614 const DominatorTree *DTree) 1615 : SE(SCEV), TTI(TTI), IVPhi(IV) { 1616 DT = DTree; 1617 WI.NarrowIV = IVPhi; 1618 } 1619 1620 // Implement the interface used by simplifyUsersOfIV. 1621 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } 1622 }; 1623 1624 } // end anonymous namespace 1625 1626 /// Iteratively perform simplification on a worklist of IV users. Each 1627 /// successive simplification may push more users which may themselves be 1628 /// candidates for simplification. 1629 /// 1630 /// Sign/Zero extend elimination is interleaved with IV simplification. 
1631 bool IndVarSimplify::simplifyAndExtend(Loop *L, 1632 SCEVExpander &Rewriter, 1633 LoopInfo *LI) { 1634 SmallVector<WideIVInfo, 8> WideIVs; 1635 1636 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction( 1637 Intrinsic::getName(Intrinsic::experimental_guard)); 1638 bool HasGuards = GuardDecl && !GuardDecl->use_empty(); 1639 1640 SmallVector<PHINode*, 8> LoopPhis; 1641 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 1642 LoopPhis.push_back(cast<PHINode>(I)); 1643 } 1644 // Each round of simplification iterates through the SimplifyIVUsers worklist 1645 // for all current phis, then determines whether any IVs can be 1646 // widened. Widening adds new phis to LoopPhis, inducing another round of 1647 // simplification on the wide IVs. 1648 bool Changed = false; 1649 while (!LoopPhis.empty()) { 1650 // Evaluate as many IV expressions as possible before widening any IVs. This 1651 // forces SCEV to set no-wrap flags before evaluating sign/zero 1652 // extension. The first time SCEV attempts to normalize sign/zero extension, 1653 // the result becomes final. So for the most predictable results, we delay 1654 // evaluation of sign/zero extension until needed, and avoid running 1655 // other SCEV based analysis prior to simplifyAndExtend. 1656 do { 1657 PHINode *CurrIV = LoopPhis.pop_back_val(); 1658 1659 // Information about sign/zero extensions of CurrIV. 1660 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); 1661 1662 Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter, 1663 &Visitor); 1664 1665 if (Visitor.WI.WidestNativeType) { 1666 WideIVs.push_back(Visitor.WI); 1667 } 1668 } while (!LoopPhis.empty()); 1669 1670 for (; !WideIVs.empty(); WideIVs.pop_back()) { 1671 WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards); 1672 if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) { 1673 Changed = true; 1674 LoopPhis.push_back(WidePhi); 1675 } 1676 } 1677 } 1678 return Changed; 1679 } 1680 1681 //===----------------------------------------------------------------------===// 1682 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition. 1683 //===----------------------------------------------------------------------===// 1684 1685 /// Given a Value which is hoped to be part of an add recurrence in the given 1686 /// loop, return the associated Phi node if so. Otherwise, return null. Note 1687 /// that this is less general than SCEV's AddRec checking. 1688 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) { 1689 Instruction *IncI = dyn_cast<Instruction>(IncV); 1690 if (!IncI) 1691 return nullptr; 1692 1693 switch (IncI->getOpcode()) { 1694 case Instruction::Add: 1695 case Instruction::Sub: 1696 break; 1697 case Instruction::GetElementPtr: 1698 // An IV counter must preserve its type. 1699 if (IncI->getNumOperands() == 2) 1700 break; 1701 LLVM_FALLTHROUGH; 1702 default: 1703 return nullptr; 1704 } 1705 1706 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0)); 1707 if (Phi && Phi->getParent() == L->getHeader()) { 1708 if (L->isLoopInvariant(IncI->getOperand(1))) 1709 return Phi; 1710 return nullptr; 1711 } 1712 if (IncI->getOpcode() == Instruction::GetElementPtr) 1713 return nullptr; 1714 1715 // Allow add/sub to be commuted.
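// For example, given a loop-invariant %step, the commuted form
// '%iv.next = add i32 %step, %iv' should be recognized just like
// '%iv.next = add i32 %iv, %step' (the IR names here are illustrative only).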
1716 Phi = dyn_cast<PHINode>(IncI->getOperand(1)); 1717 if (Phi && Phi->getParent() == L->getHeader()) { 1718 if (L->isLoopInvariant(IncI->getOperand(0))) 1719 return Phi; 1720 } 1721 return nullptr; 1722 } 1723 1724 /// Whether the current loop exit test is based on this value. Currently this 1725 /// is limited to a direct use in the loop condition. 1726 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) { 1727 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1728 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition()); 1729 // TODO: Allow non-icmp loop test. 1730 if (!ICmp) 1731 return false; 1732 1733 // TODO: Allow indirect use. 1734 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V; 1735 } 1736 1737 /// linearFunctionTestReplace policy. Return true unless we can show that the 1738 /// current exit test is already sufficiently canonical. 1739 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) { 1740 assert(L->getLoopLatch() && "Must be in simplified form"); 1741 1742 // Avoid converting a constant or loop invariant test back to a runtime 1743 // test. This is critical for when SCEV's cached ExitCount is less precise 1744 // than the current IR (such as after we've proven a particular exit is 1745 // actually dead and thus the BE count never reaches our ExitCount.) 1746 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1747 if (L->isLoopInvariant(BI->getCondition())) 1748 return false; 1749 1750 // Do LFTR to simplify the exit condition to an ICMP. 1751 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); 1752 if (!Cond) 1753 return true; 1754 1755 // Do LFTR to simplify the exit ICMP to EQ/NE 1756 ICmpInst::Predicate Pred = Cond->getPredicate(); 1757 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ) 1758 return true; 1759 1760 // Look for a loop invariant RHS 1761 Value *LHS = Cond->getOperand(0); 1762 Value *RHS = Cond->getOperand(1); 1763 if (!L->isLoopInvariant(RHS)) { 1764 if (!L->isLoopInvariant(LHS)) 1765 return true; 1766 std::swap(LHS, RHS); 1767 } 1768 // Look for a simple IV counter LHS 1769 PHINode *Phi = dyn_cast<PHINode>(LHS); 1770 if (!Phi) 1771 Phi = getLoopPhiForCounter(LHS, L); 1772 1773 if (!Phi) 1774 return true; 1775 1776 // Do LFTR if PHI node is defined in the loop, but is *not* a counter. 1777 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch()); 1778 if (Idx < 0) 1779 return true; 1780 1781 // Do LFTR if the exit condition's IV is *not* a simple counter. 1782 Value *IncV = Phi->getIncomingValue(Idx); 1783 return Phi != getLoopPhiForCounter(IncV, L); 1784 } 1785 1786 /// Return true if undefined behavior would provably be executed on the path to 1787 /// OnPathTo if Root produced a poison result. Note that this doesn't say 1788 /// anything about whether OnPathTo is actually executed or whether Root is 1789 /// actually poison. This can be used to assess whether a new use of Root can 1790 /// be added at a location which is control equivalent with OnPathTo (such as 1791 /// immediately before it) without introducing UB which didn't previously 1792 /// exist. Note that a false result conveys no information. 1793 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, 1794 Instruction *OnPathTo, 1795 DominatorTree *DT) { 1796 // Basic approach is to assume Root is poison, propagate poison forward 1797 // through all users we can easily track, and then check whether any of those 1798 // users are provably UB and must execute before our exiting block might 1799 // exit.
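// For example, if Root is used as the pointer operand of a store which
// dominates OnPathTo, then Root being poison would already be immediate UB
// at that store, so adding a use of Root at OnPathTo introduces no new UB.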
1800 1801 // The set of all recursive users we've visited (which are assumed to all be 1802 // poison because of said visit) 1803 SmallSet<const Value *, 16> KnownPoison; 1804 SmallVector<const Instruction*, 16> Worklist; 1805 Worklist.push_back(Root); 1806 while (!Worklist.empty()) { 1807 const Instruction *I = Worklist.pop_back_val(); 1808 1809 // If we know this must trigger UB on a path leading to our target. 1810 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo)) 1811 return true; 1812 1813 // If we can't analyze propagation through this instruction, just skip it 1814 // and transitive users. Safe as false is a conservative result. 1815 if (!propagatesFullPoison(I) && I != Root) 1816 continue; 1817 1818 if (KnownPoison.insert(I).second) 1819 for (const User *User : I->users()) 1820 Worklist.push_back(cast<Instruction>(User)); 1821 } 1822 1823 // Might be non-UB, or might have a path we couldn't prove must execute on 1824 // the way to the exiting bb. 1825 return false; 1826 } 1827 1828 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils 1829 /// down to checking that all operands are constant and listing instructions 1830 /// that may hide undef. 1831 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited, 1832 unsigned Depth) { 1833 if (isa<Constant>(V)) 1834 return !isa<UndefValue>(V); 1835 1836 if (Depth >= 6) 1837 return false; 1838 1839 // Conservatively handle non-constant non-instructions. For example, Arguments 1840 // may be undef. 1841 Instruction *I = dyn_cast<Instruction>(V); 1842 if (!I) 1843 return false; 1844 1845 // Load and return values may be undef. 1846 if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I)) 1847 return false; 1848 1849 // Optimistically handle other instructions. 1850 for (Value *Op : I->operands()) { 1851 if (!Visited.insert(Op).second) 1852 continue; 1853 if (!hasConcreteDefImpl(Op, Visited, Depth+1)) 1854 return false; 1855 } 1856 return true; 1857 } 1858 1859 /// Return true if the given value is concrete. We must prove that undef can 1860 /// never reach it. 1861 /// 1862 /// TODO: If we decide that this is a good approach to checking for undef, we 1863 /// may factor it into a common location. 1864 static bool hasConcreteDef(Value *V) { 1865 SmallPtrSet<Value*, 8> Visited; 1866 Visited.insert(V); 1867 return hasConcreteDefImpl(V, Visited, 0); 1868 } 1869 1870 /// Return true if this IV has no uses other than the (soon to be rewritten) 1871 /// loop exit test. 1872 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { 1873 int LatchIdx = Phi->getBasicBlockIndex(LatchBlock); 1874 Value *IncV = Phi->getIncomingValue(LatchIdx); 1875 1876 for (User *U : Phi->users()) 1877 if (U != Cond && U != IncV) return false; 1878 1879 for (User *U : IncV->users()) 1880 if (U != Cond && U != Phi) return false; 1881 return true; 1882 } 1883 1884 /// Return true if the given phi is a "counter" in L. A counter is an 1885 /// add recurrence (of integer or pointer type) with an arbitrary start, and a 1886 /// step of 1. Note that L must have exactly one latch.
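///
/// For example (names are illustrative):
///   %iv = phi i64 [ %start, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add i64 %iv, 1
/// is a counter with an arbitrary (possibly non-zero) start value.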
1887 static bool isLoopCounter(PHINode* Phi, Loop *L, 1888 ScalarEvolution *SE) { 1889 assert(Phi->getParent() == L->getHeader()); 1890 assert(L->getLoopLatch()); 1891 1892 if (!SE->isSCEVable(Phi->getType())) 1893 return false; 1894 1895 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi)); 1896 if (!AR || AR->getLoop() != L || !AR->isAffine()) 1897 return false; 1898 1899 const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)); 1900 if (!Step || !Step->isOne()) 1901 return false; 1902 1903 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch()); 1904 Value *IncV = Phi->getIncomingValue(LatchIdx); 1905 return (getLoopPhiForCounter(IncV, L) == Phi); 1906 } 1907 1908 /// Search the loop header for a loop counter (an add rec w/step of one) 1909 /// suitable for use by LFTR. If multiple counters are available, select the 1910 /// "best" one based on profitability heuristics. 1911 /// 1912 /// BECount may be an i8* pointer type. The pointer difference is already a 1913 /// valid count without scaling the address stride, so it remains a pointer 1914 /// expression as far as SCEV is concerned. 1915 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB, 1916 const SCEV *BECount, 1917 ScalarEvolution *SE, DominatorTree *DT) { 1918 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType()); 1919 1920 Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition(); 1921 1922 // Loop over all of the PHI nodes, looking for a simple counter. 1923 PHINode *BestPhi = nullptr; 1924 const SCEV *BestInit = nullptr; 1925 BasicBlock *LatchBlock = L->getLoopLatch(); 1926 assert(LatchBlock && "Must be in simplified form"); 1927 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 1928 1929 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 1930 PHINode *Phi = cast<PHINode>(I); 1931 if (!isLoopCounter(Phi, L, SE)) 1932 continue; 1933 1934 // Avoid comparing an integer IV against a pointer Limit. 1935 if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy()) 1936 continue; 1937 1938 const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi)); 1939 1940 // AR may be a pointer type, while BECount is an integer type. 1941 // AR may be wider than BECount. With eq/ne tests overflow is immaterial. 1942 // AR may not be a narrower type, or we may never exit. 1943 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType()); 1944 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth)) 1945 continue; 1946 1947 // Avoid reusing a potentially undef value to compute other values that may 1948 // have originally had a concrete definition. 1949 if (!hasConcreteDef(Phi)) { 1950 // We explicitly allow unknown phis as long as they are already used by 1951 // the loop exit test. This is legal since performing LFTR could not 1952 // increase the number of undef users. 1953 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock); 1954 if (!isLoopExitTestBasedOn(Phi, ExitingBB) && 1955 !isLoopExitTestBasedOn(IncPhi, ExitingBB)) 1956 continue; 1957 } 1958 1959 // Avoid introducing undefined behavior due to poison which didn't exist in 1960 // the original program. (Annoyingly, the rules for poison and undef 1961 // propagation are distinct, so this does NOT cover the undef case above.) 1962 // We have to ensure that we don't introduce UB by introducing a use on an 1963 // iteration where said IV produces poison. Our strategy here differs for 1964 // pointers and integer IVs.
For integers, we strip and reinfer as needed, 1965 // see code in linearFunctionTestReplace. For pointers, we restrict 1966 // transforms as there is no good way to reinfer inbounds once lost. 1967 if (!Phi->getType()->isIntegerTy() && 1968 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) 1969 continue; 1970 1971 const SCEV *Init = AR->getStart(); 1972 1973 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { 1974 // Don't force a live loop counter if another IV can be used. 1975 if (AlmostDeadIV(Phi, LatchBlock, Cond)) 1976 continue; 1977 1978 // Prefer to count-from-zero. This is a more "canonical" counter form. It 1979 // also prefers integer to pointer IVs. 1980 if (BestInit->isZero() != Init->isZero()) { 1981 if (BestInit->isZero()) 1982 continue; 1983 } 1984 // If two IVs both count from zero or both count from nonzero then the 1985 // narrower is likely a dead phi that has been widened. Use the wider phi 1986 // to allow the other to be eliminated. 1987 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) 1988 continue; 1989 } 1990 BestPhi = Phi; 1991 BestInit = Init; 1992 } 1993 return BestPhi; 1994 } 1995 1996 /// Insert an IR expression which computes the value held by the IV IndVar 1997 /// (which must be an loop counter w/unit stride) after the backedge of loop L 1998 /// is taken ExitCount times. 1999 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, 2000 const SCEV *ExitCount, bool UsePostInc, Loop *L, 2001 SCEVExpander &Rewriter, ScalarEvolution *SE) { 2002 assert(isLoopCounter(IndVar, L, SE)); 2003 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 2004 const SCEV *IVInit = AR->getStart(); 2005 2006 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter 2007 // finds a valid pointer IV. Sign extend ExitCount in order to materialize a 2008 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing 2009 // the existing GEPs whenever possible. 2010 if (IndVar->getType()->isPointerTy() && 2011 !ExitCount->getType()->isPointerTy()) { 2012 // IVOffset will be the new GEP offset that is interpreted by GEP as a 2013 // signed value. ExitCount on the other hand represents the loop trip count, 2014 // which is an unsigned value. FindLoopCounter only allows induction 2015 // variables that have a positive unit stride of one. This means we don't 2016 // have to handle the case of negative offsets (yet) and just need to zero 2017 // extend ExitCount. 2018 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); 2019 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy); 2020 if (UsePostInc) 2021 IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy)); 2022 2023 // Expand the code for the iteration count. 2024 assert(SE->isLoopInvariant(IVOffset, L) && 2025 "Computed iteration count is not loop invariant!"); 2026 2027 // We could handle pointer IVs other than i8*, but we need to compensate for 2028 // gep index scaling. 2029 assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), 2030 cast<PointerType>(IndVar->getType()) 2031 ->getElementType())->isOne() && 2032 "unit stride pointer IV must be i8*"); 2033 2034 const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset); 2035 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2036 return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI); 2037 } else { 2038 // In any other case, convert both IVInit and ExitCount to integers before 2039 // comparing. 
This may result in SCEV expansion of pointers, but in practice 2040 // SCEV will fold the pointer arithmetic away as such: 2041 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). 2042 // 2043 // Valid Cases: (1) both integers is most common; (2) both may be pointers 2044 // for simple memset-style loops. 2045 // 2046 // IVInit integer and ExitCount pointer would only occur if a canonical IV 2047 // were generated on top of case #2, which is not expected. 2048 2049 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 2050 // For unit stride, IVCount = Start + ExitCount with 2's complement 2051 // overflow. 2052 2053 // For integer IVs, truncate the IV before computing IVInit + BECount, 2054 // unless we know apriori that the limit must be a constant when evaluated 2055 // in the bitwidth of the IV. We prefer (potentially) keeping a truncate 2056 // of the IV in the loop over a (potentially) expensive expansion of the 2057 // widened exit count add(zext(add)) expression. 2058 if (SE->getTypeSizeInBits(IVInit->getType()) 2059 > SE->getTypeSizeInBits(ExitCount->getType())) { 2060 if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount)) 2061 ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType()); 2062 else 2063 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType()); 2064 } 2065 2066 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount); 2067 2068 if (UsePostInc) 2069 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); 2070 2071 // Expand the code for the iteration count. 2072 assert(SE->isLoopInvariant(IVLimit, L) && 2073 "Computed iteration count is not loop invariant!"); 2074 // Ensure that we generate the same type as IndVar, or a smaller integer 2075 // type. In the presence of null pointer values, we have an integer type 2076 // SCEV expression (IVInit) for a pointer type IV value (IndVar). 2077 Type *LimitTy = ExitCount->getType()->isPointerTy() ? 2078 IndVar->getType() : ExitCount->getType(); 2079 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2080 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); 2081 } 2082 } 2083 2084 /// This method rewrites the exit condition of the loop to be a canonical != 2085 /// comparison against the incremented loop induction variable. This pass is 2086 /// able to rewrite the exit tests of any loop where the SCEV analysis can 2087 /// determine a loop-invariant trip count of the loop, which is actually a much 2088 /// broader range than just linear tests. 2089 bool IndVarSimplify:: 2090 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 2091 const SCEV *ExitCount, 2092 PHINode *IndVar, SCEVExpander &Rewriter) { 2093 assert(L->getLoopLatch() && "Loop no longer in simplified form?"); 2094 assert(isLoopCounter(IndVar, L, SE)); 2095 Instruction * const IncVar = 2096 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch())); 2097 2098 // Initialize CmpIndVar to the preincremented IV. 2099 Value *CmpIndVar = IndVar; 2100 bool UsePostInc = false; 2101 2102 // If the exiting block is the same as the backedge block, we prefer to 2103 // compare against the post-incremented value, otherwise we must compare 2104 // against the preincremented value. 2105 if (ExitingBB == L->getLoopLatch()) { 2106 // For pointer IVs, we chose to not strip inbounds which requires us not 2107 // to add a potentially UB introducing use. 
We need to either a) show 2108 // the loop test we're modifying is already in post-inc form, or b) show 2109 // that adding a use must not introduce UB. 2110 bool SafeToPostInc = 2111 IndVar->getType()->isIntegerTy() || 2112 isLoopExitTestBasedOn(IncVar, ExitingBB) || 2113 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT); 2114 if (SafeToPostInc) { 2115 UsePostInc = true; 2116 CmpIndVar = IncVar; 2117 } 2118 } 2119 2120 // It may be necessary to drop nowrap flags on the incrementing instruction 2121 // if either LFTR moves from a pre-inc check to a post-inc check (in which 2122 // case the increment might have previously been poison on the last iteration 2123 // only) or if LFTR switches to a different IV that was previously dynamically 2124 // dead (and as such may be arbitrarily poison). We remove any nowrap flags 2125 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc 2126 // check), because the pre-inc addrec flags may be adopted from the original 2127 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags. 2128 // TODO: This handling is inaccurate for one case: If we switch to a 2129 // dynamically dead IV that wraps on the first loop iteration only, which is 2130 // not covered by the post-inc addrec. (If the new IV was not dynamically 2131 // dead, it could not be poison on the first iteration in the first place.) 2132 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) { 2133 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar)); 2134 if (BO->hasNoUnsignedWrap()) 2135 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap()); 2136 if (BO->hasNoSignedWrap()) 2137 BO->setHasNoSignedWrap(AR->hasNoSignedWrap()); 2138 } 2139 2140 Value *ExitCnt = genLoopLimit( 2141 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE); 2142 assert(ExitCnt->getType()->isPointerTy() == 2143 IndVar->getType()->isPointerTy() && 2144 "genLoopLimit missed a cast"); 2145 2146 // Insert a new icmp_ne or icmp_eq instruction before the branch. 2147 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2148 ICmpInst::Predicate P; 2149 if (L->contains(BI->getSuccessor(0))) 2150 P = ICmpInst::ICMP_NE; 2151 else 2152 P = ICmpInst::ICMP_EQ; 2153 2154 IRBuilder<> Builder(BI); 2155 2156 // The new loop exit condition should reuse the debug location of the 2157 // original loop exit condition. 2158 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition())) 2159 Builder.SetCurrentDebugLocation(Cond->getDebugLoc()); 2160 2161 // For integer IVs, if we evaluated the limit in the narrower bitwidth to 2162 // avoid the expensive expansion of the limit expression in the wider type, 2163 // emit a truncate to narrow the IV to the ExitCount type. This is safe 2164 // since we know (from the exit count bitwidth), that we can't self-wrap in 2165 // the narrower type. 2166 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); 2167 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); 2168 if (CmpIndVarSize > ExitCntSize) { 2169 assert(!CmpIndVar->getType()->isPointerTy() && 2170 !ExitCnt->getType()->isPointerTy()); 2171 2172 // Before resorting to actually inserting the truncate, use the same 2173 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend 2174 // the other side of the comparison instead. We still evaluate the limit 2175 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to 2176 // a truncate within in. 
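// For example, if SCEV can show that zext(trunc(CmpIndVar)) == CmpIndVar,
// then 'trunc(CmpIndVar) != ExitCnt' is equivalent to
// 'CmpIndVar != zext(ExitCnt)', so the zext can be emitted outside the loop
// instead of a truncate inside it.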
2177 bool Extended = false; 2178 const SCEV *IV = SE->getSCEV(CmpIndVar); 2179 const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar), 2180 ExitCnt->getType()); 2181 const SCEV *ZExtTrunc = 2182 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType()); 2183 2184 if (ZExtTrunc == IV) { 2185 Extended = true; 2186 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(), 2187 "wide.trip.count"); 2188 } else { 2189 const SCEV *SExtTrunc = 2190 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType()); 2191 if (SExtTrunc == IV) { 2192 Extended = true; 2193 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(), 2194 "wide.trip.count"); 2195 } 2196 } 2197 2198 if (Extended) { 2199 bool Discard; 2200 L->makeLoopInvariant(ExitCnt, Discard); 2201 } else 2202 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), 2203 "lftr.wideiv"); 2204 } 2205 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 2206 << " LHS:" << *CmpIndVar << '\n' 2207 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==") 2208 << "\n" 2209 << " RHS:\t" << *ExitCnt << "\n" 2210 << "ExitCount:\t" << *ExitCount << "\n" 2211 << " was: " << *BI->getCondition() << "\n"); 2212 2213 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); 2214 Value *OrigCond = BI->getCondition(); 2215 // It's tempting to use replaceAllUsesWith here to fully replace the old 2216 // comparison, but that's not immediately safe, since users of the old 2217 // comparison may not be dominated by the new comparison. Instead, just 2218 // update the branch to use the new comparison; in the common case this 2219 // will make old comparison dead. 2220 BI->setCondition(Cond); 2221 DeadInsts.push_back(OrigCond); 2222 2223 ++NumLFTR; 2224 return true; 2225 } 2226 2227 //===----------------------------------------------------------------------===// 2228 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders. 2229 //===----------------------------------------------------------------------===// 2230 2231 /// If there's a single exit block, sink any loop-invariant values that 2232 /// were defined in the preheader but not used inside the loop into the 2233 /// exit block to reduce register pressure in the loop. 2234 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { 2235 BasicBlock *ExitBlock = L->getExitBlock(); 2236 if (!ExitBlock) return false; 2237 2238 BasicBlock *Preheader = L->getLoopPreheader(); 2239 if (!Preheader) return false; 2240 2241 bool MadeAnyChanges = false; 2242 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); 2243 BasicBlock::iterator I(Preheader->getTerminator()); 2244 while (I != Preheader->begin()) { 2245 --I; 2246 // New instructions were inserted at the end of the preheader. 2247 if (isa<PHINode>(I)) 2248 break; 2249 2250 // Don't move instructions which might have side effects, since the side 2251 // effects need to complete before instructions inside the loop. Also don't 2252 // move instructions which might read memory, since the loop may modify 2253 // memory. Note that it's okay if the instruction might have undefined 2254 // behavior: LoopSimplify guarantees that the preheader dominates the exit 2255 // block. 2256 if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 2257 continue; 2258 2259 // Skip debug info intrinsics. 2260 if (isa<DbgInfoIntrinsic>(I)) 2261 continue; 2262 2263 // Skip eh pad instructions. 
2264 if (I->isEHPad()) 2265 continue; 2266 2267 // Don't sink alloca: we never want to sink static alloca's out of the 2268 // entry block, and correctly sinking dynamic alloca's requires 2269 // checks for stacksave/stackrestore intrinsics. 2270 // FIXME: Refactor this check somehow? 2271 if (isa<AllocaInst>(I)) 2272 continue; 2273 2274 // Determine if there is a use in or before the loop (direct or 2275 // otherwise). 2276 bool UsedInLoop = false; 2277 for (Use &U : I->uses()) { 2278 Instruction *User = cast<Instruction>(U.getUser()); 2279 BasicBlock *UseBB = User->getParent(); 2280 if (PHINode *P = dyn_cast<PHINode>(User)) { 2281 unsigned i = 2282 PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 2283 UseBB = P->getIncomingBlock(i); 2284 } 2285 if (UseBB == Preheader || L->contains(UseBB)) { 2286 UsedInLoop = true; 2287 break; 2288 } 2289 } 2290 2291 // If there is, the def must remain in the preheader. 2292 if (UsedInLoop) 2293 continue; 2294 2295 // Otherwise, sink it to the exit block. 2296 Instruction *ToMove = &*I; 2297 bool Done = false; 2298 2299 if (I != Preheader->begin()) { 2300 // Skip debug info intrinsics. 2301 do { 2302 --I; 2303 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); 2304 2305 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) 2306 Done = true; 2307 } else { 2308 Done = true; 2309 } 2310 2311 MadeAnyChanges = true; 2312 ToMove->moveBefore(*ExitBlock, InsertPt); 2313 if (Done) break; 2314 InsertPt = ToMove->getIterator(); 2315 } 2316 2317 return MadeAnyChanges; 2318 } 2319 2320 /// Return a symbolic upper bound for the backedge taken count of the loop. 2321 /// This is more general than getConstantMaxBackedgeTakenCount as it returns 2322 /// an arbitrary expression as opposed to only constants. 2323 /// TODO: Move into the ScalarEvolution class. 2324 static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE, 2325 DominatorTree &DT, Loop *L) { 2326 SmallVector<BasicBlock*, 16> ExitingBlocks; 2327 L->getExitingBlocks(ExitingBlocks); 2328 2329 // Form an expression for the maximum exit count possible for this loop. We 2330 // merge the max and exact information to approximate a version of 2331 // getConstantMaxBackedgeTakenCount which isn't restricted to just constants. 2332 SmallVector<const SCEV*, 4> ExitCounts; 2333 for (BasicBlock *ExitingBB : ExitingBlocks) { 2334 const SCEV *ExitCount = SE.getExitCount(L, ExitingBB); 2335 if (isa<SCEVCouldNotCompute>(ExitCount)) 2336 ExitCount = SE.getExitCount(L, ExitingBB, 2337 ScalarEvolution::ConstantMaximum); 2338 if (!isa<SCEVCouldNotCompute>(ExitCount)) { 2339 assert(DT.dominates(ExitingBB, L->getLoopLatch()) && 2340 "We should only have known counts for exiting blocks that " 2341 "dominate latch!"); 2342 ExitCounts.push_back(ExitCount); 2343 } 2344 } 2345 if (ExitCounts.empty()) 2346 return SE.getCouldNotCompute(); 2347 return SE.getUMinFromMismatchedTypes(ExitCounts); 2348 } 2349 2350 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { 2351 SmallVector<BasicBlock*, 16> ExitingBlocks; 2352 L->getExitingBlocks(ExitingBlocks); 2353 2354 // Remove all exits which aren't both rewriteable and analyzeable. 2355 auto NewEnd = llvm::remove_if(ExitingBlocks, [&](BasicBlock *ExitingBB) { 2356 // If our exitting block exits multiple loops, we can only rewrite the 2357 // innermost one. Otherwise, we're changing how many times the innermost 2358 // loop runs before it exits. 2359 if (LI->getLoopFor(ExitingBB) != L) 2360 return true; 2361 2362 // Can't rewrite non-branch yet. 
2363 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 2364 if (!BI) 2365 return true; 2366 2367 // If already constant, nothing to do. 2368 if (isa<Constant>(BI->getCondition())) 2369 return true; 2370 2371 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2372 if (isa<SCEVCouldNotCompute>(ExitCount)) 2373 return true; 2374 return false; 2375 }); 2376 ExitingBlocks.erase(NewEnd, ExitingBlocks.end()); 2377 2378 if (ExitingBlocks.empty()) 2379 return false; 2380 2381 // Get a symbolic upper bound on the loop backedge taken count. 2382 const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L); 2383 if (isa<SCEVCouldNotCompute>(MaxExitCount)) 2384 return false; 2385 2386 // Visit our exit blocks in order of dominance. We know from the fact that 2387 // all exits (left) are analyzable that there must be a total dominance order 2388 // between them as each must dominate the latch. The visit order only 2389 // matters for the provably equal case. 2390 llvm::sort(ExitingBlocks, 2391 [&](BasicBlock *A, BasicBlock *B) { 2392 // std::sort sorts in ascending order, so we want the inverse of 2393 // the normal dominance relation. 2394 if (DT->properlyDominates(A, B)) return true; 2395 if (DT->properlyDominates(B, A)) return false; 2396 llvm_unreachable("expected total dominance order!"); 2397 }); 2398 #ifndef NDEBUG 2399 for (unsigned i = 1; i < ExitingBlocks.size(); i++) { 2400 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])); 2401 } 2402 #endif 2403 2404 auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) { 2405 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2406 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); 2407 auto *OldCond = BI->getCondition(); 2408 auto *NewCond = ConstantInt::get(OldCond->getType(), 2409 IsTaken ? ExitIfTrue : !ExitIfTrue); 2410 BI->setCondition(NewCond); 2411 if (OldCond->use_empty()) 2412 DeadInsts.push_back(OldCond); 2413 }; 2414 2415 bool Changed = false; 2416 SmallSet<const SCEV*, 8> DominatingExitCounts; 2417 for (BasicBlock *ExitingBB : ExitingBlocks) { 2418 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2419 assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above"); 2420 2421 // If we know we'd exit on the first iteration, rewrite the exit to 2422 // reflect this. This does not imply the loop must exit through this 2423 // exit; there may be an earlier one taken on the first iteration. 2424 // TODO: Given we know the backedge can't be taken, we should go ahead 2425 // and break it. Or at least, kill all the header phis and simplify. 2426 if (ExitCount->isZero()) { 2427 FoldExit(ExitingBB, true); 2428 Changed = true; 2429 continue; 2430 } 2431 2432 // If we end up with a pointer exit count, bail. Note that we can end up 2433 // with a pointer exit count for one exiting block, and not for another in 2434 // the same loop. 2435 if (!ExitCount->getType()->isIntegerTy() || 2436 !MaxExitCount->getType()->isIntegerTy()) 2437 continue; 2438 2439 Type *WiderType = 2440 SE->getWiderType(MaxExitCount->getType(), ExitCount->getType()); 2441 ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType); 2442 MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType); 2443 assert(MaxExitCount->getType() == ExitCount->getType()); 2444 2445 // Can we prove that some other exit must be taken strictly before this 2446 // one?
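    // E.g. if this exit's count is 100 but the loop's backedge-taken count is
    // provably at most 50, control must leave the loop before this exit's
    // condition can ever trigger, so the exit test folds to "not taken".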
2447 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, 2448 MaxExitCount, ExitCount)) { 2449 FoldExit(ExitingBB, false); 2450 Changed = true; 2451 continue; 2452 } 2453 2454 // As we run, keep track of which exit counts we've encountered. If we 2455 // find a duplicate, we've found an exit which would have exited on the 2456 // exiting iteration, but (from the visit order) strictly follows another 2457 // which does the same and is thus dead. 2458 if (!DominatingExitCounts.insert(ExitCount).second) { 2459 FoldExit(ExitingBB, false); 2460 Changed = true; 2461 continue; 2462 } 2463 2464 // TODO: There might be another opportunity to leverage SCEV's reasoning 2465 // here. If we kept track of the min of dominating exits so far, we could 2466 // discharge exits with EC >= MDEC. This is less powerful than the existing 2467 // transform (since later exits aren't considered), but potentially more 2468 // powerful for any case where SCEV can prove a >=u b, but neither a == b 2469 // nor a >u b. Such a case is not currently known. 2470 } 2471 return Changed; 2472 } 2473 2474 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { 2475 SmallVector<BasicBlock*, 16> ExitingBlocks; 2476 L->getExitingBlocks(ExitingBlocks); 2477 2478 // Finally, see if we can rewrite our exit conditions into a loop invariant 2479 // form. If we have a read-only loop, and we can tell that we must exit down 2480 // a path which does not need any of the values computed within the loop, we 2481 // can rewrite the loop to exit on the first iteration. Note that this 2482 // neither a) tells us the loop exits on the first iteration (unless 2483 // *all* exits are predicateable) nor b) tells us *which* exit might be taken. 2484 // This transformation looks a lot like a restricted form of dead loop 2485 // elimination, but restricted to read-only loops and without necessarily 2486 // needing to kill the loop entirely. 2487 if (!LoopPredication) 2488 return false; 2489 2490 if (!SE->hasLoopInvariantBackedgeTakenCount(L)) 2491 return false; 2492 2493 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits 2494 // through *explicit* control flow. We have to eliminate the possibility of 2495 // implicit exits (see below) before we know it's truly exact. 2496 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L); 2497 if (isa<SCEVCouldNotCompute>(ExactBTC) || 2498 !SE->isLoopInvariant(ExactBTC, L) || 2499 !isSafeToExpand(ExactBTC, *SE)) 2500 return false; 2501 2502 // If we end up with a pointer exit count, bail. It may be unsized. 2503 if (!ExactBTC->getType()->isIntegerTy()) 2504 return false; 2505 2506 auto BadExit = [&](BasicBlock *ExitingBB) { 2507 // If our exiting block exits multiple loops, we can only rewrite the 2508 // innermost one. Otherwise, we're changing how many times the innermost 2509 // loop runs before it exits. 2510 if (LI->getLoopFor(ExitingBB) != L) 2511 return true; 2512 2513 // Can't rewrite non-branch yet. 2514 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 2515 if (!BI) 2516 return true; 2517 2518 // If already constant, nothing to do. 2519 if (isa<Constant>(BI->getCondition())) 2520 return true; 2521 2522 // If the exit block has phis, we need to be able to compute the values 2523 // within the loop which contains them. This assumes trivially lcssa phis 2524 // have already been removed; TODO: generalize 2525 BasicBlock *ExitBlock = 2526 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ?
1 : 0); 2527 if (!ExitBlock->phis().empty()) 2528 return true; 2529 2530 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2531 assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count"); 2532 if (!SE->isLoopInvariant(ExitCount, L) || 2533 !isSafeToExpand(ExitCount, *SE)) 2534 return true; 2535 2536 // If we end up with a pointer exit count, bail. It may be unsized. 2537 if (!ExitCount->getType()->isIntegerTy()) 2538 return true; 2539 2540 return false; 2541 }; 2542 2543 // If we have any exits which can't be predicated themselves, then we can't 2544 // predicate any exit which isn't guaranteed to execute before it. Consider 2545 // two exits (a) and (b) which would both exit on the same iteration. If we 2546 // can predicate (b), but not (a), and (a) precedes (b) along some path, then 2547 // we could convert a loop from exiting through (a) to one exiting through 2548 // (b). Note that this problem exists only for exits with the same exit 2549 // count, and we could be more aggressive when exit counts are known unequal. 2550 llvm::sort(ExitingBlocks, 2551 [&](BasicBlock *A, BasicBlock *B) { 2552 // std::sort sorts in ascending order, so we want the inverse of 2553 // the normal dominance relation, plus a tie breaker for blocks 2554 // unordered by dominance. 2555 if (DT->properlyDominates(A, B)) return true; 2556 if (DT->properlyDominates(B, A)) return false; 2557 return A->getName() < B->getName(); 2558 }); 2559 // Check to see if our exit blocks are a total order (i.e. a linear chain of 2560 // exits before the backedge). If they aren't, reasoning about reachability 2561 // is complicated and we choose not to for now. 2562 for (unsigned i = 1; i < ExitingBlocks.size(); i++) 2563 if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])) 2564 return false; 2565 2566 // Given our sorted total order, we know that exit[j] must be evaluated 2567 // after all exit[i] such that j > i. 2568 for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++) 2569 if (BadExit(ExitingBlocks[i])) { 2570 ExitingBlocks.resize(i); 2571 break; 2572 } 2573 2574 if (ExitingBlocks.empty()) 2575 return false; 2576 2577 // We rely on not being able to reach an exiting block on a later iteration 2578 // than its statically computed exit count. The implementation of 2579 // getExitCount currently has this invariant, but assert it here so that 2580 // breakage is obvious if this ever changes. 2581 assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) { 2582 return DT->dominates(ExitingBB, L->getLoopLatch()); 2583 })); 2584 2585 // At this point, ExitingBlocks consists of only those blocks which are 2586 // predicatable. Given that, we know we have at least one exit we can 2587 // predicate if the loop doesn't have side effects and doesn't have any 2588 // implicit exits (because then our exact BTC isn't actually exact). 2589 // @Reviewers - As structured, this is O(I^2) for loop nests. Any 2590 // suggestions on how to improve this? I can obviously bail out for outer 2591 // loops, but that seems less than ideal. MemorySSA can find memory writes, 2592 // is that enough for *all* side effects? 2593 for (BasicBlock *BB : L->blocks()) 2594 for (auto &I : *BB) 2595 // TODO: isGuaranteedToTransfer 2596 if (I.mayHaveSideEffects() || I.mayThrow()) 2597 return false; 2598 2599 bool Changed = false; 2600 // Finally, do the actual predication for all predicatable blocks.
A couple 2601 // of notes here: 2602 // 1) We don't bother to constant fold dominated exits with identical exit 2603 // counts; that's simply a form of CSE/equality propagation and we leave 2604 // it for dedicated passes. 2605 // 2) We insert the comparison at the branch. Hoisting introduces additional 2606 // legality constraints and we leave that to dedicated logic. We want to 2607 // predicate even if we can't insert a loop invariant expression as 2608 // peeling or unrolling will likely reduce the cost of the otherwise loop 2609 // varying check. 2610 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator()); 2611 IRBuilder<> B(L->getLoopPreheader()->getTerminator()); 2612 Value *ExactBTCV = nullptr; // Lazily generated if needed. 2613 for (BasicBlock *ExitingBB : ExitingBlocks) { 2614 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2615 2616 auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2617 Value *NewCond; 2618 if (ExitCount == ExactBTC) { 2619 NewCond = L->contains(BI->getSuccessor(0)) ? 2620 B.getFalse() : B.getTrue(); 2621 } else { 2622 Value *ECV = Rewriter.expandCodeFor(ExitCount); 2623 if (!ExactBTCV) 2624 ExactBTCV = Rewriter.expandCodeFor(ExactBTC); 2625 Value *RHS = ExactBTCV; 2626 if (ECV->getType() != RHS->getType()) { 2627 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType()); 2628 ECV = B.CreateZExt(ECV, WiderTy); 2629 RHS = B.CreateZExt(RHS, WiderTy); 2630 } 2631 auto Pred = L->contains(BI->getSuccessor(0)) ? 2632 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; 2633 NewCond = B.CreateICmp(Pred, ECV, RHS); 2634 } 2635 Value *OldCond = BI->getCondition(); 2636 BI->setCondition(NewCond); 2637 if (OldCond->use_empty()) 2638 DeadInsts.push_back(OldCond); 2639 Changed = true; 2640 } 2641 2642 return Changed; 2643 } 2644 2645 //===----------------------------------------------------------------------===// 2646 // IndVarSimplify driver. Manage several subpasses of IV simplification. 2647 //===----------------------------------------------------------------------===// 2648 2649 bool IndVarSimplify::run(Loop *L) { 2650 // We need (and expect!) the incoming loop to be in LCSSA. 2651 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2652 "LCSSA required to run indvars!"); 2653 2654 // If LoopSimplify form is not available, stay out of trouble. Some notes: 2655 // - LSR currently only supports LoopSimplify-form loops. Indvars' 2656 // canonicalization can be a pessimization without LSR to "clean up" 2657 // afterwards. 2658 // - We depend on having a preheader; in particular, 2659 // Loop::getCanonicalInductionVariable only supports loops with preheaders, 2660 // and we're in trouble if we can't find the induction variable even when 2661 // we've manually inserted one. 2662 // - LFTR relies on having a single backedge. 2663 if (!L->isLoopSimplifyForm()) 2664 return false; 2665 2666 #ifndef NDEBUG 2667 // Used below for a consistency check only 2668 // Note: Since the result returned by ScalarEvolution may depend on the order 2669 // in which previous results are added to its cache, the call to 2670 // getBackedgeTakenCount() may change following SCEV queries. 2671 const SCEV *BackedgeTakenCount; 2672 if (VerifyIndvars) 2673 BackedgeTakenCount = SE->getBackedgeTakenCount(L); 2674 #endif 2675 2676 bool Changed = false; 2677 // If there are any floating-point recurrences, attempt to 2678 // transform them to use integer recurrences. 2679 Changed |= rewriteNonIntegerIVs(L); 2680 2681 // Create a rewriter object which we'll use to transform the code with. 
2682 SCEVExpander Rewriter(*SE, DL, "indvars"); 2683 #ifndef NDEBUG 2684 Rewriter.setDebugType(DEBUG_TYPE); 2685 #endif 2686 2687 // Eliminate redundant IV users. 2688 // 2689 // Simplification works best when run before other consumers of SCEV. We 2690 // attempt to avoid evaluating SCEVs for sign/zero extend operations until 2691 // other expressions involving loop IVs have been evaluated. This helps SCEV 2692 // set no-wrap flags before normalizing sign/zero extension. 2693 Rewriter.disableCanonicalMode(); 2694 Changed |= simplifyAndExtend(L, Rewriter, LI); 2695 2696 // Check to see if we can compute the final value of any expressions 2697 // that are recurrent in the loop, and substitute the exit values from the 2698 // loop into any instructions outside of the loop that use the final values 2699 // of the current expressions. 2700 if (ReplaceExitValue != NeverRepl) { 2701 if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT, 2702 ReplaceExitValue, DeadInsts)) { 2703 NumReplaced += Rewrites; 2704 Changed = true; 2705 } 2706 } 2707 2708 // Eliminate redundant IV cycles. 2709 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); 2710 2711 // Try to eliminate loop exits based on analyzeable exit counts 2712 if (optimizeLoopExits(L, Rewriter)) { 2713 Changed = true; 2714 // Given we've changed exit counts, notify SCEV 2715 SE->forgetLoop(L); 2716 } 2717 2718 // Try to form loop invariant tests for loop exits by changing how many 2719 // iterations of the loop run when that is unobservable. 2720 if (predicateLoopExits(L, Rewriter)) { 2721 Changed = true; 2722 // Given we've changed exit counts, notify SCEV 2723 SE->forgetLoop(L); 2724 } 2725 2726 // If we have a trip count expression, rewrite the loop's exit condition 2727 // using it. 2728 if (!DisableLFTR) { 2729 BasicBlock *PreHeader = L->getLoopPreheader(); 2730 BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); 2731 2732 SmallVector<BasicBlock*, 16> ExitingBlocks; 2733 L->getExitingBlocks(ExitingBlocks); 2734 for (BasicBlock *ExitingBB : ExitingBlocks) { 2735 // Can't rewrite non-branch yet. 2736 if (!isa<BranchInst>(ExitingBB->getTerminator())) 2737 continue; 2738 2739 // If our exitting block exits multiple loops, we can only rewrite the 2740 // innermost one. Otherwise, we're changing how many times the innermost 2741 // loop runs before it exits. 2742 if (LI->getLoopFor(ExitingBB) != L) 2743 continue; 2744 2745 if (!needsLFTR(L, ExitingBB)) 2746 continue; 2747 2748 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2749 if (isa<SCEVCouldNotCompute>(ExitCount)) 2750 continue; 2751 2752 // This was handled above, but as we form SCEVs, we can sometimes refine 2753 // existing ones; this allows exit counts to be folded to zero which 2754 // weren't when optimizeLoopExits saw them. Arguably, we should iterate 2755 // until stable to handle cases like this better. 2756 if (ExitCount->isZero()) 2757 continue; 2758 2759 PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT); 2760 if (!IndVar) 2761 continue; 2762 2763 // Avoid high cost expansions. Note: This heuristic is questionable in 2764 // that our definition of "high cost" is not exactly principled. 2765 if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget, 2766 TTI, PreHeaderBR)) 2767 continue; 2768 2769 // Check preconditions for proper SCEVExpander operation. SCEV does not 2770 // express SCEVExpander's dependencies, such as LoopSimplify. 
Instead 2771 // any pass that uses the SCEVExpander must do it. This does not work 2772 // well for loop passes because SCEVExpander makes assumptions about 2773 // all loops, while LoopPassManager only forces the current loop to be 2774 // simplified. 2775 // 2776 // FIXME: SCEV expansion has no way to bail out, so the caller must 2777 // explicitly check any assumptions made by SCEV. Brittle. 2778 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount); 2779 if (!AR || AR->getLoop()->getLoopPreheader()) 2780 Changed |= linearFunctionTestReplace(L, ExitingBB, 2781 ExitCount, IndVar, 2782 Rewriter); 2783 } 2784 } 2785 // Clear the rewriter cache, because values that are in the rewriter's cache 2786 // can be deleted in the loop below, causing the AssertingVH in the cache to 2787 // trigger. 2788 Rewriter.clear(); 2789 2790 // Now that we're done iterating through lists, clean up any instructions 2791 // which are now dead. 2792 while (!DeadInsts.empty()) 2793 if (Instruction *Inst = 2794 dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val())) 2795 Changed |= 2796 RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get()); 2797 2798 // The Rewriter may not be used from this point on. 2799 2800 // Loop-invariant instructions in the preheader that aren't used in the 2801 // loop may be sunk below the loop to reduce register pressure. 2802 Changed |= sinkUnusedInvariants(L); 2803 2804 // rewriteFirstIterationLoopExitValues does not rely on the computation of 2805 // trip count and therefore can further simplify exit values in addition to 2806 // rewriteLoopExitValues. 2807 Changed |= rewriteFirstIterationLoopExitValues(L); 2808 2809 // Clean up dead instructions. 2810 Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get()); 2811 2812 // Check a post-condition. 2813 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2814 "Indvars did not preserve LCSSA!"); 2815 2816 // Verify that LFTR, and any other change have not interfered with SCEV's 2817 // ability to compute trip count. We may have *changed* the exit count, but 2818 // only by reducing it. 
2819 #ifndef NDEBUG 2820 if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { 2821 SE->forgetLoop(L); 2822 const SCEV *NewBECount = SE->getBackedgeTakenCount(L); 2823 if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) < 2824 SE->getTypeSizeInBits(NewBECount->getType())) 2825 NewBECount = SE->getTruncateOrNoop(NewBECount, 2826 BackedgeTakenCount->getType()); 2827 else 2828 BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, 2829 NewBECount->getType()); 2830 assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount, 2831 NewBECount) && "indvars must preserve SCEV"); 2832 } 2833 if (VerifyMemorySSA && MSSAU) 2834 MSSAU->getMemorySSA()->verifyMemorySSA(); 2835 #endif 2836 2837 return Changed; 2838 } 2839 2840 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, 2841 LoopStandardAnalysisResults &AR, 2842 LPMUpdater &) { 2843 Function *F = L.getHeader()->getParent(); 2844 const DataLayout &DL = F->getParent()->getDataLayout(); 2845 2846 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA); 2847 if (!IVS.run(&L)) 2848 return PreservedAnalyses::all(); 2849 2850 auto PA = getLoopPassPreservedAnalyses(); 2851 PA.preserveSet<CFGAnalyses>(); 2852 if (AR.MSSA) 2853 PA.preserve<MemorySSAAnalysis>(); 2854 return PA; 2855 } 2856 2857 namespace { 2858 2859 struct IndVarSimplifyLegacyPass : public LoopPass { 2860 static char ID; // Pass identification, replacement for typeid 2861 2862 IndVarSimplifyLegacyPass() : LoopPass(ID) { 2863 initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry()); 2864 } 2865 2866 bool runOnLoop(Loop *L, LPPassManager &LPM) override { 2867 if (skipLoop(L)) 2868 return false; 2869 2870 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 2871 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 2872 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2873 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); 2874 auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr; 2875 auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); 2876 auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr; 2877 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 2878 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>(); 2879 MemorySSA *MSSA = nullptr; 2880 if (MSSAAnalysis) 2881 MSSA = &MSSAAnalysis->getMSSA(); 2882 2883 IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA); 2884 return IVS.run(L); 2885 } 2886 2887 void getAnalysisUsage(AnalysisUsage &AU) const override { 2888 AU.setPreservesCFG(); 2889 AU.addPreserved<MemorySSAWrapperPass>(); 2890 getLoopAnalysisUsage(AU); 2891 } 2892 }; 2893 2894 } // end anonymous namespace 2895 2896 char IndVarSimplifyLegacyPass::ID = 0; 2897 2898 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", 2899 "Induction Variable Simplification", false, false) 2900 INITIALIZE_PASS_DEPENDENCY(LoopPass) 2901 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars", 2902 "Induction Variable Simplification", false, false) 2903 2904 Pass *llvm::createIndVarSimplifyPass() { 2905 return new IndVarSimplifyLegacyPass(); 2906 } 2907