1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This transformation analyzes and transforms the induction variables (and 10 // computations derived from them) into simpler forms suitable for subsequent 11 // analysis and transformation. 12 // 13 // If the trip count of a loop is computable, this pass also makes the following 14 // changes: 15 // 1. The exit condition for the loop is canonicalized to compare the 16 // induction value against the exit value. This turns loops like: 17 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)' 18 // 2. Any use outside of the loop of an expression derived from the indvar 19 // is changed to compute the derived value outside of the loop, eliminating 20 // the dependence on the exit value of the induction variable. If the only 21 // purpose of the loop is to compute the exit value of some derived 22 // expression, this transformation will make the loop dead. 23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "llvm/Transforms/Scalar/IndVarSimplify.h" 27 #include "llvm/ADT/APFloat.h" 28 #include "llvm/ADT/APInt.h" 29 #include "llvm/ADT/ArrayRef.h" 30 #include "llvm/ADT/DenseMap.h" 31 #include "llvm/ADT/None.h" 32 #include "llvm/ADT/Optional.h" 33 #include "llvm/ADT/STLExtras.h" 34 #include "llvm/ADT/SmallPtrSet.h" 35 #include "llvm/ADT/SmallSet.h" 36 #include "llvm/ADT/SmallVector.h" 37 #include "llvm/ADT/Statistic.h" 38 #include "llvm/ADT/iterator_range.h" 39 #include "llvm/Analysis/LoopInfo.h" 40 #include "llvm/Analysis/LoopPass.h" 41 #include "llvm/Analysis/MemorySSA.h" 42 #include "llvm/Analysis/MemorySSAUpdater.h" 43 #include "llvm/Analysis/ScalarEvolution.h" 44 #include "llvm/Analysis/ScalarEvolutionExpander.h" 45 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 46 #include "llvm/Analysis/TargetLibraryInfo.h" 47 #include "llvm/Analysis/TargetTransformInfo.h" 48 #include "llvm/Analysis/ValueTracking.h" 49 #include "llvm/IR/BasicBlock.h" 50 #include "llvm/IR/Constant.h" 51 #include "llvm/IR/ConstantRange.h" 52 #include "llvm/IR/Constants.h" 53 #include "llvm/IR/DataLayout.h" 54 #include "llvm/IR/DerivedTypes.h" 55 #include "llvm/IR/Dominators.h" 56 #include "llvm/IR/Function.h" 57 #include "llvm/IR/IRBuilder.h" 58 #include "llvm/IR/InstrTypes.h" 59 #include "llvm/IR/Instruction.h" 60 #include "llvm/IR/Instructions.h" 61 #include "llvm/IR/IntrinsicInst.h" 62 #include "llvm/IR/Intrinsics.h" 63 #include "llvm/IR/Module.h" 64 #include "llvm/IR/Operator.h" 65 #include "llvm/IR/PassManager.h" 66 #include "llvm/IR/PatternMatch.h" 67 #include "llvm/IR/Type.h" 68 #include "llvm/IR/Use.h" 69 #include "llvm/IR/User.h" 70 #include "llvm/IR/Value.h" 71 #include "llvm/IR/ValueHandle.h" 72 #include "llvm/InitializePasses.h" 73 #include "llvm/Pass.h" 74 #include "llvm/Support/Casting.h" 75 #include "llvm/Support/CommandLine.h" 76 #include "llvm/Support/Compiler.h" 77 #include "llvm/Support/Debug.h" 78 #include "llvm/Support/ErrorHandling.h" 79 #include "llvm/Support/MathExtras.h" 80 #include "llvm/Support/raw_ostream.h" 81 #include "llvm/Transforms/Scalar.h" 82 #include "llvm/Transforms/Scalar/LoopPassManager.h" 83 #include 
"llvm/Transforms/Utils/BasicBlockUtils.h" 84 #include "llvm/Transforms/Utils/Local.h" 85 #include "llvm/Transforms/Utils/LoopUtils.h" 86 #include "llvm/Transforms/Utils/SimplifyIndVar.h" 87 #include <cassert> 88 #include <cstdint> 89 #include <utility> 90 91 using namespace llvm; 92 93 #define DEBUG_TYPE "indvars" 94 95 STATISTIC(NumWidened , "Number of indvars widened"); 96 STATISTIC(NumReplaced , "Number of exit values replaced"); 97 STATISTIC(NumLFTR , "Number of loop exit tests replaced"); 98 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); 99 STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); 100 101 // Trip count verification can be enabled by default under NDEBUG if we 102 // implement a strong expression equivalence checker in SCEV. Until then, we 103 // use the verify-indvars flag, which may assert in some cases. 104 static cl::opt<bool> VerifyIndvars( 105 "verify-indvars", cl::Hidden, 106 cl::desc("Verify the ScalarEvolution result after running indvars")); 107 108 static cl::opt<ReplaceExitVal> ReplaceExitValue( 109 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), 110 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), 111 cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), 112 clEnumValN(OnlyCheapRepl, "cheap", 113 "only replace exit value when the cost is cheap"), 114 clEnumValN(NoHardUse, "noharduse", 115 "only replace exit values when loop def likely dead"), 116 clEnumValN(AlwaysRepl, "always", 117 "always replace exit value whenever possible"))); 118 119 static cl::opt<bool> UsePostIncrementRanges( 120 "indvars-post-increment-ranges", cl::Hidden, 121 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), 122 cl::init(true)); 123 124 static cl::opt<bool> 125 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), 126 cl::desc("Disable Linear Function Test Replace optimization")); 127 128 static cl::opt<bool> 129 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), 130 cl::desc("Predicate conditions in read only loops")); 131 132 namespace { 133 134 struct RewritePhi; 135 136 class IndVarSimplify { 137 LoopInfo *LI; 138 ScalarEvolution *SE; 139 DominatorTree *DT; 140 const DataLayout &DL; 141 TargetLibraryInfo *TLI; 142 const TargetTransformInfo *TTI; 143 MemorySSA *MSSA; 144 std::unique_ptr<MemorySSAUpdater> MSSAU; 145 146 SmallVector<WeakTrackingVH, 16> DeadInsts; 147 148 bool handleFloatingPointIV(Loop *L, PHINode *PH); 149 bool rewriteNonIntegerIVs(Loop *L); 150 151 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); 152 /// Try to eliminate loop exits based on analyzeable exit counts 153 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter); 154 /// Try to form loop invariant tests for loop exits by changing how many 155 /// iterations of the loop run when that is unobservable. 
156 bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter); 157 158 bool rewriteFirstIterationLoopExitValues(Loop *L); 159 160 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 161 const SCEV *ExitCount, 162 PHINode *IndVar, SCEVExpander &Rewriter); 163 164 bool sinkUnusedInvariants(Loop *L); 165 166 public: 167 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, 168 const DataLayout &DL, TargetLibraryInfo *TLI, 169 TargetTransformInfo *TTI, MemorySSA *MSSA) 170 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI), MSSA(MSSA) { 171 if (MSSA) 172 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); 173 } 174 175 bool run(Loop *L); 176 }; 177 178 } // end anonymous namespace 179 180 /// Determine the insertion point for this user. By default, insert immediately 181 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the 182 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest 183 /// common dominator for the incoming blocks. A nullptr can be returned if no 184 /// viable location is found: it may happen if User is a PHI and Def only comes 185 /// to this PHI from unreachable blocks. 186 static Instruction *getInsertPointForUses(Instruction *User, Value *Def, 187 DominatorTree *DT, LoopInfo *LI) { 188 PHINode *PHI = dyn_cast<PHINode>(User); 189 if (!PHI) 190 return User; 191 192 Instruction *InsertPt = nullptr; 193 for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { 194 if (PHI->getIncomingValue(i) != Def) 195 continue; 196 197 BasicBlock *InsertBB = PHI->getIncomingBlock(i); 198 199 if (!DT->isReachableFromEntry(InsertBB)) 200 continue; 201 202 if (!InsertPt) { 203 InsertPt = InsertBB->getTerminator(); 204 continue; 205 } 206 InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB); 207 InsertPt = InsertBB->getTerminator(); 208 } 209 210 // If we have skipped all inputs, it means that Def only comes to Phi from 211 // unreachable blocks. 212 if (!InsertPt) 213 return nullptr; 214 215 auto *DefI = dyn_cast<Instruction>(Def); 216 if (!DefI) 217 return InsertPt; 218 219 assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses"); 220 221 auto *L = LI->getLoopFor(DefI->getParent()); 222 assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent()))); 223 224 for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom()) 225 if (LI->getLoopFor(DTN->getBlock()) == L) 226 return DTN->getBlock()->getTerminator(); 227 228 llvm_unreachable("DefI dominates InsertPt!"); 229 } 230 231 //===----------------------------------------------------------------------===// 232 // rewriteNonIntegerIVs and helpers. Prefer integer IVs. 233 //===----------------------------------------------------------------------===// 234 235 /// Convert APF to an integer, if possible. 236 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) { 237 bool isExact = false; 238 // See if we can convert this to an int64_t 239 uint64_t UIntVal; 240 if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true, 241 APFloat::rmTowardZero, &isExact) != APFloat::opOK || 242 !isExact) 243 return false; 244 IntVal = UIntVal; 245 return true; 246 } 247 248 /// If the loop has floating induction variable then insert corresponding 249 /// integer induction variable if possible. 
250 /// For example, 251 /// for(double i = 0; i < 10000; ++i) 252 /// bar(i) 253 /// is converted into 254 /// for(int i = 0; i < 10000; ++i) 255 /// bar((double)i); 256 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { 257 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); 258 unsigned BackEdge = IncomingEdge^1; 259 260 // Check incoming value. 261 auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge)); 262 263 int64_t InitValue; 264 if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue)) 265 return false; 266 267 // Check IV increment. Reject this PN if increment operation is not 268 // an add or increment value can not be represented by an integer. 269 auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); 270 if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false; 271 272 // If this is not an add of the PHI with a constantfp, or if the constant fp 273 // is not an integer, bail out. 274 ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); 275 int64_t IncValue; 276 if (IncValueVal == nullptr || Incr->getOperand(0) != PN || 277 !ConvertToSInt(IncValueVal->getValueAPF(), IncValue)) 278 return false; 279 280 // Check Incr uses. One user is PN and the other user is an exit condition 281 // used by the conditional terminator. 282 Value::user_iterator IncrUse = Incr->user_begin(); 283 Instruction *U1 = cast<Instruction>(*IncrUse++); 284 if (IncrUse == Incr->user_end()) return false; 285 Instruction *U2 = cast<Instruction>(*IncrUse++); 286 if (IncrUse != Incr->user_end()) return false; 287 288 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't 289 // only used by a branch, we can't transform it. 290 FCmpInst *Compare = dyn_cast<FCmpInst>(U1); 291 if (!Compare) 292 Compare = dyn_cast<FCmpInst>(U2); 293 if (!Compare || !Compare->hasOneUse() || 294 !isa<BranchInst>(Compare->user_back())) 295 return false; 296 297 BranchInst *TheBr = cast<BranchInst>(Compare->user_back()); 298 299 // We need to verify that the branch actually controls the iteration count 300 // of the loop. If not, the new IV can overflow and no one will notice. 301 // The branch block must be in the loop and one of the successors must be out 302 // of the loop. 303 assert(TheBr->isConditional() && "Can't use fcmp if not conditional"); 304 if (!L->contains(TheBr->getParent()) || 305 (L->contains(TheBr->getSuccessor(0)) && 306 L->contains(TheBr->getSuccessor(1)))) 307 return false; 308 309 // If it isn't a comparison with an integer-as-fp (the exit value), we can't 310 // transform it. 311 ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); 312 int64_t ExitValue; 313 if (ExitValueVal == nullptr || 314 !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) 315 return false; 316 317 // Find new predicate for integer comparison. 318 CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; 319 switch (Compare->getPredicate()) { 320 default: return false; // Unknown comparison. 
321 case CmpInst::FCMP_OEQ: 322 case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; 323 case CmpInst::FCMP_ONE: 324 case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break; 325 case CmpInst::FCMP_OGT: 326 case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break; 327 case CmpInst::FCMP_OGE: 328 case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break; 329 case CmpInst::FCMP_OLT: 330 case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break; 331 case CmpInst::FCMP_OLE: 332 case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; 333 } 334 335 // We convert the floating point induction variable to a signed i32 value if 336 // we can. This is only safe if the comparison will not overflow in a way 337 // that won't be trapped by the integer equivalent operations. Check for this 338 // now. 339 // TODO: We could use i64 if it is native and the range requires it. 340 341 // The start/stride/exit values must all fit in signed i32. 342 if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) 343 return false; 344 345 // If not actually striding (add x, 0.0), avoid touching the code. 346 if (IncValue == 0) 347 return false; 348 349 // Positive and negative strides have different safety conditions. 350 if (IncValue > 0) { 351 // If we have a positive stride, we require the init to be less than the 352 // exit value. 353 if (InitValue >= ExitValue) 354 return false; 355 356 uint32_t Range = uint32_t(ExitValue-InitValue); 357 // Check for infinite loop, either: 358 // while (i <= Exit) or until (i > Exit) 359 if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) { 360 if (++Range == 0) return false; // Range overflows. 361 } 362 363 unsigned Leftover = Range % uint32_t(IncValue); 364 365 // If this is an equality comparison, we require that the strided value 366 // exactly land on the exit value, otherwise the IV condition will wrap 367 // around and do things the fp IV wouldn't. 368 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 369 Leftover != 0) 370 return false; 371 372 // If the stride would wrap around the i32 before exiting, we can't 373 // transform the IV. 374 if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) 375 return false; 376 } else { 377 // If we have a negative stride, we require the init to be greater than the 378 // exit value. 379 if (InitValue <= ExitValue) 380 return false; 381 382 uint32_t Range = uint32_t(InitValue-ExitValue); 383 // Check for infinite loop, either: 384 // while (i >= Exit) or until (i < Exit) 385 if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) { 386 if (++Range == 0) return false; // Range overflows. 387 } 388 389 unsigned Leftover = Range % uint32_t(-IncValue); 390 391 // If this is an equality comparison, we require that the strided value 392 // exactly land on the exit value, otherwise the IV condition will wrap 393 // around and do things the fp IV wouldn't. 394 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 395 Leftover != 0) 396 return false; 397 398 // If the stride would wrap around the i32 before exiting, we can't 399 // transform the IV. 400 if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) 401 return false; 402 } 403 404 IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); 405 406 // Insert new integer induction variable. 
407 PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); 408 NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), 409 PN->getIncomingBlock(IncomingEdge)); 410 411 Value *NewAdd = 412 BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue), 413 Incr->getName()+".int", Incr); 414 NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge)); 415 416 ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd, 417 ConstantInt::get(Int32Ty, ExitValue), 418 Compare->getName()); 419 420 // In the following deletions, PN may become dead and may be deleted. 421 // Use a WeakTrackingVH to observe whether this happens. 422 WeakTrackingVH WeakPH = PN; 423 424 // Delete the old floating point exit comparison. The branch starts using the 425 // new comparison. 426 NewCompare->takeName(Compare); 427 Compare->replaceAllUsesWith(NewCompare); 428 RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get()); 429 430 // Delete the old floating point increment. 431 Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); 432 RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get()); 433 434 // If the FP induction variable still has uses, this is because something else 435 // in the loop uses its value. In order to canonicalize the induction 436 // variable, we chose to eliminate the IV and rewrite it in terms of an 437 // int->fp cast. 438 // 439 // We give preference to sitofp over uitofp because it is faster on most 440 // platforms. 441 if (WeakPH) { 442 Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", 443 &*PN->getParent()->getFirstInsertionPt()); 444 PN->replaceAllUsesWith(Conv); 445 RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get()); 446 } 447 return true; 448 } 449 450 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) { 451 // First step. Check to see if there are any floating-point recurrences. 452 // If there are, change them into integer recurrences, permitting analysis by 453 // the SCEV routines. 454 BasicBlock *Header = L->getHeader(); 455 456 SmallVector<WeakTrackingVH, 8> PHIs; 457 for (PHINode &PN : Header->phis()) 458 PHIs.push_back(&PN); 459 460 bool Changed = false; 461 for (unsigned i = 0, e = PHIs.size(); i != e; ++i) 462 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i])) 463 Changed |= handleFloatingPointIV(L, PN); 464 465 // If the loop previously had floating-point IV, ScalarEvolution 466 // may not have been able to compute a trip count. Now that we've done some 467 // re-writing, the trip count may be computable. 468 if (Changed) 469 SE->forgetLoop(L); 470 return Changed; 471 } 472 473 //===---------------------------------------------------------------------===// 474 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know 475 // they will exit at the first iteration. 476 //===---------------------------------------------------------------------===// 477 478 /// Check to see if this loop has loop invariant conditions which lead to loop 479 /// exits. If so, we know that if the exit path is taken, it is at the first 480 /// loop iteration. This lets us predict exit values of PHI nodes that live in 481 /// loop header. 482 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) { 483 // Verify the input to the pass is already in LCSSA form. 
484 assert(L->isLCSSAForm(*DT)); 485 486 SmallVector<BasicBlock *, 8> ExitBlocks; 487 L->getUniqueExitBlocks(ExitBlocks); 488 489 bool MadeAnyChanges = false; 490 for (auto *ExitBB : ExitBlocks) { 491 // If there are no more PHI nodes in this exit block, then no more 492 // values defined inside the loop are used on this path. 493 for (PHINode &PN : ExitBB->phis()) { 494 for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues(); 495 IncomingValIdx != E; ++IncomingValIdx) { 496 auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx); 497 498 // Can we prove that the exit must run on the first iteration if it 499 // runs at all? (i.e. early exits are fine for our purposes, but 500 // traces which lead to this exit being taken on the 2nd iteration 501 // aren't.) Note that this is about whether the exit branch is 502 // executed, not about whether it is taken. 503 if (!L->getLoopLatch() || 504 !DT->dominates(IncomingBB, L->getLoopLatch())) 505 continue; 506 507 // Get condition that leads to the exit path. 508 auto *TermInst = IncomingBB->getTerminator(); 509 510 Value *Cond = nullptr; 511 if (auto *BI = dyn_cast<BranchInst>(TermInst)) { 512 // Must be a conditional branch, otherwise the block 513 // should not be in the loop. 514 Cond = BI->getCondition(); 515 } else if (auto *SI = dyn_cast<SwitchInst>(TermInst)) 516 Cond = SI->getCondition(); 517 else 518 continue; 519 520 if (!L->isLoopInvariant(Cond)) 521 continue; 522 523 auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx)); 524 525 // Only deal with PHIs in the loop header. 526 if (!ExitVal || ExitVal->getParent() != L->getHeader()) 527 continue; 528 529 // If ExitVal is a PHI on the loop header, then we know its 530 // value along this exit because the exit can only be taken 531 // on the first iteration. 532 auto *LoopPreheader = L->getLoopPreheader(); 533 assert(LoopPreheader && "Invalid loop"); 534 int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader); 535 if (PreheaderIdx != -1) { 536 assert(ExitVal->getParent() == L->getHeader() && 537 "ExitVal must be in loop header"); 538 MadeAnyChanges = true; 539 PN.setIncomingValue(IncomingValIdx, 540 ExitVal->getIncomingValue(PreheaderIdx)); 541 } 542 } 543 } 544 } 545 return MadeAnyChanges; 546 } 547 548 //===----------------------------------------------------------------------===// 549 // IV Widening - Extend the width of an IV to cover its widest uses. 550 //===----------------------------------------------------------------------===// 551 552 namespace { 553 554 // Collect information about induction variables that are used by sign/zero 555 // extend operations. This information is recorded by CollectExtend and provides 556 // the input to WidenIV. 557 struct WideIVInfo { 558 PHINode *NarrowIV = nullptr; 559 560 // Widest integer type created [sz]ext 561 Type *WidestNativeType = nullptr; 562 563 // Was a sext user seen before a zext? 564 bool IsSigned = false; 565 }; 566 567 } // end anonymous namespace 568 569 /// Update information about the induction variable that is extended by this 570 /// sign or zero extend operation. This is used to determine the final width of 571 /// the IV before actually widening it. 
572 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, 573 const TargetTransformInfo *TTI) { 574 bool IsSigned = Cast->getOpcode() == Instruction::SExt; 575 if (!IsSigned && Cast->getOpcode() != Instruction::ZExt) 576 return; 577 578 Type *Ty = Cast->getType(); 579 uint64_t Width = SE->getTypeSizeInBits(Ty); 580 if (!Cast->getModule()->getDataLayout().isLegalInteger(Width)) 581 return; 582 583 // Check that `Cast` actually extends the induction variable (we rely on this 584 // later). This takes care of cases where `Cast` is extending a truncation of 585 // the narrow induction variable, and thus can end up being narrower than the 586 // "narrow" induction variable. 587 uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType()); 588 if (NarrowIVWidth >= Width) 589 return; 590 591 // Cast is either an sext or zext up to this point. 592 // We should not widen an indvar if arithmetics on the wider indvar are more 593 // expensive than those on the narrower indvar. We check only the cost of ADD 594 // because at least an ADD is required to increment the induction variable. We 595 // could compute more comprehensively the cost of all instructions on the 596 // induction variable when necessary. 597 if (TTI && 598 TTI->getArithmeticInstrCost(Instruction::Add, Ty) > 599 TTI->getArithmeticInstrCost(Instruction::Add, 600 Cast->getOperand(0)->getType())) { 601 return; 602 } 603 604 if (!WI.WidestNativeType) { 605 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); 606 WI.IsSigned = IsSigned; 607 return; 608 } 609 610 // We extend the IV to satisfy the sign of its first user, arbitrarily. 611 if (WI.IsSigned != IsSigned) 612 return; 613 614 if (Width > SE->getTypeSizeInBits(WI.WidestNativeType)) 615 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); 616 } 617 618 namespace { 619 620 /// Record a link in the Narrow IV def-use chain along with the WideIV that 621 /// computes the same value as the Narrow IV def. This avoids caching Use* 622 /// pointers. 623 struct NarrowIVDefUse { 624 Instruction *NarrowDef = nullptr; 625 Instruction *NarrowUse = nullptr; 626 Instruction *WideDef = nullptr; 627 628 // True if the narrow def is never negative. Tracking this information lets 629 // us use a sign extension instead of a zero extension or vice versa, when 630 // profitable and legal. 631 bool NeverNegative = false; 632 633 NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD, 634 bool NeverNegative) 635 : NarrowDef(ND), NarrowUse(NU), WideDef(WD), 636 NeverNegative(NeverNegative) {} 637 }; 638 639 /// The goal of this transform is to remove sign and zero extends without 640 /// creating any new induction variables. To do this, it creates a new phi of 641 /// the wider type and redirects all users, either removing extends or inserting 642 /// truncs whenever we stop propagating the type. 643 class WidenIV { 644 // Parameters 645 PHINode *OrigPhi; 646 Type *WideType; 647 648 // Context 649 LoopInfo *LI; 650 Loop *L; 651 ScalarEvolution *SE; 652 DominatorTree *DT; 653 654 // Does the module have any calls to the llvm.experimental.guard intrinsic 655 // at all? If not we can avoid scanning instructions looking for guards. 
656 bool HasGuards; 657 658 // Result 659 PHINode *WidePhi = nullptr; 660 Instruction *WideInc = nullptr; 661 const SCEV *WideIncExpr = nullptr; 662 SmallVectorImpl<WeakTrackingVH> &DeadInsts; 663 664 SmallPtrSet<Instruction *,16> Widened; 665 SmallVector<NarrowIVDefUse, 8> NarrowIVUsers; 666 667 enum ExtendKind { ZeroExtended, SignExtended, Unknown }; 668 669 // A map tracking the kind of extension used to widen each narrow IV 670 // and narrow IV user. 671 // Key: pointer to a narrow IV or IV user. 672 // Value: the kind of extension used to widen this Instruction. 673 DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap; 674 675 using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>; 676 677 // A map with control-dependent ranges for post increment IV uses. The key is 678 // a pair of IV def and a use of this def denoting the context. The value is 679 // a ConstantRange representing possible values of the def at the given 680 // context. 681 DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos; 682 683 Optional<ConstantRange> getPostIncRangeInfo(Value *Def, 684 Instruction *UseI) { 685 DefUserPair Key(Def, UseI); 686 auto It = PostIncRangeInfos.find(Key); 687 return It == PostIncRangeInfos.end() 688 ? Optional<ConstantRange>(None) 689 : Optional<ConstantRange>(It->second); 690 } 691 692 void calculatePostIncRanges(PHINode *OrigPhi); 693 void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser); 694 695 void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) { 696 DefUserPair Key(Def, UseI); 697 auto It = PostIncRangeInfos.find(Key); 698 if (It == PostIncRangeInfos.end()) 699 PostIncRangeInfos.insert({Key, R}); 700 else 701 It->second = R.intersectWith(It->second); 702 } 703 704 public: 705 WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, 706 DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI, 707 bool HasGuards) 708 : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo), 709 L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), 710 HasGuards(HasGuards), DeadInsts(DI) { 711 assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); 712 ExtendKindMap[OrigPhi] = WI.IsSigned ? 
SignExtended : ZeroExtended; 713 } 714 715 PHINode *createWideIV(SCEVExpander &Rewriter); 716 717 protected: 718 Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned, 719 Instruction *Use); 720 721 Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR); 722 Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU, 723 const SCEVAddRecExpr *WideAR); 724 Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU); 725 726 ExtendKind getExtendKind(Instruction *I); 727 728 using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>; 729 730 WidenedRecTy getWideRecurrence(NarrowIVDefUse DU); 731 732 WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU); 733 734 const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, 735 unsigned OpCode) const; 736 737 Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter); 738 739 bool widenLoopCompare(NarrowIVDefUse DU); 740 bool widenWithVariantLoadUse(NarrowIVDefUse DU); 741 void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU); 742 743 void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); 744 }; 745 746 } // end anonymous namespace 747 748 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType, 749 bool IsSigned, Instruction *Use) { 750 // Set the debug location and conservative insertion point. 751 IRBuilder<> Builder(Use); 752 // Hoist the insertion point into loop preheaders as far as possible. 753 for (const Loop *L = LI->getLoopFor(Use->getParent()); 754 L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper); 755 L = L->getParentLoop()) 756 Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator()); 757 758 return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) : 759 Builder.CreateZExt(NarrowOper, WideType); 760 } 761 762 /// Instantiate a wide operation to replace a narrow operation. This only needs 763 /// to handle operations that can evaluation to SCEVAddRec. It can safely return 764 /// 0 for any operation we decide not to clone. 765 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU, 766 const SCEVAddRecExpr *WideAR) { 767 unsigned Opcode = DU.NarrowUse->getOpcode(); 768 switch (Opcode) { 769 default: 770 return nullptr; 771 case Instruction::Add: 772 case Instruction::Mul: 773 case Instruction::UDiv: 774 case Instruction::Sub: 775 return cloneArithmeticIVUser(DU, WideAR); 776 777 case Instruction::And: 778 case Instruction::Or: 779 case Instruction::Xor: 780 case Instruction::Shl: 781 case Instruction::LShr: 782 case Instruction::AShr: 783 return cloneBitwiseIVUser(DU); 784 } 785 } 786 787 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) { 788 Instruction *NarrowUse = DU.NarrowUse; 789 Instruction *NarrowDef = DU.NarrowDef; 790 Instruction *WideDef = DU.WideDef; 791 792 LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n"); 793 794 // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything 795 // about the narrow operand yet so must insert a [sz]ext. It is probably loop 796 // invariant and will be folded or hoisted. If it actually comes from a 797 // widened IV, it should be removed during a future call to widenIVUse. 798 bool IsSigned = getExtendKind(NarrowDef) == SignExtended; 799 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) 800 ? WideDef 801 : createExtendInst(NarrowUse->getOperand(0), WideType, 802 IsSigned, NarrowUse); 803 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) 804 ? 
WideDef 805 : createExtendInst(NarrowUse->getOperand(1), WideType, 806 IsSigned, NarrowUse); 807 808 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 809 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 810 NarrowBO->getName()); 811 IRBuilder<> Builder(NarrowUse); 812 Builder.Insert(WideBO); 813 WideBO->copyIRFlags(NarrowBO); 814 return WideBO; 815 } 816 817 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU, 818 const SCEVAddRecExpr *WideAR) { 819 Instruction *NarrowUse = DU.NarrowUse; 820 Instruction *NarrowDef = DU.NarrowDef; 821 Instruction *WideDef = DU.WideDef; 822 823 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); 824 825 unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1; 826 827 // We're trying to find X such that 828 // 829 // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X 830 // 831 // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef), 832 // and check using SCEV if any of them are correct. 833 834 // Returns true if extending NonIVNarrowDef according to `SignExt` is a 835 // correct solution to X. 836 auto GuessNonIVOperand = [&](bool SignExt) { 837 const SCEV *WideLHS; 838 const SCEV *WideRHS; 839 840 auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) { 841 if (SignExt) 842 return SE->getSignExtendExpr(S, Ty); 843 return SE->getZeroExtendExpr(S, Ty); 844 }; 845 846 if (IVOpIdx == 0) { 847 WideLHS = SE->getSCEV(WideDef); 848 const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1)); 849 WideRHS = GetExtend(NarrowRHS, WideType); 850 } else { 851 const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0)); 852 WideLHS = GetExtend(NarrowLHS, WideType); 853 WideRHS = SE->getSCEV(WideDef); 854 } 855 856 // WideUse is "WideDef `op.wide` X" as described in the comment. 857 const SCEV *WideUse = nullptr; 858 859 switch (NarrowUse->getOpcode()) { 860 default: 861 llvm_unreachable("No other possibility!"); 862 863 case Instruction::Add: 864 WideUse = SE->getAddExpr(WideLHS, WideRHS); 865 break; 866 867 case Instruction::Mul: 868 WideUse = SE->getMulExpr(WideLHS, WideRHS); 869 break; 870 871 case Instruction::UDiv: 872 WideUse = SE->getUDivExpr(WideLHS, WideRHS); 873 break; 874 875 case Instruction::Sub: 876 WideUse = SE->getMinusSCEV(WideLHS, WideRHS); 877 break; 878 } 879 880 return WideUse == WideAR; 881 }; 882 883 bool SignExtend = getExtendKind(NarrowDef) == SignExtended; 884 if (!GuessNonIVOperand(SignExtend)) { 885 SignExtend = !SignExtend; 886 if (!GuessNonIVOperand(SignExtend)) 887 return nullptr; 888 } 889 890 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) 891 ? WideDef 892 : createExtendInst(NarrowUse->getOperand(0), WideType, 893 SignExtend, NarrowUse); 894 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) 895 ? 
WideDef 896 : createExtendInst(NarrowUse->getOperand(1), WideType, 897 SignExtend, NarrowUse); 898 899 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 900 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 901 NarrowBO->getName()); 902 903 IRBuilder<> Builder(NarrowUse); 904 Builder.Insert(WideBO); 905 WideBO->copyIRFlags(NarrowBO); 906 return WideBO; 907 } 908 909 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) { 910 auto It = ExtendKindMap.find(I); 911 assert(It != ExtendKindMap.end() && "Instruction not yet extended!"); 912 return It->second; 913 } 914 915 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, 916 unsigned OpCode) const { 917 if (OpCode == Instruction::Add) 918 return SE->getAddExpr(LHS, RHS); 919 if (OpCode == Instruction::Sub) 920 return SE->getMinusSCEV(LHS, RHS); 921 if (OpCode == Instruction::Mul) 922 return SE->getMulExpr(LHS, RHS); 923 924 llvm_unreachable("Unsupported opcode."); 925 } 926 927 /// No-wrap operations can transfer sign extension of their result to their 928 /// operands. Generate the SCEV value for the widened operation without 929 /// actually modifying the IR yet. If the expression after extending the 930 /// operands is an AddRec for this loop, return the AddRec and the kind of 931 /// extension used. 932 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) { 933 // Handle the common case of add<nsw/nuw> 934 const unsigned OpCode = DU.NarrowUse->getOpcode(); 935 // Only Add/Sub/Mul instructions supported yet. 936 if (OpCode != Instruction::Add && OpCode != Instruction::Sub && 937 OpCode != Instruction::Mul) 938 return {nullptr, Unknown}; 939 940 // One operand (NarrowDef) has already been extended to WideDef. Now determine 941 // if extending the other will lead to a recurrence. 942 const unsigned ExtendOperIdx = 943 DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0; 944 assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU"); 945 946 const SCEV *ExtendOperExpr = nullptr; 947 const OverflowingBinaryOperator *OBO = 948 cast<OverflowingBinaryOperator>(DU.NarrowUse); 949 ExtendKind ExtKind = getExtendKind(DU.NarrowDef); 950 if (ExtKind == SignExtended && OBO->hasNoSignedWrap()) 951 ExtendOperExpr = SE->getSignExtendExpr( 952 SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); 953 else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap()) 954 ExtendOperExpr = SE->getZeroExtendExpr( 955 SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); 956 else 957 return {nullptr, Unknown}; 958 959 // When creating this SCEV expr, don't apply the current operations NSW or NUW 960 // flags. This instruction may be guarded by control flow that the no-wrap 961 // behavior depends on. Non-control-equivalent instructions can be mapped to 962 // the same SCEV expression, and it would be incorrect to transfer NSW/NUW 963 // semantics to those operations. 964 const SCEV *lhs = SE->getSCEV(DU.WideDef); 965 const SCEV *rhs = ExtendOperExpr; 966 967 // Let's swap operands to the initial order for the case of non-commutative 968 // operations, like SUB. See PR21014. 969 if (ExtendOperIdx == 0) 970 std::swap(lhs, rhs); 971 const SCEVAddRecExpr *AddRec = 972 dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode)); 973 974 if (!AddRec || AddRec->getLoop() != L) 975 return {nullptr, Unknown}; 976 977 return {AddRec, ExtKind}; 978 } 979 980 /// Is this instruction potentially interesting for further simplification after 981 /// widening it's type? 
In other words, can the extend be safely hoisted out of 982 /// the loop with SCEV reducing the value to a recurrence on the same loop. If 983 /// so, return the extended recurrence and the kind of extension used. Otherwise 984 /// return {nullptr, Unknown}. 985 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) { 986 if (!SE->isSCEVable(DU.NarrowUse->getType())) 987 return {nullptr, Unknown}; 988 989 const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse); 990 if (SE->getTypeSizeInBits(NarrowExpr->getType()) >= 991 SE->getTypeSizeInBits(WideType)) { 992 // NarrowUse implicitly widens its operand. e.g. a gep with a narrow 993 // index. So don't follow this use. 994 return {nullptr, Unknown}; 995 } 996 997 const SCEV *WideExpr; 998 ExtendKind ExtKind; 999 if (DU.NeverNegative) { 1000 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); 1001 if (isa<SCEVAddRecExpr>(WideExpr)) 1002 ExtKind = SignExtended; 1003 else { 1004 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); 1005 ExtKind = ZeroExtended; 1006 } 1007 } else if (getExtendKind(DU.NarrowDef) == SignExtended) { 1008 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); 1009 ExtKind = SignExtended; 1010 } else { 1011 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); 1012 ExtKind = ZeroExtended; 1013 } 1014 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); 1015 if (!AddRec || AddRec->getLoop() != L) 1016 return {nullptr, Unknown}; 1017 return {AddRec, ExtKind}; 1018 } 1019 1020 /// This IV user cannot be widened. Replace this use of the original narrow IV 1021 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV. 1022 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) { 1023 auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); 1024 if (!InsertPt) 1025 return; 1026 LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user " 1027 << *DU.NarrowUse << "\n"); 1028 IRBuilder<> Builder(InsertPt); 1029 Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); 1030 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); 1031 } 1032 1033 /// If the narrow use is a compare instruction, then widen the compare 1034 // (and possibly the other operand). The extend operation is hoisted into the 1035 // loop preheader as far as possible. 1036 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) { 1037 ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse); 1038 if (!Cmp) 1039 return false; 1040 1041 // We can legally widen the comparison in the following two cases: 1042 // 1043 // - The signedness of the IV extension and comparison match 1044 // 1045 // - The narrow IV is always positive (and thus its sign extension is equal 1046 // to its zero extension). For instance, let's say we're zero extending 1047 // %narrow for the following use 1048 // 1049 // icmp slt i32 %narrow, %val ... (A) 1050 // 1051 // and %narrow is always positive. Then 1052 // 1053 // (A) == icmp slt i32 sext(%narrow), sext(%val) 1054 // == icmp slt i32 zext(%narrow), sext(%val) 1055 bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended; 1056 if (!(DU.NeverNegative || IsSigned == Cmp->isSigned())) 1057 return false; 1058 1059 Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 
1 : 0); 1060 unsigned CastWidth = SE->getTypeSizeInBits(Op->getType()); 1061 unsigned IVWidth = SE->getTypeSizeInBits(WideType); 1062 assert(CastWidth <= IVWidth && "Unexpected width while widening compare."); 1063 1064 // Widen the compare instruction. 1065 auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); 1066 if (!InsertPt) 1067 return false; 1068 IRBuilder<> Builder(InsertPt); 1069 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); 1070 1071 // Widen the other operand of the compare, if necessary. 1072 if (CastWidth < IVWidth) { 1073 Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp); 1074 DU.NarrowUse->replaceUsesOfWith(Op, ExtOp); 1075 } 1076 return true; 1077 } 1078 1079 /// If the narrow use is an instruction whose two operands are the defining 1080 /// instruction of DU and a load instruction, then we have the following: 1081 /// if the load is hoisted outside the loop, then we do not reach this function 1082 /// as scalar evolution analysis works fine in widenIVUse with variables 1083 /// hoisted outside the loop and efficient code is subsequently generated by 1084 /// not emitting truncate instructions. But when the load is not hoisted 1085 /// (whether due to limitation in alias analysis or due to a true legality), 1086 /// then scalar evolution can not proceed with loop variant values and 1087 /// inefficient code is generated. This function handles the non-hoisted load 1088 /// special case by making the optimization generate the same type of code for 1089 /// hoisted and non-hoisted load (widen use and eliminate sign extend 1090 /// instruction). This special case is important especially when the induction 1091 /// variables are affecting addressing mode in code generation. 1092 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) { 1093 Instruction *NarrowUse = DU.NarrowUse; 1094 Instruction *NarrowDef = DU.NarrowDef; 1095 Instruction *WideDef = DU.WideDef; 1096 1097 // Handle the common case of add<nsw/nuw> 1098 const unsigned OpCode = NarrowUse->getOpcode(); 1099 // Only Add/Sub/Mul instructions are supported. 1100 if (OpCode != Instruction::Add && OpCode != Instruction::Sub && 1101 OpCode != Instruction::Mul) 1102 return false; 1103 1104 // The operand that is not defined by NarrowDef of DU. Let's call it the 1105 // other operand. 1106 unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0; 1107 assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef && 1108 "bad DU"); 1109 1110 const SCEV *ExtendOperExpr = nullptr; 1111 const OverflowingBinaryOperator *OBO = 1112 cast<OverflowingBinaryOperator>(NarrowUse); 1113 ExtendKind ExtKind = getExtendKind(NarrowDef); 1114 if (ExtKind == SignExtended && OBO->hasNoSignedWrap()) 1115 ExtendOperExpr = SE->getSignExtendExpr( 1116 SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType); 1117 else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap()) 1118 ExtendOperExpr = SE->getZeroExtendExpr( 1119 SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType); 1120 else 1121 return false; 1122 1123 // We are interested in the other operand being a load instruction. 1124 // But, we should look into relaxing this restriction later on. 
1125 auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx)); 1126 if (I && I->getOpcode() != Instruction::Load) 1127 return false; 1128 1129 // Verifying that Defining operand is an AddRec 1130 const SCEV *Op1 = SE->getSCEV(WideDef); 1131 const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1); 1132 if (!AddRecOp1 || AddRecOp1->getLoop() != L) 1133 return false; 1134 // Verifying that other operand is an Extend. 1135 if (ExtKind == SignExtended) { 1136 if (!isa<SCEVSignExtendExpr>(ExtendOperExpr)) 1137 return false; 1138 } else { 1139 if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr)) 1140 return false; 1141 } 1142 1143 if (ExtKind == SignExtended) { 1144 for (Use &U : NarrowUse->uses()) { 1145 SExtInst *User = dyn_cast<SExtInst>(U.getUser()); 1146 if (!User || User->getType() != WideType) 1147 return false; 1148 } 1149 } else { // ExtKind == ZeroExtended 1150 for (Use &U : NarrowUse->uses()) { 1151 ZExtInst *User = dyn_cast<ZExtInst>(U.getUser()); 1152 if (!User || User->getType() != WideType) 1153 return false; 1154 } 1155 } 1156 1157 return true; 1158 } 1159 1160 /// Special Case for widening with variant Loads (see 1161 /// WidenIV::widenWithVariantLoadUse). This is the code generation part. 1162 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) { 1163 Instruction *NarrowUse = DU.NarrowUse; 1164 Instruction *NarrowDef = DU.NarrowDef; 1165 Instruction *WideDef = DU.WideDef; 1166 1167 ExtendKind ExtKind = getExtendKind(NarrowDef); 1168 1169 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); 1170 1171 // Generating a widening use instruction. 1172 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) 1173 ? WideDef 1174 : createExtendInst(NarrowUse->getOperand(0), WideType, 1175 ExtKind, NarrowUse); 1176 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) 1177 ? WideDef 1178 : createExtendInst(NarrowUse->getOperand(1), WideType, 1179 ExtKind, NarrowUse); 1180 1181 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 1182 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 1183 NarrowBO->getName()); 1184 IRBuilder<> Builder(NarrowUse); 1185 Builder.Insert(WideBO); 1186 WideBO->copyIRFlags(NarrowBO); 1187 1188 assert(ExtKind != Unknown && "Unknown ExtKind not handled"); 1189 1190 ExtendKindMap[NarrowUse] = ExtKind; 1191 1192 for (Use &U : NarrowUse->uses()) { 1193 Instruction *User = nullptr; 1194 if (ExtKind == SignExtended) 1195 User = dyn_cast<SExtInst>(U.getUser()); 1196 else 1197 User = dyn_cast<ZExtInst>(U.getUser()); 1198 if (User && User->getType() == WideType) { 1199 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " 1200 << *WideBO << "\n"); 1201 ++NumElimExt; 1202 User->replaceAllUsesWith(WideBO); 1203 DeadInsts.emplace_back(User); 1204 } 1205 } 1206 } 1207 1208 /// Determine whether an individual user of the narrow IV can be widened. If so, 1209 /// return the wide clone of the user. 1210 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { 1211 assert(ExtendKindMap.count(DU.NarrowDef) && 1212 "Should already know the kind of extension used to widen NarrowDef"); 1213 1214 // Stop traversing the def-use chain at inner-loop phis or post-loop phis. 1215 if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) { 1216 if (LI->getLoopFor(UsePhi->getParent()) != L) { 1217 // For LCSSA phis, sink the truncate outside the loop. 1218 // After SimplifyCFG most loop exit targets have a single predecessor. 1219 // Otherwise fall back to a truncate within the loop. 
1220 if (UsePhi->getNumOperands() != 1) 1221 truncateIVUse(DU, DT, LI); 1222 else { 1223 // Widening the PHI requires us to insert a trunc. The logical place 1224 // for this trunc is in the same BB as the PHI. This is not possible if 1225 // the BB is terminated by a catchswitch. 1226 if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator())) 1227 return nullptr; 1228 1229 PHINode *WidePhi = 1230 PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", 1231 UsePhi); 1232 WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0)); 1233 IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt()); 1234 Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType()); 1235 UsePhi->replaceAllUsesWith(Trunc); 1236 DeadInsts.emplace_back(UsePhi); 1237 LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to " 1238 << *WidePhi << "\n"); 1239 } 1240 return nullptr; 1241 } 1242 } 1243 1244 // This narrow use can be widened by a sext if it's non-negative or its narrow 1245 // def was widended by a sext. Same for zext. 1246 auto canWidenBySExt = [&]() { 1247 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended; 1248 }; 1249 auto canWidenByZExt = [&]() { 1250 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended; 1251 }; 1252 1253 // Our raison d'etre! Eliminate sign and zero extension. 1254 if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) || 1255 (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) { 1256 Value *NewDef = DU.WideDef; 1257 if (DU.NarrowUse->getType() != WideType) { 1258 unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType()); 1259 unsigned IVWidth = SE->getTypeSizeInBits(WideType); 1260 if (CastWidth < IVWidth) { 1261 // The cast isn't as wide as the IV, so insert a Trunc. 1262 IRBuilder<> Builder(DU.NarrowUse); 1263 NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType()); 1264 } 1265 else { 1266 // A wider extend was hidden behind a narrower one. This may induce 1267 // another round of IV widening in which the intermediate IV becomes 1268 // dead. It should be very rare. 1269 LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi 1270 << " not wide enough to subsume " << *DU.NarrowUse 1271 << "\n"); 1272 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); 1273 NewDef = DU.NarrowUse; 1274 } 1275 } 1276 if (NewDef != DU.NarrowUse) { 1277 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse 1278 << " replaced by " << *DU.WideDef << "\n"); 1279 ++NumElimExt; 1280 DU.NarrowUse->replaceAllUsesWith(NewDef); 1281 DeadInsts.emplace_back(DU.NarrowUse); 1282 } 1283 // Now that the extend is gone, we want to expose it's uses for potential 1284 // further simplification. We don't need to directly inform SimplifyIVUsers 1285 // of the new users, because their parent IV will be processed later as a 1286 // new loop phi. If we preserved IVUsers analysis, we would also want to 1287 // push the uses of WideDef here. 1288 1289 // No further widening is needed. The deceased [sz]ext had done it for us. 1290 return nullptr; 1291 } 1292 1293 // Does this user itself evaluate to a recurrence after widening? 1294 WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU); 1295 if (!WideAddRec.first) 1296 WideAddRec = getWideRecurrence(DU); 1297 1298 assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown)); 1299 if (!WideAddRec.first) { 1300 // If use is a loop condition, try to promote the condition instead of 1301 // truncating the IV first. 
1302 if (widenLoopCompare(DU)) 1303 return nullptr; 1304 1305 // We are here about to generate a truncate instruction that may hurt 1306 // performance because the scalar evolution expression computed earlier 1307 // in WideAddRec.first does not indicate a polynomial induction expression. 1308 // In that case, look at the operands of the use instruction to determine 1309 // if we can still widen the use instead of truncating its operand. 1310 if (widenWithVariantLoadUse(DU)) { 1311 widenWithVariantLoadUseCodegen(DU); 1312 return nullptr; 1313 } 1314 1315 // This user does not evaluate to a recurrence after widening, so don't 1316 // follow it. Instead insert a Trunc to kill off the original use, 1317 // eventually isolating the original narrow IV so it can be removed. 1318 truncateIVUse(DU, DT, LI); 1319 return nullptr; 1320 } 1321 // Assume block terminators cannot evaluate to a recurrence. We can't to 1322 // insert a Trunc after a terminator if there happens to be a critical edge. 1323 assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() && 1324 "SCEV is not expected to evaluate a block terminator"); 1325 1326 // Reuse the IV increment that SCEVExpander created as long as it dominates 1327 // NarrowUse. 1328 Instruction *WideUse = nullptr; 1329 if (WideAddRec.first == WideIncExpr && 1330 Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) 1331 WideUse = WideInc; 1332 else { 1333 WideUse = cloneIVUser(DU, WideAddRec.first); 1334 if (!WideUse) 1335 return nullptr; 1336 } 1337 // Evaluation of WideAddRec ensured that the narrow expression could be 1338 // extended outside the loop without overflow. This suggests that the wide use 1339 // evaluates to the same expression as the extended narrow use, but doesn't 1340 // absolutely guarantee it. Hence the following failsafe check. In rare cases 1341 // where it fails, we simply throw away the newly created wide use. 1342 if (WideAddRec.first != SE->getSCEV(WideUse)) { 1343 LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " 1344 << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first 1345 << "\n"); 1346 DeadInsts.emplace_back(WideUse); 1347 return nullptr; 1348 } 1349 1350 // if we reached this point then we are going to replace 1351 // DU.NarrowUse with WideUse. Reattach DbgValue then. 1352 replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT); 1353 1354 ExtendKindMap[DU.NarrowUse] = WideAddRec.second; 1355 // Returning WideUse pushes it on the worklist. 1356 return WideUse; 1357 } 1358 1359 /// Add eligible users of NarrowDef to NarrowIVUsers. 1360 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { 1361 const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); 1362 bool NonNegativeDef = 1363 SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, 1364 SE->getConstant(NarrowSCEV->getType(), 0)); 1365 for (User *U : NarrowDef->users()) { 1366 Instruction *NarrowUser = cast<Instruction>(U); 1367 1368 // Handle data flow merges and bizarre phi cycles. 1369 if (!Widened.insert(NarrowUser).second) 1370 continue; 1371 1372 bool NonNegativeUse = false; 1373 if (!NonNegativeDef) { 1374 // We might have a control-dependent range information for this context. 1375 if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser)) 1376 NonNegativeUse = RangeInfo->getSignedMin().isNonNegative(); 1377 } 1378 1379 NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, 1380 NonNegativeDef || NonNegativeUse); 1381 } 1382 } 1383 1384 /// Process a single induction variable. 
First use the SCEVExpander to create a 1385 /// wide induction variable that evaluates to the same recurrence as the 1386 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's 1387 /// def-use chain. After widenIVUse has processed all interesting IV users, the 1388 /// narrow IV will be isolated for removal by DeleteDeadPHIs. 1389 /// 1390 /// It would be simpler to delete uses as they are processed, but we must avoid 1391 /// invalidating SCEV expressions. 1392 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { 1393 // Is this phi an induction variable? 1394 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); 1395 if (!AddRec) 1396 return nullptr; 1397 1398 // Widen the induction variable expression. 1399 const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended 1400 ? SE->getSignExtendExpr(AddRec, WideType) 1401 : SE->getZeroExtendExpr(AddRec, WideType); 1402 1403 assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && 1404 "Expect the new IV expression to preserve its type"); 1405 1406 // Can the IV be extended outside the loop without overflow? 1407 AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); 1408 if (!AddRec || AddRec->getLoop() != L) 1409 return nullptr; 1410 1411 // An AddRec must have loop-invariant operands. Since this AddRec is 1412 // materialized by a loop header phi, the expression cannot have any post-loop 1413 // operands, so they must dominate the loop header. 1414 assert( 1415 SE->properlyDominates(AddRec->getStart(), L->getHeader()) && 1416 SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) && 1417 "Loop header phi recurrence inputs do not dominate the loop"); 1418 1419 // Iterate over IV uses (including transitive ones) looking for IV increments 1420 // of the form 'add nsw %iv, <const>'. For each increment and each use of 1421 // the increment calculate control-dependent range information basing on 1422 // dominating conditions inside of the loop (e.g. a range check inside of the 1423 // loop). Calculated ranges are stored in PostIncRangeInfos map. 1424 // 1425 // Control-dependent range information is later used to prove that a narrow 1426 // definition is not negative (see pushNarrowIVUsers). It's difficult to do 1427 // this on demand because when pushNarrowIVUsers needs this information some 1428 // of the dominating conditions might be already widened. 1429 if (UsePostIncrementRanges) 1430 calculatePostIncRanges(OrigPhi); 1431 1432 // The rewriter provides a value for the desired IV expression. This may 1433 // either find an existing phi or materialize a new one. Either way, we 1434 // expect a well-formed cyclic phi-with-increments. i.e. any operand not part 1435 // of the phi-SCC dominates the loop entry. 1436 Instruction *InsertPt = &L->getHeader()->front(); 1437 WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt)); 1438 1439 // Remembering the WideIV increment generated by SCEVExpander allows 1440 // widenIVUse to reuse it when widening the narrow IV's increment. We don't 1441 // employ a general reuse mechanism because the call above is the only call to 1442 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. 1443 if (BasicBlock *LatchBlock = L->getLoopLatch()) { 1444 WideInc = 1445 cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); 1446 WideIncExpr = SE->getSCEV(WideInc); 1447 // Propagate the debug location associated with the original loop increment 1448 // to the new (widened) increment. 
1449 auto *OrigInc = 1450 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); 1451 WideInc->setDebugLoc(OrigInc->getDebugLoc()); 1452 } 1453 1454 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); 1455 ++NumWidened; 1456 1457 // Traverse the def-use chain using a worklist starting at the original IV. 1458 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); 1459 1460 Widened.insert(OrigPhi); 1461 pushNarrowIVUsers(OrigPhi, WidePhi); 1462 1463 while (!NarrowIVUsers.empty()) { 1464 NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); 1465 1466 // Process a def-use edge. This may replace the use, so don't hold a 1467 // use_iterator across it. 1468 Instruction *WideUse = widenIVUse(DU, Rewriter); 1469 1470 // Follow all def-use edges from the previous narrow use. 1471 if (WideUse) 1472 pushNarrowIVUsers(DU.NarrowUse, WideUse); 1473 1474 // widenIVUse may have removed the def-use edge. 1475 if (DU.NarrowDef->use_empty()) 1476 DeadInsts.emplace_back(DU.NarrowDef); 1477 } 1478 1479 // Attach any debug information to the new PHI. 1480 replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT); 1481 1482 return WidePhi; 1483 } 1484 1485 /// Calculates control-dependent range for the given def at the given context 1486 /// by looking at dominating conditions inside of the loop 1487 void WidenIV::calculatePostIncRange(Instruction *NarrowDef, 1488 Instruction *NarrowUser) { 1489 using namespace llvm::PatternMatch; 1490 1491 Value *NarrowDefLHS; 1492 const APInt *NarrowDefRHS; 1493 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), 1494 m_APInt(NarrowDefRHS))) || 1495 !NarrowDefRHS->isNonNegative()) 1496 return; 1497 1498 auto UpdateRangeFromCondition = [&] (Value *Condition, 1499 bool TrueDest) { 1500 CmpInst::Predicate Pred; 1501 Value *CmpRHS; 1502 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), 1503 m_Value(CmpRHS)))) 1504 return; 1505 1506 CmpInst::Predicate P = 1507 TrueDest ? Pred : CmpInst::getInversePredicate(Pred); 1508 1509 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); 1510 auto CmpConstrainedLHSRange = 1511 ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); 1512 auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( 1513 *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); 1514 1515 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); 1516 }; 1517 1518 auto UpdateRangeFromGuards = [&](Instruction *Ctx) { 1519 if (!HasGuards) 1520 return; 1521 1522 for (Instruction &I : make_range(Ctx->getIterator().getReverse(), 1523 Ctx->getParent()->rend())) { 1524 Value *C = nullptr; 1525 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) 1526 UpdateRangeFromCondition(C, /*TrueDest=*/true); 1527 } 1528 }; 1529 1530 UpdateRangeFromGuards(NarrowUser); 1531 1532 BasicBlock *NarrowUserBB = NarrowUser->getParent(); 1533 // If NarrowUserBB is statically unreachable asking dominator queries may 1534 // yield surprising results. (e.g. 
the block may not have a dom tree node) 1535 if (!DT->isReachableFromEntry(NarrowUserBB)) 1536 return; 1537 1538 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); 1539 L->contains(DTB->getBlock()); 1540 DTB = DTB->getIDom()) { 1541 auto *BB = DTB->getBlock(); 1542 auto *TI = BB->getTerminator(); 1543 UpdateRangeFromGuards(TI); 1544 1545 auto *BI = dyn_cast<BranchInst>(TI); 1546 if (!BI || !BI->isConditional()) 1547 continue; 1548 1549 auto *TrueSuccessor = BI->getSuccessor(0); 1550 auto *FalseSuccessor = BI->getSuccessor(1); 1551 1552 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { 1553 return BBE.isSingleEdge() && 1554 DT->dominates(BBE, NarrowUser->getParent()); 1555 }; 1556 1557 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) 1558 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); 1559 1560 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) 1561 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); 1562 } 1563 } 1564 1565 /// Calculates PostIncRangeInfos map for the given IV 1566 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { 1567 SmallPtrSet<Instruction *, 16> Visited; 1568 SmallVector<Instruction *, 6> Worklist; 1569 Worklist.push_back(OrigPhi); 1570 Visited.insert(OrigPhi); 1571 1572 while (!Worklist.empty()) { 1573 Instruction *NarrowDef = Worklist.pop_back_val(); 1574 1575 for (Use &U : NarrowDef->uses()) { 1576 auto *NarrowUser = cast<Instruction>(U.getUser()); 1577 1578 // Don't go looking outside the current loop. 1579 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; 1580 if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) 1581 continue; 1582 1583 if (!Visited.insert(NarrowUser).second) 1584 continue; 1585 1586 Worklist.push_back(NarrowUser); 1587 1588 calculatePostIncRange(NarrowDef, NarrowUser); 1589 } 1590 } 1591 } 1592 1593 //===----------------------------------------------------------------------===// 1594 // Live IV Reduction - Minimize IVs live across the loop. 1595 //===----------------------------------------------------------------------===// 1596 1597 //===----------------------------------------------------------------------===// 1598 // Simplification of IV users based on SCEV evaluation. 1599 //===----------------------------------------------------------------------===// 1600 1601 namespace { 1602 1603 class IndVarSimplifyVisitor : public IVVisitor { 1604 ScalarEvolution *SE; 1605 const TargetTransformInfo *TTI; 1606 PHINode *IVPhi; 1607 1608 public: 1609 WideIVInfo WI; 1610 1611 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, 1612 const TargetTransformInfo *TTI, 1613 const DominatorTree *DTree) 1614 : SE(SCEV), TTI(TTI), IVPhi(IV) { 1615 DT = DTree; 1616 WI.NarrowIV = IVPhi; 1617 } 1618 1619 // Implement the interface used by simplifyUsersOfIV. 1620 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } 1621 }; 1622 1623 } // end anonymous namespace 1624 1625 /// Iteratively perform simplification on a worklist of IV users. Each 1626 /// successive simplification may push more users which may themselves be 1627 /// candidates for simplification. 1628 /// 1629 /// Sign/Zero extend elimination is interleaved with IV simplification. 
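///
/// For illustration only (not drawn from any particular test case): given a
/// narrow i32 IV whose in-loop users all sign-extend it to i64, e.g.
///   for (i32 i = 0; i != n; ++i) use(sext i32 i to i64)
/// widening typically rewrites the loop around a single i64 recurrence, so
/// the per-iteration sext becomes dead and can be removed.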
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode*, 8> LoopPhis;
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    LoopPhis.push_back(cast<PHINode>(I));
  }
  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extensions until needed, and avoid running
    // other SCEV based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |=
          simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
      if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated Phi node if so. Otherwise, return null. Note
/// that this is less general than SCEV's AddRec checking.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    LLVM_FALLTHROUGH;
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
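  // e.g. (illustrative only) an increment written as '%next = add i32 %inv,
  // %phi', with the loop-invariant operand first, is still recognized by the
  // check below.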
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value. Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test. This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE.
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS.
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS.
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result. Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison. This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist. Note that a false result conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // Basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.
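  //
  // For illustration only: if Root feeds, say, the divisor of a udiv or the
  // pointer operand of a store that provably executes before OnPathTo, then a
  // poison Root already triggers UB on that path, so adding another use of
  // Root near OnPathTo cannot introduce new UB.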

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit)
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction*, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading to our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users. Safe as false is a conservative result.
    if (!propagatesFullPoison(I) && I != Root)
      continue;

    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // the way to the exiting bb.
  return false;
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example, Arguments
  // may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value*, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if this IV has no uses other than the (soon to be rewritten)
/// loop exit test.
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
  Value *IncV = Phi->getIncomingValue(LatchIdx);

  for (User *U : Phi->users())
    if (U != Cond && U != IncV) return false;

  for (User *U : IncV->users())
    if (U != Cond && U != Phi) return false;
  return true;
}

/// Return true if the given phi is a "counter" in L. A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1. Note that L must have exactly one latch.
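///
/// For example (sketch only), a header phi of the form
///   %iv = phi i32 [ %start, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add i32 %iv, 1
/// qualifies, whereas a phi stepping by 2 or by a loop-variant amount does
/// not.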
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi);
}

/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison. Our strategy here differs for
    // pointers and integer IVs.
For integers, we strip and reinfer as needed, 1964 // see code in linearFunctionTestReplace. For pointers, we restrict 1965 // transforms as there is no good way to reinfer inbounds once lost. 1966 if (!Phi->getType()->isIntegerTy() && 1967 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) 1968 continue; 1969 1970 const SCEV *Init = AR->getStart(); 1971 1972 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { 1973 // Don't force a live loop counter if another IV can be used. 1974 if (AlmostDeadIV(Phi, LatchBlock, Cond)) 1975 continue; 1976 1977 // Prefer to count-from-zero. This is a more "canonical" counter form. It 1978 // also prefers integer to pointer IVs. 1979 if (BestInit->isZero() != Init->isZero()) { 1980 if (BestInit->isZero()) 1981 continue; 1982 } 1983 // If two IVs both count from zero or both count from nonzero then the 1984 // narrower is likely a dead phi that has been widened. Use the wider phi 1985 // to allow the other to be eliminated. 1986 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) 1987 continue; 1988 } 1989 BestPhi = Phi; 1990 BestInit = Init; 1991 } 1992 return BestPhi; 1993 } 1994 1995 /// Insert an IR expression which computes the value held by the IV IndVar 1996 /// (which must be an loop counter w/unit stride) after the backedge of loop L 1997 /// is taken ExitCount times. 1998 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, 1999 const SCEV *ExitCount, bool UsePostInc, Loop *L, 2000 SCEVExpander &Rewriter, ScalarEvolution *SE) { 2001 assert(isLoopCounter(IndVar, L, SE)); 2002 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 2003 const SCEV *IVInit = AR->getStart(); 2004 2005 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter 2006 // finds a valid pointer IV. Sign extend ExitCount in order to materialize a 2007 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing 2008 // the existing GEPs whenever possible. 2009 if (IndVar->getType()->isPointerTy() && 2010 !ExitCount->getType()->isPointerTy()) { 2011 // IVOffset will be the new GEP offset that is interpreted by GEP as a 2012 // signed value. ExitCount on the other hand represents the loop trip count, 2013 // which is an unsigned value. FindLoopCounter only allows induction 2014 // variables that have a positive unit stride of one. This means we don't 2015 // have to handle the case of negative offsets (yet) and just need to zero 2016 // extend ExitCount. 2017 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); 2018 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy); 2019 if (UsePostInc) 2020 IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy)); 2021 2022 // Expand the code for the iteration count. 2023 assert(SE->isLoopInvariant(IVOffset, L) && 2024 "Computed iteration count is not loop invariant!"); 2025 2026 // We could handle pointer IVs other than i8*, but we need to compensate for 2027 // gep index scaling. 2028 assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), 2029 cast<PointerType>(IndVar->getType()) 2030 ->getElementType())->isOne() && 2031 "unit stride pointer IV must be i8*"); 2032 2033 const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset); 2034 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2035 return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI); 2036 } else { 2037 // In any other case, convert both IVInit and ExitCount to integers before 2038 // comparing. 
This may result in SCEV expansion of pointers, but in practice 2039 // SCEV will fold the pointer arithmetic away as such: 2040 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). 2041 // 2042 // Valid Cases: (1) both integers is most common; (2) both may be pointers 2043 // for simple memset-style loops. 2044 // 2045 // IVInit integer and ExitCount pointer would only occur if a canonical IV 2046 // were generated on top of case #2, which is not expected. 2047 2048 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 2049 // For unit stride, IVCount = Start + ExitCount with 2's complement 2050 // overflow. 2051 2052 // For integer IVs, truncate the IV before computing IVInit + BECount, 2053 // unless we know apriori that the limit must be a constant when evaluated 2054 // in the bitwidth of the IV. We prefer (potentially) keeping a truncate 2055 // of the IV in the loop over a (potentially) expensive expansion of the 2056 // widened exit count add(zext(add)) expression. 2057 if (SE->getTypeSizeInBits(IVInit->getType()) 2058 > SE->getTypeSizeInBits(ExitCount->getType())) { 2059 if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount)) 2060 ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType()); 2061 else 2062 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType()); 2063 } 2064 2065 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount); 2066 2067 if (UsePostInc) 2068 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); 2069 2070 // Expand the code for the iteration count. 2071 assert(SE->isLoopInvariant(IVLimit, L) && 2072 "Computed iteration count is not loop invariant!"); 2073 // Ensure that we generate the same type as IndVar, or a smaller integer 2074 // type. In the presence of null pointer values, we have an integer type 2075 // SCEV expression (IVInit) for a pointer type IV value (IndVar). 2076 Type *LimitTy = ExitCount->getType()->isPointerTy() ? 2077 IndVar->getType() : ExitCount->getType(); 2078 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2079 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); 2080 } 2081 } 2082 2083 /// This method rewrites the exit condition of the loop to be a canonical != 2084 /// comparison against the incremented loop induction variable. This pass is 2085 /// able to rewrite the exit tests of any loop where the SCEV analysis can 2086 /// determine a loop-invariant trip count of the loop, which is actually a much 2087 /// broader range than just linear tests. 2088 bool IndVarSimplify:: 2089 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 2090 const SCEV *ExitCount, 2091 PHINode *IndVar, SCEVExpander &Rewriter) { 2092 assert(L->getLoopLatch() && "Loop no longer in simplified form?"); 2093 assert(isLoopCounter(IndVar, L, SE)); 2094 Instruction * const IncVar = 2095 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch())); 2096 2097 // Initialize CmpIndVar to the preincremented IV. 2098 Value *CmpIndVar = IndVar; 2099 bool UsePostInc = false; 2100 2101 // If the exiting block is the same as the backedge block, we prefer to 2102 // compare against the post-incremented value, otherwise we must compare 2103 // against the preincremented value. 2104 if (ExitingBB == L->getLoopLatch()) { 2105 // For pointer IVs, we chose to not strip inbounds which requires us not 2106 // to add a potentially UB introducing use. 
    // We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type. This is safe
  // since we know (from the exit count bitwidth) that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead. We still evaluate the limit
    // in the narrower bitwidth; we just prefer a zext/sext outside the loop to
    // a truncate within it.
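    // For illustration only: with an i64 IV and an i32 exit count, a single
    // loop-invariant 'zext i32 %limit to i64' is usually cheaper than
    // executing 'trunc i64 %iv to i32' on every iteration.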
2176 bool Extended = false; 2177 const SCEV *IV = SE->getSCEV(CmpIndVar); 2178 const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar), 2179 ExitCnt->getType()); 2180 const SCEV *ZExtTrunc = 2181 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType()); 2182 2183 if (ZExtTrunc == IV) { 2184 Extended = true; 2185 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(), 2186 "wide.trip.count"); 2187 } else { 2188 const SCEV *SExtTrunc = 2189 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType()); 2190 if (SExtTrunc == IV) { 2191 Extended = true; 2192 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(), 2193 "wide.trip.count"); 2194 } 2195 } 2196 2197 if (Extended) { 2198 bool Discard; 2199 L->makeLoopInvariant(ExitCnt, Discard); 2200 } else 2201 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), 2202 "lftr.wideiv"); 2203 } 2204 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 2205 << " LHS:" << *CmpIndVar << '\n' 2206 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==") 2207 << "\n" 2208 << " RHS:\t" << *ExitCnt << "\n" 2209 << "ExitCount:\t" << *ExitCount << "\n" 2210 << " was: " << *BI->getCondition() << "\n"); 2211 2212 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); 2213 Value *OrigCond = BI->getCondition(); 2214 // It's tempting to use replaceAllUsesWith here to fully replace the old 2215 // comparison, but that's not immediately safe, since users of the old 2216 // comparison may not be dominated by the new comparison. Instead, just 2217 // update the branch to use the new comparison; in the common case this 2218 // will make old comparison dead. 2219 BI->setCondition(Cond); 2220 DeadInsts.push_back(OrigCond); 2221 2222 ++NumLFTR; 2223 return true; 2224 } 2225 2226 //===----------------------------------------------------------------------===// 2227 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders. 2228 //===----------------------------------------------------------------------===// 2229 2230 /// If there's a single exit block, sink any loop-invariant values that 2231 /// were defined in the preheader but not used inside the loop into the 2232 /// exit block to reduce register pressure in the loop. 2233 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { 2234 BasicBlock *ExitBlock = L->getExitBlock(); 2235 if (!ExitBlock) return false; 2236 2237 BasicBlock *Preheader = L->getLoopPreheader(); 2238 if (!Preheader) return false; 2239 2240 bool MadeAnyChanges = false; 2241 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); 2242 BasicBlock::iterator I(Preheader->getTerminator()); 2243 while (I != Preheader->begin()) { 2244 --I; 2245 // New instructions were inserted at the end of the preheader. 2246 if (isa<PHINode>(I)) 2247 break; 2248 2249 // Don't move instructions which might have side effects, since the side 2250 // effects need to complete before instructions inside the loop. Also don't 2251 // move instructions which might read memory, since the loop may modify 2252 // memory. Note that it's okay if the instruction might have undefined 2253 // behavior: LoopSimplify guarantees that the preheader dominates the exit 2254 // block. 2255 if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 2256 continue; 2257 2258 // Skip debug info intrinsics. 2259 if (isa<DbgInfoIntrinsic>(I)) 2260 continue; 2261 2262 // Skip eh pad instructions. 
2263 if (I->isEHPad()) 2264 continue; 2265 2266 // Don't sink alloca: we never want to sink static alloca's out of the 2267 // entry block, and correctly sinking dynamic alloca's requires 2268 // checks for stacksave/stackrestore intrinsics. 2269 // FIXME: Refactor this check somehow? 2270 if (isa<AllocaInst>(I)) 2271 continue; 2272 2273 // Determine if there is a use in or before the loop (direct or 2274 // otherwise). 2275 bool UsedInLoop = false; 2276 for (Use &U : I->uses()) { 2277 Instruction *User = cast<Instruction>(U.getUser()); 2278 BasicBlock *UseBB = User->getParent(); 2279 if (PHINode *P = dyn_cast<PHINode>(User)) { 2280 unsigned i = 2281 PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 2282 UseBB = P->getIncomingBlock(i); 2283 } 2284 if (UseBB == Preheader || L->contains(UseBB)) { 2285 UsedInLoop = true; 2286 break; 2287 } 2288 } 2289 2290 // If there is, the def must remain in the preheader. 2291 if (UsedInLoop) 2292 continue; 2293 2294 // Otherwise, sink it to the exit block. 2295 Instruction *ToMove = &*I; 2296 bool Done = false; 2297 2298 if (I != Preheader->begin()) { 2299 // Skip debug info intrinsics. 2300 do { 2301 --I; 2302 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); 2303 2304 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) 2305 Done = true; 2306 } else { 2307 Done = true; 2308 } 2309 2310 MadeAnyChanges = true; 2311 ToMove->moveBefore(*ExitBlock, InsertPt); 2312 if (Done) break; 2313 InsertPt = ToMove->getIterator(); 2314 } 2315 2316 return MadeAnyChanges; 2317 } 2318 2319 /// Return a symbolic upper bound for the backedge taken count of the loop. 2320 /// This is more general than getConstantMaxBackedgeTakenCount as it returns 2321 /// an arbitrary expression as opposed to only constants. 2322 /// TODO: Move into the ScalarEvolution class. 2323 static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE, 2324 DominatorTree &DT, Loop *L) { 2325 SmallVector<BasicBlock*, 16> ExitingBlocks; 2326 L->getExitingBlocks(ExitingBlocks); 2327 2328 // Form an expression for the maximum exit count possible for this loop. We 2329 // merge the max and exact information to approximate a version of 2330 // getConstantMaxBackedgeTakenCount which isn't restricted to just constants. 2331 SmallVector<const SCEV*, 4> ExitCounts; 2332 for (BasicBlock *ExitingBB : ExitingBlocks) { 2333 const SCEV *ExitCount = SE.getExitCount(L, ExitingBB); 2334 if (isa<SCEVCouldNotCompute>(ExitCount)) 2335 ExitCount = SE.getExitCount(L, ExitingBB, 2336 ScalarEvolution::ConstantMaximum); 2337 if (!isa<SCEVCouldNotCompute>(ExitCount)) { 2338 assert(DT.dominates(ExitingBB, L->getLoopLatch()) && 2339 "We should only have known counts for exiting blocks that " 2340 "dominate latch!"); 2341 ExitCounts.push_back(ExitCount); 2342 } 2343 } 2344 if (ExitCounts.empty()) 2345 return SE.getCouldNotCompute(); 2346 return SE.getUMinFromMismatchedTypes(ExitCounts); 2347 } 2348 2349 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { 2350 SmallVector<BasicBlock*, 16> ExitingBlocks; 2351 L->getExitingBlocks(ExitingBlocks); 2352 2353 // Remove all exits which aren't both rewriteable and analyzeable. 2354 auto NewEnd = llvm::remove_if(ExitingBlocks, [&](BasicBlock *ExitingBB) { 2355 // If our exitting block exits multiple loops, we can only rewrite the 2356 // innermost one. Otherwise, we're changing how many times the innermost 2357 // loop runs before it exits. 2358 if (LI->getLoopFor(ExitingBB) != L) 2359 return true; 2360 2361 // Can't rewrite non-branch yet. 
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount))
      return true;
    return false;
  });
  ExitingBlocks.erase(NewEnd, ExitingBlocks.end());

  if (ExitingBlocks.empty())
    return false;

  // Get a symbolic upper bound on the loop backedge taken count.
  const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
  if (isa<SCEVCouldNotCompute>(MaxExitCount))
    return false;

  // Visit our exit blocks in order of dominance. We know from the fact that
  // all exits (left) are analyzable that there must be a total dominance order
  // between them as each must dominate the latch. The visit order only
  // matters for the provably equal case.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse of
               // the normal dominance relation.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               llvm_unreachable("expected total dominance order!");
             });
#ifndef NDEBUG
  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
    assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
  }
#endif

  auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
    auto *OldCond = BI->getCondition();
    auto *NewCond = ConstantInt::get(OldCond->getType(),
                                     IsTaken ? ExitIfTrue : !ExitIfTrue);
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.push_back(OldCond);
  };

  bool Changed = false;
  SmallSet<const SCEV*, 8> DominatingExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this. This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // TODO: Given we know the backedge can't be taken, we should go ahead
    // and break it. Or at least, kill all the header phis and simplify.
    if (ExitCount->isZero()) {
      FoldExit(ExitingBB, true);
      Changed = true;
      continue;
    }

    // If we end up with a pointer exit count, bail. Note that we can end up
    // with a pointer exit count for one exiting block, and not for another in
    // the same loop.
    if (!ExitCount->getType()->isIntegerTy() ||
        !MaxExitCount->getType()->isIntegerTy())
      continue;

    Type *WiderType =
        SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
    ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
    MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
    assert(MaxExitCount->getType() == ExitCount->getType());

    // Can we prove that some other exit must be taken strictly before this
    // one?
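    // e.g. (illustrative only) if some other exit's count is %n and this
    // exit's count is %n + 10, and SCEV can prove %n u< %n + 10 at loop entry,
    // this test can never be the exit actually taken, so its condition is
    // folded to keep control in the loop.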
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
                                     MaxExitCount, ExitCount)) {
      FoldExit(ExitingBB, false);
      Changed = true;
      continue;
    }

    // As we run, keep track of which exit counts we've encountered. If we
    // find a duplicate, we've found an exit which would have exited on the
    // exiting iteration, but (from the visit order) strictly follows another
    // which does the same and is thus dead.
    if (!DominatingExitCounts.insert(ExitCount).second) {
      FoldExit(ExitingBB, false);
      Changed = true;
      continue;
    }

    // TODO: There might be another opportunity to leverage SCEV's reasoning
    // here. If we kept track of the min of dominating exits so far, we could
    // discharge exits with EC >= MDEC. This is less powerful than the existing
    // transform (since later exits aren't considered), but potentially more
    // powerful for any case where SCEV can prove a >=u b, but neither a == b
    // nor a >u b. Such a case is not currently known.
  }
  return Changed;
}

bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration. Note that this
  // doesn't either a) tell us the loop exits on the first iteration (unless
  // *all* exits are predicatable) or b) tell us *which* exit might be taken.
  // This transformation looks a lot like a restricted form of dead loop
  // elimination, but restricted to read-only loops and without necessarily
  // needing to kill the loop entirely.
  if (!LoopPredication)
    return false;

  if (!SE->hasLoopInvariantBackedgeTakenCount(L))
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow. We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) ||
      !SE->isLoopInvariant(ExactBTC, L) ||
      !isSafeToExpand(ExactBTC, *SE))
    return false;

  // If we end up with a pointer exit count, bail. It may be unsized.
  if (!ExactBTC->getType()->isIntegerTy())
    return false;

  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them. This assumes trivially lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
        BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    assert(!isa<SCEVCouldNotCompute>(ExactBTC) &&
           "implied by having exact trip count");
    if (!SE->isLoopInvariant(ExitCount, L) ||
        !isSafeToExpand(ExitCount, *SE))
      return true;

    // If we end up with a pointer exit count, bail. It may be unsized.
    if (!ExitCount->getType()->isIntegerTy())
      return true;

    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it. Consider
  // two exits (a) and (b) which would both exit on the same iteration. If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b). Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known to be
  // unequal.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse of
               // the normal dominance relation, plus a tie breaker for blocks
               // unordered by dominance.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               return A->getName() < B->getName();
             });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge). If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count. The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    return DT->dominates(ExitingBB, L->getLoopLatch());
  }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable. Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests. Any
  // suggestions on how to improve this? I can obviously bail out for outer
  // loops, but that seems less than ideal. MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO:isGuaranteedToTransfer
      if (I.mayHaveSideEffects() || I.mayThrow())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks.
A couple 2600 // of notes here: 2601 // 1) We don't bother to constant fold dominated exits with identical exit 2602 // counts; that's simply a form of CSE/equality propagation and we leave 2603 // it for dedicated passes. 2604 // 2) We insert the comparison at the branch. Hoisting introduces additional 2605 // legality constraints and we leave that to dedicated logic. We want to 2606 // predicate even if we can't insert a loop invariant expression as 2607 // peeling or unrolling will likely reduce the cost of the otherwise loop 2608 // varying check. 2609 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator()); 2610 IRBuilder<> B(L->getLoopPreheader()->getTerminator()); 2611 Value *ExactBTCV = nullptr; // Lazily generated if needed. 2612 for (BasicBlock *ExitingBB : ExitingBlocks) { 2613 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2614 2615 auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2616 Value *NewCond; 2617 if (ExitCount == ExactBTC) { 2618 NewCond = L->contains(BI->getSuccessor(0)) ? 2619 B.getFalse() : B.getTrue(); 2620 } else { 2621 Value *ECV = Rewriter.expandCodeFor(ExitCount); 2622 if (!ExactBTCV) 2623 ExactBTCV = Rewriter.expandCodeFor(ExactBTC); 2624 Value *RHS = ExactBTCV; 2625 if (ECV->getType() != RHS->getType()) { 2626 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType()); 2627 ECV = B.CreateZExt(ECV, WiderTy); 2628 RHS = B.CreateZExt(RHS, WiderTy); 2629 } 2630 auto Pred = L->contains(BI->getSuccessor(0)) ? 2631 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; 2632 NewCond = B.CreateICmp(Pred, ECV, RHS); 2633 } 2634 Value *OldCond = BI->getCondition(); 2635 BI->setCondition(NewCond); 2636 if (OldCond->use_empty()) 2637 DeadInsts.push_back(OldCond); 2638 Changed = true; 2639 } 2640 2641 return Changed; 2642 } 2643 2644 //===----------------------------------------------------------------------===// 2645 // IndVarSimplify driver. Manage several subpasses of IV simplification. 2646 //===----------------------------------------------------------------------===// 2647 2648 bool IndVarSimplify::run(Loop *L) { 2649 // We need (and expect!) the incoming loop to be in LCSSA. 2650 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2651 "LCSSA required to run indvars!"); 2652 2653 // If LoopSimplify form is not available, stay out of trouble. Some notes: 2654 // - LSR currently only supports LoopSimplify-form loops. Indvars' 2655 // canonicalization can be a pessimization without LSR to "clean up" 2656 // afterwards. 2657 // - We depend on having a preheader; in particular, 2658 // Loop::getCanonicalInductionVariable only supports loops with preheaders, 2659 // and we're in trouble if we can't find the induction variable even when 2660 // we've manually inserted one. 2661 // - LFTR relies on having a single backedge. 2662 if (!L->isLoopSimplifyForm()) 2663 return false; 2664 2665 #ifndef NDEBUG 2666 // Used below for a consistency check only 2667 const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); 2668 #endif 2669 2670 bool Changed = false; 2671 // If there are any floating-point recurrences, attempt to 2672 // transform them to use integer recurrences. 2673 Changed |= rewriteNonIntegerIVs(L); 2674 2675 // Create a rewriter object which we'll use to transform the code with. 2676 SCEVExpander Rewriter(*SE, DL, "indvars"); 2677 #ifndef NDEBUG 2678 Rewriter.setDebugType(DEBUG_TYPE); 2679 #endif 2680 2681 // Eliminate redundant IV users. 2682 // 2683 // Simplification works best when run before other consumers of SCEV. 
We 2684 // attempt to avoid evaluating SCEVs for sign/zero extend operations until 2685 // other expressions involving loop IVs have been evaluated. This helps SCEV 2686 // set no-wrap flags before normalizing sign/zero extension. 2687 Rewriter.disableCanonicalMode(); 2688 Changed |= simplifyAndExtend(L, Rewriter, LI); 2689 2690 // Check to see if we can compute the final value of any expressions 2691 // that are recurrent in the loop, and substitute the exit values from the 2692 // loop into any instructions outside of the loop that use the final values 2693 // of the current expressions. 2694 if (ReplaceExitValue != NeverRepl) { 2695 if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, Rewriter, DT, 2696 ReplaceExitValue, DeadInsts)) { 2697 NumReplaced += Rewrites; 2698 Changed = true; 2699 } 2700 } 2701 2702 // Eliminate redundant IV cycles. 2703 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); 2704 2705 // Try to eliminate loop exits based on analyzeable exit counts 2706 if (optimizeLoopExits(L, Rewriter)) { 2707 Changed = true; 2708 // Given we've changed exit counts, notify SCEV 2709 SE->forgetLoop(L); 2710 } 2711 2712 // Try to form loop invariant tests for loop exits by changing how many 2713 // iterations of the loop run when that is unobservable. 2714 if (predicateLoopExits(L, Rewriter)) { 2715 Changed = true; 2716 // Given we've changed exit counts, notify SCEV 2717 SE->forgetLoop(L); 2718 } 2719 2720 // If we have a trip count expression, rewrite the loop's exit condition 2721 // using it. 2722 if (!DisableLFTR) { 2723 SmallVector<BasicBlock*, 16> ExitingBlocks; 2724 L->getExitingBlocks(ExitingBlocks); 2725 for (BasicBlock *ExitingBB : ExitingBlocks) { 2726 // Can't rewrite non-branch yet. 2727 if (!isa<BranchInst>(ExitingBB->getTerminator())) 2728 continue; 2729 2730 // If our exitting block exits multiple loops, we can only rewrite the 2731 // innermost one. Otherwise, we're changing how many times the innermost 2732 // loop runs before it exits. 2733 if (LI->getLoopFor(ExitingBB) != L) 2734 continue; 2735 2736 if (!needsLFTR(L, ExitingBB)) 2737 continue; 2738 2739 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2740 if (isa<SCEVCouldNotCompute>(ExitCount)) 2741 continue; 2742 2743 // This was handled above, but as we form SCEVs, we can sometimes refine 2744 // existing ones; this allows exit counts to be folded to zero which 2745 // weren't when optimizeLoopExits saw them. Arguably, we should iterate 2746 // until stable to handle cases like this better. 2747 if (ExitCount->isZero()) 2748 continue; 2749 2750 PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT); 2751 if (!IndVar) 2752 continue; 2753 2754 // Avoid high cost expansions. Note: This heuristic is questionable in 2755 // that our definition of "high cost" is not exactly principled. 2756 if (Rewriter.isHighCostExpansion(ExitCount, L)) 2757 continue; 2758 2759 // Check preconditions for proper SCEVExpander operation. SCEV does not 2760 // express SCEVExpander's dependencies, such as LoopSimplify. Instead 2761 // any pass that uses the SCEVExpander must do it. This does not work 2762 // well for loop passes because SCEVExpander makes assumptions about 2763 // all loops, while LoopPassManager only forces the current loop to be 2764 // simplified. 2765 // 2766 // FIXME: SCEV expansion has no way to bail out, so the caller must 2767 // explicitly check any assumptions made by SCEV. Brittle. 
2768 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount); 2769 if (!AR || AR->getLoop()->getLoopPreheader()) 2770 Changed |= linearFunctionTestReplace(L, ExitingBB, 2771 ExitCount, IndVar, 2772 Rewriter); 2773 } 2774 } 2775 // Clear the rewriter cache, because values that are in the rewriter's cache 2776 // can be deleted in the loop below, causing the AssertingVH in the cache to 2777 // trigger. 2778 Rewriter.clear(); 2779 2780 // Now that we're done iterating through lists, clean up any instructions 2781 // which are now dead. 2782 while (!DeadInsts.empty()) 2783 if (Instruction *Inst = 2784 dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val())) 2785 Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); 2786 2787 // The Rewriter may not be used from this point on. 2788 2789 // Loop-invariant instructions in the preheader that aren't used in the 2790 // loop may be sunk below the loop to reduce register pressure. 2791 Changed |= sinkUnusedInvariants(L); 2792 2793 // rewriteFirstIterationLoopExitValues does not rely on the computation of 2794 // trip count and therefore can further simplify exit values in addition to 2795 // rewriteLoopExitValues. 2796 Changed |= rewriteFirstIterationLoopExitValues(L); 2797 2798 // Clean up dead instructions. 2799 Changed |= DeleteDeadPHIs(L->getHeader(), TLI); 2800 2801 // Check a post-condition. 2802 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2803 "Indvars did not preserve LCSSA!"); 2804 2805 // Verify that LFTR, and any other change have not interfered with SCEV's 2806 // ability to compute trip count. We may have *changed* the exit count, but 2807 // only by reducing it. 2808 #ifndef NDEBUG 2809 if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { 2810 SE->forgetLoop(L); 2811 const SCEV *NewBECount = SE->getBackedgeTakenCount(L); 2812 if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) < 2813 SE->getTypeSizeInBits(NewBECount->getType())) 2814 NewBECount = SE->getTruncateOrNoop(NewBECount, 2815 BackedgeTakenCount->getType()); 2816 else 2817 BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, 2818 NewBECount->getType()); 2819 assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount, 2820 NewBECount) && "indvars must preserve SCEV"); 2821 } 2822 #endif 2823 2824 return Changed; 2825 } 2826 2827 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, 2828 LoopStandardAnalysisResults &AR, 2829 LPMUpdater &) { 2830 Function *F = L.getHeader()->getParent(); 2831 const DataLayout &DL = F->getParent()->getDataLayout(); 2832 2833 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA); 2834 if (!IVS.run(&L)) 2835 return PreservedAnalyses::all(); 2836 2837 auto PA = getLoopPassPreservedAnalyses(); 2838 PA.preserveSet<CFGAnalyses>(); 2839 if (AR.MSSA) 2840 PA.preserve<MemorySSAAnalysis>(); 2841 return PA; 2842 } 2843 2844 namespace { 2845 2846 struct IndVarSimplifyLegacyPass : public LoopPass { 2847 static char ID; // Pass identification, replacement for typeid 2848 2849 IndVarSimplifyLegacyPass() : LoopPass(ID) { 2850 initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry()); 2851 } 2852 2853 bool runOnLoop(Loop *L, LPPassManager &LPM) override { 2854 if (skipLoop(L)) 2855 return false; 2856 2857 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 2858 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 2859 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2860 auto *TLIP = 
getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); 2861 auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr; 2862 auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); 2863 auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr; 2864 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 2865 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>(); 2866 MemorySSA *MSSA = nullptr; 2867 if (MSSAAnalysis) 2868 MSSA = &MSSAAnalysis->getMSSA(); 2869 2870 IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA); 2871 return IVS.run(L); 2872 } 2873 2874 void getAnalysisUsage(AnalysisUsage &AU) const override { 2875 AU.setPreservesCFG(); 2876 AU.addPreserved<MemorySSAWrapperPass>(); 2877 getLoopAnalysisUsage(AU); 2878 } 2879 }; 2880 2881 } // end anonymous namespace 2882 2883 char IndVarSimplifyLegacyPass::ID = 0; 2884 2885 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", 2886 "Induction Variable Simplification", false, false) 2887 INITIALIZE_PASS_DEPENDENCY(LoopPass) 2888 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars", 2889 "Induction Variable Simplification", false, false) 2890 2891 Pass *llvm::createIndVarSimplifyPass() { 2892 return new IndVarSimplifyLegacyPass(); 2893 } 2894