1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This transformation analyzes and transforms the induction variables (and 10 // computations derived from them) into simpler forms suitable for subsequent 11 // analysis and transformation. 12 // 13 // If the trip count of a loop is computable, this pass also makes the following 14 // changes: 15 // 1. The exit condition for the loop is canonicalized to compare the 16 // induction value against the exit value. This turns loops like: 17 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)' 18 // 2. Any use outside of the loop of an expression derived from the indvar 19 // is changed to compute the derived value outside of the loop, eliminating 20 // the dependence on the exit value of the induction variable. If the only 21 // purpose of the loop is to compute the exit value of some derived 22 // expression, this transformation will make the loop dead. 23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "llvm/Transforms/Scalar/IndVarSimplify.h" 27 #include "llvm/ADT/APFloat.h" 28 #include "llvm/ADT/APInt.h" 29 #include "llvm/ADT/ArrayRef.h" 30 #include "llvm/ADT/DenseMap.h" 31 #include "llvm/ADT/None.h" 32 #include "llvm/ADT/Optional.h" 33 #include "llvm/ADT/STLExtras.h" 34 #include "llvm/ADT/SmallPtrSet.h" 35 #include "llvm/ADT/SmallSet.h" 36 #include "llvm/ADT/SmallVector.h" 37 #include "llvm/ADT/Statistic.h" 38 #include "llvm/ADT/iterator_range.h" 39 #include "llvm/Analysis/LoopInfo.h" 40 #include "llvm/Analysis/LoopPass.h" 41 #include "llvm/Analysis/MemorySSA.h" 42 #include "llvm/Analysis/MemorySSAUpdater.h" 43 #include "llvm/Analysis/ScalarEvolution.h" 44 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 45 #include "llvm/Analysis/TargetLibraryInfo.h" 46 #include "llvm/Analysis/TargetTransformInfo.h" 47 #include "llvm/Analysis/ValueTracking.h" 48 #include "llvm/IR/BasicBlock.h" 49 #include "llvm/IR/Constant.h" 50 #include "llvm/IR/ConstantRange.h" 51 #include "llvm/IR/Constants.h" 52 #include "llvm/IR/DataLayout.h" 53 #include "llvm/IR/DerivedTypes.h" 54 #include "llvm/IR/Dominators.h" 55 #include "llvm/IR/Function.h" 56 #include "llvm/IR/IRBuilder.h" 57 #include "llvm/IR/InstrTypes.h" 58 #include "llvm/IR/Instruction.h" 59 #include "llvm/IR/Instructions.h" 60 #include "llvm/IR/IntrinsicInst.h" 61 #include "llvm/IR/Intrinsics.h" 62 #include "llvm/IR/Module.h" 63 #include "llvm/IR/Operator.h" 64 #include "llvm/IR/PassManager.h" 65 #include "llvm/IR/PatternMatch.h" 66 #include "llvm/IR/Type.h" 67 #include "llvm/IR/Use.h" 68 #include "llvm/IR/User.h" 69 #include "llvm/IR/Value.h" 70 #include "llvm/IR/ValueHandle.h" 71 #include "llvm/InitializePasses.h" 72 #include "llvm/Pass.h" 73 #include "llvm/Support/Casting.h" 74 #include "llvm/Support/CommandLine.h" 75 #include "llvm/Support/Compiler.h" 76 #include "llvm/Support/Debug.h" 77 #include "llvm/Support/ErrorHandling.h" 78 #include "llvm/Support/MathExtras.h" 79 #include "llvm/Support/raw_ostream.h" 80 #include "llvm/Transforms/Scalar.h" 81 #include "llvm/Transforms/Scalar/LoopPassManager.h" 82 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 83 #include "llvm/Transforms/Utils/Local.h" 84 #include "llvm/Transforms/Utils/LoopUtils.h" 85 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 86 #include "llvm/Transforms/Utils/SimplifyIndVar.h" 87 #include <cassert> 88 #include <cstdint> 89 #include <utility> 90 91 using namespace llvm; 92 93 #define DEBUG_TYPE "indvars" 94 95 STATISTIC(NumWidened , "Number of indvars widened"); 96 STATISTIC(NumReplaced , "Number of exit values replaced"); 97 STATISTIC(NumLFTR , "Number of loop exit tests replaced"); 98 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); 99 STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); 100 101 // Trip count verification can be enabled by default under NDEBUG if we 102 // implement a strong expression equivalence checker in SCEV. Until then, we 103 // use the verify-indvars flag, which may assert in some cases. 104 static cl::opt<bool> VerifyIndvars( 105 "verify-indvars", cl::Hidden, 106 cl::desc("Verify the ScalarEvolution result after running indvars. Has no " 107 "effect in release builds. (Note: this adds additional SCEV " 108 "queries potentially changing the analysis result)")); 109 110 static cl::opt<ReplaceExitVal> ReplaceExitValue( 111 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), 112 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), 113 cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), 114 clEnumValN(OnlyCheapRepl, "cheap", 115 "only replace exit value when the cost is cheap"), 116 clEnumValN(NoHardUse, "noharduse", 117 "only replace exit values when loop def likely dead"), 118 clEnumValN(AlwaysRepl, "always", 119 "always replace exit value whenever possible"))); 120 121 static cl::opt<bool> UsePostIncrementRanges( 122 "indvars-post-increment-ranges", cl::Hidden, 123 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), 124 cl::init(true)); 125 126 static cl::opt<bool> 127 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), 128 cl::desc("Disable Linear Function Test Replace optimization")); 129 130 static cl::opt<bool> 131 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), 132 cl::desc("Predicate conditions in read only loops")); 133 134 static cl::opt<bool> 135 AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), 136 cl::desc("Allow widening of indvars to eliminate s/zext")); 137 138 namespace { 139 140 struct RewritePhi; 141 142 class IndVarSimplify { 143 LoopInfo *LI; 144 ScalarEvolution *SE; 145 DominatorTree *DT; 146 const DataLayout &DL; 147 TargetLibraryInfo *TLI; 148 const TargetTransformInfo *TTI; 149 std::unique_ptr<MemorySSAUpdater> MSSAU; 150 151 SmallVector<WeakTrackingVH, 16> DeadInsts; 152 153 bool handleFloatingPointIV(Loop *L, PHINode *PH); 154 bool rewriteNonIntegerIVs(Loop *L); 155 156 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); 157 /// Try to eliminate loop exits based on analyzeable exit counts 158 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter); 159 /// Try to form loop invariant tests for loop exits by changing how many 160 /// iterations of the loop run when that is unobservable. 161 bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter); 162 163 bool rewriteFirstIterationLoopExitValues(Loop *L); 164 165 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 166 const SCEV *ExitCount, 167 PHINode *IndVar, SCEVExpander &Rewriter); 168 169 bool sinkUnusedInvariants(Loop *L); 170 171 public: 172 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, 173 const DataLayout &DL, TargetLibraryInfo *TLI, 174 TargetTransformInfo *TTI, MemorySSA *MSSA) 175 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) { 176 if (MSSA) 177 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); 178 } 179 180 bool run(Loop *L); 181 }; 182 183 } // end anonymous namespace 184 185 /// Determine the insertion point for this user. By default, insert immediately 186 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the 187 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest 188 /// common dominator for the incoming blocks. A nullptr can be returned if no 189 /// viable location is found: it may happen if User is a PHI and Def only comes 190 /// to this PHI from unreachable blocks. 191 static Instruction *getInsertPointForUses(Instruction *User, Value *Def, 192 DominatorTree *DT, LoopInfo *LI) { 193 PHINode *PHI = dyn_cast<PHINode>(User); 194 if (!PHI) 195 return User; 196 197 Instruction *InsertPt = nullptr; 198 for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { 199 if (PHI->getIncomingValue(i) != Def) 200 continue; 201 202 BasicBlock *InsertBB = PHI->getIncomingBlock(i); 203 204 if (!DT->isReachableFromEntry(InsertBB)) 205 continue; 206 207 if (!InsertPt) { 208 InsertPt = InsertBB->getTerminator(); 209 continue; 210 } 211 InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB); 212 InsertPt = InsertBB->getTerminator(); 213 } 214 215 // If we have skipped all inputs, it means that Def only comes to Phi from 216 // unreachable blocks. 217 if (!InsertPt) 218 return nullptr; 219 220 auto *DefI = dyn_cast<Instruction>(Def); 221 if (!DefI) 222 return InsertPt; 223 224 assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses"); 225 226 auto *L = LI->getLoopFor(DefI->getParent()); 227 assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent()))); 228 229 for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom()) 230 if (LI->getLoopFor(DTN->getBlock()) == L) 231 return DTN->getBlock()->getTerminator(); 232 233 llvm_unreachable("DefI dominates InsertPt!"); 234 } 235 236 //===----------------------------------------------------------------------===// 237 // rewriteNonIntegerIVs and helpers. Prefer integer IVs. 238 //===----------------------------------------------------------------------===// 239 240 /// Convert APF to an integer, if possible. 241 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) { 242 bool isExact = false; 243 // See if we can convert this to an int64_t 244 uint64_t UIntVal; 245 if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true, 246 APFloat::rmTowardZero, &isExact) != APFloat::opOK || 247 !isExact) 248 return false; 249 IntVal = UIntVal; 250 return true; 251 } 252 253 /// If the loop has floating induction variable then insert corresponding 254 /// integer induction variable if possible. 255 /// For example, 256 /// for(double i = 0; i < 10000; ++i) 257 /// bar(i) 258 /// is converted into 259 /// for(int i = 0; i < 10000; ++i) 260 /// bar((double)i); 261 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { 262 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); 263 unsigned BackEdge = IncomingEdge^1; 264 265 // Check incoming value. 266 auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge)); 267 268 int64_t InitValue; 269 if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue)) 270 return false; 271 272 // Check IV increment. Reject this PN if increment operation is not 273 // an add or increment value can not be represented by an integer. 274 auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); 275 if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false; 276 277 // If this is not an add of the PHI with a constantfp, or if the constant fp 278 // is not an integer, bail out. 279 ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); 280 int64_t IncValue; 281 if (IncValueVal == nullptr || Incr->getOperand(0) != PN || 282 !ConvertToSInt(IncValueVal->getValueAPF(), IncValue)) 283 return false; 284 285 // Check Incr uses. One user is PN and the other user is an exit condition 286 // used by the conditional terminator. 287 Value::user_iterator IncrUse = Incr->user_begin(); 288 Instruction *U1 = cast<Instruction>(*IncrUse++); 289 if (IncrUse == Incr->user_end()) return false; 290 Instruction *U2 = cast<Instruction>(*IncrUse++); 291 if (IncrUse != Incr->user_end()) return false; 292 293 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't 294 // only used by a branch, we can't transform it. 295 FCmpInst *Compare = dyn_cast<FCmpInst>(U1); 296 if (!Compare) 297 Compare = dyn_cast<FCmpInst>(U2); 298 if (!Compare || !Compare->hasOneUse() || 299 !isa<BranchInst>(Compare->user_back())) 300 return false; 301 302 BranchInst *TheBr = cast<BranchInst>(Compare->user_back()); 303 304 // We need to verify that the branch actually controls the iteration count 305 // of the loop. If not, the new IV can overflow and no one will notice. 306 // The branch block must be in the loop and one of the successors must be out 307 // of the loop. 308 assert(TheBr->isConditional() && "Can't use fcmp if not conditional"); 309 if (!L->contains(TheBr->getParent()) || 310 (L->contains(TheBr->getSuccessor(0)) && 311 L->contains(TheBr->getSuccessor(1)))) 312 return false; 313 314 // If it isn't a comparison with an integer-as-fp (the exit value), we can't 315 // transform it. 316 ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); 317 int64_t ExitValue; 318 if (ExitValueVal == nullptr || 319 !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) 320 return false; 321 322 // Find new predicate for integer comparison. 323 CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; 324 switch (Compare->getPredicate()) { 325 default: return false; // Unknown comparison. 326 case CmpInst::FCMP_OEQ: 327 case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; 328 case CmpInst::FCMP_ONE: 329 case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break; 330 case CmpInst::FCMP_OGT: 331 case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break; 332 case CmpInst::FCMP_OGE: 333 case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break; 334 case CmpInst::FCMP_OLT: 335 case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break; 336 case CmpInst::FCMP_OLE: 337 case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; 338 } 339 340 // We convert the floating point induction variable to a signed i32 value if 341 // we can. This is only safe if the comparison will not overflow in a way 342 // that won't be trapped by the integer equivalent operations. Check for this 343 // now. 344 // TODO: We could use i64 if it is native and the range requires it. 345 346 // The start/stride/exit values must all fit in signed i32. 347 if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) 348 return false; 349 350 // If not actually striding (add x, 0.0), avoid touching the code. 351 if (IncValue == 0) 352 return false; 353 354 // Positive and negative strides have different safety conditions. 355 if (IncValue > 0) { 356 // If we have a positive stride, we require the init to be less than the 357 // exit value. 358 if (InitValue >= ExitValue) 359 return false; 360 361 uint32_t Range = uint32_t(ExitValue-InitValue); 362 // Check for infinite loop, either: 363 // while (i <= Exit) or until (i > Exit) 364 if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) { 365 if (++Range == 0) return false; // Range overflows. 366 } 367 368 unsigned Leftover = Range % uint32_t(IncValue); 369 370 // If this is an equality comparison, we require that the strided value 371 // exactly land on the exit value, otherwise the IV condition will wrap 372 // around and do things the fp IV wouldn't. 373 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 374 Leftover != 0) 375 return false; 376 377 // If the stride would wrap around the i32 before exiting, we can't 378 // transform the IV. 379 if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) 380 return false; 381 } else { 382 // If we have a negative stride, we require the init to be greater than the 383 // exit value. 384 if (InitValue <= ExitValue) 385 return false; 386 387 uint32_t Range = uint32_t(InitValue-ExitValue); 388 // Check for infinite loop, either: 389 // while (i >= Exit) or until (i < Exit) 390 if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) { 391 if (++Range == 0) return false; // Range overflows. 392 } 393 394 unsigned Leftover = Range % uint32_t(-IncValue); 395 396 // If this is an equality comparison, we require that the strided value 397 // exactly land on the exit value, otherwise the IV condition will wrap 398 // around and do things the fp IV wouldn't. 399 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 400 Leftover != 0) 401 return false; 402 403 // If the stride would wrap around the i32 before exiting, we can't 404 // transform the IV. 405 if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) 406 return false; 407 } 408 409 IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); 410 411 // Insert new integer induction variable. 412 PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); 413 NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), 414 PN->getIncomingBlock(IncomingEdge)); 415 416 Value *NewAdd = 417 BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue), 418 Incr->getName()+".int", Incr); 419 NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge)); 420 421 ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd, 422 ConstantInt::get(Int32Ty, ExitValue), 423 Compare->getName()); 424 425 // In the following deletions, PN may become dead and may be deleted. 426 // Use a WeakTrackingVH to observe whether this happens. 427 WeakTrackingVH WeakPH = PN; 428 429 // Delete the old floating point exit comparison. The branch starts using the 430 // new comparison. 431 NewCompare->takeName(Compare); 432 Compare->replaceAllUsesWith(NewCompare); 433 RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get()); 434 435 // Delete the old floating point increment. 436 Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); 437 RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get()); 438 439 // If the FP induction variable still has uses, this is because something else 440 // in the loop uses its value. In order to canonicalize the induction 441 // variable, we chose to eliminate the IV and rewrite it in terms of an 442 // int->fp cast. 443 // 444 // We give preference to sitofp over uitofp because it is faster on most 445 // platforms. 446 if (WeakPH) { 447 Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", 448 &*PN->getParent()->getFirstInsertionPt()); 449 PN->replaceAllUsesWith(Conv); 450 RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get()); 451 } 452 return true; 453 } 454 455 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) { 456 // First step. Check to see if there are any floating-point recurrences. 457 // If there are, change them into integer recurrences, permitting analysis by 458 // the SCEV routines. 459 BasicBlock *Header = L->getHeader(); 460 461 SmallVector<WeakTrackingVH, 8> PHIs; 462 for (PHINode &PN : Header->phis()) 463 PHIs.push_back(&PN); 464 465 bool Changed = false; 466 for (unsigned i = 0, e = PHIs.size(); i != e; ++i) 467 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i])) 468 Changed |= handleFloatingPointIV(L, PN); 469 470 // If the loop previously had floating-point IV, ScalarEvolution 471 // may not have been able to compute a trip count. Now that we've done some 472 // re-writing, the trip count may be computable. 473 if (Changed) 474 SE->forgetLoop(L); 475 return Changed; 476 } 477 478 //===---------------------------------------------------------------------===// 479 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know 480 // they will exit at the first iteration. 481 //===---------------------------------------------------------------------===// 482 483 /// Check to see if this loop has loop invariant conditions which lead to loop 484 /// exits. If so, we know that if the exit path is taken, it is at the first 485 /// loop iteration. This lets us predict exit values of PHI nodes that live in 486 /// loop header. 487 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) { 488 // Verify the input to the pass is already in LCSSA form. 489 assert(L->isLCSSAForm(*DT)); 490 491 SmallVector<BasicBlock *, 8> ExitBlocks; 492 L->getUniqueExitBlocks(ExitBlocks); 493 494 bool MadeAnyChanges = false; 495 for (auto *ExitBB : ExitBlocks) { 496 // If there are no more PHI nodes in this exit block, then no more 497 // values defined inside the loop are used on this path. 498 for (PHINode &PN : ExitBB->phis()) { 499 for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues(); 500 IncomingValIdx != E; ++IncomingValIdx) { 501 auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx); 502 503 // Can we prove that the exit must run on the first iteration if it 504 // runs at all? (i.e. early exits are fine for our purposes, but 505 // traces which lead to this exit being taken on the 2nd iteration 506 // aren't.) Note that this is about whether the exit branch is 507 // executed, not about whether it is taken. 508 if (!L->getLoopLatch() || 509 !DT->dominates(IncomingBB, L->getLoopLatch())) 510 continue; 511 512 // Get condition that leads to the exit path. 513 auto *TermInst = IncomingBB->getTerminator(); 514 515 Value *Cond = nullptr; 516 if (auto *BI = dyn_cast<BranchInst>(TermInst)) { 517 // Must be a conditional branch, otherwise the block 518 // should not be in the loop. 519 Cond = BI->getCondition(); 520 } else if (auto *SI = dyn_cast<SwitchInst>(TermInst)) 521 Cond = SI->getCondition(); 522 else 523 continue; 524 525 if (!L->isLoopInvariant(Cond)) 526 continue; 527 528 auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx)); 529 530 // Only deal with PHIs in the loop header. 531 if (!ExitVal || ExitVal->getParent() != L->getHeader()) 532 continue; 533 534 // If ExitVal is a PHI on the loop header, then we know its 535 // value along this exit because the exit can only be taken 536 // on the first iteration. 537 auto *LoopPreheader = L->getLoopPreheader(); 538 assert(LoopPreheader && "Invalid loop"); 539 int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader); 540 if (PreheaderIdx != -1) { 541 assert(ExitVal->getParent() == L->getHeader() && 542 "ExitVal must be in loop header"); 543 MadeAnyChanges = true; 544 PN.setIncomingValue(IncomingValIdx, 545 ExitVal->getIncomingValue(PreheaderIdx)); 546 } 547 } 548 } 549 } 550 return MadeAnyChanges; 551 } 552 553 //===----------------------------------------------------------------------===// 554 // IV Widening - Extend the width of an IV to cover its widest uses. 555 //===----------------------------------------------------------------------===// 556 557 namespace { 558 559 // Collect information about induction variables that are used by sign/zero 560 // extend operations. This information is recorded by CollectExtend and provides 561 // the input to WidenIV. 562 struct WideIVInfo { 563 PHINode *NarrowIV = nullptr; 564 565 // Widest integer type created [sz]ext 566 Type *WidestNativeType = nullptr; 567 568 // Was a sext user seen before a zext? 569 bool IsSigned = false; 570 }; 571 572 } // end anonymous namespace 573 574 /// Update information about the induction variable that is extended by this 575 /// sign or zero extend operation. This is used to determine the final width of 576 /// the IV before actually widening it. 577 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, 578 const TargetTransformInfo *TTI) { 579 bool IsSigned = Cast->getOpcode() == Instruction::SExt; 580 if (!IsSigned && Cast->getOpcode() != Instruction::ZExt) 581 return; 582 583 Type *Ty = Cast->getType(); 584 uint64_t Width = SE->getTypeSizeInBits(Ty); 585 if (!Cast->getModule()->getDataLayout().isLegalInteger(Width)) 586 return; 587 588 // Check that `Cast` actually extends the induction variable (we rely on this 589 // later). This takes care of cases where `Cast` is extending a truncation of 590 // the narrow induction variable, and thus can end up being narrower than the 591 // "narrow" induction variable. 592 uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType()); 593 if (NarrowIVWidth >= Width) 594 return; 595 596 // Cast is either an sext or zext up to this point. 597 // We should not widen an indvar if arithmetics on the wider indvar are more 598 // expensive than those on the narrower indvar. We check only the cost of ADD 599 // because at least an ADD is required to increment the induction variable. We 600 // could compute more comprehensively the cost of all instructions on the 601 // induction variable when necessary. 602 if (TTI && 603 TTI->getArithmeticInstrCost(Instruction::Add, Ty) > 604 TTI->getArithmeticInstrCost(Instruction::Add, 605 Cast->getOperand(0)->getType())) { 606 return; 607 } 608 609 if (!WI.WidestNativeType) { 610 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); 611 WI.IsSigned = IsSigned; 612 return; 613 } 614 615 // We extend the IV to satisfy the sign of its first user, arbitrarily. 616 if (WI.IsSigned != IsSigned) 617 return; 618 619 if (Width > SE->getTypeSizeInBits(WI.WidestNativeType)) 620 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); 621 } 622 623 namespace { 624 625 /// Record a link in the Narrow IV def-use chain along with the WideIV that 626 /// computes the same value as the Narrow IV def. This avoids caching Use* 627 /// pointers. 628 struct NarrowIVDefUse { 629 Instruction *NarrowDef = nullptr; 630 Instruction *NarrowUse = nullptr; 631 Instruction *WideDef = nullptr; 632 633 // True if the narrow def is never negative. Tracking this information lets 634 // us use a sign extension instead of a zero extension or vice versa, when 635 // profitable and legal. 636 bool NeverNegative = false; 637 638 NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD, 639 bool NeverNegative) 640 : NarrowDef(ND), NarrowUse(NU), WideDef(WD), 641 NeverNegative(NeverNegative) {} 642 }; 643 644 /// The goal of this transform is to remove sign and zero extends without 645 /// creating any new induction variables. To do this, it creates a new phi of 646 /// the wider type and redirects all users, either removing extends or inserting 647 /// truncs whenever we stop propagating the type. 648 class WidenIV { 649 // Parameters 650 PHINode *OrigPhi; 651 Type *WideType; 652 653 // Context 654 LoopInfo *LI; 655 Loop *L; 656 ScalarEvolution *SE; 657 DominatorTree *DT; 658 659 // Does the module have any calls to the llvm.experimental.guard intrinsic 660 // at all? If not we can avoid scanning instructions looking for guards. 661 bool HasGuards; 662 663 // Result 664 PHINode *WidePhi = nullptr; 665 Instruction *WideInc = nullptr; 666 const SCEV *WideIncExpr = nullptr; 667 SmallVectorImpl<WeakTrackingVH> &DeadInsts; 668 669 SmallPtrSet<Instruction *,16> Widened; 670 SmallVector<NarrowIVDefUse, 8> NarrowIVUsers; 671 672 enum ExtendKind { ZeroExtended, SignExtended, Unknown }; 673 674 // A map tracking the kind of extension used to widen each narrow IV 675 // and narrow IV user. 676 // Key: pointer to a narrow IV or IV user. 677 // Value: the kind of extension used to widen this Instruction. 678 DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap; 679 680 using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>; 681 682 // A map with control-dependent ranges for post increment IV uses. The key is 683 // a pair of IV def and a use of this def denoting the context. The value is 684 // a ConstantRange representing possible values of the def at the given 685 // context. 686 DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos; 687 688 Optional<ConstantRange> getPostIncRangeInfo(Value *Def, 689 Instruction *UseI) { 690 DefUserPair Key(Def, UseI); 691 auto It = PostIncRangeInfos.find(Key); 692 return It == PostIncRangeInfos.end() 693 ? Optional<ConstantRange>(None) 694 : Optional<ConstantRange>(It->second); 695 } 696 697 void calculatePostIncRanges(PHINode *OrigPhi); 698 void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser); 699 700 void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) { 701 DefUserPair Key(Def, UseI); 702 auto It = PostIncRangeInfos.find(Key); 703 if (It == PostIncRangeInfos.end()) 704 PostIncRangeInfos.insert({Key, R}); 705 else 706 It->second = R.intersectWith(It->second); 707 } 708 709 public: 710 WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, 711 DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI, 712 bool HasGuards) 713 : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo), 714 L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), 715 HasGuards(HasGuards), DeadInsts(DI) { 716 assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); 717 ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended; 718 } 719 720 PHINode *createWideIV(SCEVExpander &Rewriter); 721 722 protected: 723 Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned, 724 Instruction *Use); 725 726 Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR); 727 Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU, 728 const SCEVAddRecExpr *WideAR); 729 Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU); 730 731 ExtendKind getExtendKind(Instruction *I); 732 733 using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>; 734 735 WidenedRecTy getWideRecurrence(NarrowIVDefUse DU); 736 737 WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU); 738 739 const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, 740 unsigned OpCode) const; 741 742 Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter); 743 744 bool widenLoopCompare(NarrowIVDefUse DU); 745 bool widenWithVariantUse(NarrowIVDefUse DU); 746 void widenWithVariantUseCodegen(NarrowIVDefUse DU); 747 748 void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); 749 }; 750 751 } // end anonymous namespace 752 753 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType, 754 bool IsSigned, Instruction *Use) { 755 // Set the debug location and conservative insertion point. 756 IRBuilder<> Builder(Use); 757 // Hoist the insertion point into loop preheaders as far as possible. 758 for (const Loop *L = LI->getLoopFor(Use->getParent()); 759 L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper); 760 L = L->getParentLoop()) 761 Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator()); 762 763 return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) : 764 Builder.CreateZExt(NarrowOper, WideType); 765 } 766 767 /// Instantiate a wide operation to replace a narrow operation. This only needs 768 /// to handle operations that can evaluation to SCEVAddRec. It can safely return 769 /// 0 for any operation we decide not to clone. 770 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU, 771 const SCEVAddRecExpr *WideAR) { 772 unsigned Opcode = DU.NarrowUse->getOpcode(); 773 switch (Opcode) { 774 default: 775 return nullptr; 776 case Instruction::Add: 777 case Instruction::Mul: 778 case Instruction::UDiv: 779 case Instruction::Sub: 780 return cloneArithmeticIVUser(DU, WideAR); 781 782 case Instruction::And: 783 case Instruction::Or: 784 case Instruction::Xor: 785 case Instruction::Shl: 786 case Instruction::LShr: 787 case Instruction::AShr: 788 return cloneBitwiseIVUser(DU); 789 } 790 } 791 792 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) { 793 Instruction *NarrowUse = DU.NarrowUse; 794 Instruction *NarrowDef = DU.NarrowDef; 795 Instruction *WideDef = DU.WideDef; 796 797 LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n"); 798 799 // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything 800 // about the narrow operand yet so must insert a [sz]ext. It is probably loop 801 // invariant and will be folded or hoisted. If it actually comes from a 802 // widened IV, it should be removed during a future call to widenIVUse. 803 bool IsSigned = getExtendKind(NarrowDef) == SignExtended; 804 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) 805 ? WideDef 806 : createExtendInst(NarrowUse->getOperand(0), WideType, 807 IsSigned, NarrowUse); 808 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) 809 ? WideDef 810 : createExtendInst(NarrowUse->getOperand(1), WideType, 811 IsSigned, NarrowUse); 812 813 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 814 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 815 NarrowBO->getName()); 816 IRBuilder<> Builder(NarrowUse); 817 Builder.Insert(WideBO); 818 WideBO->copyIRFlags(NarrowBO); 819 return WideBO; 820 } 821 822 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU, 823 const SCEVAddRecExpr *WideAR) { 824 Instruction *NarrowUse = DU.NarrowUse; 825 Instruction *NarrowDef = DU.NarrowDef; 826 Instruction *WideDef = DU.WideDef; 827 828 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); 829 830 unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1; 831 832 // We're trying to find X such that 833 // 834 // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X 835 // 836 // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef), 837 // and check using SCEV if any of them are correct. 838 839 // Returns true if extending NonIVNarrowDef according to `SignExt` is a 840 // correct solution to X. 841 auto GuessNonIVOperand = [&](bool SignExt) { 842 const SCEV *WideLHS; 843 const SCEV *WideRHS; 844 845 auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) { 846 if (SignExt) 847 return SE->getSignExtendExpr(S, Ty); 848 return SE->getZeroExtendExpr(S, Ty); 849 }; 850 851 if (IVOpIdx == 0) { 852 WideLHS = SE->getSCEV(WideDef); 853 const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1)); 854 WideRHS = GetExtend(NarrowRHS, WideType); 855 } else { 856 const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0)); 857 WideLHS = GetExtend(NarrowLHS, WideType); 858 WideRHS = SE->getSCEV(WideDef); 859 } 860 861 // WideUse is "WideDef `op.wide` X" as described in the comment. 862 const SCEV *WideUse = nullptr; 863 864 switch (NarrowUse->getOpcode()) { 865 default: 866 llvm_unreachable("No other possibility!"); 867 868 case Instruction::Add: 869 WideUse = SE->getAddExpr(WideLHS, WideRHS); 870 break; 871 872 case Instruction::Mul: 873 WideUse = SE->getMulExpr(WideLHS, WideRHS); 874 break; 875 876 case Instruction::UDiv: 877 WideUse = SE->getUDivExpr(WideLHS, WideRHS); 878 break; 879 880 case Instruction::Sub: 881 WideUse = SE->getMinusSCEV(WideLHS, WideRHS); 882 break; 883 } 884 885 return WideUse == WideAR; 886 }; 887 888 bool SignExtend = getExtendKind(NarrowDef) == SignExtended; 889 if (!GuessNonIVOperand(SignExtend)) { 890 SignExtend = !SignExtend; 891 if (!GuessNonIVOperand(SignExtend)) 892 return nullptr; 893 } 894 895 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) 896 ? WideDef 897 : createExtendInst(NarrowUse->getOperand(0), WideType, 898 SignExtend, NarrowUse); 899 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) 900 ? WideDef 901 : createExtendInst(NarrowUse->getOperand(1), WideType, 902 SignExtend, NarrowUse); 903 904 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 905 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 906 NarrowBO->getName()); 907 908 IRBuilder<> Builder(NarrowUse); 909 Builder.Insert(WideBO); 910 WideBO->copyIRFlags(NarrowBO); 911 return WideBO; 912 } 913 914 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) { 915 auto It = ExtendKindMap.find(I); 916 assert(It != ExtendKindMap.end() && "Instruction not yet extended!"); 917 return It->second; 918 } 919 920 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, 921 unsigned OpCode) const { 922 if (OpCode == Instruction::Add) 923 return SE->getAddExpr(LHS, RHS); 924 if (OpCode == Instruction::Sub) 925 return SE->getMinusSCEV(LHS, RHS); 926 if (OpCode == Instruction::Mul) 927 return SE->getMulExpr(LHS, RHS); 928 929 llvm_unreachable("Unsupported opcode."); 930 } 931 932 /// No-wrap operations can transfer sign extension of their result to their 933 /// operands. Generate the SCEV value for the widened operation without 934 /// actually modifying the IR yet. If the expression after extending the 935 /// operands is an AddRec for this loop, return the AddRec and the kind of 936 /// extension used. 937 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) { 938 // Handle the common case of add<nsw/nuw> 939 const unsigned OpCode = DU.NarrowUse->getOpcode(); 940 // Only Add/Sub/Mul instructions supported yet. 941 if (OpCode != Instruction::Add && OpCode != Instruction::Sub && 942 OpCode != Instruction::Mul) 943 return {nullptr, Unknown}; 944 945 // One operand (NarrowDef) has already been extended to WideDef. Now determine 946 // if extending the other will lead to a recurrence. 947 const unsigned ExtendOperIdx = 948 DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0; 949 assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU"); 950 951 const SCEV *ExtendOperExpr = nullptr; 952 const OverflowingBinaryOperator *OBO = 953 cast<OverflowingBinaryOperator>(DU.NarrowUse); 954 ExtendKind ExtKind = getExtendKind(DU.NarrowDef); 955 if (ExtKind == SignExtended && OBO->hasNoSignedWrap()) 956 ExtendOperExpr = SE->getSignExtendExpr( 957 SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); 958 else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap()) 959 ExtendOperExpr = SE->getZeroExtendExpr( 960 SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); 961 else 962 return {nullptr, Unknown}; 963 964 // When creating this SCEV expr, don't apply the current operations NSW or NUW 965 // flags. This instruction may be guarded by control flow that the no-wrap 966 // behavior depends on. Non-control-equivalent instructions can be mapped to 967 // the same SCEV expression, and it would be incorrect to transfer NSW/NUW 968 // semantics to those operations. 969 const SCEV *lhs = SE->getSCEV(DU.WideDef); 970 const SCEV *rhs = ExtendOperExpr; 971 972 // Let's swap operands to the initial order for the case of non-commutative 973 // operations, like SUB. See PR21014. 974 if (ExtendOperIdx == 0) 975 std::swap(lhs, rhs); 976 const SCEVAddRecExpr *AddRec = 977 dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode)); 978 979 if (!AddRec || AddRec->getLoop() != L) 980 return {nullptr, Unknown}; 981 982 return {AddRec, ExtKind}; 983 } 984 985 /// Is this instruction potentially interesting for further simplification after 986 /// widening it's type? In other words, can the extend be safely hoisted out of 987 /// the loop with SCEV reducing the value to a recurrence on the same loop. If 988 /// so, return the extended recurrence and the kind of extension used. Otherwise 989 /// return {nullptr, Unknown}. 990 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) { 991 if (!SE->isSCEVable(DU.NarrowUse->getType())) 992 return {nullptr, Unknown}; 993 994 const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse); 995 if (SE->getTypeSizeInBits(NarrowExpr->getType()) >= 996 SE->getTypeSizeInBits(WideType)) { 997 // NarrowUse implicitly widens its operand. e.g. a gep with a narrow 998 // index. So don't follow this use. 999 return {nullptr, Unknown}; 1000 } 1001 1002 const SCEV *WideExpr; 1003 ExtendKind ExtKind; 1004 if (DU.NeverNegative) { 1005 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); 1006 if (isa<SCEVAddRecExpr>(WideExpr)) 1007 ExtKind = SignExtended; 1008 else { 1009 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); 1010 ExtKind = ZeroExtended; 1011 } 1012 } else if (getExtendKind(DU.NarrowDef) == SignExtended) { 1013 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); 1014 ExtKind = SignExtended; 1015 } else { 1016 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); 1017 ExtKind = ZeroExtended; 1018 } 1019 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); 1020 if (!AddRec || AddRec->getLoop() != L) 1021 return {nullptr, Unknown}; 1022 return {AddRec, ExtKind}; 1023 } 1024 1025 /// This IV user cannot be widened. Replace this use of the original narrow IV 1026 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV. 1027 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) { 1028 auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); 1029 if (!InsertPt) 1030 return; 1031 LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user " 1032 << *DU.NarrowUse << "\n"); 1033 IRBuilder<> Builder(InsertPt); 1034 Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); 1035 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); 1036 } 1037 1038 /// If the narrow use is a compare instruction, then widen the compare 1039 // (and possibly the other operand). The extend operation is hoisted into the 1040 // loop preheader as far as possible. 1041 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) { 1042 ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse); 1043 if (!Cmp) 1044 return false; 1045 1046 // We can legally widen the comparison in the following two cases: 1047 // 1048 // - The signedness of the IV extension and comparison match 1049 // 1050 // - The narrow IV is always positive (and thus its sign extension is equal 1051 // to its zero extension). For instance, let's say we're zero extending 1052 // %narrow for the following use 1053 // 1054 // icmp slt i32 %narrow, %val ... (A) 1055 // 1056 // and %narrow is always positive. Then 1057 // 1058 // (A) == icmp slt i32 sext(%narrow), sext(%val) 1059 // == icmp slt i32 zext(%narrow), sext(%val) 1060 bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended; 1061 if (!(DU.NeverNegative || IsSigned == Cmp->isSigned())) 1062 return false; 1063 1064 Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0); 1065 unsigned CastWidth = SE->getTypeSizeInBits(Op->getType()); 1066 unsigned IVWidth = SE->getTypeSizeInBits(WideType); 1067 assert(CastWidth <= IVWidth && "Unexpected width while widening compare."); 1068 1069 // Widen the compare instruction. 1070 auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); 1071 if (!InsertPt) 1072 return false; 1073 IRBuilder<> Builder(InsertPt); 1074 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); 1075 1076 // Widen the other operand of the compare, if necessary. 1077 if (CastWidth < IVWidth) { 1078 Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp); 1079 DU.NarrowUse->replaceUsesOfWith(Op, ExtOp); 1080 } 1081 return true; 1082 } 1083 1084 // The widenIVUse avoids generating trunc by evaluating the use as AddRec, this 1085 // will not work when: 1086 // 1) SCEV traces back to an instruction inside the loop that SCEV can not 1087 // expand, eg. add %indvar, (load %addr) 1088 // 2) SCEV finds a loop variant, eg. add %indvar, %loopvariant 1089 // While SCEV fails to avoid trunc, we can still try to use instruction 1090 // combining approach to prove trunc is not required. This can be further 1091 // extended with other instruction combining checks, but for now we handle the 1092 // following case (sub can be "add" and "mul", "nsw + sext" can be "nus + zext") 1093 // 1094 // Src: 1095 // %c = sub nsw %b, %indvar 1096 // %d = sext %c to i64 1097 // Dst: 1098 // %indvar.ext1 = sext %indvar to i64 1099 // %m = sext %b to i64 1100 // %d = sub nsw i64 %m, %indvar.ext1 1101 // Therefore, as long as the result of add/sub/mul is extended to wide type, no 1102 // trunc is required regardless of how %b is generated. This pattern is common 1103 // when calculating address in 64 bit architecture 1104 bool WidenIV::widenWithVariantUse(NarrowIVDefUse DU) { 1105 Instruction *NarrowUse = DU.NarrowUse; 1106 Instruction *NarrowDef = DU.NarrowDef; 1107 Instruction *WideDef = DU.WideDef; 1108 1109 // Handle the common case of add<nsw/nuw> 1110 const unsigned OpCode = NarrowUse->getOpcode(); 1111 // Only Add/Sub/Mul instructions are supported. 1112 if (OpCode != Instruction::Add && OpCode != Instruction::Sub && 1113 OpCode != Instruction::Mul) 1114 return false; 1115 1116 // The operand that is not defined by NarrowDef of DU. Let's call it the 1117 // other operand. 1118 unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0; 1119 assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef && 1120 "bad DU"); 1121 1122 const OverflowingBinaryOperator *OBO = 1123 cast<OverflowingBinaryOperator>(NarrowUse); 1124 ExtendKind ExtKind = getExtendKind(NarrowDef); 1125 bool CanSignExtend = ExtKind == SignExtended && OBO->hasNoSignedWrap(); 1126 bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap(); 1127 if (!CanSignExtend && !CanZeroExtend) 1128 return false; 1129 1130 // Verifying that Defining operand is an AddRec 1131 const SCEV *Op1 = SE->getSCEV(WideDef); 1132 const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1); 1133 if (!AddRecOp1 || AddRecOp1->getLoop() != L) 1134 return false; 1135 1136 if (ExtKind == SignExtended) { 1137 for (Use &U : NarrowUse->uses()) { 1138 SExtInst *User = dyn_cast<SExtInst>(U.getUser()); 1139 if (!User || User->getType() != WideType) 1140 return false; 1141 } 1142 } else { // ExtKind == ZeroExtended 1143 for (Use &U : NarrowUse->uses()) { 1144 ZExtInst *User = dyn_cast<ZExtInst>(U.getUser()); 1145 if (!User || User->getType() != WideType) 1146 return false; 1147 } 1148 } 1149 1150 return true; 1151 } 1152 1153 /// Special Case for widening with loop variant (see 1154 /// WidenIV::widenWithVariant). This is the code generation part. 1155 void WidenIV::widenWithVariantUseCodegen(NarrowIVDefUse DU) { 1156 Instruction *NarrowUse = DU.NarrowUse; 1157 Instruction *NarrowDef = DU.NarrowDef; 1158 Instruction *WideDef = DU.WideDef; 1159 1160 ExtendKind ExtKind = getExtendKind(NarrowDef); 1161 assert(ExtKind != Unknown && "Unknown ExtKind not handled"); 1162 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); 1163 1164 // Generating a widening use instruction. 1165 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) 1166 ? WideDef 1167 : createExtendInst(NarrowUse->getOperand(0), WideType, 1168 ExtKind, NarrowUse); 1169 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) 1170 ? WideDef 1171 : createExtendInst(NarrowUse->getOperand(1), WideType, 1172 ExtKind, NarrowUse); 1173 1174 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 1175 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 1176 NarrowBO->getName()); 1177 IRBuilder<> Builder(NarrowUse); 1178 Builder.Insert(WideBO); 1179 WideBO->copyIRFlags(NarrowBO); 1180 ExtendKindMap[NarrowUse] = ExtKind; 1181 1182 for (Use &U : NarrowUse->uses()) { 1183 Instruction *User = nullptr; 1184 if (ExtKind == SignExtended) 1185 User = cast<SExtInst>(U.getUser()); 1186 else 1187 User = cast<ZExtInst>(U.getUser()); 1188 assert(User->getType() == WideType && "Checked before!"); 1189 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " 1190 << *WideBO << "\n"); 1191 ++NumElimExt; 1192 User->replaceAllUsesWith(WideBO); 1193 DeadInsts.emplace_back(User); 1194 } 1195 } 1196 1197 /// Determine whether an individual user of the narrow IV can be widened. If so, 1198 /// return the wide clone of the user. 1199 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { 1200 assert(ExtendKindMap.count(DU.NarrowDef) && 1201 "Should already know the kind of extension used to widen NarrowDef"); 1202 1203 // Stop traversing the def-use chain at inner-loop phis or post-loop phis. 1204 if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) { 1205 if (LI->getLoopFor(UsePhi->getParent()) != L) { 1206 // For LCSSA phis, sink the truncate outside the loop. 1207 // After SimplifyCFG most loop exit targets have a single predecessor. 1208 // Otherwise fall back to a truncate within the loop. 1209 if (UsePhi->getNumOperands() != 1) 1210 truncateIVUse(DU, DT, LI); 1211 else { 1212 // Widening the PHI requires us to insert a trunc. The logical place 1213 // for this trunc is in the same BB as the PHI. This is not possible if 1214 // the BB is terminated by a catchswitch. 1215 if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator())) 1216 return nullptr; 1217 1218 PHINode *WidePhi = 1219 PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", 1220 UsePhi); 1221 WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0)); 1222 IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt()); 1223 Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType()); 1224 UsePhi->replaceAllUsesWith(Trunc); 1225 DeadInsts.emplace_back(UsePhi); 1226 LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to " 1227 << *WidePhi << "\n"); 1228 } 1229 return nullptr; 1230 } 1231 } 1232 1233 // This narrow use can be widened by a sext if it's non-negative or its narrow 1234 // def was widended by a sext. Same for zext. 1235 auto canWidenBySExt = [&]() { 1236 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended; 1237 }; 1238 auto canWidenByZExt = [&]() { 1239 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended; 1240 }; 1241 1242 // Our raison d'etre! Eliminate sign and zero extension. 1243 if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) || 1244 (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) { 1245 Value *NewDef = DU.WideDef; 1246 if (DU.NarrowUse->getType() != WideType) { 1247 unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType()); 1248 unsigned IVWidth = SE->getTypeSizeInBits(WideType); 1249 if (CastWidth < IVWidth) { 1250 // The cast isn't as wide as the IV, so insert a Trunc. 1251 IRBuilder<> Builder(DU.NarrowUse); 1252 NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType()); 1253 } 1254 else { 1255 // A wider extend was hidden behind a narrower one. This may induce 1256 // another round of IV widening in which the intermediate IV becomes 1257 // dead. It should be very rare. 1258 LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi 1259 << " not wide enough to subsume " << *DU.NarrowUse 1260 << "\n"); 1261 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); 1262 NewDef = DU.NarrowUse; 1263 } 1264 } 1265 if (NewDef != DU.NarrowUse) { 1266 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse 1267 << " replaced by " << *DU.WideDef << "\n"); 1268 ++NumElimExt; 1269 DU.NarrowUse->replaceAllUsesWith(NewDef); 1270 DeadInsts.emplace_back(DU.NarrowUse); 1271 } 1272 // Now that the extend is gone, we want to expose it's uses for potential 1273 // further simplification. We don't need to directly inform SimplifyIVUsers 1274 // of the new users, because their parent IV will be processed later as a 1275 // new loop phi. If we preserved IVUsers analysis, we would also want to 1276 // push the uses of WideDef here. 1277 1278 // No further widening is needed. The deceased [sz]ext had done it for us. 1279 return nullptr; 1280 } 1281 1282 // Does this user itself evaluate to a recurrence after widening? 1283 WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU); 1284 if (!WideAddRec.first) 1285 WideAddRec = getWideRecurrence(DU); 1286 1287 assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown)); 1288 if (!WideAddRec.first) { 1289 // If use is a loop condition, try to promote the condition instead of 1290 // truncating the IV first. 1291 if (widenLoopCompare(DU)) 1292 return nullptr; 1293 1294 // We are here about to generate a truncate instruction that may hurt 1295 // performance because the scalar evolution expression computed earlier 1296 // in WideAddRec.first does not indicate a polynomial induction expression. 1297 // In that case, look at the operands of the use instruction to determine 1298 // if we can still widen the use instead of truncating its operand. 1299 if (widenWithVariantUse(DU)) { 1300 widenWithVariantUseCodegen(DU); 1301 return nullptr; 1302 } 1303 1304 // This user does not evaluate to a recurrence after widening, so don't 1305 // follow it. Instead insert a Trunc to kill off the original use, 1306 // eventually isolating the original narrow IV so it can be removed. 1307 truncateIVUse(DU, DT, LI); 1308 return nullptr; 1309 } 1310 // Assume block terminators cannot evaluate to a recurrence. We can't to 1311 // insert a Trunc after a terminator if there happens to be a critical edge. 1312 assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() && 1313 "SCEV is not expected to evaluate a block terminator"); 1314 1315 // Reuse the IV increment that SCEVExpander created as long as it dominates 1316 // NarrowUse. 1317 Instruction *WideUse = nullptr; 1318 if (WideAddRec.first == WideIncExpr && 1319 Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) 1320 WideUse = WideInc; 1321 else { 1322 WideUse = cloneIVUser(DU, WideAddRec.first); 1323 if (!WideUse) 1324 return nullptr; 1325 } 1326 // Evaluation of WideAddRec ensured that the narrow expression could be 1327 // extended outside the loop without overflow. This suggests that the wide use 1328 // evaluates to the same expression as the extended narrow use, but doesn't 1329 // absolutely guarantee it. Hence the following failsafe check. In rare cases 1330 // where it fails, we simply throw away the newly created wide use. 1331 if (WideAddRec.first != SE->getSCEV(WideUse)) { 1332 LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " 1333 << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first 1334 << "\n"); 1335 DeadInsts.emplace_back(WideUse); 1336 return nullptr; 1337 } 1338 1339 // if we reached this point then we are going to replace 1340 // DU.NarrowUse with WideUse. Reattach DbgValue then. 1341 replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT); 1342 1343 ExtendKindMap[DU.NarrowUse] = WideAddRec.second; 1344 // Returning WideUse pushes it on the worklist. 1345 return WideUse; 1346 } 1347 1348 /// Add eligible users of NarrowDef to NarrowIVUsers. 1349 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { 1350 const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); 1351 bool NonNegativeDef = 1352 SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, 1353 SE->getZero(NarrowSCEV->getType())); 1354 for (User *U : NarrowDef->users()) { 1355 Instruction *NarrowUser = cast<Instruction>(U); 1356 1357 // Handle data flow merges and bizarre phi cycles. 1358 if (!Widened.insert(NarrowUser).second) 1359 continue; 1360 1361 bool NonNegativeUse = false; 1362 if (!NonNegativeDef) { 1363 // We might have a control-dependent range information for this context. 1364 if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser)) 1365 NonNegativeUse = RangeInfo->getSignedMin().isNonNegative(); 1366 } 1367 1368 NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, 1369 NonNegativeDef || NonNegativeUse); 1370 } 1371 } 1372 1373 /// Process a single induction variable. First use the SCEVExpander to create a 1374 /// wide induction variable that evaluates to the same recurrence as the 1375 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's 1376 /// def-use chain. After widenIVUse has processed all interesting IV users, the 1377 /// narrow IV will be isolated for removal by DeleteDeadPHIs. 1378 /// 1379 /// It would be simpler to delete uses as they are processed, but we must avoid 1380 /// invalidating SCEV expressions. 1381 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { 1382 // Bail if we disallowed widening. 1383 if(!AllowIVWidening) 1384 return nullptr; 1385 1386 // Is this phi an induction variable? 1387 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); 1388 if (!AddRec) 1389 return nullptr; 1390 1391 // Widen the induction variable expression. 1392 const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended 1393 ? SE->getSignExtendExpr(AddRec, WideType) 1394 : SE->getZeroExtendExpr(AddRec, WideType); 1395 1396 assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && 1397 "Expect the new IV expression to preserve its type"); 1398 1399 // Can the IV be extended outside the loop without overflow? 1400 AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); 1401 if (!AddRec || AddRec->getLoop() != L) 1402 return nullptr; 1403 1404 // An AddRec must have loop-invariant operands. Since this AddRec is 1405 // materialized by a loop header phi, the expression cannot have any post-loop 1406 // operands, so they must dominate the loop header. 1407 assert( 1408 SE->properlyDominates(AddRec->getStart(), L->getHeader()) && 1409 SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) && 1410 "Loop header phi recurrence inputs do not dominate the loop"); 1411 1412 // Iterate over IV uses (including transitive ones) looking for IV increments 1413 // of the form 'add nsw %iv, <const>'. For each increment and each use of 1414 // the increment calculate control-dependent range information basing on 1415 // dominating conditions inside of the loop (e.g. a range check inside of the 1416 // loop). Calculated ranges are stored in PostIncRangeInfos map. 1417 // 1418 // Control-dependent range information is later used to prove that a narrow 1419 // definition is not negative (see pushNarrowIVUsers). It's difficult to do 1420 // this on demand because when pushNarrowIVUsers needs this information some 1421 // of the dominating conditions might be already widened. 1422 if (UsePostIncrementRanges) 1423 calculatePostIncRanges(OrigPhi); 1424 1425 // The rewriter provides a value for the desired IV expression. This may 1426 // either find an existing phi or materialize a new one. Either way, we 1427 // expect a well-formed cyclic phi-with-increments. i.e. any operand not part 1428 // of the phi-SCC dominates the loop entry. 1429 Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt(); 1430 Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt); 1431 // If the wide phi is not a phi node, for example a cast node, like bitcast, 1432 // inttoptr, ptrtoint, just skip for now. 1433 if (!(WidePhi = dyn_cast<PHINode>(ExpandInst))) { 1434 // if the cast node is an inserted instruction without any user, we should 1435 // remove it to make sure the pass don't touch the function as we can not 1436 // wide the phi. 1437 if (ExpandInst->hasNUses(0) && 1438 Rewriter.isInsertedInstruction(cast<Instruction>(ExpandInst))) 1439 DeadInsts.emplace_back(ExpandInst); 1440 return nullptr; 1441 } 1442 1443 // Remembering the WideIV increment generated by SCEVExpander allows 1444 // widenIVUse to reuse it when widening the narrow IV's increment. We don't 1445 // employ a general reuse mechanism because the call above is the only call to 1446 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. 1447 if (BasicBlock *LatchBlock = L->getLoopLatch()) { 1448 WideInc = 1449 cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); 1450 WideIncExpr = SE->getSCEV(WideInc); 1451 // Propagate the debug location associated with the original loop increment 1452 // to the new (widened) increment. 1453 auto *OrigInc = 1454 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); 1455 WideInc->setDebugLoc(OrigInc->getDebugLoc()); 1456 } 1457 1458 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); 1459 ++NumWidened; 1460 1461 // Traverse the def-use chain using a worklist starting at the original IV. 1462 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); 1463 1464 Widened.insert(OrigPhi); 1465 pushNarrowIVUsers(OrigPhi, WidePhi); 1466 1467 while (!NarrowIVUsers.empty()) { 1468 NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); 1469 1470 // Process a def-use edge. This may replace the use, so don't hold a 1471 // use_iterator across it. 1472 Instruction *WideUse = widenIVUse(DU, Rewriter); 1473 1474 // Follow all def-use edges from the previous narrow use. 1475 if (WideUse) 1476 pushNarrowIVUsers(DU.NarrowUse, WideUse); 1477 1478 // widenIVUse may have removed the def-use edge. 1479 if (DU.NarrowDef->use_empty()) 1480 DeadInsts.emplace_back(DU.NarrowDef); 1481 } 1482 1483 // Attach any debug information to the new PHI. 1484 replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT); 1485 1486 return WidePhi; 1487 } 1488 1489 /// Calculates control-dependent range for the given def at the given context 1490 /// by looking at dominating conditions inside of the loop 1491 void WidenIV::calculatePostIncRange(Instruction *NarrowDef, 1492 Instruction *NarrowUser) { 1493 using namespace llvm::PatternMatch; 1494 1495 Value *NarrowDefLHS; 1496 const APInt *NarrowDefRHS; 1497 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), 1498 m_APInt(NarrowDefRHS))) || 1499 !NarrowDefRHS->isNonNegative()) 1500 return; 1501 1502 auto UpdateRangeFromCondition = [&] (Value *Condition, 1503 bool TrueDest) { 1504 CmpInst::Predicate Pred; 1505 Value *CmpRHS; 1506 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), 1507 m_Value(CmpRHS)))) 1508 return; 1509 1510 CmpInst::Predicate P = 1511 TrueDest ? Pred : CmpInst::getInversePredicate(Pred); 1512 1513 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); 1514 auto CmpConstrainedLHSRange = 1515 ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); 1516 auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( 1517 *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); 1518 1519 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); 1520 }; 1521 1522 auto UpdateRangeFromGuards = [&](Instruction *Ctx) { 1523 if (!HasGuards) 1524 return; 1525 1526 for (Instruction &I : make_range(Ctx->getIterator().getReverse(), 1527 Ctx->getParent()->rend())) { 1528 Value *C = nullptr; 1529 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) 1530 UpdateRangeFromCondition(C, /*TrueDest=*/true); 1531 } 1532 }; 1533 1534 UpdateRangeFromGuards(NarrowUser); 1535 1536 BasicBlock *NarrowUserBB = NarrowUser->getParent(); 1537 // If NarrowUserBB is statically unreachable asking dominator queries may 1538 // yield surprising results. (e.g. the block may not have a dom tree node) 1539 if (!DT->isReachableFromEntry(NarrowUserBB)) 1540 return; 1541 1542 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); 1543 L->contains(DTB->getBlock()); 1544 DTB = DTB->getIDom()) { 1545 auto *BB = DTB->getBlock(); 1546 auto *TI = BB->getTerminator(); 1547 UpdateRangeFromGuards(TI); 1548 1549 auto *BI = dyn_cast<BranchInst>(TI); 1550 if (!BI || !BI->isConditional()) 1551 continue; 1552 1553 auto *TrueSuccessor = BI->getSuccessor(0); 1554 auto *FalseSuccessor = BI->getSuccessor(1); 1555 1556 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { 1557 return BBE.isSingleEdge() && 1558 DT->dominates(BBE, NarrowUser->getParent()); 1559 }; 1560 1561 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) 1562 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); 1563 1564 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) 1565 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); 1566 } 1567 } 1568 1569 /// Calculates PostIncRangeInfos map for the given IV 1570 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { 1571 SmallPtrSet<Instruction *, 16> Visited; 1572 SmallVector<Instruction *, 6> Worklist; 1573 Worklist.push_back(OrigPhi); 1574 Visited.insert(OrigPhi); 1575 1576 while (!Worklist.empty()) { 1577 Instruction *NarrowDef = Worklist.pop_back_val(); 1578 1579 for (Use &U : NarrowDef->uses()) { 1580 auto *NarrowUser = cast<Instruction>(U.getUser()); 1581 1582 // Don't go looking outside the current loop. 1583 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; 1584 if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) 1585 continue; 1586 1587 if (!Visited.insert(NarrowUser).second) 1588 continue; 1589 1590 Worklist.push_back(NarrowUser); 1591 1592 calculatePostIncRange(NarrowDef, NarrowUser); 1593 } 1594 } 1595 } 1596 1597 //===----------------------------------------------------------------------===// 1598 // Live IV Reduction - Minimize IVs live across the loop. 1599 //===----------------------------------------------------------------------===// 1600 1601 //===----------------------------------------------------------------------===// 1602 // Simplification of IV users based on SCEV evaluation. 1603 //===----------------------------------------------------------------------===// 1604 1605 namespace { 1606 1607 class IndVarSimplifyVisitor : public IVVisitor { 1608 ScalarEvolution *SE; 1609 const TargetTransformInfo *TTI; 1610 PHINode *IVPhi; 1611 1612 public: 1613 WideIVInfo WI; 1614 1615 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, 1616 const TargetTransformInfo *TTI, 1617 const DominatorTree *DTree) 1618 : SE(SCEV), TTI(TTI), IVPhi(IV) { 1619 DT = DTree; 1620 WI.NarrowIV = IVPhi; 1621 } 1622 1623 // Implement the interface used by simplifyUsersOfIV. 1624 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } 1625 }; 1626 1627 } // end anonymous namespace 1628 1629 /// Iteratively perform simplification on a worklist of IV users. Each 1630 /// successive simplification may push more users which may themselves be 1631 /// candidates for simplification. 1632 /// 1633 /// Sign/Zero extend elimination is interleaved with IV simplification. 1634 bool IndVarSimplify::simplifyAndExtend(Loop *L, 1635 SCEVExpander &Rewriter, 1636 LoopInfo *LI) { 1637 SmallVector<WideIVInfo, 8> WideIVs; 1638 1639 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction( 1640 Intrinsic::getName(Intrinsic::experimental_guard)); 1641 bool HasGuards = GuardDecl && !GuardDecl->use_empty(); 1642 1643 SmallVector<PHINode*, 8> LoopPhis; 1644 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 1645 LoopPhis.push_back(cast<PHINode>(I)); 1646 } 1647 // Each round of simplification iterates through the SimplifyIVUsers worklist 1648 // for all current phis, then determines whether any IVs can be 1649 // widened. Widening adds new phis to LoopPhis, inducing another round of 1650 // simplification on the wide IVs. 1651 bool Changed = false; 1652 while (!LoopPhis.empty()) { 1653 // Evaluate as many IV expressions as possible before widening any IVs. This 1654 // forces SCEV to set no-wrap flags before evaluating sign/zero 1655 // extension. The first time SCEV attempts to normalize sign/zero extension, 1656 // the result becomes final. So for the most predictable results, we delay 1657 // evaluation of sign/zero extend evaluation until needed, and avoid running 1658 // other SCEV based analysis prior to simplifyAndExtend. 1659 do { 1660 PHINode *CurrIV = LoopPhis.pop_back_val(); 1661 1662 // Information about sign/zero extensions of CurrIV. 1663 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); 1664 1665 Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter, 1666 &Visitor); 1667 1668 if (Visitor.WI.WidestNativeType) { 1669 WideIVs.push_back(Visitor.WI); 1670 } 1671 } while(!LoopPhis.empty()); 1672 1673 for (; !WideIVs.empty(); WideIVs.pop_back()) { 1674 WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards); 1675 if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) { 1676 Changed = true; 1677 LoopPhis.push_back(WidePhi); 1678 } 1679 } 1680 } 1681 return Changed; 1682 } 1683 1684 //===----------------------------------------------------------------------===// 1685 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition. 1686 //===----------------------------------------------------------------------===// 1687 1688 /// Given an Value which is hoped to be part of an add recurance in the given 1689 /// loop, return the associated Phi node if so. Otherwise, return null. Note 1690 /// that this is less general than SCEVs AddRec checking. 1691 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) { 1692 Instruction *IncI = dyn_cast<Instruction>(IncV); 1693 if (!IncI) 1694 return nullptr; 1695 1696 switch (IncI->getOpcode()) { 1697 case Instruction::Add: 1698 case Instruction::Sub: 1699 break; 1700 case Instruction::GetElementPtr: 1701 // An IV counter must preserve its type. 1702 if (IncI->getNumOperands() == 2) 1703 break; 1704 LLVM_FALLTHROUGH; 1705 default: 1706 return nullptr; 1707 } 1708 1709 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0)); 1710 if (Phi && Phi->getParent() == L->getHeader()) { 1711 if (L->isLoopInvariant(IncI->getOperand(1))) 1712 return Phi; 1713 return nullptr; 1714 } 1715 if (IncI->getOpcode() == Instruction::GetElementPtr) 1716 return nullptr; 1717 1718 // Allow add/sub to be commuted. 1719 Phi = dyn_cast<PHINode>(IncI->getOperand(1)); 1720 if (Phi && Phi->getParent() == L->getHeader()) { 1721 if (L->isLoopInvariant(IncI->getOperand(0))) 1722 return Phi; 1723 } 1724 return nullptr; 1725 } 1726 1727 /// Whether the current loop exit test is based on this value. Currently this 1728 /// is limited to a direct use in the loop condition. 1729 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) { 1730 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1731 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition()); 1732 // TODO: Allow non-icmp loop test. 1733 if (!ICmp) 1734 return false; 1735 1736 // TODO: Allow indirect use. 1737 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V; 1738 } 1739 1740 /// linearFunctionTestReplace policy. Return true unless we can show that the 1741 /// current exit test is already sufficiently canonical. 1742 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) { 1743 assert(L->getLoopLatch() && "Must be in simplified form"); 1744 1745 // Avoid converting a constant or loop invariant test back to a runtime 1746 // test. This is critical for when SCEV's cached ExitCount is less precise 1747 // than the current IR (such as after we've proven a particular exit is 1748 // actually dead and thus the BE count never reaches our ExitCount.) 1749 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1750 if (L->isLoopInvariant(BI->getCondition())) 1751 return false; 1752 1753 // Do LFTR to simplify the exit condition to an ICMP. 1754 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); 1755 if (!Cond) 1756 return true; 1757 1758 // Do LFTR to simplify the exit ICMP to EQ/NE 1759 ICmpInst::Predicate Pred = Cond->getPredicate(); 1760 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ) 1761 return true; 1762 1763 // Look for a loop invariant RHS 1764 Value *LHS = Cond->getOperand(0); 1765 Value *RHS = Cond->getOperand(1); 1766 if (!L->isLoopInvariant(RHS)) { 1767 if (!L->isLoopInvariant(LHS)) 1768 return true; 1769 std::swap(LHS, RHS); 1770 } 1771 // Look for a simple IV counter LHS 1772 PHINode *Phi = dyn_cast<PHINode>(LHS); 1773 if (!Phi) 1774 Phi = getLoopPhiForCounter(LHS, L); 1775 1776 if (!Phi) 1777 return true; 1778 1779 // Do LFTR if PHI node is defined in the loop, but is *not* a counter. 1780 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch()); 1781 if (Idx < 0) 1782 return true; 1783 1784 // Do LFTR if the exit condition's IV is *not* a simple counter. 1785 Value *IncV = Phi->getIncomingValue(Idx); 1786 return Phi != getLoopPhiForCounter(IncV, L); 1787 } 1788 1789 /// Return true if undefined behavior would provable be executed on the path to 1790 /// OnPathTo if Root produced a posion result. Note that this doesn't say 1791 /// anything about whether OnPathTo is actually executed or whether Root is 1792 /// actually poison. This can be used to assess whether a new use of Root can 1793 /// be added at a location which is control equivalent with OnPathTo (such as 1794 /// immediately before it) without introducing UB which didn't previously 1795 /// exist. Note that a false result conveys no information. 1796 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, 1797 Instruction *OnPathTo, 1798 DominatorTree *DT) { 1799 // Basic approach is to assume Root is poison, propagate poison forward 1800 // through all users we can easily track, and then check whether any of those 1801 // users are provable UB and must execute before out exiting block might 1802 // exit. 1803 1804 // The set of all recursive users we've visited (which are assumed to all be 1805 // poison because of said visit) 1806 SmallSet<const Value *, 16> KnownPoison; 1807 SmallVector<const Instruction*, 16> Worklist; 1808 Worklist.push_back(Root); 1809 while (!Worklist.empty()) { 1810 const Instruction *I = Worklist.pop_back_val(); 1811 1812 // If we know this must trigger UB on a path leading our target. 1813 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo)) 1814 return true; 1815 1816 // If we can't analyze propagation through this instruction, just skip it 1817 // and transitive users. Safe as false is a conservative result. 1818 if (!propagatesPoison(cast<Operator>(I)) && I != Root) 1819 continue; 1820 1821 if (KnownPoison.insert(I).second) 1822 for (const User *User : I->users()) 1823 Worklist.push_back(cast<Instruction>(User)); 1824 } 1825 1826 // Might be non-UB, or might have a path we couldn't prove must execute on 1827 // way to exiting bb. 1828 return false; 1829 } 1830 1831 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils 1832 /// down to checking that all operands are constant and listing instructions 1833 /// that may hide undef. 1834 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited, 1835 unsigned Depth) { 1836 if (isa<Constant>(V)) 1837 return !isa<UndefValue>(V); 1838 1839 if (Depth >= 6) 1840 return false; 1841 1842 // Conservatively handle non-constant non-instructions. For example, Arguments 1843 // may be undef. 1844 Instruction *I = dyn_cast<Instruction>(V); 1845 if (!I) 1846 return false; 1847 1848 // Load and return values may be undef. 1849 if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I)) 1850 return false; 1851 1852 // Optimistically handle other instructions. 1853 for (Value *Op : I->operands()) { 1854 if (!Visited.insert(Op).second) 1855 continue; 1856 if (!hasConcreteDefImpl(Op, Visited, Depth+1)) 1857 return false; 1858 } 1859 return true; 1860 } 1861 1862 /// Return true if the given value is concrete. We must prove that undef can 1863 /// never reach it. 1864 /// 1865 /// TODO: If we decide that this is a good approach to checking for undef, we 1866 /// may factor it into a common location. 1867 static bool hasConcreteDef(Value *V) { 1868 SmallPtrSet<Value*, 8> Visited; 1869 Visited.insert(V); 1870 return hasConcreteDefImpl(V, Visited, 0); 1871 } 1872 1873 /// Return true if this IV has any uses other than the (soon to be rewritten) 1874 /// loop exit test. 1875 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { 1876 int LatchIdx = Phi->getBasicBlockIndex(LatchBlock); 1877 Value *IncV = Phi->getIncomingValue(LatchIdx); 1878 1879 for (User *U : Phi->users()) 1880 if (U != Cond && U != IncV) return false; 1881 1882 for (User *U : IncV->users()) 1883 if (U != Cond && U != Phi) return false; 1884 return true; 1885 } 1886 1887 /// Return true if the given phi is a "counter" in L. A counter is an 1888 /// add recurance (of integer or pointer type) with an arbitrary start, and a 1889 /// step of 1. Note that L must have exactly one latch. 1890 static bool isLoopCounter(PHINode* Phi, Loop *L, 1891 ScalarEvolution *SE) { 1892 assert(Phi->getParent() == L->getHeader()); 1893 assert(L->getLoopLatch()); 1894 1895 if (!SE->isSCEVable(Phi->getType())) 1896 return false; 1897 1898 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi)); 1899 if (!AR || AR->getLoop() != L || !AR->isAffine()) 1900 return false; 1901 1902 const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)); 1903 if (!Step || !Step->isOne()) 1904 return false; 1905 1906 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch()); 1907 Value *IncV = Phi->getIncomingValue(LatchIdx); 1908 return (getLoopPhiForCounter(IncV, L) == Phi); 1909 } 1910 1911 /// Search the loop header for a loop counter (anadd rec w/step of one) 1912 /// suitable for use by LFTR. If multiple counters are available, select the 1913 /// "best" one based profitable heuristics. 1914 /// 1915 /// BECount may be an i8* pointer type. The pointer difference is already 1916 /// valid count without scaling the address stride, so it remains a pointer 1917 /// expression as far as SCEV is concerned. 1918 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB, 1919 const SCEV *BECount, 1920 ScalarEvolution *SE, DominatorTree *DT) { 1921 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType()); 1922 1923 Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition(); 1924 1925 // Loop over all of the PHI nodes, looking for a simple counter. 1926 PHINode *BestPhi = nullptr; 1927 const SCEV *BestInit = nullptr; 1928 BasicBlock *LatchBlock = L->getLoopLatch(); 1929 assert(LatchBlock && "Must be in simplified form"); 1930 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 1931 1932 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 1933 PHINode *Phi = cast<PHINode>(I); 1934 if (!isLoopCounter(Phi, L, SE)) 1935 continue; 1936 1937 // Avoid comparing an integer IV against a pointer Limit. 1938 if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy()) 1939 continue; 1940 1941 const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi)); 1942 1943 // AR may be a pointer type, while BECount is an integer type. 1944 // AR may be wider than BECount. With eq/ne tests overflow is immaterial. 1945 // AR may not be a narrower type, or we may never exit. 1946 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType()); 1947 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth)) 1948 continue; 1949 1950 // Avoid reusing a potentially undef value to compute other values that may 1951 // have originally had a concrete definition. 1952 if (!hasConcreteDef(Phi)) { 1953 // We explicitly allow unknown phis as long as they are already used by 1954 // the loop exit test. This is legal since performing LFTR could not 1955 // increase the number of undef users. 1956 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock); 1957 if (!isLoopExitTestBasedOn(Phi, ExitingBB) && 1958 !isLoopExitTestBasedOn(IncPhi, ExitingBB)) 1959 continue; 1960 } 1961 1962 // Avoid introducing undefined behavior due to poison which didn't exist in 1963 // the original program. (Annoyingly, the rules for poison and undef 1964 // propagation are distinct, so this does NOT cover the undef case above.) 1965 // We have to ensure that we don't introduce UB by introducing a use on an 1966 // iteration where said IV produces poison. Our strategy here differs for 1967 // pointers and integer IVs. For integers, we strip and reinfer as needed, 1968 // see code in linearFunctionTestReplace. For pointers, we restrict 1969 // transforms as there is no good way to reinfer inbounds once lost. 1970 if (!Phi->getType()->isIntegerTy() && 1971 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) 1972 continue; 1973 1974 const SCEV *Init = AR->getStart(); 1975 1976 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { 1977 // Don't force a live loop counter if another IV can be used. 1978 if (AlmostDeadIV(Phi, LatchBlock, Cond)) 1979 continue; 1980 1981 // Prefer to count-from-zero. This is a more "canonical" counter form. It 1982 // also prefers integer to pointer IVs. 1983 if (BestInit->isZero() != Init->isZero()) { 1984 if (BestInit->isZero()) 1985 continue; 1986 } 1987 // If two IVs both count from zero or both count from nonzero then the 1988 // narrower is likely a dead phi that has been widened. Use the wider phi 1989 // to allow the other to be eliminated. 1990 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) 1991 continue; 1992 } 1993 BestPhi = Phi; 1994 BestInit = Init; 1995 } 1996 return BestPhi; 1997 } 1998 1999 /// Insert an IR expression which computes the value held by the IV IndVar 2000 /// (which must be an loop counter w/unit stride) after the backedge of loop L 2001 /// is taken ExitCount times. 2002 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, 2003 const SCEV *ExitCount, bool UsePostInc, Loop *L, 2004 SCEVExpander &Rewriter, ScalarEvolution *SE) { 2005 assert(isLoopCounter(IndVar, L, SE)); 2006 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 2007 const SCEV *IVInit = AR->getStart(); 2008 2009 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter 2010 // finds a valid pointer IV. Sign extend ExitCount in order to materialize a 2011 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing 2012 // the existing GEPs whenever possible. 2013 if (IndVar->getType()->isPointerTy() && 2014 !ExitCount->getType()->isPointerTy()) { 2015 // IVOffset will be the new GEP offset that is interpreted by GEP as a 2016 // signed value. ExitCount on the other hand represents the loop trip count, 2017 // which is an unsigned value. FindLoopCounter only allows induction 2018 // variables that have a positive unit stride of one. This means we don't 2019 // have to handle the case of negative offsets (yet) and just need to zero 2020 // extend ExitCount. 2021 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); 2022 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy); 2023 if (UsePostInc) 2024 IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy)); 2025 2026 // Expand the code for the iteration count. 2027 assert(SE->isLoopInvariant(IVOffset, L) && 2028 "Computed iteration count is not loop invariant!"); 2029 2030 // We could handle pointer IVs other than i8*, but we need to compensate for 2031 // gep index scaling. 2032 assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), 2033 cast<PointerType>(IndVar->getType()) 2034 ->getElementType())->isOne() && 2035 "unit stride pointer IV must be i8*"); 2036 2037 const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset); 2038 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2039 return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI); 2040 } else { 2041 // In any other case, convert both IVInit and ExitCount to integers before 2042 // comparing. This may result in SCEV expansion of pointers, but in practice 2043 // SCEV will fold the pointer arithmetic away as such: 2044 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). 2045 // 2046 // Valid Cases: (1) both integers is most common; (2) both may be pointers 2047 // for simple memset-style loops. 2048 // 2049 // IVInit integer and ExitCount pointer would only occur if a canonical IV 2050 // were generated on top of case #2, which is not expected. 2051 2052 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 2053 // For unit stride, IVCount = Start + ExitCount with 2's complement 2054 // overflow. 2055 2056 // For integer IVs, truncate the IV before computing IVInit + BECount, 2057 // unless we know apriori that the limit must be a constant when evaluated 2058 // in the bitwidth of the IV. We prefer (potentially) keeping a truncate 2059 // of the IV in the loop over a (potentially) expensive expansion of the 2060 // widened exit count add(zext(add)) expression. 2061 if (SE->getTypeSizeInBits(IVInit->getType()) 2062 > SE->getTypeSizeInBits(ExitCount->getType())) { 2063 if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount)) 2064 ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType()); 2065 else 2066 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType()); 2067 } 2068 2069 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount); 2070 2071 if (UsePostInc) 2072 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); 2073 2074 // Expand the code for the iteration count. 2075 assert(SE->isLoopInvariant(IVLimit, L) && 2076 "Computed iteration count is not loop invariant!"); 2077 // Ensure that we generate the same type as IndVar, or a smaller integer 2078 // type. In the presence of null pointer values, we have an integer type 2079 // SCEV expression (IVInit) for a pointer type IV value (IndVar). 2080 Type *LimitTy = ExitCount->getType()->isPointerTy() ? 2081 IndVar->getType() : ExitCount->getType(); 2082 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2083 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); 2084 } 2085 } 2086 2087 /// This method rewrites the exit condition of the loop to be a canonical != 2088 /// comparison against the incremented loop induction variable. This pass is 2089 /// able to rewrite the exit tests of any loop where the SCEV analysis can 2090 /// determine a loop-invariant trip count of the loop, which is actually a much 2091 /// broader range than just linear tests. 2092 bool IndVarSimplify:: 2093 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 2094 const SCEV *ExitCount, 2095 PHINode *IndVar, SCEVExpander &Rewriter) { 2096 assert(L->getLoopLatch() && "Loop no longer in simplified form?"); 2097 assert(isLoopCounter(IndVar, L, SE)); 2098 Instruction * const IncVar = 2099 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch())); 2100 2101 // Initialize CmpIndVar to the preincremented IV. 2102 Value *CmpIndVar = IndVar; 2103 bool UsePostInc = false; 2104 2105 // If the exiting block is the same as the backedge block, we prefer to 2106 // compare against the post-incremented value, otherwise we must compare 2107 // against the preincremented value. 2108 if (ExitingBB == L->getLoopLatch()) { 2109 // For pointer IVs, we chose to not strip inbounds which requires us not 2110 // to add a potentially UB introducing use. We need to either a) show 2111 // the loop test we're modifying is already in post-inc form, or b) show 2112 // that adding a use must not introduce UB. 2113 bool SafeToPostInc = 2114 IndVar->getType()->isIntegerTy() || 2115 isLoopExitTestBasedOn(IncVar, ExitingBB) || 2116 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT); 2117 if (SafeToPostInc) { 2118 UsePostInc = true; 2119 CmpIndVar = IncVar; 2120 } 2121 } 2122 2123 // It may be necessary to drop nowrap flags on the incrementing instruction 2124 // if either LFTR moves from a pre-inc check to a post-inc check (in which 2125 // case the increment might have previously been poison on the last iteration 2126 // only) or if LFTR switches to a different IV that was previously dynamically 2127 // dead (and as such may be arbitrarily poison). We remove any nowrap flags 2128 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc 2129 // check), because the pre-inc addrec flags may be adopted from the original 2130 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags. 2131 // TODO: This handling is inaccurate for one case: If we switch to a 2132 // dynamically dead IV that wraps on the first loop iteration only, which is 2133 // not covered by the post-inc addrec. (If the new IV was not dynamically 2134 // dead, it could not be poison on the first iteration in the first place.) 2135 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) { 2136 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar)); 2137 if (BO->hasNoUnsignedWrap()) 2138 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap()); 2139 if (BO->hasNoSignedWrap()) 2140 BO->setHasNoSignedWrap(AR->hasNoSignedWrap()); 2141 } 2142 2143 Value *ExitCnt = genLoopLimit( 2144 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE); 2145 assert(ExitCnt->getType()->isPointerTy() == 2146 IndVar->getType()->isPointerTy() && 2147 "genLoopLimit missed a cast"); 2148 2149 // Insert a new icmp_ne or icmp_eq instruction before the branch. 2150 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2151 ICmpInst::Predicate P; 2152 if (L->contains(BI->getSuccessor(0))) 2153 P = ICmpInst::ICMP_NE; 2154 else 2155 P = ICmpInst::ICMP_EQ; 2156 2157 IRBuilder<> Builder(BI); 2158 2159 // The new loop exit condition should reuse the debug location of the 2160 // original loop exit condition. 2161 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition())) 2162 Builder.SetCurrentDebugLocation(Cond->getDebugLoc()); 2163 2164 // For integer IVs, if we evaluated the limit in the narrower bitwidth to 2165 // avoid the expensive expansion of the limit expression in the wider type, 2166 // emit a truncate to narrow the IV to the ExitCount type. This is safe 2167 // since we know (from the exit count bitwidth), that we can't self-wrap in 2168 // the narrower type. 2169 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); 2170 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); 2171 if (CmpIndVarSize > ExitCntSize) { 2172 assert(!CmpIndVar->getType()->isPointerTy() && 2173 !ExitCnt->getType()->isPointerTy()); 2174 2175 // Before resorting to actually inserting the truncate, use the same 2176 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend 2177 // the other side of the comparison instead. We still evaluate the limit 2178 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to 2179 // a truncate within in. 2180 bool Extended = false; 2181 const SCEV *IV = SE->getSCEV(CmpIndVar); 2182 const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar), 2183 ExitCnt->getType()); 2184 const SCEV *ZExtTrunc = 2185 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType()); 2186 2187 if (ZExtTrunc == IV) { 2188 Extended = true; 2189 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(), 2190 "wide.trip.count"); 2191 } else { 2192 const SCEV *SExtTrunc = 2193 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType()); 2194 if (SExtTrunc == IV) { 2195 Extended = true; 2196 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(), 2197 "wide.trip.count"); 2198 } 2199 } 2200 2201 if (Extended) { 2202 bool Discard; 2203 L->makeLoopInvariant(ExitCnt, Discard); 2204 } else 2205 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), 2206 "lftr.wideiv"); 2207 } 2208 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 2209 << " LHS:" << *CmpIndVar << '\n' 2210 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==") 2211 << "\n" 2212 << " RHS:\t" << *ExitCnt << "\n" 2213 << "ExitCount:\t" << *ExitCount << "\n" 2214 << " was: " << *BI->getCondition() << "\n"); 2215 2216 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); 2217 Value *OrigCond = BI->getCondition(); 2218 // It's tempting to use replaceAllUsesWith here to fully replace the old 2219 // comparison, but that's not immediately safe, since users of the old 2220 // comparison may not be dominated by the new comparison. Instead, just 2221 // update the branch to use the new comparison; in the common case this 2222 // will make old comparison dead. 2223 BI->setCondition(Cond); 2224 DeadInsts.emplace_back(OrigCond); 2225 2226 ++NumLFTR; 2227 return true; 2228 } 2229 2230 //===----------------------------------------------------------------------===// 2231 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders. 2232 //===----------------------------------------------------------------------===// 2233 2234 /// If there's a single exit block, sink any loop-invariant values that 2235 /// were defined in the preheader but not used inside the loop into the 2236 /// exit block to reduce register pressure in the loop. 2237 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { 2238 BasicBlock *ExitBlock = L->getExitBlock(); 2239 if (!ExitBlock) return false; 2240 2241 BasicBlock *Preheader = L->getLoopPreheader(); 2242 if (!Preheader) return false; 2243 2244 bool MadeAnyChanges = false; 2245 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); 2246 BasicBlock::iterator I(Preheader->getTerminator()); 2247 while (I != Preheader->begin()) { 2248 --I; 2249 // New instructions were inserted at the end of the preheader. 2250 if (isa<PHINode>(I)) 2251 break; 2252 2253 // Don't move instructions which might have side effects, since the side 2254 // effects need to complete before instructions inside the loop. Also don't 2255 // move instructions which might read memory, since the loop may modify 2256 // memory. Note that it's okay if the instruction might have undefined 2257 // behavior: LoopSimplify guarantees that the preheader dominates the exit 2258 // block. 2259 if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 2260 continue; 2261 2262 // Skip debug info intrinsics. 2263 if (isa<DbgInfoIntrinsic>(I)) 2264 continue; 2265 2266 // Skip eh pad instructions. 2267 if (I->isEHPad()) 2268 continue; 2269 2270 // Don't sink alloca: we never want to sink static alloca's out of the 2271 // entry block, and correctly sinking dynamic alloca's requires 2272 // checks for stacksave/stackrestore intrinsics. 2273 // FIXME: Refactor this check somehow? 2274 if (isa<AllocaInst>(I)) 2275 continue; 2276 2277 // Determine if there is a use in or before the loop (direct or 2278 // otherwise). 2279 bool UsedInLoop = false; 2280 for (Use &U : I->uses()) { 2281 Instruction *User = cast<Instruction>(U.getUser()); 2282 BasicBlock *UseBB = User->getParent(); 2283 if (PHINode *P = dyn_cast<PHINode>(User)) { 2284 unsigned i = 2285 PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 2286 UseBB = P->getIncomingBlock(i); 2287 } 2288 if (UseBB == Preheader || L->contains(UseBB)) { 2289 UsedInLoop = true; 2290 break; 2291 } 2292 } 2293 2294 // If there is, the def must remain in the preheader. 2295 if (UsedInLoop) 2296 continue; 2297 2298 // Otherwise, sink it to the exit block. 2299 Instruction *ToMove = &*I; 2300 bool Done = false; 2301 2302 if (I != Preheader->begin()) { 2303 // Skip debug info intrinsics. 2304 do { 2305 --I; 2306 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); 2307 2308 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) 2309 Done = true; 2310 } else { 2311 Done = true; 2312 } 2313 2314 MadeAnyChanges = true; 2315 ToMove->moveBefore(*ExitBlock, InsertPt); 2316 if (Done) break; 2317 InsertPt = ToMove->getIterator(); 2318 } 2319 2320 return MadeAnyChanges; 2321 } 2322 2323 // Returns true if the condition of \p BI being checked is invariant and can be 2324 // proved to be trivially true. 2325 static bool isTrivialCond(const Loop *L, BranchInst *BI, ScalarEvolution *SE, 2326 bool ProvingLoopExit) { 2327 ICmpInst::Predicate Pred; 2328 Value *LHS, *RHS; 2329 using namespace PatternMatch; 2330 BasicBlock *TrueSucc, *FalseSucc; 2331 if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), 2332 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) 2333 return false; 2334 2335 assert((L->contains(TrueSucc) != L->contains(FalseSucc)) && 2336 "Not a loop exit!"); 2337 2338 // 'LHS pred RHS' should now mean that we stay in loop. 2339 if (L->contains(FalseSucc)) 2340 Pred = CmpInst::getInversePredicate(Pred); 2341 2342 // If we are proving loop exit, invert the predicate. 2343 if (ProvingLoopExit) 2344 Pred = CmpInst::getInversePredicate(Pred); 2345 2346 const SCEV *LHSS = SE->getSCEVAtScope(LHS, L); 2347 const SCEV *RHSS = SE->getSCEVAtScope(RHS, L); 2348 // Can we prove it to be trivially true? 2349 if (SE->isKnownPredicate(Pred, LHSS, RHSS)) 2350 return true; 2351 2352 return false; 2353 } 2354 2355 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { 2356 SmallVector<BasicBlock*, 16> ExitingBlocks; 2357 L->getExitingBlocks(ExitingBlocks); 2358 2359 // Remove all exits which aren't both rewriteable and execute on every 2360 // iteration. 2361 auto NewEnd = llvm::remove_if(ExitingBlocks, [&](BasicBlock *ExitingBB) { 2362 // If our exitting block exits multiple loops, we can only rewrite the 2363 // innermost one. Otherwise, we're changing how many times the innermost 2364 // loop runs before it exits. 2365 if (LI->getLoopFor(ExitingBB) != L) 2366 return true; 2367 2368 // Can't rewrite non-branch yet. 2369 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 2370 if (!BI) 2371 return true; 2372 2373 // If already constant, nothing to do. 2374 if (isa<Constant>(BI->getCondition())) 2375 return true; 2376 2377 // Likewise, the loop latch must be dominated by the exiting BB. 2378 if (!DT->dominates(ExitingBB, L->getLoopLatch())) 2379 return true; 2380 2381 return false; 2382 }); 2383 ExitingBlocks.erase(NewEnd, ExitingBlocks.end()); 2384 2385 if (ExitingBlocks.empty()) 2386 return false; 2387 2388 // Get a symbolic upper bound on the loop backedge taken count. 2389 const SCEV *MaxExitCount = SE->computeMaxBackedgeTakenCount(L); 2390 if (isa<SCEVCouldNotCompute>(MaxExitCount)) 2391 return false; 2392 2393 // Visit our exit blocks in order of dominance. We know from the fact that 2394 // all exits must dominate the latch, so there is a total dominance order 2395 // between them. 2396 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) { 2397 // std::sort sorts in ascending order, so we want the inverse of 2398 // the normal dominance relation. 2399 if (A == B) return false; 2400 if (DT->properlyDominates(A, B)) 2401 return true; 2402 else { 2403 assert(DT->properlyDominates(B, A) && 2404 "expected total dominance order!"); 2405 return false; 2406 } 2407 }); 2408 #ifdef ASSERT 2409 for (unsigned i = 1; i < ExitingBlocks.size(); i++) { 2410 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])); 2411 } 2412 #endif 2413 2414 auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) { 2415 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2416 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); 2417 auto *OldCond = BI->getCondition(); 2418 auto *NewCond = ConstantInt::get(OldCond->getType(), 2419 IsTaken ? ExitIfTrue : !ExitIfTrue); 2420 BI->setCondition(NewCond); 2421 if (OldCond->use_empty()) 2422 DeadInsts.emplace_back(OldCond); 2423 }; 2424 2425 bool Changed = false; 2426 SmallSet<const SCEV*, 8> DominatingExitCounts; 2427 for (BasicBlock *ExitingBB : ExitingBlocks) { 2428 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2429 if (isa<SCEVCouldNotCompute>(ExitCount)) { 2430 // Okay, we do not know the exit count here. Can we at least prove that it 2431 // will remain the same within iteration space? 2432 auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2433 if (isTrivialCond(L, BI, SE, false)) { 2434 FoldExit(ExitingBB, false); 2435 Changed = true; 2436 } 2437 if (isTrivialCond(L, BI, SE, true)) { 2438 FoldExit(ExitingBB, true); 2439 Changed = true; 2440 } 2441 continue; 2442 } 2443 2444 // If we know we'd exit on the first iteration, rewrite the exit to 2445 // reflect this. This does not imply the loop must exit through this 2446 // exit; there may be an earlier one taken on the first iteration. 2447 // TODO: Given we know the backedge can't be taken, we should go ahead 2448 // and break it. Or at least, kill all the header phis and simplify. 2449 if (ExitCount->isZero()) { 2450 FoldExit(ExitingBB, true); 2451 Changed = true; 2452 continue; 2453 } 2454 2455 // If we end up with a pointer exit count, bail. Note that we can end up 2456 // with a pointer exit count for one exiting block, and not for another in 2457 // the same loop. 2458 if (!ExitCount->getType()->isIntegerTy() || 2459 !MaxExitCount->getType()->isIntegerTy()) 2460 continue; 2461 2462 Type *WiderType = 2463 SE->getWiderType(MaxExitCount->getType(), ExitCount->getType()); 2464 ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType); 2465 MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType); 2466 assert(MaxExitCount->getType() == ExitCount->getType()); 2467 2468 // Can we prove that some other exit must be taken strictly before this 2469 // one? 2470 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, 2471 MaxExitCount, ExitCount)) { 2472 FoldExit(ExitingBB, false); 2473 Changed = true; 2474 continue; 2475 } 2476 2477 // As we run, keep track of which exit counts we've encountered. If we 2478 // find a duplicate, we've found an exit which would have exited on the 2479 // exiting iteration, but (from the visit order) strictly follows another 2480 // which does the same and is thus dead. 2481 if (!DominatingExitCounts.insert(ExitCount).second) { 2482 FoldExit(ExitingBB, false); 2483 Changed = true; 2484 continue; 2485 } 2486 2487 // TODO: There might be another oppurtunity to leverage SCEV's reasoning 2488 // here. If we kept track of the min of dominanting exits so far, we could 2489 // discharge exits with EC >= MDEC. This is less powerful than the existing 2490 // transform (since later exits aren't considered), but potentially more 2491 // powerful for any case where SCEV can prove a >=u b, but neither a == b 2492 // or a >u b. Such a case is not currently known. 2493 } 2494 return Changed; 2495 } 2496 2497 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { 2498 SmallVector<BasicBlock*, 16> ExitingBlocks; 2499 L->getExitingBlocks(ExitingBlocks); 2500 2501 // Finally, see if we can rewrite our exit conditions into a loop invariant 2502 // form. If we have a read-only loop, and we can tell that we must exit down 2503 // a path which does not need any of the values computed within the loop, we 2504 // can rewrite the loop to exit on the first iteration. Note that this 2505 // doesn't either a) tell us the loop exits on the first iteration (unless 2506 // *all* exits are predicateable) or b) tell us *which* exit might be taken. 2507 // This transformation looks a lot like a restricted form of dead loop 2508 // elimination, but restricted to read-only loops and without neccesssarily 2509 // needing to kill the loop entirely. 2510 if (!LoopPredication) 2511 return false; 2512 2513 if (!SE->hasLoopInvariantBackedgeTakenCount(L)) 2514 return false; 2515 2516 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits 2517 // through *explicit* control flow. We have to eliminate the possibility of 2518 // implicit exits (see below) before we know it's truly exact. 2519 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L); 2520 if (isa<SCEVCouldNotCompute>(ExactBTC) || 2521 !SE->isLoopInvariant(ExactBTC, L) || 2522 !isSafeToExpand(ExactBTC, *SE)) 2523 return false; 2524 2525 // If we end up with a pointer exit count, bail. It may be unsized. 2526 if (!ExactBTC->getType()->isIntegerTy()) 2527 return false; 2528 2529 auto BadExit = [&](BasicBlock *ExitingBB) { 2530 // If our exiting block exits multiple loops, we can only rewrite the 2531 // innermost one. Otherwise, we're changing how many times the innermost 2532 // loop runs before it exits. 2533 if (LI->getLoopFor(ExitingBB) != L) 2534 return true; 2535 2536 // Can't rewrite non-branch yet. 2537 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 2538 if (!BI) 2539 return true; 2540 2541 // If already constant, nothing to do. 2542 if (isa<Constant>(BI->getCondition())) 2543 return true; 2544 2545 // If the exit block has phis, we need to be able to compute the values 2546 // within the loop which contains them. This assumes trivially lcssa phis 2547 // have already been removed; TODO: generalize 2548 BasicBlock *ExitBlock = 2549 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0); 2550 if (!ExitBlock->phis().empty()) 2551 return true; 2552 2553 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2554 assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count"); 2555 if (!SE->isLoopInvariant(ExitCount, L) || 2556 !isSafeToExpand(ExitCount, *SE)) 2557 return true; 2558 2559 // If we end up with a pointer exit count, bail. It may be unsized. 2560 if (!ExitCount->getType()->isIntegerTy()) 2561 return true; 2562 2563 return false; 2564 }; 2565 2566 // If we have any exits which can't be predicated themselves, than we can't 2567 // predicate any exit which isn't guaranteed to execute before it. Consider 2568 // two exits (a) and (b) which would both exit on the same iteration. If we 2569 // can predicate (b), but not (a), and (a) preceeds (b) along some path, then 2570 // we could convert a loop from exiting through (a) to one exiting through 2571 // (b). Note that this problem exists only for exits with the same exit 2572 // count, and we could be more aggressive when exit counts are known inequal. 2573 llvm::sort(ExitingBlocks, 2574 [&](BasicBlock *A, BasicBlock *B) { 2575 // std::sort sorts in ascending order, so we want the inverse of 2576 // the normal dominance relation, plus a tie breaker for blocks 2577 // unordered by dominance. 2578 if (DT->properlyDominates(A, B)) return true; 2579 if (DT->properlyDominates(B, A)) return false; 2580 return A->getName() < B->getName(); 2581 }); 2582 // Check to see if our exit blocks are a total order (i.e. a linear chain of 2583 // exits before the backedge). If they aren't, reasoning about reachability 2584 // is complicated and we choose not to for now. 2585 for (unsigned i = 1; i < ExitingBlocks.size(); i++) 2586 if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])) 2587 return false; 2588 2589 // Given our sorted total order, we know that exit[j] must be evaluated 2590 // after all exit[i] such j > i. 2591 for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++) 2592 if (BadExit(ExitingBlocks[i])) { 2593 ExitingBlocks.resize(i); 2594 break; 2595 } 2596 2597 if (ExitingBlocks.empty()) 2598 return false; 2599 2600 // We rely on not being able to reach an exiting block on a later iteration 2601 // then it's statically compute exit count. The implementaton of 2602 // getExitCount currently has this invariant, but assert it here so that 2603 // breakage is obvious if this ever changes.. 2604 assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) { 2605 return DT->dominates(ExitingBB, L->getLoopLatch()); 2606 })); 2607 2608 // At this point, ExitingBlocks consists of only those blocks which are 2609 // predicatable. Given that, we know we have at least one exit we can 2610 // predicate if the loop is doesn't have side effects and doesn't have any 2611 // implicit exits (because then our exact BTC isn't actually exact). 2612 // @Reviewers - As structured, this is O(I^2) for loop nests. Any 2613 // suggestions on how to improve this? I can obviously bail out for outer 2614 // loops, but that seems less than ideal. MemorySSA can find memory writes, 2615 // is that enough for *all* side effects? 2616 for (BasicBlock *BB : L->blocks()) 2617 for (auto &I : *BB) 2618 // TODO:isGuaranteedToTransfer 2619 if (I.mayHaveSideEffects() || I.mayThrow()) 2620 return false; 2621 2622 bool Changed = false; 2623 // Finally, do the actual predication for all predicatable blocks. A couple 2624 // of notes here: 2625 // 1) We don't bother to constant fold dominated exits with identical exit 2626 // counts; that's simply a form of CSE/equality propagation and we leave 2627 // it for dedicated passes. 2628 // 2) We insert the comparison at the branch. Hoisting introduces additional 2629 // legality constraints and we leave that to dedicated logic. We want to 2630 // predicate even if we can't insert a loop invariant expression as 2631 // peeling or unrolling will likely reduce the cost of the otherwise loop 2632 // varying check. 2633 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator()); 2634 IRBuilder<> B(L->getLoopPreheader()->getTerminator()); 2635 Value *ExactBTCV = nullptr; // Lazily generated if needed. 2636 for (BasicBlock *ExitingBB : ExitingBlocks) { 2637 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2638 2639 auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2640 Value *NewCond; 2641 if (ExitCount == ExactBTC) { 2642 NewCond = L->contains(BI->getSuccessor(0)) ? 2643 B.getFalse() : B.getTrue(); 2644 } else { 2645 Value *ECV = Rewriter.expandCodeFor(ExitCount); 2646 if (!ExactBTCV) 2647 ExactBTCV = Rewriter.expandCodeFor(ExactBTC); 2648 Value *RHS = ExactBTCV; 2649 if (ECV->getType() != RHS->getType()) { 2650 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType()); 2651 ECV = B.CreateZExt(ECV, WiderTy); 2652 RHS = B.CreateZExt(RHS, WiderTy); 2653 } 2654 auto Pred = L->contains(BI->getSuccessor(0)) ? 2655 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; 2656 NewCond = B.CreateICmp(Pred, ECV, RHS); 2657 } 2658 Value *OldCond = BI->getCondition(); 2659 BI->setCondition(NewCond); 2660 if (OldCond->use_empty()) 2661 DeadInsts.emplace_back(OldCond); 2662 Changed = true; 2663 } 2664 2665 return Changed; 2666 } 2667 2668 //===----------------------------------------------------------------------===// 2669 // IndVarSimplify driver. Manage several subpasses of IV simplification. 2670 //===----------------------------------------------------------------------===// 2671 2672 bool IndVarSimplify::run(Loop *L) { 2673 // We need (and expect!) the incoming loop to be in LCSSA. 2674 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2675 "LCSSA required to run indvars!"); 2676 2677 // If LoopSimplify form is not available, stay out of trouble. Some notes: 2678 // - LSR currently only supports LoopSimplify-form loops. Indvars' 2679 // canonicalization can be a pessimization without LSR to "clean up" 2680 // afterwards. 2681 // - We depend on having a preheader; in particular, 2682 // Loop::getCanonicalInductionVariable only supports loops with preheaders, 2683 // and we're in trouble if we can't find the induction variable even when 2684 // we've manually inserted one. 2685 // - LFTR relies on having a single backedge. 2686 if (!L->isLoopSimplifyForm()) 2687 return false; 2688 2689 #ifndef NDEBUG 2690 // Used below for a consistency check only 2691 // Note: Since the result returned by ScalarEvolution may depend on the order 2692 // in which previous results are added to its cache, the call to 2693 // getBackedgeTakenCount() may change following SCEV queries. 2694 const SCEV *BackedgeTakenCount; 2695 if (VerifyIndvars) 2696 BackedgeTakenCount = SE->getBackedgeTakenCount(L); 2697 #endif 2698 2699 bool Changed = false; 2700 // If there are any floating-point recurrences, attempt to 2701 // transform them to use integer recurrences. 2702 Changed |= rewriteNonIntegerIVs(L); 2703 2704 // Create a rewriter object which we'll use to transform the code with. 2705 SCEVExpander Rewriter(*SE, DL, "indvars"); 2706 #ifndef NDEBUG 2707 Rewriter.setDebugType(DEBUG_TYPE); 2708 #endif 2709 2710 // Eliminate redundant IV users. 2711 // 2712 // Simplification works best when run before other consumers of SCEV. We 2713 // attempt to avoid evaluating SCEVs for sign/zero extend operations until 2714 // other expressions involving loop IVs have been evaluated. This helps SCEV 2715 // set no-wrap flags before normalizing sign/zero extension. 2716 Rewriter.disableCanonicalMode(); 2717 Changed |= simplifyAndExtend(L, Rewriter, LI); 2718 2719 // Check to see if we can compute the final value of any expressions 2720 // that are recurrent in the loop, and substitute the exit values from the 2721 // loop into any instructions outside of the loop that use the final values 2722 // of the current expressions. 2723 if (ReplaceExitValue != NeverRepl) { 2724 if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT, 2725 ReplaceExitValue, DeadInsts)) { 2726 NumReplaced += Rewrites; 2727 Changed = true; 2728 } 2729 } 2730 2731 // Eliminate redundant IV cycles. 2732 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); 2733 2734 // Try to eliminate loop exits based on analyzeable exit counts 2735 if (optimizeLoopExits(L, Rewriter)) { 2736 Changed = true; 2737 // Given we've changed exit counts, notify SCEV 2738 SE->forgetLoop(L); 2739 } 2740 2741 // Try to form loop invariant tests for loop exits by changing how many 2742 // iterations of the loop run when that is unobservable. 2743 if (predicateLoopExits(L, Rewriter)) { 2744 Changed = true; 2745 // Given we've changed exit counts, notify SCEV 2746 SE->forgetLoop(L); 2747 } 2748 2749 // If we have a trip count expression, rewrite the loop's exit condition 2750 // using it. 2751 if (!DisableLFTR) { 2752 BasicBlock *PreHeader = L->getLoopPreheader(); 2753 BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); 2754 2755 SmallVector<BasicBlock*, 16> ExitingBlocks; 2756 L->getExitingBlocks(ExitingBlocks); 2757 for (BasicBlock *ExitingBB : ExitingBlocks) { 2758 // Can't rewrite non-branch yet. 2759 if (!isa<BranchInst>(ExitingBB->getTerminator())) 2760 continue; 2761 2762 // If our exitting block exits multiple loops, we can only rewrite the 2763 // innermost one. Otherwise, we're changing how many times the innermost 2764 // loop runs before it exits. 2765 if (LI->getLoopFor(ExitingBB) != L) 2766 continue; 2767 2768 if (!needsLFTR(L, ExitingBB)) 2769 continue; 2770 2771 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2772 if (isa<SCEVCouldNotCompute>(ExitCount)) 2773 continue; 2774 2775 // This was handled above, but as we form SCEVs, we can sometimes refine 2776 // existing ones; this allows exit counts to be folded to zero which 2777 // weren't when optimizeLoopExits saw them. Arguably, we should iterate 2778 // until stable to handle cases like this better. 2779 if (ExitCount->isZero()) 2780 continue; 2781 2782 PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT); 2783 if (!IndVar) 2784 continue; 2785 2786 // Avoid high cost expansions. Note: This heuristic is questionable in 2787 // that our definition of "high cost" is not exactly principled. 2788 if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget, 2789 TTI, PreHeaderBR)) 2790 continue; 2791 2792 // Check preconditions for proper SCEVExpander operation. SCEV does not 2793 // express SCEVExpander's dependencies, such as LoopSimplify. Instead 2794 // any pass that uses the SCEVExpander must do it. This does not work 2795 // well for loop passes because SCEVExpander makes assumptions about 2796 // all loops, while LoopPassManager only forces the current loop to be 2797 // simplified. 2798 // 2799 // FIXME: SCEV expansion has no way to bail out, so the caller must 2800 // explicitly check any assumptions made by SCEV. Brittle. 2801 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount); 2802 if (!AR || AR->getLoop()->getLoopPreheader()) 2803 Changed |= linearFunctionTestReplace(L, ExitingBB, 2804 ExitCount, IndVar, 2805 Rewriter); 2806 } 2807 } 2808 // Clear the rewriter cache, because values that are in the rewriter's cache 2809 // can be deleted in the loop below, causing the AssertingVH in the cache to 2810 // trigger. 2811 Rewriter.clear(); 2812 2813 // Now that we're done iterating through lists, clean up any instructions 2814 // which are now dead. 2815 while (!DeadInsts.empty()) 2816 if (Instruction *Inst = 2817 dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val())) 2818 Changed |= 2819 RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get()); 2820 2821 // The Rewriter may not be used from this point on. 2822 2823 // Loop-invariant instructions in the preheader that aren't used in the 2824 // loop may be sunk below the loop to reduce register pressure. 2825 Changed |= sinkUnusedInvariants(L); 2826 2827 // rewriteFirstIterationLoopExitValues does not rely on the computation of 2828 // trip count and therefore can further simplify exit values in addition to 2829 // rewriteLoopExitValues. 2830 Changed |= rewriteFirstIterationLoopExitValues(L); 2831 2832 // Clean up dead instructions. 2833 Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get()); 2834 2835 // Check a post-condition. 2836 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2837 "Indvars did not preserve LCSSA!"); 2838 2839 // Verify that LFTR, and any other change have not interfered with SCEV's 2840 // ability to compute trip count. We may have *changed* the exit count, but 2841 // only by reducing it. 2842 #ifndef NDEBUG 2843 if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { 2844 SE->forgetLoop(L); 2845 const SCEV *NewBECount = SE->getBackedgeTakenCount(L); 2846 if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) < 2847 SE->getTypeSizeInBits(NewBECount->getType())) 2848 NewBECount = SE->getTruncateOrNoop(NewBECount, 2849 BackedgeTakenCount->getType()); 2850 else 2851 BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, 2852 NewBECount->getType()); 2853 assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount, 2854 NewBECount) && "indvars must preserve SCEV"); 2855 } 2856 if (VerifyMemorySSA && MSSAU) 2857 MSSAU->getMemorySSA()->verifyMemorySSA(); 2858 #endif 2859 2860 return Changed; 2861 } 2862 2863 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, 2864 LoopStandardAnalysisResults &AR, 2865 LPMUpdater &) { 2866 Function *F = L.getHeader()->getParent(); 2867 const DataLayout &DL = F->getParent()->getDataLayout(); 2868 2869 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA); 2870 if (!IVS.run(&L)) 2871 return PreservedAnalyses::all(); 2872 2873 auto PA = getLoopPassPreservedAnalyses(); 2874 PA.preserveSet<CFGAnalyses>(); 2875 if (AR.MSSA) 2876 PA.preserve<MemorySSAAnalysis>(); 2877 return PA; 2878 } 2879 2880 namespace { 2881 2882 struct IndVarSimplifyLegacyPass : public LoopPass { 2883 static char ID; // Pass identification, replacement for typeid 2884 2885 IndVarSimplifyLegacyPass() : LoopPass(ID) { 2886 initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry()); 2887 } 2888 2889 bool runOnLoop(Loop *L, LPPassManager &LPM) override { 2890 if (skipLoop(L)) 2891 return false; 2892 2893 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 2894 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 2895 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2896 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); 2897 auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr; 2898 auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); 2899 auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr; 2900 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 2901 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>(); 2902 MemorySSA *MSSA = nullptr; 2903 if (MSSAAnalysis) 2904 MSSA = &MSSAAnalysis->getMSSA(); 2905 2906 IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA); 2907 return IVS.run(L); 2908 } 2909 2910 void getAnalysisUsage(AnalysisUsage &AU) const override { 2911 AU.setPreservesCFG(); 2912 AU.addPreserved<MemorySSAWrapperPass>(); 2913 getLoopAnalysisUsage(AU); 2914 } 2915 }; 2916 2917 } // end anonymous namespace 2918 2919 char IndVarSimplifyLegacyPass::ID = 0; 2920 2921 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", 2922 "Induction Variable Simplification", false, false) 2923 INITIALIZE_PASS_DEPENDENCY(LoopPass) 2924 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars", 2925 "Induction Variable Simplification", false, false) 2926 2927 Pass *llvm::createIndVarSimplifyPass() { 2928 return new IndVarSimplifyLegacyPass(); 2929 } 2930