//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened, "Number of indvars widened");
STATISTIC(NumReplaced, "Number of exit values replaced");
STATISTIC(NumLFTR, "Number of loop exit tests replaced");
STATISTIC(NumElimExt, "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV, "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
             "effect in release builds. (Note: this adds additional SCEV "
             "queries potentially changing the analysis result)"));

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
    "indvars-post-increment-ranges", cl::Hidden,
    cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
    cl::init(true));

static cl::opt<bool>
    DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
                cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
    LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                    cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
    AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                    cl::desc("Allow widening of indvars to eliminate s/zext"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  SmallVector<WeakTrackingVH, 16> DeadInsts;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to eliminate loop exits based on analyzeable exit counts
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);
};

} // end anonymous namespace

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
///   for (double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for (int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge ^ 1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if the increment operation is not
  // an add or the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue - InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue + IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue - ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue + IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName() + ".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all? (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.) Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetics on the wider indvar are more
  // expensive than those on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def. This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative. Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo *LI;
  Loop *L;
  ScalarEvolution *SE;
  DominatorTree *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not, we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *, 16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantUse(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// 0 for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>.
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the no-wrap
  // behavior depends on. Non-control-equivalent instructions can be mapped to
  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
  // semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop? If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension). For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive. Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}

// widenIVUse avoids generating a trunc by evaluating the use as an AddRec;
// this will not work when:
//  1) SCEV traces back to an instruction inside the loop that SCEV can not
//     expand, e.g. add %indvar, (load %addr)
//  2) SCEV finds a loop variant, e.g. add %indvar, %loopvariant
// While SCEV fails to avoid the trunc, we can still try to use an instruction
// combining approach to prove that the trunc is not required. This can be
// further extended with other instruction combining checks, but for now we
// handle the following case (sub can be "add" and "mul", "nsw + sext" can be
// "nuw + zext"):
//
// Src:
//   %c = sub nsw %b, %indvar
//   %d = sext %c to i64
// Dst:
//   %indvar.ext1 = sext %indvar to i64
//   %m = sext %b to i64
//   %d = sub nsw i64 %m, %indvar.ext1
// Therefore, as long as the result of add/sub/mul is extended to the wide type,
// no trunc is required regardless of how %b is generated.
// This pattern is common when calculating addresses on 64-bit architectures.
bool WidenIV::widenWithVariantUse(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  // Handle the common case of add<nsw/nuw>.
  const unsigned OpCode = NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return false;

  // The operand that is not defined by NarrowDef of DU. Let's call it the
  // other operand.
  assert((NarrowUse->getOperand(0) == NarrowDef ||
          NarrowUse->getOperand(1) == NarrowDef) &&
         "bad DU");

  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(NarrowUse);
  ExtendKind ExtKind = getExtendKind(NarrowDef);
  bool CanSignExtend = ExtKind == SignExtended && OBO->hasNoSignedWrap();
  bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap();
  if (!CanSignExtend && !CanZeroExtend)
    return false;

  // Verify that the defining operand is an AddRec.
  const SCEV *Op1 = SE->getSCEV(WideDef);
  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
    return false;

  for (Use &U : NarrowUse->uses()) {
    Instruction *User = nullptr;
    if (ExtKind == SignExtended)
      User = dyn_cast<SExtInst>(U.getUser());
    else
      User = dyn_cast<ZExtInst>(U.getUser());
    if (!User || User->getType() != WideType)
      return false;
  }

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  // Generate the widened use instruction.
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      ExtKind, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      ExtKind, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  ExtendKindMap[NarrowUse] = ExtKind;

  for (Use &U : NarrowUse->uses()) {
    Instruction *User = nullptr;
    if (ExtKind == SignExtended)
      User = cast<SExtInst>(U.getUser());
    else
      User = cast<ZExtInst>(U.getUser());
    assert(User->getType() == WideType && "Checked before!");
    LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
                      << *WideBO << "\n");
    ++NumElimExt;
    User->replaceAllUsesWith(WideBO);
    DeadInsts.emplace_back(User);
  }
  return true;
}

/// Determine whether an individual user of the narrow IV can be widened. If so,
/// return the wide clone of the user.
Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
  assert(ExtendKindMap.count(DU.NarrowDef) &&
         "Should already know the kind of extension used to widen NarrowDef");

  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
    if (LI->getLoopFor(UsePhi->getParent()) != L) {
      // For LCSSA phis, sink the truncate outside the loop.
      // After SimplifyCFG most loop exit targets have a single predecessor.
      // Otherwise fall back to a truncate within the loop.
      if (UsePhi->getNumOperands() != 1)
        truncateIVUse(DU, DT, LI);
      else {
        // Widening the PHI requires us to insert a trunc. The logical place
        // for this trunc is in the same BB as the PHI. This is not possible if
        // the BB is terminated by a catchswitch.
        if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
          return nullptr;

        PHINode *WidePhi =
            PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
                            UsePhi);
        WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
        IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
        Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
        UsePhi->replaceAllUsesWith(Trunc);
        DeadInsts.emplace_back(UsePhi);
        LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
                          << *WidePhi << "\n");
      }
      return nullptr;
    }
  }

  // This narrow use can be widened by a sext if it's non-negative or its narrow
  // def was widened by a sext. Same for zext.
  auto canWidenBySExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
  };
  auto canWidenByZExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
  };

  // Our raison d'etre! Eliminate sign and zero extension.
  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
      (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
    Value *NewDef = DU.WideDef;
    if (DU.NarrowUse->getType() != WideType) {
      unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
      if (CastWidth < IVWidth) {
        // The cast isn't as wide as the IV, so insert a Trunc.
        IRBuilder<> Builder(DU.NarrowUse);
        NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
      } else {
        // A wider extend was hidden behind a narrower one. This may induce
        // another round of IV widening in which the intermediate IV becomes
        // dead. It should be very rare.
        LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
                          << " not wide enough to subsume " << *DU.NarrowUse
                          << "\n");
        DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
        NewDef = DU.NarrowUse;
      }
    }
    if (NewDef != DU.NarrowUse) {
      LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
                        << " replaced by " << *DU.WideDef << "\n");
      ++NumElimExt;
      DU.NarrowUse->replaceAllUsesWith(NewDef);
      DeadInsts.emplace_back(DU.NarrowUse);
    }
    // Now that the extend is gone, we want to expose its uses for potential
    // further simplification. We don't need to directly inform SimplifyIVUsers
    // of the new users, because their parent IV will be processed later as a
    // new loop phi. If we preserved IVUsers analysis, we would also want to
    // push the uses of WideDef here.

    // No further widening is needed. The deceased [sz]ext had done it for us.
    return nullptr;
  }

  // Does this user itself evaluate to a recurrence after widening?
  WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
  if (!WideAddRec.first)
    WideAddRec = getWideRecurrence(DU);

  assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
  if (!WideAddRec.first) {
    // If use is a loop condition, try to promote the condition instead of
    // truncating the IV first.
    if (widenLoopCompare(DU))
      return nullptr;

    // We are here about to generate a truncate instruction that may hurt
    // performance because the scalar evolution expression computed earlier
    // in WideAddRec.first does not indicate a polynomial induction expression.
    // In that case, look at the operands of the use instruction to determine
    // if we can still widen the use instead of truncating its operand.
    if (widenWithVariantUse(DU))
      return nullptr;

    // This user does not evaluate to a recurrence after widening, so don't
    // follow it. Instead insert a Trunc to kill off the original use,
    // eventually isolating the original narrow IV so it can be removed.
    truncateIVUse(DU, DT, LI);
    return nullptr;
  }
  // Assume block terminators cannot evaluate to a recurrence. We can't insert
  // a Trunc after a terminator if there happens to be a critical edge.
  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
         "SCEV is not expected to evaluate a block terminator");

  // Reuse the IV increment that SCEVExpander created as long as it dominates
  // NarrowUse.
  Instruction *WideUse = nullptr;
  if (WideAddRec.first == WideIncExpr &&
      Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
    WideUse = WideInc;
  else {
    WideUse = cloneIVUser(DU, WideAddRec.first);
    if (!WideUse)
      return nullptr;
  }
  // Evaluation of WideAddRec ensured that the narrow expression could be
  // extended outside the loop without overflow. This suggests that the wide use
  // evaluates to the same expression as the extended narrow use, but doesn't
  // absolutely guarantee it. Hence the following failsafe check. In rare cases
  // where it fails, we simply throw away the newly created wide use.
  if (WideAddRec.first != SE->getSCEV(WideUse)) {
    LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
                      << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
                      << "\n");
    DeadInsts.emplace_back(WideUse);
    return nullptr;
  }

  // If we reached this point, then we are going to replace
  // DU.NarrowUse with WideUse. Reattach DbgValue then.
  replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);

  ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
  // Returning WideUse pushes it on the worklist.
  return WideUse;
}

/// Add eligible users of NarrowDef to NarrowIVUsers.
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
  bool NonNegativeDef =
      SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
                           SE->getZero(NarrowSCEV->getType()));
  for (User *U : NarrowDef->users()) {
    Instruction *NarrowUser = cast<Instruction>(U);

    // Handle data flow merges and bizarre phi cycles.
    if (!Widened.insert(NarrowUser).second)
      continue;

    bool NonNegativeUse = false;
    if (!NonNegativeDef) {
      // We might have control-dependent range information for this context.
      if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
        NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
    }

    NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
                               NonNegativeDef || NonNegativeUse);
  }
}

/// Process a single induction variable. First use the SCEVExpander to create a
/// wide induction variable that evaluates to the same recurrence as the
/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
/// def-use chain. After widenIVUse has processed all interesting IV users, the
/// narrow IV will be isolated for removal by DeleteDeadPHIs.
///
/// It would be simpler to delete uses as they are processed, but we must avoid
/// invalidating SCEV expressions.
PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
  // Bail if we disallowed widening.
  if (!AllowIVWidening)
    return nullptr;

  // Is this phi an induction variable?
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
  if (!AddRec)
    return nullptr;

  // Widen the induction variable expression.
  const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
                               ? SE->getSignExtendExpr(AddRec, WideType)
                               : SE->getZeroExtendExpr(AddRec, WideType);

  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
         "Expect the new IV expression to preserve its type");

  // Can the IV be extended outside the loop without overflow?
  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return nullptr;

  // An AddRec must have loop-invariant operands. Since this AddRec is
  // materialized by a loop header phi, the expression cannot have any post-loop
  // operands, so they must dominate the loop header.
  assert(
      SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
      SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
      "Loop header phi recurrence inputs do not dominate the loop");

  // Iterate over IV uses (including transitive ones) looking for IV increments
  // of the form 'add nsw %iv, <const>'. For each increment and each use of
  // the increment calculate control-dependent range information based on
  // dominating conditions inside of the loop (e.g. a range check inside of the
  // loop). Calculated ranges are stored in the PostIncRangeInfos map.
  //
  // Control-dependent range information is later used to prove that a narrow
  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
  // this on demand because when pushNarrowIVUsers needs this information some
  // of the dominating conditions might be already widened.
  if (UsePostIncrementRanges)
    calculatePostIncRanges(OrigPhi);

  // The rewriter provides a value for the desired IV expression. This may
  // either find an existing phi or materialize a new one. Either way, we
  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
  // of the phi-SCC dominates the loop entry.
  Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt();
  Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt);
  // If the expanded value is not a phi node (for example a cast node, like
  // bitcast, inttoptr, or ptrtoint), just skip it for now.
1415 if (!(WidePhi = dyn_cast<PHINode>(ExpandInst))) { 1416 // if the cast node is an inserted instruction without any user, we should 1417 // remove it to make sure the pass don't touch the function as we can not 1418 // wide the phi. 1419 if (ExpandInst->hasNUses(0) && 1420 Rewriter.isInsertedInstruction(cast<Instruction>(ExpandInst))) 1421 DeadInsts.emplace_back(ExpandInst); 1422 return nullptr; 1423 } 1424 1425 // Remembering the WideIV increment generated by SCEVExpander allows 1426 // widenIVUse to reuse it when widening the narrow IV's increment. We don't 1427 // employ a general reuse mechanism because the call above is the only call to 1428 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. 1429 if (BasicBlock *LatchBlock = L->getLoopLatch()) { 1430 WideInc = 1431 cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); 1432 WideIncExpr = SE->getSCEV(WideInc); 1433 // Propagate the debug location associated with the original loop increment 1434 // to the new (widened) increment. 1435 auto *OrigInc = 1436 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); 1437 WideInc->setDebugLoc(OrigInc->getDebugLoc()); 1438 } 1439 1440 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); 1441 ++NumWidened; 1442 1443 // Traverse the def-use chain using a worklist starting at the original IV. 1444 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); 1445 1446 Widened.insert(OrigPhi); 1447 pushNarrowIVUsers(OrigPhi, WidePhi); 1448 1449 while (!NarrowIVUsers.empty()) { 1450 NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); 1451 1452 // Process a def-use edge. This may replace the use, so don't hold a 1453 // use_iterator across it. 1454 Instruction *WideUse = widenIVUse(DU, Rewriter); 1455 1456 // Follow all def-use edges from the previous narrow use. 1457 if (WideUse) 1458 pushNarrowIVUsers(DU.NarrowUse, WideUse); 1459 1460 // widenIVUse may have removed the def-use edge. 1461 if (DU.NarrowDef->use_empty()) 1462 DeadInsts.emplace_back(DU.NarrowDef); 1463 } 1464 1465 // Attach any debug information to the new PHI. 1466 replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT); 1467 1468 return WidePhi; 1469 } 1470 1471 /// Calculates control-dependent range for the given def at the given context 1472 /// by looking at dominating conditions inside of the loop 1473 void WidenIV::calculatePostIncRange(Instruction *NarrowDef, 1474 Instruction *NarrowUser) { 1475 using namespace llvm::PatternMatch; 1476 1477 Value *NarrowDefLHS; 1478 const APInt *NarrowDefRHS; 1479 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), 1480 m_APInt(NarrowDefRHS))) || 1481 !NarrowDefRHS->isNonNegative()) 1482 return; 1483 1484 auto UpdateRangeFromCondition = [&] (Value *Condition, 1485 bool TrueDest) { 1486 CmpInst::Predicate Pred; 1487 Value *CmpRHS; 1488 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), 1489 m_Value(CmpRHS)))) 1490 return; 1491 1492 CmpInst::Predicate P = 1493 TrueDest ? 
Pred : CmpInst::getInversePredicate(Pred); 1494 1495 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); 1496 auto CmpConstrainedLHSRange = 1497 ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); 1498 auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( 1499 *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); 1500 1501 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); 1502 }; 1503 1504 auto UpdateRangeFromGuards = [&](Instruction *Ctx) { 1505 if (!HasGuards) 1506 return; 1507 1508 for (Instruction &I : make_range(Ctx->getIterator().getReverse(), 1509 Ctx->getParent()->rend())) { 1510 Value *C = nullptr; 1511 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) 1512 UpdateRangeFromCondition(C, /*TrueDest=*/true); 1513 } 1514 }; 1515 1516 UpdateRangeFromGuards(NarrowUser); 1517 1518 BasicBlock *NarrowUserBB = NarrowUser->getParent(); 1519 // If NarrowUserBB is statically unreachable asking dominator queries may 1520 // yield surprising results. (e.g. the block may not have a dom tree node) 1521 if (!DT->isReachableFromEntry(NarrowUserBB)) 1522 return; 1523 1524 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); 1525 L->contains(DTB->getBlock()); 1526 DTB = DTB->getIDom()) { 1527 auto *BB = DTB->getBlock(); 1528 auto *TI = BB->getTerminator(); 1529 UpdateRangeFromGuards(TI); 1530 1531 auto *BI = dyn_cast<BranchInst>(TI); 1532 if (!BI || !BI->isConditional()) 1533 continue; 1534 1535 auto *TrueSuccessor = BI->getSuccessor(0); 1536 auto *FalseSuccessor = BI->getSuccessor(1); 1537 1538 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { 1539 return BBE.isSingleEdge() && 1540 DT->dominates(BBE, NarrowUser->getParent()); 1541 }; 1542 1543 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) 1544 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); 1545 1546 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) 1547 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); 1548 } 1549 } 1550 1551 /// Calculates PostIncRangeInfos map for the given IV 1552 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { 1553 SmallPtrSet<Instruction *, 16> Visited; 1554 SmallVector<Instruction *, 6> Worklist; 1555 Worklist.push_back(OrigPhi); 1556 Visited.insert(OrigPhi); 1557 1558 while (!Worklist.empty()) { 1559 Instruction *NarrowDef = Worklist.pop_back_val(); 1560 1561 for (Use &U : NarrowDef->uses()) { 1562 auto *NarrowUser = cast<Instruction>(U.getUser()); 1563 1564 // Don't go looking outside the current loop. 1565 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; 1566 if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) 1567 continue; 1568 1569 if (!Visited.insert(NarrowUser).second) 1570 continue; 1571 1572 Worklist.push_back(NarrowUser); 1573 1574 calculatePostIncRange(NarrowDef, NarrowUser); 1575 } 1576 } 1577 } 1578 1579 //===----------------------------------------------------------------------===// 1580 // Live IV Reduction - Minimize IVs live across the loop. 1581 //===----------------------------------------------------------------------===// 1582 1583 //===----------------------------------------------------------------------===// 1584 // Simplification of IV users based on SCEV evaluation. 
1585 //===----------------------------------------------------------------------===// 1586 1587 namespace { 1588 1589 class IndVarSimplifyVisitor : public IVVisitor { 1590 ScalarEvolution *SE; 1591 const TargetTransformInfo *TTI; 1592 PHINode *IVPhi; 1593 1594 public: 1595 WideIVInfo WI; 1596 1597 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, 1598 const TargetTransformInfo *TTI, 1599 const DominatorTree *DTree) 1600 : SE(SCEV), TTI(TTI), IVPhi(IV) { 1601 DT = DTree; 1602 WI.NarrowIV = IVPhi; 1603 } 1604 1605 // Implement the interface used by simplifyUsersOfIV. 1606 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } 1607 }; 1608 1609 } // end anonymous namespace 1610 1611 /// Iteratively perform simplification on a worklist of IV users. Each 1612 /// successive simplification may push more users which may themselves be 1613 /// candidates for simplification. 1614 /// 1615 /// Sign/Zero extend elimination is interleaved with IV simplification. 1616 bool IndVarSimplify::simplifyAndExtend(Loop *L, 1617 SCEVExpander &Rewriter, 1618 LoopInfo *LI) { 1619 SmallVector<WideIVInfo, 8> WideIVs; 1620 1621 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction( 1622 Intrinsic::getName(Intrinsic::experimental_guard)); 1623 bool HasGuards = GuardDecl && !GuardDecl->use_empty(); 1624 1625 SmallVector<PHINode*, 8> LoopPhis; 1626 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 1627 LoopPhis.push_back(cast<PHINode>(I)); 1628 } 1629 // Each round of simplification iterates through the SimplifyIVUsers worklist 1630 // for all current phis, then determines whether any IVs can be 1631 // widened. Widening adds new phis to LoopPhis, inducing another round of 1632 // simplification on the wide IVs. 1633 bool Changed = false; 1634 while (!LoopPhis.empty()) { 1635 // Evaluate as many IV expressions as possible before widening any IVs. This 1636 // forces SCEV to set no-wrap flags before evaluating sign/zero 1637 // extension. The first time SCEV attempts to normalize sign/zero extension, 1638 // the result becomes final. So for the most predictable results, we delay 1639 // evaluation of sign/zero extend evaluation until needed, and avoid running 1640 // other SCEV based analysis prior to simplifyAndExtend. 1641 do { 1642 PHINode *CurrIV = LoopPhis.pop_back_val(); 1643 1644 // Information about sign/zero extensions of CurrIV. 1645 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); 1646 1647 Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter, 1648 &Visitor); 1649 1650 if (Visitor.WI.WidestNativeType) { 1651 WideIVs.push_back(Visitor.WI); 1652 } 1653 } while(!LoopPhis.empty()); 1654 1655 for (; !WideIVs.empty(); WideIVs.pop_back()) { 1656 WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards); 1657 if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) { 1658 Changed = true; 1659 LoopPhis.push_back(WidePhi); 1660 } 1661 } 1662 } 1663 return Changed; 1664 } 1665 1666 //===----------------------------------------------------------------------===// 1667 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition. 1668 //===----------------------------------------------------------------------===// 1669 1670 /// Given an Value which is hoped to be part of an add recurance in the given 1671 /// loop, return the associated Phi node if so. Otherwise, return null. Note 1672 /// that this is less general than SCEVs AddRec checking. 
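/// A minimal sketch on hypothetical IR: given
///   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add nsw i32 %iv, 1
/// passing %iv.next as IncV is expected to return %iv, since the phi lives in
/// the loop header and the other add operand is loop invariant.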
1673 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) { 1674 Instruction *IncI = dyn_cast<Instruction>(IncV); 1675 if (!IncI) 1676 return nullptr; 1677 1678 switch (IncI->getOpcode()) { 1679 case Instruction::Add: 1680 case Instruction::Sub: 1681 break; 1682 case Instruction::GetElementPtr: 1683 // An IV counter must preserve its type. 1684 if (IncI->getNumOperands() == 2) 1685 break; 1686 LLVM_FALLTHROUGH; 1687 default: 1688 return nullptr; 1689 } 1690 1691 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0)); 1692 if (Phi && Phi->getParent() == L->getHeader()) { 1693 if (L->isLoopInvariant(IncI->getOperand(1))) 1694 return Phi; 1695 return nullptr; 1696 } 1697 if (IncI->getOpcode() == Instruction::GetElementPtr) 1698 return nullptr; 1699 1700 // Allow add/sub to be commuted. 1701 Phi = dyn_cast<PHINode>(IncI->getOperand(1)); 1702 if (Phi && Phi->getParent() == L->getHeader()) { 1703 if (L->isLoopInvariant(IncI->getOperand(0))) 1704 return Phi; 1705 } 1706 return nullptr; 1707 } 1708 1709 /// Whether the current loop exit test is based on this value. Currently this 1710 /// is limited to a direct use in the loop condition. 1711 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) { 1712 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1713 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition()); 1714 // TODO: Allow non-icmp loop test. 1715 if (!ICmp) 1716 return false; 1717 1718 // TODO: Allow indirect use. 1719 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V; 1720 } 1721 1722 /// linearFunctionTestReplace policy. Return true unless we can show that the 1723 /// current exit test is already sufficiently canonical. 1724 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) { 1725 assert(L->getLoopLatch() && "Must be in simplified form"); 1726 1727 // Avoid converting a constant or loop invariant test back to a runtime 1728 // test. This is critical for when SCEV's cached ExitCount is less precise 1729 // than the current IR (such as after we've proven a particular exit is 1730 // actually dead and thus the BE count never reaches our ExitCount.) 1731 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1732 if (L->isLoopInvariant(BI->getCondition())) 1733 return false; 1734 1735 // Do LFTR to simplify the exit condition to an ICMP. 1736 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); 1737 if (!Cond) 1738 return true; 1739 1740 // Do LFTR to simplify the exit ICMP to EQ/NE 1741 ICmpInst::Predicate Pred = Cond->getPredicate(); 1742 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ) 1743 return true; 1744 1745 // Look for a loop invariant RHS 1746 Value *LHS = Cond->getOperand(0); 1747 Value *RHS = Cond->getOperand(1); 1748 if (!L->isLoopInvariant(RHS)) { 1749 if (!L->isLoopInvariant(LHS)) 1750 return true; 1751 std::swap(LHS, RHS); 1752 } 1753 // Look for a simple IV counter LHS 1754 PHINode *Phi = dyn_cast<PHINode>(LHS); 1755 if (!Phi) 1756 Phi = getLoopPhiForCounter(LHS, L); 1757 1758 if (!Phi) 1759 return true; 1760 1761 // Do LFTR if PHI node is defined in the loop, but is *not* a counter. 1762 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch()); 1763 if (Idx < 0) 1764 return true; 1765 1766 // Do LFTR if the exit condition's IV is *not* a simple counter. 1767 Value *IncV = Phi->getIncomingValue(Idx); 1768 return Phi != getLoopPhiForCounter(IncV, L); 1769 } 1770 1771 /// Return true if undefined behavior would provable be executed on the path to 1772 /// OnPathTo if Root produced a posion result. 
Note that this doesn't say
1773 /// anything about whether OnPathTo is actually executed or whether Root is
1774 /// actually poison. This can be used to assess whether a new use of Root can
1775 /// be added at a location which is control equivalent with OnPathTo (such as
1776 /// immediately before it) without introducing UB which didn't previously
1777 /// exist. Note that a false result conveys no information.
1778 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
1779                                           Instruction *OnPathTo,
1780                                           DominatorTree *DT) {
1781   // Basic approach is to assume Root is poison, propagate poison forward
1782   // through all users we can easily track, and then check whether any of those
1783   // users are provably UB and must execute before our exiting block might
1784   // exit.
1785
1786   // The set of all recursive users we've visited (which are assumed to all be
1787   // poison because of said visit)
1788   SmallSet<const Value *, 16> KnownPoison;
1789   SmallVector<const Instruction*, 16> Worklist;
1790   Worklist.push_back(Root);
1791   while (!Worklist.empty()) {
1792     const Instruction *I = Worklist.pop_back_val();
1793
1794     // If we know this must trigger UB on a path leading to our target.
1795     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
1796       return true;
1797
1798     // If we can't analyze propagation through this instruction, just skip it
1799     // and transitive users.  Safe, as false is a conservative result.
1800     if (!propagatesPoison(cast<Operator>(I)) && I != Root)
1801       continue;
1802
1803     if (KnownPoison.insert(I).second)
1804       for (const User *User : I->users())
1805         Worklist.push_back(cast<Instruction>(User));
1806   }
1807
1808   // Might be non-UB, or might have a path we couldn't prove must execute on
1809   // the way to the exiting bb.
1810   return false;
1811 }
1812
1813 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
1814 /// down to checking that all operands are constant and listing instructions
1815 /// that may hide undef.
1816 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
1817                                unsigned Depth) {
1818   if (isa<Constant>(V))
1819     return !isa<UndefValue>(V);
1820
1821   if (Depth >= 6)
1822     return false;
1823
1824   // Conservatively handle non-constant non-instructions. For example, Arguments
1825   // may be undef.
1826   Instruction *I = dyn_cast<Instruction>(V);
1827   if (!I)
1828     return false;
1829
1830   // Load and return values may be undef.
1831   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
1832     return false;
1833
1834   // Optimistically handle other instructions.
1835   for (Value *Op : I->operands()) {
1836     if (!Visited.insert(Op).second)
1837       continue;
1838     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
1839       return false;
1840   }
1841   return true;
1842 }
1843
1844 /// Return true if the given value is concrete. We must prove that undef can
1845 /// never reach it.
1846 ///
1847 /// TODO: If we decide that this is a good approach to checking for undef, we
1848 /// may factor it into a common location.
1849 static bool hasConcreteDef(Value *V) {
1850   SmallPtrSet<Value*, 8> Visited;
1851   Visited.insert(V);
1852   return hasConcreteDefImpl(V, Visited, 0);
1853 }
1854
1855 /// Return true if this IV has no uses other than the (soon to be rewritten)
1856 /// loop exit test and its own increment; i.e. the IV is almost dead.
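/// For example (hypothetical IR): if %iv is used only by its increment
/// %iv.next and by the exit compare, and %iv.next is used only by %iv and that
/// same compare, the IV is "almost dead"; LFTR may then retarget the exit test
/// to another counter without forcing this one to stay live.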
1857 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
1858   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1859   Value *IncV = Phi->getIncomingValue(LatchIdx);
1860
1861   for (User *U : Phi->users())
1862     if (U != Cond && U != IncV) return false;
1863
1864   for (User *U : IncV->users())
1865     if (U != Cond && U != Phi) return false;
1866   return true;
1867 }
1868
1869 /// Return true if the given phi is a "counter" in L. A counter is an
1870 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
1871 /// step of 1. Note that L must have exactly one latch.
1872 static bool isLoopCounter(PHINode* Phi, Loop *L,
1873                           ScalarEvolution *SE) {
1874   assert(Phi->getParent() == L->getHeader());
1875   assert(L->getLoopLatch());
1876
1877   if (!SE->isSCEVable(Phi->getType()))
1878     return false;
1879
1880   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1881   if (!AR || AR->getLoop() != L || !AR->isAffine())
1882     return false;
1883
1884   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
1885   if (!Step || !Step->isOne())
1886     return false;
1887
1888   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
1889   Value *IncV = Phi->getIncomingValue(LatchIdx);
1890   return (getLoopPhiForCounter(IncV, L) == Phi);
1891 }
1892
1893 /// Search the loop header for a loop counter (an add rec w/ step of one)
1894 /// suitable for use by LFTR.  If multiple counters are available, select the
1895 /// "best" one based on profitability heuristics.
1896 ///
1897 /// BECount may be an i8* pointer type. The pointer difference is already a
1898 /// valid count without scaling the address stride, so it remains a pointer
1899 /// expression as far as SCEV is concerned.
1900 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
1901                                 const SCEV *BECount,
1902                                 ScalarEvolution *SE, DominatorTree *DT) {
1903   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
1904
1905   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
1906
1907   // Loop over all of the PHI nodes, looking for a simple counter.
1908   PHINode *BestPhi = nullptr;
1909   const SCEV *BestInit = nullptr;
1910   BasicBlock *LatchBlock = L->getLoopLatch();
1911   assert(LatchBlock && "Must be in simplified form");
1912   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
1913
1914   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1915     PHINode *Phi = cast<PHINode>(I);
1916     if (!isLoopCounter(Phi, L, SE))
1917       continue;
1918
1919     // Avoid comparing an integer IV against a pointer Limit.
1920     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
1921       continue;
1922
1923     const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1924
1925     // AR may be a pointer type, while BECount is an integer type.
1926     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
1927     // AR may not be a narrower type, or we may never exit.
1928     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
1929     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
1930       continue;
1931
1932     // Avoid reusing a potentially undef value to compute other values that may
1933     // have originally had a concrete definition.
1934     if (!hasConcreteDef(Phi)) {
1935       // We explicitly allow unknown phis as long as they are already used by
1936       // the loop exit test. This is legal since performing LFTR could not
1937       // increase the number of undef users.
1938 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock); 1939 if (!isLoopExitTestBasedOn(Phi, ExitingBB) && 1940 !isLoopExitTestBasedOn(IncPhi, ExitingBB)) 1941 continue; 1942 } 1943 1944 // Avoid introducing undefined behavior due to poison which didn't exist in 1945 // the original program. (Annoyingly, the rules for poison and undef 1946 // propagation are distinct, so this does NOT cover the undef case above.) 1947 // We have to ensure that we don't introduce UB by introducing a use on an 1948 // iteration where said IV produces poison. Our strategy here differs for 1949 // pointers and integer IVs. For integers, we strip and reinfer as needed, 1950 // see code in linearFunctionTestReplace. For pointers, we restrict 1951 // transforms as there is no good way to reinfer inbounds once lost. 1952 if (!Phi->getType()->isIntegerTy() && 1953 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) 1954 continue; 1955 1956 const SCEV *Init = AR->getStart(); 1957 1958 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { 1959 // Don't force a live loop counter if another IV can be used. 1960 if (AlmostDeadIV(Phi, LatchBlock, Cond)) 1961 continue; 1962 1963 // Prefer to count-from-zero. This is a more "canonical" counter form. It 1964 // also prefers integer to pointer IVs. 1965 if (BestInit->isZero() != Init->isZero()) { 1966 if (BestInit->isZero()) 1967 continue; 1968 } 1969 // If two IVs both count from zero or both count from nonzero then the 1970 // narrower is likely a dead phi that has been widened. Use the wider phi 1971 // to allow the other to be eliminated. 1972 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) 1973 continue; 1974 } 1975 BestPhi = Phi; 1976 BestInit = Init; 1977 } 1978 return BestPhi; 1979 } 1980 1981 /// Insert an IR expression which computes the value held by the IV IndVar 1982 /// (which must be an loop counter w/unit stride) after the backedge of loop L 1983 /// is taken ExitCount times. 1984 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, 1985 const SCEV *ExitCount, bool UsePostInc, Loop *L, 1986 SCEVExpander &Rewriter, ScalarEvolution *SE) { 1987 assert(isLoopCounter(IndVar, L, SE)); 1988 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 1989 const SCEV *IVInit = AR->getStart(); 1990 1991 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter 1992 // finds a valid pointer IV. Sign extend ExitCount in order to materialize a 1993 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing 1994 // the existing GEPs whenever possible. 1995 if (IndVar->getType()->isPointerTy() && 1996 !ExitCount->getType()->isPointerTy()) { 1997 // IVOffset will be the new GEP offset that is interpreted by GEP as a 1998 // signed value. ExitCount on the other hand represents the loop trip count, 1999 // which is an unsigned value. FindLoopCounter only allows induction 2000 // variables that have a positive unit stride of one. This means we don't 2001 // have to handle the case of negative offsets (yet) and just need to zero 2002 // extend ExitCount. 2003 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); 2004 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy); 2005 if (UsePostInc) 2006 IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy)); 2007 2008 // Expand the code for the iteration count. 
2009 assert(SE->isLoopInvariant(IVOffset, L) && 2010 "Computed iteration count is not loop invariant!"); 2011 2012 // We could handle pointer IVs other than i8*, but we need to compensate for 2013 // gep index scaling. 2014 assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), 2015 cast<PointerType>(IndVar->getType()) 2016 ->getElementType())->isOne() && 2017 "unit stride pointer IV must be i8*"); 2018 2019 const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset); 2020 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2021 return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI); 2022 } else { 2023 // In any other case, convert both IVInit and ExitCount to integers before 2024 // comparing. This may result in SCEV expansion of pointers, but in practice 2025 // SCEV will fold the pointer arithmetic away as such: 2026 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). 2027 // 2028 // Valid Cases: (1) both integers is most common; (2) both may be pointers 2029 // for simple memset-style loops. 2030 // 2031 // IVInit integer and ExitCount pointer would only occur if a canonical IV 2032 // were generated on top of case #2, which is not expected. 2033 2034 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 2035 // For unit stride, IVCount = Start + ExitCount with 2's complement 2036 // overflow. 2037 2038 // For integer IVs, truncate the IV before computing IVInit + BECount, 2039 // unless we know apriori that the limit must be a constant when evaluated 2040 // in the bitwidth of the IV. We prefer (potentially) keeping a truncate 2041 // of the IV in the loop over a (potentially) expensive expansion of the 2042 // widened exit count add(zext(add)) expression. 2043 if (SE->getTypeSizeInBits(IVInit->getType()) 2044 > SE->getTypeSizeInBits(ExitCount->getType())) { 2045 if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount)) 2046 ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType()); 2047 else 2048 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType()); 2049 } 2050 2051 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount); 2052 2053 if (UsePostInc) 2054 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); 2055 2056 // Expand the code for the iteration count. 2057 assert(SE->isLoopInvariant(IVLimit, L) && 2058 "Computed iteration count is not loop invariant!"); 2059 // Ensure that we generate the same type as IndVar, or a smaller integer 2060 // type. In the presence of null pointer values, we have an integer type 2061 // SCEV expression (IVInit) for a pointer type IV value (IndVar). 2062 Type *LimitTy = ExitCount->getType()->isPointerTy() ? 2063 IndVar->getType() : ExitCount->getType(); 2064 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2065 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); 2066 } 2067 } 2068 2069 /// This method rewrites the exit condition of the loop to be a canonical != 2070 /// comparison against the incremented loop induction variable. This pass is 2071 /// able to rewrite the exit tests of any loop where the SCEV analysis can 2072 /// determine a loop-invariant trip count of the loop, which is actually a much 2073 /// broader range than just linear tests. 
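/// A rough illustration on hypothetical IR (names invented for the example):
/// an exit test such as
///   %cmp = icmp slt i32 %iv, %n
///   br i1 %cmp, label %body, label %exit
/// may become an equality test of the (possibly post-incremented) IV against a
/// loop-invariant limit expanded by genLoopLimit:
///   %cmp = icmp ne i32 %iv.next, %limit
///   br i1 %cmp, label %body, label %exit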
2074 bool IndVarSimplify:: 2075 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 2076 const SCEV *ExitCount, 2077 PHINode *IndVar, SCEVExpander &Rewriter) { 2078 assert(L->getLoopLatch() && "Loop no longer in simplified form?"); 2079 assert(isLoopCounter(IndVar, L, SE)); 2080 Instruction * const IncVar = 2081 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch())); 2082 2083 // Initialize CmpIndVar to the preincremented IV. 2084 Value *CmpIndVar = IndVar; 2085 bool UsePostInc = false; 2086 2087 // If the exiting block is the same as the backedge block, we prefer to 2088 // compare against the post-incremented value, otherwise we must compare 2089 // against the preincremented value. 2090 if (ExitingBB == L->getLoopLatch()) { 2091 // For pointer IVs, we chose to not strip inbounds which requires us not 2092 // to add a potentially UB introducing use. We need to either a) show 2093 // the loop test we're modifying is already in post-inc form, or b) show 2094 // that adding a use must not introduce UB. 2095 bool SafeToPostInc = 2096 IndVar->getType()->isIntegerTy() || 2097 isLoopExitTestBasedOn(IncVar, ExitingBB) || 2098 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT); 2099 if (SafeToPostInc) { 2100 UsePostInc = true; 2101 CmpIndVar = IncVar; 2102 } 2103 } 2104 2105 // It may be necessary to drop nowrap flags on the incrementing instruction 2106 // if either LFTR moves from a pre-inc check to a post-inc check (in which 2107 // case the increment might have previously been poison on the last iteration 2108 // only) or if LFTR switches to a different IV that was previously dynamically 2109 // dead (and as such may be arbitrarily poison). We remove any nowrap flags 2110 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc 2111 // check), because the pre-inc addrec flags may be adopted from the original 2112 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags. 2113 // TODO: This handling is inaccurate for one case: If we switch to a 2114 // dynamically dead IV that wraps on the first loop iteration only, which is 2115 // not covered by the post-inc addrec. (If the new IV was not dynamically 2116 // dead, it could not be poison on the first iteration in the first place.) 2117 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) { 2118 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar)); 2119 if (BO->hasNoUnsignedWrap()) 2120 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap()); 2121 if (BO->hasNoSignedWrap()) 2122 BO->setHasNoSignedWrap(AR->hasNoSignedWrap()); 2123 } 2124 2125 Value *ExitCnt = genLoopLimit( 2126 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE); 2127 assert(ExitCnt->getType()->isPointerTy() == 2128 IndVar->getType()->isPointerTy() && 2129 "genLoopLimit missed a cast"); 2130 2131 // Insert a new icmp_ne or icmp_eq instruction before the branch. 2132 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2133 ICmpInst::Predicate P; 2134 if (L->contains(BI->getSuccessor(0))) 2135 P = ICmpInst::ICMP_NE; 2136 else 2137 P = ICmpInst::ICMP_EQ; 2138 2139 IRBuilder<> Builder(BI); 2140 2141 // The new loop exit condition should reuse the debug location of the 2142 // original loop exit condition. 
2143 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition())) 2144 Builder.SetCurrentDebugLocation(Cond->getDebugLoc()); 2145 2146 // For integer IVs, if we evaluated the limit in the narrower bitwidth to 2147 // avoid the expensive expansion of the limit expression in the wider type, 2148 // emit a truncate to narrow the IV to the ExitCount type. This is safe 2149 // since we know (from the exit count bitwidth), that we can't self-wrap in 2150 // the narrower type. 2151 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); 2152 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); 2153 if (CmpIndVarSize > ExitCntSize) { 2154 assert(!CmpIndVar->getType()->isPointerTy() && 2155 !ExitCnt->getType()->isPointerTy()); 2156 2157 // Before resorting to actually inserting the truncate, use the same 2158 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend 2159 // the other side of the comparison instead. We still evaluate the limit 2160 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to 2161 // a truncate within in. 2162 bool Extended = false; 2163 const SCEV *IV = SE->getSCEV(CmpIndVar); 2164 const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar), 2165 ExitCnt->getType()); 2166 const SCEV *ZExtTrunc = 2167 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType()); 2168 2169 if (ZExtTrunc == IV) { 2170 Extended = true; 2171 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(), 2172 "wide.trip.count"); 2173 } else { 2174 const SCEV *SExtTrunc = 2175 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType()); 2176 if (SExtTrunc == IV) { 2177 Extended = true; 2178 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(), 2179 "wide.trip.count"); 2180 } 2181 } 2182 2183 if (Extended) { 2184 bool Discard; 2185 L->makeLoopInvariant(ExitCnt, Discard); 2186 } else 2187 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), 2188 "lftr.wideiv"); 2189 } 2190 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 2191 << " LHS:" << *CmpIndVar << '\n' 2192 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==") 2193 << "\n" 2194 << " RHS:\t" << *ExitCnt << "\n" 2195 << "ExitCount:\t" << *ExitCount << "\n" 2196 << " was: " << *BI->getCondition() << "\n"); 2197 2198 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); 2199 Value *OrigCond = BI->getCondition(); 2200 // It's tempting to use replaceAllUsesWith here to fully replace the old 2201 // comparison, but that's not immediately safe, since users of the old 2202 // comparison may not be dominated by the new comparison. Instead, just 2203 // update the branch to use the new comparison; in the common case this 2204 // will make old comparison dead. 2205 BI->setCondition(Cond); 2206 DeadInsts.emplace_back(OrigCond); 2207 2208 ++NumLFTR; 2209 return true; 2210 } 2211 2212 //===----------------------------------------------------------------------===// 2213 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders. 2214 //===----------------------------------------------------------------------===// 2215 2216 /// If there's a single exit block, sink any loop-invariant values that 2217 /// were defined in the preheader but not used inside the loop into the 2218 /// exit block to reduce register pressure in the loop. 
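/// A small hypothetical example: an address computation such as
///   %p = getelementptr inbounds i8, i8* %base, i64 %ofs
/// emitted in the preheader but only consumed after the loop can be moved
/// down into the sole exit block, so it no longer occupies a register across
/// every iteration.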
2219 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { 2220 BasicBlock *ExitBlock = L->getExitBlock(); 2221 if (!ExitBlock) return false; 2222 2223 BasicBlock *Preheader = L->getLoopPreheader(); 2224 if (!Preheader) return false; 2225 2226 bool MadeAnyChanges = false; 2227 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); 2228 BasicBlock::iterator I(Preheader->getTerminator()); 2229 while (I != Preheader->begin()) { 2230 --I; 2231 // New instructions were inserted at the end of the preheader. 2232 if (isa<PHINode>(I)) 2233 break; 2234 2235 // Don't move instructions which might have side effects, since the side 2236 // effects need to complete before instructions inside the loop. Also don't 2237 // move instructions which might read memory, since the loop may modify 2238 // memory. Note that it's okay if the instruction might have undefined 2239 // behavior: LoopSimplify guarantees that the preheader dominates the exit 2240 // block. 2241 if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 2242 continue; 2243 2244 // Skip debug info intrinsics. 2245 if (isa<DbgInfoIntrinsic>(I)) 2246 continue; 2247 2248 // Skip eh pad instructions. 2249 if (I->isEHPad()) 2250 continue; 2251 2252 // Don't sink alloca: we never want to sink static alloca's out of the 2253 // entry block, and correctly sinking dynamic alloca's requires 2254 // checks for stacksave/stackrestore intrinsics. 2255 // FIXME: Refactor this check somehow? 2256 if (isa<AllocaInst>(I)) 2257 continue; 2258 2259 // Determine if there is a use in or before the loop (direct or 2260 // otherwise). 2261 bool UsedInLoop = false; 2262 for (Use &U : I->uses()) { 2263 Instruction *User = cast<Instruction>(U.getUser()); 2264 BasicBlock *UseBB = User->getParent(); 2265 if (PHINode *P = dyn_cast<PHINode>(User)) { 2266 unsigned i = 2267 PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 2268 UseBB = P->getIncomingBlock(i); 2269 } 2270 if (UseBB == Preheader || L->contains(UseBB)) { 2271 UsedInLoop = true; 2272 break; 2273 } 2274 } 2275 2276 // If there is, the def must remain in the preheader. 2277 if (UsedInLoop) 2278 continue; 2279 2280 // Otherwise, sink it to the exit block. 2281 Instruction *ToMove = &*I; 2282 bool Done = false; 2283 2284 if (I != Preheader->begin()) { 2285 // Skip debug info intrinsics. 2286 do { 2287 --I; 2288 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); 2289 2290 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) 2291 Done = true; 2292 } else { 2293 Done = true; 2294 } 2295 2296 MadeAnyChanges = true; 2297 ToMove->moveBefore(*ExitBlock, InsertPt); 2298 if (Done) break; 2299 InsertPt = ToMove->getIterator(); 2300 } 2301 2302 return MadeAnyChanges; 2303 } 2304 2305 // Returns true if the condition of \p BI being checked is invariant and can be 2306 // proved to be trivially true during at least first \p MaxIter iterations. 2307 static bool isTrivialCond(const Loop *L, BranchInst *BI, ScalarEvolution *SE, 2308 bool ProvingLoopExit, const SCEV *MaxIter) { 2309 ICmpInst::Predicate Pred; 2310 Value *LHS, *RHS; 2311 using namespace PatternMatch; 2312 BasicBlock *TrueSucc, *FalseSucc; 2313 if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), 2314 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) 2315 return false; 2316 2317 assert((L->contains(TrueSucc) != L->contains(FalseSucc)) && 2318 "Not a loop exit!"); 2319 2320 // 'LHS pred RHS' should now mean that we stay in loop. 
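  // For instance (illustrative only): for
  //   br i1 (icmp slt %x, %n), label %exit, label %backedge
  // the branch leaves the loop when 'slt' holds, so the predicate is inverted
  // to 'sge' to describe staying in the loop; when proving the loop exit
  // instead, it is inverted once more just below.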
2321   if (L->contains(FalseSucc))
2322     Pred = CmpInst::getInversePredicate(Pred);
2323
2324   // If we are proving loop exit, invert the predicate.
2325   if (ProvingLoopExit)
2326     Pred = CmpInst::getInversePredicate(Pred);
2327
2328   const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
2329   const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
2330   // Can we prove it to be trivially true?
2331   if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI))
2332     return true;
2333
2334   if (ProvingLoopExit)
2335     return false;
2336
2337   ICmpInst::Predicate InvariantPred;
2338   const SCEV *InvariantLHS, *InvariantRHS;
2339
2340   // Check if there is a loop-invariant predicate equivalent to our check.
2341   if (!SE->isLoopInvariantExitCondDuringFirstIterations(
2342           Pred, LHSS, RHSS, L, BI, MaxIter, InvariantPred, InvariantLHS,
2343           InvariantRHS))
2344     return false;
2345
2346   // Can we prove it to be trivially true?
2347   return SE->isKnownPredicateAt(InvariantPred, InvariantLHS, InvariantRHS, BI);
2348 }
2349
2350 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
2351   SmallVector<BasicBlock*, 16> ExitingBlocks;
2352   L->getExitingBlocks(ExitingBlocks);
2353
2354   // Remove all exits which aren't both rewriteable and executed on every
2355   // iteration.
2356   auto NewEnd = llvm::remove_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2357     // If our exiting block exits multiple loops, we can only rewrite the
2358     // innermost one.  Otherwise, we're changing how many times the innermost
2359     // loop runs before it exits.
2360     if (LI->getLoopFor(ExitingBB) != L)
2361       return true;
2362
2363     // Can't rewrite non-branch yet.
2364     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2365     if (!BI)
2366       return true;
2367
2368     // If already constant, nothing to do.
2369     if (isa<Constant>(BI->getCondition()))
2370       return true;
2371
2372     // Also, the loop latch must be dominated by the exiting BB.
2373     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
2374       return true;
2375
2376     return false;
2377   });
2378   ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
2379
2380   if (ExitingBlocks.empty())
2381     return false;
2382
2383   // Get a symbolic upper bound on the loop backedge taken count.
2384   const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
2385   if (isa<SCEVCouldNotCompute>(MaxExitCount))
2386     return false;
2387
2388   // Visit our exit blocks in order of dominance.  We know all exits must
2389   // dominate the latch, so there is a total dominance order
2390   // between them.
2391   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
2392     // std::sort sorts in ascending order, so we want the inverse of
2393     // the normal dominance relation.
2394     if (A == B) return false;
2395     if (DT->properlyDominates(A, B))
2396       return true;
2397     else {
2398       assert(DT->properlyDominates(B, A) &&
2399              "expected total dominance order!");
2400       return false;
2401     }
2402   });
2403 #ifndef NDEBUG
2404   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2405     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2406   }
2407 #endif
2408
2409   auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
2410     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2411     bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2412     auto *OldCond = BI->getCondition();
2413     auto *NewCond = ConstantInt::get(OldCond->getType(),
2414                                      IsTaken ?
ExitIfTrue : !ExitIfTrue); 2415 BI->setCondition(NewCond); 2416 if (OldCond->use_empty()) 2417 DeadInsts.emplace_back(OldCond); 2418 }; 2419 2420 bool Changed = false; 2421 SmallSet<const SCEV*, 8> DominatingExitCounts; 2422 for (BasicBlock *ExitingBB : ExitingBlocks) { 2423 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2424 if (isa<SCEVCouldNotCompute>(ExitCount)) { 2425 // Okay, we do not know the exit count here. Can we at least prove that it 2426 // will remain the same within iteration space? 2427 auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2428 auto OptimizeCond = [&](bool Inverted) { 2429 if (isTrivialCond(L, BI, SE, Inverted, MaxExitCount)) { 2430 FoldExit(ExitingBB, Inverted); 2431 return true; 2432 } 2433 return false; 2434 }; 2435 if (OptimizeCond(false) || OptimizeCond(true)) 2436 Changed = true; 2437 continue; 2438 } 2439 2440 // If we know we'd exit on the first iteration, rewrite the exit to 2441 // reflect this. This does not imply the loop must exit through this 2442 // exit; there may be an earlier one taken on the first iteration. 2443 // TODO: Given we know the backedge can't be taken, we should go ahead 2444 // and break it. Or at least, kill all the header phis and simplify. 2445 if (ExitCount->isZero()) { 2446 FoldExit(ExitingBB, true); 2447 Changed = true; 2448 continue; 2449 } 2450 2451 // If we end up with a pointer exit count, bail. Note that we can end up 2452 // with a pointer exit count for one exiting block, and not for another in 2453 // the same loop. 2454 if (!ExitCount->getType()->isIntegerTy() || 2455 !MaxExitCount->getType()->isIntegerTy()) 2456 continue; 2457 2458 Type *WiderType = 2459 SE->getWiderType(MaxExitCount->getType(), ExitCount->getType()); 2460 ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType); 2461 MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType); 2462 assert(MaxExitCount->getType() == ExitCount->getType()); 2463 2464 // Can we prove that some other exit must be taken strictly before this 2465 // one? 2466 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, 2467 MaxExitCount, ExitCount)) { 2468 FoldExit(ExitingBB, false); 2469 Changed = true; 2470 continue; 2471 } 2472 2473 // As we run, keep track of which exit counts we've encountered. If we 2474 // find a duplicate, we've found an exit which would have exited on the 2475 // exiting iteration, but (from the visit order) strictly follows another 2476 // which does the same and is thus dead. 2477 if (!DominatingExitCounts.insert(ExitCount).second) { 2478 FoldExit(ExitingBB, false); 2479 Changed = true; 2480 continue; 2481 } 2482 2483 // TODO: There might be another oppurtunity to leverage SCEV's reasoning 2484 // here. If we kept track of the min of dominanting exits so far, we could 2485 // discharge exits with EC >= MDEC. This is less powerful than the existing 2486 // transform (since later exits aren't considered), but potentially more 2487 // powerful for any case where SCEV can prove a >=u b, but neither a == b 2488 // or a >u b. Such a case is not currently known. 2489 } 2490 return Changed; 2491 } 2492 2493 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { 2494 SmallVector<BasicBlock*, 16> ExitingBlocks; 2495 L->getExitingBlocks(ExitingBlocks); 2496 2497 // Finally, see if we can rewrite our exit conditions into a loop invariant 2498 // form. 
If we have a read-only loop, and we can tell that we must exit down
2499 // a path which does not need any of the values computed within the loop, we
2500 // can rewrite the loop to exit on the first iteration.  Note that this
2501 // doesn't either a) tell us the loop exits on the first iteration (unless
2502 // *all* exits are predicateable) or b) tell us *which* exit might be taken.
2503 // This transformation looks a lot like a restricted form of dead loop
2504 // elimination, but restricted to read-only loops and without necessarily
2505 // needing to kill the loop entirely.
2506   if (!LoopPredication)
2507     return false;
2508
2509   if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2510     return false;
2511
2512   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2513   // through *explicit* control flow.  We have to eliminate the possibility of
2514   // implicit exits (see below) before we know it's truly exact.
2515   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2516   if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2517       !SE->isLoopInvariant(ExactBTC, L) ||
2518       !isSafeToExpand(ExactBTC, *SE))
2519     return false;
2520
2521   // If we end up with a pointer exit count, bail.  It may be unsized.
2522   if (!ExactBTC->getType()->isIntegerTy())
2523     return false;
2524
2525   auto BadExit = [&](BasicBlock *ExitingBB) {
2526     // If our exiting block exits multiple loops, we can only rewrite the
2527     // innermost one.  Otherwise, we're changing how many times the innermost
2528     // loop runs before it exits.
2529     if (LI->getLoopFor(ExitingBB) != L)
2530       return true;
2531
2532     // Can't rewrite non-branch yet.
2533     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2534     if (!BI)
2535       return true;
2536
2537     // If already constant, nothing to do.
2538     if (isa<Constant>(BI->getCondition()))
2539       return true;
2540
2541     // If the exit block has phis, we need to be able to compute the values
2542     // within the loop which contains them.  This assumes trivial LCSSA phis
2543     // have already been removed; TODO: generalize
2544     BasicBlock *ExitBlock =
2545       BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
2546     if (!ExitBlock->phis().empty())
2547       return true;
2548
2549     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2550     assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
2551     if (!SE->isLoopInvariant(ExitCount, L) ||
2552         !isSafeToExpand(ExitCount, *SE))
2553       return true;
2554
2555     // If we end up with a pointer exit count, bail.  It may be unsized.
2556     if (!ExitCount->getType()->isIntegerTy())
2557       return true;
2558
2559     return false;
2560   };
2561
2562   // If we have any exits which can't be predicated themselves, then we can't
2563   // predicate any exit which isn't guaranteed to execute before it.  Consider
2564   // two exits (a) and (b) which would both exit on the same iteration.  If we
2565   // can predicate (b), but not (a), and (a) precedes (b) along some path, then
2566   // we could convert a loop from exiting through (a) to one exiting through
2567   // (b).  Note that this problem exists only for exits with the same exit
2568   // count, and we could be more aggressive when exit counts are known to be unequal.
2569   llvm::sort(ExitingBlocks,
2570              [&](BasicBlock *A, BasicBlock *B) {
2571                // std::sort sorts in ascending order, so we want the inverse of
2572                // the normal dominance relation, plus a tie breaker for blocks
2573                // unordered by dominance.
2574                if (DT->properlyDominates(A, B)) return true;
2575                if (DT->properlyDominates(B, A)) return false;
2576                return A->getName() < B->getName();
2577              });
2578   // Check to see if our exit blocks are a total order (i.e. a linear chain of
2579   // exits before the backedge).  If they aren't, reasoning about reachability
2580   // is complicated and we choose not to for now.
2581   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
2582     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
2583       return false;
2584
2585   // Given our sorted total order, we know that exit[j] must be evaluated
2586   // after all exit[i] such that j > i.
2587   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
2588     if (BadExit(ExitingBlocks[i])) {
2589       ExitingBlocks.resize(i);
2590       break;
2591     }
2592
2593   if (ExitingBlocks.empty())
2594     return false;
2595
2596   // We rely on not being able to reach an exiting block on a later iteration
2597   // than its statically computed exit count.  The implementation of
2598   // getExitCount currently has this invariant, but assert it here so that
2599   // breakage is obvious if this ever changes.
2600   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2601     return DT->dominates(ExitingBB, L->getLoopLatch());
2602   }));
2603
2604   // At this point, ExitingBlocks consists of only those blocks which are
2605   // predicatable.  Given that, we know we have at least one exit we can
2606   // predicate if the loop doesn't have side effects and doesn't have any
2607   // implicit exits (because then our exact BTC isn't actually exact).
2608   // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
2609   // suggestions on how to improve this?  I can obviously bail out for outer
2610   // loops, but that seems less than ideal.  MemorySSA can find memory writes,
2611   // is that enough for *all* side effects?
2612   for (BasicBlock *BB : L->blocks())
2613     for (auto &I : *BB)
2614       // TODO: isGuaranteedToTransfer
2615       if (I.mayHaveSideEffects() || I.mayThrow())
2616         return false;
2617
2618   bool Changed = false;
2619   // Finally, do the actual predication for all predicatable blocks.  A couple
2620   // of notes here:
2621   // 1) We don't bother to constant fold dominated exits with identical exit
2622   //    counts; that's simply a form of CSE/equality propagation and we leave
2623   //    it for dedicated passes.
2624   // 2) We insert the comparison at the branch.  Hoisting introduces additional
2625   //    legality constraints and we leave that to dedicated logic.  We want to
2626   //    predicate even if we can't insert a loop invariant expression as
2627   //    peeling or unrolling will likely reduce the cost of the otherwise loop
2628   //    varying check.
2629   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
2630   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
2631   Value *ExactBTCV = nullptr; // Lazily generated if needed.
2632   for (BasicBlock *ExitingBB : ExitingBlocks) {
2633     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2634
2635     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2636     Value *NewCond;
2637     if (ExitCount == ExactBTC) {
2638       NewCond = L->contains(BI->getSuccessor(0)) ?
2639 B.getFalse() : B.getTrue(); 2640 } else { 2641 Value *ECV = Rewriter.expandCodeFor(ExitCount); 2642 if (!ExactBTCV) 2643 ExactBTCV = Rewriter.expandCodeFor(ExactBTC); 2644 Value *RHS = ExactBTCV; 2645 if (ECV->getType() != RHS->getType()) { 2646 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType()); 2647 ECV = B.CreateZExt(ECV, WiderTy); 2648 RHS = B.CreateZExt(RHS, WiderTy); 2649 } 2650 auto Pred = L->contains(BI->getSuccessor(0)) ? 2651 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; 2652 NewCond = B.CreateICmp(Pred, ECV, RHS); 2653 } 2654 Value *OldCond = BI->getCondition(); 2655 BI->setCondition(NewCond); 2656 if (OldCond->use_empty()) 2657 DeadInsts.emplace_back(OldCond); 2658 Changed = true; 2659 } 2660 2661 return Changed; 2662 } 2663 2664 //===----------------------------------------------------------------------===// 2665 // IndVarSimplify driver. Manage several subpasses of IV simplification. 2666 //===----------------------------------------------------------------------===// 2667 2668 bool IndVarSimplify::run(Loop *L) { 2669 // We need (and expect!) the incoming loop to be in LCSSA. 2670 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2671 "LCSSA required to run indvars!"); 2672 2673 // If LoopSimplify form is not available, stay out of trouble. Some notes: 2674 // - LSR currently only supports LoopSimplify-form loops. Indvars' 2675 // canonicalization can be a pessimization without LSR to "clean up" 2676 // afterwards. 2677 // - We depend on having a preheader; in particular, 2678 // Loop::getCanonicalInductionVariable only supports loops with preheaders, 2679 // and we're in trouble if we can't find the induction variable even when 2680 // we've manually inserted one. 2681 // - LFTR relies on having a single backedge. 2682 if (!L->isLoopSimplifyForm()) 2683 return false; 2684 2685 #ifndef NDEBUG 2686 // Used below for a consistency check only 2687 // Note: Since the result returned by ScalarEvolution may depend on the order 2688 // in which previous results are added to its cache, the call to 2689 // getBackedgeTakenCount() may change following SCEV queries. 2690 const SCEV *BackedgeTakenCount; 2691 if (VerifyIndvars) 2692 BackedgeTakenCount = SE->getBackedgeTakenCount(L); 2693 #endif 2694 2695 bool Changed = false; 2696 // If there are any floating-point recurrences, attempt to 2697 // transform them to use integer recurrences. 2698 Changed |= rewriteNonIntegerIVs(L); 2699 2700 // Create a rewriter object which we'll use to transform the code with. 2701 SCEVExpander Rewriter(*SE, DL, "indvars"); 2702 #ifndef NDEBUG 2703 Rewriter.setDebugType(DEBUG_TYPE); 2704 #endif 2705 2706 // Eliminate redundant IV users. 2707 // 2708 // Simplification works best when run before other consumers of SCEV. We 2709 // attempt to avoid evaluating SCEVs for sign/zero extend operations until 2710 // other expressions involving loop IVs have been evaluated. This helps SCEV 2711 // set no-wrap flags before normalizing sign/zero extension. 2712 Rewriter.disableCanonicalMode(); 2713 Changed |= simplifyAndExtend(L, Rewriter, LI); 2714 2715 // Check to see if we can compute the final value of any expressions 2716 // that are recurrent in the loop, and substitute the exit values from the 2717 // loop into any instructions outside of the loop that use the final values 2718 // of the current expressions. 
2719 if (ReplaceExitValue != NeverRepl) { 2720 if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT, 2721 ReplaceExitValue, DeadInsts)) { 2722 NumReplaced += Rewrites; 2723 Changed = true; 2724 } 2725 } 2726 2727 // Eliminate redundant IV cycles. 2728 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); 2729 2730 // Try to eliminate loop exits based on analyzeable exit counts 2731 if (optimizeLoopExits(L, Rewriter)) { 2732 Changed = true; 2733 // Given we've changed exit counts, notify SCEV 2734 SE->forgetLoop(L); 2735 } 2736 2737 // Try to form loop invariant tests for loop exits by changing how many 2738 // iterations of the loop run when that is unobservable. 2739 if (predicateLoopExits(L, Rewriter)) { 2740 Changed = true; 2741 // Given we've changed exit counts, notify SCEV 2742 SE->forgetLoop(L); 2743 } 2744 2745 // If we have a trip count expression, rewrite the loop's exit condition 2746 // using it. 2747 if (!DisableLFTR) { 2748 BasicBlock *PreHeader = L->getLoopPreheader(); 2749 BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); 2750 2751 SmallVector<BasicBlock*, 16> ExitingBlocks; 2752 L->getExitingBlocks(ExitingBlocks); 2753 for (BasicBlock *ExitingBB : ExitingBlocks) { 2754 // Can't rewrite non-branch yet. 2755 if (!isa<BranchInst>(ExitingBB->getTerminator())) 2756 continue; 2757 2758 // If our exitting block exits multiple loops, we can only rewrite the 2759 // innermost one. Otherwise, we're changing how many times the innermost 2760 // loop runs before it exits. 2761 if (LI->getLoopFor(ExitingBB) != L) 2762 continue; 2763 2764 if (!needsLFTR(L, ExitingBB)) 2765 continue; 2766 2767 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2768 if (isa<SCEVCouldNotCompute>(ExitCount)) 2769 continue; 2770 2771 // This was handled above, but as we form SCEVs, we can sometimes refine 2772 // existing ones; this allows exit counts to be folded to zero which 2773 // weren't when optimizeLoopExits saw them. Arguably, we should iterate 2774 // until stable to handle cases like this better. 2775 if (ExitCount->isZero()) 2776 continue; 2777 2778 PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT); 2779 if (!IndVar) 2780 continue; 2781 2782 // Avoid high cost expansions. Note: This heuristic is questionable in 2783 // that our definition of "high cost" is not exactly principled. 2784 if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget, 2785 TTI, PreHeaderBR)) 2786 continue; 2787 2788 // Check preconditions for proper SCEVExpander operation. SCEV does not 2789 // express SCEVExpander's dependencies, such as LoopSimplify. Instead 2790 // any pass that uses the SCEVExpander must do it. This does not work 2791 // well for loop passes because SCEVExpander makes assumptions about 2792 // all loops, while LoopPassManager only forces the current loop to be 2793 // simplified. 2794 // 2795 // FIXME: SCEV expansion has no way to bail out, so the caller must 2796 // explicitly check any assumptions made by SCEV. Brittle. 2797 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount); 2798 if (!AR || AR->getLoop()->getLoopPreheader()) 2799 Changed |= linearFunctionTestReplace(L, ExitingBB, 2800 ExitCount, IndVar, 2801 Rewriter); 2802 } 2803 } 2804 // Clear the rewriter cache, because values that are in the rewriter's cache 2805 // can be deleted in the loop below, causing the AssertingVH in the cache to 2806 // trigger. 
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty())
    if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR, and any other changes, have not interfered with SCEV's
  // ability to compute trip count. We may have *changed* the exit count, but
  // only by reducing it.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}

namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ?
        &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
    auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
    MemorySSA *MSSA = nullptr;
    if (MSSAAnalysis)
      MSSA = &MSSAAnalysis->getMSSA();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreserved<MemorySSAWrapperPass>();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}
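
// Usage note (a sketch, not part of the pass implementation): the pass is
// normally scheduled as part of the standard optimization pipelines, but it
// can also be exercised in isolation, e.g. with 'opt -passes=indvars' under
// the new pass manager or 'opt -indvars' under the legacy pass manager.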