//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the
// following changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
"llvm/Transforms/Utils/BasicBlockUtils.h" 84 #include "llvm/Transforms/Utils/Local.h" 85 #include "llvm/Transforms/Utils/LoopUtils.h" 86 #include "llvm/Transforms/Utils/SimplifyIndVar.h" 87 #include <cassert> 88 #include <cstdint> 89 #include <utility> 90 91 using namespace llvm; 92 93 #define DEBUG_TYPE "indvars" 94 95 STATISTIC(NumWidened , "Number of indvars widened"); 96 STATISTIC(NumReplaced , "Number of exit values replaced"); 97 STATISTIC(NumLFTR , "Number of loop exit tests replaced"); 98 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); 99 STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); 100 101 // Trip count verification can be enabled by default under NDEBUG if we 102 // implement a strong expression equivalence checker in SCEV. Until then, we 103 // use the verify-indvars flag, which may assert in some cases. 104 static cl::opt<bool> VerifyIndvars( 105 "verify-indvars", cl::Hidden, 106 cl::desc("Verify the ScalarEvolution result after running indvars")); 107 108 static cl::opt<ReplaceExitVal> ReplaceExitValue( 109 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), 110 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), 111 cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), 112 clEnumValN(OnlyCheapRepl, "cheap", 113 "only replace exit value when the cost is cheap"), 114 clEnumValN(NoHardUse, "noharduse", 115 "only replace exit values when loop def likely dead"), 116 clEnumValN(AlwaysRepl, "always", 117 "always replace exit value whenever possible"))); 118 119 static cl::opt<bool> UsePostIncrementRanges( 120 "indvars-post-increment-ranges", cl::Hidden, 121 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), 122 cl::init(true)); 123 124 static cl::opt<bool> 125 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), 126 cl::desc("Disable Linear Function Test Replace optimization")); 127 128 static cl::opt<bool> 129 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), 130 cl::desc("Predicate conditions in read only loops")); 131 132 namespace { 133 134 struct RewritePhi; 135 136 class IndVarSimplify { 137 LoopInfo *LI; 138 ScalarEvolution *SE; 139 DominatorTree *DT; 140 const DataLayout &DL; 141 TargetLibraryInfo *TLI; 142 const TargetTransformInfo *TTI; 143 std::unique_ptr<MemorySSAUpdater> MSSAU; 144 145 SmallVector<WeakTrackingVH, 16> DeadInsts; 146 147 bool handleFloatingPointIV(Loop *L, PHINode *PH); 148 bool rewriteNonIntegerIVs(Loop *L); 149 150 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); 151 /// Try to eliminate loop exits based on analyzeable exit counts 152 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter); 153 /// Try to form loop invariant tests for loop exits by changing how many 154 /// iterations of the loop run when that is unobservable. 
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);
};

} // end anonymous namespace

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: this may happen if User is a PHI and Def only
/// comes to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t.
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
///   for(double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for(int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
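///
/// For illustration (a hand-written sketch, not a case taken from the test
/// suite):
///
///   loop:
///     %x = phi [ %init, %preheader ], [ %x.next, %latch ]
///     br i1 %invariant.cond, label %exit, label %latch  ; runs every iteration
///     ...
///   exit:
///     %lcssa = phi [ %x, %loop ]
///
/// Because the exit can only be taken on the first iteration, %lcssa can be
/// rewritten to use %init directly.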
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and
// provides the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace
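// For illustration (a hand-written sketch, not code from this file), widening
// turns a pattern like
//
//   %iv      = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
//   %iv.next = add nsw i32 %iv, 1
//   %iv.ext  = sext i32 %iv.next to i64
//
// into an equivalent i64 recurrence whose users no longer need the sext,
// assuming SCEV can prove the narrow recurrence does not wrap.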
/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetics on the wider indvar are more
  // expensive than those on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def. This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative. Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or
/// inserting truncs whenever we stop propagating the type.
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo *LI;
  Loop *L;
  ScalarEvolution *SE;
  DominatorTree *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// 0 for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>.
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop? If
/// so, return the extended recurrence and the kind of extension used.
/// Otherwise return {nullptr, Unknown}.
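///
/// For example (an illustrative sketch, not a case from this file): if the
/// narrow use evaluates to {0,+,2}<nsw> over i32 on this loop, SCEV can reduce
/// sext({0,+,2}<nsw>) to the i64 recurrence {0,+,2} on the same loop, so the
/// use is worth widening; if the extension does not fold to an AddRec, we give
/// up and return {nullptr, Unknown}.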
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension). For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive. Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}

/// If the narrow use is an instruction whose two operands are the defining
/// instruction of DU and a load instruction, then we have the following:
/// if the load is hoisted outside the loop, then we do not reach this function
/// as scalar evolution analysis works fine in widenIVUse with variables
/// hoisted outside the loop and efficient code is subsequently generated by
/// not emitting truncate instructions. But when the load is not hoisted
/// (whether due to a limitation in alias analysis or due to a true legality
/// constraint), then scalar evolution cannot proceed with loop-variant values
/// and inefficient code is generated. This function handles the non-hoisted
/// load special case by making the optimization generate the same type of code
/// for hoisted and non-hoisted loads (widen the use and eliminate the sign or
/// zero extend instruction). This special case is important especially when
/// the induction variables are affecting the addressing mode in code
/// generation.
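///
/// For illustration (a hand-written sketch, not a case from this file):
///
///   %val = load i32, i32* %p            ; loop-variant, not hoistable
///   %add = add nsw i32 %iv, %val
///   %ext = sext i32 %add to i64         ; every user of %add looks like this
///
/// Instead of truncating the wide IV to feed %add, the add itself is widened
/// to i64 and the sext users are replaced by the widened add.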
bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  // Handle the common case of add<nsw/nuw>.
  const unsigned OpCode = NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return false;

  // The operand that is not defined by NarrowDef of DU. Let's call it the
  // other operand.
  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
         "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(NarrowUse);
  ExtendKind ExtKind = getExtendKind(NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return false;

  // We are interested in the other operand being a load instruction.
  // But, we should look into relaxing this restriction later on.
  auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
  if (I && I->getOpcode() != Instruction::Load)
    return false;

  // Verify that the defining operand is an AddRec.
  const SCEV *Op1 = SE->getSCEV(WideDef);
  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
    return false;
  // Verify that the other operand is an extend.
  if (ExtKind == SignExtended) {
    if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
      return false;
  } else {
    if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
      return false;
  }

  if (ExtKind == SignExtended) {
    for (Use &U : NarrowUse->uses()) {
      SExtInst *User = dyn_cast<SExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  } else { // ExtKind == ZeroExtended
    for (Use &U : NarrowUse->uses()) {
      ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  }

  return true;
}

/// Special case for widening with variant loads (see
/// WidenIV::widenWithVariantLoadUse). This is the code generation part.
void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  ExtendKind ExtKind = getExtendKind(NarrowDef);

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  // Generating a widening use instruction.
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      ExtKind, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      ExtKind, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);

  assert(ExtKind != Unknown && "Unknown ExtKind not handled");

  ExtendKindMap[NarrowUse] = ExtKind;

  for (Use &U : NarrowUse->uses()) {
    Instruction *User = nullptr;
    if (ExtKind == SignExtended)
      User = dyn_cast<SExtInst>(U.getUser());
    else
      User = dyn_cast<ZExtInst>(U.getUser());
    if (User && User->getType() == WideType) {
      LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
                        << *WideBO << "\n");
      ++NumElimExt;
      User->replaceAllUsesWith(WideBO);
      DeadInsts.emplace_back(User);
    }
  }
}

/// Determine whether an individual user of the narrow IV can be widened. If
/// so, return the wide clone of the user.
Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
  assert(ExtendKindMap.count(DU.NarrowDef) &&
         "Should already know the kind of extension used to widen NarrowDef");

  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
    if (LI->getLoopFor(UsePhi->getParent()) != L) {
      // For LCSSA phis, sink the truncate outside the loop.
      // After SimplifyCFG most loop exit targets have a single predecessor.
      // Otherwise fall back to a truncate within the loop.
      if (UsePhi->getNumOperands() != 1)
        truncateIVUse(DU, DT, LI);
      else {
        // Widening the PHI requires us to insert a trunc. The logical place
        // for this trunc is in the same BB as the PHI. This is not possible if
        // the BB is terminated by a catchswitch.
        if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
          return nullptr;

        PHINode *WidePhi =
            PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
                            UsePhi);
        WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
        IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
        Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
        UsePhi->replaceAllUsesWith(Trunc);
        DeadInsts.emplace_back(UsePhi);
        LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
                          << *WidePhi << "\n");
      }
      return nullptr;
    }
  }

  // This narrow use can be widened by a sext if it's non-negative or its
  // narrow def was widened by a sext. Same for zext.
  auto canWidenBySExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
  };
  auto canWidenByZExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
  };

  // Our raison d'etre! Eliminate sign and zero extension.
  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
      (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
    Value *NewDef = DU.WideDef;
    if (DU.NarrowUse->getType() != WideType) {
      unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
      if (CastWidth < IVWidth) {
        // The cast isn't as wide as the IV, so insert a Trunc.
        IRBuilder<> Builder(DU.NarrowUse);
        NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
      }
      else {
        // A wider extend was hidden behind a narrower one. This may induce
        // another round of IV widening in which the intermediate IV becomes
        // dead. It should be very rare.
        LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
                          << " not wide enough to subsume " << *DU.NarrowUse
                          << "\n");
        DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
        NewDef = DU.NarrowUse;
      }
    }
    if (NewDef != DU.NarrowUse) {
      LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
                        << " replaced by " << *DU.WideDef << "\n");
      ++NumElimExt;
      DU.NarrowUse->replaceAllUsesWith(NewDef);
      DeadInsts.emplace_back(DU.NarrowUse);
    }
    // Now that the extend is gone, we want to expose its uses for potential
    // further simplification. We don't need to directly inform SimplifyIVUsers
    // of the new users, because their parent IV will be processed later as a
    // new loop phi. If we preserved IVUsers analysis, we would also want to
    // push the uses of WideDef here.

    // No further widening is needed. The deceased [sz]ext had done it for us.
    return nullptr;
  }

  // Does this user itself evaluate to a recurrence after widening?
  WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
  if (!WideAddRec.first)
    WideAddRec = getWideRecurrence(DU);

  assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
  if (!WideAddRec.first) {
    // If use is a loop condition, try to promote the condition instead of
    // truncating the IV first.
    if (widenLoopCompare(DU))
      return nullptr;

    // We are here about to generate a truncate instruction that may hurt
    // performance because the scalar evolution expression computed earlier
    // in WideAddRec.first does not indicate a polynomial induction expression.
    // In that case, look at the operands of the use instruction to determine
    // if we can still widen the use instead of truncating its operand.
    if (widenWithVariantLoadUse(DU)) {
      widenWithVariantLoadUseCodegen(DU);
      return nullptr;
    }

    // This user does not evaluate to a recurrence after widening, so don't
    // follow it. Instead insert a Trunc to kill off the original use,
    // eventually isolating the original narrow IV so it can be removed.
    truncateIVUse(DU, DT, LI);
    return nullptr;
  }
  // Assume block terminators cannot evaluate to a recurrence. We can't insert
  // a Trunc after a terminator if there happens to be a critical edge.
  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
         "SCEV is not expected to evaluate a block terminator");

  // Reuse the IV increment that SCEVExpander created as long as it dominates
  // NarrowUse.
  Instruction *WideUse = nullptr;
  if (WideAddRec.first == WideIncExpr &&
      Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
    WideUse = WideInc;
  else {
    WideUse = cloneIVUser(DU, WideAddRec.first);
    if (!WideUse)
      return nullptr;
  }
  // Evaluation of WideAddRec ensured that the narrow expression could be
  // extended outside the loop without overflow. This suggests that the wide use
  // evaluates to the same expression as the extended narrow use, but doesn't
  // absolutely guarantee it. Hence the following failsafe check. In rare cases
  // where it fails, we simply throw away the newly created wide use.
  if (WideAddRec.first != SE->getSCEV(WideUse)) {
    LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
                      << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
                      << "\n");
    DeadInsts.emplace_back(WideUse);
    return nullptr;
  }

  // If we reached this point, then we are going to replace
  // DU.NarrowUse with WideUse. Reattach DbgValue then.
  replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);

  ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
  // Returning WideUse pushes it on the worklist.
  return WideUse;
}

/// Add eligible users of NarrowDef to NarrowIVUsers.
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
  bool NonNegativeDef =
      SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
                           SE->getConstant(NarrowSCEV->getType(), 0));
  for (User *U : NarrowDef->users()) {
    Instruction *NarrowUser = cast<Instruction>(U);

    // Handle data flow merges and bizarre phi cycles.
    if (!Widened.insert(NarrowUser).second)
      continue;

    bool NonNegativeUse = false;
    if (!NonNegativeDef) {
      // We might have control-dependent range information for this context.
      if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
        NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
    }

    NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
                               NonNegativeDef || NonNegativeUse);
  }
}

/// Process a single induction variable. First use the SCEVExpander to create a
/// wide induction variable that evaluates to the same recurrence as the
/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
/// def-use chain. After widenIVUse has processed all interesting IV users, the
/// narrow IV will be isolated for removal by DeleteDeadPHIs.
///
/// It would be simpler to delete uses as they are processed, but we must avoid
/// invalidating SCEV expressions.
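///
/// A rough outline (informal, matching the code below): (1) check that the
/// sign- or zero-extension of OrigPhi's recurrence is still an AddRec on this
/// loop, (2) optionally precompute post-increment ranges, (3) let SCEVExpander
/// materialize the wide phi and its increment, and (4) walk the narrow IV's
/// users with widenIVUse, queueing dead narrow instructions for later removal.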
PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
  // Is this phi an induction variable?
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
  if (!AddRec)
    return nullptr;

  // Widen the induction variable expression.
  const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
                               ? SE->getSignExtendExpr(AddRec, WideType)
                               : SE->getZeroExtendExpr(AddRec, WideType);

  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
         "Expect the new IV expression to preserve its type");

  // Can the IV be extended outside the loop without overflow?
  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return nullptr;

  // An AddRec must have loop-invariant operands. Since this AddRec is
  // materialized by a loop header phi, the expression cannot have any post-loop
  // operands, so they must dominate the loop header.
  assert(
      SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
      SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
      "Loop header phi recurrence inputs do not dominate the loop");

  // Iterate over IV uses (including transitive ones) looking for IV increments
  // of the form 'add nsw %iv, <const>'. For each increment and each use of
  // the increment calculate control-dependent range information based on
  // dominating conditions inside of the loop (e.g. a range check inside of the
  // loop). Calculated ranges are stored in PostIncRangeInfos map.
  //
  // Control-dependent range information is later used to prove that a narrow
  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
  // this on demand because when pushNarrowIVUsers needs this information some
  // of the dominating conditions might be already widened.
  if (UsePostIncrementRanges)
    calculatePostIncRanges(OrigPhi);

  // The rewriter provides a value for the desired IV expression. This may
  // either find an existing phi or materialize a new one. Either way, we
  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
  // of the phi-SCC dominates the loop entry.
  Instruction *InsertPt = &L->getHeader()->front();
  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));

  // Remembering the WideIV increment generated by SCEVExpander allows
  // widenIVUse to reuse it when widening the narrow IV's increment. We don't
  // employ a general reuse mechanism because the call above is the only call to
  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
    WideInc =
        cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
    WideIncExpr = SE->getSCEV(WideInc);
    // Propagate the debug location associated with the original loop increment
    // to the new (widened) increment.
    auto *OrigInc =
        cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
    WideInc->setDebugLoc(OrigInc->getDebugLoc());
  }

  LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
  ++NumWidened;

  // Traverse the def-use chain using a worklist starting at the original IV.
  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");

  Widened.insert(OrigPhi);
  pushNarrowIVUsers(OrigPhi, WidePhi);

  while (!NarrowIVUsers.empty()) {
    NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();

    // Process a def-use edge. This may replace the use, so don't hold a
    // use_iterator across it.
    Instruction *WideUse = widenIVUse(DU, Rewriter);

    // Follow all def-use edges from the previous narrow use.
    if (WideUse)
      pushNarrowIVUsers(DU.NarrowUse, WideUse);

    // widenIVUse may have removed the def-use edge.
    if (DU.NarrowDef->use_empty())
      DeadInsts.emplace_back(DU.NarrowDef);
  }

  // Attach any debug information to the new PHI.
  replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);

  return WidePhi;
}

/// Calculates the control-dependent range for the given def at the given
/// context by looking at dominating conditions inside of the loop.
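///
/// For example (an illustrative sketch, not a case from this file): given
/// NarrowDef '%iv.next = add nsw i32 %iv, 1' and a branch on '%iv sgt -1' whose
/// true edge dominates NarrowUser, the range of %iv.next at that use is
/// constrained to roughly [1, INT_MAX], which lets pushNarrowIVUsers treat it
/// as never negative.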
1448 auto *OrigInc = 1449 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); 1450 WideInc->setDebugLoc(OrigInc->getDebugLoc()); 1451 } 1452 1453 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); 1454 ++NumWidened; 1455 1456 // Traverse the def-use chain using a worklist starting at the original IV. 1457 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); 1458 1459 Widened.insert(OrigPhi); 1460 pushNarrowIVUsers(OrigPhi, WidePhi); 1461 1462 while (!NarrowIVUsers.empty()) { 1463 NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); 1464 1465 // Process a def-use edge. This may replace the use, so don't hold a 1466 // use_iterator across it. 1467 Instruction *WideUse = widenIVUse(DU, Rewriter); 1468 1469 // Follow all def-use edges from the previous narrow use. 1470 if (WideUse) 1471 pushNarrowIVUsers(DU.NarrowUse, WideUse); 1472 1473 // widenIVUse may have removed the def-use edge. 1474 if (DU.NarrowDef->use_empty()) 1475 DeadInsts.emplace_back(DU.NarrowDef); 1476 } 1477 1478 // Attach any debug information to the new PHI. 1479 replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT); 1480 1481 return WidePhi; 1482 } 1483 1484 /// Calculates control-dependent range for the given def at the given context 1485 /// by looking at dominating conditions inside of the loop 1486 void WidenIV::calculatePostIncRange(Instruction *NarrowDef, 1487 Instruction *NarrowUser) { 1488 using namespace llvm::PatternMatch; 1489 1490 Value *NarrowDefLHS; 1491 const APInt *NarrowDefRHS; 1492 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), 1493 m_APInt(NarrowDefRHS))) || 1494 !NarrowDefRHS->isNonNegative()) 1495 return; 1496 1497 auto UpdateRangeFromCondition = [&] (Value *Condition, 1498 bool TrueDest) { 1499 CmpInst::Predicate Pred; 1500 Value *CmpRHS; 1501 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), 1502 m_Value(CmpRHS)))) 1503 return; 1504 1505 CmpInst::Predicate P = 1506 TrueDest ? Pred : CmpInst::getInversePredicate(Pred); 1507 1508 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); 1509 auto CmpConstrainedLHSRange = 1510 ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); 1511 auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( 1512 *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); 1513 1514 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); 1515 }; 1516 1517 auto UpdateRangeFromGuards = [&](Instruction *Ctx) { 1518 if (!HasGuards) 1519 return; 1520 1521 for (Instruction &I : make_range(Ctx->getIterator().getReverse(), 1522 Ctx->getParent()->rend())) { 1523 Value *C = nullptr; 1524 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) 1525 UpdateRangeFromCondition(C, /*TrueDest=*/true); 1526 } 1527 }; 1528 1529 UpdateRangeFromGuards(NarrowUser); 1530 1531 BasicBlock *NarrowUserBB = NarrowUser->getParent(); 1532 // If NarrowUserBB is statically unreachable asking dominator queries may 1533 // yield surprising results. (e.g. 
the block may not have a dom tree node) 1534 if (!DT->isReachableFromEntry(NarrowUserBB)) 1535 return; 1536 1537 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); 1538 L->contains(DTB->getBlock()); 1539 DTB = DTB->getIDom()) { 1540 auto *BB = DTB->getBlock(); 1541 auto *TI = BB->getTerminator(); 1542 UpdateRangeFromGuards(TI); 1543 1544 auto *BI = dyn_cast<BranchInst>(TI); 1545 if (!BI || !BI->isConditional()) 1546 continue; 1547 1548 auto *TrueSuccessor = BI->getSuccessor(0); 1549 auto *FalseSuccessor = BI->getSuccessor(1); 1550 1551 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { 1552 return BBE.isSingleEdge() && 1553 DT->dominates(BBE, NarrowUser->getParent()); 1554 }; 1555 1556 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) 1557 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); 1558 1559 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) 1560 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); 1561 } 1562 } 1563 1564 /// Calculates PostIncRangeInfos map for the given IV 1565 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { 1566 SmallPtrSet<Instruction *, 16> Visited; 1567 SmallVector<Instruction *, 6> Worklist; 1568 Worklist.push_back(OrigPhi); 1569 Visited.insert(OrigPhi); 1570 1571 while (!Worklist.empty()) { 1572 Instruction *NarrowDef = Worklist.pop_back_val(); 1573 1574 for (Use &U : NarrowDef->uses()) { 1575 auto *NarrowUser = cast<Instruction>(U.getUser()); 1576 1577 // Don't go looking outside the current loop. 1578 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; 1579 if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) 1580 continue; 1581 1582 if (!Visited.insert(NarrowUser).second) 1583 continue; 1584 1585 Worklist.push_back(NarrowUser); 1586 1587 calculatePostIncRange(NarrowDef, NarrowUser); 1588 } 1589 } 1590 } 1591 1592 //===----------------------------------------------------------------------===// 1593 // Live IV Reduction - Minimize IVs live across the loop. 1594 //===----------------------------------------------------------------------===// 1595 1596 //===----------------------------------------------------------------------===// 1597 // Simplification of IV users based on SCEV evaluation. 1598 //===----------------------------------------------------------------------===// 1599 1600 namespace { 1601 1602 class IndVarSimplifyVisitor : public IVVisitor { 1603 ScalarEvolution *SE; 1604 const TargetTransformInfo *TTI; 1605 PHINode *IVPhi; 1606 1607 public: 1608 WideIVInfo WI; 1609 1610 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, 1611 const TargetTransformInfo *TTI, 1612 const DominatorTree *DTree) 1613 : SE(SCEV), TTI(TTI), IVPhi(IV) { 1614 DT = DTree; 1615 WI.NarrowIV = IVPhi; 1616 } 1617 1618 // Implement the interface used by simplifyUsersOfIV. 1619 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } 1620 }; 1621 1622 } // end anonymous namespace 1623 1624 /// Iteratively perform simplification on a worklist of IV users. Each 1625 /// successive simplification may push more users which may themselves be 1626 /// candidates for simplification. 1627 /// 1628 /// Sign/Zero extend elimination is interleaved with IV simplification. 
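///
/// For illustration only (a hypothetical input, not tied to any test case):
/// given a narrow IV whose sign-extended value feeds an i64 address
/// computation,
///
///   loop:
///     %i = phi i32 [ 0, %preheader ], [ %i.next, %loop ]
///     %idx = sext i32 %i to i64
///     ...
///     %i.next = add nsw i32 %i, 1
///
/// widening creates an i64 IV computing the same recurrence, rewrites the
/// users of %idx to use it directly, and leaves the dead narrow IV and sext
/// for later cleanup.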
1629 bool IndVarSimplify::simplifyAndExtend(Loop *L, 1630 SCEVExpander &Rewriter, 1631 LoopInfo *LI) { 1632 SmallVector<WideIVInfo, 8> WideIVs; 1633 1634 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction( 1635 Intrinsic::getName(Intrinsic::experimental_guard)); 1636 bool HasGuards = GuardDecl && !GuardDecl->use_empty(); 1637 1638 SmallVector<PHINode*, 8> LoopPhis; 1639 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 1640 LoopPhis.push_back(cast<PHINode>(I)); 1641 } 1642 // Each round of simplification iterates through the SimplifyIVUsers worklist 1643 // for all current phis, then determines whether any IVs can be 1644 // widened. Widening adds new phis to LoopPhis, inducing another round of 1645 // simplification on the wide IVs. 1646 bool Changed = false; 1647 while (!LoopPhis.empty()) { 1648 // Evaluate as many IV expressions as possible before widening any IVs. This 1649 // forces SCEV to set no-wrap flags before evaluating sign/zero 1650 // extension. The first time SCEV attempts to normalize sign/zero extension, 1651 // the result becomes final. So for the most predictable results, we delay 1652 // evaluation of sign/zero extend evaluation until needed, and avoid running 1653 // other SCEV based analysis prior to simplifyAndExtend. 1654 do { 1655 PHINode *CurrIV = LoopPhis.pop_back_val(); 1656 1657 // Information about sign/zero extensions of CurrIV. 1658 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); 1659 1660 Changed |= 1661 simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor); 1662 1663 if (Visitor.WI.WidestNativeType) { 1664 WideIVs.push_back(Visitor.WI); 1665 } 1666 } while(!LoopPhis.empty()); 1667 1668 for (; !WideIVs.empty(); WideIVs.pop_back()) { 1669 WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards); 1670 if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) { 1671 Changed = true; 1672 LoopPhis.push_back(WidePhi); 1673 } 1674 } 1675 } 1676 return Changed; 1677 } 1678 1679 //===----------------------------------------------------------------------===// 1680 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition. 1681 //===----------------------------------------------------------------------===// 1682 1683 /// Given an Value which is hoped to be part of an add recurance in the given 1684 /// loop, return the associated Phi node if so. Otherwise, return null. Note 1685 /// that this is less general than SCEVs AddRec checking. 1686 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) { 1687 Instruction *IncI = dyn_cast<Instruction>(IncV); 1688 if (!IncI) 1689 return nullptr; 1690 1691 switch (IncI->getOpcode()) { 1692 case Instruction::Add: 1693 case Instruction::Sub: 1694 break; 1695 case Instruction::GetElementPtr: 1696 // An IV counter must preserve its type. 1697 if (IncI->getNumOperands() == 2) 1698 break; 1699 LLVM_FALLTHROUGH; 1700 default: 1701 return nullptr; 1702 } 1703 1704 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0)); 1705 if (Phi && Phi->getParent() == L->getHeader()) { 1706 if (L->isLoopInvariant(IncI->getOperand(1))) 1707 return Phi; 1708 return nullptr; 1709 } 1710 if (IncI->getOpcode() == Instruction::GetElementPtr) 1711 return nullptr; 1712 1713 // Allow add/sub to be commuted. 
1714 Phi = dyn_cast<PHINode>(IncI->getOperand(1));
1715 if (Phi && Phi->getParent() == L->getHeader()) {
1716 if (L->isLoopInvariant(IncI->getOperand(0)))
1717 return Phi;
1718 }
1719 return nullptr;
1720 }
1721
1722 /// Whether the current loop exit test is based on this value. Currently this
1723 /// is limited to a direct use in the loop condition.
1724 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
1725 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1726 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1727 // TODO: Allow non-icmp loop test.
1728 if (!ICmp)
1729 return false;
1730
1731 // TODO: Allow indirect use.
1732 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
1733 }
1734
1735 /// linearFunctionTestReplace policy. Return true unless we can show that the
1736 /// current exit test is already sufficiently canonical.
1737 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
1738 assert(L->getLoopLatch() && "Must be in simplified form");
1739
1740 // Avoid converting a constant or loop invariant test back to a runtime
1741 // test. This is critical for when SCEV's cached ExitCount is less precise
1742 // than the current IR (such as after we've proven a particular exit is
1743 // actually dead and thus the BE count never reaches our ExitCount.)
1744 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1745 if (L->isLoopInvariant(BI->getCondition()))
1746 return false;
1747
1748 // Do LFTR to simplify the exit condition to an ICMP.
1749 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1750 if (!Cond)
1751 return true;
1752
1753 // Do LFTR to simplify the exit ICMP to EQ/NE
1754 ICmpInst::Predicate Pred = Cond->getPredicate();
1755 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
1756 return true;
1757
1758 // Look for a loop invariant RHS
1759 Value *LHS = Cond->getOperand(0);
1760 Value *RHS = Cond->getOperand(1);
1761 if (!L->isLoopInvariant(RHS)) {
1762 if (!L->isLoopInvariant(LHS))
1763 return true;
1764 std::swap(LHS, RHS);
1765 }
1766 // Look for a simple IV counter LHS
1767 PHINode *Phi = dyn_cast<PHINode>(LHS);
1768 if (!Phi)
1769 Phi = getLoopPhiForCounter(LHS, L);
1770
1771 if (!Phi)
1772 return true;
1773
1774 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
1775 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
1776 if (Idx < 0)
1777 return true;
1778
1779 // Do LFTR if the exit condition's IV is *not* a simple counter.
1780 Value *IncV = Phi->getIncomingValue(Idx);
1781 return Phi != getLoopPhiForCounter(IncV, L);
1782 }
1783
1784 /// Return true if undefined behavior would provably be executed on the path to
1785 /// OnPathTo if Root produced a poison result. Note that this doesn't say
1786 /// anything about whether OnPathTo is actually executed or whether Root is
1787 /// actually poison. This can be used to assess whether a new use of Root can
1788 /// be added at a location which is control equivalent with OnPathTo (such as
1789 /// immediately before it) without introducing UB which didn't previously
1790 /// exist. Note that a false result conveys no information.
1791 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
1792 Instruction *OnPathTo,
1793 DominatorTree *DT) {
1794 // Basic approach is to assume Root is poison, propagate poison forward
1795 // through all users we can easily track, and then check whether any of those
1796 // users are provably UB and must execute before our exiting block might
1797 // exit.
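// For example (purely illustrative): if Root is used as the pointer operand
// of a store which dominates OnPathTo, then Root being poison would make that
// store immediate UB on every path reaching OnPathTo, so returning true is
// justified.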
1798
1799 // The set of all recursive users we've visited (which are assumed to all be
1800 // poison because of said visit)
1801 SmallSet<const Value *, 16> KnownPoison;
1802 SmallVector<const Instruction*, 16> Worklist;
1803 Worklist.push_back(Root);
1804 while (!Worklist.empty()) {
1805 const Instruction *I = Worklist.pop_back_val();
1806
1807 // If we know this must trigger UB on a path leading to our target.
1808 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
1809 return true;
1810
1811 // If we can't analyze propagation through this instruction, just skip it
1812 // and transitive users. Safe as false is a conservative result.
1813 if (!propagatesFullPoison(I) && I != Root)
1814 continue;
1815
1816 if (KnownPoison.insert(I).second)
1817 for (const User *User : I->users())
1818 Worklist.push_back(cast<Instruction>(User));
1819 }
1820
1821 // Might be non-UB, or might have a path we couldn't prove must execute on
1822 // the way to the exiting bb.
1823 return false;
1824 }
1825
1826 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
1827 /// down to checking that all operands are constant and listing instructions
1828 /// that may hide undef.
1829 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
1830 unsigned Depth) {
1831 if (isa<Constant>(V))
1832 return !isa<UndefValue>(V);
1833
1834 if (Depth >= 6)
1835 return false;
1836
1837 // Conservatively handle non-constant non-instructions. For example, Arguments
1838 // may be undef.
1839 Instruction *I = dyn_cast<Instruction>(V);
1840 if (!I)
1841 return false;
1842
1843 // Load and return values may be undef.
1844 if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
1845 return false;
1846
1847 // Optimistically handle other instructions.
1848 for (Value *Op : I->operands()) {
1849 if (!Visited.insert(Op).second)
1850 continue;
1851 if (!hasConcreteDefImpl(Op, Visited, Depth+1))
1852 return false;
1853 }
1854 return true;
1855 }
1856
1857 /// Return true if the given value is concrete. We must prove that undef can
1858 /// never reach it.
1859 ///
1860 /// TODO: If we decide that this is a good approach to checking for undef, we
1861 /// may factor it into a common location.
1862 static bool hasConcreteDef(Value *V) {
1863 SmallPtrSet<Value*, 8> Visited;
1864 Visited.insert(V);
1865 return hasConcreteDefImpl(V, Visited, 0);
1866 }
1867
1868 /// Return true if this IV has no uses other than the (soon to be rewritten)
1869 /// loop exit test.
1870 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
1871 int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1872 Value *IncV = Phi->getIncomingValue(LatchIdx);
1873
1874 for (User *U : Phi->users())
1875 if (U != Cond && U != IncV) return false;
1876
1877 for (User *U : IncV->users())
1878 if (U != Cond && U != Phi) return false;
1879 return true;
1880 }
1881
1882 /// Return true if the given phi is a "counter" in L. A counter is an
1883 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
1884 /// step of 1. Note that L must have exactly one latch.
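///
/// For illustration, a header phi of the (hypothetical) form
///   %iv = phi i64 [ %start, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add i64 %iv, 1
/// qualifies, while a phi stepping by any value other than one does not.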
1885 static bool isLoopCounter(PHINode* Phi, Loop *L,
1886 ScalarEvolution *SE) {
1887 assert(Phi->getParent() == L->getHeader());
1888 assert(L->getLoopLatch());
1889
1890 if (!SE->isSCEVable(Phi->getType()))
1891 return false;
1892
1893 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1894 if (!AR || AR->getLoop() != L || !AR->isAffine())
1895 return false;
1896
1897 const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
1898 if (!Step || !Step->isOne())
1899 return false;
1900
1901 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
1902 Value *IncV = Phi->getIncomingValue(LatchIdx);
1903 return (getLoopPhiForCounter(IncV, L) == Phi);
1904 }
1905
1906 /// Search the loop header for a loop counter (an add rec w/ step of one)
1907 /// suitable for use by LFTR. If multiple counters are available, select the
1908 /// "best" one based on profitability heuristics.
1909 ///
1910 /// BECount may be an i8* pointer type. The pointer difference is already a
1911 /// valid count without scaling the address stride, so it remains a pointer
1912 /// expression as far as SCEV is concerned.
1913 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
1914 const SCEV *BECount,
1915 ScalarEvolution *SE, DominatorTree *DT) {
1916 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
1917
1918 Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
1919
1920 // Loop over all of the PHI nodes, looking for a simple counter.
1921 PHINode *BestPhi = nullptr;
1922 const SCEV *BestInit = nullptr;
1923 BasicBlock *LatchBlock = L->getLoopLatch();
1924 assert(LatchBlock && "Must be in simplified form");
1925 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
1926
1927 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1928 PHINode *Phi = cast<PHINode>(I);
1929 if (!isLoopCounter(Phi, L, SE))
1930 continue;
1931
1932 // Avoid comparing an integer IV against a pointer Limit.
1933 if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
1934 continue;
1935
1936 const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1937
1938 // AR may be a pointer type, while BECount is an integer type.
1939 // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
1940 // AR may not be a narrower type, or we may never exit.
1941 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
1942 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
1943 continue;
1944
1945 // Avoid reusing a potentially undef value to compute other values that may
1946 // have originally had a concrete definition.
1947 if (!hasConcreteDef(Phi)) {
1948 // We explicitly allow unknown phis as long as they are already used by
1949 // the loop exit test. This is legal since performing LFTR could not
1950 // increase the number of undef users.
1951 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
1952 if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
1953 !isLoopExitTestBasedOn(IncPhi, ExitingBB))
1954 continue;
1955 }
1956
1957 // Avoid introducing undefined behavior due to poison which didn't exist in
1958 // the original program. (Annoyingly, the rules for poison and undef
1959 // propagation are distinct, so this does NOT cover the undef case above.)
1960 // We have to ensure that we don't introduce UB by introducing a use on an
1961 // iteration where said IV produces poison. Our strategy here differs for
1962 // pointers and integer IVs.
For integers, we strip and reinfer as needed, 1963 // see code in linearFunctionTestReplace. For pointers, we restrict 1964 // transforms as there is no good way to reinfer inbounds once lost. 1965 if (!Phi->getType()->isIntegerTy() && 1966 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) 1967 continue; 1968 1969 const SCEV *Init = AR->getStart(); 1970 1971 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { 1972 // Don't force a live loop counter if another IV can be used. 1973 if (AlmostDeadIV(Phi, LatchBlock, Cond)) 1974 continue; 1975 1976 // Prefer to count-from-zero. This is a more "canonical" counter form. It 1977 // also prefers integer to pointer IVs. 1978 if (BestInit->isZero() != Init->isZero()) { 1979 if (BestInit->isZero()) 1980 continue; 1981 } 1982 // If two IVs both count from zero or both count from nonzero then the 1983 // narrower is likely a dead phi that has been widened. Use the wider phi 1984 // to allow the other to be eliminated. 1985 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) 1986 continue; 1987 } 1988 BestPhi = Phi; 1989 BestInit = Init; 1990 } 1991 return BestPhi; 1992 } 1993 1994 /// Insert an IR expression which computes the value held by the IV IndVar 1995 /// (which must be an loop counter w/unit stride) after the backedge of loop L 1996 /// is taken ExitCount times. 1997 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, 1998 const SCEV *ExitCount, bool UsePostInc, Loop *L, 1999 SCEVExpander &Rewriter, ScalarEvolution *SE) { 2000 assert(isLoopCounter(IndVar, L, SE)); 2001 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 2002 const SCEV *IVInit = AR->getStart(); 2003 2004 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter 2005 // finds a valid pointer IV. Sign extend ExitCount in order to materialize a 2006 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing 2007 // the existing GEPs whenever possible. 2008 if (IndVar->getType()->isPointerTy() && 2009 !ExitCount->getType()->isPointerTy()) { 2010 // IVOffset will be the new GEP offset that is interpreted by GEP as a 2011 // signed value. ExitCount on the other hand represents the loop trip count, 2012 // which is an unsigned value. FindLoopCounter only allows induction 2013 // variables that have a positive unit stride of one. This means we don't 2014 // have to handle the case of negative offsets (yet) and just need to zero 2015 // extend ExitCount. 2016 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); 2017 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy); 2018 if (UsePostInc) 2019 IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy)); 2020 2021 // Expand the code for the iteration count. 2022 assert(SE->isLoopInvariant(IVOffset, L) && 2023 "Computed iteration count is not loop invariant!"); 2024 2025 // We could handle pointer IVs other than i8*, but we need to compensate for 2026 // gep index scaling. 2027 assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), 2028 cast<PointerType>(IndVar->getType()) 2029 ->getElementType())->isOne() && 2030 "unit stride pointer IV must be i8*"); 2031 2032 const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset); 2033 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2034 return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI); 2035 } else { 2036 // In any other case, convert both IVInit and ExitCount to integers before 2037 // comparing. 
This may result in SCEV expansion of pointers, but in practice 2038 // SCEV will fold the pointer arithmetic away as such: 2039 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). 2040 // 2041 // Valid Cases: (1) both integers is most common; (2) both may be pointers 2042 // for simple memset-style loops. 2043 // 2044 // IVInit integer and ExitCount pointer would only occur if a canonical IV 2045 // were generated on top of case #2, which is not expected. 2046 2047 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 2048 // For unit stride, IVCount = Start + ExitCount with 2's complement 2049 // overflow. 2050 2051 // For integer IVs, truncate the IV before computing IVInit + BECount, 2052 // unless we know apriori that the limit must be a constant when evaluated 2053 // in the bitwidth of the IV. We prefer (potentially) keeping a truncate 2054 // of the IV in the loop over a (potentially) expensive expansion of the 2055 // widened exit count add(zext(add)) expression. 2056 if (SE->getTypeSizeInBits(IVInit->getType()) 2057 > SE->getTypeSizeInBits(ExitCount->getType())) { 2058 if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount)) 2059 ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType()); 2060 else 2061 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType()); 2062 } 2063 2064 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount); 2065 2066 if (UsePostInc) 2067 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); 2068 2069 // Expand the code for the iteration count. 2070 assert(SE->isLoopInvariant(IVLimit, L) && 2071 "Computed iteration count is not loop invariant!"); 2072 // Ensure that we generate the same type as IndVar, or a smaller integer 2073 // type. In the presence of null pointer values, we have an integer type 2074 // SCEV expression (IVInit) for a pointer type IV value (IndVar). 2075 Type *LimitTy = ExitCount->getType()->isPointerTy() ? 2076 IndVar->getType() : ExitCount->getType(); 2077 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2078 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); 2079 } 2080 } 2081 2082 /// This method rewrites the exit condition of the loop to be a canonical != 2083 /// comparison against the incremented loop induction variable. This pass is 2084 /// able to rewrite the exit tests of any loop where the SCEV analysis can 2085 /// determine a loop-invariant trip count of the loop, which is actually a much 2086 /// broader range than just linear tests. 2087 bool IndVarSimplify:: 2088 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 2089 const SCEV *ExitCount, 2090 PHINode *IndVar, SCEVExpander &Rewriter) { 2091 assert(L->getLoopLatch() && "Loop no longer in simplified form?"); 2092 assert(isLoopCounter(IndVar, L, SE)); 2093 Instruction * const IncVar = 2094 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch())); 2095 2096 // Initialize CmpIndVar to the preincremented IV. 2097 Value *CmpIndVar = IndVar; 2098 bool UsePostInc = false; 2099 2100 // If the exiting block is the same as the backedge block, we prefer to 2101 // compare against the post-incremented value, otherwise we must compare 2102 // against the preincremented value. 2103 if (ExitingBB == L->getLoopLatch()) { 2104 // For pointer IVs, we chose to not strip inbounds which requires us not 2105 // to add a potentially UB introducing use. 
We need to either a) show 2106 // the loop test we're modifying is already in post-inc form, or b) show 2107 // that adding a use must not introduce UB. 2108 bool SafeToPostInc = 2109 IndVar->getType()->isIntegerTy() || 2110 isLoopExitTestBasedOn(IncVar, ExitingBB) || 2111 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT); 2112 if (SafeToPostInc) { 2113 UsePostInc = true; 2114 CmpIndVar = IncVar; 2115 } 2116 } 2117 2118 // It may be necessary to drop nowrap flags on the incrementing instruction 2119 // if either LFTR moves from a pre-inc check to a post-inc check (in which 2120 // case the increment might have previously been poison on the last iteration 2121 // only) or if LFTR switches to a different IV that was previously dynamically 2122 // dead (and as such may be arbitrarily poison). We remove any nowrap flags 2123 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc 2124 // check), because the pre-inc addrec flags may be adopted from the original 2125 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags. 2126 // TODO: This handling is inaccurate for one case: If we switch to a 2127 // dynamically dead IV that wraps on the first loop iteration only, which is 2128 // not covered by the post-inc addrec. (If the new IV was not dynamically 2129 // dead, it could not be poison on the first iteration in the first place.) 2130 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) { 2131 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar)); 2132 if (BO->hasNoUnsignedWrap()) 2133 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap()); 2134 if (BO->hasNoSignedWrap()) 2135 BO->setHasNoSignedWrap(AR->hasNoSignedWrap()); 2136 } 2137 2138 Value *ExitCnt = genLoopLimit( 2139 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE); 2140 assert(ExitCnt->getType()->isPointerTy() == 2141 IndVar->getType()->isPointerTy() && 2142 "genLoopLimit missed a cast"); 2143 2144 // Insert a new icmp_ne or icmp_eq instruction before the branch. 2145 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2146 ICmpInst::Predicate P; 2147 if (L->contains(BI->getSuccessor(0))) 2148 P = ICmpInst::ICMP_NE; 2149 else 2150 P = ICmpInst::ICMP_EQ; 2151 2152 IRBuilder<> Builder(BI); 2153 2154 // The new loop exit condition should reuse the debug location of the 2155 // original loop exit condition. 2156 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition())) 2157 Builder.SetCurrentDebugLocation(Cond->getDebugLoc()); 2158 2159 // For integer IVs, if we evaluated the limit in the narrower bitwidth to 2160 // avoid the expensive expansion of the limit expression in the wider type, 2161 // emit a truncate to narrow the IV to the ExitCount type. This is safe 2162 // since we know (from the exit count bitwidth), that we can't self-wrap in 2163 // the narrower type. 2164 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); 2165 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); 2166 if (CmpIndVarSize > ExitCntSize) { 2167 assert(!CmpIndVar->getType()->isPointerTy() && 2168 !ExitCnt->getType()->isPointerTy()); 2169 2170 // Before resorting to actually inserting the truncate, use the same 2171 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend 2172 // the other side of the comparison instead. We still evaluate the limit 2173 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to 2174 // a truncate within in. 
2175 bool Extended = false; 2176 const SCEV *IV = SE->getSCEV(CmpIndVar); 2177 const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar), 2178 ExitCnt->getType()); 2179 const SCEV *ZExtTrunc = 2180 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType()); 2181 2182 if (ZExtTrunc == IV) { 2183 Extended = true; 2184 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(), 2185 "wide.trip.count"); 2186 } else { 2187 const SCEV *SExtTrunc = 2188 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType()); 2189 if (SExtTrunc == IV) { 2190 Extended = true; 2191 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(), 2192 "wide.trip.count"); 2193 } 2194 } 2195 2196 if (Extended) { 2197 bool Discard; 2198 L->makeLoopInvariant(ExitCnt, Discard); 2199 } else 2200 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), 2201 "lftr.wideiv"); 2202 } 2203 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 2204 << " LHS:" << *CmpIndVar << '\n' 2205 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==") 2206 << "\n" 2207 << " RHS:\t" << *ExitCnt << "\n" 2208 << "ExitCount:\t" << *ExitCount << "\n" 2209 << " was: " << *BI->getCondition() << "\n"); 2210 2211 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); 2212 Value *OrigCond = BI->getCondition(); 2213 // It's tempting to use replaceAllUsesWith here to fully replace the old 2214 // comparison, but that's not immediately safe, since users of the old 2215 // comparison may not be dominated by the new comparison. Instead, just 2216 // update the branch to use the new comparison; in the common case this 2217 // will make old comparison dead. 2218 BI->setCondition(Cond); 2219 DeadInsts.push_back(OrigCond); 2220 2221 ++NumLFTR; 2222 return true; 2223 } 2224 2225 //===----------------------------------------------------------------------===// 2226 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders. 2227 //===----------------------------------------------------------------------===// 2228 2229 /// If there's a single exit block, sink any loop-invariant values that 2230 /// were defined in the preheader but not used inside the loop into the 2231 /// exit block to reduce register pressure in the loop. 2232 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { 2233 BasicBlock *ExitBlock = L->getExitBlock(); 2234 if (!ExitBlock) return false; 2235 2236 BasicBlock *Preheader = L->getLoopPreheader(); 2237 if (!Preheader) return false; 2238 2239 bool MadeAnyChanges = false; 2240 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); 2241 BasicBlock::iterator I(Preheader->getTerminator()); 2242 while (I != Preheader->begin()) { 2243 --I; 2244 // New instructions were inserted at the end of the preheader. 2245 if (isa<PHINode>(I)) 2246 break; 2247 2248 // Don't move instructions which might have side effects, since the side 2249 // effects need to complete before instructions inside the loop. Also don't 2250 // move instructions which might read memory, since the loop may modify 2251 // memory. Note that it's okay if the instruction might have undefined 2252 // behavior: LoopSimplify guarantees that the preheader dominates the exit 2253 // block. 2254 if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 2255 continue; 2256 2257 // Skip debug info intrinsics. 2258 if (isa<DbgInfoIntrinsic>(I)) 2259 continue; 2260 2261 // Skip eh pad instructions. 
2262 if (I->isEHPad()) 2263 continue; 2264 2265 // Don't sink alloca: we never want to sink static alloca's out of the 2266 // entry block, and correctly sinking dynamic alloca's requires 2267 // checks for stacksave/stackrestore intrinsics. 2268 // FIXME: Refactor this check somehow? 2269 if (isa<AllocaInst>(I)) 2270 continue; 2271 2272 // Determine if there is a use in or before the loop (direct or 2273 // otherwise). 2274 bool UsedInLoop = false; 2275 for (Use &U : I->uses()) { 2276 Instruction *User = cast<Instruction>(U.getUser()); 2277 BasicBlock *UseBB = User->getParent(); 2278 if (PHINode *P = dyn_cast<PHINode>(User)) { 2279 unsigned i = 2280 PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 2281 UseBB = P->getIncomingBlock(i); 2282 } 2283 if (UseBB == Preheader || L->contains(UseBB)) { 2284 UsedInLoop = true; 2285 break; 2286 } 2287 } 2288 2289 // If there is, the def must remain in the preheader. 2290 if (UsedInLoop) 2291 continue; 2292 2293 // Otherwise, sink it to the exit block. 2294 Instruction *ToMove = &*I; 2295 bool Done = false; 2296 2297 if (I != Preheader->begin()) { 2298 // Skip debug info intrinsics. 2299 do { 2300 --I; 2301 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); 2302 2303 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) 2304 Done = true; 2305 } else { 2306 Done = true; 2307 } 2308 2309 MadeAnyChanges = true; 2310 ToMove->moveBefore(*ExitBlock, InsertPt); 2311 if (Done) break; 2312 InsertPt = ToMove->getIterator(); 2313 } 2314 2315 return MadeAnyChanges; 2316 } 2317 2318 /// Return a symbolic upper bound for the backedge taken count of the loop. 2319 /// This is more general than getConstantMaxBackedgeTakenCount as it returns 2320 /// an arbitrary expression as opposed to only constants. 2321 /// TODO: Move into the ScalarEvolution class. 2322 static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE, 2323 DominatorTree &DT, Loop *L) { 2324 SmallVector<BasicBlock*, 16> ExitingBlocks; 2325 L->getExitingBlocks(ExitingBlocks); 2326 2327 // Form an expression for the maximum exit count possible for this loop. We 2328 // merge the max and exact information to approximate a version of 2329 // getConstantMaxBackedgeTakenCount which isn't restricted to just constants. 2330 SmallVector<const SCEV*, 4> ExitCounts; 2331 for (BasicBlock *ExitingBB : ExitingBlocks) { 2332 const SCEV *ExitCount = SE.getExitCount(L, ExitingBB); 2333 if (isa<SCEVCouldNotCompute>(ExitCount)) 2334 ExitCount = SE.getExitCount(L, ExitingBB, 2335 ScalarEvolution::ConstantMaximum); 2336 if (!isa<SCEVCouldNotCompute>(ExitCount)) { 2337 assert(DT.dominates(ExitingBB, L->getLoopLatch()) && 2338 "We should only have known counts for exiting blocks that " 2339 "dominate latch!"); 2340 ExitCounts.push_back(ExitCount); 2341 } 2342 } 2343 if (ExitCounts.empty()) 2344 return SE.getCouldNotCompute(); 2345 return SE.getUMinFromMismatchedTypes(ExitCounts); 2346 } 2347 2348 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { 2349 SmallVector<BasicBlock*, 16> ExitingBlocks; 2350 L->getExitingBlocks(ExitingBlocks); 2351 2352 // Remove all exits which aren't both rewriteable and analyzeable. 2353 auto NewEnd = llvm::remove_if(ExitingBlocks, [&](BasicBlock *ExitingBB) { 2354 // If our exitting block exits multiple loops, we can only rewrite the 2355 // innermost one. Otherwise, we're changing how many times the innermost 2356 // loop runs before it exits. 2357 if (LI->getLoopFor(ExitingBB) != L) 2358 return true; 2359 2360 // Can't rewrite non-branch yet. 
2361 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2362 if (!BI)
2363 return true;
2364
2365 // If already constant, nothing to do.
2366 if (isa<Constant>(BI->getCondition()))
2367 return true;
2368
2369 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2370 if (isa<SCEVCouldNotCompute>(ExitCount))
2371 return true;
2372 return false;
2373 });
2374 ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
2375
2376 if (ExitingBlocks.empty())
2377 return false;
2378
2379 // Get a symbolic upper bound on the loop backedge taken count.
2380 const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
2381 if (isa<SCEVCouldNotCompute>(MaxExitCount))
2382 return false;
2383
2384 // Visit our exit blocks in order of dominance. We know from the fact that
2385 // all exits (left) are analyzeable that there must be a total dominance order
2386 // between them as each must dominate the latch. The visit order only
2387 // matters for the provably equal case.
2388 llvm::sort(ExitingBlocks,
2389 [&](BasicBlock *A, BasicBlock *B) {
2390 // std::sort sorts in ascending order, so we want the inverse of
2391 // the normal dominance relation.
2392 if (DT->properlyDominates(A, B)) return true;
2393 if (DT->properlyDominates(B, A)) return false;
2394 llvm_unreachable("expected total dominance order!");
2395 });
2396 #ifndef NDEBUG
2397 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2398 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2399 }
2400 #endif
2401
2402 auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
2403 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2404 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2405 auto *OldCond = BI->getCondition();
2406 auto *NewCond = ConstantInt::get(OldCond->getType(),
2407 IsTaken ? ExitIfTrue : !ExitIfTrue);
2408 BI->setCondition(NewCond);
2409 if (OldCond->use_empty())
2410 DeadInsts.push_back(OldCond);
2411 };
2412
2413 bool Changed = false;
2414 SmallSet<const SCEV*, 8> DominatingExitCounts;
2415 for (BasicBlock *ExitingBB : ExitingBlocks) {
2416 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2417 assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");
2418
2419 // If we know we'd exit on the first iteration, rewrite the exit to
2420 // reflect this. This does not imply the loop must exit through this
2421 // exit; there may be an earlier one taken on the first iteration.
2422 // TODO: Given we know the backedge can't be taken, we should go ahead
2423 // and break it. Or at least, kill all the header phis and simplify.
2424 if (ExitCount->isZero()) {
2425 FoldExit(ExitingBB, true);
2426 Changed = true;
2427 continue;
2428 }
2429
2430 // If we end up with a pointer exit count, bail. Note that we can end up
2431 // with a pointer exit count for one exiting block, and not for another in
2432 // the same loop.
2433 if (!ExitCount->getType()->isIntegerTy() ||
2434 !MaxExitCount->getType()->isIntegerTy())
2435 continue;
2436
2437 Type *WiderType =
2438 SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2439 ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2440 MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2441 assert(MaxExitCount->getType() == ExitCount->getType());
2442
2443 // Can we prove that some other exit must be taken strictly before this
2444 // one?
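// (Illustrative example: if another exit bounds the backedge-taken count by
// 10 while this exit's count is 20, then MaxExitCount u< ExitCount holds,
// this exit can never be the one taken, and its test can be folded so the
// loop simply stays in the loop here.)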
2445 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2446 MaxExitCount, ExitCount)) {
2447 FoldExit(ExitingBB, false);
2448 Changed = true;
2449 continue;
2450 }
2451
2452 // As we run, keep track of which exit counts we've encountered. If we
2453 // find a duplicate, we've found an exit which would have exited on the
2454 // same iteration, but (from the visit order) strictly follows another
2455 // which does the same and is thus dead.
2456 if (!DominatingExitCounts.insert(ExitCount).second) {
2457 FoldExit(ExitingBB, false);
2458 Changed = true;
2459 continue;
2460 }
2461
2462 // TODO: There might be another opportunity to leverage SCEV's reasoning
2463 // here. If we kept track of the min of dominating exits so far, we could
2464 // discharge exits with EC >= MDEC. This is less powerful than the existing
2465 // transform (since later exits aren't considered), but potentially more
2466 // powerful for any case where SCEV can prove a >=u b, but neither a == b
2467 // nor a >u b. Such a case is not currently known.
2468 }
2469 return Changed;
2470 }
2471
2472 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
2473 SmallVector<BasicBlock*, 16> ExitingBlocks;
2474 L->getExitingBlocks(ExitingBlocks);
2475
2476 // Finally, see if we can rewrite our exit conditions into a loop invariant
2477 // form. If we have a read-only loop, and we can tell that we must exit down
2478 // a path which does not need any of the values computed within the loop, we
2479 // can rewrite the loop to exit on the first iteration. Note that this
2480 // neither a) tells us the loop exits on the first iteration (unless
2481 // *all* exits are predicateable) nor b) tells us *which* exit might be taken.
2482 // This transformation looks a lot like a restricted form of dead loop
2483 // elimination, but restricted to read-only loops and without necessarily
2484 // needing to kill the loop entirely.
2485 if (!LoopPredication)
2486 return false;
2487
2488 if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2489 return false;
2490
2491 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2492 // through *explicit* control flow. We have to eliminate the possibility of
2493 // implicit exits (see below) before we know it's truly exact.
2494 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2495 if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2496 !SE->isLoopInvariant(ExactBTC, L) ||
2497 !isSafeToExpand(ExactBTC, *SE))
2498 return false;
2499
2500 // If we end up with a pointer exit count, bail. It may be unsized.
2501 if (!ExactBTC->getType()->isIntegerTy())
2502 return false;
2503
2504 auto BadExit = [&](BasicBlock *ExitingBB) {
2505 // If our exiting block exits multiple loops, we can only rewrite the
2506 // innermost one. Otherwise, we're changing how many times the innermost
2507 // loop runs before it exits.
2508 if (LI->getLoopFor(ExitingBB) != L)
2509 return true;
2510
2511 // Can't rewrite non-branch yet.
2512 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2513 if (!BI)
2514 return true;
2515
2516 // If already constant, nothing to do.
2517 if (isa<Constant>(BI->getCondition()))
2518 return true;
2519
2520 // If the exit block has phis, we need to be able to compute the values
2521 // within the loop which contains them. This assumes trivially lcssa phis
2522 // have already been removed; TODO: generalize
2523 BasicBlock *ExitBlock =
2524 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ?
1 : 0);
2525 if (!ExitBlock->phis().empty())
2526 return true;
2527
2528 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2529 assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
2530 if (!SE->isLoopInvariant(ExitCount, L) ||
2531 !isSafeToExpand(ExitCount, *SE))
2532 return true;
2533
2534 // If we end up with a pointer exit count, bail. It may be unsized.
2535 if (!ExitCount->getType()->isIntegerTy())
2536 return true;
2537
2538 return false;
2539 };
2540
2541 // If we have any exits which can't be predicated themselves, then we can't
2542 // predicate any exit which isn't guaranteed to execute before it. Consider
2543 // two exits (a) and (b) which would both exit on the same iteration. If we
2544 // can predicate (b), but not (a), and (a) precedes (b) along some path, then
2545 // we could convert a loop from exiting through (a) to one exiting through
2546 // (b). Note that this problem exists only for exits with the same exit
2547 // count, and we could be more aggressive when exit counts are known to be unequal.
2548 llvm::sort(ExitingBlocks,
2549 [&](BasicBlock *A, BasicBlock *B) {
2550 // std::sort sorts in ascending order, so we want the inverse of
2551 // the normal dominance relation, plus a tie breaker for blocks
2552 // unordered by dominance.
2553 if (DT->properlyDominates(A, B)) return true;
2554 if (DT->properlyDominates(B, A)) return false;
2555 return A->getName() < B->getName();
2556 });
2557 // Check to see if our exit blocks are a total order (i.e. a linear chain of
2558 // exits before the backedge). If they aren't, reasoning about reachability
2559 // is complicated and we choose not to for now.
2560 for (unsigned i = 1; i < ExitingBlocks.size(); i++)
2561 if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
2562 return false;
2563
2564 // Given our sorted total order, we know that exit[j] must be evaluated
2565 // after all exit[i] such that j > i.
2566 for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
2567 if (BadExit(ExitingBlocks[i])) {
2568 ExitingBlocks.resize(i);
2569 break;
2570 }
2571
2572 if (ExitingBlocks.empty())
2573 return false;
2574
2575 // We rely on not being able to reach an exiting block on a later iteration
2576 // than its statically computed exit count. The implementation of
2577 // getExitCount currently has this invariant, but assert it here so that
2578 // breakage is obvious if this ever changes.
2579 assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2580 return DT->dominates(ExitingBB, L->getLoopLatch());
2581 }));
2582
2583 // At this point, ExitingBlocks consists of only those blocks which are
2584 // predicatable. Given that, we know we have at least one exit we can
2585 // predicate if the loop doesn't have side effects and doesn't have any
2586 // implicit exits (because then our exact BTC isn't actually exact).
2587 // @Reviewers - As structured, this is O(I^2) for loop nests. Any
2588 // suggestions on how to improve this? I can obviously bail out for outer
2589 // loops, but that seems less than ideal. MemorySSA can find memory writes,
2590 // is that enough for *all* side effects?
2591 for (BasicBlock *BB : L->blocks())
2592 for (auto &I : *BB)
2593 // TODO: isGuaranteedToTransfer
2594 if (I.mayHaveSideEffects() || I.mayThrow())
2595 return false;
2596
2597 bool Changed = false;
2598 // Finally, do the actual predication for all predicatable blocks.
A couple 2599 // of notes here: 2600 // 1) We don't bother to constant fold dominated exits with identical exit 2601 // counts; that's simply a form of CSE/equality propagation and we leave 2602 // it for dedicated passes. 2603 // 2) We insert the comparison at the branch. Hoisting introduces additional 2604 // legality constraints and we leave that to dedicated logic. We want to 2605 // predicate even if we can't insert a loop invariant expression as 2606 // peeling or unrolling will likely reduce the cost of the otherwise loop 2607 // varying check. 2608 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator()); 2609 IRBuilder<> B(L->getLoopPreheader()->getTerminator()); 2610 Value *ExactBTCV = nullptr; // Lazily generated if needed. 2611 for (BasicBlock *ExitingBB : ExitingBlocks) { 2612 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2613 2614 auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2615 Value *NewCond; 2616 if (ExitCount == ExactBTC) { 2617 NewCond = L->contains(BI->getSuccessor(0)) ? 2618 B.getFalse() : B.getTrue(); 2619 } else { 2620 Value *ECV = Rewriter.expandCodeFor(ExitCount); 2621 if (!ExactBTCV) 2622 ExactBTCV = Rewriter.expandCodeFor(ExactBTC); 2623 Value *RHS = ExactBTCV; 2624 if (ECV->getType() != RHS->getType()) { 2625 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType()); 2626 ECV = B.CreateZExt(ECV, WiderTy); 2627 RHS = B.CreateZExt(RHS, WiderTy); 2628 } 2629 auto Pred = L->contains(BI->getSuccessor(0)) ? 2630 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; 2631 NewCond = B.CreateICmp(Pred, ECV, RHS); 2632 } 2633 Value *OldCond = BI->getCondition(); 2634 BI->setCondition(NewCond); 2635 if (OldCond->use_empty()) 2636 DeadInsts.push_back(OldCond); 2637 Changed = true; 2638 } 2639 2640 return Changed; 2641 } 2642 2643 //===----------------------------------------------------------------------===// 2644 // IndVarSimplify driver. Manage several subpasses of IV simplification. 2645 //===----------------------------------------------------------------------===// 2646 2647 bool IndVarSimplify::run(Loop *L) { 2648 // We need (and expect!) the incoming loop to be in LCSSA. 2649 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2650 "LCSSA required to run indvars!"); 2651 2652 // If LoopSimplify form is not available, stay out of trouble. Some notes: 2653 // - LSR currently only supports LoopSimplify-form loops. Indvars' 2654 // canonicalization can be a pessimization without LSR to "clean up" 2655 // afterwards. 2656 // - We depend on having a preheader; in particular, 2657 // Loop::getCanonicalInductionVariable only supports loops with preheaders, 2658 // and we're in trouble if we can't find the induction variable even when 2659 // we've manually inserted one. 2660 // - LFTR relies on having a single backedge. 2661 if (!L->isLoopSimplifyForm()) 2662 return false; 2663 2664 #ifndef NDEBUG 2665 // Used below for a consistency check only 2666 const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); 2667 #endif 2668 2669 bool Changed = false; 2670 // If there are any floating-point recurrences, attempt to 2671 // transform them to use integer recurrences. 2672 Changed |= rewriteNonIntegerIVs(L); 2673 2674 // Create a rewriter object which we'll use to transform the code with. 2675 SCEVExpander Rewriter(*SE, DL, "indvars"); 2676 #ifndef NDEBUG 2677 Rewriter.setDebugType(DEBUG_TYPE); 2678 #endif 2679 2680 // Eliminate redundant IV users. 2681 // 2682 // Simplification works best when run before other consumers of SCEV. 
We 2683 // attempt to avoid evaluating SCEVs for sign/zero extend operations until 2684 // other expressions involving loop IVs have been evaluated. This helps SCEV 2685 // set no-wrap flags before normalizing sign/zero extension. 2686 Rewriter.disableCanonicalMode(); 2687 Changed |= simplifyAndExtend(L, Rewriter, LI); 2688 2689 // Check to see if we can compute the final value of any expressions 2690 // that are recurrent in the loop, and substitute the exit values from the 2691 // loop into any instructions outside of the loop that use the final values 2692 // of the current expressions. 2693 if (ReplaceExitValue != NeverRepl) { 2694 if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, Rewriter, DT, 2695 ReplaceExitValue, DeadInsts)) { 2696 NumReplaced += Rewrites; 2697 Changed = true; 2698 } 2699 } 2700 2701 // Eliminate redundant IV cycles. 2702 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); 2703 2704 // Try to eliminate loop exits based on analyzeable exit counts 2705 if (optimizeLoopExits(L, Rewriter)) { 2706 Changed = true; 2707 // Given we've changed exit counts, notify SCEV 2708 SE->forgetLoop(L); 2709 } 2710 2711 // Try to form loop invariant tests for loop exits by changing how many 2712 // iterations of the loop run when that is unobservable. 2713 if (predicateLoopExits(L, Rewriter)) { 2714 Changed = true; 2715 // Given we've changed exit counts, notify SCEV 2716 SE->forgetLoop(L); 2717 } 2718 2719 // If we have a trip count expression, rewrite the loop's exit condition 2720 // using it. 2721 if (!DisableLFTR) { 2722 SmallVector<BasicBlock*, 16> ExitingBlocks; 2723 L->getExitingBlocks(ExitingBlocks); 2724 for (BasicBlock *ExitingBB : ExitingBlocks) { 2725 // Can't rewrite non-branch yet. 2726 if (!isa<BranchInst>(ExitingBB->getTerminator())) 2727 continue; 2728 2729 // If our exitting block exits multiple loops, we can only rewrite the 2730 // innermost one. Otherwise, we're changing how many times the innermost 2731 // loop runs before it exits. 2732 if (LI->getLoopFor(ExitingBB) != L) 2733 continue; 2734 2735 if (!needsLFTR(L, ExitingBB)) 2736 continue; 2737 2738 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2739 if (isa<SCEVCouldNotCompute>(ExitCount)) 2740 continue; 2741 2742 // This was handled above, but as we form SCEVs, we can sometimes refine 2743 // existing ones; this allows exit counts to be folded to zero which 2744 // weren't when optimizeLoopExits saw them. Arguably, we should iterate 2745 // until stable to handle cases like this better. 2746 if (ExitCount->isZero()) 2747 continue; 2748 2749 PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT); 2750 if (!IndVar) 2751 continue; 2752 2753 // Avoid high cost expansions. Note: This heuristic is questionable in 2754 // that our definition of "high cost" is not exactly principled. 2755 if (Rewriter.isHighCostExpansion(ExitCount, L)) 2756 continue; 2757 2758 // Check preconditions for proper SCEVExpander operation. SCEV does not 2759 // express SCEVExpander's dependencies, such as LoopSimplify. Instead 2760 // any pass that uses the SCEVExpander must do it. This does not work 2761 // well for loop passes because SCEVExpander makes assumptions about 2762 // all loops, while LoopPassManager only forces the current loop to be 2763 // simplified. 2764 // 2765 // FIXME: SCEV expansion has no way to bail out, so the caller must 2766 // explicitly check any assumptions made by SCEV. Brittle. 
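// As a minimal safeguard for the FIXME above: if the exit count is an addrec,
// only proceed when its loop has a preheader, i.e. when it looks like it is
// in the LoopSimplify form that SCEVExpander assumes.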
2767 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount); 2768 if (!AR || AR->getLoop()->getLoopPreheader()) 2769 Changed |= linearFunctionTestReplace(L, ExitingBB, 2770 ExitCount, IndVar, 2771 Rewriter); 2772 } 2773 } 2774 // Clear the rewriter cache, because values that are in the rewriter's cache 2775 // can be deleted in the loop below, causing the AssertingVH in the cache to 2776 // trigger. 2777 Rewriter.clear(); 2778 2779 // Now that we're done iterating through lists, clean up any instructions 2780 // which are now dead. 2781 while (!DeadInsts.empty()) 2782 if (Instruction *Inst = 2783 dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val())) 2784 Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); 2785 2786 // The Rewriter may not be used from this point on. 2787 2788 // Loop-invariant instructions in the preheader that aren't used in the 2789 // loop may be sunk below the loop to reduce register pressure. 2790 Changed |= sinkUnusedInvariants(L); 2791 2792 // rewriteFirstIterationLoopExitValues does not rely on the computation of 2793 // trip count and therefore can further simplify exit values in addition to 2794 // rewriteLoopExitValues. 2795 Changed |= rewriteFirstIterationLoopExitValues(L); 2796 2797 // Clean up dead instructions. 2798 Changed |= DeleteDeadPHIs(L->getHeader(), TLI); 2799 2800 // Check a post-condition. 2801 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2802 "Indvars did not preserve LCSSA!"); 2803 2804 // Verify that LFTR, and any other change have not interfered with SCEV's 2805 // ability to compute trip count. We may have *changed* the exit count, but 2806 // only by reducing it. 2807 #ifndef NDEBUG 2808 if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { 2809 SE->forgetLoop(L); 2810 const SCEV *NewBECount = SE->getBackedgeTakenCount(L); 2811 if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) < 2812 SE->getTypeSizeInBits(NewBECount->getType())) 2813 NewBECount = SE->getTruncateOrNoop(NewBECount, 2814 BackedgeTakenCount->getType()); 2815 else 2816 BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, 2817 NewBECount->getType()); 2818 assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount, 2819 NewBECount) && "indvars must preserve SCEV"); 2820 } 2821 #endif 2822 2823 return Changed; 2824 } 2825 2826 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, 2827 LoopStandardAnalysisResults &AR, 2828 LPMUpdater &) { 2829 Function *F = L.getHeader()->getParent(); 2830 const DataLayout &DL = F->getParent()->getDataLayout(); 2831 2832 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA); 2833 if (!IVS.run(&L)) 2834 return PreservedAnalyses::all(); 2835 2836 auto PA = getLoopPassPreservedAnalyses(); 2837 PA.preserveSet<CFGAnalyses>(); 2838 if (AR.MSSA) 2839 PA.preserve<MemorySSAAnalysis>(); 2840 return PA; 2841 } 2842 2843 namespace { 2844 2845 struct IndVarSimplifyLegacyPass : public LoopPass { 2846 static char ID; // Pass identification, replacement for typeid 2847 2848 IndVarSimplifyLegacyPass() : LoopPass(ID) { 2849 initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry()); 2850 } 2851 2852 bool runOnLoop(Loop *L, LPPassManager &LPM) override { 2853 if (skipLoop(L)) 2854 return false; 2855 2856 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 2857 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 2858 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2859 auto *TLIP = 
getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); 2860 auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr; 2861 auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); 2862 auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr; 2863 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 2864 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>(); 2865 MemorySSA *MSSA = nullptr; 2866 if (MSSAAnalysis) 2867 MSSA = &MSSAAnalysis->getMSSA(); 2868 2869 IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA); 2870 return IVS.run(L); 2871 } 2872 2873 void getAnalysisUsage(AnalysisUsage &AU) const override { 2874 AU.setPreservesCFG(); 2875 AU.addPreserved<MemorySSAWrapperPass>(); 2876 getLoopAnalysisUsage(AU); 2877 } 2878 }; 2879 2880 } // end anonymous namespace 2881 2882 char IndVarSimplifyLegacyPass::ID = 0; 2883 2884 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", 2885 "Induction Variable Simplification", false, false) 2886 INITIALIZE_PASS_DEPENDENCY(LoopPass) 2887 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars", 2888 "Induction Variable Simplification", false, false) 2889 2890 Pass *llvm::createIndVarSimplifyPass() { 2891 return new IndVarSimplifyLegacyPass(); 2892 } 2893