//===- IndVarSimplify.cpp - Induction Variable Elimination ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars"));

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
    "indvars-post-increment-ranges", cl::Hidden,
    cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
    cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                cl::desc("Predicate conditions in read only loops"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;

  SmallVector<WeakTrackingVH, 16> DeadInsts;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to eliminate loop exits based on analyzable exit counts.
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}

  bool run(Loop *L);
};

} // end anonymous namespace

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t.
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
///   for(double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for(int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
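///
/// For example (an illustrative sketch, not taken from a particular test
/// case): if the header contains
///   %p = phi i32 [ %init, %preheader ], [ %next, %latch ]
/// and an exit is guarded by a loop-invariant condition whose exiting block
/// dominates the latch, then the LCSSA phi in the exit block that receives %p
/// can instead receive %init, because that exit can only be taken on the
/// first iteration.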
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created [sz]ext
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetics on the wider indvar are more
  // expensive than those on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def. This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative. Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }
  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// 0 for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
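///
/// For example (an illustrative sketch): if the narrow def %iv was widened
/// with a sign extend and the use is
///   %add = add nsw i32 %iv, %step
/// then the nsw flag gives sext(%add) == sext(%iv) + sext(%step), so the
/// widened use can be expressed as the wide IV plus the extended other
/// operand; if SCEV folds that sum into an AddRec of this loop, the use can
/// be widened without inserting a truncate.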
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>.
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop? If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
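///
/// As an illustration (names are invented, not from a particular test case),
/// a use such as
///   %r = urem i32 %narrow.iv, %d
/// that does not widen to a recurrence is rewritten against a truncation of
/// the wide IV:
///   %t = trunc i64 %wide.iv to i32
///   %r = urem i32 %t, %d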
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension).  For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive.  Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}

/// If the narrow use is an instruction whose two operands are the defining
/// instruction of DU and a load instruction, then we have the following:
/// if the load is hoisted outside the loop, then we do not reach this function
/// because scalar evolution analysis works fine in widenIVUse with variables
/// hoisted outside the loop, and efficient code is subsequently generated by
/// not emitting truncate instructions. But when the load is not hoisted
/// (whether due to a limitation in alias analysis or a true legality
/// constraint), then scalar evolution cannot proceed with loop-variant values
/// and inefficient code is generated. This function handles the non-hoisted
/// load special case by making the optimization generate the same kind of code
/// for hoisted and non-hoisted loads (widen the use and eliminate the sign or
/// zero extend instruction). This special case is important especially when
/// the induction variables are affecting the addressing mode in code
/// generation.
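///
/// A minimal sketch of the pattern this handles (illustrative only):
///   %v   = load i32, i32* %p           ; loop-variant load
///   %add = add nsw i32 %narrow.iv, %v
///   %ext = sext i32 %add to i64        ; every user of %add is such an extend
/// Instead of truncating the wide IV for %add, the add itself is widened and
/// the extends of its result are eliminated (see
/// widenWithVariantLoadUseCodegen).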
bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  // Handle the common case of add<nsw/nuw>.
  const unsigned OpCode = NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return false;

  // The operand that is not defined by NarrowDef of DU. Let's call it the
  // other operand.
  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
         "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(NarrowUse);
  ExtendKind ExtKind = getExtendKind(NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
      SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
      SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return false;

  // We are interested in the other operand being a load instruction.
  // But, we should look into relaxing this restriction later on.
  auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
  if (I && I->getOpcode() != Instruction::Load)
    return false;

  // Verifying that Defining operand is an AddRec
  const SCEV *Op1 = SE->getSCEV(WideDef);
  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
    return false;
  // Verifying that other operand is an Extend.
  if (ExtKind == SignExtended) {
    if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
      return false;
  } else {
    if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
      return false;
  }

  if (ExtKind == SignExtended) {
    for (Use &U : NarrowUse->uses()) {
      SExtInst *User = dyn_cast<SExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  } else { // ExtKind == ZeroExtended
    for (Use &U : NarrowUse->uses()) {
      ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  }

  return true;
}

/// Special Case for widening with variant Loads (see
/// WidenIV::widenWithVariantLoadUse). This is the code generation part.
void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  ExtendKind ExtKind = getExtendKind(NarrowDef);

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  // Generating a widening use instruction.
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      ExtKind, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      ExtKind, NarrowUse);
  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);

  assert(ExtKind != Unknown && "Unknown ExtKind not handled");

  ExtendKindMap[NarrowUse] = ExtKind;

  for (Use &U : NarrowUse->uses()) {
    Instruction *User = nullptr;
    if (ExtKind == SignExtended)
      User = dyn_cast<SExtInst>(U.getUser());
    else
      User = dyn_cast<ZExtInst>(U.getUser());
    if (User && User->getType() == WideType) {
      LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
                        << *WideBO << "\n");
      ++NumElimExt;
      User->replaceAllUsesWith(WideBO);
      DeadInsts.emplace_back(User);
    }
  }
}

/// Determine whether an individual user of the narrow IV can be widened. If so,
/// return the wide clone of the user.
Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
  assert(ExtendKindMap.count(DU.NarrowDef) &&
         "Should already know the kind of extension used to widen NarrowDef");

  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
    if (LI->getLoopFor(UsePhi->getParent()) != L) {
      // For LCSSA phis, sink the truncate outside the loop.
      // After SimplifyCFG most loop exit targets have a single predecessor.
      // Otherwise fall back to a truncate within the loop.
      if (UsePhi->getNumOperands() != 1)
        truncateIVUse(DU, DT, LI);
      else {
        // Widening the PHI requires us to insert a trunc. The logical place
        // for this trunc is in the same BB as the PHI. This is not possible if
        // the BB is terminated by a catchswitch.
        if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
          return nullptr;

        PHINode *WidePhi =
          PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
                          UsePhi);
        WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
        IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
        Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
        UsePhi->replaceAllUsesWith(Trunc);
        DeadInsts.emplace_back(UsePhi);
        LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
                          << *WidePhi << "\n");
      }
      return nullptr;
    }
  }

  // This narrow use can be widened by a sext if it's non-negative or its narrow
  // def was widened by a sext. Same for zext.
  auto canWidenBySExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
  };
  auto canWidenByZExt = [&]() {
    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
  };

  // Our raison d'etre! Eliminate sign and zero extension.
  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
      (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
    Value *NewDef = DU.WideDef;
    if (DU.NarrowUse->getType() != WideType) {
      unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
      if (CastWidth < IVWidth) {
        // The cast isn't as wide as the IV, so insert a Trunc.
        IRBuilder<> Builder(DU.NarrowUse);
        NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
      }
      else {
        // A wider extend was hidden behind a narrower one. This may induce
        // another round of IV widening in which the intermediate IV becomes
        // dead. It should be very rare.
        LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
                          << " not wide enough to subsume " << *DU.NarrowUse
                          << "\n");
        DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
        NewDef = DU.NarrowUse;
      }
    }
    if (NewDef != DU.NarrowUse) {
      LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
                        << " replaced by " << *DU.WideDef << "\n");
      ++NumElimExt;
      DU.NarrowUse->replaceAllUsesWith(NewDef);
      DeadInsts.emplace_back(DU.NarrowUse);
    }
    // Now that the extend is gone, we want to expose its uses for potential
    // further simplification. We don't need to directly inform SimplifyIVUsers
    // of the new users, because their parent IV will be processed later as a
    // new loop phi. If we preserved IVUsers analysis, we would also want to
    // push the uses of WideDef here.

    // No further widening is needed. The deceased [sz]ext had done it for us.
    return nullptr;
  }

  // Does this user itself evaluate to a recurrence after widening?
  WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
  if (!WideAddRec.first)
    WideAddRec = getWideRecurrence(DU);

  assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
  if (!WideAddRec.first) {
    // If use is a loop condition, try to promote the condition instead of
    // truncating the IV first.
    if (widenLoopCompare(DU))
      return nullptr;

    // We are here about to generate a truncate instruction that may hurt
    // performance because the scalar evolution expression computed earlier
    // in WideAddRec.first does not indicate a polynomial induction expression.
    // In that case, look at the operands of the use instruction to determine
    // if we can still widen the use instead of truncating its operand.
    if (widenWithVariantLoadUse(DU)) {
      widenWithVariantLoadUseCodegen(DU);
      return nullptr;
    }

    // This user does not evaluate to a recurrence after widening, so don't
    // follow it. Instead insert a Trunc to kill off the original use,
    // eventually isolating the original narrow IV so it can be removed.
    truncateIVUse(DU, DT, LI);
    return nullptr;
  }
  // Assume block terminators cannot evaluate to a recurrence. We can't insert
  // a Trunc after a terminator if there happens to be a critical edge.
  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
         "SCEV is not expected to evaluate a block terminator");

  // Reuse the IV increment that SCEVExpander created as long as it dominates
  // NarrowUse.
  Instruction *WideUse = nullptr;
  if (WideAddRec.first == WideIncExpr &&
      Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
    WideUse = WideInc;
  else {
    WideUse = cloneIVUser(DU, WideAddRec.first);
    if (!WideUse)
      return nullptr;
  }
  // Evaluation of WideAddRec ensured that the narrow expression could be
  // extended outside the loop without overflow. This suggests that the wide use
  // evaluates to the same expression as the extended narrow use, but doesn't
  // absolutely guarantee it. Hence the following failsafe check. In rare cases
  // where it fails, we simply throw away the newly created wide use.
  if (WideAddRec.first != SE->getSCEV(WideUse)) {
    LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
                      << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
                      << "\n");
    DeadInsts.emplace_back(WideUse);
    return nullptr;
  }

  // If we reached this point then we are going to replace
  // DU.NarrowUse with WideUse. Reattach DbgValue then.
  replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);

  ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
  // Returning WideUse pushes it on the worklist.
  return WideUse;
}

/// Add eligible users of NarrowDef to NarrowIVUsers.
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
  bool NonNegativeDef =
      SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
                           SE->getConstant(NarrowSCEV->getType(), 0));
  for (User *U : NarrowDef->users()) {
    Instruction *NarrowUser = cast<Instruction>(U);

    // Handle data flow merges and bizarre phi cycles.
    if (!Widened.insert(NarrowUser).second)
      continue;

    bool NonNegativeUse = false;
    if (!NonNegativeDef) {
      // We might have control-dependent range information for this context.
      if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
        NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
    }

    NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
                               NonNegativeDef || NonNegativeUse);
  }
}

/// Process a single induction variable. First use the SCEVExpander to create a
/// wide induction variable that evaluates to the same recurrence as the
/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
/// def-use chain. After widenIVUse has processed all interesting IV users, the
/// narrow IV will be isolated for removal by DeleteDeadPHIs.
///
/// It would be simpler to delete uses as they are processed, but we must avoid
/// invalidating SCEV expressions.
PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
  // Is this phi an induction variable?
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
  if (!AddRec)
    return nullptr;

  // Widen the induction variable expression.
  const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
                               ? SE->getSignExtendExpr(AddRec, WideType)
                               : SE->getZeroExtendExpr(AddRec, WideType);

  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
         "Expect the new IV expression to preserve its type");

  // Can the IV be extended outside the loop without overflow?
  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return nullptr;

  // An AddRec must have loop-invariant operands. Since this AddRec is
  // materialized by a loop header phi, the expression cannot have any post-loop
  // operands, so they must dominate the loop header.
  assert(
      SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
      SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
      "Loop header phi recurrence inputs do not dominate the loop");

  // Iterate over IV uses (including transitive ones) looking for IV increments
  // of the form 'add nsw %iv, <const>'.
  // For each increment and each use of
  // the increment, calculate control-dependent range information based on
  // dominating conditions inside of the loop (e.g. a range check inside of the
  // loop). Calculated ranges are stored in the PostIncRangeInfos map.
  //
  // Control-dependent range information is later used to prove that a narrow
  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
  // this on demand because when pushNarrowIVUsers needs this information some
  // of the dominating conditions might be already widened.
  if (UsePostIncrementRanges)
    calculatePostIncRanges(OrigPhi);

  // The rewriter provides a value for the desired IV expression. This may
  // either find an existing phi or materialize a new one. Either way, we
  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
  // of the phi-SCC dominates the loop entry.
  Instruction *InsertPt = &L->getHeader()->front();
  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));

  // Remembering the WideIV increment generated by SCEVExpander allows
  // widenIVUse to reuse it when widening the narrow IV's increment. We don't
  // employ a general reuse mechanism because the call above is the only call to
  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
    WideInc =
      cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
    WideIncExpr = SE->getSCEV(WideInc);
    // Propagate the debug location associated with the original loop increment
    // to the new (widened) increment.
    auto *OrigInc =
      cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
    WideInc->setDebugLoc(OrigInc->getDebugLoc());
  }

  LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
  ++NumWidened;

  // Traverse the def-use chain using a worklist starting at the original IV.
  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");

  Widened.insert(OrigPhi);
  pushNarrowIVUsers(OrigPhi, WidePhi);

  while (!NarrowIVUsers.empty()) {
    NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();

    // Process a def-use edge. This may replace the use, so don't hold a
    // use_iterator across it.
    Instruction *WideUse = widenIVUse(DU, Rewriter);

    // Follow all def-use edges from the previous narrow use.
    if (WideUse)
      pushNarrowIVUsers(DU.NarrowUse, WideUse);

    // widenIVUse may have removed the def-use edge.
    if (DU.NarrowDef->use_empty())
      DeadInsts.emplace_back(DU.NarrowDef);
  }

  // Attach any debug information to the new PHI.
  replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);

  return WidePhi;
}

/// Calculates the control-dependent range for the given def at the given
/// context by looking at dominating conditions inside of the loop.
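///
/// For example (an illustrative sketch): if NarrowDef is 'add nsw i32 %x, 1'
/// and the branch condition 'icmp sgt i32 %x, -1' dominates NarrowUser, then
/// at that use %x is known to be non-negative, so the def's value is at least
/// 1; pushNarrowIVUsers can then treat the def as never negative in that
/// context.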
1473 replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT); 1474 1475 return WidePhi; 1476 } 1477 1478 /// Calculates control-dependent range for the given def at the given context 1479 /// by looking at dominating conditions inside of the loop 1480 void WidenIV::calculatePostIncRange(Instruction *NarrowDef, 1481 Instruction *NarrowUser) { 1482 using namespace llvm::PatternMatch; 1483 1484 Value *NarrowDefLHS; 1485 const APInt *NarrowDefRHS; 1486 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), 1487 m_APInt(NarrowDefRHS))) || 1488 !NarrowDefRHS->isNonNegative()) 1489 return; 1490 1491 auto UpdateRangeFromCondition = [&] (Value *Condition, 1492 bool TrueDest) { 1493 CmpInst::Predicate Pred; 1494 Value *CmpRHS; 1495 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), 1496 m_Value(CmpRHS)))) 1497 return; 1498 1499 CmpInst::Predicate P = 1500 TrueDest ? Pred : CmpInst::getInversePredicate(Pred); 1501 1502 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); 1503 auto CmpConstrainedLHSRange = 1504 ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); 1505 auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( 1506 *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); 1507 1508 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); 1509 }; 1510 1511 auto UpdateRangeFromGuards = [&](Instruction *Ctx) { 1512 if (!HasGuards) 1513 return; 1514 1515 for (Instruction &I : make_range(Ctx->getIterator().getReverse(), 1516 Ctx->getParent()->rend())) { 1517 Value *C = nullptr; 1518 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) 1519 UpdateRangeFromCondition(C, /*TrueDest=*/true); 1520 } 1521 }; 1522 1523 UpdateRangeFromGuards(NarrowUser); 1524 1525 BasicBlock *NarrowUserBB = NarrowUser->getParent(); 1526 // If NarrowUserBB is statically unreachable asking dominator queries may 1527 // yield surprising results. (e.g. the block may not have a dom tree node) 1528 if (!DT->isReachableFromEntry(NarrowUserBB)) 1529 return; 1530 1531 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); 1532 L->contains(DTB->getBlock()); 1533 DTB = DTB->getIDom()) { 1534 auto *BB = DTB->getBlock(); 1535 auto *TI = BB->getTerminator(); 1536 UpdateRangeFromGuards(TI); 1537 1538 auto *BI = dyn_cast<BranchInst>(TI); 1539 if (!BI || !BI->isConditional()) 1540 continue; 1541 1542 auto *TrueSuccessor = BI->getSuccessor(0); 1543 auto *FalseSuccessor = BI->getSuccessor(1); 1544 1545 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { 1546 return BBE.isSingleEdge() && 1547 DT->dominates(BBE, NarrowUser->getParent()); 1548 }; 1549 1550 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) 1551 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); 1552 1553 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) 1554 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); 1555 } 1556 } 1557 1558 /// Calculates PostIncRangeInfos map for the given IV 1559 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { 1560 SmallPtrSet<Instruction *, 16> Visited; 1561 SmallVector<Instruction *, 6> Worklist; 1562 Worklist.push_back(OrigPhi); 1563 Visited.insert(OrigPhi); 1564 1565 while (!Worklist.empty()) { 1566 Instruction *NarrowDef = Worklist.pop_back_val(); 1567 1568 for (Use &U : NarrowDef->uses()) { 1569 auto *NarrowUser = cast<Instruction>(U.getUser()); 1570 1571 // Don't go looking outside the current loop. 
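// (Users in subloops of L are still visited, since L->contains() holds for
// nested loops; only users outside any loop, or in loops not nested within
// L, are skipped by the check below.)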
1572 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; 1573 if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) 1574 continue; 1575 1576 if (!Visited.insert(NarrowUser).second) 1577 continue; 1578 1579 Worklist.push_back(NarrowUser); 1580 1581 calculatePostIncRange(NarrowDef, NarrowUser); 1582 } 1583 } 1584 } 1585 1586 //===----------------------------------------------------------------------===// 1587 // Live IV Reduction - Minimize IVs live across the loop. 1588 //===----------------------------------------------------------------------===// 1589 1590 //===----------------------------------------------------------------------===// 1591 // Simplification of IV users based on SCEV evaluation. 1592 //===----------------------------------------------------------------------===// 1593 1594 namespace { 1595 1596 class IndVarSimplifyVisitor : public IVVisitor { 1597 ScalarEvolution *SE; 1598 const TargetTransformInfo *TTI; 1599 PHINode *IVPhi; 1600 1601 public: 1602 WideIVInfo WI; 1603 1604 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, 1605 const TargetTransformInfo *TTI, 1606 const DominatorTree *DTree) 1607 : SE(SCEV), TTI(TTI), IVPhi(IV) { 1608 DT = DTree; 1609 WI.NarrowIV = IVPhi; 1610 } 1611 1612 // Implement the interface used by simplifyUsersOfIV. 1613 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } 1614 }; 1615 1616 } // end anonymous namespace 1617 1618 /// Iteratively perform simplification on a worklist of IV users. Each 1619 /// successive simplification may push more users which may themselves be 1620 /// candidates for simplification. 1621 /// 1622 /// Sign/Zero extend elimination is interleaved with IV simplification. 1623 bool IndVarSimplify::simplifyAndExtend(Loop *L, 1624 SCEVExpander &Rewriter, 1625 LoopInfo *LI) { 1626 SmallVector<WideIVInfo, 8> WideIVs; 1627 1628 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction( 1629 Intrinsic::getName(Intrinsic::experimental_guard)); 1630 bool HasGuards = GuardDecl && !GuardDecl->use_empty(); 1631 1632 SmallVector<PHINode*, 8> LoopPhis; 1633 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 1634 LoopPhis.push_back(cast<PHINode>(I)); 1635 } 1636 // Each round of simplification iterates through the SimplifyIVUsers worklist 1637 // for all current phis, then determines whether any IVs can be 1638 // widened. Widening adds new phis to LoopPhis, inducing another round of 1639 // simplification on the wide IVs. 1640 bool Changed = false; 1641 while (!LoopPhis.empty()) { 1642 // Evaluate as many IV expressions as possible before widening any IVs. This 1643 // forces SCEV to set no-wrap flags before evaluating sign/zero 1644 // extension. The first time SCEV attempts to normalize sign/zero extension, 1645 // the result becomes final. So for the most predictable results, we delay 1646 // evaluation of sign/zero extend evaluation until needed, and avoid running 1647 // other SCEV based analysis prior to simplifyAndExtend. 1648 do { 1649 PHINode *CurrIV = LoopPhis.pop_back_val(); 1650 1651 // Information about sign/zero extensions of CurrIV. 
1652 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); 1653 1654 Changed |= 1655 simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor); 1656 1657 if (Visitor.WI.WidestNativeType) { 1658 WideIVs.push_back(Visitor.WI); 1659 } 1660 } while(!LoopPhis.empty()); 1661 1662 for (; !WideIVs.empty(); WideIVs.pop_back()) { 1663 WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards); 1664 if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) { 1665 Changed = true; 1666 LoopPhis.push_back(WidePhi); 1667 } 1668 } 1669 } 1670 return Changed; 1671 } 1672 1673 //===----------------------------------------------------------------------===// 1674 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition. 1675 //===----------------------------------------------------------------------===// 1676 1677 /// Given an Value which is hoped to be part of an add recurance in the given 1678 /// loop, return the associated Phi node if so. Otherwise, return null. Note 1679 /// that this is less general than SCEVs AddRec checking. 1680 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) { 1681 Instruction *IncI = dyn_cast<Instruction>(IncV); 1682 if (!IncI) 1683 return nullptr; 1684 1685 switch (IncI->getOpcode()) { 1686 case Instruction::Add: 1687 case Instruction::Sub: 1688 break; 1689 case Instruction::GetElementPtr: 1690 // An IV counter must preserve its type. 1691 if (IncI->getNumOperands() == 2) 1692 break; 1693 LLVM_FALLTHROUGH; 1694 default: 1695 return nullptr; 1696 } 1697 1698 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0)); 1699 if (Phi && Phi->getParent() == L->getHeader()) { 1700 if (L->isLoopInvariant(IncI->getOperand(1))) 1701 return Phi; 1702 return nullptr; 1703 } 1704 if (IncI->getOpcode() == Instruction::GetElementPtr) 1705 return nullptr; 1706 1707 // Allow add/sub to be commuted. 1708 Phi = dyn_cast<PHINode>(IncI->getOperand(1)); 1709 if (Phi && Phi->getParent() == L->getHeader()) { 1710 if (L->isLoopInvariant(IncI->getOperand(0))) 1711 return Phi; 1712 } 1713 return nullptr; 1714 } 1715 1716 /// Whether the current loop exit test is based on this value. Currently this 1717 /// is limited to a direct use in the loop condition. 1718 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) { 1719 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1720 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition()); 1721 // TODO: Allow non-icmp loop test. 1722 if (!ICmp) 1723 return false; 1724 1725 // TODO: Allow indirect use. 1726 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V; 1727 } 1728 1729 /// linearFunctionTestReplace policy. Return true unless we can show that the 1730 /// current exit test is already sufficiently canonical. 1731 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) { 1732 assert(L->getLoopLatch() && "Must be in simplified form"); 1733 1734 // Avoid converting a constant or loop invariant test back to a runtime 1735 // test. This is critical for when SCEV's cached ExitCount is less precise 1736 // than the current IR (such as after we've proven a particular exit is 1737 // actually dead and thus the BE count never reaches our ExitCount.) 1738 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1739 if (L->isLoopInvariant(BI->getCondition())) 1740 return false; 1741 1742 // Do LFTR to simplify the exit condition to an ICMP. 
1743 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1744 if (!Cond)
1745 return true;
1746
1747 // Do LFTR to simplify the exit ICMP to EQ/NE
1748 ICmpInst::Predicate Pred = Cond->getPredicate();
1749 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
1750 return true;
1751
1752 // Look for a loop invariant RHS
1753 Value *LHS = Cond->getOperand(0);
1754 Value *RHS = Cond->getOperand(1);
1755 if (!L->isLoopInvariant(RHS)) {
1756 if (!L->isLoopInvariant(LHS))
1757 return true;
1758 std::swap(LHS, RHS);
1759 }
1760 // Look for a simple IV counter LHS
1761 PHINode *Phi = dyn_cast<PHINode>(LHS);
1762 if (!Phi)
1763 Phi = getLoopPhiForCounter(LHS, L);
1764
1765 if (!Phi)
1766 return true;
1767
1768 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
1769 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
1770 if (Idx < 0)
1771 return true;
1772
1773 // Do LFTR if the exit condition's IV is *not* a simple counter.
1774 Value *IncV = Phi->getIncomingValue(Idx);
1775 return Phi != getLoopPhiForCounter(IncV, L);
1776 }
1777
1778 /// Return true if undefined behavior would provably be executed on the path to
1779 /// OnPathTo if Root produced a poison result. Note that this doesn't say
1780 /// anything about whether OnPathTo is actually executed or whether Root is
1781 /// actually poison. This can be used to assess whether a new use of Root can
1782 /// be added at a location which is control equivalent with OnPathTo (such as
1783 /// immediately before it) without introducing UB which didn't previously
1784 /// exist. Note that a false result conveys no information.
1785 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
1786 Instruction *OnPathTo,
1787 DominatorTree *DT) {
1788 // Basic approach is to assume Root is poison, propagate poison forward
1789 // through all users we can easily track, and then check whether any of those
1790 // users are provably UB and must execute before our exiting block might
1791 // exit.
1792
1793 // The set of all recursive users we've visited (which are assumed to all be
1794 // poison because of said visit)
1795 SmallSet<const Value *, 16> KnownPoison;
1796 SmallVector<const Instruction*, 16> Worklist;
1797 Worklist.push_back(Root);
1798 while (!Worklist.empty()) {
1799 const Instruction *I = Worklist.pop_back_val();
1800
1801 // If we know this must trigger UB on a path leading to our target.
1802 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
1803 return true;
1804
1805 // If we can't analyze propagation through this instruction, just skip it
1806 // and transitive users. Safe as false is a conservative result.
1807 if (!propagatesFullPoison(I) && I != Root)
1808 continue;
1809
1810 if (KnownPoison.insert(I).second)
1811 for (const User *User : I->users())
1812 Worklist.push_back(cast<Instruction>(User));
1813 }
1814
1815 // Might be non-UB, or might have a path we couldn't prove must execute on
1816 // the way to the exiting bb.
1817 return false;
1818 }
1819
1820 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
1821 /// down to checking that all operands are constant and listing instructions
1822 /// that may hide undef.
1823 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
1824 unsigned Depth) {
1825 if (isa<Constant>(V))
1826 return !isa<UndefValue>(V);
1827
1828 if (Depth >= 6)
1829 return false;
1830
1831 // Conservatively handle non-constant non-instructions. For example, Arguments
1832 // may be undef.
1833 Instruction *I = dyn_cast<Instruction>(V);
1834 if (!I)
1835 return false;
1836
1837 // Load and return values may be undef.
1838 if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
1839 return false;
1840
1841 // Optimistically handle other instructions.
1842 for (Value *Op : I->operands()) {
1843 if (!Visited.insert(Op).second)
1844 continue;
1845 if (!hasConcreteDefImpl(Op, Visited, Depth+1))
1846 return false;
1847 }
1848 return true;
1849 }
1850
1851 /// Return true if the given value is concrete. We must prove that undef can
1852 /// never reach it.
1853 ///
1854 /// TODO: If we decide that this is a good approach to checking for undef, we
1855 /// may factor it into a common location.
1856 static bool hasConcreteDef(Value *V) {
1857 SmallPtrSet<Value*, 8> Visited;
1858 Visited.insert(V);
1859 return hasConcreteDefImpl(V, Visited, 0);
1860 }
1861
1862 /// Return true if this IV has any uses other than the (soon to be rewritten)
1863 /// loop exit test.
1864 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
1865 int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
1866 Value *IncV = Phi->getIncomingValue(LatchIdx);
1867
1868 for (User *U : Phi->users())
1869 if (U != Cond && U != IncV) return false;
1870
1871 for (User *U : IncV->users())
1872 if (U != Cond && U != Phi) return false;
1873 return true;
1874 }
1875
1876 /// Return true if the given phi is a "counter" in L. A counter is an
1877 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
1878 /// step of 1. Note that L must have exactly one latch.
1879 static bool isLoopCounter(PHINode* Phi, Loop *L,
1880 ScalarEvolution *SE) {
1881 assert(Phi->getParent() == L->getHeader());
1882 assert(L->getLoopLatch());
1883
1884 if (!SE->isSCEVable(Phi->getType()))
1885 return false;
1886
1887 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
1888 if (!AR || AR->getLoop() != L || !AR->isAffine())
1889 return false;
1890
1891 const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
1892 if (!Step || !Step->isOne())
1893 return false;
1894
1895 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
1896 Value *IncV = Phi->getIncomingValue(LatchIdx);
1897 return (getLoopPhiForCounter(IncV, L) == Phi);
1898 }
1899
1900 /// Search the loop header for a loop counter (an add rec w/step of one)
1901 /// suitable for use by LFTR. If multiple counters are available, select the
1902 /// "best" one based on profitability heuristics.
1903 ///
1904 /// BECount may be an i8* pointer type. The pointer difference is already a
1905 /// valid count without scaling the address stride, so it remains a pointer
1906 /// expression as far as SCEV is concerned.
1907 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
1908 const SCEV *BECount,
1909 ScalarEvolution *SE, DominatorTree *DT) {
1910 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
1911
1912 Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
1913
1914 // Loop over all of the PHI nodes, looking for a simple counter.
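// For illustration (example IR, not taken from a specific test), a header
// phi such as
//   %i = phi i64 [ 0, %preheader ], [ %i.next, %latch ]
//   %i.next = add nuw nsw i64 %i, 1
// is accepted by isLoopCounter above: an affine add recurrence in L whose
// step is the constant one.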
1915 PHINode *BestPhi = nullptr; 1916 const SCEV *BestInit = nullptr; 1917 BasicBlock *LatchBlock = L->getLoopLatch(); 1918 assert(LatchBlock && "Must be in simplified form"); 1919 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 1920 1921 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 1922 PHINode *Phi = cast<PHINode>(I); 1923 if (!isLoopCounter(Phi, L, SE)) 1924 continue; 1925 1926 // Avoid comparing an integer IV against a pointer Limit. 1927 if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy()) 1928 continue; 1929 1930 const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi)); 1931 1932 // AR may be a pointer type, while BECount is an integer type. 1933 // AR may be wider than BECount. With eq/ne tests overflow is immaterial. 1934 // AR may not be a narrower type, or we may never exit. 1935 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType()); 1936 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth)) 1937 continue; 1938 1939 // Avoid reusing a potentially undef value to compute other values that may 1940 // have originally had a concrete definition. 1941 if (!hasConcreteDef(Phi)) { 1942 // We explicitly allow unknown phis as long as they are already used by 1943 // the loop exit test. This is legal since performing LFTR could not 1944 // increase the number of undef users. 1945 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock); 1946 if (!isLoopExitTestBasedOn(Phi, ExitingBB) && 1947 !isLoopExitTestBasedOn(IncPhi, ExitingBB)) 1948 continue; 1949 } 1950 1951 // Avoid introducing undefined behavior due to poison which didn't exist in 1952 // the original program. (Annoyingly, the rules for poison and undef 1953 // propagation are distinct, so this does NOT cover the undef case above.) 1954 // We have to ensure that we don't introduce UB by introducing a use on an 1955 // iteration where said IV produces poison. Our strategy here differs for 1956 // pointers and integer IVs. For integers, we strip and reinfer as needed, 1957 // see code in linearFunctionTestReplace. For pointers, we restrict 1958 // transforms as there is no good way to reinfer inbounds once lost. 1959 if (!Phi->getType()->isIntegerTy() && 1960 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) 1961 continue; 1962 1963 const SCEV *Init = AR->getStart(); 1964 1965 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { 1966 // Don't force a live loop counter if another IV can be used. 1967 if (AlmostDeadIV(Phi, LatchBlock, Cond)) 1968 continue; 1969 1970 // Prefer to count-from-zero. This is a more "canonical" counter form. It 1971 // also prefers integer to pointer IVs. 1972 if (BestInit->isZero() != Init->isZero()) { 1973 if (BestInit->isZero()) 1974 continue; 1975 } 1976 // If two IVs both count from zero or both count from nonzero then the 1977 // narrower is likely a dead phi that has been widened. Use the wider phi 1978 // to allow the other to be eliminated. 1979 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) 1980 continue; 1981 } 1982 BestPhi = Phi; 1983 BestInit = Init; 1984 } 1985 return BestPhi; 1986 } 1987 1988 /// Insert an IR expression which computes the value held by the IV IndVar 1989 /// (which must be an loop counter w/unit stride) after the backedge of loop L 1990 /// is taken ExitCount times. 
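/// For illustration: with a unit-stride counter starting at Start, the value
/// after ExitCount backedges is Start + ExitCount, or Start + ExitCount + 1
/// when the caller compares against the post-incremented IV; the cases below
/// differ only in how pointer IVs and mismatched bit widths are handled.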
1991 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, 1992 const SCEV *ExitCount, bool UsePostInc, Loop *L, 1993 SCEVExpander &Rewriter, ScalarEvolution *SE) { 1994 assert(isLoopCounter(IndVar, L, SE)); 1995 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 1996 const SCEV *IVInit = AR->getStart(); 1997 1998 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter 1999 // finds a valid pointer IV. Sign extend ExitCount in order to materialize a 2000 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing 2001 // the existing GEPs whenever possible. 2002 if (IndVar->getType()->isPointerTy() && 2003 !ExitCount->getType()->isPointerTy()) { 2004 // IVOffset will be the new GEP offset that is interpreted by GEP as a 2005 // signed value. ExitCount on the other hand represents the loop trip count, 2006 // which is an unsigned value. FindLoopCounter only allows induction 2007 // variables that have a positive unit stride of one. This means we don't 2008 // have to handle the case of negative offsets (yet) and just need to zero 2009 // extend ExitCount. 2010 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); 2011 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy); 2012 if (UsePostInc) 2013 IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy)); 2014 2015 // Expand the code for the iteration count. 2016 assert(SE->isLoopInvariant(IVOffset, L) && 2017 "Computed iteration count is not loop invariant!"); 2018 2019 // We could handle pointer IVs other than i8*, but we need to compensate for 2020 // gep index scaling. 2021 assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), 2022 cast<PointerType>(IndVar->getType()) 2023 ->getElementType())->isOne() && 2024 "unit stride pointer IV must be i8*"); 2025 2026 const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset); 2027 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2028 return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI); 2029 } else { 2030 // In any other case, convert both IVInit and ExitCount to integers before 2031 // comparing. This may result in SCEV expansion of pointers, but in practice 2032 // SCEV will fold the pointer arithmetic away as such: 2033 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). 2034 // 2035 // Valid Cases: (1) both integers is most common; (2) both may be pointers 2036 // for simple memset-style loops. 2037 // 2038 // IVInit integer and ExitCount pointer would only occur if a canonical IV 2039 // were generated on top of case #2, which is not expected. 2040 2041 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 2042 // For unit stride, IVCount = Start + ExitCount with 2's complement 2043 // overflow. 2044 2045 // For integer IVs, truncate the IV before computing IVInit + BECount, 2046 // unless we know apriori that the limit must be a constant when evaluated 2047 // in the bitwidth of the IV. We prefer (potentially) keeping a truncate 2048 // of the IV in the loop over a (potentially) expensive expansion of the 2049 // widened exit count add(zext(add)) expression. 
2050 if (SE->getTypeSizeInBits(IVInit->getType()) 2051 > SE->getTypeSizeInBits(ExitCount->getType())) { 2052 if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount)) 2053 ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType()); 2054 else 2055 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType()); 2056 } 2057 2058 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount); 2059 2060 if (UsePostInc) 2061 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); 2062 2063 // Expand the code for the iteration count. 2064 assert(SE->isLoopInvariant(IVLimit, L) && 2065 "Computed iteration count is not loop invariant!"); 2066 // Ensure that we generate the same type as IndVar, or a smaller integer 2067 // type. In the presence of null pointer values, we have an integer type 2068 // SCEV expression (IVInit) for a pointer type IV value (IndVar). 2069 Type *LimitTy = ExitCount->getType()->isPointerTy() ? 2070 IndVar->getType() : ExitCount->getType(); 2071 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2072 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); 2073 } 2074 } 2075 2076 /// This method rewrites the exit condition of the loop to be a canonical != 2077 /// comparison against the incremented loop induction variable. This pass is 2078 /// able to rewrite the exit tests of any loop where the SCEV analysis can 2079 /// determine a loop-invariant trip count of the loop, which is actually a much 2080 /// broader range than just linear tests. 2081 bool IndVarSimplify:: 2082 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 2083 const SCEV *ExitCount, 2084 PHINode *IndVar, SCEVExpander &Rewriter) { 2085 assert(L->getLoopLatch() && "Loop no longer in simplified form?"); 2086 assert(isLoopCounter(IndVar, L, SE)); 2087 Instruction * const IncVar = 2088 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch())); 2089 2090 // Initialize CmpIndVar to the preincremented IV. 2091 Value *CmpIndVar = IndVar; 2092 bool UsePostInc = false; 2093 2094 // If the exiting block is the same as the backedge block, we prefer to 2095 // compare against the post-incremented value, otherwise we must compare 2096 // against the preincremented value. 2097 if (ExitingBB == L->getLoopLatch()) { 2098 // For pointer IVs, we chose to not strip inbounds which requires us not 2099 // to add a potentially UB introducing use. We need to either a) show 2100 // the loop test we're modifying is already in post-inc form, or b) show 2101 // that adding a use must not introduce UB. 2102 bool SafeToPostInc = 2103 IndVar->getType()->isIntegerTy() || 2104 isLoopExitTestBasedOn(IncVar, ExitingBB) || 2105 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT); 2106 if (SafeToPostInc) { 2107 UsePostInc = true; 2108 CmpIndVar = IncVar; 2109 } 2110 } 2111 2112 // It may be necessary to drop nowrap flags on the incrementing instruction 2113 // if either LFTR moves from a pre-inc check to a post-inc check (in which 2114 // case the increment might have previously been poison on the last iteration 2115 // only) or if LFTR switches to a different IV that was previously dynamically 2116 // dead (and as such may be arbitrarily poison). We remove any nowrap flags 2117 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc 2118 // check), because the pre-inc addrec flags may be adopted from the original 2119 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags. 
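// For illustration (hypothetical example): if the exit test previously
// checked the pre-incremented %iv and the increment is 'add nuw i32 %iv, 1',
// switching the test to %iv.next makes the increment's value observable on
// the final iteration; if SCEV cannot re-prove nuw for the post-inc addrec,
// keeping the flag could make that now-used value poison, so it is dropped.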
2120 // TODO: This handling is inaccurate for one case: If we switch to a 2121 // dynamically dead IV that wraps on the first loop iteration only, which is 2122 // not covered by the post-inc addrec. (If the new IV was not dynamically 2123 // dead, it could not be poison on the first iteration in the first place.) 2124 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) { 2125 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar)); 2126 if (BO->hasNoUnsignedWrap()) 2127 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap()); 2128 if (BO->hasNoSignedWrap()) 2129 BO->setHasNoSignedWrap(AR->hasNoSignedWrap()); 2130 } 2131 2132 Value *ExitCnt = genLoopLimit( 2133 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE); 2134 assert(ExitCnt->getType()->isPointerTy() == 2135 IndVar->getType()->isPointerTy() && 2136 "genLoopLimit missed a cast"); 2137 2138 // Insert a new icmp_ne or icmp_eq instruction before the branch. 2139 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2140 ICmpInst::Predicate P; 2141 if (L->contains(BI->getSuccessor(0))) 2142 P = ICmpInst::ICMP_NE; 2143 else 2144 P = ICmpInst::ICMP_EQ; 2145 2146 IRBuilder<> Builder(BI); 2147 2148 // The new loop exit condition should reuse the debug location of the 2149 // original loop exit condition. 2150 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition())) 2151 Builder.SetCurrentDebugLocation(Cond->getDebugLoc()); 2152 2153 // For integer IVs, if we evaluated the limit in the narrower bitwidth to 2154 // avoid the expensive expansion of the limit expression in the wider type, 2155 // emit a truncate to narrow the IV to the ExitCount type. This is safe 2156 // since we know (from the exit count bitwidth), that we can't self-wrap in 2157 // the narrower type. 2158 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); 2159 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); 2160 if (CmpIndVarSize > ExitCntSize) { 2161 assert(!CmpIndVar->getType()->isPointerTy() && 2162 !ExitCnt->getType()->isPointerTy()); 2163 2164 // Before resorting to actually inserting the truncate, use the same 2165 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend 2166 // the other side of the comparison instead. We still evaluate the limit 2167 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to 2168 // a truncate within in. 2169 bool Extended = false; 2170 const SCEV *IV = SE->getSCEV(CmpIndVar); 2171 const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar), 2172 ExitCnt->getType()); 2173 const SCEV *ZExtTrunc = 2174 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType()); 2175 2176 if (ZExtTrunc == IV) { 2177 Extended = true; 2178 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(), 2179 "wide.trip.count"); 2180 } else { 2181 const SCEV *SExtTrunc = 2182 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType()); 2183 if (SExtTrunc == IV) { 2184 Extended = true; 2185 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(), 2186 "wide.trip.count"); 2187 } 2188 } 2189 2190 if (Extended) { 2191 bool Discard; 2192 L->makeLoopInvariant(ExitCnt, Discard); 2193 } else 2194 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), 2195 "lftr.wideiv"); 2196 } 2197 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 2198 << " LHS:" << *CmpIndVar << '\n' 2199 << " op:\t" << (P == ICmpInst::ICMP_NE ? 
"!=" : "==") 2200 << "\n" 2201 << " RHS:\t" << *ExitCnt << "\n" 2202 << "ExitCount:\t" << *ExitCount << "\n" 2203 << " was: " << *BI->getCondition() << "\n"); 2204 2205 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); 2206 Value *OrigCond = BI->getCondition(); 2207 // It's tempting to use replaceAllUsesWith here to fully replace the old 2208 // comparison, but that's not immediately safe, since users of the old 2209 // comparison may not be dominated by the new comparison. Instead, just 2210 // update the branch to use the new comparison; in the common case this 2211 // will make old comparison dead. 2212 BI->setCondition(Cond); 2213 DeadInsts.push_back(OrigCond); 2214 2215 ++NumLFTR; 2216 return true; 2217 } 2218 2219 //===----------------------------------------------------------------------===// 2220 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders. 2221 //===----------------------------------------------------------------------===// 2222 2223 /// If there's a single exit block, sink any loop-invariant values that 2224 /// were defined in the preheader but not used inside the loop into the 2225 /// exit block to reduce register pressure in the loop. 2226 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { 2227 BasicBlock *ExitBlock = L->getExitBlock(); 2228 if (!ExitBlock) return false; 2229 2230 BasicBlock *Preheader = L->getLoopPreheader(); 2231 if (!Preheader) return false; 2232 2233 bool MadeAnyChanges = false; 2234 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); 2235 BasicBlock::iterator I(Preheader->getTerminator()); 2236 while (I != Preheader->begin()) { 2237 --I; 2238 // New instructions were inserted at the end of the preheader. 2239 if (isa<PHINode>(I)) 2240 break; 2241 2242 // Don't move instructions which might have side effects, since the side 2243 // effects need to complete before instructions inside the loop. Also don't 2244 // move instructions which might read memory, since the loop may modify 2245 // memory. Note that it's okay if the instruction might have undefined 2246 // behavior: LoopSimplify guarantees that the preheader dominates the exit 2247 // block. 2248 if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 2249 continue; 2250 2251 // Skip debug info intrinsics. 2252 if (isa<DbgInfoIntrinsic>(I)) 2253 continue; 2254 2255 // Skip eh pad instructions. 2256 if (I->isEHPad()) 2257 continue; 2258 2259 // Don't sink alloca: we never want to sink static alloca's out of the 2260 // entry block, and correctly sinking dynamic alloca's requires 2261 // checks for stacksave/stackrestore intrinsics. 2262 // FIXME: Refactor this check somehow? 2263 if (isa<AllocaInst>(I)) 2264 continue; 2265 2266 // Determine if there is a use in or before the loop (direct or 2267 // otherwise). 2268 bool UsedInLoop = false; 2269 for (Use &U : I->uses()) { 2270 Instruction *User = cast<Instruction>(U.getUser()); 2271 BasicBlock *UseBB = User->getParent(); 2272 if (PHINode *P = dyn_cast<PHINode>(User)) { 2273 unsigned i = 2274 PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 2275 UseBB = P->getIncomingBlock(i); 2276 } 2277 if (UseBB == Preheader || L->contains(UseBB)) { 2278 UsedInLoop = true; 2279 break; 2280 } 2281 } 2282 2283 // If there is, the def must remain in the preheader. 2284 if (UsedInLoop) 2285 continue; 2286 2287 // Otherwise, sink it to the exit block. 2288 Instruction *ToMove = &*I; 2289 bool Done = false; 2290 2291 if (I != Preheader->begin()) { 2292 // Skip debug info intrinsics. 
2293 do { 2294 --I; 2295 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); 2296 2297 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) 2298 Done = true; 2299 } else { 2300 Done = true; 2301 } 2302 2303 MadeAnyChanges = true; 2304 ToMove->moveBefore(*ExitBlock, InsertPt); 2305 if (Done) break; 2306 InsertPt = ToMove->getIterator(); 2307 } 2308 2309 return MadeAnyChanges; 2310 } 2311 2312 /// Return a symbolic upper bound for the backedge taken count of the loop. 2313 /// This is more general than getConstantMaxBackedgeTakenCount as it returns 2314 /// an arbitrary expression as opposed to only constants. 2315 /// TODO: Move into the ScalarEvolution class. 2316 static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE, 2317 DominatorTree &DT, Loop *L) { 2318 SmallVector<BasicBlock*, 16> ExitingBlocks; 2319 L->getExitingBlocks(ExitingBlocks); 2320 2321 // Form an expression for the maximum exit count possible for this loop. We 2322 // merge the max and exact information to approximate a version of 2323 // getConstantMaxBackedgeTakenCount which isn't restricted to just constants. 2324 SmallVector<const SCEV*, 4> ExitCounts; 2325 for (BasicBlock *ExitingBB : ExitingBlocks) { 2326 const SCEV *ExitCount = SE.getExitCount(L, ExitingBB); 2327 if (isa<SCEVCouldNotCompute>(ExitCount)) 2328 ExitCount = SE.getExitCount(L, ExitingBB, 2329 ScalarEvolution::ConstantMaximum); 2330 if (!isa<SCEVCouldNotCompute>(ExitCount)) { 2331 assert(DT.dominates(ExitingBB, L->getLoopLatch()) && 2332 "We should only have known counts for exiting blocks that " 2333 "dominate latch!"); 2334 ExitCounts.push_back(ExitCount); 2335 } 2336 } 2337 if (ExitCounts.empty()) 2338 return SE.getCouldNotCompute(); 2339 return SE.getUMinFromMismatchedTypes(ExitCounts); 2340 } 2341 2342 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { 2343 SmallVector<BasicBlock*, 16> ExitingBlocks; 2344 L->getExitingBlocks(ExitingBlocks); 2345 2346 // Remove all exits which aren't both rewriteable and analyzeable. 2347 auto NewEnd = llvm::remove_if(ExitingBlocks, 2348 [&](BasicBlock *ExitingBB) { 2349 // If our exitting block exits multiple loops, we can only rewrite the 2350 // innermost one. Otherwise, we're changing how many times the innermost 2351 // loop runs before it exits. 2352 if (LI->getLoopFor(ExitingBB) != L) 2353 return true; 2354 2355 // Can't rewrite non-branch yet. 2356 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 2357 if (!BI) 2358 return true; 2359 2360 // If already constant, nothing to do. 2361 if (isa<Constant>(BI->getCondition())) 2362 return true; 2363 2364 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2365 if (isa<SCEVCouldNotCompute>(ExitCount)) 2366 return true; 2367 return false; 2368 }); 2369 ExitingBlocks.erase(NewEnd, ExitingBlocks.end()); 2370 2371 if (ExitingBlocks.empty()) 2372 return false; 2373 2374 // Get a symbolic upper bound on the loop backedge taken count. 2375 const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L); 2376 if (isa<SCEVCouldNotCompute>(MaxExitCount)) 2377 return false; 2378 2379 // Visit our exit blocks in order of dominance. We know from the fact that 2380 // all exits (left) are analyzeable that the must be a total dominance order 2381 // between them as each must dominate the latch. The visit order only 2382 // matters for the provably equal case. 
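// For illustration: if exits A and B have equal exit counts and A dominates
// B, the loop always leaves through A first, so B's test can be folded to
// "not taken" by the duplicate-count check below; visiting in dominance
// order is what makes that conclusion valid.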
2383 llvm::sort(ExitingBlocks,
2384 [&](BasicBlock *A, BasicBlock *B) {
2385 // std::sort sorts in ascending order, so we want the inverse of
2386 // the normal dominance relation.
2387 if (DT->properlyDominates(A, B)) return true;
2388 if (DT->properlyDominates(B, A)) return false;
2389 llvm_unreachable("expected total dominance order!");
2390 });
2391 #ifndef NDEBUG
2392 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2393 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2394 }
2395 #endif
2396
2397 auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
2398 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2399 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2400 auto *OldCond = BI->getCondition();
2401 auto *NewCond = ConstantInt::get(OldCond->getType(),
2402 IsTaken ? ExitIfTrue : !ExitIfTrue);
2403 BI->setCondition(NewCond);
2404 if (OldCond->use_empty())
2405 DeadInsts.push_back(OldCond);
2406 };
2407
2408 bool Changed = false;
2409 SmallSet<const SCEV*, 8> DominatingExitCounts;
2410 for (BasicBlock *ExitingBB : ExitingBlocks) {
2411 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2412 assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");
2413
2414 // If we know we'd exit on the first iteration, rewrite the exit to
2415 // reflect this. This does not imply the loop must exit through this
2416 // exit; there may be an earlier one taken on the first iteration.
2417 // TODO: Given we know the backedge can't be taken, we should go ahead
2418 // and break it. Or at least, kill all the header phis and simplify.
2419 if (ExitCount->isZero()) {
2420 FoldExit(ExitingBB, true);
2421 Changed = true;
2422 continue;
2423 }
2424
2425 // If we end up with a pointer exit count, bail. Note that we can end up
2426 // with a pointer exit count for one exiting block, and not for another in
2427 // the same loop.
2428 if (!ExitCount->getType()->isIntegerTy() ||
2429 !MaxExitCount->getType()->isIntegerTy())
2430 continue;
2431
2432 Type *WiderType =
2433 SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2434 ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2435 MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2436 assert(MaxExitCount->getType() == ExitCount->getType());
2437
2438 // Can we prove that some other exit must be taken strictly before this
2439 // one?
2440 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2441 MaxExitCount, ExitCount)) {
2442 FoldExit(ExitingBB, false);
2443 Changed = true;
2444 continue;
2445 }
2446
2447 // As we run, keep track of which exit counts we've encountered. If we
2448 // find a duplicate, we've found an exit which would have exited on the
2449 // exiting iteration, but (from the visit order) strictly follows another
2450 // which does the same and is thus dead.
2451 if (!DominatingExitCounts.insert(ExitCount).second) {
2452 FoldExit(ExitingBB, false);
2453 Changed = true;
2454 continue;
2455 }
2456
2457 // TODO: There might be another opportunity to leverage SCEV's reasoning
2458 // here. If we kept track of the min of dominating exits so far, we could
2459 // discharge exits with EC >= MDEC. This is less powerful than the existing
2460 // transform (since later exits aren't considered), but potentially more
2461 // powerful for any case where SCEV can prove a >=u b, but neither a == b
2462 // or a >u b. Such a case is not currently known.
2463 }
2464 return Changed;
2465 }
2466
2467 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
2468 SmallVector<BasicBlock*, 16> ExitingBlocks;
2469 L->getExitingBlocks(ExitingBlocks);
2470
2471 bool Changed = false;
2472
2473 // Finally, see if we can rewrite our exit conditions into a loop invariant
2474 // form. If we have a read-only loop, and we can tell that we must exit down
2475 // a path which does not need any of the values computed within the loop, we
2476 // can rewrite the loop to exit on the first iteration. Note that this
2477 // doesn't either a) tell us the loop exits on the first iteration (unless
2478 // *all* exits are predicatable) or b) tell us *which* exit might be taken.
2479 // This transformation looks a lot like a restricted form of dead loop
2480 // elimination, but restricted to read-only loops and without necessarily
2481 // needing to kill the loop entirely.
2482 if (!LoopPredication)
2483 return Changed;
2484
2485 if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2486 return Changed;
2487
2488 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2489 // through *explicit* control flow. We have to eliminate the possibility of
2490 // implicit exits (see below) before we know it's truly exact.
2491 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2492 if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2493 !SE->isLoopInvariant(ExactBTC, L) ||
2494 !isSafeToExpand(ExactBTC, *SE))
2495 return Changed;
2496
2497 // If we end up with a pointer exit count, bail. It may be unsized.
2498 if (!ExactBTC->getType()->isIntegerTy())
2499 return Changed;
2500
2501 auto BadExit = [&](BasicBlock *ExitingBB) {
2502 // If our exiting block exits multiple loops, we can only rewrite the
2503 // innermost one. Otherwise, we're changing how many times the innermost
2504 // loop runs before it exits.
2505 if (LI->getLoopFor(ExitingBB) != L)
2506 return true;
2507
2508 // Can't rewrite non-branch yet.
2509 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2510 if (!BI)
2511 return true;
2512
2513 // If already constant, nothing to do.
2514 if (isa<Constant>(BI->getCondition()))
2515 return true;
2516
2517 // If the exit block has phis, we need to be able to compute the values
2518 // within the loop which contains them. This assumes trivially lcssa phis
2519 // have already been removed; TODO: generalize
2520 BasicBlock *ExitBlock =
2521 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
2522 if (!ExitBlock->phis().empty())
2523 return true;
2524
2525 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2526 assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
2527 if (!SE->isLoopInvariant(ExitCount, L) ||
2528 !isSafeToExpand(ExitCount, *SE))
2529 return true;
2530
2531 // If we end up with a pointer exit count, bail. It may be unsized.
2532 if (!ExitCount->getType()->isIntegerTy())
2533 return true;
2534
2535 return false;
2536 };
2537
2538 // If we have any exits which can't be predicated themselves, then we can't
2539 // predicate any exit which isn't guaranteed to execute before it. Consider
2540 // two exits (a) and (b) which would both exit on the same iteration. If we
2541 // can predicate (b), but not (a), and (a) precedes (b) along some path, then
2542 // we could convert a loop from exiting through (a) to one exiting through
2543 // (b). Note that this problem exists only for exits with the same exit
2544 // count, and we could be more aggressive when exit counts are known inequal.
2545 llvm::sort(ExitingBlocks,
2546 [&](BasicBlock *A, BasicBlock *B) {
2547 // std::sort sorts in ascending order, so we want the inverse of
2548 // the normal dominance relation, plus a tie breaker for blocks
2549 // unordered by dominance.
2550 if (DT->properlyDominates(A, B)) return true;
2551 if (DT->properlyDominates(B, A)) return false;
2552 return A->getName() < B->getName();
2553 });
2554 // Check to see if our exit blocks are a total order (i.e. a linear chain of
2555 // exits before the backedge). If they aren't, reasoning about reachability
2556 // is complicated and we choose not to for now.
2557 for (unsigned i = 1; i < ExitingBlocks.size(); i++)
2558 if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
2559 return Changed;
2560
2561 // Given our sorted total order, we know that exit[j] must be evaluated
2562 // after all exit[i] such that j > i.
2563 for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
2564 if (BadExit(ExitingBlocks[i])) {
2565 ExitingBlocks.resize(i);
2566 break;
2567 }
2568
2569 if (ExitingBlocks.empty())
2570 return Changed;
2571
2572 // We rely on not being able to reach an exiting block on a later iteration
2573 // than its statically computed exit count. The implementation of
2574 // getExitCount currently has this invariant, but assert it here so that
2575 // breakage is obvious if this ever changes.
2576 assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2577 return DT->dominates(ExitingBB, L->getLoopLatch());
2578 }));
2579
2580 // At this point, ExitingBlocks consists of only those blocks which are
2581 // predicatable. Given that, we know we have at least one exit we can
2582 // predicate if the loop doesn't have side effects and doesn't have any
2583 // implicit exits (because then our exact BTC isn't actually exact).
2584 // @Reviewers - As structured, this is O(I^2) for loop nests. Any
2585 // suggestions on how to improve this? I can obviously bail out for outer
2586 // loops, but that seems less than ideal. MemorySSA can find memory writes,
2587 // is that enough for *all* side effects?
2588 for (BasicBlock *BB : L->blocks())
2589 for (auto &I : *BB)
2590 // TODO: isGuaranteedToTransfer
2591 if (I.mayHaveSideEffects() || I.mayThrow())
2592 return Changed;
2593
2594 // Finally, do the actual predication for all predicatable blocks. A couple
2595 // of notes here:
2596 // 1) We don't bother to constant fold dominated exits with identical exit
2597 // counts; that's simply a form of CSE/equality propagation and we leave
2598 // it for dedicated passes.
2599 // 2) We insert the comparison at the branch. Hoisting introduces additional
2600 // legality constraints and we leave that to dedicated logic. We want to
2601 // predicate even if we can't insert a loop invariant expression as
2602 // peeling or unrolling will likely reduce the cost of the otherwise loop
2603 // varying check.
2604 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
2605 IRBuilder<> B(L->getLoopPreheader()->getTerminator());
2606 Value *ExactBTCV = nullptr; // Lazily generated if needed.
2607 for (BasicBlock *ExitingBB : ExitingBlocks) {
2608 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2609
2610 auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2611 Value *NewCond;
2612 if (ExitCount == ExactBTC) {
2613 NewCond = L->contains(BI->getSuccessor(0)) ?
2614 B.getFalse() : B.getTrue(); 2615 } else { 2616 Value *ECV = Rewriter.expandCodeFor(ExitCount); 2617 if (!ExactBTCV) 2618 ExactBTCV = Rewriter.expandCodeFor(ExactBTC); 2619 Value *RHS = ExactBTCV; 2620 if (ECV->getType() != RHS->getType()) { 2621 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType()); 2622 ECV = B.CreateZExt(ECV, WiderTy); 2623 RHS = B.CreateZExt(RHS, WiderTy); 2624 } 2625 auto Pred = L->contains(BI->getSuccessor(0)) ? 2626 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; 2627 NewCond = B.CreateICmp(Pred, ECV, RHS); 2628 } 2629 Value *OldCond = BI->getCondition(); 2630 BI->setCondition(NewCond); 2631 if (OldCond->use_empty()) 2632 DeadInsts.push_back(OldCond); 2633 Changed = true; 2634 } 2635 2636 return Changed; 2637 } 2638 2639 //===----------------------------------------------------------------------===// 2640 // IndVarSimplify driver. Manage several subpasses of IV simplification. 2641 //===----------------------------------------------------------------------===// 2642 2643 bool IndVarSimplify::run(Loop *L) { 2644 // We need (and expect!) the incoming loop to be in LCSSA. 2645 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2646 "LCSSA required to run indvars!"); 2647 bool Changed = false; 2648 2649 // If LoopSimplify form is not available, stay out of trouble. Some notes: 2650 // - LSR currently only supports LoopSimplify-form loops. Indvars' 2651 // canonicalization can be a pessimization without LSR to "clean up" 2652 // afterwards. 2653 // - We depend on having a preheader; in particular, 2654 // Loop::getCanonicalInductionVariable only supports loops with preheaders, 2655 // and we're in trouble if we can't find the induction variable even when 2656 // we've manually inserted one. 2657 // - LFTR relies on having a single backedge. 2658 if (!L->isLoopSimplifyForm()) 2659 return false; 2660 2661 #ifndef NDEBUG 2662 // Used below for a consistency check only 2663 const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); 2664 #endif 2665 2666 // If there are any floating-point recurrences, attempt to 2667 // transform them to use integer recurrences. 2668 Changed |= rewriteNonIntegerIVs(L); 2669 2670 // Create a rewriter object which we'll use to transform the code with. 2671 SCEVExpander Rewriter(*SE, DL, "indvars"); 2672 #ifndef NDEBUG 2673 Rewriter.setDebugType(DEBUG_TYPE); 2674 #endif 2675 2676 // Eliminate redundant IV users. 2677 // 2678 // Simplification works best when run before other consumers of SCEV. We 2679 // attempt to avoid evaluating SCEVs for sign/zero extend operations until 2680 // other expressions involving loop IVs have been evaluated. This helps SCEV 2681 // set no-wrap flags before normalizing sign/zero extension. 2682 Rewriter.disableCanonicalMode(); 2683 Changed |= simplifyAndExtend(L, Rewriter, LI); 2684 2685 // Check to see if we can compute the final value of any expressions 2686 // that are recurrent in the loop, and substitute the exit values from the 2687 // loop into any instructions outside of the loop that use the final values 2688 // of the current expressions. 2689 if (ReplaceExitValue != NeverRepl) { 2690 if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, Rewriter, DT, 2691 ReplaceExitValue, DeadInsts)) { 2692 NumReplaced += Rewrites; 2693 Changed = true; 2694 } 2695 } 2696 2697 // Eliminate redundant IV cycles. 
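// (replaceCongruentIVs folds header phis that SCEV proves compute the same
// value on every iteration, e.g. two counters with identical start and step;
// the redundant ones are queued on DeadInsts and removed below.)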
2698 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); 2699 2700 // Try to eliminate loop exits based on analyzeable exit counts 2701 if (optimizeLoopExits(L, Rewriter)) { 2702 Changed = true; 2703 // Given we've changed exit counts, notify SCEV 2704 SE->forgetLoop(L); 2705 } 2706 2707 // Try to form loop invariant tests for loop exits by changing how many 2708 // iterations of the loop run when that is unobservable. 2709 if (predicateLoopExits(L, Rewriter)) { 2710 Changed = true; 2711 // Given we've changed exit counts, notify SCEV 2712 SE->forgetLoop(L); 2713 } 2714 2715 // If we have a trip count expression, rewrite the loop's exit condition 2716 // using it. 2717 if (!DisableLFTR) { 2718 SmallVector<BasicBlock*, 16> ExitingBlocks; 2719 L->getExitingBlocks(ExitingBlocks); 2720 for (BasicBlock *ExitingBB : ExitingBlocks) { 2721 // Can't rewrite non-branch yet. 2722 if (!isa<BranchInst>(ExitingBB->getTerminator())) 2723 continue; 2724 2725 // If our exitting block exits multiple loops, we can only rewrite the 2726 // innermost one. Otherwise, we're changing how many times the innermost 2727 // loop runs before it exits. 2728 if (LI->getLoopFor(ExitingBB) != L) 2729 continue; 2730 2731 if (!needsLFTR(L, ExitingBB)) 2732 continue; 2733 2734 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2735 if (isa<SCEVCouldNotCompute>(ExitCount)) 2736 continue; 2737 2738 // This was handled above, but as we form SCEVs, we can sometimes refine 2739 // existing ones; this allows exit counts to be folded to zero which 2740 // weren't when optimizeLoopExits saw them. Arguably, we should iterate 2741 // until stable to handle cases like this better. 2742 if (ExitCount->isZero()) 2743 continue; 2744 2745 PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT); 2746 if (!IndVar) 2747 continue; 2748 2749 // Avoid high cost expansions. Note: This heuristic is questionable in 2750 // that our definition of "high cost" is not exactly principled. 2751 if (Rewriter.isHighCostExpansion(ExitCount, L)) 2752 continue; 2753 2754 // Check preconditions for proper SCEVExpander operation. SCEV does not 2755 // express SCEVExpander's dependencies, such as LoopSimplify. Instead 2756 // any pass that uses the SCEVExpander must do it. This does not work 2757 // well for loop passes because SCEVExpander makes assumptions about 2758 // all loops, while LoopPassManager only forces the current loop to be 2759 // simplified. 2760 // 2761 // FIXME: SCEV expansion has no way to bail out, so the caller must 2762 // explicitly check any assumptions made by SCEV. Brittle. 2763 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount); 2764 if (!AR || AR->getLoop()->getLoopPreheader()) 2765 Changed |= linearFunctionTestReplace(L, ExitingBB, 2766 ExitCount, IndVar, 2767 Rewriter); 2768 } 2769 } 2770 // Clear the rewriter cache, because values that are in the rewriter's cache 2771 // can be deleted in the loop below, causing the AssertingVH in the cache to 2772 // trigger. 2773 Rewriter.clear(); 2774 2775 // Now that we're done iterating through lists, clean up any instructions 2776 // which are now dead. 2777 while (!DeadInsts.empty()) 2778 if (Instruction *Inst = 2779 dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val())) 2780 Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); 2781 2782 // The Rewriter may not be used from this point on. 2783 2784 // Loop-invariant instructions in the preheader that aren't used in the 2785 // loop may be sunk below the loop to reduce register pressure. 
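// (For example, a value computed in the preheader solely for use after the
// loop can be moved into the single exit block so that it is not live across
// the loop body.)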
2786 Changed |= sinkUnusedInvariants(L); 2787 2788 // rewriteFirstIterationLoopExitValues does not rely on the computation of 2789 // trip count and therefore can further simplify exit values in addition to 2790 // rewriteLoopExitValues. 2791 Changed |= rewriteFirstIterationLoopExitValues(L); 2792 2793 // Clean up dead instructions. 2794 Changed |= DeleteDeadPHIs(L->getHeader(), TLI); 2795 2796 // Check a post-condition. 2797 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2798 "Indvars did not preserve LCSSA!"); 2799 2800 // Verify that LFTR, and any other change have not interfered with SCEV's 2801 // ability to compute trip count. We may have *changed* the exit count, but 2802 // only by reducing it. 2803 #ifndef NDEBUG 2804 if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { 2805 SE->forgetLoop(L); 2806 const SCEV *NewBECount = SE->getBackedgeTakenCount(L); 2807 if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) < 2808 SE->getTypeSizeInBits(NewBECount->getType())) 2809 NewBECount = SE->getTruncateOrNoop(NewBECount, 2810 BackedgeTakenCount->getType()); 2811 else 2812 BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, 2813 NewBECount->getType()); 2814 assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount, 2815 NewBECount) && "indvars must preserve SCEV"); 2816 } 2817 #endif 2818 2819 return Changed; 2820 } 2821 2822 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, 2823 LoopStandardAnalysisResults &AR, 2824 LPMUpdater &) { 2825 Function *F = L.getHeader()->getParent(); 2826 const DataLayout &DL = F->getParent()->getDataLayout(); 2827 2828 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI); 2829 if (!IVS.run(&L)) 2830 return PreservedAnalyses::all(); 2831 2832 auto PA = getLoopPassPreservedAnalyses(); 2833 PA.preserveSet<CFGAnalyses>(); 2834 return PA; 2835 } 2836 2837 namespace { 2838 2839 struct IndVarSimplifyLegacyPass : public LoopPass { 2840 static char ID; // Pass identification, replacement for typeid 2841 2842 IndVarSimplifyLegacyPass() : LoopPass(ID) { 2843 initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry()); 2844 } 2845 2846 bool runOnLoop(Loop *L, LPPassManager &LPM) override { 2847 if (skipLoop(L)) 2848 return false; 2849 2850 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 2851 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 2852 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2853 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); 2854 auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr; 2855 auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); 2856 auto *TTI = TTIP ? 
&TTIP->getTTI(*L->getHeader()->getParent()) : nullptr; 2857 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 2858 2859 IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI); 2860 return IVS.run(L); 2861 } 2862 2863 void getAnalysisUsage(AnalysisUsage &AU) const override { 2864 AU.setPreservesCFG(); 2865 getLoopAnalysisUsage(AU); 2866 } 2867 }; 2868 2869 } // end anonymous namespace 2870 2871 char IndVarSimplifyLegacyPass::ID = 0; 2872 2873 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", 2874 "Induction Variable Simplification", false, false) 2875 INITIALIZE_PASS_DEPENDENCY(LoopPass) 2876 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars", 2877 "Induction Variable Simplification", false, false) 2878 2879 Pass *llvm::createIndVarSimplifyPass() { 2880 return new IndVarSimplifyLegacyPass(); 2881 } 2882
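// For manual testing, this transform can be exercised in isolation, e.g.
// with 'opt -passes=indvars -S input.ll' (new pass manager) or
// 'opt -indvars -S input.ll' (legacy pass manager).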