//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened, "Number of indvars widened");
STATISTIC(NumReplaced, "Number of exit values replaced");
STATISTIC(NumLFTR, "Number of loop exit tests replaced");
STATISTIC(NumElimExt, "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV, "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars"));

enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
    "indvars-post-increment-ranges", cl::Hidden,
    cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
    cl::init(true));

static cl::opt<bool>
    DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
                cl::desc("Disable Linear Function Test Replace optimization"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;

  SmallVector<WeakTrackingVH, 16> DeadInsts;

  bool isValidRewrite(Value *FromVal, Value *ToVal);

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);

  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
  bool rewriteFirstIterationLoopExitValues(Loop *L);
  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *BackedgeTakenCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}

  bool run(Loop *L);
};

} // end anonymous namespace

/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR. Ideally, this function will only be
/// called for verification.
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer.
  // This is illegal because the final address produced by a GEP chain must be
  // inbounds relative to its underlying object. Otherwise basic alias
  // analysis, among other things, could fail in a dangerous way. Ultimately,
  // SCEV will be improved to avoid producing an expression involving multiple
  // pointers. Until then, we must bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if the increment operation is not an
  // add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;
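
    // Illustrative example of the check above: Init=0, Inc=3, Exit=10 with
    // ICMP_NE gives Range=10 and Leftover=1, so the integer IV would take the
    // values 0, 3, 6, 9, 12, ... and step over the exit value rather than
    // landing on it; the rewritten loop could then wrap around instead of
    // exiting, so we conservatively refuse to transform such IVs.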

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;

  // Ith incoming value.
  unsigned Ith;

  // Exit value after expansion.
  Value *Val;

  // True if the expansion was deemed high cost.
  bool HighCost;

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L,
                                           const Instruction *I) const {
  SmallPtrSet<const Instruction *, 8> Visited;
  SmallVector<const Instruction *, 8> WorkList;
  Visited.insert(I);
  WorkList.push_back(I);
  while (!WorkList.empty()) {
    const Instruction *Curr = WorkList.pop_back_val();
    // This use is outside the loop, nothing to do.
    if (!L->contains(Curr))
      continue;
    // Do we assume it is a "hard" use which will not be eliminated easily?
    if (Curr->mayHaveSideEffects())
      return true;
    // Otherwise, add all its users to worklist.
    for (auto U : Curr->users()) {
      auto *UI = cast<Instruction>(U);
      if (Visited.insert(UI).second)
        WorkList.push_back(UI);
    }
  }
  return false;
}

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not integer or is not defined
        // in the loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop. Evaluate the value it
        // contains when the loop exits, if possible.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (!SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE))
          continue;

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which can not be optimized
        // away.
        if (ReplaceExitValue != AlwaysRepl &&
            !isa<SCEVConstant>(ExitValue) && hasHardUserWithinLoop(L, Inst))
          continue;

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);

        LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
                          << '\n'
                          << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

#ifndef NDEBUG
        // If we reuse an instruction from a loop which is neither L nor one of
        // its containing loops, we end up breaking LCSSA form for this loop by
        // creating a new use of its instruction.
        if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
          if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
            if (EVL != L)
              assert(EVL->contains(L) && "LCSSA breach detected!");
#endif

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  bool Changed = false;
  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, delete it. Don't do it now to avoid
    // invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all? (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.) Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
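        // For instance (an illustrative sketch), given a header phi
        //   %iv = phi i32 [ %init, %preheader ], [ %next, %latch ]
        // the value of %iv along such an exit is always %init, so the LCSSA
        // phi in the exit block can be fed %init directly.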
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

/// Check whether it is possible to delete the loop after rewriting exit
/// value. If it is possible, ignore ReplaceExitValue and do rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
  // We obviate multiple ExitingBlocks case for simplicity.
  // TODO: If we see testcase with multiple ExitingBlocks can be deleted
  // after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
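/// For example (illustrative), if an i32 IV feeds both a sext to i64 and a
/// zext to i64, whichever extension kind is seen first is recorded and casts
/// of the other kind are simply ignored below.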
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than that on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def. This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative. Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo *LI;
  Loop *L;
  ScalarEvolution *SE;
  DominatorTree *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *, 16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// 0 for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
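  // For example (illustrative), for 'sub %nonIV, %narrowIV' ExtendOperIdx is
  // 0, so lhs/rhs above hold the IV and the extended operand in reversed
  // order and must be swapped back before forming the subtraction.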
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop. If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension). For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive. Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}

/// If the narrow use is an instruction whose two operands are the defining
/// instruction of DU and a load instruction, then we have the following:
/// if the load is hoisted outside the loop, then we do not reach this function
/// as scalar evolution analysis works fine in widenIVUse with variables
/// hoisted outside the loop and efficient code is subsequently generated by
/// not emitting truncate instructions. But when the load is not hoisted
/// (whether due to a limitation in alias analysis or due to a true legality
/// constraint), then scalar evolution cannot proceed with loop variant values
/// and inefficient code is generated. This function handles the non-hoisted
/// load special case by making the optimization generate the same type of code
/// for hoisted and non-hoisted load (widen use and eliminate sign extend
/// instruction). This special case is important especially when the induction
/// variables are affecting addressing mode in code generation.
bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return false;

  // The operand that is not defined by NarrowDef of DU. Let's call it the
  // other operand.
  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
         "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(NarrowUse);
  ExtendKind ExtKind = getExtendKind(NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return false;

  // We are interested in the other operand being a load instruction.
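  // (Illustratively: 'add i32 %iv.narrow, %load' where the load stays inside
  // the loop and every use of the add is a [sz]ext to the wide type.)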
  // But, we should look into relaxing this restriction later on.
  auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
  if (I && I->getOpcode() != Instruction::Load)
    return false;

  // Verify that the defining operand is an AddRec.
  const SCEV *Op1 = SE->getSCEV(WideDef);
  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
    return false;
  // Verify that the other operand is an extend.
  if (ExtKind == SignExtended) {
    if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
      return false;
  } else {
    if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
      return false;
  }

  if (ExtKind == SignExtended) {
    for (Use &U : NarrowUse->uses()) {
      SExtInst *User = dyn_cast<SExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  } else { // ExtKind == ZeroExtended
    for (Use &U : NarrowUse->uses()) {
      ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  }

  return true;
}

/// Special Case for widening with variant Loads (see
/// WidenIV::widenWithVariantLoadUse). This is the code generation part.
void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  ExtendKind ExtKind = getExtendKind(NarrowDef);

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  // Generating a widening use instruction.
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      ExtKind, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      ExtKind, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);

  if (ExtKind == SignExtended)
    ExtendKindMap[NarrowUse] = SignExtended;
  else
    ExtendKindMap[NarrowUse] = ZeroExtended;

  // Update the Use.
  if (ExtKind == SignExtended) {
    for (Use &U : NarrowUse->uses()) {
      SExtInst *User = dyn_cast<SExtInst>(U.getUser());
      if (User && User->getType() == WideType) {
        LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
                          << *WideBO << "\n");
        ++NumElimExt;
        User->replaceAllUsesWith(WideBO);
        DeadInsts.emplace_back(User);
      }
    }
  } else { // ExtKind == ZeroExtended
    for (Use &U : NarrowUse->uses()) {
      ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
      if (User && User->getType() == WideType) {
        LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
                          << *WideBO << "\n");
        ++NumElimExt;
        User->replaceAllUsesWith(WideBO);
        DeadInsts.emplace_back(User);
      }
    }
  }
}

/// Determine whether an individual user of the narrow IV can be widened. If so,
/// return the wide clone of the user.
1511 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1512   assert(ExtendKindMap.count(DU.NarrowDef) &&
1513          "Should already know the kind of extension used to widen NarrowDef");
1514 
1515   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1516   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1517     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1518       // For LCSSA phis, sink the truncate outside the loop.
1519       // After SimplifyCFG most loop exit targets have a single predecessor.
1520       // Otherwise fall back to a truncate within the loop.
1521       if (UsePhi->getNumOperands() != 1)
1522         truncateIVUse(DU, DT, LI);
1523       else {
1524         // Widening the PHI requires us to insert a trunc. The logical place
1525         // for this trunc is in the same BB as the PHI. This is not possible if
1526         // the BB is terminated by a catchswitch.
1527         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1528           return nullptr;
1529 
1530         PHINode *WidePhi =
1531             PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1532                             UsePhi);
1533         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1534         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1535         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1536         UsePhi->replaceAllUsesWith(Trunc);
1537         DeadInsts.emplace_back(UsePhi);
1538         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1539                           << *WidePhi << "\n");
1540       }
1541       return nullptr;
1542     }
1543   }
1544 
1545   // This narrow use can be widened by a sext if it's non-negative or its narrow
1546   // def was widened by a sext. Same for zext.
1547   auto canWidenBySExt = [&]() {
1548     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1549   };
1550   auto canWidenByZExt = [&]() {
1551     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1552   };
1553 
1554   // Our raison d'etre! Eliminate sign and zero extension.
1555   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1556       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1557     Value *NewDef = DU.WideDef;
1558     if (DU.NarrowUse->getType() != WideType) {
1559       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1560       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1561       if (CastWidth < IVWidth) {
1562         // The cast isn't as wide as the IV, so insert a Trunc.
1563         IRBuilder<> Builder(DU.NarrowUse);
1564         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1565       }
1566       else {
1567         // A wider extend was hidden behind a narrower one. This may induce
1568         // another round of IV widening in which the intermediate IV becomes
1569         // dead. It should be very rare.
1570         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1571                           << " not wide enough to subsume " << *DU.NarrowUse
1572                           << "\n");
1573         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1574         NewDef = DU.NarrowUse;
1575       }
1576     }
1577     if (NewDef != DU.NarrowUse) {
1578       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1579                         << " replaced by " << *DU.WideDef << "\n");
1580       ++NumElimExt;
1581       DU.NarrowUse->replaceAllUsesWith(NewDef);
1582       DeadInsts.emplace_back(DU.NarrowUse);
1583     }
1584     // Now that the extend is gone, we want to expose its uses for potential
1585     // further simplification. We don't need to directly inform SimplifyIVUsers
1586     // of the new users, because their parent IV will be processed later as a
1587     // new loop phi.
If we preserved IVUsers analysis, we would also want to
1588     // push the uses of WideDef here.
1589 
1590     // No further widening is needed. The deceased [sz]ext had done it for us.
1591     return nullptr;
1592   }
1593 
1594   // Does this user itself evaluate to a recurrence after widening?
1595   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1596   if (!WideAddRec.first)
1597     WideAddRec = getWideRecurrence(DU);
1598 
1599   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1600   if (!WideAddRec.first) {
1601     // If use is a loop condition, try to promote the condition instead of
1602     // truncating the IV first.
1603     if (widenLoopCompare(DU))
1604       return nullptr;
1605 
1606     // We are about to generate a truncate instruction that may hurt
1607     // performance because the scalar evolution expression computed earlier
1608     // in WideAddRec.first does not indicate a polynomial induction expression.
1609     // In that case, look at the operands of the use instruction to determine
1610     // if we can still widen the use instead of truncating its operand.
1611     if (widenWithVariantLoadUse(DU)) {
1612       widenWithVariantLoadUseCodegen(DU);
1613       return nullptr;
1614     }
1615 
1616     // This user does not evaluate to a recurrence after widening, so don't
1617     // follow it. Instead insert a Trunc to kill off the original use,
1618     // eventually isolating the original narrow IV so it can be removed.
1619     truncateIVUse(DU, DT, LI);
1620     return nullptr;
1621   }
1622   // Assume block terminators cannot evaluate to a recurrence. We can't insert
1623   // a Trunc after a terminator if there happens to be a critical edge.
1624   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1625          "SCEV is not expected to evaluate a block terminator");
1626 
1627   // Reuse the IV increment that SCEVExpander created as long as it dominates
1628   // NarrowUse.
1629   Instruction *WideUse = nullptr;
1630   if (WideAddRec.first == WideIncExpr &&
1631       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1632     WideUse = WideInc;
1633   else {
1634     WideUse = cloneIVUser(DU, WideAddRec.first);
1635     if (!WideUse)
1636       return nullptr;
1637   }
1638   // Evaluation of WideAddRec ensured that the narrow expression could be
1639   // extended outside the loop without overflow. This suggests that the wide use
1640   // evaluates to the same expression as the extended narrow use, but doesn't
1641   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1642   // where it fails, we simply throw away the newly created wide use.
1643   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1644     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1645                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1646                       << "\n");
1647     DeadInsts.emplace_back(WideUse);
1648     return nullptr;
1649   }
1650 
1651   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1652   // Returning WideUse pushes it on the worklist.
1653   return WideUse;
1654 }
1655 
1656 /// Add eligible users of NarrowDef to NarrowIVUsers.
1657 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1658   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1659   bool NonNegativeDef =
1660       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1661                            SE->getConstant(NarrowSCEV->getType(), 0));
1662   for (User *U : NarrowDef->users()) {
1663     Instruction *NarrowUser = cast<Instruction>(U);
1664 
1665     // Handle data flow merges and bizarre phi cycles.
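    // (A user may be reachable along several def-use paths, e.g. both directly
    // and through a phi cycle; the insertion check below queues each user only
    // once.)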
if (!Widened.insert(NarrowUser).second)
1667       continue;
1668 
1669     bool NonNegativeUse = false;
1670     if (!NonNegativeDef) {
1671       // We might have control-dependent range information for this context.
1672       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1673         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1674     }
1675 
1676     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1677                                NonNegativeDef || NonNegativeUse);
1678   }
1679 }
1680 
1681 /// Process a single induction variable. First use the SCEVExpander to create a
1682 /// wide induction variable that evaluates to the same recurrence as the
1683 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1684 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1685 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1686 ///
1687 /// It would be simpler to delete uses as they are processed, but we must avoid
1688 /// invalidating SCEV expressions.
1689 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1690   // Is this phi an induction variable?
1691   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1692   if (!AddRec)
1693     return nullptr;
1694 
1695   // Widen the induction variable expression.
1696   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1697                                ? SE->getSignExtendExpr(AddRec, WideType)
1698                                : SE->getZeroExtendExpr(AddRec, WideType);
1699 
1700   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1701          "Expect the new IV expression to preserve its type");
1702 
1703   // Can the IV be extended outside the loop without overflow?
1704   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1705   if (!AddRec || AddRec->getLoop() != L)
1706     return nullptr;
1707 
1708   // An AddRec must have loop-invariant operands. Since this AddRec is
1709   // materialized by a loop header phi, the expression cannot have any post-loop
1710   // operands, so they must dominate the loop header.
1711   assert(
1712       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1713       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1714       "Loop header phi recurrence inputs do not dominate the loop");
1715 
1716   // Iterate over IV uses (including transitive ones) looking for IV increments
1717   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1718   // the increment, calculate control-dependent range information based on
1719   // dominating conditions inside of the loop (e.g. a range check inside of the
1720   // loop). Calculated ranges are stored in the PostIncRangeInfos map.
1721   //
1722   // Control-dependent range information is later used to prove that a narrow
1723   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1724   // this on demand because when pushNarrowIVUsers needs this information some
1725   // of the dominating conditions might already be widened.
1726   if (UsePostIncrementRanges)
1727     calculatePostIncRanges(OrigPhi);
1728 
1729   // The rewriter provides a value for the desired IV expression. This may
1730   // either find an existing phi or materialize a new one. Either way, we
1731   // expect a well-formed cyclic phi-with-increments, i.e., any operand not part
1732   // of the phi-SCC dominates the loop entry.
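  // For example (a sketch of the expected shape when widening i32 to i64):
  //   %wide.iv = phi i64 [ %start.wide, %preheader ], [ %wide.iv.next, %latch ]
  //   %wide.iv.next = add nsw i64 %wide.iv, 1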
Instruction *InsertPt = &L->getHeader()->front();
1734   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1735 
1736   // Remembering the WideIV increment generated by SCEVExpander allows
1737   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1738   // employ a general reuse mechanism because the call above is the only call to
1739   // SCEVExpander. Hence, we produce 1-to-1 narrow to wide uses.
1740   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1741     WideInc =
1742         cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1743     WideIncExpr = SE->getSCEV(WideInc);
1744     // Propagate the debug location associated with the original loop increment
1745     // to the new (widened) increment.
1746     auto *OrigInc =
1747         cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1748     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1749   }
1750 
1751   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1752   ++NumWidened;
1753 
1754   // Traverse the def-use chain using a worklist starting at the original IV.
1755   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1756 
1757   Widened.insert(OrigPhi);
1758   pushNarrowIVUsers(OrigPhi, WidePhi);
1759 
1760   while (!NarrowIVUsers.empty()) {
1761     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1762 
1763     // Process a def-use edge. This may replace the use, so don't hold a
1764     // use_iterator across it.
1765     Instruction *WideUse = widenIVUse(DU, Rewriter);
1766 
1767     // Follow all def-use edges from the previous narrow use.
1768     if (WideUse)
1769       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1770 
1771     // widenIVUse may have removed the def-use edge.
1772     if (DU.NarrowDef->use_empty())
1773       DeadInsts.emplace_back(DU.NarrowDef);
1774   }
1775 
1776   // Attach any debug information to the new PHI. Since OrigPhi and WidePhi
1777   // evaluate the same recurrence, we can just copy the debug info over.
1778   SmallVector<DbgValueInst *, 1> DbgValues;
1779   llvm::findDbgValues(DbgValues, OrigPhi);
1780   auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(),
1781                                      ValueAsMetadata::get(WidePhi));
1782   for (auto &DbgValue : DbgValues)
1783     DbgValue->setOperand(0, MDPhi);
1784   return WidePhi;
1785 }
1786 
1787 /// Calculates the control-dependent range for the given def at the given
1788 /// context by looking at dominating conditions inside of the loop.
1789 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1790                                     Instruction *NarrowUser) {
1791   using namespace llvm::PatternMatch;
1792 
1793   Value *NarrowDefLHS;
1794   const APInt *NarrowDefRHS;
1795   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1796                                  m_APInt(NarrowDefRHS))) ||
1797       !NarrowDefRHS->isNonNegative())
1798     return;
1799 
1800   auto UpdateRangeFromCondition = [&](Value *Condition,
1801                                       bool TrueDest) {
1802     CmpInst::Predicate Pred;
1803     Value *CmpRHS;
1804     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1805                                  m_Value(CmpRHS))))
1806       return;
1807 
1808     CmpInst::Predicate P =
1809         TrueDest ?
Pred : CmpInst::getInversePredicate(Pred);
1810 
1811     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1812     auto CmpConstrainedLHSRange =
1813         ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1814     auto NarrowDefRange =
1815         CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
1816 
1817     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1818   };
1819 
1820   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1821     if (!HasGuards)
1822       return;
1823 
1824     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1825                                      Ctx->getParent()->rend())) {
1826       Value *C = nullptr;
1827       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1828         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1829     }
1830   };
1831 
1832   UpdateRangeFromGuards(NarrowUser);
1833 
1834   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1835   // If NarrowUserBB is statically unreachable, asking dominator queries may
1836   // yield surprising results. (e.g. the block may not have a dom tree node)
1837   if (!DT->isReachableFromEntry(NarrowUserBB))
1838     return;
1839 
1840   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1841        L->contains(DTB->getBlock());
1842        DTB = DTB->getIDom()) {
1843     auto *BB = DTB->getBlock();
1844     auto *TI = BB->getTerminator();
1845     UpdateRangeFromGuards(TI);
1846 
1847     auto *BI = dyn_cast<BranchInst>(TI);
1848     if (!BI || !BI->isConditional())
1849       continue;
1850 
1851     auto *TrueSuccessor = BI->getSuccessor(0);
1852     auto *FalseSuccessor = BI->getSuccessor(1);
1853 
1854     auto DominatesNarrowUser = [this, NarrowUser](BasicBlockEdge BBE) {
1855       return BBE.isSingleEdge() &&
1856              DT->dominates(BBE, NarrowUser->getParent());
1857     };
1858 
1859     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1860       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1861 
1862     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1863       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1864   }
1865 }
1866 
1867 /// Calculates the PostIncRangeInfos map for the given IV.
1868 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1869   SmallPtrSet<Instruction *, 16> Visited;
1870   SmallVector<Instruction *, 6> Worklist;
1871   Worklist.push_back(OrigPhi);
1872   Visited.insert(OrigPhi);
1873 
1874   while (!Worklist.empty()) {
1875     Instruction *NarrowDef = Worklist.pop_back_val();
1876 
1877     for (Use &U : NarrowDef->uses()) {
1878       auto *NarrowUser = cast<Instruction>(U.getUser());
1879 
1880       // Don't go looking outside the current loop.
1881       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1882       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1883         continue;
1884 
1885       if (!Visited.insert(NarrowUser).second)
1886         continue;
1887 
1888       Worklist.push_back(NarrowUser);
1889 
1890       calculatePostIncRange(NarrowDef, NarrowUser);
1891     }
1892   }
1893 }
1894 
1895 //===----------------------------------------------------------------------===//
1896 //  Live IV Reduction - Minimize IVs live across the loop.
1897 //===----------------------------------------------------------------------===//
1898 
1899 //===----------------------------------------------------------------------===//
1900 //  Simplification of IV users based on SCEV evaluation.
1901 //===----------------------------------------------------------------------===//
1902 
1903 namespace {
1904 
1905 class IndVarSimplifyVisitor : public IVVisitor {
1906   ScalarEvolution *SE;
1907   const TargetTransformInfo *TTI;
1908   PHINode *IVPhi;
1909 
1910 public:
1911   WideIVInfo WI;
1912 
1913   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1914                         const TargetTransformInfo *TTI,
1915                         const DominatorTree *DTree)
1916       : SE(SCEV), TTI(TTI), IVPhi(IV) {
1917     DT = DTree;
1918     WI.NarrowIV = IVPhi;
1919   }
1920 
1921   // Implement the interface used by simplifyUsersOfIV.
1922   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1923 };
1924 
1925 } // end anonymous namespace
1926 
1927 /// Iteratively perform simplification on a worklist of IV users. Each
1928 /// successive simplification may push more users which may themselves be
1929 /// candidates for simplification.
1930 ///
1931 /// Sign/Zero extend elimination is interleaved with IV simplification.
1932 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1933                                        SCEVExpander &Rewriter,
1934                                        LoopInfo *LI) {
1935   SmallVector<WideIVInfo, 8> WideIVs;
1936 
1937   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1938       Intrinsic::getName(Intrinsic::experimental_guard));
1939   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1940 
1941   SmallVector<PHINode*, 8> LoopPhis;
1942   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1943     LoopPhis.push_back(cast<PHINode>(I));
1944   }
1945   // Each round of simplification iterates through the SimplifyIVUsers worklist
1946   // for all current phis, then determines whether any IVs can be
1947   // widened. Widening adds new phis to LoopPhis, inducing another round of
1948   // simplification on the wide IVs.
1949   bool Changed = false;
1950   while (!LoopPhis.empty()) {
1951     // Evaluate as many IV expressions as possible before widening any IVs. This
1952     // forces SCEV to set no-wrap flags before evaluating sign/zero
1953     // extension. The first time SCEV attempts to normalize sign/zero extension,
1954     // the result becomes final. So for the most predictable results, we delay
1955     // evaluation of sign/zero extension until needed, and avoid running
1956     // other SCEV-based analyses prior to simplifyAndExtend.
1957     do {
1958       PHINode *CurrIV = LoopPhis.pop_back_val();
1959 
1960       // Information about sign/zero extensions of CurrIV.
1961       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1962 
1963       Changed |=
1964           simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1965 
1966       if (Visitor.WI.WidestNativeType) {
1967         WideIVs.push_back(Visitor.WI);
1968       }
1969     } while (!LoopPhis.empty());
1970 
1971     for (; !WideIVs.empty(); WideIVs.pop_back()) {
1972       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1973       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1974         Changed = true;
1975         LoopPhis.push_back(WidePhi);
1976       }
1977     }
1978   }
1979   return Changed;
1980 }
1981 
1982 //===----------------------------------------------------------------------===//
1983 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1984 //===----------------------------------------------------------------------===//
1985 
1986 /// Given a Value which is hoped to be part of an add recurrence in the given
1987 /// loop, return the associated Phi node if so. Otherwise, return null. Note
1988 /// that this is less general than SCEV's AddRec checking.
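/// For example, given the canonical increment
///   %iv.next = add nsw i32 %iv, 1
/// where %iv is a header phi whose latch input is %iv.next, this returns %iv;
/// the commuted add and a two-operand GEP increment are recognized as well.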
1989 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
1990   Instruction *IncI = dyn_cast<Instruction>(IncV);
1991   if (!IncI)
1992     return nullptr;
1993 
1994   switch (IncI->getOpcode()) {
1995   case Instruction::Add:
1996   case Instruction::Sub:
1997     break;
1998   case Instruction::GetElementPtr:
1999     // An IV counter must preserve its type.
2000     if (IncI->getNumOperands() == 2)
2001       break;
2002     LLVM_FALLTHROUGH;
2003   default:
2004     return nullptr;
2005   }
2006 
2007   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2008   if (Phi && Phi->getParent() == L->getHeader()) {
2009     if (L->isLoopInvariant(IncI->getOperand(1)))
2010       return Phi;
2011     return nullptr;
2012   }
2013   if (IncI->getOpcode() == Instruction::GetElementPtr)
2014     return nullptr;
2015 
2016   // Allow add/sub to be commuted.
2017   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2018   if (Phi && Phi->getParent() == L->getHeader()) {
2019     if (L->isLoopInvariant(IncI->getOperand(0)))
2020       return Phi;
2021   }
2022   return nullptr;
2023 }
2024 
2025 /// Given a loop with one backedge, return the ICmpInst controlling the exit
2026 /// test in the given exiting block. There is no guarantee that the exiting
2027 /// block is also the latch.
2028 static ICmpInst *getLoopTest(Loop *L, BasicBlock *ExitingBB) {
2029 
2030   BasicBlock *LatchBlock = L->getLoopLatch();
2031   // Don't bother with LFTR if the loop is not properly simplified.
2032   if (!LatchBlock)
2033     return nullptr;
2034 
2035   BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2036   assert(BI && "expected exit branch");
2037 
2038   return dyn_cast<ICmpInst>(BI->getCondition());
2039 }
2040 
2041 /// linearFunctionTestReplace policy. Return true unless we can show that the
2042 /// current exit test is already sufficiently canonical.
2043 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2044   // Do LFTR to simplify the exit condition to an ICMP.
2045   ICmpInst *Cond = getLoopTest(L, ExitingBB);
2046   if (!Cond)
2047     return true;
2048 
2049   // Do LFTR to simplify the exit ICMP to EQ/NE.
2050   ICmpInst::Predicate Pred = Cond->getPredicate();
2051   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2052     return true;
2053 
2054   // Look for a loop-invariant RHS.
2055   Value *LHS = Cond->getOperand(0);
2056   Value *RHS = Cond->getOperand(1);
2057   if (!L->isLoopInvariant(RHS)) {
2058     if (!L->isLoopInvariant(LHS))
2059       return true;
2060     std::swap(LHS, RHS);
2061   }
2062   // Look for a simple IV counter LHS.
2063   PHINode *Phi = dyn_cast<PHINode>(LHS);
2064   if (!Phi)
2065     Phi = getLoopPhiForCounter(LHS, L);
2066 
2067   if (!Phi)
2068     return true;
2069 
2070   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2071   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2072   if (Idx < 0)
2073     return true;
2074 
2075   // Do LFTR if the exit condition's IV is *not* a simple counter.
2076   Value *IncV = Phi->getIncomingValue(Idx);
2077   return Phi != getLoopPhiForCounter(IncV, L);
2078 }
2079 
2080 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2081 /// down to checking that all operands are constant and listing instructions
2082 /// that may hide undef.
2083 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2084                                unsigned Depth) {
2085   if (isa<Constant>(V))
2086     return !isa<UndefValue>(V);
2087 
2088   if (Depth >= 6)
2089     return false;
2090 
2091   // Conservatively handle non-constant non-instructions. For example, Arguments
2092   // may be undef.
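  // (e.g. a plain function Argument lands here and is rejected below, since
  // the caller could pass undef for it.)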
2093   Instruction *I = dyn_cast<Instruction>(V);
2094   if (!I)
2095     return false;
2096 
2097   // Load and return values may be undef.
2098   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2099     return false;
2100 
2101   // Optimistically handle other instructions.
2102   for (Value *Op : I->operands()) {
2103     if (!Visited.insert(Op).second)
2104       continue;
2105     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2106       return false;
2107   }
2108   return true;
2109 }
2110 
2111 /// Return true if the given value is concrete. We must prove that undef can
2112 /// never reach it.
2113 ///
2114 /// TODO: If we decide that this is a good approach to checking for undef, we
2115 /// may factor it into a common location.
2116 static bool hasConcreteDef(Value *V) {
2117   SmallPtrSet<Value*, 8> Visited;
2118   Visited.insert(V);
2119   return hasConcreteDefImpl(V, Visited, 0);
2120 }
2121 
2122 /// Return true if this IV has any uses other than the (soon to be rewritten)
2123 /// loop exit test.
2124 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2125   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2126   Value *IncV = Phi->getIncomingValue(LatchIdx);
2127 
2128   for (User *U : Phi->users())
2129     if (U != Cond && U != IncV) return false;
2130 
2131   for (User *U : IncV->users())
2132     if (U != Cond && U != Phi) return false;
2133   return true;
2134 }
2135 
2136 /// Return true if the given phi is a "counter" in L. A counter is an
2137 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
2138 /// step of 1. Note that L must have exactly one latch.
2139 static bool isLoopCounter(PHINode* Phi, Loop *L,
2140                           ScalarEvolution *SE) {
2141   assert(Phi->getParent() == L->getHeader());
2142   assert(L->getLoopLatch());
2143 
2144   if (!SE->isSCEVable(Phi->getType()))
2145     return false;
2146 
2147   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2148   if (!AR || AR->getLoop() != L || !AR->isAffine())
2149     return false;
2150 
2151   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2152   if (!Step || !Step->isOne())
2153     return false;
2154 
2155   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2156   Value *IncV = Phi->getIncomingValue(LatchIdx);
2157   return (getLoopPhiForCounter(IncV, L) == Phi);
2158 }
2159 
2160 /// Search the loop header for a loop counter (an add rec w/ a step of one)
2161 /// suitable for use by LFTR. If multiple counters are available, select the
2162 /// "best" one based on profitability heuristics.
2163 ///
2164 /// BECount may be an i8* pointer type. The pointer difference is already a
2165 /// valid count without scaling the address stride, so it remains a pointer
2166 /// expression as far as SCEV is concerned.
2167 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2168                                 const SCEV *BECount, ScalarEvolution *SE) {
2169   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2170 
2171   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2172 
2173   // Loop over all of the PHI nodes, looking for a simple counter.
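  // The selection below sketches this preference order: prefer a phi that is
  // already live anyway (so an almost-dead IV can be eliminated), then one
  // counting from zero, then the wider of two otherwise-equal candidates.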
2174   PHINode *BestPhi = nullptr;
2175   const SCEV *BestInit = nullptr;
2176   BasicBlock *LatchBlock = L->getLoopLatch();
2177   assert(LatchBlock && "needsLFTR should guarantee a loop latch");
2178   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2179 
2180   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2181     PHINode *Phi = cast<PHINode>(I);
2182     if (!isLoopCounter(Phi, L, SE))
2183       continue;
2184 
2185     // Avoid comparing an integer IV against a pointer Limit.
2186     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2187       continue;
2188 
2189     const auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2190 
2191     // AR may be a pointer type, while BECount is an integer type.
2192     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2193     // AR may not be a narrower type, or we may never exit.
2194     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2195     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2196       continue;
2197 
2198     // Avoid reusing a potentially undef value to compute other values that may
2199     // have originally had a concrete definition.
2200     if (!hasConcreteDef(Phi)) {
2201       // We explicitly allow unknown phis as long as they are already used by
2202       // the loop test. In this case we assume that performing LFTR could not
2203       // increase the number of undef users.
2204       // TODO: Generalize this to allow *any* loop exit which is known to
2205       // execute on each iteration.
2206       if (L->getExitingBlock())
2207         if (ICmpInst *Cond = getLoopTest(L, ExitingBB))
2208           if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L) &&
2209               Phi != getLoopPhiForCounter(Cond->getOperand(1), L))
2210             continue;
2211     }
2212 
2213     const SCEV *Init = AR->getStart();
2214 
2215     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2216       // Don't force a live loop counter if another IV can be used.
2217       if (AlmostDeadIV(Phi, LatchBlock, Cond))
2218         continue;
2219 
2220       // Prefer a counter that starts at zero; this is the more "canonical"
2221       // form. It also prefers integer IVs over pointer IVs.
2222       if (BestInit->isZero() != Init->isZero()) {
2223         if (BestInit->isZero())
2224           continue;
2225       }
2226       // If two IVs both count from zero or both count from nonzero then the
2227       // narrower is likely a dead phi that has been widened. Use the wider phi
2228       // to allow the other to be eliminated.
2229       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2230         continue;
2231     }
2232     BestPhi = Phi;
2233     BestInit = Init;
2234   }
2235   return BestPhi;
2236 }
2237 
2238 /// Insert an IR expression which computes the value held by the IV IndVar
2239 /// (which must be a loop counter w/ unit stride) after the backedge of loop L
2240 /// is taken IVCount times.
2241 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2242                            const SCEV *IVCount, Loop *L,
2243                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
2244   assert(isLoopCounter(IndVar, L, SE));
2245   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2246   const SCEV *IVInit = AR->getStart();
2247 
2248   // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
2249   // finds a valid pointer IV. Zero extend IVCount in order to materialize a
2250   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2251   // the existing GEPs whenever possible.
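  // For instance (sketch): for an i8* counter starting at %base with trip
  // count %n, the pointer branch below emits a single
  //   %lftr.limit = getelementptr i8, i8* %base, i64 zext(%n)
  // in the preheader rather than re-expanding a pointer expression.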
2252   if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) {
2253     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2254     // signed value. IVCount on the other hand represents the loop trip count,
2255     // which is an unsigned value. FindLoopCounter only allows induction
2256     // variables that have a positive stride of one. This means we don't
2257     // have to handle the case of negative offsets (yet) and just need to zero
2258     // extend IVCount.
2259     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2260     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
2261 
2262     // Expand the code for the iteration count.
2263     assert(SE->isLoopInvariant(IVOffset, L) &&
2264            "Computed iteration count is not loop invariant!");
2265     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2266     Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
2267 
2268     Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
2269     assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
2270     // We could handle pointer IVs other than i8*, but we need to compensate for
2271     // gep index scaling.
2272     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2273                              cast<PointerType>(GEPBase->getType())
2274                                  ->getElementType())->isOne() &&
2275            "unit stride pointer IV must be i8*");
2276 
2277     IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
2278     return Builder.CreateGEP(GEPBase->getType()->getPointerElementType(),
2279                              GEPBase, GEPOffset, "lftr.limit");
2280   } else {
2281     // In any other case, convert both IVInit and IVCount to integers before
2282     // comparing. This may result in SCEV expansion of pointers, but in practice
2283     // SCEV will fold the pointer arithmetic away as follows:
2284     //   BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2285     //
2286     // Valid cases: (1) both are integers, which is the most common; (2) both
2287     // may be pointers, for simple memset-style loops.
2288     //
2289     // IVInit integer and IVCount pointer would only occur if a canonical IV
2290     // were generated on top of case #2, which is not expected.
2291 
2292     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2293     const SCEV *IVLimit = nullptr;
2294     // For unit stride, IVCount = Start + BECount with 2's complement overflow.
2295     // For non-zero Start, compute IVLimit here.
2296     if (AR->getStart()->isZero())
2297       IVLimit = IVCount;
2298     else {
2299       const SCEV *IVInit = AR->getStart();
2300 
2301       // For integer IVs, truncate the IV before computing IVInit + BECount.
2302       if (SE->getTypeSizeInBits(IVInit->getType())
2303           > SE->getTypeSizeInBits(IVCount->getType()))
2304         IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
2305 
2306       IVLimit = SE->getAddExpr(IVInit, IVCount);
2307     }
2308     // Expand the code for the iteration count.
2309     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2310     IRBuilder<> Builder(BI);
2311     assert(SE->isLoopInvariant(IVLimit, L) &&
2312            "Computed iteration count is not loop invariant!");
2313     // Ensure that we generate the same type as IndVar, or a smaller integer
2314     // type. In the presence of null pointer values, we have an integer type
2315     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2316     Type *LimitTy = IVCount->getType()->isPointerTy() ?
2317 IndVar->getType() : IVCount->getType(); 2318 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); 2319 } 2320 } 2321 2322 /// This method rewrites the exit condition of the loop to be a canonical != 2323 /// comparison against the incremented loop induction variable. This pass is 2324 /// able to rewrite the exit tests of any loop where the SCEV analysis can 2325 /// determine a loop-invariant trip count of the loop, which is actually a much 2326 /// broader range than just linear tests. 2327 bool IndVarSimplify:: 2328 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 2329 const SCEV *BackedgeTakenCount, 2330 PHINode *IndVar, SCEVExpander &Rewriter) { 2331 // Initialize CmpIndVar and IVCount to their preincremented values. 2332 Value *CmpIndVar = IndVar; 2333 const SCEV *IVCount = BackedgeTakenCount; 2334 2335 assert(L->getLoopLatch() && "Loop no longer in simplified form?"); 2336 2337 // If the exiting block is the same as the backedge block, we prefer to 2338 // compare against the post-incremented value, otherwise we must compare 2339 // against the preincremented value. 2340 if (ExitingBB == L->getLoopLatch()) { 2341 // Add one to the "backedge-taken" count to get the trip count. 2342 // This addition may overflow, which is valid as long as the comparison is 2343 // truncated to BackedgeTakenCount->getType(). 2344 IVCount = SE->getAddExpr(BackedgeTakenCount, 2345 SE->getOne(BackedgeTakenCount->getType())); 2346 // The BackedgeTaken expression contains the number of times that the 2347 // backedge branches to the loop header. This is one less than the 2348 // number of times the loop executes, so use the incremented indvar. 2349 CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBB); 2350 } 2351 2352 // It may be necessary to drop nowrap flags on the incrementing instruction 2353 // if either LFTR moves from a pre-inc check to a post-inc check (in which 2354 // case the increment might have previously been poison on the last iteration 2355 // only) or if LFTR switches to a different IV that was previously dynamically 2356 // dead (and as such may be arbitrarily poison). We remove any nowrap flags 2357 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc 2358 // check), because the pre-inc addrec flags may be adopted from the original 2359 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags. 2360 // TODO: This handling is inaccurate for one case: If we switch to a 2361 // dynamically dead IV that wraps on the first loop iteration only, which is 2362 // not covered by the post-inc addrec. (If the new IV was not dynamically 2363 // dead, it could not be poison on the first iteration in the first place.) 2364 Value *IncVar = IndVar->getIncomingValueForBlock(L->getLoopLatch()); 2365 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) { 2366 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar)); 2367 if (BO->hasNoUnsignedWrap()) 2368 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap()); 2369 if (BO->hasNoSignedWrap()) 2370 BO->setHasNoSignedWrap(AR->hasNoSignedWrap()); 2371 } 2372 2373 Value *ExitCnt = genLoopLimit(IndVar, ExitingBB, IVCount, L, Rewriter, SE); 2374 assert(ExitCnt->getType()->isPointerTy() == 2375 IndVar->getType()->isPointerTy() && 2376 "genLoopLimit missed a cast"); 2377 2378 // Insert a new icmp_ne or icmp_eq instruction before the branch. 
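  // (ICMP_NE when the branch's true edge stays in the loop, i.e. "keep looping
  // while the IV has not yet reached the limit"; ICMP_EQ when the true edge
  // exits the loop.)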
2379 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2380 ICmpInst::Predicate P; 2381 if (L->contains(BI->getSuccessor(0))) 2382 P = ICmpInst::ICMP_NE; 2383 else 2384 P = ICmpInst::ICMP_EQ; 2385 2386 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 2387 << " LHS:" << *CmpIndVar << '\n' 2388 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==") 2389 << "\n" 2390 << " RHS:\t" << *ExitCnt << "\n" 2391 << " IVCount:\t" << *IVCount << "\n"); 2392 2393 IRBuilder<> Builder(BI); 2394 2395 // The new loop exit condition should reuse the debug location of the 2396 // original loop exit condition. 2397 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition())) 2398 Builder.SetCurrentDebugLocation(Cond->getDebugLoc()); 2399 2400 // LFTR can ignore IV overflow and truncate to the width of 2401 // BECount. This avoids materializing the add(zext(add)) expression. 2402 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); 2403 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); 2404 if (CmpIndVarSize > ExitCntSize) { 2405 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 2406 const SCEV *ARStart = AR->getStart(); 2407 const SCEV *ARStep = AR->getStepRecurrence(*SE); 2408 // For constant IVCount, avoid truncation. 2409 if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) { 2410 const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt(); 2411 APInt Count = cast<SCEVConstant>(IVCount)->getAPInt(); 2412 // Note that the post-inc value of BackedgeTakenCount may have overflowed 2413 // above such that IVCount is now zero. 2414 if (IVCount != BackedgeTakenCount && Count == 0) { 2415 Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize); 2416 ++Count; 2417 } 2418 else 2419 Count = Count.zext(CmpIndVarSize); 2420 APInt NewLimit; 2421 if (cast<SCEVConstant>(ARStep)->getValue()->isNegative()) 2422 NewLimit = Start - Count; 2423 else 2424 NewLimit = Start + Count; 2425 ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit); 2426 2427 LLVM_DEBUG(dbgs() << " Widen RHS:\t" << *ExitCnt << "\n"); 2428 } else { 2429 // We try to extend trip count first. If that doesn't work we truncate IV. 2430 // Zext(trunc(IV)) == IV implies equivalence of the following two: 2431 // Trunc(IV) == ExitCnt and IV == zext(ExitCnt). Similarly for sext. If 2432 // one of the two holds, extend the trip count, otherwise we truncate IV. 
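      // For example (sketch): if CmpIndVar is an i64 value known by SCEV to
      // equal zext(trunc-to-i32(CmpIndVar)), then emitting
      //   icmp eq i64 %iv, zext(%exitcnt)
      // hoists a single loop-invariant zext instead of truncating the IV on
      // every iteration.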
2433       bool Extended = false;
2434       const SCEV *IV = SE->getSCEV(CmpIndVar);
2435       const SCEV *ZExtTrunc =
2436           SE->getZeroExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2437                                                     ExitCnt->getType()),
2438                                 CmpIndVar->getType());
2439 
2440       if (ZExtTrunc == IV) {
2441         Extended = true;
2442         ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2443                                      "wide.trip.count");
2444       } else {
2445         const SCEV *SExtTrunc =
2446             SE->getSignExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2447                                                       ExitCnt->getType()),
2448                                   CmpIndVar->getType());
2449         if (SExtTrunc == IV) {
2450           Extended = true;
2451           ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2452                                        "wide.trip.count");
2453         }
2454       }
2455 
2456       if (!Extended)
2457         CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2458                                         "lftr.wideiv");
2459     }
2460   }
2461   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2462   Value *OrigCond = BI->getCondition();
2463   // It's tempting to use replaceAllUsesWith here to fully replace the old
2464   // comparison, but that's not immediately safe, since users of the old
2465   // comparison may not be dominated by the new comparison. Instead, just
2466   // update the branch to use the new comparison; in the common case this
2467   // will make the old comparison dead.
2468   BI->setCondition(Cond);
2469   DeadInsts.push_back(OrigCond);
2470 
2471   ++NumLFTR;
2472   return true;
2473 }
2474 
2475 //===----------------------------------------------------------------------===//
2476 //  sinkUnusedInvariants. A late subpass to clean up loop preheaders.
2477 //===----------------------------------------------------------------------===//
2478 
2479 /// If there's a single exit block, sink any loop-invariant values that
2480 /// were defined in the preheader but not used inside the loop into the
2481 /// exit block to reduce register pressure in the loop.
2482 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2483   BasicBlock *ExitBlock = L->getExitBlock();
2484   if (!ExitBlock) return false;
2485 
2486   BasicBlock *Preheader = L->getLoopPreheader();
2487   if (!Preheader) return false;
2488 
2489   bool MadeAnyChanges = false;
2490   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2491   BasicBlock::iterator I(Preheader->getTerminator());
2492   while (I != Preheader->begin()) {
2493     --I;
2494     // New instructions were inserted at the end of the preheader.
2495     if (isa<PHINode>(I))
2496       break;
2497 
2498     // Don't move instructions which might have side effects, since the side
2499     // effects need to complete before instructions inside the loop. Also don't
2500     // move instructions which might read memory, since the loop may modify
2501     // memory. Note that it's okay if the instruction might have undefined
2502     // behavior: LoopSimplify guarantees that the preheader dominates the exit
2503     // block.
2504     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2505       continue;
2506 
2507     // Skip debug info intrinsics.
2508     if (isa<DbgInfoIntrinsic>(I))
2509       continue;
2510 
2511     // Skip eh pad instructions.
2512     if (I->isEHPad())
2513       continue;
2514 
2515     // Don't sink allocas: we never want to sink static allocas out of the
2516     // entry block, and correctly sinking dynamic allocas requires
2517     // checks for stacksave/stackrestore intrinsics.
2518     // FIXME: Refactor this check somehow?
2519     if (isa<AllocaInst>(I))
2520       continue;
2521 
2522     // Determine if there is a use in or before the loop (direct or
2523     // otherwise).
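    // (For a phi user, the use effectively occurs in the incoming block that
    // supplies the value, not in the phi's own block; the loop below maps the
    // use to that block before testing it.)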
2524 bool UsedInLoop = false; 2525 for (Use &U : I->uses()) { 2526 Instruction *User = cast<Instruction>(U.getUser()); 2527 BasicBlock *UseBB = User->getParent(); 2528 if (PHINode *P = dyn_cast<PHINode>(User)) { 2529 unsigned i = 2530 PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 2531 UseBB = P->getIncomingBlock(i); 2532 } 2533 if (UseBB == Preheader || L->contains(UseBB)) { 2534 UsedInLoop = true; 2535 break; 2536 } 2537 } 2538 2539 // If there is, the def must remain in the preheader. 2540 if (UsedInLoop) 2541 continue; 2542 2543 // Otherwise, sink it to the exit block. 2544 Instruction *ToMove = &*I; 2545 bool Done = false; 2546 2547 if (I != Preheader->begin()) { 2548 // Skip debug info intrinsics. 2549 do { 2550 --I; 2551 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); 2552 2553 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) 2554 Done = true; 2555 } else { 2556 Done = true; 2557 } 2558 2559 MadeAnyChanges = true; 2560 ToMove->moveBefore(*ExitBlock, InsertPt); 2561 if (Done) break; 2562 InsertPt = ToMove->getIterator(); 2563 } 2564 2565 return MadeAnyChanges; 2566 } 2567 2568 //===----------------------------------------------------------------------===// 2569 // IndVarSimplify driver. Manage several subpasses of IV simplification. 2570 //===----------------------------------------------------------------------===// 2571 2572 bool IndVarSimplify::run(Loop *L) { 2573 // We need (and expect!) the incoming loop to be in LCSSA. 2574 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2575 "LCSSA required to run indvars!"); 2576 bool Changed = false; 2577 2578 // If LoopSimplify form is not available, stay out of trouble. Some notes: 2579 // - LSR currently only supports LoopSimplify-form loops. Indvars' 2580 // canonicalization can be a pessimization without LSR to "clean up" 2581 // afterwards. 2582 // - We depend on having a preheader; in particular, 2583 // Loop::getCanonicalInductionVariable only supports loops with preheaders, 2584 // and we're in trouble if we can't find the induction variable even when 2585 // we've manually inserted one. 2586 // - LFTR relies on having a single backedge. 2587 if (!L->isLoopSimplifyForm()) 2588 return false; 2589 2590 // If there are any floating-point recurrences, attempt to 2591 // transform them to use integer recurrences. 2592 Changed |= rewriteNonIntegerIVs(L); 2593 2594 const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); 2595 2596 // Create a rewriter object which we'll use to transform the code with. 2597 SCEVExpander Rewriter(*SE, DL, "indvars"); 2598 #ifndef NDEBUG 2599 Rewriter.setDebugType(DEBUG_TYPE); 2600 #endif 2601 2602 // Eliminate redundant IV users. 2603 // 2604 // Simplification works best when run before other consumers of SCEV. We 2605 // attempt to avoid evaluating SCEVs for sign/zero extend operations until 2606 // other expressions involving loop IVs have been evaluated. This helps SCEV 2607 // set no-wrap flags before normalizing sign/zero extension. 2608 Rewriter.disableCanonicalMode(); 2609 Changed |= simplifyAndExtend(L, Rewriter, LI); 2610 2611 // Check to see if this loop has a computable loop-invariant execution count. 2612 // If so, this means that we can compute the final value of any expressions 2613 // that are recurrent in the loop, and substitute the exit values from the 2614 // loop into any instructions outside of the loop that use the final values of 2615 // the current expressions. 
2616 // 2617 if (ReplaceExitValue != NeverRepl && 2618 !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) 2619 Changed |= rewriteLoopExitValues(L, Rewriter); 2620 2621 // Eliminate redundant IV cycles. 2622 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); 2623 2624 // If we have a trip count expression, rewrite the loop's exit condition 2625 // using it. 2626 if (!DisableLFTR) { 2627 // For the moment, we only do LFTR for single exit loops. The code is 2628 // structured as it is in the expectation of generalization to multi-exit 2629 // loops in the near future. See D62625 for context. 2630 SmallVector<BasicBlock*, 16> ExitingBlocks; 2631 if (auto *ExitingBB = L->getExitingBlock()) 2632 ExitingBlocks.push_back(ExitingBB); 2633 for (BasicBlock *ExitingBB : ExitingBlocks) { 2634 // Can't rewrite non-branch yet. 2635 if (!isa<BranchInst>(ExitingBB->getTerminator())) 2636 continue; 2637 2638 if (!needsLFTR(L, ExitingBB)) 2639 continue; 2640 2641 const SCEV *BETakenCount = SE->getExitCount(L, ExitingBB); 2642 if (isa<SCEVCouldNotCompute>(BETakenCount)) 2643 continue; 2644 2645 // Better to fold to true (TODO: do so!) 2646 if (BETakenCount->isZero()) 2647 continue; 2648 2649 PHINode *IndVar = FindLoopCounter(L, ExitingBB, BETakenCount, SE); 2650 if (!IndVar) 2651 continue; 2652 2653 // Avoid high cost expansions. Note: This heuristic is questionable in 2654 // that our definition of "high cost" is not exactly principled. 2655 if (Rewriter.isHighCostExpansion(BETakenCount, L)) 2656 continue; 2657 2658 // Check preconditions for proper SCEVExpander operation. SCEV does not 2659 // express SCEVExpander's dependencies, such as LoopSimplify. Instead 2660 // any pass that uses the SCEVExpander must do it. This does not work 2661 // well for loop passes because SCEVExpander makes assumptions about 2662 // all loops, while LoopPassManager only forces the current loop to be 2663 // simplified. 2664 // 2665 // FIXME: SCEV expansion has no way to bail out, so the caller must 2666 // explicitly check any assumptions made by SCEV. Brittle. 2667 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BETakenCount); 2668 if (!AR || AR->getLoop()->getLoopPreheader()) 2669 Changed |= linearFunctionTestReplace(L, ExitingBB, 2670 BETakenCount, IndVar, 2671 Rewriter); 2672 } 2673 } 2674 // Clear the rewriter cache, because values that are in the rewriter's cache 2675 // can be deleted in the loop below, causing the AssertingVH in the cache to 2676 // trigger. 2677 Rewriter.clear(); 2678 2679 // Now that we're done iterating through lists, clean up any instructions 2680 // which are now dead. 2681 while (!DeadInsts.empty()) 2682 if (Instruction *Inst = 2683 dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val())) 2684 Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); 2685 2686 // The Rewriter may not be used from this point on. 2687 2688 // Loop-invariant instructions in the preheader that aren't used in the 2689 // loop may be sunk below the loop to reduce register pressure. 2690 Changed |= sinkUnusedInvariants(L); 2691 2692 // rewriteFirstIterationLoopExitValues does not rely on the computation of 2693 // trip count and therefore can further simplify exit values in addition to 2694 // rewriteLoopExitValues. 2695 Changed |= rewriteFirstIterationLoopExitValues(L); 2696 2697 // Clean up dead instructions. 2698 Changed |= DeleteDeadPHIs(L->getHeader(), TLI); 2699 2700 // Check a post-condition. 
2701   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2702          "Indvars did not preserve LCSSA!");
2703 
2704   // Verify that LFTR and any other changes have not interfered with SCEV's
2705   // ability to compute trip count.
2706 #ifndef NDEBUG
2707   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2708     SE->forgetLoop(L);
2709     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2710     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2711         SE->getTypeSizeInBits(NewBECount->getType()))
2712       NewBECount = SE->getTruncateOrNoop(NewBECount,
2713                                          BackedgeTakenCount->getType());
2714     else
2715       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2716                                                  NewBECount->getType());
2717     assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
2718   }
2719 #endif
2720 
2721   return Changed;
2722 }
2723 
2724 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2725                                           LoopStandardAnalysisResults &AR,
2726                                           LPMUpdater &) {
2727   Function *F = L.getHeader()->getParent();
2728   const DataLayout &DL = F->getParent()->getDataLayout();
2729 
2730   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
2731   if (!IVS.run(&L))
2732     return PreservedAnalyses::all();
2733 
2734   auto PA = getLoopPassPreservedAnalyses();
2735   PA.preserveSet<CFGAnalyses>();
2736   return PA;
2737 }
2738 
2739 namespace {
2740 
2741 struct IndVarSimplifyLegacyPass : public LoopPass {
2742   static char ID; // Pass identification, replacement for typeid
2743 
2744   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2745     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2746   }
2747 
2748   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2749     if (skipLoop(L))
2750       return false;
2751 
2752     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2753     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2754     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2755     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2756     auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
2757     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2758     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2759     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2760 
2761     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
2762     return IVS.run(L);
2763   }
2764 
2765   void getAnalysisUsage(AnalysisUsage &AU) const override {
2766     AU.setPreservesCFG();
2767     getLoopAnalysisUsage(AU);
2768   }
2769 };
2770 
2771 } // end anonymous namespace
2772 
2773 char IndVarSimplifyLegacyPass::ID = 0;
2774 
2775 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2776                       "Induction Variable Simplification", false, false)
2777 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2778 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2779                     "Induction Variable Simplification", false, false)
2780 
2781 Pass *llvm::createIndVarSimplifyPass() {
2782   return new IndVarSimplifyLegacyPass();
2783 }
2784