//===- IndVarSimplify.cpp - Induction Variable Elimination ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
"llvm/Transforms/Utils/SimplifyIndVar.h" 84 #include <cassert> 85 #include <cstdint> 86 #include <utility> 87 88 using namespace llvm; 89 90 #define DEBUG_TYPE "indvars" 91 92 STATISTIC(NumWidened , "Number of indvars widened"); 93 STATISTIC(NumReplaced , "Number of exit values replaced"); 94 STATISTIC(NumLFTR , "Number of loop exit tests replaced"); 95 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); 96 STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); 97 98 // Trip count verification can be enabled by default under NDEBUG if we 99 // implement a strong expression equivalence checker in SCEV. Until then, we 100 // use the verify-indvars flag, which may assert in some cases. 101 static cl::opt<bool> VerifyIndvars( 102 "verify-indvars", cl::Hidden, 103 cl::desc("Verify the ScalarEvolution result after running indvars")); 104 105 enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl }; 106 107 static cl::opt<ReplaceExitVal> ReplaceExitValue( 108 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), 109 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), 110 cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), 111 clEnumValN(OnlyCheapRepl, "cheap", 112 "only replace exit value when the cost is cheap"), 113 clEnumValN(NoHardUse, "noharduse", 114 "only replace exit values when loop def likely dead"), 115 clEnumValN(AlwaysRepl, "always", 116 "always replace exit value whenever possible"))); 117 118 static cl::opt<bool> UsePostIncrementRanges( 119 "indvars-post-increment-ranges", cl::Hidden, 120 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), 121 cl::init(true)); 122 123 static cl::opt<bool> 124 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), 125 cl::desc("Disable Linear Function Test Replace optimization")); 126 127 namespace { 128 129 struct RewritePhi; 130 131 class IndVarSimplify { 132 LoopInfo *LI; 133 ScalarEvolution *SE; 134 DominatorTree *DT; 135 const DataLayout &DL; 136 TargetLibraryInfo *TLI; 137 const TargetTransformInfo *TTI; 138 139 SmallVector<WeakTrackingVH, 16> DeadInsts; 140 141 bool isValidRewrite(Value *FromVal, Value *ToVal); 142 143 bool handleFloatingPointIV(Loop *L, PHINode *PH); 144 bool rewriteNonIntegerIVs(Loop *L); 145 146 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); 147 bool optimizeLoopExits(Loop *L); 148 149 bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet); 150 bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); 151 bool rewriteFirstIterationLoopExitValues(Loop *L); 152 bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const; 153 154 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 155 const SCEV *ExitCount, 156 PHINode *IndVar, SCEVExpander &Rewriter); 157 158 bool sinkUnusedInvariants(Loop *L); 159 160 public: 161 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, 162 const DataLayout &DL, TargetLibraryInfo *TLI, 163 TargetTransformInfo *TTI) 164 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {} 165 166 bool run(Loop *L); 167 }; 168 169 } // end anonymous namespace 170 171 /// Return true if the SCEV expansion generated by the rewriter can replace the 172 /// original value. SCEV guarantees that it produces the same value, but the way 173 /// it is produced may be illegal IR. Ideally, this function will only be 174 /// called for verification. 

/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR.  Ideally, this function will only be
/// called for verification.
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
///   for(double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for(int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if the increment operation is not an
  // add or the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // rewriting, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;

  // Ith incoming value.
  unsigned Ith;

  // Exit value after expansion.
  Value *Val;

  // True if the expansion was considered high cost.
  bool HighCost;

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
  SmallPtrSet<const Instruction *, 8> Visited;
  SmallVector<const Instruction *, 8> WorkList;
  Visited.insert(I);
  WorkList.push_back(I);
  while (!WorkList.empty()) {
    const Instruction *Curr = WorkList.pop_back_val();
    // This use is outside the loop, nothing to do.
    if (!L->contains(Curr))
      continue;
    // Do we assume it is a "hard" use which will not be eliminated easily?
    if (Curr->mayHaveSideEffects())
      return true;
    // Otherwise, add all its users to worklist.
    for (auto U : Curr->users()) {
      auto *UI = cast<Instruction>(U);
      if (Visited.insert(UI).second)
        WorkList.push_back(UI);
    }
  }
  return false;
}

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not an instruction or is not defined
        // in the loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop.  Evaluate the value it
        // contains when the loop exits, if possible.  We prefer to start with
        // expressions which are true for all exits (so as to maximize
        // expression reuse by the SCEVExpander), but resort to per-exit
        // evaluation if that fails.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (isa<SCEVCouldNotCompute>(ExitValue) ||
            !SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE)) {
          // TODO: This should probably be sunk into SCEV in some way; maybe a
          // getSCEVForExit(SCEV*, L, ExitingBB)?  It can be generalized for
          // most SCEV expressions and other recurrence types (e.g. shift
          // recurrences).  Is there existing code we can reuse?
          const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
          if (isa<SCEVCouldNotCompute>(ExitCount))
            continue;
          if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
            ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
          if (isa<SCEVCouldNotCompute>(ExitValue) ||
              !SE->isLoopInvariant(ExitValue, L) ||
              !isSafeToExpand(ExitValue, *SE))
            continue;
        }

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which cannot be optimized
        // away. Avoid doing so unless we know we have a value which computes
        // the ExitValue already. TODO: This should be merged into SCEV
        // expander to leverage its knowledge of existing expressions.
        if (ReplaceExitValue != AlwaysRepl &&
            !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
            hasHardUserWithinLoop(L, Inst))
          continue;

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);

        LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
                          << '\n'
                          << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

#ifndef NDEBUG
        // If we reuse an instruction from a loop which is neither L nor one of
        // its containing loops, we end up breaking LCSSA form for this loop by
        // creating a new use of its instruction.
        if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
          if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
            if (EVL != L)
              assert(EVL->contains(L) && "LCSSA breach detected!");
#endif

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  bool Changed = false;
  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, queue it for later deletion rather than
    // deleting it immediately, to avoid invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

/// Check whether it is possible to delete the loop after rewriting exit
/// value. If it is possible, ignore ReplaceExitValue and do rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // In the LoopDeletion pass, a loop can be deleted even when
  // ExitingBlocks.size() > 1. We do not handle the multiple-ExitingBlocks case
  // here, for simplicity.
  // TODO: If we see a testcase where a loop with multiple ExitingBlocks can be
  // deleted after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and
// provides the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than that on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable.
  // We could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}
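
// Illustrative sketch of the widening below (assumed example, not from the
// original comments): a narrow IV whose uses are all sign-extended, e.g.
//   %i = phi i32 [ 0, %preheader ], [ %i.next, %latch ]
//   %i.ext = sext i32 %i to i64
// can be replaced by a single i64 IV, making the sext (and eventually the
// i32 IV itself) dead.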

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def. This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative.  Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or
/// inserting truncs whenever we stop propagating the type.
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo *LI;
  Loop *L;
  ScalarEvolution *SE;
  DominatorTree *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// 0 for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
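  // For example (illustrative, not from the original comments): for a narrow
  // use 'sub i32 %other, %narrow.def', ExtendOperIdx is 0, so lhs and rhs are
  // swapped back so that the expression built below is ext(%other) - WideDef
  // rather than the operands in reversed order.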
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop? If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension).  For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive.  Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}

/// If the narrow use is an instruction whose two operands are the defining
/// instruction of DU and a load instruction, then we have the following:
/// if the load is hoisted outside the loop, then we do not reach this function,
/// because scalar evolution analysis works fine in widenIVUse with variables
/// hoisted outside the loop, and efficient code is subsequently generated by
/// not emitting truncate instructions. But when the load is not hoisted
/// (whether due to a limitation in alias analysis or due to a true legality
/// constraint), then scalar evolution cannot proceed with loop variant values
/// and inefficient code is generated. This function handles the non-hoisted
/// load special case by making the optimization generate the same type of code
/// for hoisted and non-hoisted loads (widen the use and eliminate the sign
/// extend instruction). This special case is important especially when the
/// induction variables are affecting the addressing mode in code generation.
bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return false;

  // The operand that is not defined by NarrowDef of DU. Let's call it the
  // other operand.
  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
         "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(NarrowUse);
  ExtendKind ExtKind = getExtendKind(NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return false;

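  // Illustrative pattern targeted here (assumed example, not from the original
  // comments):
  //   %val = load i32, i32* %p
  //   %add = add nsw i32 %narrow.iv, %val
  //   %ext = sext i32 %add to i64
  // where every user of %add is an extend to the wide type, so the add itself
  // can be performed in the wide type and the extends removed.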
1437 // But, we should look into relaxing this restriction later on. 1438 auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx)); 1439 if (I && I->getOpcode() != Instruction::Load) 1440 return false; 1441 1442 // Verifying that Defining operand is an AddRec 1443 const SCEV *Op1 = SE->getSCEV(WideDef); 1444 const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1); 1445 if (!AddRecOp1 || AddRecOp1->getLoop() != L) 1446 return false; 1447 // Verifying that other operand is an Extend. 1448 if (ExtKind == SignExtended) { 1449 if (!isa<SCEVSignExtendExpr>(ExtendOperExpr)) 1450 return false; 1451 } else { 1452 if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr)) 1453 return false; 1454 } 1455 1456 if (ExtKind == SignExtended) { 1457 for (Use &U : NarrowUse->uses()) { 1458 SExtInst *User = dyn_cast<SExtInst>(U.getUser()); 1459 if (!User || User->getType() != WideType) 1460 return false; 1461 } 1462 } else { // ExtKind == ZeroExtended 1463 for (Use &U : NarrowUse->uses()) { 1464 ZExtInst *User = dyn_cast<ZExtInst>(U.getUser()); 1465 if (!User || User->getType() != WideType) 1466 return false; 1467 } 1468 } 1469 1470 return true; 1471 } 1472 1473 /// Special Case for widening with variant Loads (see 1474 /// WidenIV::widenWithVariantLoadUse). This is the code generation part. 1475 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) { 1476 Instruction *NarrowUse = DU.NarrowUse; 1477 Instruction *NarrowDef = DU.NarrowDef; 1478 Instruction *WideDef = DU.WideDef; 1479 1480 ExtendKind ExtKind = getExtendKind(NarrowDef); 1481 1482 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); 1483 1484 // Generating a widening use instruction. 1485 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) 1486 ? WideDef 1487 : createExtendInst(NarrowUse->getOperand(0), WideType, 1488 ExtKind, NarrowUse); 1489 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) 1490 ? WideDef 1491 : createExtendInst(NarrowUse->getOperand(1), WideType, 1492 ExtKind, NarrowUse); 1493 1494 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 1495 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 1496 NarrowBO->getName()); 1497 IRBuilder<> Builder(NarrowUse); 1498 Builder.Insert(WideBO); 1499 WideBO->copyIRFlags(NarrowBO); 1500 1501 if (ExtKind == SignExtended) 1502 ExtendKindMap[NarrowUse] = SignExtended; 1503 else 1504 ExtendKindMap[NarrowUse] = ZeroExtended; 1505 1506 // Update the Use. 1507 if (ExtKind == SignExtended) { 1508 for (Use &U : NarrowUse->uses()) { 1509 SExtInst *User = dyn_cast<SExtInst>(U.getUser()); 1510 if (User && User->getType() == WideType) { 1511 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " 1512 << *WideBO << "\n"); 1513 ++NumElimExt; 1514 User->replaceAllUsesWith(WideBO); 1515 DeadInsts.emplace_back(User); 1516 } 1517 } 1518 } else { // ExtKind == ZeroExtended 1519 for (Use &U : NarrowUse->uses()) { 1520 ZExtInst *User = dyn_cast<ZExtInst>(U.getUser()); 1521 if (User && User->getType() == WideType) { 1522 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " 1523 << *WideBO << "\n"); 1524 ++NumElimExt; 1525 User->replaceAllUsesWith(WideBO); 1526 DeadInsts.emplace_back(User); 1527 } 1528 } 1529 } 1530 } 1531 1532 /// Determine whether an individual user of the narrow IV can be widened. If so, 1533 /// return the wide clone of the user. 
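///
/// As a minimal sketch (hypothetical IR, not taken from any test case): given
/// a narrow IV %iv that already has a wide twin %iv.wide, a user such as
///
///   %add = add nsw i32 %iv, 1
///   %ext = sext i32 %add to i64
///
/// can be rewritten around a wide clone
///
///   %add.wide = add nsw i64 %iv.wide, 1
///
/// after which the sext becomes redundant and the narrow chain can die.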
1534 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { 1535 assert(ExtendKindMap.count(DU.NarrowDef) && 1536 "Should already know the kind of extension used to widen NarrowDef"); 1537 1538 // Stop traversing the def-use chain at inner-loop phis or post-loop phis. 1539 if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) { 1540 if (LI->getLoopFor(UsePhi->getParent()) != L) { 1541 // For LCSSA phis, sink the truncate outside the loop. 1542 // After SimplifyCFG most loop exit targets have a single predecessor. 1543 // Otherwise fall back to a truncate within the loop. 1544 if (UsePhi->getNumOperands() != 1) 1545 truncateIVUse(DU, DT, LI); 1546 else { 1547 // Widening the PHI requires us to insert a trunc. The logical place 1548 // for this trunc is in the same BB as the PHI. This is not possible if 1549 // the BB is terminated by a catchswitch. 1550 if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator())) 1551 return nullptr; 1552 1553 PHINode *WidePhi = 1554 PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", 1555 UsePhi); 1556 WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0)); 1557 IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt()); 1558 Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType()); 1559 UsePhi->replaceAllUsesWith(Trunc); 1560 DeadInsts.emplace_back(UsePhi); 1561 LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to " 1562 << *WidePhi << "\n"); 1563 } 1564 return nullptr; 1565 } 1566 } 1567 1568 // This narrow use can be widened by a sext if it's non-negative or its narrow 1569 // def was widended by a sext. Same for zext. 1570 auto canWidenBySExt = [&]() { 1571 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended; 1572 }; 1573 auto canWidenByZExt = [&]() { 1574 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended; 1575 }; 1576 1577 // Our raison d'etre! Eliminate sign and zero extension. 1578 if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) || 1579 (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) { 1580 Value *NewDef = DU.WideDef; 1581 if (DU.NarrowUse->getType() != WideType) { 1582 unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType()); 1583 unsigned IVWidth = SE->getTypeSizeInBits(WideType); 1584 if (CastWidth < IVWidth) { 1585 // The cast isn't as wide as the IV, so insert a Trunc. 1586 IRBuilder<> Builder(DU.NarrowUse); 1587 NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType()); 1588 } 1589 else { 1590 // A wider extend was hidden behind a narrower one. This may induce 1591 // another round of IV widening in which the intermediate IV becomes 1592 // dead. It should be very rare. 1593 LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi 1594 << " not wide enough to subsume " << *DU.NarrowUse 1595 << "\n"); 1596 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); 1597 NewDef = DU.NarrowUse; 1598 } 1599 } 1600 if (NewDef != DU.NarrowUse) { 1601 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse 1602 << " replaced by " << *DU.WideDef << "\n"); 1603 ++NumElimExt; 1604 DU.NarrowUse->replaceAllUsesWith(NewDef); 1605 DeadInsts.emplace_back(DU.NarrowUse); 1606 } 1607 // Now that the extend is gone, we want to expose it's uses for potential 1608 // further simplification. We don't need to directly inform SimplifyIVUsers 1609 // of the new users, because their parent IV will be processed later as a 1610 // new loop phi. 
If we preserved IVUsers analysis, we would also want to 1611 // push the uses of WideDef here. 1612 1613 // No further widening is needed. The deceased [sz]ext had done it for us. 1614 return nullptr; 1615 } 1616 1617 // Does this user itself evaluate to a recurrence after widening? 1618 WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU); 1619 if (!WideAddRec.first) 1620 WideAddRec = getWideRecurrence(DU); 1621 1622 assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown)); 1623 if (!WideAddRec.first) { 1624 // If use is a loop condition, try to promote the condition instead of 1625 // truncating the IV first. 1626 if (widenLoopCompare(DU)) 1627 return nullptr; 1628 1629 // We are here about to generate a truncate instruction that may hurt 1630 // performance because the scalar evolution expression computed earlier 1631 // in WideAddRec.first does not indicate a polynomial induction expression. 1632 // In that case, look at the operands of the use instruction to determine 1633 // if we can still widen the use instead of truncating its operand. 1634 if (widenWithVariantLoadUse(DU)) { 1635 widenWithVariantLoadUseCodegen(DU); 1636 return nullptr; 1637 } 1638 1639 // This user does not evaluate to a recurrence after widening, so don't 1640 // follow it. Instead insert a Trunc to kill off the original use, 1641 // eventually isolating the original narrow IV so it can be removed. 1642 truncateIVUse(DU, DT, LI); 1643 return nullptr; 1644 } 1645 // Assume block terminators cannot evaluate to a recurrence. We can't to 1646 // insert a Trunc after a terminator if there happens to be a critical edge. 1647 assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() && 1648 "SCEV is not expected to evaluate a block terminator"); 1649 1650 // Reuse the IV increment that SCEVExpander created as long as it dominates 1651 // NarrowUse. 1652 Instruction *WideUse = nullptr; 1653 if (WideAddRec.first == WideIncExpr && 1654 Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) 1655 WideUse = WideInc; 1656 else { 1657 WideUse = cloneIVUser(DU, WideAddRec.first); 1658 if (!WideUse) 1659 return nullptr; 1660 } 1661 // Evaluation of WideAddRec ensured that the narrow expression could be 1662 // extended outside the loop without overflow. This suggests that the wide use 1663 // evaluates to the same expression as the extended narrow use, but doesn't 1664 // absolutely guarantee it. Hence the following failsafe check. In rare cases 1665 // where it fails, we simply throw away the newly created wide use. 1666 if (WideAddRec.first != SE->getSCEV(WideUse)) { 1667 LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " 1668 << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first 1669 << "\n"); 1670 DeadInsts.emplace_back(WideUse); 1671 return nullptr; 1672 } 1673 1674 ExtendKindMap[DU.NarrowUse] = WideAddRec.second; 1675 // Returning WideUse pushes it on the worklist. 1676 return WideUse; 1677 } 1678 1679 /// Add eligible users of NarrowDef to NarrowIVUsers. 1680 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { 1681 const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); 1682 bool NonNegativeDef = 1683 SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, 1684 SE->getConstant(NarrowSCEV->getType(), 0)); 1685 for (User *U : NarrowDef->users()) { 1686 Instruction *NarrowUser = cast<Instruction>(U); 1687 1688 // Handle data flow merges and bizarre phi cycles. 
1689 if (!Widened.insert(NarrowUser).second) 1690 continue; 1691 1692 bool NonNegativeUse = false; 1693 if (!NonNegativeDef) { 1694 // We might have a control-dependent range information for this context. 1695 if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser)) 1696 NonNegativeUse = RangeInfo->getSignedMin().isNonNegative(); 1697 } 1698 1699 NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, 1700 NonNegativeDef || NonNegativeUse); 1701 } 1702 } 1703 1704 /// Process a single induction variable. First use the SCEVExpander to create a 1705 /// wide induction variable that evaluates to the same recurrence as the 1706 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's 1707 /// def-use chain. After widenIVUse has processed all interesting IV users, the 1708 /// narrow IV will be isolated for removal by DeleteDeadPHIs. 1709 /// 1710 /// It would be simpler to delete uses as they are processed, but we must avoid 1711 /// invalidating SCEV expressions. 1712 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { 1713 // Is this phi an induction variable? 1714 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); 1715 if (!AddRec) 1716 return nullptr; 1717 1718 // Widen the induction variable expression. 1719 const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended 1720 ? SE->getSignExtendExpr(AddRec, WideType) 1721 : SE->getZeroExtendExpr(AddRec, WideType); 1722 1723 assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && 1724 "Expect the new IV expression to preserve its type"); 1725 1726 // Can the IV be extended outside the loop without overflow? 1727 AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); 1728 if (!AddRec || AddRec->getLoop() != L) 1729 return nullptr; 1730 1731 // An AddRec must have loop-invariant operands. Since this AddRec is 1732 // materialized by a loop header phi, the expression cannot have any post-loop 1733 // operands, so they must dominate the loop header. 1734 assert( 1735 SE->properlyDominates(AddRec->getStart(), L->getHeader()) && 1736 SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) && 1737 "Loop header phi recurrence inputs do not dominate the loop"); 1738 1739 // Iterate over IV uses (including transitive ones) looking for IV increments 1740 // of the form 'add nsw %iv, <const>'. For each increment and each use of 1741 // the increment calculate control-dependent range information basing on 1742 // dominating conditions inside of the loop (e.g. a range check inside of the 1743 // loop). Calculated ranges are stored in PostIncRangeInfos map. 1744 // 1745 // Control-dependent range information is later used to prove that a narrow 1746 // definition is not negative (see pushNarrowIVUsers). It's difficult to do 1747 // this on demand because when pushNarrowIVUsers needs this information some 1748 // of the dominating conditions might be already widened. 1749 if (UsePostIncrementRanges) 1750 calculatePostIncRanges(OrigPhi); 1751 1752 // The rewriter provides a value for the desired IV expression. This may 1753 // either find an existing phi or materialize a new one. Either way, we 1754 // expect a well-formed cyclic phi-with-increments. i.e. any operand not part 1755 // of the phi-SCC dominates the loop entry. 
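  //
  // Illustrative shape of what we expect back (names are placeholders only):
  //   header:  %iv.wide      = phi i64 [ %start, %preheader ], [ %iv.wide.next, %latch ]
  //   latch:   %iv.wide.next = add nsw i64 %iv.wide, %step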
1756 Instruction *InsertPt = &L->getHeader()->front(); 1757 WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt)); 1758 1759 // Remembering the WideIV increment generated by SCEVExpander allows 1760 // widenIVUse to reuse it when widening the narrow IV's increment. We don't 1761 // employ a general reuse mechanism because the call above is the only call to 1762 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. 1763 if (BasicBlock *LatchBlock = L->getLoopLatch()) { 1764 WideInc = 1765 cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); 1766 WideIncExpr = SE->getSCEV(WideInc); 1767 // Propagate the debug location associated with the original loop increment 1768 // to the new (widened) increment. 1769 auto *OrigInc = 1770 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); 1771 WideInc->setDebugLoc(OrigInc->getDebugLoc()); 1772 } 1773 1774 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); 1775 ++NumWidened; 1776 1777 // Traverse the def-use chain using a worklist starting at the original IV. 1778 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); 1779 1780 Widened.insert(OrigPhi); 1781 pushNarrowIVUsers(OrigPhi, WidePhi); 1782 1783 while (!NarrowIVUsers.empty()) { 1784 NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); 1785 1786 // Process a def-use edge. This may replace the use, so don't hold a 1787 // use_iterator across it. 1788 Instruction *WideUse = widenIVUse(DU, Rewriter); 1789 1790 // Follow all def-use edges from the previous narrow use. 1791 if (WideUse) 1792 pushNarrowIVUsers(DU.NarrowUse, WideUse); 1793 1794 // widenIVUse may have removed the def-use edge. 1795 if (DU.NarrowDef->use_empty()) 1796 DeadInsts.emplace_back(DU.NarrowDef); 1797 } 1798 1799 // Attach any debug information to the new PHI. Since OrigPhi and WidePHI 1800 // evaluate the same recurrence, we can just copy the debug info over. 1801 SmallVector<DbgValueInst *, 1> DbgValues; 1802 llvm::findDbgValues(DbgValues, OrigPhi); 1803 auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(), 1804 ValueAsMetadata::get(WidePhi)); 1805 for (auto &DbgValue : DbgValues) 1806 DbgValue->setOperand(0, MDPhi); 1807 return WidePhi; 1808 } 1809 1810 /// Calculates control-dependent range for the given def at the given context 1811 /// by looking at dominating conditions inside of the loop 1812 void WidenIV::calculatePostIncRange(Instruction *NarrowDef, 1813 Instruction *NarrowUser) { 1814 using namespace llvm::PatternMatch; 1815 1816 Value *NarrowDefLHS; 1817 const APInt *NarrowDefRHS; 1818 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), 1819 m_APInt(NarrowDefRHS))) || 1820 !NarrowDefRHS->isNonNegative()) 1821 return; 1822 1823 auto UpdateRangeFromCondition = [&] (Value *Condition, 1824 bool TrueDest) { 1825 CmpInst::Predicate Pred; 1826 Value *CmpRHS; 1827 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), 1828 m_Value(CmpRHS)))) 1829 return; 1830 1831 CmpInst::Predicate P = 1832 TrueDest ? 
Pred : CmpInst::getInversePredicate(Pred); 1833 1834 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); 1835 auto CmpConstrainedLHSRange = 1836 ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); 1837 auto NarrowDefRange = 1838 CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS); 1839 1840 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); 1841 }; 1842 1843 auto UpdateRangeFromGuards = [&](Instruction *Ctx) { 1844 if (!HasGuards) 1845 return; 1846 1847 for (Instruction &I : make_range(Ctx->getIterator().getReverse(), 1848 Ctx->getParent()->rend())) { 1849 Value *C = nullptr; 1850 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) 1851 UpdateRangeFromCondition(C, /*TrueDest=*/true); 1852 } 1853 }; 1854 1855 UpdateRangeFromGuards(NarrowUser); 1856 1857 BasicBlock *NarrowUserBB = NarrowUser->getParent(); 1858 // If NarrowUserBB is statically unreachable asking dominator queries may 1859 // yield surprising results. (e.g. the block may not have a dom tree node) 1860 if (!DT->isReachableFromEntry(NarrowUserBB)) 1861 return; 1862 1863 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); 1864 L->contains(DTB->getBlock()); 1865 DTB = DTB->getIDom()) { 1866 auto *BB = DTB->getBlock(); 1867 auto *TI = BB->getTerminator(); 1868 UpdateRangeFromGuards(TI); 1869 1870 auto *BI = dyn_cast<BranchInst>(TI); 1871 if (!BI || !BI->isConditional()) 1872 continue; 1873 1874 auto *TrueSuccessor = BI->getSuccessor(0); 1875 auto *FalseSuccessor = BI->getSuccessor(1); 1876 1877 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { 1878 return BBE.isSingleEdge() && 1879 DT->dominates(BBE, NarrowUser->getParent()); 1880 }; 1881 1882 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) 1883 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); 1884 1885 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) 1886 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); 1887 } 1888 } 1889 1890 /// Calculates PostIncRangeInfos map for the given IV 1891 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { 1892 SmallPtrSet<Instruction *, 16> Visited; 1893 SmallVector<Instruction *, 6> Worklist; 1894 Worklist.push_back(OrigPhi); 1895 Visited.insert(OrigPhi); 1896 1897 while (!Worklist.empty()) { 1898 Instruction *NarrowDef = Worklist.pop_back_val(); 1899 1900 for (Use &U : NarrowDef->uses()) { 1901 auto *NarrowUser = cast<Instruction>(U.getUser()); 1902 1903 // Don't go looking outside the current loop. 1904 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; 1905 if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) 1906 continue; 1907 1908 if (!Visited.insert(NarrowUser).second) 1909 continue; 1910 1911 Worklist.push_back(NarrowUser); 1912 1913 calculatePostIncRange(NarrowDef, NarrowUser); 1914 } 1915 } 1916 } 1917 1918 //===----------------------------------------------------------------------===// 1919 // Live IV Reduction - Minimize IVs live across the loop. 1920 //===----------------------------------------------------------------------===// 1921 1922 //===----------------------------------------------------------------------===// 1923 // Simplification of IV users based on SCEV evaluation. 
//===----------------------------------------------------------------------===//

namespace {

class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
      : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode*, 8> LoopPhis;
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    LoopPhis.push_back(cast<PHINode>(I));
  }
  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs.
    // This forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extension until needed, and avoid running other
    // SCEV-based analyses prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |=
          simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
      if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated Phi node if so. Otherwise, return null. Note
/// that this is less general than SCEV's AddRec checking.
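///
/// For example (illustrative names only), given
///   %iv      = phi i32 [ %start, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add i32 %iv, %step        ; %step loop-invariant
/// passing %iv.next returns %iv, while a value that is not such a simple
/// header-phi increment returns null.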
2012 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) { 2013 Instruction *IncI = dyn_cast<Instruction>(IncV); 2014 if (!IncI) 2015 return nullptr; 2016 2017 switch (IncI->getOpcode()) { 2018 case Instruction::Add: 2019 case Instruction::Sub: 2020 break; 2021 case Instruction::GetElementPtr: 2022 // An IV counter must preserve its type. 2023 if (IncI->getNumOperands() == 2) 2024 break; 2025 LLVM_FALLTHROUGH; 2026 default: 2027 return nullptr; 2028 } 2029 2030 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0)); 2031 if (Phi && Phi->getParent() == L->getHeader()) { 2032 if (L->isLoopInvariant(IncI->getOperand(1))) 2033 return Phi; 2034 return nullptr; 2035 } 2036 if (IncI->getOpcode() == Instruction::GetElementPtr) 2037 return nullptr; 2038 2039 // Allow add/sub to be commuted. 2040 Phi = dyn_cast<PHINode>(IncI->getOperand(1)); 2041 if (Phi && Phi->getParent() == L->getHeader()) { 2042 if (L->isLoopInvariant(IncI->getOperand(0))) 2043 return Phi; 2044 } 2045 return nullptr; 2046 } 2047 2048 /// Whether the current loop exit test is based on this value. Currently this 2049 /// is limited to a direct use in the loop condition. 2050 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) { 2051 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2052 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition()); 2053 // TODO: Allow non-icmp loop test. 2054 if (!ICmp) 2055 return false; 2056 2057 // TODO: Allow indirect use. 2058 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V; 2059 } 2060 2061 /// linearFunctionTestReplace policy. Return true unless we can show that the 2062 /// current exit test is already sufficiently canonical. 2063 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) { 2064 assert(L->getLoopLatch() && "Must be in simplified form"); 2065 2066 // Avoid converting a constant or loop invariant test back to a runtime 2067 // test. This is critical for when SCEV's cached ExitCount is less precise 2068 // than the current IR (such as after we've proven a particular exit is 2069 // actually dead and thus the BE count never reaches our ExitCount.) 2070 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2071 if (L->isLoopInvariant(BI->getCondition())) 2072 return false; 2073 2074 // Do LFTR to simplify the exit condition to an ICMP. 2075 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); 2076 if (!Cond) 2077 return true; 2078 2079 // Do LFTR to simplify the exit ICMP to EQ/NE 2080 ICmpInst::Predicate Pred = Cond->getPredicate(); 2081 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ) 2082 return true; 2083 2084 // Look for a loop invariant RHS 2085 Value *LHS = Cond->getOperand(0); 2086 Value *RHS = Cond->getOperand(1); 2087 if (!L->isLoopInvariant(RHS)) { 2088 if (!L->isLoopInvariant(LHS)) 2089 return true; 2090 std::swap(LHS, RHS); 2091 } 2092 // Look for a simple IV counter LHS 2093 PHINode *Phi = dyn_cast<PHINode>(LHS); 2094 if (!Phi) 2095 Phi = getLoopPhiForCounter(LHS, L); 2096 2097 if (!Phi) 2098 return true; 2099 2100 // Do LFTR if PHI node is defined in the loop, but is *not* a counter. 2101 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch()); 2102 if (Idx < 0) 2103 return true; 2104 2105 // Do LFTR if the exit condition's IV is *not* a simple counter. 2106 Value *IncV = Phi->getIncomingValue(Idx); 2107 return Phi != getLoopPhiForCounter(IncV, L); 2108 } 2109 2110 /// Return true if undefined behavior would provable be executed on the path to 2111 /// OnPathTo if Root produced a posion result. 
Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison. This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist. Note that a false result conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // The basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit).
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction*, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading to our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users. Safe as false is a conservative result.
    if (!propagatesFullPoison(I) && I != Root)
      continue;

    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // the way to the exiting bb.
  return false;
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example,
  // Arguments may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value*, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if this IV has no uses other than the (soon to be rewritten)
/// loop exit test, i.e. it is almost dead.
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
  Value *IncV = Phi->getIncomingValue(LatchIdx);

  for (User *U : Phi->users())
    if (U != Cond && U != IncV) return false;

  for (User *U : IncV->users())
    if (U != Cond && U != Phi) return false;
  return true;
}

/// Return true if the given phi is a "counter" in L. A counter is an add
/// recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1. Note that L must have exactly one latch.
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi);
}

/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
2277 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock); 2278 if (!isLoopExitTestBasedOn(Phi, ExitingBB) && 2279 !isLoopExitTestBasedOn(IncPhi, ExitingBB)) 2280 continue; 2281 } 2282 2283 // Avoid introducing undefined behavior due to poison which didn't exist in 2284 // the original program. (Annoyingly, the rules for poison and undef 2285 // propagation are distinct, so this does NOT cover the undef case above.) 2286 // We have to ensure that we don't introduce UB by introducing a use on an 2287 // iteration where said IV produces poison. Our strategy here differs for 2288 // pointers and integer IVs. For integers, we strip and reinfer as needed, 2289 // see code in linearFunctionTestReplace. For pointers, we restrict 2290 // transforms as there is no good way to reinfer inbounds once lost. 2291 if (!Phi->getType()->isIntegerTy() && 2292 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) 2293 continue; 2294 2295 const SCEV *Init = AR->getStart(); 2296 2297 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { 2298 // Don't force a live loop counter if another IV can be used. 2299 if (AlmostDeadIV(Phi, LatchBlock, Cond)) 2300 continue; 2301 2302 // Prefer to count-from-zero. This is a more "canonical" counter form. It 2303 // also prefers integer to pointer IVs. 2304 if (BestInit->isZero() != Init->isZero()) { 2305 if (BestInit->isZero()) 2306 continue; 2307 } 2308 // If two IVs both count from zero or both count from nonzero then the 2309 // narrower is likely a dead phi that has been widened. Use the wider phi 2310 // to allow the other to be eliminated. 2311 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) 2312 continue; 2313 } 2314 BestPhi = Phi; 2315 BestInit = Init; 2316 } 2317 return BestPhi; 2318 } 2319 2320 /// Insert an IR expression which computes the value held by the IV IndVar 2321 /// (which must be an loop counter w/unit stride) after the backedge of loop L 2322 /// is taken ExitCount times. 2323 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, 2324 const SCEV *ExitCount, bool UsePostInc, Loop *L, 2325 SCEVExpander &Rewriter, ScalarEvolution *SE) { 2326 assert(isLoopCounter(IndVar, L, SE)); 2327 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 2328 const SCEV *IVInit = AR->getStart(); 2329 2330 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter 2331 // finds a valid pointer IV. Sign extend ExitCount in order to materialize a 2332 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing 2333 // the existing GEPs whenever possible. 2334 if (IndVar->getType()->isPointerTy() && 2335 !ExitCount->getType()->isPointerTy()) { 2336 // IVOffset will be the new GEP offset that is interpreted by GEP as a 2337 // signed value. ExitCount on the other hand represents the loop trip count, 2338 // which is an unsigned value. FindLoopCounter only allows induction 2339 // variables that have a positive unit stride of one. This means we don't 2340 // have to handle the case of negative offsets (yet) and just need to zero 2341 // extend ExitCount. 2342 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); 2343 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy); 2344 if (UsePostInc) 2345 IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy)); 2346 2347 // Expand the code for the iteration count. 
2348 assert(SE->isLoopInvariant(IVOffset, L) && 2349 "Computed iteration count is not loop invariant!"); 2350 2351 // We could handle pointer IVs other than i8*, but we need to compensate for 2352 // gep index scaling. 2353 assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), 2354 cast<PointerType>(IndVar->getType()) 2355 ->getElementType())->isOne() && 2356 "unit stride pointer IV must be i8*"); 2357 2358 const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset); 2359 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2360 return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI); 2361 } else { 2362 // In any other case, convert both IVInit and ExitCount to integers before 2363 // comparing. This may result in SCEV expansion of pointers, but in practice 2364 // SCEV will fold the pointer arithmetic away as such: 2365 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). 2366 // 2367 // Valid Cases: (1) both integers is most common; (2) both may be pointers 2368 // for simple memset-style loops. 2369 // 2370 // IVInit integer and ExitCount pointer would only occur if a canonical IV 2371 // were generated on top of case #2, which is not expected. 2372 2373 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 2374 // For unit stride, IVCount = Start + ExitCount with 2's complement 2375 // overflow. 2376 2377 // For integer IVs, truncate the IV before computing IVInit + BECount, 2378 // unless we know apriori that the limit must be a constant when evaluated 2379 // in the bitwidth of the IV. We prefer (potentially) keeping a truncate 2380 // of the IV in the loop over a (potentially) expensive expansion of the 2381 // widened exit count add(zext(add)) expression. 2382 if (SE->getTypeSizeInBits(IVInit->getType()) 2383 > SE->getTypeSizeInBits(ExitCount->getType())) { 2384 if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount)) 2385 ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType()); 2386 else 2387 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType()); 2388 } 2389 2390 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount); 2391 2392 if (UsePostInc) 2393 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); 2394 2395 // Expand the code for the iteration count. 2396 assert(SE->isLoopInvariant(IVLimit, L) && 2397 "Computed iteration count is not loop invariant!"); 2398 // Ensure that we generate the same type as IndVar, or a smaller integer 2399 // type. In the presence of null pointer values, we have an integer type 2400 // SCEV expression (IVInit) for a pointer type IV value (IndVar). 2401 Type *LimitTy = ExitCount->getType()->isPointerTy() ? 2402 IndVar->getType() : ExitCount->getType(); 2403 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2404 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); 2405 } 2406 } 2407 2408 /// This method rewrites the exit condition of the loop to be a canonical != 2409 /// comparison against the incremented loop induction variable. This pass is 2410 /// able to rewrite the exit tests of any loop where the SCEV analysis can 2411 /// determine a loop-invariant trip count of the loop, which is actually a much 2412 /// broader range than just linear tests. 
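///
/// As an illustration (hypothetical IR, not from a specific test), an exit
/// test such as
///   %cmp = icmp slt i32 %iv.next, %len
/// may be rewritten into the canonical form
///   %exitcond = icmp ne i32 %iv.next, %limit
/// where %limit is the loop-invariant value produced by genLoopLimit.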
2413 bool IndVarSimplify:: 2414 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 2415 const SCEV *ExitCount, 2416 PHINode *IndVar, SCEVExpander &Rewriter) { 2417 assert(L->getLoopLatch() && "Loop no longer in simplified form?"); 2418 assert(isLoopCounter(IndVar, L, SE)); 2419 Instruction * const IncVar = 2420 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch())); 2421 2422 // Initialize CmpIndVar to the preincremented IV. 2423 Value *CmpIndVar = IndVar; 2424 bool UsePostInc = false; 2425 2426 // If the exiting block is the same as the backedge block, we prefer to 2427 // compare against the post-incremented value, otherwise we must compare 2428 // against the preincremented value. 2429 if (ExitingBB == L->getLoopLatch()) { 2430 // For pointer IVs, we chose to not strip inbounds which requires us not 2431 // to add a potentially UB introducing use. We need to either a) show 2432 // the loop test we're modifying is already in post-inc form, or b) show 2433 // that adding a use must not introduce UB. 2434 bool SafeToPostInc = 2435 IndVar->getType()->isIntegerTy() || 2436 isLoopExitTestBasedOn(IncVar, ExitingBB) || 2437 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT); 2438 if (SafeToPostInc) { 2439 UsePostInc = true; 2440 CmpIndVar = IncVar; 2441 } 2442 } 2443 2444 // It may be necessary to drop nowrap flags on the incrementing instruction 2445 // if either LFTR moves from a pre-inc check to a post-inc check (in which 2446 // case the increment might have previously been poison on the last iteration 2447 // only) or if LFTR switches to a different IV that was previously dynamically 2448 // dead (and as such may be arbitrarily poison). We remove any nowrap flags 2449 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc 2450 // check), because the pre-inc addrec flags may be adopted from the original 2451 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags. 2452 // TODO: This handling is inaccurate for one case: If we switch to a 2453 // dynamically dead IV that wraps on the first loop iteration only, which is 2454 // not covered by the post-inc addrec. (If the new IV was not dynamically 2455 // dead, it could not be poison on the first iteration in the first place.) 2456 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) { 2457 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar)); 2458 if (BO->hasNoUnsignedWrap()) 2459 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap()); 2460 if (BO->hasNoSignedWrap()) 2461 BO->setHasNoSignedWrap(AR->hasNoSignedWrap()); 2462 } 2463 2464 Value *ExitCnt = genLoopLimit( 2465 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE); 2466 assert(ExitCnt->getType()->isPointerTy() == 2467 IndVar->getType()->isPointerTy() && 2468 "genLoopLimit missed a cast"); 2469 2470 // Insert a new icmp_ne or icmp_eq instruction before the branch. 2471 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2472 ICmpInst::Predicate P; 2473 if (L->contains(BI->getSuccessor(0))) 2474 P = ICmpInst::ICMP_NE; 2475 else 2476 P = ICmpInst::ICMP_EQ; 2477 2478 IRBuilder<> Builder(BI); 2479 2480 // The new loop exit condition should reuse the debug location of the 2481 // original loop exit condition. 
2482 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition())) 2483 Builder.SetCurrentDebugLocation(Cond->getDebugLoc()); 2484 2485 // For integer IVs, if we evaluated the limit in the narrower bitwidth to 2486 // avoid the expensive expansion of the limit expression in the wider type, 2487 // emit a truncate to narrow the IV to the ExitCount type. This is safe 2488 // since we know (from the exit count bitwidth), that we can't self-wrap in 2489 // the narrower type. 2490 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); 2491 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); 2492 if (CmpIndVarSize > ExitCntSize) { 2493 assert(!CmpIndVar->getType()->isPointerTy() && 2494 !ExitCnt->getType()->isPointerTy()); 2495 2496 // Before resorting to actually inserting the truncate, use the same 2497 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend 2498 // the other side of the comparison instead. We still evaluate the limit 2499 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to 2500 // a truncate within in. 2501 bool Extended = false; 2502 const SCEV *IV = SE->getSCEV(CmpIndVar); 2503 const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar), 2504 ExitCnt->getType()); 2505 const SCEV *ZExtTrunc = 2506 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType()); 2507 2508 if (ZExtTrunc == IV) { 2509 Extended = true; 2510 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(), 2511 "wide.trip.count"); 2512 } else { 2513 const SCEV *SExtTrunc = 2514 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType()); 2515 if (SExtTrunc == IV) { 2516 Extended = true; 2517 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(), 2518 "wide.trip.count"); 2519 } 2520 } 2521 2522 if (Extended) { 2523 bool Discard; 2524 L->makeLoopInvariant(ExitCnt, Discard); 2525 } else 2526 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), 2527 "lftr.wideiv"); 2528 } 2529 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 2530 << " LHS:" << *CmpIndVar << '\n' 2531 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==") 2532 << "\n" 2533 << " RHS:\t" << *ExitCnt << "\n" 2534 << "ExitCount:\t" << *ExitCount << "\n" 2535 << " was: " << *BI->getCondition() << "\n"); 2536 2537 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); 2538 Value *OrigCond = BI->getCondition(); 2539 // It's tempting to use replaceAllUsesWith here to fully replace the old 2540 // comparison, but that's not immediately safe, since users of the old 2541 // comparison may not be dominated by the new comparison. Instead, just 2542 // update the branch to use the new comparison; in the common case this 2543 // will make old comparison dead. 2544 BI->setCondition(Cond); 2545 DeadInsts.push_back(OrigCond); 2546 2547 ++NumLFTR; 2548 return true; 2549 } 2550 2551 //===----------------------------------------------------------------------===// 2552 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders. 2553 //===----------------------------------------------------------------------===// 2554 2555 /// If there's a single exit block, sink any loop-invariant values that 2556 /// were defined in the preheader but not used inside the loop into the 2557 /// exit block to reduce register pressure in the loop. 
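///
/// For instance (illustrative only), a GEP computed in the preheader whose
/// only use is in the exit block:
///   preheader: %p = getelementptr i8, i8* %base, i64 %off
///   loop:      ...                ; no use of %p
///   exit:      store i8 0, i8* %p
/// can be moved down into the exit block so that %p is not live across the
/// loop body.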
2558 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { 2559 BasicBlock *ExitBlock = L->getExitBlock(); 2560 if (!ExitBlock) return false; 2561 2562 BasicBlock *Preheader = L->getLoopPreheader(); 2563 if (!Preheader) return false; 2564 2565 bool MadeAnyChanges = false; 2566 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); 2567 BasicBlock::iterator I(Preheader->getTerminator()); 2568 while (I != Preheader->begin()) { 2569 --I; 2570 // New instructions were inserted at the end of the preheader. 2571 if (isa<PHINode>(I)) 2572 break; 2573 2574 // Don't move instructions which might have side effects, since the side 2575 // effects need to complete before instructions inside the loop. Also don't 2576 // move instructions which might read memory, since the loop may modify 2577 // memory. Note that it's okay if the instruction might have undefined 2578 // behavior: LoopSimplify guarantees that the preheader dominates the exit 2579 // block. 2580 if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 2581 continue; 2582 2583 // Skip debug info intrinsics. 2584 if (isa<DbgInfoIntrinsic>(I)) 2585 continue; 2586 2587 // Skip eh pad instructions. 2588 if (I->isEHPad()) 2589 continue; 2590 2591 // Don't sink alloca: we never want to sink static alloca's out of the 2592 // entry block, and correctly sinking dynamic alloca's requires 2593 // checks for stacksave/stackrestore intrinsics. 2594 // FIXME: Refactor this check somehow? 2595 if (isa<AllocaInst>(I)) 2596 continue; 2597 2598 // Determine if there is a use in or before the loop (direct or 2599 // otherwise). 2600 bool UsedInLoop = false; 2601 for (Use &U : I->uses()) { 2602 Instruction *User = cast<Instruction>(U.getUser()); 2603 BasicBlock *UseBB = User->getParent(); 2604 if (PHINode *P = dyn_cast<PHINode>(User)) { 2605 unsigned i = 2606 PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 2607 UseBB = P->getIncomingBlock(i); 2608 } 2609 if (UseBB == Preheader || L->contains(UseBB)) { 2610 UsedInLoop = true; 2611 break; 2612 } 2613 } 2614 2615 // If there is, the def must remain in the preheader. 2616 if (UsedInLoop) 2617 continue; 2618 2619 // Otherwise, sink it to the exit block. 2620 Instruction *ToMove = &*I; 2621 bool Done = false; 2622 2623 if (I != Preheader->begin()) { 2624 // Skip debug info intrinsics. 2625 do { 2626 --I; 2627 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); 2628 2629 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) 2630 Done = true; 2631 } else { 2632 Done = true; 2633 } 2634 2635 MadeAnyChanges = true; 2636 ToMove->moveBefore(*ExitBlock, InsertPt); 2637 if (Done) break; 2638 InsertPt = ToMove->getIterator(); 2639 } 2640 2641 return MadeAnyChanges; 2642 } 2643 2644 bool IndVarSimplify::optimizeLoopExits(Loop *L) { 2645 SmallVector<BasicBlock*, 16> ExitingBlocks; 2646 L->getExitingBlocks(ExitingBlocks); 2647 2648 // Form an expression for the maximum exit count possible for this loop. We 2649 // merge the max and exact information to approximate a version of 2650 // getMaxBackedgeTakenInfo which isn't restricted to just constants. 2651 // TODO: factor this out as a version of getMaxBackedgeTakenCount which 2652 // isn't guaranteed to return a constant. 
2653 SmallVector<const SCEV*, 4> ExitCounts; 2654 const SCEV *MaxConstEC = SE->getMaxBackedgeTakenCount(L); 2655 if (!isa<SCEVCouldNotCompute>(MaxConstEC)) 2656 ExitCounts.push_back(MaxConstEC); 2657 for (BasicBlock *ExitingBB : ExitingBlocks) { 2658 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2659 if (!isa<SCEVCouldNotCompute>(ExitCount)) { 2660 assert(DT->dominates(ExitingBB, L->getLoopLatch()) && 2661 "We should only have known counts for exiting blocks that " 2662 "dominate latch!"); 2663 ExitCounts.push_back(ExitCount); 2664 } 2665 } 2666 if (ExitCounts.empty()) 2667 return false; 2668 const SCEV *MaxExitCount = SE->getUMinFromMismatchedTypes(ExitCounts); 2669 2670 bool Changed = false; 2671 for (BasicBlock *ExitingBB : ExitingBlocks) { 2672 // If our exitting block exits multiple loops, we can only rewrite the 2673 // innermost one. Otherwise, we're changing how many times the innermost 2674 // loop runs before it exits. 2675 if (LI->getLoopFor(ExitingBB) != L) 2676 continue; 2677 2678 // Can't rewrite non-branch yet. 2679 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 2680 if (!BI) 2681 continue; 2682 2683 // If already constant, nothing to do. 2684 if (isa<Constant>(BI->getCondition())) 2685 continue; 2686 2687 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2688 if (isa<SCEVCouldNotCompute>(ExitCount)) 2689 continue; 2690 2691 // If we know we'd exit on the first iteration, rewrite the exit to 2692 // reflect this. This does not imply the loop must exit through this 2693 // exit; there may be an earlier one taken on the first iteration. 2694 // TODO: Given we know the backedge can't be taken, we should go ahead 2695 // and break it. Or at least, kill all the header phis and simplify. 2696 if (ExitCount->isZero()) { 2697 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); 2698 auto *OldCond = BI->getCondition(); 2699 auto *NewCond = ExitIfTrue ? ConstantInt::getTrue(OldCond->getType()) : 2700 ConstantInt::getFalse(OldCond->getType()); 2701 BI->setCondition(NewCond); 2702 if (OldCond->use_empty()) 2703 DeadInsts.push_back(OldCond); 2704 Changed = true; 2705 continue; 2706 } 2707 2708 // If we end up with a pointer exit count, bail. Note that we can end up 2709 // with a pointer exit count for one exiting block, and not for another in 2710 // the same loop. 2711 if (!ExitCount->getType()->isIntegerTy() || 2712 !MaxExitCount->getType()->isIntegerTy()) 2713 continue; 2714 2715 Type *WiderType = 2716 SE->getWiderType(MaxExitCount->getType(), ExitCount->getType()); 2717 ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType); 2718 MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType); 2719 assert(MaxExitCount->getType() == ExitCount->getType()); 2720 2721 // Can we prove that some other exit must be taken strictly before this 2722 // one? TODO: handle cases where ule is known, and equality is covered 2723 // by a dominating exit 2724 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, 2725 MaxExitCount, ExitCount)) { 2726 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); 2727 auto *OldCond = BI->getCondition(); 2728 auto *NewCond = ExitIfTrue ? 
ConstantInt::getFalse(OldCond->getType()) : 2729 ConstantInt::getTrue(OldCond->getType()); 2730 BI->setCondition(NewCond); 2731 if (OldCond->use_empty()) 2732 DeadInsts.push_back(OldCond); 2733 Changed = true; 2734 continue; 2735 } 2736 2737 // TODO: If we can prove that the exiting iteration is equal to the exit 2738 // count for this exit and that no previous exit oppurtunities exist within 2739 // the loop, then we can discharge all other exits. (May fall out of 2740 // previous TODO.) 2741 2742 // TODO: If we can't prove any relation between our exit count and the 2743 // loops exit count, but taking this exit doesn't require actually running 2744 // the loop (i.e. no side effects, no computed values used in exit), then 2745 // we can replace the exit test with a loop invariant test which exits on 2746 // the first iteration. 2747 } 2748 return Changed; 2749 } 2750 2751 //===----------------------------------------------------------------------===// 2752 // IndVarSimplify driver. Manage several subpasses of IV simplification. 2753 //===----------------------------------------------------------------------===// 2754 2755 bool IndVarSimplify::run(Loop *L) { 2756 // We need (and expect!) the incoming loop to be in LCSSA. 2757 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2758 "LCSSA required to run indvars!"); 2759 bool Changed = false; 2760 2761 // If LoopSimplify form is not available, stay out of trouble. Some notes: 2762 // - LSR currently only supports LoopSimplify-form loops. Indvars' 2763 // canonicalization can be a pessimization without LSR to "clean up" 2764 // afterwards. 2765 // - We depend on having a preheader; in particular, 2766 // Loop::getCanonicalInductionVariable only supports loops with preheaders, 2767 // and we're in trouble if we can't find the induction variable even when 2768 // we've manually inserted one. 2769 // - LFTR relies on having a single backedge. 2770 if (!L->isLoopSimplifyForm()) 2771 return false; 2772 2773 // If there are any floating-point recurrences, attempt to 2774 // transform them to use integer recurrences. 2775 Changed |= rewriteNonIntegerIVs(L); 2776 2777 #ifndef NDEBUG 2778 // Used below for a consistency check only 2779 const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); 2780 #endif 2781 2782 // Create a rewriter object which we'll use to transform the code with. 2783 SCEVExpander Rewriter(*SE, DL, "indvars"); 2784 #ifndef NDEBUG 2785 Rewriter.setDebugType(DEBUG_TYPE); 2786 #endif 2787 2788 // Eliminate redundant IV users. 2789 // 2790 // Simplification works best when run before other consumers of SCEV. We 2791 // attempt to avoid evaluating SCEVs for sign/zero extend operations until 2792 // other expressions involving loop IVs have been evaluated. This helps SCEV 2793 // set no-wrap flags before normalizing sign/zero extension. 2794 Rewriter.disableCanonicalMode(); 2795 Changed |= simplifyAndExtend(L, Rewriter, LI); 2796 2797 // Check to see if we can compute the final value of any expressions 2798 // that are recurrent in the loop, and substitute the exit values from the 2799 // loop into any instructions outside of the loop that use the final values 2800 // of the current expressions. 2801 if (ReplaceExitValue != NeverRepl) 2802 Changed |= rewriteLoopExitValues(L, Rewriter); 2803 2804 // Eliminate redundant IV cycles. 
2805 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); 2806 2807 Changed |= optimizeLoopExits(L); 2808 2809 // If we have a trip count expression, rewrite the loop's exit condition 2810 // using it. 2811 if (!DisableLFTR) { 2812 SmallVector<BasicBlock*, 16> ExitingBlocks; 2813 L->getExitingBlocks(ExitingBlocks); 2814 for (BasicBlock *ExitingBB : ExitingBlocks) { 2815 // Can't rewrite non-branch yet. 2816 if (!isa<BranchInst>(ExitingBB->getTerminator())) 2817 continue; 2818 2819 // If our exitting block exits multiple loops, we can only rewrite the 2820 // innermost one. Otherwise, we're changing how many times the innermost 2821 // loop runs before it exits. 2822 if (LI->getLoopFor(ExitingBB) != L) 2823 continue; 2824 2825 if (!needsLFTR(L, ExitingBB)) 2826 continue; 2827 2828 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2829 if (isa<SCEVCouldNotCompute>(ExitCount)) 2830 continue; 2831 2832 // This was handled above, but as we form SCEVs, we can sometimes refine 2833 // existing ones; this allows exit counts to be folded to zero which 2834 // weren't when optimizeLoopExits saw them. Arguably, we should iterate 2835 // until stable to handle cases like this better. 2836 if (ExitCount->isZero()) 2837 continue; 2838 2839 PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT); 2840 if (!IndVar) 2841 continue; 2842 2843 // Avoid high cost expansions. Note: This heuristic is questionable in 2844 // that our definition of "high cost" is not exactly principled. 2845 if (Rewriter.isHighCostExpansion(ExitCount, L)) 2846 continue; 2847 2848 // Check preconditions for proper SCEVExpander operation. SCEV does not 2849 // express SCEVExpander's dependencies, such as LoopSimplify. Instead 2850 // any pass that uses the SCEVExpander must do it. This does not work 2851 // well for loop passes because SCEVExpander makes assumptions about 2852 // all loops, while LoopPassManager only forces the current loop to be 2853 // simplified. 2854 // 2855 // FIXME: SCEV expansion has no way to bail out, so the caller must 2856 // explicitly check any assumptions made by SCEV. Brittle. 2857 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount); 2858 if (!AR || AR->getLoop()->getLoopPreheader()) 2859 Changed |= linearFunctionTestReplace(L, ExitingBB, 2860 ExitCount, IndVar, 2861 Rewriter); 2862 } 2863 } 2864 // Clear the rewriter cache, because values that are in the rewriter's cache 2865 // can be deleted in the loop below, causing the AssertingVH in the cache to 2866 // trigger. 2867 Rewriter.clear(); 2868 2869 // Now that we're done iterating through lists, clean up any instructions 2870 // which are now dead. 2871 while (!DeadInsts.empty()) 2872 if (Instruction *Inst = 2873 dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val())) 2874 Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); 2875 2876 // The Rewriter may not be used from this point on. 2877 2878 // Loop-invariant instructions in the preheader that aren't used in the 2879 // loop may be sunk below the loop to reduce register pressure. 2880 Changed |= sinkUnusedInvariants(L); 2881 2882 // rewriteFirstIterationLoopExitValues does not rely on the computation of 2883 // trip count and therefore can further simplify exit values in addition to 2884 // rewriteLoopExitValues. 2885 Changed |= rewriteFirstIterationLoopExitValues(L); 2886 2887 // Clean up dead instructions. 2888 Changed |= DeleteDeadPHIs(L->getHeader(), TLI); 2889 2890 // Check a post-condition. 
2891 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2892 "Indvars did not preserve LCSSA!"); 2893 2894 // Verify that LFTR, and any other change have not interfered with SCEV's 2895 // ability to compute trip count. 2896 #ifndef NDEBUG 2897 if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { 2898 SE->forgetLoop(L); 2899 const SCEV *NewBECount = SE->getBackedgeTakenCount(L); 2900 if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) < 2901 SE->getTypeSizeInBits(NewBECount->getType())) 2902 NewBECount = SE->getTruncateOrNoop(NewBECount, 2903 BackedgeTakenCount->getType()); 2904 else 2905 BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, 2906 NewBECount->getType()); 2907 assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV"); 2908 } 2909 #endif 2910 2911 return Changed; 2912 } 2913 2914 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, 2915 LoopStandardAnalysisResults &AR, 2916 LPMUpdater &) { 2917 Function *F = L.getHeader()->getParent(); 2918 const DataLayout &DL = F->getParent()->getDataLayout(); 2919 2920 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI); 2921 if (!IVS.run(&L)) 2922 return PreservedAnalyses::all(); 2923 2924 auto PA = getLoopPassPreservedAnalyses(); 2925 PA.preserveSet<CFGAnalyses>(); 2926 return PA; 2927 } 2928 2929 namespace { 2930 2931 struct IndVarSimplifyLegacyPass : public LoopPass { 2932 static char ID; // Pass identification, replacement for typeid 2933 2934 IndVarSimplifyLegacyPass() : LoopPass(ID) { 2935 initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry()); 2936 } 2937 2938 bool runOnLoop(Loop *L, LPPassManager &LPM) override { 2939 if (skipLoop(L)) 2940 return false; 2941 2942 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 2943 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 2944 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2945 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); 2946 auto *TLI = TLIP ? &TLIP->getTLI() : nullptr; 2947 auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); 2948 auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr; 2949 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 2950 2951 IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI); 2952 return IVS.run(L); 2953 } 2954 2955 void getAnalysisUsage(AnalysisUsage &AU) const override { 2956 AU.setPreservesCFG(); 2957 getLoopAnalysisUsage(AU); 2958 } 2959 }; 2960 2961 } // end anonymous namespace 2962 2963 char IndVarSimplifyLegacyPass::ID = 0; 2964 2965 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", 2966 "Induction Variable Simplification", false, false) 2967 INITIALIZE_PASS_DEPENDENCY(LoopPass) 2968 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars", 2969 "Induction Variable Simplification", false, false) 2970 2971 Pass *llvm::createIndVarSimplifyPass() { 2972 return new IndVarSimplifyLegacyPass(); 2973 } 2974