//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars"));

enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
    "indvars-post-increment-ranges", cl::Hidden,
    cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
    cl::init(true));

static cl::opt<bool>
    DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
                cl::desc("Disable Linear Function Test Replace optimization"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;

  SmallVector<WeakTrackingVH, 16> DeadInsts;

  bool isValidRewrite(Value *FromVal, Value *ToVal);

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);

  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
  bool rewriteFirstIterationLoopExitValues(Loop *L);
  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *BackedgeTakenCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}

  bool run(Loop *L);
};

} // end anonymous namespace

/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR. Ideally, this function will only be
/// called for verification.
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case.
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t.
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
///   for (double i = 0; i < 10000; ++i)
///     bar(i);
/// is converted into
///   for (int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a ConstantFP, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find the exit condition, which is an fcmp. If it doesn't exist, or if it
  // isn't only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find the new predicate for the integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
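    // (Illustrative numbers for this negative-stride path: InitValue = 10,
    // ExitValue = 0, IncValue = -3 gives Range = 10 and Leftover = 1, so an
    // EQ/NE exit test is rejected below: the IV would step past 0 without
    // ever equaling it.)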
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;

  // Ith incoming value.
  unsigned Ith;

  // Exit value after expansion.
  Value *Val;

  // True if expanding the exit value is considered high cost.
  bool HighCost;

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L,
                                           const Instruction *I) const {
  SmallPtrSet<const Instruction *, 8> Visited;
  SmallVector<const Instruction *, 8> WorkList;
  Visited.insert(I);
  WorkList.push_back(I);
  while (!WorkList.empty()) {
    const Instruction *Curr = WorkList.pop_back_val();
    // This use is outside the loop, nothing to do.
    if (!L->contains(Curr))
      continue;
    // Do we assume it is a "hard" use which will not be eliminated easily?
    if (Curr->mayHaveSideEffects())
      return true;
    // Otherwise, add all its users to worklist.
    for (auto U : Curr->users()) {
      auto *UI = cast<Instruction>(U);
      if (Visited.insert(UI).second)
        WorkList.push_back(UI);
    }
  }
  return false;
}

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not an instruction defined in the
        // loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop. Evaluate the value it
        // contains when the loop exits, if possible.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (!SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE))
          continue;

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which can not be optimized
        // away.
        if (ReplaceExitValue != AlwaysRepl &&
            !isa<SCEVConstant>(ExitValue) && hasHardUserWithinLoop(L, Inst))
          continue;

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);

        LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
                          << '\n'
                          << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

#ifndef NDEBUG
        // If we reuse an instruction from a loop which is neither L nor one of
        // its containing loops, we end up breaking LCSSA form for this loop by
        // creating a new use of its instruction.
        if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
          if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
            if (EVL != L)
              assert(EVL->contains(L) && "LCSSA breach detected!");
#endif

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  bool Changed = false;
  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, schedule it for deletion. Don't delete
    // it immediately, to avoid invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all? (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.) Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
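        // (Illustrative: for a header phi
        //    %h = phi i32 [ %init, %preheader ], [ %next, %latch ]
        //  feeding this exit phi, the exit runs only on the first iteration,
        //  when %h still holds %init, so %init can be used directly.)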
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

/// Check whether it is possible to delete the loop after rewriting exit
/// value. If it is possible, ignore ReplaceExitValue and do rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // The LoopDeletion pass can delete a loop even when ExitingBlocks.size() > 1,
  // but we forgo the multiple-ExitingBlocks case here for simplicity.
  // TODO: If we see a testcase where multiple ExitingBlocks can be deleted
  // after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by visitIVCast and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
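/// Illustrative example (hypothetical IR): for a narrow IV %iv of type i16
/// with users
///   %a = sext i16 %iv to i32
///   %b = sext i16 %iv to i64
/// the first sext seen fixes IsSigned = true, and the second grows
/// WidestNativeType to i64, provided i64 is a legal integer type for the
/// target.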
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than that on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable.
  // We could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def. This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative. Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or
/// inserting truncs whenever we stop propagating the type.
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo *LI;
  Loop *L;
  ScalarEvolution *SE;
  DominatorTree *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// nullptr for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>.
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported so far.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification
/// after widening its type? In other words, can the extend be safely hoisted
/// out of the loop with SCEV reducing the value to a recurrence on the same
/// loop? If so, return the extended recurrence and the kind of extension used.
/// Otherwise return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension). For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive. Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}

/// If the narrow use is an instruction whose two operands are the defining
/// instruction of DU and a load instruction, then we have the following:
/// if the load is hoisted outside the loop, then we do not reach this function
/// as scalar evolution analysis works fine in widenIVUse with variables
/// hoisted outside the loop and efficient code is subsequently generated by
/// not emitting truncate instructions. But when the load is not hoisted
/// (whether due to limitations in alias analysis or a true legality
/// constraint), then scalar evolution can not proceed with loop variant values
/// and inefficient code is generated. This function handles the non-hoisted
/// load special case by making the optimization generate the same type of code
/// for hoisted and non-hoisted load (widen use and eliminate sign extend
/// instruction). This special case is important especially when the induction
/// variables are affecting addressing mode in code generation.
bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  // Handle the common case of add<nsw/nuw>.
  const unsigned OpCode = NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return false;

  // The operand that is not defined by NarrowDef of DU. Let's call it the
  // other operand.
  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
         "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(NarrowUse);
  ExtendKind ExtKind = getExtendKind(NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return false;

  // We are interested in the other operand being a load instruction.
  // But, we should look into relaxing this restriction later on.
  auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
  if (I && I->getOpcode() != Instruction::Load)
    return false;

  // Verify that the defining operand is an AddRec.
  const SCEV *Op1 = SE->getSCEV(WideDef);
  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
    return false;
  // Verify that the other operand is an extend.
  if (ExtKind == SignExtended) {
    if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
      return false;
  } else {
    if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
      return false;
  }

  if (ExtKind == SignExtended) {
    for (Use &U : NarrowUse->uses()) {
      SExtInst *User = dyn_cast<SExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  } else { // ExtKind == ZeroExtended
    for (Use &U : NarrowUse->uses()) {
      ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  }

  return true;
}

/// Special case for widening with variant loads (see
/// WidenIV::widenWithVariantLoadUse). This is the code generation part.
void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  ExtendKind ExtKind = getExtendKind(NarrowDef);

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  // Generate a widening use instruction.
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      ExtKind, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      ExtKind, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);

  if (ExtKind == SignExtended)
    ExtendKindMap[NarrowUse] = SignExtended;
  else
    ExtendKindMap[NarrowUse] = ZeroExtended;

  // Update the uses.
  if (ExtKind == SignExtended) {
    for (Use &U : NarrowUse->uses()) {
      SExtInst *User = dyn_cast<SExtInst>(U.getUser());
      if (User && User->getType() == WideType) {
        LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
                          << *WideBO << "\n");
        ++NumElimExt;
        User->replaceAllUsesWith(WideBO);
        DeadInsts.emplace_back(User);
      }
    }
  } else { // ExtKind == ZeroExtended
    for (Use &U : NarrowUse->uses()) {
      ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
      if (User && User->getType() == WideType) {
        LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
                          << *WideBO << "\n");
        ++NumElimExt;
        User->replaceAllUsesWith(WideBO);
        DeadInsts.emplace_back(User);
      }
    }
  }
}

/// Determine whether an individual user of the narrow IV can be widened. If
/// so, return the wide clone of the user.
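/// Illustrative example (hypothetical IR): when widening i32 %iv to i64, a
/// user such as
///   %add = add nsw i32 %iv, %step
/// that still forms an AddRec after extension is cloned as an i64 add of
/// %iv.wide and sext(%step), while a user with no wide recurrence is cut off
/// with a trunc of the wide IV instead.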
1513 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1514   assert(ExtendKindMap.count(DU.NarrowDef) &&
1515          "Should already know the kind of extension used to widen NarrowDef");
1516 
1517   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1518   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1519     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1520       // For LCSSA phis, sink the truncate outside the loop.
1521       // After SimplifyCFG most loop exit targets have a single predecessor.
1522       // Otherwise fall back to a truncate within the loop.
1523       if (UsePhi->getNumOperands() != 1)
1524         truncateIVUse(DU, DT, LI);
1525       else {
1526         // Widening the PHI requires us to insert a trunc. The logical place
1527         // for this trunc is in the same BB as the PHI. This is not possible if
1528         // the BB is terminated by a catchswitch.
1529         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1530           return nullptr;
1531 
1532         PHINode *WidePhi =
1533           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1534                           UsePhi);
1535         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1536         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1537         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1538         UsePhi->replaceAllUsesWith(Trunc);
1539         DeadInsts.emplace_back(UsePhi);
1540         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1541                           << *WidePhi << "\n");
1542       }
1543       return nullptr;
1544     }
1545   }
1546 
1547   // This narrow use can be widened by a sext if it's non-negative or its narrow
1548   // def was widened by a sext. Same for zext.
1549   auto canWidenBySExt = [&]() {
1550     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1551   };
1552   auto canWidenByZExt = [&]() {
1553     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1554   };
1555 
1556   // Our raison d'etre! Eliminate sign and zero extension.
1557   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1558       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1559     Value *NewDef = DU.WideDef;
1560     if (DU.NarrowUse->getType() != WideType) {
1561       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1562       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1563       if (CastWidth < IVWidth) {
1564         // The cast isn't as wide as the IV, so insert a Trunc.
1565         IRBuilder<> Builder(DU.NarrowUse);
1566         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1567       }
1568       else {
1569         // A wider extend was hidden behind a narrower one. This may induce
1570         // another round of IV widening in which the intermediate IV becomes
1571         // dead. It should be very rare.
1572         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1573                           << " not wide enough to subsume " << *DU.NarrowUse
1574                           << "\n");
1575         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1576         NewDef = DU.NarrowUse;
1577       }
1578     }
1579     if (NewDef != DU.NarrowUse) {
1580       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1581                         << " replaced by " << *DU.WideDef << "\n");
1582       ++NumElimExt;
1583       DU.NarrowUse->replaceAllUsesWith(NewDef);
1584       DeadInsts.emplace_back(DU.NarrowUse);
1585     }
1586     // Now that the extend is gone, we want to expose its uses for potential
1587     // further simplification. We don't need to directly inform SimplifyIVUsers
1588     // of the new users, because their parent IV will be processed later as a
1589     // new loop phi. If we preserved IVUsers analysis, we would also want to
1590     // push the uses of WideDef here.
1591 
1592     // No further widening is needed. The deceased [sz]ext had done it for us.
1593     return nullptr;
1594   }
1595 
1596   // Does this user itself evaluate to a recurrence after widening?
1597   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1598   if (!WideAddRec.first)
1599     WideAddRec = getWideRecurrence(DU);
1600 
1601   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1602   if (!WideAddRec.first) {
1603     // If use is a loop condition, try to promote the condition instead of
1604     // truncating the IV first.
1605     if (widenLoopCompare(DU))
1606       return nullptr;
1607 
1608     // We are here about to generate a truncate instruction that may hurt
1609     // performance because the scalar evolution expression computed earlier
1610     // in WideAddRec.first does not indicate a polynomial induction expression.
1611     // In that case, look at the operands of the use instruction to determine
1612     // if we can still widen the use instead of truncating its operand.
1613     if (widenWithVariantLoadUse(DU)) {
1614       widenWithVariantLoadUseCodegen(DU);
1615       return nullptr;
1616     }
1617 
1618     // This user does not evaluate to a recurrence after widening, so don't
1619     // follow it. Instead insert a Trunc to kill off the original use,
1620     // eventually isolating the original narrow IV so it can be removed.
1621     truncateIVUse(DU, DT, LI);
1622     return nullptr;
1623   }
1624   // Assume block terminators cannot evaluate to a recurrence. We can't insert
1625   // a Trunc after a terminator if there happens to be a critical edge.
1626   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1627          "SCEV is not expected to evaluate a block terminator");
1628 
1629   // Reuse the IV increment that SCEVExpander created as long as it dominates
1630   // NarrowUse.
1631   Instruction *WideUse = nullptr;
1632   if (WideAddRec.first == WideIncExpr &&
1633       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1634     WideUse = WideInc;
1635   else {
1636     WideUse = cloneIVUser(DU, WideAddRec.first);
1637     if (!WideUse)
1638       return nullptr;
1639   }
1640   // Evaluation of WideAddRec ensured that the narrow expression could be
1641   // extended outside the loop without overflow. This suggests that the wide use
1642   // evaluates to the same expression as the extended narrow use, but doesn't
1643   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1644   // where it fails, we simply throw away the newly created wide use.
1645   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1646     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1647                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1648                       << "\n");
1649     DeadInsts.emplace_back(WideUse);
1650     return nullptr;
1651   }
1652 
1653   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1654   // Returning WideUse pushes it on the worklist.
1655   return WideUse;
1656 }
1657 
1658 /// Add eligible users of NarrowDef to NarrowIVUsers.
1659 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1660   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1661   bool NonNegativeDef =
1662       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1663                            SE->getConstant(NarrowSCEV->getType(), 0));
1664   for (User *U : NarrowDef->users()) {
1665     Instruction *NarrowUser = cast<Instruction>(U);
1666 
1667     // Handle data flow merges and bizarre phi cycles.
1668     if (!Widened.insert(NarrowUser).second)
1669       continue;
1670 
1671     bool NonNegativeUse = false;
1672     if (!NonNegativeDef) {
1673       // We might have control-dependent range information for this context.
1674       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1675         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1676     }
1677 
1678     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1679                                NonNegativeDef || NonNegativeUse);
1680   }
1681 }
1682 
1683 /// Process a single induction variable. First use the SCEVExpander to create a
1684 /// wide induction variable that evaluates to the same recurrence as the
1685 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1686 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1687 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1688 ///
1689 /// It would be simpler to delete uses as they are processed, but we must avoid
1690 /// invalidating SCEV expressions.
1691 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1692   // Is this phi an induction variable?
1693   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1694   if (!AddRec)
1695     return nullptr;
1696 
1697   // Widen the induction variable expression.
1698   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1699                                ? SE->getSignExtendExpr(AddRec, WideType)
1700                                : SE->getZeroExtendExpr(AddRec, WideType);
1701 
1702   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1703          "Expect the new IV expression to preserve its type");
1704 
1705   // Can the IV be extended outside the loop without overflow?
1706   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1707   if (!AddRec || AddRec->getLoop() != L)
1708     return nullptr;
1709 
1710   // An AddRec must have loop-invariant operands. Since this AddRec is
1711   // materialized by a loop header phi, the expression cannot have any post-loop
1712   // operands, so they must dominate the loop header.
1713   assert(
1714       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1715       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1716       "Loop header phi recurrence inputs do not dominate the loop");
1717 
1718   // Iterate over IV uses (including transitive ones) looking for IV increments
1719   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1720   // the increment calculate control-dependent range information based on
1721   // dominating conditions inside of the loop (e.g. a range check inside of the
1722   // loop). Calculated ranges are stored in the PostIncRangeInfos map.
1723   //
1724   // Control-dependent range information is later used to prove that a narrow
1725   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1726   // this on demand because when pushNarrowIVUsers needs this information some
1727   // of the dominating conditions might already be widened.
1728   if (UsePostIncrementRanges)
1729     calculatePostIncRanges(OrigPhi);
1730 
1731   // The rewriter provides a value for the desired IV expression. This may
1732   // either find an existing phi or materialize a new one. Either way, we
1733   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1734   // of the phi-SCC dominates the loop entry.
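  //
  // For illustration only (hypothetical IR, assuming an i32 IV widened to
  // i64), the expanded phi-with-increments typically looks like:
  //   %wide.iv = phi i64 [ %wide.start, %preheader ], [ %wide.iv.next, %latch ]
  //   %wide.iv.next = add nuw nsw i64 %wide.iv, 1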
1735 Instruction *InsertPt = &L->getHeader()->front(); 1736 WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt)); 1737 1738 // Remembering the WideIV increment generated by SCEVExpander allows 1739 // widenIVUse to reuse it when widening the narrow IV's increment. We don't 1740 // employ a general reuse mechanism because the call above is the only call to 1741 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. 1742 if (BasicBlock *LatchBlock = L->getLoopLatch()) { 1743 WideInc = 1744 cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); 1745 WideIncExpr = SE->getSCEV(WideInc); 1746 // Propagate the debug location associated with the original loop increment 1747 // to the new (widened) increment. 1748 auto *OrigInc = 1749 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); 1750 WideInc->setDebugLoc(OrigInc->getDebugLoc()); 1751 } 1752 1753 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); 1754 ++NumWidened; 1755 1756 // Traverse the def-use chain using a worklist starting at the original IV. 1757 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); 1758 1759 Widened.insert(OrigPhi); 1760 pushNarrowIVUsers(OrigPhi, WidePhi); 1761 1762 while (!NarrowIVUsers.empty()) { 1763 NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); 1764 1765 // Process a def-use edge. This may replace the use, so don't hold a 1766 // use_iterator across it. 1767 Instruction *WideUse = widenIVUse(DU, Rewriter); 1768 1769 // Follow all def-use edges from the previous narrow use. 1770 if (WideUse) 1771 pushNarrowIVUsers(DU.NarrowUse, WideUse); 1772 1773 // widenIVUse may have removed the def-use edge. 1774 if (DU.NarrowDef->use_empty()) 1775 DeadInsts.emplace_back(DU.NarrowDef); 1776 } 1777 1778 // Attach any debug information to the new PHI. Since OrigPhi and WidePHI 1779 // evaluate the same recurrence, we can just copy the debug info over. 1780 SmallVector<DbgValueInst *, 1> DbgValues; 1781 llvm::findDbgValues(DbgValues, OrigPhi); 1782 auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(), 1783 ValueAsMetadata::get(WidePhi)); 1784 for (auto &DbgValue : DbgValues) 1785 DbgValue->setOperand(0, MDPhi); 1786 return WidePhi; 1787 } 1788 1789 /// Calculates control-dependent range for the given def at the given context 1790 /// by looking at dominating conditions inside of the loop 1791 void WidenIV::calculatePostIncRange(Instruction *NarrowDef, 1792 Instruction *NarrowUser) { 1793 using namespace llvm::PatternMatch; 1794 1795 Value *NarrowDefLHS; 1796 const APInt *NarrowDefRHS; 1797 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), 1798 m_APInt(NarrowDefRHS))) || 1799 !NarrowDefRHS->isNonNegative()) 1800 return; 1801 1802 auto UpdateRangeFromCondition = [&] (Value *Condition, 1803 bool TrueDest) { 1804 CmpInst::Predicate Pred; 1805 Value *CmpRHS; 1806 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), 1807 m_Value(CmpRHS)))) 1808 return; 1809 1810 CmpInst::Predicate P = 1811 TrueDest ? 
Pred : CmpInst::getInversePredicate(Pred); 1812 1813 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); 1814 auto CmpConstrainedLHSRange = 1815 ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); 1816 auto NarrowDefRange = 1817 CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS); 1818 1819 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); 1820 }; 1821 1822 auto UpdateRangeFromGuards = [&](Instruction *Ctx) { 1823 if (!HasGuards) 1824 return; 1825 1826 for (Instruction &I : make_range(Ctx->getIterator().getReverse(), 1827 Ctx->getParent()->rend())) { 1828 Value *C = nullptr; 1829 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) 1830 UpdateRangeFromCondition(C, /*TrueDest=*/true); 1831 } 1832 }; 1833 1834 UpdateRangeFromGuards(NarrowUser); 1835 1836 BasicBlock *NarrowUserBB = NarrowUser->getParent(); 1837 // If NarrowUserBB is statically unreachable asking dominator queries may 1838 // yield surprising results. (e.g. the block may not have a dom tree node) 1839 if (!DT->isReachableFromEntry(NarrowUserBB)) 1840 return; 1841 1842 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); 1843 L->contains(DTB->getBlock()); 1844 DTB = DTB->getIDom()) { 1845 auto *BB = DTB->getBlock(); 1846 auto *TI = BB->getTerminator(); 1847 UpdateRangeFromGuards(TI); 1848 1849 auto *BI = dyn_cast<BranchInst>(TI); 1850 if (!BI || !BI->isConditional()) 1851 continue; 1852 1853 auto *TrueSuccessor = BI->getSuccessor(0); 1854 auto *FalseSuccessor = BI->getSuccessor(1); 1855 1856 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { 1857 return BBE.isSingleEdge() && 1858 DT->dominates(BBE, NarrowUser->getParent()); 1859 }; 1860 1861 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) 1862 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); 1863 1864 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) 1865 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); 1866 } 1867 } 1868 1869 /// Calculates PostIncRangeInfos map for the given IV 1870 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { 1871 SmallPtrSet<Instruction *, 16> Visited; 1872 SmallVector<Instruction *, 6> Worklist; 1873 Worklist.push_back(OrigPhi); 1874 Visited.insert(OrigPhi); 1875 1876 while (!Worklist.empty()) { 1877 Instruction *NarrowDef = Worklist.pop_back_val(); 1878 1879 for (Use &U : NarrowDef->uses()) { 1880 auto *NarrowUser = cast<Instruction>(U.getUser()); 1881 1882 // Don't go looking outside the current loop. 1883 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; 1884 if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) 1885 continue; 1886 1887 if (!Visited.insert(NarrowUser).second) 1888 continue; 1889 1890 Worklist.push_back(NarrowUser); 1891 1892 calculatePostIncRange(NarrowDef, NarrowUser); 1893 } 1894 } 1895 } 1896 1897 //===----------------------------------------------------------------------===// 1898 // Live IV Reduction - Minimize IVs live across the loop. 1899 //===----------------------------------------------------------------------===// 1900 1901 //===----------------------------------------------------------------------===// 1902 // Simplification of IV users based on SCEV evaluation. 
1903 //===----------------------------------------------------------------------===//
1904 
1905 namespace {
1906 
1907 class IndVarSimplifyVisitor : public IVVisitor {
1908   ScalarEvolution *SE;
1909   const TargetTransformInfo *TTI;
1910   PHINode *IVPhi;
1911 
1912 public:
1913   WideIVInfo WI;
1914 
1915   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1916                         const TargetTransformInfo *TTI,
1917                         const DominatorTree *DTree)
1918       : SE(SCEV), TTI(TTI), IVPhi(IV) {
1919     DT = DTree;
1920     WI.NarrowIV = IVPhi;
1921   }
1922 
1923   // Implement the interface used by simplifyUsersOfIV.
1924   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1925 };
1926 
1927 } // end anonymous namespace
1928 
1929 /// Iteratively perform simplification on a worklist of IV users. Each
1930 /// successive simplification may push more users which may themselves be
1931 /// candidates for simplification.
1932 ///
1933 /// Sign/Zero extend elimination is interleaved with IV simplification.
1934 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1935                                        SCEVExpander &Rewriter,
1936                                        LoopInfo *LI) {
1937   SmallVector<WideIVInfo, 8> WideIVs;
1938 
1939   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1940       Intrinsic::getName(Intrinsic::experimental_guard));
1941   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1942 
1943   SmallVector<PHINode*, 8> LoopPhis;
1944   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1945     LoopPhis.push_back(cast<PHINode>(I));
1946   }
1947   // Each round of simplification iterates through the SimplifyIVUsers worklist
1948   // for all current phis, then determines whether any IVs can be
1949   // widened. Widening adds new phis to LoopPhis, inducing another round of
1950   // simplification on the wide IVs.
1951   bool Changed = false;
1952   while (!LoopPhis.empty()) {
1953     // Evaluate as many IV expressions as possible before widening any IVs. This
1954     // forces SCEV to set no-wrap flags before evaluating sign/zero
1955     // extension. The first time SCEV attempts to normalize sign/zero extension,
1956     // the result becomes final. So for the most predictable results, we delay
1957     // evaluation of sign/zero extensions until needed, and avoid running
1958     // other SCEV-based analyses prior to simplifyAndExtend.
1959     do {
1960       PHINode *CurrIV = LoopPhis.pop_back_val();
1961 
1962       // Information about sign/zero extensions of CurrIV.
1963       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1964 
1965       Changed |=
1966           simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1967 
1968       if (Visitor.WI.WidestNativeType) {
1969         WideIVs.push_back(Visitor.WI);
1970       }
1971     } while (!LoopPhis.empty());
1972 
1973     for (; !WideIVs.empty(); WideIVs.pop_back()) {
1974       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1975       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1976         Changed = true;
1977         LoopPhis.push_back(WidePhi);
1978       }
1979     }
1980   }
1981   return Changed;
1982 }
1983 
1984 //===----------------------------------------------------------------------===//
1985 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1986 //===----------------------------------------------------------------------===//
1987 
1988 /// Given a Value which is hoped to be part of an add recurrence in the given
1989 /// loop, return the associated Phi node if so. Otherwise, return null. Note
1990 /// that this is less general than SCEV's AddRec checking.
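/// For illustration only (hypothetical IR), this matches patterns such as
///   %phi = phi i32 [ %start, %preheader ], [ %inc, %latch ]
///   %inc = add i32 %phi, %step        ; %step loop-invariant
/// and returns %phi.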
1991 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
1992   Instruction *IncI = dyn_cast<Instruction>(IncV);
1993   if (!IncI)
1994     return nullptr;
1995 
1996   switch (IncI->getOpcode()) {
1997   case Instruction::Add:
1998   case Instruction::Sub:
1999     break;
2000   case Instruction::GetElementPtr:
2001     // An IV counter must preserve its type.
2002     if (IncI->getNumOperands() == 2)
2003       break;
2004     LLVM_FALLTHROUGH;
2005   default:
2006     return nullptr;
2007   }
2008 
2009   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2010   if (Phi && Phi->getParent() == L->getHeader()) {
2011     if (L->isLoopInvariant(IncI->getOperand(1)))
2012       return Phi;
2013     return nullptr;
2014   }
2015   if (IncI->getOpcode() == Instruction::GetElementPtr)
2016     return nullptr;
2017 
2018   // Allow add/sub to be commuted.
2019   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2020   if (Phi && Phi->getParent() == L->getHeader()) {
2021     if (L->isLoopInvariant(IncI->getOperand(0)))
2022       return Phi;
2023   }
2024   return nullptr;
2025 }
2026 
2027 /// Given a loop with one backedge and one exit, return the ICmpInst
2028 /// controlling the sole loop exit. There is no guarantee that the exiting
2029 /// block is also the latch.
2030 static ICmpInst *getLoopTest(Loop *L, BasicBlock *ExitingBB) {
2031 
2032   BasicBlock *LatchBlock = L->getLoopLatch();
2033   // Don't bother with LFTR if the loop is not properly simplified.
2034   if (!LatchBlock)
2035     return nullptr;
2036 
2037   BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2038   assert(BI && "expected exit branch");
2039 
2040   return dyn_cast<ICmpInst>(BI->getCondition());
2041 }
2042 
2043 /// linearFunctionTestReplace policy. Return true unless we can show that the
2044 /// current exit test is already sufficiently canonical.
2045 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2046   // Do LFTR to simplify the exit condition to an ICMP.
2047   ICmpInst *Cond = getLoopTest(L, ExitingBB);
2048   if (!Cond)
2049     return true;
2050 
2051   // Do LFTR to simplify the exit ICMP to EQ/NE
2052   ICmpInst::Predicate Pred = Cond->getPredicate();
2053   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2054     return true;
2055 
2056   // Look for a loop invariant RHS
2057   Value *LHS = Cond->getOperand(0);
2058   Value *RHS = Cond->getOperand(1);
2059   if (!L->isLoopInvariant(RHS)) {
2060     if (!L->isLoopInvariant(LHS))
2061       return true;
2062     std::swap(LHS, RHS);
2063   }
2064   // Look for a simple IV counter LHS
2065   PHINode *Phi = dyn_cast<PHINode>(LHS);
2066   if (!Phi)
2067     Phi = getLoopPhiForCounter(LHS, L);
2068 
2069   if (!Phi)
2070     return true;
2071 
2072   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2073   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2074   if (Idx < 0)
2075     return true;
2076 
2077   // Do LFTR if the exit condition's IV is *not* a simple counter.
2078   Value *IncV = Phi->getIncomingValue(Idx);
2079   return Phi != getLoopPhiForCounter(IncV, L);
2080 }
2081 
2082 /// Return true if undefined behavior would provably be executed on the path to
2083 /// OnPathTo if Root produced a poison result. Note that this doesn't say
2084 /// anything about whether OnPathTo is actually executed or whether Root is
2085 /// actually poison. This can be used to assess whether a new use of Root can
2086 /// be added at a location which is control equivalent with OnPathTo (such as
2087 /// immediately before it) without introducing UB which didn't previously
2088 /// exist. Note that a false result conveys no information.
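/// For illustration only (hypothetical IR): if Root feeds the pointer operand
/// of a store that dominates OnPathTo, e.g.
///   %p = getelementptr i32, i32* %base, i64 %root
///   store i32 0, i32* %p
/// then poison in %root would provably trigger UB before reaching OnPathTo.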
2089 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
2090                                           Instruction *OnPathTo,
2091                                           DominatorTree *DT) {
2092   // Basic approach is to assume Root is poison, propagate poison forward
2093   // through all users we can easily track, and then check whether any of those
2094   // users are provably UB and must execute before our exiting block might
2095   // exit.
2096 
2097   // The set of all recursive users we've visited (which are assumed to all be
2098   // poison because of said visit)
2099   SmallSet<const Value *, 16> KnownPoison;
2100   SmallVector<const Instruction*, 16> Worklist;
2101   Worklist.push_back(Root);
2102   while (!Worklist.empty()) {
2103     const Instruction *I = Worklist.pop_back_val();
2104 
2105     // If we know this must trigger UB on a path leading to our target.
2106     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2107       return true;
2108 
2109     // If we can't analyze propagation through this instruction, just skip it
2110     // and transitive users. Safe, as false is a conservative result.
2111     if (!propagatesFullPoison(I) && I != Root)
2112       continue;
2113 
2114     if (KnownPoison.insert(I).second)
2115       for (const User *User : I->users())
2116         Worklist.push_back(cast<Instruction>(User));
2117   }
2118 
2119   // Might be non-UB, or might have a path we couldn't prove must execute on
2120   // way to exiting bb.
2121   return false;
2122 }
2123 
2124 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2125 /// down to checking that all operands are constant and listing instructions
2126 /// that may hide undef.
2127 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2128                                unsigned Depth) {
2129   if (isa<Constant>(V))
2130     return !isa<UndefValue>(V);
2131 
2132   if (Depth >= 6)
2133     return false;
2134 
2135   // Conservatively handle non-constant non-instructions. For example, Arguments
2136   // may be undef.
2137   Instruction *I = dyn_cast<Instruction>(V);
2138   if (!I)
2139     return false;
2140 
2141   // Load and return values may be undef.
2142   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2143     return false;
2144 
2145   // Optimistically handle other instructions.
2146   for (Value *Op : I->operands()) {
2147     if (!Visited.insert(Op).second)
2148       continue;
2149     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2150       return false;
2151   }
2152   return true;
2153 }
2154 
2155 /// Return true if the given value is concrete. We must prove that undef can
2156 /// never reach it.
2157 ///
2158 /// TODO: If we decide that this is a good approach to checking for undef, we
2159 /// may factor it into a common location.
2160 static bool hasConcreteDef(Value *V) {
2161   SmallPtrSet<Value*, 8> Visited;
2162   Visited.insert(V);
2163   return hasConcreteDefImpl(V, Visited, 0);
2164 }
2165 
2166 /// Return true if this IV has any uses other than the (soon to be rewritten)
2167 /// loop exit test.
2168 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2169   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2170   Value *IncV = Phi->getIncomingValue(LatchIdx);
2171 
2172   for (User *U : Phi->users())
2173     if (U != Cond && U != IncV) return false;
2174 
2175   for (User *U : IncV->users())
2176     if (U != Cond && U != Phi) return false;
2177   return true;
2178 }
2179 
2180 /// Return true if the given phi is a "counter" in L. A counter is an
2181 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
2182 /// step of 1. Note that L must have exactly one latch.
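/// For illustration only (hypothetical IR), a qualifying counter looks like:
///   %i = phi i32 [ %start, %preheader ], [ %i.next, %latch ]
///   %i.next = add i32 %i, 1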
2183 static bool isLoopCounter(PHINode *Phi, Loop *L,
2184                           ScalarEvolution *SE) {
2185   assert(Phi->getParent() == L->getHeader());
2186   assert(L->getLoopLatch());
2187 
2188   if (!SE->isSCEVable(Phi->getType()))
2189     return false;
2190 
2191   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2192   if (!AR || AR->getLoop() != L || !AR->isAffine())
2193     return false;
2194 
2195   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2196   if (!Step || !Step->isOne())
2197     return false;
2198 
2199   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2200   Value *IncV = Phi->getIncomingValue(LatchIdx);
2201   return (getLoopPhiForCounter(IncV, L) == Phi);
2202 }
2203 
2204 /// Search the loop header for a loop counter (an add rec w/step of one)
2205 /// suitable for use by LFTR. If multiple counters are available, select the
2206 /// "best" one based on profitability heuristics.
2207 ///
2208 /// BECount may be an i8* pointer type. The pointer difference is already a
2209 /// valid count without scaling the address stride, so it remains a pointer
2210 /// expression as far as SCEV is concerned.
2211 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2212                                 const SCEV *BECount,
2213                                 ScalarEvolution *SE, DominatorTree *DT) {
2214   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2215 
2216   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2217 
2218   // Loop over all of the PHI nodes, looking for a simple counter.
2219   PHINode *BestPhi = nullptr;
2220   const SCEV *BestInit = nullptr;
2221   BasicBlock *LatchBlock = L->getLoopLatch();
2222   assert(LatchBlock && "needsLFTR should guarantee a loop latch");
2223   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2224 
2225   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2226     PHINode *Phi = cast<PHINode>(I);
2227     if (!isLoopCounter(Phi, L, SE))
2228       continue;
2229 
2230     // Avoid comparing an integer IV against a pointer Limit.
2231     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2232       continue;
2233 
2234     const auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2235 
2236     // AR may be a pointer type, while BECount is an integer type.
2237     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2238     // AR may not be a narrower type, or we may never exit.
2239     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2240     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2241       continue;
2242 
2243     // Avoid reusing a potentially undef value to compute other values that may
2244     // have originally had a concrete definition.
2245     if (!hasConcreteDef(Phi)) {
2246       // We explicitly allow unknown phis as long as they are already used by
2247       // the loop test. In this case we assume that performing LFTR could not
2248       // increase the number of undef users.
2249       // TODO: Generalize this to allow *any* loop exit which is known to
2250       // execute on each iteration.
2251       if (L->getExitingBlock())
2252         if (ICmpInst *Cond = getLoopTest(L, ExitingBB))
2253           if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L) &&
2254               Phi != getLoopPhiForCounter(Cond->getOperand(1), L))
2255             continue;
2256     }
2257 
2258     // Avoid introducing undefined behavior due to poison which didn't exist in
2259     // the original program. (Annoyingly, the rules for poison and undef
2260     // propagation are distinct, so this does NOT cover the undef case above.)
2261     // We have to ensure that we don't introduce UB by introducing a use on an
2262     // iteration where said IV produces poison. Our strategy here differs for
2263     // pointers and integer IVs. For integers, we strip and reinfer as needed,
2264     // see code in linearFunctionTestReplace. For pointers, we restrict
2265     // transforms as there is no good way to reinfer inbounds once lost.
2266     if (!Phi->getType()->isIntegerTy() &&
2267         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
2268       continue;
2269 
2270     const SCEV *Init = AR->getStart();
2271 
2272     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2273       // Don't force a live loop counter if another IV can be used.
2274       if (AlmostDeadIV(Phi, LatchBlock, Cond))
2275         continue;
2276 
2277       // Prefer to count-from-zero. This is a more "canonical" counter form. It
2278       // also prefers integer to pointer IVs.
2279       if (BestInit->isZero() != Init->isZero()) {
2280         if (BestInit->isZero())
2281           continue;
2282       }
2283       // If two IVs both count from zero or both count from nonzero then the
2284       // narrower is likely a dead phi that has been widened. Use the wider phi
2285       // to allow the other to be eliminated.
2286       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2287         continue;
2288     }
2289     BestPhi = Phi;
2290     BestInit = Init;
2291   }
2292   return BestPhi;
2293 }
2294 
2295 /// Insert an IR expression which computes the value held by the IV IndVar
2296 /// (which must be a loop counter w/unit stride) after the backedge of loop L
2297 /// is taken IVCount times.
2298 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2299                            const SCEV *IVCount, Loop *L,
2300                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
2301   assert(isLoopCounter(IndVar, L, SE));
2302   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2303   const SCEV *IVInit = AR->getStart();
2304 
2305   // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
2306   // finds a valid pointer IV. Zero extend BECount in order to materialize a
2307   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2308   // the existing GEPs whenever possible.
2309   if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) {
2310     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2311     // signed value. IVCount on the other hand represents the loop trip count,
2312     // which is an unsigned value. FindLoopCounter only allows induction
2313     // variables that have a positive stride of one. This means we don't
2314     // have to handle the case of negative offsets (yet) and just need to zero
2315     // extend IVCount.
2316     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2317     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
2318 
2319     // Expand the code for the iteration count.
2320     assert(SE->isLoopInvariant(IVOffset, L) &&
2321            "Computed iteration count is not loop invariant!");
2322     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2323     Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
2324 
2325     Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
2326     assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
2327     // We could handle pointer IVs other than i8*, but we need to compensate for
2328     // gep index scaling.
2329 assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), 2330 cast<PointerType>(GEPBase->getType()) 2331 ->getElementType())->isOne() && 2332 "unit stride pointer IV must be i8*"); 2333 2334 IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); 2335 return Builder.CreateGEP(GEPBase->getType()->getPointerElementType(), 2336 GEPBase, GEPOffset, "lftr.limit"); 2337 } else { 2338 // In any other case, convert both IVInit and IVCount to integers before 2339 // comparing. This may result in SCEV expansion of pointers, but in practice 2340 // SCEV will fold the pointer arithmetic away as such: 2341 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). 2342 // 2343 // Valid Cases: (1) both integers is most common; (2) both may be pointers 2344 // for simple memset-style loops. 2345 // 2346 // IVInit integer and IVCount pointer would only occur if a canonical IV 2347 // were generated on top of case #2, which is not expected. 2348 2349 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 2350 const SCEV *IVLimit = nullptr; 2351 // For unit stride, IVCount = Start + BECount with 2's complement overflow. 2352 // For non-zero Start, compute IVCount here. 2353 if (AR->getStart()->isZero()) 2354 IVLimit = IVCount; 2355 else { 2356 const SCEV *IVInit = AR->getStart(); 2357 2358 // For integer IVs, truncate the IV before computing IVInit + BECount. 2359 if (SE->getTypeSizeInBits(IVInit->getType()) 2360 > SE->getTypeSizeInBits(IVCount->getType())) 2361 IVInit = SE->getTruncateExpr(IVInit, IVCount->getType()); 2362 2363 IVLimit = SE->getAddExpr(IVInit, IVCount); 2364 } 2365 // Expand the code for the iteration count. 2366 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2367 IRBuilder<> Builder(BI); 2368 assert(SE->isLoopInvariant(IVLimit, L) && 2369 "Computed iteration count is not loop invariant!"); 2370 // Ensure that we generate the same type as IndVar, or a smaller integer 2371 // type. In the presence of null pointer values, we have an integer type 2372 // SCEV expression (IVInit) for a pointer type IV value (IndVar). 2373 Type *LimitTy = IVCount->getType()->isPointerTy() ? 2374 IndVar->getType() : IVCount->getType(); 2375 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); 2376 } 2377 } 2378 2379 /// This method rewrites the exit condition of the loop to be a canonical != 2380 /// comparison against the incremented loop induction variable. This pass is 2381 /// able to rewrite the exit tests of any loop where the SCEV analysis can 2382 /// determine a loop-invariant trip count of the loop, which is actually a much 2383 /// broader range than just linear tests. 2384 bool IndVarSimplify:: 2385 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 2386 const SCEV *BackedgeTakenCount, 2387 PHINode *IndVar, SCEVExpander &Rewriter) { 2388 assert(isLoopCounter(IndVar, L, SE)); 2389 assert(L->getLoopLatch() && "Loop no longer in simplified form?"); 2390 Instruction * const IncVar = 2391 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch())); 2392 2393 // Initialize CmpIndVar and IVCount to their preincremented values. 2394 Value *CmpIndVar = IndVar; 2395 const SCEV *IVCount = BackedgeTakenCount; 2396 2397 // If the exiting block is the same as the backedge block, we prefer to 2398 // compare against the post-incremented value, otherwise we must compare 2399 // against the preincremented value. 
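  //
  // For illustration only (hypothetical IR), a latch-exiting loop typically
  // ends up with an exit test of the form:
  //   %iv.next = add nuw nsw i64 %iv, 1
  //   %exitcond = icmp ne i64 %iv.next, %trip.count
  //   br i1 %exitcond, label %loop.body, label %exit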
2400 if (ExitingBB == L->getLoopLatch()) { 2401 bool SafeToPostInc = IndVar->getType()->isIntegerTy(); 2402 if (!SafeToPostInc) { 2403 // For pointer IVs, we chose to not strip inbounds which requires us not 2404 // to add a potentially UB introducing use. We need to either a) show 2405 // the loop test we're modifying is already in post-inc form, or b) show 2406 // that adding a use must not introduce UB. 2407 ICmpInst *LoopTest = getLoopTest(L, ExitingBB); 2408 SafeToPostInc = LoopTest->getOperand(0) == IncVar || 2409 LoopTest->getOperand(1) == IncVar || 2410 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT); 2411 } 2412 if (SafeToPostInc) { 2413 // Add one to the "backedge-taken" count to get the trip count. 2414 // This addition may overflow, which is valid as long as the comparison 2415 // is truncated to BackedgeTakenCount->getType(). 2416 IVCount = SE->getAddExpr(BackedgeTakenCount, 2417 SE->getOne(BackedgeTakenCount->getType())); 2418 // The BackedgeTaken expression contains the number of times that the 2419 // backedge branches to the loop header. This is one less than the 2420 // number of times the loop executes, so use the incremented indvar. 2421 CmpIndVar = IncVar; 2422 } 2423 } 2424 2425 // It may be necessary to drop nowrap flags on the incrementing instruction 2426 // if either LFTR moves from a pre-inc check to a post-inc check (in which 2427 // case the increment might have previously been poison on the last iteration 2428 // only) or if LFTR switches to a different IV that was previously dynamically 2429 // dead (and as such may be arbitrarily poison). We remove any nowrap flags 2430 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc 2431 // check), because the pre-inc addrec flags may be adopted from the original 2432 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags. 2433 // TODO: This handling is inaccurate for one case: If we switch to a 2434 // dynamically dead IV that wraps on the first loop iteration only, which is 2435 // not covered by the post-inc addrec. (If the new IV was not dynamically 2436 // dead, it could not be poison on the first iteration in the first place.) 2437 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) { 2438 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar)); 2439 if (BO->hasNoUnsignedWrap()) 2440 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap()); 2441 if (BO->hasNoSignedWrap()) 2442 BO->setHasNoSignedWrap(AR->hasNoSignedWrap()); 2443 } 2444 2445 Value *ExitCnt = genLoopLimit(IndVar, ExitingBB, IVCount, L, Rewriter, SE); 2446 assert(ExitCnt->getType()->isPointerTy() == 2447 IndVar->getType()->isPointerTy() && 2448 "genLoopLimit missed a cast"); 2449 2450 // Insert a new icmp_ne or icmp_eq instruction before the branch. 2451 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2452 ICmpInst::Predicate P; 2453 if (L->contains(BI->getSuccessor(0))) 2454 P = ICmpInst::ICMP_NE; 2455 else 2456 P = ICmpInst::ICMP_EQ; 2457 2458 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 2459 << " LHS:" << *CmpIndVar << '\n' 2460 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==") 2461 << "\n" 2462 << " RHS:\t" << *ExitCnt << "\n" 2463 << " IVCount:\t" << *IVCount << "\n"); 2464 2465 IRBuilder<> Builder(BI); 2466 2467 // The new loop exit condition should reuse the debug location of the 2468 // original loop exit condition. 
2469 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition())) 2470 Builder.SetCurrentDebugLocation(Cond->getDebugLoc()); 2471 2472 // LFTR can ignore IV overflow and truncate to the width of 2473 // BECount. This avoids materializing the add(zext(add)) expression. 2474 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); 2475 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); 2476 if (CmpIndVarSize > ExitCntSize) { 2477 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 2478 const SCEV *ARStart = AR->getStart(); 2479 const SCEV *ARStep = AR->getStepRecurrence(*SE); 2480 // For constant IVCount, avoid truncation. 2481 if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) { 2482 const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt(); 2483 APInt Count = cast<SCEVConstant>(IVCount)->getAPInt(); 2484 // Note that the post-inc value of BackedgeTakenCount may have overflowed 2485 // above such that IVCount is now zero. 2486 if (IVCount != BackedgeTakenCount && Count == 0) { 2487 Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize); 2488 ++Count; 2489 } 2490 else 2491 Count = Count.zext(CmpIndVarSize); 2492 APInt NewLimit; 2493 if (cast<SCEVConstant>(ARStep)->getValue()->isNegative()) 2494 NewLimit = Start - Count; 2495 else 2496 NewLimit = Start + Count; 2497 ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit); 2498 2499 LLVM_DEBUG(dbgs() << " Widen RHS:\t" << *ExitCnt << "\n"); 2500 } else { 2501 // We try to extend trip count first. If that doesn't work we truncate IV. 2502 // Zext(trunc(IV)) == IV implies equivalence of the following two: 2503 // Trunc(IV) == ExitCnt and IV == zext(ExitCnt). Similarly for sext. If 2504 // one of the two holds, extend the trip count, otherwise we truncate IV. 2505 bool Extended = false; 2506 const SCEV *IV = SE->getSCEV(CmpIndVar); 2507 const SCEV *ZExtTrunc = 2508 SE->getZeroExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar), 2509 ExitCnt->getType()), 2510 CmpIndVar->getType()); 2511 2512 if (ZExtTrunc == IV) { 2513 Extended = true; 2514 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(), 2515 "wide.trip.count"); 2516 } else { 2517 const SCEV *SExtTrunc = 2518 SE->getSignExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar), 2519 ExitCnt->getType()), 2520 CmpIndVar->getType()); 2521 if (SExtTrunc == IV) { 2522 Extended = true; 2523 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(), 2524 "wide.trip.count"); 2525 } 2526 } 2527 2528 if (!Extended) 2529 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), 2530 "lftr.wideiv"); 2531 } 2532 } 2533 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); 2534 Value *OrigCond = BI->getCondition(); 2535 // It's tempting to use replaceAllUsesWith here to fully replace the old 2536 // comparison, but that's not immediately safe, since users of the old 2537 // comparison may not be dominated by the new comparison. Instead, just 2538 // update the branch to use the new comparison; in the common case this 2539 // will make old comparison dead. 2540 BI->setCondition(Cond); 2541 DeadInsts.push_back(OrigCond); 2542 2543 ++NumLFTR; 2544 return true; 2545 } 2546 2547 //===----------------------------------------------------------------------===// 2548 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders. 
2549 //===----------------------------------------------------------------------===// 2550 2551 /// If there's a single exit block, sink any loop-invariant values that 2552 /// were defined in the preheader but not used inside the loop into the 2553 /// exit block to reduce register pressure in the loop. 2554 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { 2555 BasicBlock *ExitBlock = L->getExitBlock(); 2556 if (!ExitBlock) return false; 2557 2558 BasicBlock *Preheader = L->getLoopPreheader(); 2559 if (!Preheader) return false; 2560 2561 bool MadeAnyChanges = false; 2562 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); 2563 BasicBlock::iterator I(Preheader->getTerminator()); 2564 while (I != Preheader->begin()) { 2565 --I; 2566 // New instructions were inserted at the end of the preheader. 2567 if (isa<PHINode>(I)) 2568 break; 2569 2570 // Don't move instructions which might have side effects, since the side 2571 // effects need to complete before instructions inside the loop. Also don't 2572 // move instructions which might read memory, since the loop may modify 2573 // memory. Note that it's okay if the instruction might have undefined 2574 // behavior: LoopSimplify guarantees that the preheader dominates the exit 2575 // block. 2576 if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 2577 continue; 2578 2579 // Skip debug info intrinsics. 2580 if (isa<DbgInfoIntrinsic>(I)) 2581 continue; 2582 2583 // Skip eh pad instructions. 2584 if (I->isEHPad()) 2585 continue; 2586 2587 // Don't sink alloca: we never want to sink static alloca's out of the 2588 // entry block, and correctly sinking dynamic alloca's requires 2589 // checks for stacksave/stackrestore intrinsics. 2590 // FIXME: Refactor this check somehow? 2591 if (isa<AllocaInst>(I)) 2592 continue; 2593 2594 // Determine if there is a use in or before the loop (direct or 2595 // otherwise). 2596 bool UsedInLoop = false; 2597 for (Use &U : I->uses()) { 2598 Instruction *User = cast<Instruction>(U.getUser()); 2599 BasicBlock *UseBB = User->getParent(); 2600 if (PHINode *P = dyn_cast<PHINode>(User)) { 2601 unsigned i = 2602 PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 2603 UseBB = P->getIncomingBlock(i); 2604 } 2605 if (UseBB == Preheader || L->contains(UseBB)) { 2606 UsedInLoop = true; 2607 break; 2608 } 2609 } 2610 2611 // If there is, the def must remain in the preheader. 2612 if (UsedInLoop) 2613 continue; 2614 2615 // Otherwise, sink it to the exit block. 2616 Instruction *ToMove = &*I; 2617 bool Done = false; 2618 2619 if (I != Preheader->begin()) { 2620 // Skip debug info intrinsics. 2621 do { 2622 --I; 2623 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); 2624 2625 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) 2626 Done = true; 2627 } else { 2628 Done = true; 2629 } 2630 2631 MadeAnyChanges = true; 2632 ToMove->moveBefore(*ExitBlock, InsertPt); 2633 if (Done) break; 2634 InsertPt = ToMove->getIterator(); 2635 } 2636 2637 return MadeAnyChanges; 2638 } 2639 2640 //===----------------------------------------------------------------------===// 2641 // IndVarSimplify driver. Manage several subpasses of IV simplification. 2642 //===----------------------------------------------------------------------===// 2643 2644 bool IndVarSimplify::run(Loop *L) { 2645 // We need (and expect!) the incoming loop to be in LCSSA. 
2646 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2647 "LCSSA required to run indvars!"); 2648 bool Changed = false; 2649 2650 // If LoopSimplify form is not available, stay out of trouble. Some notes: 2651 // - LSR currently only supports LoopSimplify-form loops. Indvars' 2652 // canonicalization can be a pessimization without LSR to "clean up" 2653 // afterwards. 2654 // - We depend on having a preheader; in particular, 2655 // Loop::getCanonicalInductionVariable only supports loops with preheaders, 2656 // and we're in trouble if we can't find the induction variable even when 2657 // we've manually inserted one. 2658 // - LFTR relies on having a single backedge. 2659 if (!L->isLoopSimplifyForm()) 2660 return false; 2661 2662 // If there are any floating-point recurrences, attempt to 2663 // transform them to use integer recurrences. 2664 Changed |= rewriteNonIntegerIVs(L); 2665 2666 const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); 2667 2668 // Create a rewriter object which we'll use to transform the code with. 2669 SCEVExpander Rewriter(*SE, DL, "indvars"); 2670 #ifndef NDEBUG 2671 Rewriter.setDebugType(DEBUG_TYPE); 2672 #endif 2673 2674 // Eliminate redundant IV users. 2675 // 2676 // Simplification works best when run before other consumers of SCEV. We 2677 // attempt to avoid evaluating SCEVs for sign/zero extend operations until 2678 // other expressions involving loop IVs have been evaluated. This helps SCEV 2679 // set no-wrap flags before normalizing sign/zero extension. 2680 Rewriter.disableCanonicalMode(); 2681 Changed |= simplifyAndExtend(L, Rewriter, LI); 2682 2683 // Check to see if this loop has a computable loop-invariant execution count. 2684 // If so, this means that we can compute the final value of any expressions 2685 // that are recurrent in the loop, and substitute the exit values from the 2686 // loop into any instructions outside of the loop that use the final values of 2687 // the current expressions. 2688 // 2689 if (ReplaceExitValue != NeverRepl && 2690 !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) 2691 Changed |= rewriteLoopExitValues(L, Rewriter); 2692 2693 // Eliminate redundant IV cycles. 2694 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); 2695 2696 // If we have a trip count expression, rewrite the loop's exit condition 2697 // using it. 2698 if (!DisableLFTR) { 2699 // For the moment, we only do LFTR for single exit loops. The code is 2700 // structured as it is in the expectation of generalization to multi-exit 2701 // loops in the near future. See D62625 for context. 2702 SmallVector<BasicBlock*, 16> ExitingBlocks; 2703 if (auto *ExitingBB = L->getExitingBlock()) 2704 ExitingBlocks.push_back(ExitingBB); 2705 for (BasicBlock *ExitingBB : ExitingBlocks) { 2706 // Can't rewrite non-branch yet. 2707 if (!isa<BranchInst>(ExitingBB->getTerminator())) 2708 continue; 2709 2710 if (!needsLFTR(L, ExitingBB)) 2711 continue; 2712 2713 const SCEV *BETakenCount = SE->getExitCount(L, ExitingBB); 2714 if (isa<SCEVCouldNotCompute>(BETakenCount)) 2715 continue; 2716 2717 // Better to fold to true (TODO: do so!) 2718 if (BETakenCount->isZero()) 2719 continue; 2720 2721 PHINode *IndVar = FindLoopCounter(L, ExitingBB, BETakenCount, SE, DT); 2722 if (!IndVar) 2723 continue; 2724 2725 // Avoid high cost expansions. Note: This heuristic is questionable in 2726 // that our definition of "high cost" is not exactly principled. 
2727       if (Rewriter.isHighCostExpansion(BETakenCount, L))
2728         continue;
2729 
2730       // Check preconditions for proper SCEVExpander operation. SCEV does not
2731       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
2732       // any pass that uses the SCEVExpander must do it. This does not work
2733       // well for loop passes because SCEVExpander makes assumptions about
2734       // all loops, while LoopPassManager only forces the current loop to be
2735       // simplified.
2736       //
2737       // FIXME: SCEV expansion has no way to bail out, so the caller must
2738       // explicitly check any assumptions made by SCEV. Brittle.
2739       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BETakenCount);
2740       if (!AR || AR->getLoop()->getLoopPreheader())
2741         Changed |= linearFunctionTestReplace(L, ExitingBB,
2742                                              BETakenCount, IndVar,
2743                                              Rewriter);
2744     }
2745   }
2746   // Clear the rewriter cache, because values that are in the rewriter's cache
2747   // can be deleted in the loop below, causing the AssertingVH in the cache to
2748   // trigger.
2749   Rewriter.clear();
2750 
2751   // Now that we're done iterating through lists, clean up any instructions
2752   // which are now dead.
2753   while (!DeadInsts.empty())
2754     if (Instruction *Inst =
2755             dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2756       Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
2757 
2758   // The Rewriter may not be used from this point on.
2759 
2760   // Loop-invariant instructions in the preheader that aren't used in the
2761   // loop may be sunk below the loop to reduce register pressure.
2762   Changed |= sinkUnusedInvariants(L);
2763 
2764   // rewriteFirstIterationLoopExitValues does not rely on the computation of
2765   // trip count and therefore can further simplify exit values in addition to
2766   // rewriteLoopExitValues.
2767   Changed |= rewriteFirstIterationLoopExitValues(L);
2768 
2769   // Clean up dead instructions.
2770   Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
2771 
2772   // Check a post-condition.
2773   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2774          "Indvars did not preserve LCSSA!");
2775 
2776   // Verify that LFTR and any other changes have not interfered with SCEV's
2777   // ability to compute the trip count.
2778 #ifndef NDEBUG 2779 if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { 2780 SE->forgetLoop(L); 2781 const SCEV *NewBECount = SE->getBackedgeTakenCount(L); 2782 if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) < 2783 SE->getTypeSizeInBits(NewBECount->getType())) 2784 NewBECount = SE->getTruncateOrNoop(NewBECount, 2785 BackedgeTakenCount->getType()); 2786 else 2787 BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, 2788 NewBECount->getType()); 2789 assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV"); 2790 } 2791 #endif 2792 2793 return Changed; 2794 } 2795 2796 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, 2797 LoopStandardAnalysisResults &AR, 2798 LPMUpdater &) { 2799 Function *F = L.getHeader()->getParent(); 2800 const DataLayout &DL = F->getParent()->getDataLayout(); 2801 2802 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI); 2803 if (!IVS.run(&L)) 2804 return PreservedAnalyses::all(); 2805 2806 auto PA = getLoopPassPreservedAnalyses(); 2807 PA.preserveSet<CFGAnalyses>(); 2808 return PA; 2809 } 2810 2811 namespace { 2812 2813 struct IndVarSimplifyLegacyPass : public LoopPass { 2814 static char ID; // Pass identification, replacement for typeid 2815 2816 IndVarSimplifyLegacyPass() : LoopPass(ID) { 2817 initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry()); 2818 } 2819 2820 bool runOnLoop(Loop *L, LPPassManager &LPM) override { 2821 if (skipLoop(L)) 2822 return false; 2823 2824 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 2825 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 2826 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2827 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); 2828 auto *TLI = TLIP ? &TLIP->getTLI() : nullptr; 2829 auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); 2830 auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr; 2831 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 2832 2833 IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI); 2834 return IVS.run(L); 2835 } 2836 2837 void getAnalysisUsage(AnalysisUsage &AU) const override { 2838 AU.setPreservesCFG(); 2839 getLoopAnalysisUsage(AU); 2840 } 2841 }; 2842 2843 } // end anonymous namespace 2844 2845 char IndVarSimplifyLegacyPass::ID = 0; 2846 2847 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", 2848 "Induction Variable Simplification", false, false) 2849 INITIALIZE_PASS_DEPENDENCY(LoopPass) 2850 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars", 2851 "Induction Variable Simplification", false, false) 2852 2853 Pass *llvm::createIndVarSimplifyPass() { 2854 return new IndVarSimplifyLegacyPass(); 2855 } 2856
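// Example usage, for illustration only (assuming a built `opt` binary):
//   opt -indvars -S input.ll            (legacy pass manager)
//   opt -passes=indvars -S input.ll     (new pass manager)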