//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
"llvm/Transforms/Utils/SimplifyIndVar.h" 84 #include <cassert> 85 #include <cstdint> 86 #include <utility> 87 88 using namespace llvm; 89 90 #define DEBUG_TYPE "indvars" 91 92 STATISTIC(NumWidened , "Number of indvars widened"); 93 STATISTIC(NumReplaced , "Number of exit values replaced"); 94 STATISTIC(NumLFTR , "Number of loop exit tests replaced"); 95 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); 96 STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); 97 98 // Trip count verification can be enabled by default under NDEBUG if we 99 // implement a strong expression equivalence checker in SCEV. Until then, we 100 // use the verify-indvars flag, which may assert in some cases. 101 static cl::opt<bool> VerifyIndvars( 102 "verify-indvars", cl::Hidden, 103 cl::desc("Verify the ScalarEvolution result after running indvars")); 104 105 enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl }; 106 107 static cl::opt<ReplaceExitVal> ReplaceExitValue( 108 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), 109 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), 110 cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), 111 clEnumValN(OnlyCheapRepl, "cheap", 112 "only replace exit value when the cost is cheap"), 113 clEnumValN(NoHardUse, "noharduse", 114 "only replace exit values when loop def likely dead"), 115 clEnumValN(AlwaysRepl, "always", 116 "always replace exit value whenever possible"))); 117 118 static cl::opt<bool> UsePostIncrementRanges( 119 "indvars-post-increment-ranges", cl::Hidden, 120 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), 121 cl::init(true)); 122 123 static cl::opt<bool> 124 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), 125 cl::desc("Disable Linear Function Test Replace optimization")); 126 127 namespace { 128 129 struct RewritePhi; 130 131 class IndVarSimplify { 132 LoopInfo *LI; 133 ScalarEvolution *SE; 134 DominatorTree *DT; 135 const DataLayout &DL; 136 TargetLibraryInfo *TLI; 137 const TargetTransformInfo *TTI; 138 139 SmallVector<WeakTrackingVH, 16> DeadInsts; 140 141 bool isValidRewrite(Value *FromVal, Value *ToVal); 142 143 bool handleFloatingPointIV(Loop *L, PHINode *PH); 144 bool rewriteNonIntegerIVs(Loop *L); 145 146 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); 147 148 bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet); 149 bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); 150 bool rewriteFirstIterationLoopExitValues(Loop *L); 151 bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const; 152 153 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 154 const SCEV *ExitCount, 155 PHINode *IndVar, SCEVExpander &Rewriter); 156 157 bool sinkUnusedInvariants(Loop *L); 158 159 public: 160 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, 161 const DataLayout &DL, TargetLibraryInfo *TLI, 162 TargetTransformInfo *TTI) 163 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {} 164 165 bool run(Loop *L); 166 }; 167 168 } // end anonymous namespace 169 170 /// Return true if the SCEV expansion generated by the rewriter can replace the 171 /// original value. SCEV guarantees that it produces the same value, but the way 172 /// it is produced may be illegal IR. Ideally, this function will only be 173 /// called for verification. 
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;

  // Ith incoming value.
  unsigned Ith;

  // Exit value after expansion.
  Value *Val;

  // True if expanding the exit value is considered high cost.
  bool HighCost;

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
  SmallPtrSet<const Instruction *, 8> Visited;
  SmallVector<const Instruction *, 8> WorkList;
  Visited.insert(I);
  WorkList.push_back(I);
  while (!WorkList.empty()) {
    const Instruction *Curr = WorkList.pop_back_val();
    // This use is outside the loop, nothing to do.
    if (!L->contains(Curr))
      continue;
    // Do we assume it is a "hard" use which will not be eliminated easily?
    if (Curr->mayHaveSideEffects())
      return true;
    // Otherwise, add all its users to worklist.
    for (auto U : Curr->users()) {
      auto *UI = cast<Instruction>(U);
      if (Visited.insert(UI).second)
        WorkList.push_back(UI);
    }
  }
  return false;
}

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not integer or is not defined
        // in the loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop. Evaluate the value it
        // contains when the loop exits, if possible.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (!SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE))
          continue;

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which can not be optimized
        // away. Avoid doing so unless we know we have a value which computes
        // the ExitValue already. TODO: This should be merged into SCEV
        // expander to leverage its knowledge of existing expressions.
        if (ReplaceExitValue != AlwaysRepl &&
            !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
            hasHardUserWithinLoop(L, Inst))
          continue;

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);

        LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
                          << '\n'
                          << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

#ifndef NDEBUG
        // If we reuse an instruction from a loop which is neither L nor one of
        // its containing loops, we end up breaking LCSSA form for this loop by
        // creating a new use of its instruction.
        if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
          if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
            if (EVL != L)
              assert(EVL->contains(L) && "LCSSA breach detected!");
#endif

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  bool Changed = false;
  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, delete it. Don't do it now to avoid
    // invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

/// Check whether it is possible to delete the loop after rewriting exit
/// value. If it is possible, ignore ReplaceExitValue and do rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // In the LoopDeletion pass a Loop can be deleted when ExitingBlocks.size() > 1.
  // We skip the multiple-ExitingBlocks case here for simplicity.
  // TODO: If we see a testcase where a loop with multiple ExitingBlocks can be
  // deleted after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
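/// For example (illustrative IR, arbitrary names): if the narrow IV is an i32
/// and one of its users is
///   %wide = sext i32 %iv to i64
/// on a target where i64 is a legal integer type, this records i64 as the
/// widest native type and notes that the extension seen was signed.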
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than arithmetic on the narrower indvar. We check only the cost
  // of ADD because at least an ADD is required to increment the induction
  // variable. We could compute more comprehensively the cost of all
  // instructions on the induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def. This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative.  Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo *LI;
  Loop *L;
  ScalarEvolution *SE;
  DominatorTree *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// 0 for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop? If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}

/// If the narrow use is a compare instruction, then widen the compare
//  (and possibly the other operand).  The extend operation is hoisted into the
//  loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension).  For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive.  Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}

/// If the narrow use is an instruction whose two operands are the defining
/// instruction of DU and a load instruction, then we have the following:
/// if the load is hoisted outside the loop, then we do not reach this function
/// because scalar evolution analysis works fine in widenIVUse with variables
/// hoisted outside the loop, and efficient code is subsequently generated by
/// not emitting truncate instructions. But when the load is not hoisted
/// (whether due to a limitation in alias analysis or due to a true legality
/// constraint), then scalar evolution cannot proceed with loop variant values
/// and inefficient code is generated. This function handles the non-hoisted
/// load special case by making the optimization generate the same type of code
/// for hoisted and non-hoisted loads (widen the use and eliminate the sign or
/// zero extend instruction). This special case is important especially when
/// the induction variables are affecting the addressing mode in code
/// generation.
bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return false;

  // The operand that is not defined by NarrowDef of DU. Let's call it the
  // other operand.
  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
         "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(NarrowUse);
  ExtendKind ExtKind = getExtendKind(NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return false;

  // We are interested in the other operand being a load instruction.
  // But, we should look into relaxing this restriction later on.
  auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
  if (I && I->getOpcode() != Instruction::Load)
    return false;

  // Verifying that Defining operand is an AddRec
  const SCEV *Op1 = SE->getSCEV(WideDef);
  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
    return false;
  // Verifying that other operand is an Extend.
  if (ExtKind == SignExtended) {
    if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
      return false;
  } else {
    if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
      return false;
  }

  if (ExtKind == SignExtended) {
    for (Use &U : NarrowUse->uses()) {
      SExtInst *User = dyn_cast<SExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  } else { // ExtKind == ZeroExtended
    for (Use &U : NarrowUse->uses()) {
      ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  }

  return true;
}

/// Special Case for widening with variant Loads (see
/// WidenIV::widenWithVariantLoadUse). This is the code generation part.
void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  ExtendKind ExtKind = getExtendKind(NarrowDef);

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  // Generating a widening use instruction.
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      ExtKind, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      ExtKind, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);

  if (ExtKind == SignExtended)
    ExtendKindMap[NarrowUse] = SignExtended;
  else
    ExtendKindMap[NarrowUse] = ZeroExtended;

  // Update the Use.
  if (ExtKind == SignExtended) {
    for (Use &U : NarrowUse->uses()) {
      SExtInst *User = dyn_cast<SExtInst>(U.getUser());
      if (User && User->getType() == WideType) {
        LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
                          << *WideBO << "\n");
        ++NumElimExt;
        User->replaceAllUsesWith(WideBO);
        DeadInsts.emplace_back(User);
      }
    }
  } else { // ExtKind == ZeroExtended
    for (Use &U : NarrowUse->uses()) {
      ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
      if (User && User->getType() == WideType) {
        LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
                          << *WideBO << "\n");
        ++NumElimExt;
        User->replaceAllUsesWith(WideBO);
        DeadInsts.emplace_back(User);
      }
    }
  }
}

/// Determine whether an individual user of the narrow IV can be widened. If so,
/// return the wide clone of the user.
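/// For instance (illustrative IR, arbitrary names): once a narrow i32 IV %i
/// has a wide i64 counterpart %i.wide, a user such as
///   %a = add nsw i32 %i, 1
/// may be replaced by a wide clone
///   %a.wide = add nsw i64 %i.wide, 1
/// so that a later sext/zext of %a can be eliminated.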
1516 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1517   assert(ExtendKindMap.count(DU.NarrowDef) &&
1518          "Should already know the kind of extension used to widen NarrowDef");
1519 
1520   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1521   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1522     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1523       // For LCSSA phis, sink the truncate outside the loop.
1524       // After SimplifyCFG most loop exit targets have a single predecessor.
1525       // Otherwise fall back to a truncate within the loop.
1526       if (UsePhi->getNumOperands() != 1)
1527         truncateIVUse(DU, DT, LI);
1528       else {
1529         // Widening the PHI requires us to insert a trunc. The logical place
1530         // for this trunc is in the same BB as the PHI. This is not possible if
1531         // the BB is terminated by a catchswitch.
1532         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1533           return nullptr;
1534 
1535         PHINode *WidePhi =
1536           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1537                           UsePhi);
1538         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1539         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1540         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1541         UsePhi->replaceAllUsesWith(Trunc);
1542         DeadInsts.emplace_back(UsePhi);
1543         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1544                           << *WidePhi << "\n");
1545       }
1546       return nullptr;
1547     }
1548   }
1549 
1550   // This narrow use can be widened by a sext if it's non-negative or its narrow
1551   // def was widened by a sext. Same for zext.
1552   auto canWidenBySExt = [&]() {
1553     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1554   };
1555   auto canWidenByZExt = [&]() {
1556     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1557   };
1558 
1559   // Our raison d'etre! Eliminate sign and zero extension.
1560   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1561       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1562     Value *NewDef = DU.WideDef;
1563     if (DU.NarrowUse->getType() != WideType) {
1564       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1565       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1566       if (CastWidth < IVWidth) {
1567         // The cast isn't as wide as the IV, so insert a Trunc.
1568         IRBuilder<> Builder(DU.NarrowUse);
1569         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1570       }
1571       else {
1572         // A wider extend was hidden behind a narrower one. This may induce
1573         // another round of IV widening in which the intermediate IV becomes
1574         // dead. It should be very rare.
1575         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1576                           << " not wide enough to subsume " << *DU.NarrowUse
1577                           << "\n");
1578         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1579         NewDef = DU.NarrowUse;
1580       }
1581     }
1582     if (NewDef != DU.NarrowUse) {
1583       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1584                         << " replaced by " << *DU.WideDef << "\n");
1585       ++NumElimExt;
1586       DU.NarrowUse->replaceAllUsesWith(NewDef);
1587       DeadInsts.emplace_back(DU.NarrowUse);
1588     }
1589     // Now that the extend is gone, we want to expose its uses for potential
1590     // further simplification. We don't need to directly inform SimplifyIVUsers
1591     // of the new users, because their parent IV will be processed later as a
1592     // new loop phi.
If we preserved IVUsers analysis, we would also want to 1593 // push the uses of WideDef here. 1594 1595 // No further widening is needed. The deceased [sz]ext had done it for us. 1596 return nullptr; 1597 } 1598 1599 // Does this user itself evaluate to a recurrence after widening? 1600 WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU); 1601 if (!WideAddRec.first) 1602 WideAddRec = getWideRecurrence(DU); 1603 1604 assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown)); 1605 if (!WideAddRec.first) { 1606 // If use is a loop condition, try to promote the condition instead of 1607 // truncating the IV first. 1608 if (widenLoopCompare(DU)) 1609 return nullptr; 1610 1611 // We are here about to generate a truncate instruction that may hurt 1612 // performance because the scalar evolution expression computed earlier 1613 // in WideAddRec.first does not indicate a polynomial induction expression. 1614 // In that case, look at the operands of the use instruction to determine 1615 // if we can still widen the use instead of truncating its operand. 1616 if (widenWithVariantLoadUse(DU)) { 1617 widenWithVariantLoadUseCodegen(DU); 1618 return nullptr; 1619 } 1620 1621 // This user does not evaluate to a recurrence after widening, so don't 1622 // follow it. Instead insert a Trunc to kill off the original use, 1623 // eventually isolating the original narrow IV so it can be removed. 1624 truncateIVUse(DU, DT, LI); 1625 return nullptr; 1626 } 1627 // Assume block terminators cannot evaluate to a recurrence. We can't to 1628 // insert a Trunc after a terminator if there happens to be a critical edge. 1629 assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() && 1630 "SCEV is not expected to evaluate a block terminator"); 1631 1632 // Reuse the IV increment that SCEVExpander created as long as it dominates 1633 // NarrowUse. 1634 Instruction *WideUse = nullptr; 1635 if (WideAddRec.first == WideIncExpr && 1636 Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) 1637 WideUse = WideInc; 1638 else { 1639 WideUse = cloneIVUser(DU, WideAddRec.first); 1640 if (!WideUse) 1641 return nullptr; 1642 } 1643 // Evaluation of WideAddRec ensured that the narrow expression could be 1644 // extended outside the loop without overflow. This suggests that the wide use 1645 // evaluates to the same expression as the extended narrow use, but doesn't 1646 // absolutely guarantee it. Hence the following failsafe check. In rare cases 1647 // where it fails, we simply throw away the newly created wide use. 1648 if (WideAddRec.first != SE->getSCEV(WideUse)) { 1649 LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " 1650 << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first 1651 << "\n"); 1652 DeadInsts.emplace_back(WideUse); 1653 return nullptr; 1654 } 1655 1656 ExtendKindMap[DU.NarrowUse] = WideAddRec.second; 1657 // Returning WideUse pushes it on the worklist. 1658 return WideUse; 1659 } 1660 1661 /// Add eligible users of NarrowDef to NarrowIVUsers. 1662 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { 1663 const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); 1664 bool NonNegativeDef = 1665 SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, 1666 SE->getConstant(NarrowSCEV->getType(), 0)); 1667 for (User *U : NarrowDef->users()) { 1668 Instruction *NarrowUser = cast<Instruction>(U); 1669 1670 // Handle data flow merges and bizarre phi cycles. 
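    // (The Widened set ensures each user is queued at most once, even if it
    // uses NarrowDef in more than one operand or is reached again when a
    // later narrow def is processed.)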
1671 if (!Widened.insert(NarrowUser).second) 1672 continue; 1673 1674 bool NonNegativeUse = false; 1675 if (!NonNegativeDef) { 1676 // We might have a control-dependent range information for this context. 1677 if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser)) 1678 NonNegativeUse = RangeInfo->getSignedMin().isNonNegative(); 1679 } 1680 1681 NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, 1682 NonNegativeDef || NonNegativeUse); 1683 } 1684 } 1685 1686 /// Process a single induction variable. First use the SCEVExpander to create a 1687 /// wide induction variable that evaluates to the same recurrence as the 1688 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's 1689 /// def-use chain. After widenIVUse has processed all interesting IV users, the 1690 /// narrow IV will be isolated for removal by DeleteDeadPHIs. 1691 /// 1692 /// It would be simpler to delete uses as they are processed, but we must avoid 1693 /// invalidating SCEV expressions. 1694 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { 1695 // Is this phi an induction variable? 1696 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); 1697 if (!AddRec) 1698 return nullptr; 1699 1700 // Widen the induction variable expression. 1701 const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended 1702 ? SE->getSignExtendExpr(AddRec, WideType) 1703 : SE->getZeroExtendExpr(AddRec, WideType); 1704 1705 assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && 1706 "Expect the new IV expression to preserve its type"); 1707 1708 // Can the IV be extended outside the loop without overflow? 1709 AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); 1710 if (!AddRec || AddRec->getLoop() != L) 1711 return nullptr; 1712 1713 // An AddRec must have loop-invariant operands. Since this AddRec is 1714 // materialized by a loop header phi, the expression cannot have any post-loop 1715 // operands, so they must dominate the loop header. 1716 assert( 1717 SE->properlyDominates(AddRec->getStart(), L->getHeader()) && 1718 SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) && 1719 "Loop header phi recurrence inputs do not dominate the loop"); 1720 1721 // Iterate over IV uses (including transitive ones) looking for IV increments 1722 // of the form 'add nsw %iv, <const>'. For each increment and each use of 1723 // the increment calculate control-dependent range information basing on 1724 // dominating conditions inside of the loop (e.g. a range check inside of the 1725 // loop). Calculated ranges are stored in PostIncRangeInfos map. 1726 // 1727 // Control-dependent range information is later used to prove that a narrow 1728 // definition is not negative (see pushNarrowIVUsers). It's difficult to do 1729 // this on demand because when pushNarrowIVUsers needs this information some 1730 // of the dominating conditions might be already widened. 1731 if (UsePostIncrementRanges) 1732 calculatePostIncRanges(OrigPhi); 1733 1734 // The rewriter provides a value for the desired IV expression. This may 1735 // either find an existing phi or materialize a new one. Either way, we 1736 // expect a well-formed cyclic phi-with-increments. i.e. any operand not part 1737 // of the phi-SCC dominates the loop entry. 
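  // As an illustrative example (names are hypothetical), for a narrow IV
  //   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
  // the expansion below would typically produce a header phi such as
  //   %iv.wide = phi i64 [ 0, %preheader ], [ %iv.wide.next, %latch ]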
1738 Instruction *InsertPt = &L->getHeader()->front(); 1739 WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt)); 1740 1741 // Remembering the WideIV increment generated by SCEVExpander allows 1742 // widenIVUse to reuse it when widening the narrow IV's increment. We don't 1743 // employ a general reuse mechanism because the call above is the only call to 1744 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. 1745 if (BasicBlock *LatchBlock = L->getLoopLatch()) { 1746 WideInc = 1747 cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); 1748 WideIncExpr = SE->getSCEV(WideInc); 1749 // Propagate the debug location associated with the original loop increment 1750 // to the new (widened) increment. 1751 auto *OrigInc = 1752 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); 1753 WideInc->setDebugLoc(OrigInc->getDebugLoc()); 1754 } 1755 1756 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); 1757 ++NumWidened; 1758 1759 // Traverse the def-use chain using a worklist starting at the original IV. 1760 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); 1761 1762 Widened.insert(OrigPhi); 1763 pushNarrowIVUsers(OrigPhi, WidePhi); 1764 1765 while (!NarrowIVUsers.empty()) { 1766 NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); 1767 1768 // Process a def-use edge. This may replace the use, so don't hold a 1769 // use_iterator across it. 1770 Instruction *WideUse = widenIVUse(DU, Rewriter); 1771 1772 // Follow all def-use edges from the previous narrow use. 1773 if (WideUse) 1774 pushNarrowIVUsers(DU.NarrowUse, WideUse); 1775 1776 // widenIVUse may have removed the def-use edge. 1777 if (DU.NarrowDef->use_empty()) 1778 DeadInsts.emplace_back(DU.NarrowDef); 1779 } 1780 1781 // Attach any debug information to the new PHI. Since OrigPhi and WidePHI 1782 // evaluate the same recurrence, we can just copy the debug info over. 1783 SmallVector<DbgValueInst *, 1> DbgValues; 1784 llvm::findDbgValues(DbgValues, OrigPhi); 1785 auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(), 1786 ValueAsMetadata::get(WidePhi)); 1787 for (auto &DbgValue : DbgValues) 1788 DbgValue->setOperand(0, MDPhi); 1789 return WidePhi; 1790 } 1791 1792 /// Calculates control-dependent range for the given def at the given context 1793 /// by looking at dominating conditions inside of the loop 1794 void WidenIV::calculatePostIncRange(Instruction *NarrowDef, 1795 Instruction *NarrowUser) { 1796 using namespace llvm::PatternMatch; 1797 1798 Value *NarrowDefLHS; 1799 const APInt *NarrowDefRHS; 1800 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), 1801 m_APInt(NarrowDefRHS))) || 1802 !NarrowDefRHS->isNonNegative()) 1803 return; 1804 1805 auto UpdateRangeFromCondition = [&] (Value *Condition, 1806 bool TrueDest) { 1807 CmpInst::Predicate Pred; 1808 Value *CmpRHS; 1809 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), 1810 m_Value(CmpRHS)))) 1811 return; 1812 1813 CmpInst::Predicate P = 1814 TrueDest ? 
Pred : CmpInst::getInversePredicate(Pred); 1815 1816 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); 1817 auto CmpConstrainedLHSRange = 1818 ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); 1819 auto NarrowDefRange = 1820 CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS); 1821 1822 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); 1823 }; 1824 1825 auto UpdateRangeFromGuards = [&](Instruction *Ctx) { 1826 if (!HasGuards) 1827 return; 1828 1829 for (Instruction &I : make_range(Ctx->getIterator().getReverse(), 1830 Ctx->getParent()->rend())) { 1831 Value *C = nullptr; 1832 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) 1833 UpdateRangeFromCondition(C, /*TrueDest=*/true); 1834 } 1835 }; 1836 1837 UpdateRangeFromGuards(NarrowUser); 1838 1839 BasicBlock *NarrowUserBB = NarrowUser->getParent(); 1840 // If NarrowUserBB is statically unreachable asking dominator queries may 1841 // yield surprising results. (e.g. the block may not have a dom tree node) 1842 if (!DT->isReachableFromEntry(NarrowUserBB)) 1843 return; 1844 1845 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); 1846 L->contains(DTB->getBlock()); 1847 DTB = DTB->getIDom()) { 1848 auto *BB = DTB->getBlock(); 1849 auto *TI = BB->getTerminator(); 1850 UpdateRangeFromGuards(TI); 1851 1852 auto *BI = dyn_cast<BranchInst>(TI); 1853 if (!BI || !BI->isConditional()) 1854 continue; 1855 1856 auto *TrueSuccessor = BI->getSuccessor(0); 1857 auto *FalseSuccessor = BI->getSuccessor(1); 1858 1859 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { 1860 return BBE.isSingleEdge() && 1861 DT->dominates(BBE, NarrowUser->getParent()); 1862 }; 1863 1864 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) 1865 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); 1866 1867 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) 1868 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); 1869 } 1870 } 1871 1872 /// Calculates PostIncRangeInfos map for the given IV 1873 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { 1874 SmallPtrSet<Instruction *, 16> Visited; 1875 SmallVector<Instruction *, 6> Worklist; 1876 Worklist.push_back(OrigPhi); 1877 Visited.insert(OrigPhi); 1878 1879 while (!Worklist.empty()) { 1880 Instruction *NarrowDef = Worklist.pop_back_val(); 1881 1882 for (Use &U : NarrowDef->uses()) { 1883 auto *NarrowUser = cast<Instruction>(U.getUser()); 1884 1885 // Don't go looking outside the current loop. 1886 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; 1887 if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) 1888 continue; 1889 1890 if (!Visited.insert(NarrowUser).second) 1891 continue; 1892 1893 Worklist.push_back(NarrowUser); 1894 1895 calculatePostIncRange(NarrowDef, NarrowUser); 1896 } 1897 } 1898 } 1899 1900 //===----------------------------------------------------------------------===// 1901 // Live IV Reduction - Minimize IVs live across the loop. 1902 //===----------------------------------------------------------------------===// 1903 1904 //===----------------------------------------------------------------------===// 1905 // Simplification of IV users based on SCEV evaluation. 
1906 //===----------------------------------------------------------------------===// 1907 1908 namespace { 1909 1910 class IndVarSimplifyVisitor : public IVVisitor { 1911 ScalarEvolution *SE; 1912 const TargetTransformInfo *TTI; 1913 PHINode *IVPhi; 1914 1915 public: 1916 WideIVInfo WI; 1917 1918 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, 1919 const TargetTransformInfo *TTI, 1920 const DominatorTree *DTree) 1921 : SE(SCEV), TTI(TTI), IVPhi(IV) { 1922 DT = DTree; 1923 WI.NarrowIV = IVPhi; 1924 } 1925 1926 // Implement the interface used by simplifyUsersOfIV. 1927 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } 1928 }; 1929 1930 } // end anonymous namespace 1931 1932 /// Iteratively perform simplification on a worklist of IV users. Each 1933 /// successive simplification may push more users which may themselves be 1934 /// candidates for simplification. 1935 /// 1936 /// Sign/Zero extend elimination is interleaved with IV simplification. 1937 bool IndVarSimplify::simplifyAndExtend(Loop *L, 1938 SCEVExpander &Rewriter, 1939 LoopInfo *LI) { 1940 SmallVector<WideIVInfo, 8> WideIVs; 1941 1942 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction( 1943 Intrinsic::getName(Intrinsic::experimental_guard)); 1944 bool HasGuards = GuardDecl && !GuardDecl->use_empty(); 1945 1946 SmallVector<PHINode*, 8> LoopPhis; 1947 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 1948 LoopPhis.push_back(cast<PHINode>(I)); 1949 } 1950 // Each round of simplification iterates through the SimplifyIVUsers worklist 1951 // for all current phis, then determines whether any IVs can be 1952 // widened. Widening adds new phis to LoopPhis, inducing another round of 1953 // simplification on the wide IVs. 1954 bool Changed = false; 1955 while (!LoopPhis.empty()) { 1956 // Evaluate as many IV expressions as possible before widening any IVs. This 1957 // forces SCEV to set no-wrap flags before evaluating sign/zero 1958 // extension. The first time SCEV attempts to normalize sign/zero extension, 1959 // the result becomes final. So for the most predictable results, we delay 1960 // evaluation of sign/zero extend evaluation until needed, and avoid running 1961 // other SCEV based analysis prior to simplifyAndExtend. 1962 do { 1963 PHINode *CurrIV = LoopPhis.pop_back_val(); 1964 1965 // Information about sign/zero extensions of CurrIV. 1966 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); 1967 1968 Changed |= 1969 simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor); 1970 1971 if (Visitor.WI.WidestNativeType) { 1972 WideIVs.push_back(Visitor.WI); 1973 } 1974 } while(!LoopPhis.empty()); 1975 1976 for (; !WideIVs.empty(); WideIVs.pop_back()) { 1977 WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards); 1978 if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) { 1979 Changed = true; 1980 LoopPhis.push_back(WidePhi); 1981 } 1982 } 1983 } 1984 return Changed; 1985 } 1986 1987 //===----------------------------------------------------------------------===// 1988 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition. 1989 //===----------------------------------------------------------------------===// 1990 1991 /// Given an Value which is hoped to be part of an add recurance in the given 1992 /// loop, return the associated Phi node if so. Otherwise, return null. Note 1993 /// that this is less general than SCEVs AddRec checking. 
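/// For illustration only (hypothetical names): given
///   %iv.next = add i32 %iv, %stride
/// where %iv is a phi in the loop header and %stride is loop invariant, this
/// returns %iv; anything more involved returns null.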
1994 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
1995   Instruction *IncI = dyn_cast<Instruction>(IncV);
1996   if (!IncI)
1997     return nullptr;
1998 
1999   switch (IncI->getOpcode()) {
2000   case Instruction::Add:
2001   case Instruction::Sub:
2002     break;
2003   case Instruction::GetElementPtr:
2004     // An IV counter must preserve its type.
2005     if (IncI->getNumOperands() == 2)
2006       break;
2007     LLVM_FALLTHROUGH;
2008   default:
2009     return nullptr;
2010   }
2011 
2012   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2013   if (Phi && Phi->getParent() == L->getHeader()) {
2014     if (L->isLoopInvariant(IncI->getOperand(1)))
2015       return Phi;
2016     return nullptr;
2017   }
2018   if (IncI->getOpcode() == Instruction::GetElementPtr)
2019     return nullptr;
2020 
2021   // Allow add/sub to be commuted.
2022   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2023   if (Phi && Phi->getParent() == L->getHeader()) {
2024     if (L->isLoopInvariant(IncI->getOperand(0)))
2025       return Phi;
2026   }
2027   return nullptr;
2028 }
2029 
2030 /// Given a loop with one backedge and one exit, return the ICmpInst
2031 /// controlling the sole loop exit. There is no guarantee that the exiting
2032 /// block is also the latch.
2033 static ICmpInst *getLoopTest(Loop *L, BasicBlock *ExitingBB) {
2034 
2035   BasicBlock *LatchBlock = L->getLoopLatch();
2036   // Don't bother with LFTR if the loop is not properly simplified.
2037   if (!LatchBlock)
2038     return nullptr;
2039 
2040   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2041   return dyn_cast<ICmpInst>(BI->getCondition());
2042 }
2043 
2044 /// linearFunctionTestReplace policy. Return true unless we can show that the
2045 /// current exit test is already sufficiently canonical.
2046 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2047   // Avoid converting a constant or loop invariant test back to a runtime
2048   // test. This is critical for when SCEV's cached ExitCount is less precise
2049   // than the current IR (such as after we've proven a particular exit is
2050   // actually dead and thus the BE count never reaches our ExitCount.)
2051   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2052   if (L->isLoopInvariant(BI->getCondition()))
2053     return false;
2054 
2055   // Do LFTR to simplify the exit condition to an ICMP.
2056   ICmpInst *Cond = getLoopTest(L, ExitingBB);
2057   if (!Cond)
2058     return true;
2059 
2060   // Do LFTR to simplify the exit ICMP to EQ/NE
2061   ICmpInst::Predicate Pred = Cond->getPredicate();
2062   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2063     return true;
2064 
2065   // Look for a loop invariant RHS
2066   Value *LHS = Cond->getOperand(0);
2067   Value *RHS = Cond->getOperand(1);
2068   if (!L->isLoopInvariant(RHS)) {
2069     if (!L->isLoopInvariant(LHS))
2070       return true;
2071     std::swap(LHS, RHS);
2072   }
2073   // Look for a simple IV counter LHS
2074   PHINode *Phi = dyn_cast<PHINode>(LHS);
2075   if (!Phi)
2076     Phi = getLoopPhiForCounter(LHS, L);
2077 
2078   if (!Phi)
2079     return true;
2080 
2081   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2082   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2083   if (Idx < 0)
2084     return true;
2085 
2086   // Do LFTR if the exit condition's IV is *not* a simple counter.
2087   Value *IncV = Phi->getIncomingValue(Idx);
2088   return Phi != getLoopPhiForCounter(IncV, L);
2089 }
2090 
2091 /// Return true if undefined behavior would provably be executed on the path to
2092 /// OnPathTo if Root produced a poison result.
/// Note that this doesn't say
2093 /// anything about whether OnPathTo is actually executed or whether Root is
2094 /// actually poison. This can be used to assess whether a new use of Root can
2095 /// be added at a location which is control equivalent with OnPathTo (such as
2096 /// immediately before it) without introducing UB which didn't previously
2097 /// exist. Note that a false result conveys no information.
2098 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
2099                                           Instruction *OnPathTo,
2100                                           DominatorTree *DT) {
2101   // Basic approach is to assume Root is poison, propagate poison forward
2102   // through all users we can easily track, and then check whether any of those
2103   // users are provably UB and must execute before our exiting block might
2104   // exit.
2105 
2106   // The set of all recursive users we've visited (which are assumed to all be
2107   // poison because of said visit)
2108   SmallSet<const Value *, 16> KnownPoison;
2109   SmallVector<const Instruction*, 16> Worklist;
2110   Worklist.push_back(Root);
2111   while (!Worklist.empty()) {
2112     const Instruction *I = Worklist.pop_back_val();
2113 
2114     // If we know this must trigger UB on a path leading to our target.
2115     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2116       return true;
2117 
2118     // If we can't analyze propagation through this instruction, just skip it
2119     // and transitive users. Safe as false is a conservative result.
2120     if (!propagatesFullPoison(I) && I != Root)
2121       continue;
2122 
2123     if (KnownPoison.insert(I).second)
2124       for (const User *User : I->users())
2125         Worklist.push_back(cast<Instruction>(User));
2126   }
2127 
2128   // Might be non-UB, or might have a path we couldn't prove must execute on
2129   // the way to the exiting bb.
2130   return false;
2131 }
2132 
2133 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2134 /// down to checking that all operands are constant and listing instructions
2135 /// that may hide undef.
2136 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2137                                unsigned Depth) {
2138   if (isa<Constant>(V))
2139     return !isa<UndefValue>(V);
2140 
2141   if (Depth >= 6)
2142     return false;
2143 
2144   // Conservatively handle non-constant non-instructions. For example, Arguments
2145   // may be undef.
2146   Instruction *I = dyn_cast<Instruction>(V);
2147   if (!I)
2148     return false;
2149 
2150   // Load and return values may be undef.
2151   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2152     return false;
2153 
2154   // Optimistically handle other instructions.
2155   for (Value *Op : I->operands()) {
2156     if (!Visited.insert(Op).second)
2157       continue;
2158     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2159       return false;
2160   }
2161   return true;
2162 }
2163 
2164 /// Return true if the given value is concrete. We must prove that undef can
2165 /// never reach it.
2166 ///
2167 /// TODO: If we decide that this is a good approach to checking for undef, we
2168 /// may factor it into a common location.
2169 static bool hasConcreteDef(Value *V) {
2170   SmallPtrSet<Value*, 8> Visited;
2171   Visited.insert(V);
2172   return hasConcreteDefImpl(V, Visited, 0);
2173 }
2174 
2175 /// Return true if this IV has no uses other than the (soon to be rewritten)
2176 /// loop exit test and its own increment.
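/// A typical almost-dead IV (illustrative) has only its own increment and the
/// exit compare as users:
///   %i = phi i32 [ 0, %preheader ], [ %i.next, %latch ]
///   %i.next = add nuw nsw i32 %i, 1
///   %cmp = icmp ne i32 %i.next, %n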
2177 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2178   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2179   Value *IncV = Phi->getIncomingValue(LatchIdx);
2180 
2181   for (User *U : Phi->users())
2182     if (U != Cond && U != IncV) return false;
2183 
2184   for (User *U : IncV->users())
2185     if (U != Cond && U != Phi) return false;
2186   return true;
2187 }
2188 
2189 /// Return true if the given phi is a "counter" in L. A counter is an
2190 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
2191 /// step of 1. Note that L must have exactly one latch.
2192 static bool isLoopCounter(PHINode* Phi, Loop *L,
2193                           ScalarEvolution *SE) {
2194   assert(Phi->getParent() == L->getHeader());
2195   assert(L->getLoopLatch());
2196 
2197   if (!SE->isSCEVable(Phi->getType()))
2198     return false;
2199 
2200   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2201   if (!AR || AR->getLoop() != L || !AR->isAffine())
2202     return false;
2203 
2204   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2205   if (!Step || !Step->isOne())
2206     return false;
2207 
2208   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2209   Value *IncV = Phi->getIncomingValue(LatchIdx);
2210   return (getLoopPhiForCounter(IncV, L) == Phi);
2211 }
2212 
2213 /// Search the loop header for a loop counter (an add rec w/step of one)
2214 /// suitable for use by LFTR. If multiple counters are available, select the
2215 /// "best" one based on profitability heuristics.
2216 ///
2217 /// BECount may be an i8* pointer type. The pointer difference is already a
2218 /// valid count without scaling the address stride, so it remains a pointer
2219 /// expression as far as SCEV is concerned.
2220 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2221                                 const SCEV *BECount,
2222                                 ScalarEvolution *SE, DominatorTree *DT) {
2223   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2224 
2225   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2226 
2227   // Loop over all of the PHI nodes, looking for a simple counter.
2228   PHINode *BestPhi = nullptr;
2229   const SCEV *BestInit = nullptr;
2230   BasicBlock *LatchBlock = L->getLoopLatch();
2231   assert(LatchBlock && "needsLFTR should guarantee a loop latch");
2232   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2233 
2234   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2235     PHINode *Phi = cast<PHINode>(I);
2236     if (!isLoopCounter(Phi, L, SE))
2237       continue;
2238 
2239     // Avoid comparing an integer IV against a pointer Limit.
2240     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2241       continue;
2242 
2243     const auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2244 
2245     // AR may be a pointer type, while BECount is an integer type.
2246     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2247     // AR may not be a narrower type, or we may never exit.
2248     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2249     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2250       continue;
2251 
2252     // Avoid reusing a potentially undef value to compute other values that may
2253     // have originally had a concrete definition.
2254     if (!hasConcreteDef(Phi)) {
2255       // We explicitly allow unknown phis as long as they are already used by
2256       // the loop exit test. This is legal since performing LFTR could not
2257       // increase the number of undef users.
2258 if (ICmpInst *Cond = getLoopTest(L, ExitingBB)) 2259 if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L) && 2260 Phi != getLoopPhiForCounter(Cond->getOperand(1), L)) 2261 continue; 2262 } 2263 2264 // Avoid introducing undefined behavior due to poison which didn't exist in 2265 // the original program. (Annoyingly, the rules for poison and undef 2266 // propagation are distinct, so this does NOT cover the undef case above.) 2267 // We have to ensure that we don't introduce UB by introducing a use on an 2268 // iteration where said IV produces poison. Our strategy here differs for 2269 // pointers and integer IVs. For integers, we strip and reinfer as needed, 2270 // see code in linearFunctionTestReplace. For pointers, we restrict 2271 // transforms as there is no good way to reinfer inbounds once lost. 2272 if (!Phi->getType()->isIntegerTy() && 2273 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) 2274 continue; 2275 2276 const SCEV *Init = AR->getStart(); 2277 2278 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { 2279 // Don't force a live loop counter if another IV can be used. 2280 if (AlmostDeadIV(Phi, LatchBlock, Cond)) 2281 continue; 2282 2283 // Prefer to count-from-zero. This is a more "canonical" counter form. It 2284 // also prefers integer to pointer IVs. 2285 if (BestInit->isZero() != Init->isZero()) { 2286 if (BestInit->isZero()) 2287 continue; 2288 } 2289 // If two IVs both count from zero or both count from nonzero then the 2290 // narrower is likely a dead phi that has been widened. Use the wider phi 2291 // to allow the other to be eliminated. 2292 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) 2293 continue; 2294 } 2295 BestPhi = Phi; 2296 BestInit = Init; 2297 } 2298 return BestPhi; 2299 } 2300 2301 /// Insert an IR expression which computes the value held by the IV IndVar 2302 /// (which must be an loop counter w/unit stride) after the backedge of loop L 2303 /// is taken IVCount times. 2304 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, 2305 const SCEV *IVCount, Loop *L, 2306 SCEVExpander &Rewriter, ScalarEvolution *SE) { 2307 assert(isLoopCounter(IndVar, L, SE)); 2308 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 2309 const SCEV *IVInit = AR->getStart(); 2310 2311 // IVInit may be a pointer while IVCount is an integer when FindLoopCounter 2312 // finds a valid pointer IV. Sign extend BECount in order to materialize a 2313 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing 2314 // the existing GEPs whenever possible. 2315 if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) { 2316 // IVOffset will be the new GEP offset that is interpreted by GEP as a 2317 // signed value. IVCount on the other hand represents the loop trip count, 2318 // which is an unsigned value. FindLoopCounter only allows induction 2319 // variables that have a positive unit stride of one. This means we don't 2320 // have to handle the case of negative offsets (yet) and just need to zero 2321 // extend IVCount. 2322 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); 2323 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy); 2324 2325 // Expand the code for the iteration count. 
2326 assert(SE->isLoopInvariant(IVOffset, L) && 2327 "Computed iteration count is not loop invariant!"); 2328 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2329 Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI); 2330 2331 Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader()); 2332 assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter"); 2333 // We could handle pointer IVs other than i8*, but we need to compensate for 2334 // gep index scaling. 2335 assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), 2336 cast<PointerType>(GEPBase->getType()) 2337 ->getElementType())->isOne() && 2338 "unit stride pointer IV must be i8*"); 2339 2340 IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); 2341 return Builder.CreateGEP(GEPBase->getType()->getPointerElementType(), 2342 GEPBase, GEPOffset, "lftr.limit"); 2343 } else { 2344 // In any other case, convert both IVInit and IVCount to integers before 2345 // comparing. This may result in SCEV expansion of pointers, but in practice 2346 // SCEV will fold the pointer arithmetic away as such: 2347 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). 2348 // 2349 // Valid Cases: (1) both integers is most common; (2) both may be pointers 2350 // for simple memset-style loops. 2351 // 2352 // IVInit integer and IVCount pointer would only occur if a canonical IV 2353 // were generated on top of case #2, which is not expected. 2354 2355 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 2356 const SCEV *IVLimit = nullptr; 2357 // For unit stride, IVCount = Start + BECount with 2's complement overflow. 2358 // For non-zero Start, compute IVCount here. 2359 if (AR->getStart()->isZero()) 2360 IVLimit = IVCount; 2361 else { 2362 const SCEV *IVInit = AR->getStart(); 2363 2364 // For integer IVs, truncate the IV before computing IVInit + BECount. 2365 if (SE->getTypeSizeInBits(IVInit->getType()) 2366 > SE->getTypeSizeInBits(IVCount->getType())) 2367 IVInit = SE->getTruncateExpr(IVInit, IVCount->getType()); 2368 2369 IVLimit = SE->getAddExpr(IVInit, IVCount); 2370 } 2371 // Expand the code for the iteration count. 2372 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2373 IRBuilder<> Builder(BI); 2374 assert(SE->isLoopInvariant(IVLimit, L) && 2375 "Computed iteration count is not loop invariant!"); 2376 // Ensure that we generate the same type as IndVar, or a smaller integer 2377 // type. In the presence of null pointer values, we have an integer type 2378 // SCEV expression (IVInit) for a pointer type IV value (IndVar). 2379 Type *LimitTy = IVCount->getType()->isPointerTy() ? 2380 IndVar->getType() : IVCount->getType(); 2381 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); 2382 } 2383 } 2384 2385 /// This method rewrites the exit condition of the loop to be a canonical != 2386 /// comparison against the incremented loop induction variable. This pass is 2387 /// able to rewrite the exit tests of any loop where the SCEV analysis can 2388 /// determine a loop-invariant trip count of the loop, which is actually a much 2389 /// broader range than just linear tests. 
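/// As a rough sketch (names and the exact limit are illustrative), an exit
/// test such as
///   %cmp = icmp slt i32 %i, %n
///   br i1 %cmp, label %loop, label %exit
/// is rewritten into an equality test of the incremented counter against a
/// precomputed, loop-invariant limit:
///   %exitcond = icmp ne i32 %i.next, %lftr.limit
///   br i1 %exitcond, label %loop, label %exit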
2390 bool IndVarSimplify:: 2391 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 2392 const SCEV *ExitCount, 2393 PHINode *IndVar, SCEVExpander &Rewriter) { 2394 assert(L->getLoopLatch() && "Loop no longer in simplified form?"); 2395 assert(isLoopCounter(IndVar, L, SE)); 2396 Instruction * const IncVar = 2397 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch())); 2398 2399 // Initialize CmpIndVar and IVCount to their preincremented values. 2400 Value *CmpIndVar = IndVar; 2401 const SCEV *IVCount = ExitCount; 2402 2403 // If the exiting block is the same as the backedge block, we prefer to 2404 // compare against the post-incremented value, otherwise we must compare 2405 // against the preincremented value. 2406 if (ExitingBB == L->getLoopLatch()) { 2407 bool SafeToPostInc = IndVar->getType()->isIntegerTy(); 2408 if (!SafeToPostInc) { 2409 // For pointer IVs, we chose to not strip inbounds which requires us not 2410 // to add a potentially UB introducing use. We need to either a) show 2411 // the loop test we're modifying is already in post-inc form, or b) show 2412 // that adding a use must not introduce UB. 2413 if (ICmpInst *LoopTest = getLoopTest(L, ExitingBB)) 2414 SafeToPostInc = LoopTest->getOperand(0) == IncVar || 2415 LoopTest->getOperand(1) == IncVar; 2416 if (!SafeToPostInc) 2417 SafeToPostInc = 2418 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT); 2419 } 2420 if (SafeToPostInc) { 2421 // Add one to the "backedge-taken" count to get the trip count. 2422 // This addition may overflow, which is valid as long as the comparison 2423 // is truncated to ExitCount->getType(). 2424 IVCount = SE->getAddExpr(ExitCount, 2425 SE->getOne(ExitCount->getType())); 2426 // The BackedgeTaken expression contains the number of times that the 2427 // backedge branches to the loop header. This is one less than the 2428 // number of times the loop executes, so use the incremented indvar. 2429 CmpIndVar = IncVar; 2430 } 2431 } 2432 2433 // It may be necessary to drop nowrap flags on the incrementing instruction 2434 // if either LFTR moves from a pre-inc check to a post-inc check (in which 2435 // case the increment might have previously been poison on the last iteration 2436 // only) or if LFTR switches to a different IV that was previously dynamically 2437 // dead (and as such may be arbitrarily poison). We remove any nowrap flags 2438 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc 2439 // check), because the pre-inc addrec flags may be adopted from the original 2440 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags. 2441 // TODO: This handling is inaccurate for one case: If we switch to a 2442 // dynamically dead IV that wraps on the first loop iteration only, which is 2443 // not covered by the post-inc addrec. (If the new IV was not dynamically 2444 // dead, it could not be poison on the first iteration in the first place.) 
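  // Sketch of the effect (flags shown are illustrative): for an increment
  //   %i.next = add nuw nsw i32 %i, 1
  // the code below keeps nuw/nsw only if SCEV proves the corresponding no-wrap
  // fact on the post-increment addrec; otherwise the flag is dropped.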
2445 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) { 2446 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar)); 2447 if (BO->hasNoUnsignedWrap()) 2448 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap()); 2449 if (BO->hasNoSignedWrap()) 2450 BO->setHasNoSignedWrap(AR->hasNoSignedWrap()); 2451 } 2452 2453 Value *ExitCnt = genLoopLimit(IndVar, ExitingBB, IVCount, L, Rewriter, SE); 2454 assert(ExitCnt->getType()->isPointerTy() == 2455 IndVar->getType()->isPointerTy() && 2456 "genLoopLimit missed a cast"); 2457 2458 // Insert a new icmp_ne or icmp_eq instruction before the branch. 2459 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2460 ICmpInst::Predicate P; 2461 if (L->contains(BI->getSuccessor(0))) 2462 P = ICmpInst::ICMP_NE; 2463 else 2464 P = ICmpInst::ICMP_EQ; 2465 2466 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 2467 << " LHS:" << *CmpIndVar << '\n' 2468 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==") 2469 << "\n" 2470 << " RHS:\t" << *ExitCnt << "\n" 2471 << " IVCount:\t" << *IVCount << "\n"); 2472 2473 IRBuilder<> Builder(BI); 2474 2475 // The new loop exit condition should reuse the debug location of the 2476 // original loop exit condition. 2477 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition())) 2478 Builder.SetCurrentDebugLocation(Cond->getDebugLoc()); 2479 2480 // LFTR can ignore IV overflow and truncate to the width of 2481 // BECount. This avoids materializing the add(zext(add)) expression. 2482 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); 2483 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); 2484 if (CmpIndVarSize > ExitCntSize) { 2485 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 2486 const SCEV *ARStart = AR->getStart(); 2487 const SCEV *ARStep = AR->getStepRecurrence(*SE); 2488 // For constant IVCount, avoid truncation. 2489 if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) { 2490 const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt(); 2491 APInt Count = cast<SCEVConstant>(IVCount)->getAPInt(); 2492 // Note that the post-inc value of ExitCount may have overflowed 2493 // above such that IVCount is now zero. 2494 if (IVCount != ExitCount && Count == 0) { 2495 Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize); 2496 ++Count; 2497 } 2498 else 2499 Count = Count.zext(CmpIndVarSize); 2500 APInt NewLimit; 2501 if (cast<SCEVConstant>(ARStep)->getValue()->isNegative()) 2502 NewLimit = Start - Count; 2503 else 2504 NewLimit = Start + Count; 2505 ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit); 2506 2507 LLVM_DEBUG(dbgs() << " Widen RHS:\t" << *ExitCnt << "\n"); 2508 } else { 2509 // We try to extend trip count first. If that doesn't work we truncate IV. 2510 // Zext(trunc(IV)) == IV implies equivalence of the following two: 2511 // Trunc(IV) == ExitCnt and IV == zext(ExitCnt). Similarly for sext. If 2512 // one of the two holds, extend the trip count, otherwise we truncate IV. 
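      // For instance (widths are illustrative): with a 64-bit CmpIndVar and a
      // 32-bit ExitCnt, proving zext(trunc(IV)) == IV lets us zext ExitCnt to
      // i64 and keep comparing the wide IV; failing that (and the sext
      // variant), we truncate CmpIndVar to i32 instead.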
2513 bool Extended = false; 2514 const SCEV *IV = SE->getSCEV(CmpIndVar); 2515 const SCEV *ZExtTrunc = 2516 SE->getZeroExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar), 2517 ExitCnt->getType()), 2518 CmpIndVar->getType()); 2519 2520 if (ZExtTrunc == IV) { 2521 Extended = true; 2522 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(), 2523 "wide.trip.count"); 2524 } else { 2525 const SCEV *SExtTrunc = 2526 SE->getSignExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar), 2527 ExitCnt->getType()), 2528 CmpIndVar->getType()); 2529 if (SExtTrunc == IV) { 2530 Extended = true; 2531 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(), 2532 "wide.trip.count"); 2533 } 2534 } 2535 2536 if (!Extended) 2537 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), 2538 "lftr.wideiv"); 2539 } 2540 } 2541 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); 2542 Value *OrigCond = BI->getCondition(); 2543 // It's tempting to use replaceAllUsesWith here to fully replace the old 2544 // comparison, but that's not immediately safe, since users of the old 2545 // comparison may not be dominated by the new comparison. Instead, just 2546 // update the branch to use the new comparison; in the common case this 2547 // will make old comparison dead. 2548 BI->setCondition(Cond); 2549 DeadInsts.push_back(OrigCond); 2550 2551 ++NumLFTR; 2552 return true; 2553 } 2554 2555 //===----------------------------------------------------------------------===// 2556 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders. 2557 //===----------------------------------------------------------------------===// 2558 2559 /// If there's a single exit block, sink any loop-invariant values that 2560 /// were defined in the preheader but not used inside the loop into the 2561 /// exit block to reduce register pressure in the loop. 2562 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { 2563 BasicBlock *ExitBlock = L->getExitBlock(); 2564 if (!ExitBlock) return false; 2565 2566 BasicBlock *Preheader = L->getLoopPreheader(); 2567 if (!Preheader) return false; 2568 2569 bool MadeAnyChanges = false; 2570 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); 2571 BasicBlock::iterator I(Preheader->getTerminator()); 2572 while (I != Preheader->begin()) { 2573 --I; 2574 // New instructions were inserted at the end of the preheader. 2575 if (isa<PHINode>(I)) 2576 break; 2577 2578 // Don't move instructions which might have side effects, since the side 2579 // effects need to complete before instructions inside the loop. Also don't 2580 // move instructions which might read memory, since the loop may modify 2581 // memory. Note that it's okay if the instruction might have undefined 2582 // behavior: LoopSimplify guarantees that the preheader dominates the exit 2583 // block. 2584 if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 2585 continue; 2586 2587 // Skip debug info intrinsics. 2588 if (isa<DbgInfoIntrinsic>(I)) 2589 continue; 2590 2591 // Skip eh pad instructions. 2592 if (I->isEHPad()) 2593 continue; 2594 2595 // Don't sink alloca: we never want to sink static alloca's out of the 2596 // entry block, and correctly sinking dynamic alloca's requires 2597 // checks for stacksave/stackrestore intrinsics. 2598 // FIXME: Refactor this check somehow? 2599 if (isa<AllocaInst>(I)) 2600 continue; 2601 2602 // Determine if there is a use in or before the loop (direct or 2603 // otherwise). 
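    // (A use in a phi counts at the phi's incoming block, so an exit-block
    // lcssa phi whose incoming block is inside the loop still keeps the
    // definition in the preheader.)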
2604 bool UsedInLoop = false; 2605 for (Use &U : I->uses()) { 2606 Instruction *User = cast<Instruction>(U.getUser()); 2607 BasicBlock *UseBB = User->getParent(); 2608 if (PHINode *P = dyn_cast<PHINode>(User)) { 2609 unsigned i = 2610 PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 2611 UseBB = P->getIncomingBlock(i); 2612 } 2613 if (UseBB == Preheader || L->contains(UseBB)) { 2614 UsedInLoop = true; 2615 break; 2616 } 2617 } 2618 2619 // If there is, the def must remain in the preheader. 2620 if (UsedInLoop) 2621 continue; 2622 2623 // Otherwise, sink it to the exit block. 2624 Instruction *ToMove = &*I; 2625 bool Done = false; 2626 2627 if (I != Preheader->begin()) { 2628 // Skip debug info intrinsics. 2629 do { 2630 --I; 2631 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); 2632 2633 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) 2634 Done = true; 2635 } else { 2636 Done = true; 2637 } 2638 2639 MadeAnyChanges = true; 2640 ToMove->moveBefore(*ExitBlock, InsertPt); 2641 if (Done) break; 2642 InsertPt = ToMove->getIterator(); 2643 } 2644 2645 return MadeAnyChanges; 2646 } 2647 2648 //===----------------------------------------------------------------------===// 2649 // IndVarSimplify driver. Manage several subpasses of IV simplification. 2650 //===----------------------------------------------------------------------===// 2651 2652 bool IndVarSimplify::run(Loop *L) { 2653 // We need (and expect!) the incoming loop to be in LCSSA. 2654 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2655 "LCSSA required to run indvars!"); 2656 bool Changed = false; 2657 2658 // If LoopSimplify form is not available, stay out of trouble. Some notes: 2659 // - LSR currently only supports LoopSimplify-form loops. Indvars' 2660 // canonicalization can be a pessimization without LSR to "clean up" 2661 // afterwards. 2662 // - We depend on having a preheader; in particular, 2663 // Loop::getCanonicalInductionVariable only supports loops with preheaders, 2664 // and we're in trouble if we can't find the induction variable even when 2665 // we've manually inserted one. 2666 // - LFTR relies on having a single backedge. 2667 if (!L->isLoopSimplifyForm()) 2668 return false; 2669 2670 // If there are any floating-point recurrences, attempt to 2671 // transform them to use integer recurrences. 2672 Changed |= rewriteNonIntegerIVs(L); 2673 2674 const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); 2675 2676 // Create a rewriter object which we'll use to transform the code with. 2677 SCEVExpander Rewriter(*SE, DL, "indvars"); 2678 #ifndef NDEBUG 2679 Rewriter.setDebugType(DEBUG_TYPE); 2680 #endif 2681 2682 // Eliminate redundant IV users. 2683 // 2684 // Simplification works best when run before other consumers of SCEV. We 2685 // attempt to avoid evaluating SCEVs for sign/zero extend operations until 2686 // other expressions involving loop IVs have been evaluated. This helps SCEV 2687 // set no-wrap flags before normalizing sign/zero extension. 2688 Rewriter.disableCanonicalMode(); 2689 Changed |= simplifyAndExtend(L, Rewriter, LI); 2690 2691 // Check to see if this loop has a computable loop-invariant execution count. 2692 // If so, this means that we can compute the final value of any expressions 2693 // that are recurrent in the loop, and substitute the exit values from the 2694 // loop into any instructions outside of the loop that use the final values of 2695 // the current expressions. 
2696 // 2697 if (ReplaceExitValue != NeverRepl && 2698 !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) 2699 Changed |= rewriteLoopExitValues(L, Rewriter); 2700 2701 // Eliminate redundant IV cycles. 2702 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); 2703 2704 // If we have a trip count expression, rewrite the loop's exit condition 2705 // using it. 2706 if (!DisableLFTR) { 2707 SmallVector<BasicBlock*, 16> ExitingBlocks; 2708 L->getExitingBlocks(ExitingBlocks); 2709 for (BasicBlock *ExitingBB : ExitingBlocks) { 2710 // Can't rewrite non-branch yet. 2711 if (!isa<BranchInst>(ExitingBB->getTerminator())) 2712 continue; 2713 2714 // If our exitting block exits multiple loops, we can only rewrite the 2715 // innermost one. Otherwise, we're changing how many times the innermost 2716 // loop runs before it exits. 2717 if (LI->getLoopFor(ExitingBB) != L) 2718 continue; 2719 2720 if (!needsLFTR(L, ExitingBB)) 2721 continue; 2722 2723 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2724 if (isa<SCEVCouldNotCompute>(ExitCount)) 2725 continue; 2726 2727 // If we know we'd exit on the first iteration, rewrite the exit to 2728 // reflect this. This does not imply the loop must exit through this 2729 // exit; there may be an earlier one taken on the first iteration. 2730 // TODO: Given we know the backedge can't be taken, we should go ahead 2731 // and break it. Or at least, kill all the header phis and simplify. 2732 if (ExitCount->isZero()) { 2733 auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2734 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); 2735 auto *NewCond = ExitIfTrue ? 2736 ConstantInt::getTrue(BI->getCondition()->getType()) : 2737 ConstantInt::getFalse(BI->getCondition()->getType()); 2738 BI->setCondition(NewCond); 2739 Changed = true; 2740 continue; 2741 } 2742 2743 PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT); 2744 if (!IndVar) 2745 continue; 2746 2747 // Avoid high cost expansions. Note: This heuristic is questionable in 2748 // that our definition of "high cost" is not exactly principled. 2749 if (Rewriter.isHighCostExpansion(ExitCount, L)) 2750 continue; 2751 2752 // Check preconditions for proper SCEVExpander operation. SCEV does not 2753 // express SCEVExpander's dependencies, such as LoopSimplify. Instead 2754 // any pass that uses the SCEVExpander must do it. This does not work 2755 // well for loop passes because SCEVExpander makes assumptions about 2756 // all loops, while LoopPassManager only forces the current loop to be 2757 // simplified. 2758 // 2759 // FIXME: SCEV expansion has no way to bail out, so the caller must 2760 // explicitly check any assumptions made by SCEV. Brittle. 2761 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount); 2762 if (!AR || AR->getLoop()->getLoopPreheader()) 2763 Changed |= linearFunctionTestReplace(L, ExitingBB, 2764 ExitCount, IndVar, 2765 Rewriter); 2766 } 2767 } 2768 // Clear the rewriter cache, because values that are in the rewriter's cache 2769 // can be deleted in the loop below, causing the AssertingVH in the cache to 2770 // trigger. 2771 Rewriter.clear(); 2772 2773 // Now that we're done iterating through lists, clean up any instructions 2774 // which are now dead. 2775 while (!DeadInsts.empty()) 2776 if (Instruction *Inst = 2777 dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val())) 2778 Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); 2779 2780 // The Rewriter may not be used from this point on. 
2781 2782 // Loop-invariant instructions in the preheader that aren't used in the 2783 // loop may be sunk below the loop to reduce register pressure. 2784 Changed |= sinkUnusedInvariants(L); 2785 2786 // rewriteFirstIterationLoopExitValues does not rely on the computation of 2787 // trip count and therefore can further simplify exit values in addition to 2788 // rewriteLoopExitValues. 2789 Changed |= rewriteFirstIterationLoopExitValues(L); 2790 2791 // Clean up dead instructions. 2792 Changed |= DeleteDeadPHIs(L->getHeader(), TLI); 2793 2794 // Check a post-condition. 2795 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2796 "Indvars did not preserve LCSSA!"); 2797 2798 // Verify that LFTR, and any other change have not interfered with SCEV's 2799 // ability to compute trip count. 2800 #ifndef NDEBUG 2801 if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { 2802 SE->forgetLoop(L); 2803 const SCEV *NewBECount = SE->getBackedgeTakenCount(L); 2804 if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) < 2805 SE->getTypeSizeInBits(NewBECount->getType())) 2806 NewBECount = SE->getTruncateOrNoop(NewBECount, 2807 BackedgeTakenCount->getType()); 2808 else 2809 BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, 2810 NewBECount->getType()); 2811 assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV"); 2812 } 2813 #endif 2814 2815 return Changed; 2816 } 2817 2818 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, 2819 LoopStandardAnalysisResults &AR, 2820 LPMUpdater &) { 2821 Function *F = L.getHeader()->getParent(); 2822 const DataLayout &DL = F->getParent()->getDataLayout(); 2823 2824 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI); 2825 if (!IVS.run(&L)) 2826 return PreservedAnalyses::all(); 2827 2828 auto PA = getLoopPassPreservedAnalyses(); 2829 PA.preserveSet<CFGAnalyses>(); 2830 return PA; 2831 } 2832 2833 namespace { 2834 2835 struct IndVarSimplifyLegacyPass : public LoopPass { 2836 static char ID; // Pass identification, replacement for typeid 2837 2838 IndVarSimplifyLegacyPass() : LoopPass(ID) { 2839 initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry()); 2840 } 2841 2842 bool runOnLoop(Loop *L, LPPassManager &LPM) override { 2843 if (skipLoop(L)) 2844 return false; 2845 2846 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 2847 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 2848 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2849 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); 2850 auto *TLI = TLIP ? &TLIP->getTLI() : nullptr; 2851 auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); 2852 auto *TTI = TTIP ? 
&TTIP->getTTI(*L->getHeader()->getParent()) : nullptr; 2853 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 2854 2855 IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI); 2856 return IVS.run(L); 2857 } 2858 2859 void getAnalysisUsage(AnalysisUsage &AU) const override { 2860 AU.setPreservesCFG(); 2861 getLoopAnalysisUsage(AU); 2862 } 2863 }; 2864 2865 } // end anonymous namespace 2866 2867 char IndVarSimplifyLegacyPass::ID = 0; 2868 2869 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", 2870 "Induction Variable Simplification", false, false) 2871 INITIALIZE_PASS_DEPENDENCY(LoopPass) 2872 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars", 2873 "Induction Variable Simplification", false, false) 2874 2875 Pass *llvm::createIndVarSimplifyPass() { 2876 return new IndVarSimplifyLegacyPass(); 2877 } 2878
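// Illustrative usage only (not part of this file's interface): the pass can be
// exercised on its own with opt, e.g.
//   opt -passes=indvars -S input.ll
// or, with the legacy pass manager,
//   opt -indvars -S input.ll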