//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
"llvm/Transforms/Utils/SimplifyIndVar.h" 84 #include <cassert> 85 #include <cstdint> 86 #include <utility> 87 88 using namespace llvm; 89 90 #define DEBUG_TYPE "indvars" 91 92 STATISTIC(NumWidened , "Number of indvars widened"); 93 STATISTIC(NumReplaced , "Number of exit values replaced"); 94 STATISTIC(NumLFTR , "Number of loop exit tests replaced"); 95 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); 96 STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); 97 98 // Trip count verification can be enabled by default under NDEBUG if we 99 // implement a strong expression equivalence checker in SCEV. Until then, we 100 // use the verify-indvars flag, which may assert in some cases. 101 static cl::opt<bool> VerifyIndvars( 102 "verify-indvars", cl::Hidden, 103 cl::desc("Verify the ScalarEvolution result after running indvars")); 104 105 enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl }; 106 107 static cl::opt<ReplaceExitVal> ReplaceExitValue( 108 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), 109 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), 110 cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), 111 clEnumValN(OnlyCheapRepl, "cheap", 112 "only replace exit value when the cost is cheap"), 113 clEnumValN(NoHardUse, "noharduse", 114 "only replace exit values when loop def likely dead"), 115 clEnumValN(AlwaysRepl, "always", 116 "always replace exit value whenever possible"))); 117 118 static cl::opt<bool> UsePostIncrementRanges( 119 "indvars-post-increment-ranges", cl::Hidden, 120 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), 121 cl::init(true)); 122 123 static cl::opt<bool> 124 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), 125 cl::desc("Disable Linear Function Test Replace optimization")); 126 127 namespace { 128 129 struct RewritePhi; 130 131 class IndVarSimplify { 132 LoopInfo *LI; 133 ScalarEvolution *SE; 134 DominatorTree *DT; 135 const DataLayout &DL; 136 TargetLibraryInfo *TLI; 137 const TargetTransformInfo *TTI; 138 139 SmallVector<WeakTrackingVH, 16> DeadInsts; 140 141 bool isValidRewrite(Value *FromVal, Value *ToVal); 142 143 bool handleFloatingPointIV(Loop *L, PHINode *PH); 144 bool rewriteNonIntegerIVs(Loop *L); 145 146 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); 147 bool optimizeLoopExits(Loop *L); 148 149 bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet); 150 bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); 151 bool rewriteFirstIterationLoopExitValues(Loop *L); 152 bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const; 153 154 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 155 const SCEV *ExitCount, 156 PHINode *IndVar, SCEVExpander &Rewriter); 157 158 bool sinkUnusedInvariants(Loop *L); 159 160 public: 161 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, 162 const DataLayout &DL, TargetLibraryInfo *TLI, 163 TargetTransformInfo *TTI) 164 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {} 165 166 bool run(Loop *L); 167 }; 168 169 } // end anonymous namespace 170 171 /// Return true if the SCEV expansion generated by the rewriter can replace the 172 /// original value. SCEV guarantees that it produces the same value, but the way 173 /// it is produced may be illegal IR. Ideally, this function will only be 174 /// called for verification. 
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if the increment operation is not
  // an add, or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.
  // If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  return true;
}
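
// Illustrative sketch of the transformation above (not taken from a specific
// test; value and block names are made up): an fp recurrence such as
//   %iv      = phi double [ 0.0, %preheader ], [ %iv.next, %loop ]
//   %iv.next = fadd double %iv, 1.0
//   %cmp     = fcmp olt double %iv.next, 1.000000e+04
// is rewritten into an integer recurrence of the form
//   %iv.int      = phi i32 [ 0, %preheader ], [ %iv.next.int, %loop ]
//   %iv.next.int = add i32 %iv.int, 1
//   %cmp         = icmp slt i32 %iv.next.int, 10000
// with any remaining fp uses of the old PHI rewritten through
// 'sitofp i32 %iv.int to double'.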

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;

  // Ith incoming value.
  unsigned Ith;

  // Exit value after expansion.
  Value *Val;

  // True if the expansion was deemed high cost.
  bool HighCost;

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L,
                                           const Instruction *I) const {
  SmallPtrSet<const Instruction *, 8> Visited;
  SmallVector<const Instruction *, 8> WorkList;
  Visited.insert(I);
  WorkList.push_back(I);
  while (!WorkList.empty()) {
    const Instruction *Curr = WorkList.pop_back_val();
    // This use is outside the loop, nothing to do.
    if (!L->contains(Curr))
      continue;
    // Do we assume it is a "hard" use which will not be eliminated easily?
    if (Curr->mayHaveSideEffects())
      return true;
    // Otherwise, add all its users to worklist.
    for (auto U : Curr->users()) {
      auto *UI = cast<Instruction>(U);
      if (Visited.insert(UI).second)
        WorkList.push_back(UI);
    }
  }
  return false;
}

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the
/// loop into any instructions outside of the loop that use the final values
/// of the current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not integer or is not defined
        // in the loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop. Evaluate the value it
        // contains when the loop exits, if possible.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (isa<SCEVCouldNotCompute>(ExitValue) ||
            !SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE))
          continue;

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which can not be optimized
        // away. Avoid doing so unless we know we have a value which computes
        // the ExitValue already. TODO: This should be merged into SCEV
        // expander to leverage its knowledge of existing expressions.
        if (ReplaceExitValue != AlwaysRepl &&
            !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
            hasHardUserWithinLoop(L, Inst))
          continue;

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);

        LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
                          << '\n'
                          << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

#ifndef NDEBUG
        // If we reuse an instruction from a loop which is neither L nor one of
        // its containing loops, we end up breaking LCSSA form for this loop by
        // creating a new use of its instruction.
        if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
          if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
            if (EVL != L)
              assert(EVL->contains(L) && "LCSSA breach detected!");
#endif

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  bool Changed = false;
  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, delete it. Don't do it now to avoid
    // invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

/// Check whether it is possible to delete the loop after rewriting exit
/// value. If it is possible, ignore ReplaceExitValue and do rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // In the LoopDeletion pass, a loop can be deleted even when
  // ExitingBlocks.size() > 1. We restrict ourselves to the single
  // ExitingBlock case here for simplicity.
  // TODO: If we see a testcase where a loop with multiple ExitingBlocks can
  // be deleted after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and
// provides the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext of the IV.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace
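
// Illustrative sketch of the widening performed by this section (names are
// made up, not from a specific test): given a narrow IV whose extension feeds
// addressing, e.g.
//   %i       = phi i32 [ 0, %preheader ], [ %i.next, %loop ]
//   %idx     = sext i32 %i to i64
//   %addr    = getelementptr inbounds double, double* %p, i64 %idx
//   %i.next  = add nsw i32 %i, 1
// WidenIV creates a single wide recurrence and redirects the users so the
// sext disappears:
//   %i.wide      = phi i64 [ 0, %preheader ], [ %i.next.wide, %loop ]
//   %addr        = getelementptr inbounds double, double* %p, i64 %i.wide
//   %i.next.wide = add nsw i64 %i.wide, 1
// Users that cannot be widened keep a trunc of the wide IV instead.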

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than that on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable.
  // We could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def. This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative. Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or
/// inserting truncs whenever we stop propagating the type.
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not, we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// 0 for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification
/// after widening its type? In other words, can the extend be safely hoisted
/// out of the loop with SCEV reducing the value to a recurrence on the same
/// loop? If so, return the extended recurrence and the kind of extension used.
/// Otherwise return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension). For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //       icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive. Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}

/// If the narrow use is an instruction whose two operands are the defining
/// instruction of DU and a load instruction, then we have the following:
/// if the load is hoisted outside the loop, then we do not reach this function
/// as scalar evolution analysis works fine in widenIVUse with variables
/// hoisted outside the loop and efficient code is subsequently generated by
/// not emitting truncate instructions. But when the load is not hoisted
/// (whether due to a limitation in alias analysis or due to a true legality
/// constraint), then scalar evolution can not proceed with loop variant values
/// and inefficient code is generated. This function handles the non-hoisted
/// load special case by making the optimization generate the same type of code
/// for hoisted and non-hoisted load (widen use and eliminate sign extend
/// instruction). This special case is important especially when the induction
/// variables are affecting addressing mode in code generation.
bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return false;

  // The operand that is not defined by NarrowDef of DU. Let's call it the
  // other operand.
  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
         "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(NarrowUse);
  ExtendKind ExtKind = getExtendKind(NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return false;

  // We are interested in the other operand being a load instruction.
  // But, we should look into relaxing this restriction later on.
  auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
  if (I && I->getOpcode() != Instruction::Load)
    return false;

  // Verifying that Defining operand is an AddRec
  const SCEV *Op1 = SE->getSCEV(WideDef);
  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
    return false;
  // Verifying that other operand is an Extend.
  if (ExtKind == SignExtended) {
    if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
      return false;
  } else {
    if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
      return false;
  }

  if (ExtKind == SignExtended) {
    for (Use &U : NarrowUse->uses()) {
      SExtInst *User = dyn_cast<SExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  } else { // ExtKind == ZeroExtended
    for (Use &U : NarrowUse->uses()) {
      ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  }

  return true;
}

/// Special Case for widening with variant Loads (see
/// WidenIV::widenWithVariantLoadUse). This is the code generation part.
void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  ExtendKind ExtKind = getExtendKind(NarrowDef);

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  // Generating a widening use instruction.
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      ExtKind, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      ExtKind, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);

  if (ExtKind == SignExtended)
    ExtendKindMap[NarrowUse] = SignExtended;
  else
    ExtendKindMap[NarrowUse] = ZeroExtended;

  // Update the Use.
  if (ExtKind == SignExtended) {
    for (Use &U : NarrowUse->uses()) {
      SExtInst *User = dyn_cast<SExtInst>(U.getUser());
      if (User && User->getType() == WideType) {
        LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
                          << *WideBO << "\n");
        ++NumElimExt;
        User->replaceAllUsesWith(WideBO);
        DeadInsts.emplace_back(User);
      }
    }
  } else { // ExtKind == ZeroExtended
    for (Use &U : NarrowUse->uses()) {
      ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
      if (User && User->getType() == WideType) {
        LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
                          << *WideBO << "\n");
        ++NumElimExt;
        User->replaceAllUsesWith(WideBO);
        DeadInsts.emplace_back(User);
      }
    }
  }
}

/// Determine whether an individual user of the narrow IV can be widened. If
/// so, return the wide clone of the user.
1518 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { 1519 assert(ExtendKindMap.count(DU.NarrowDef) && 1520 "Should already know the kind of extension used to widen NarrowDef"); 1521 1522 // Stop traversing the def-use chain at inner-loop phis or post-loop phis. 1523 if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) { 1524 if (LI->getLoopFor(UsePhi->getParent()) != L) { 1525 // For LCSSA phis, sink the truncate outside the loop. 1526 // After SimplifyCFG most loop exit targets have a single predecessor. 1527 // Otherwise fall back to a truncate within the loop. 1528 if (UsePhi->getNumOperands() != 1) 1529 truncateIVUse(DU, DT, LI); 1530 else { 1531 // Widening the PHI requires us to insert a trunc. The logical place 1532 // for this trunc is in the same BB as the PHI. This is not possible if 1533 // the BB is terminated by a catchswitch. 1534 if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator())) 1535 return nullptr; 1536 1537 PHINode *WidePhi = 1538 PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", 1539 UsePhi); 1540 WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0)); 1541 IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt()); 1542 Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType()); 1543 UsePhi->replaceAllUsesWith(Trunc); 1544 DeadInsts.emplace_back(UsePhi); 1545 LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to " 1546 << *WidePhi << "\n"); 1547 } 1548 return nullptr; 1549 } 1550 } 1551 1552 // This narrow use can be widened by a sext if it's non-negative or its narrow 1553 // def was widended by a sext. Same for zext. 1554 auto canWidenBySExt = [&]() { 1555 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended; 1556 }; 1557 auto canWidenByZExt = [&]() { 1558 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended; 1559 }; 1560 1561 // Our raison d'etre! Eliminate sign and zero extension. 1562 if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) || 1563 (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) { 1564 Value *NewDef = DU.WideDef; 1565 if (DU.NarrowUse->getType() != WideType) { 1566 unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType()); 1567 unsigned IVWidth = SE->getTypeSizeInBits(WideType); 1568 if (CastWidth < IVWidth) { 1569 // The cast isn't as wide as the IV, so insert a Trunc. 1570 IRBuilder<> Builder(DU.NarrowUse); 1571 NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType()); 1572 } 1573 else { 1574 // A wider extend was hidden behind a narrower one. This may induce 1575 // another round of IV widening in which the intermediate IV becomes 1576 // dead. It should be very rare. 1577 LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi 1578 << " not wide enough to subsume " << *DU.NarrowUse 1579 << "\n"); 1580 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); 1581 NewDef = DU.NarrowUse; 1582 } 1583 } 1584 if (NewDef != DU.NarrowUse) { 1585 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse 1586 << " replaced by " << *DU.WideDef << "\n"); 1587 ++NumElimExt; 1588 DU.NarrowUse->replaceAllUsesWith(NewDef); 1589 DeadInsts.emplace_back(DU.NarrowUse); 1590 } 1591 // Now that the extend is gone, we want to expose it's uses for potential 1592 // further simplification. We don't need to directly inform SimplifyIVUsers 1593 // of the new users, because their parent IV will be processed later as a 1594 // new loop phi. 
If we preserved IVUsers analysis, we would also want to 1595 // push the uses of WideDef here. 1596 1597 // No further widening is needed. The deceased [sz]ext had done it for us. 1598 return nullptr; 1599 } 1600 1601 // Does this user itself evaluate to a recurrence after widening? 1602 WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU); 1603 if (!WideAddRec.first) 1604 WideAddRec = getWideRecurrence(DU); 1605 1606 assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown)); 1607 if (!WideAddRec.first) { 1608 // If use is a loop condition, try to promote the condition instead of 1609 // truncating the IV first. 1610 if (widenLoopCompare(DU)) 1611 return nullptr; 1612 1613 // We are here about to generate a truncate instruction that may hurt 1614 // performance because the scalar evolution expression computed earlier 1615 // in WideAddRec.first does not indicate a polynomial induction expression. 1616 // In that case, look at the operands of the use instruction to determine 1617 // if we can still widen the use instead of truncating its operand. 1618 if (widenWithVariantLoadUse(DU)) { 1619 widenWithVariantLoadUseCodegen(DU); 1620 return nullptr; 1621 } 1622 1623 // This user does not evaluate to a recurrence after widening, so don't 1624 // follow it. Instead insert a Trunc to kill off the original use, 1625 // eventually isolating the original narrow IV so it can be removed. 1626 truncateIVUse(DU, DT, LI); 1627 return nullptr; 1628 } 1629 // Assume block terminators cannot evaluate to a recurrence. We can't to 1630 // insert a Trunc after a terminator if there happens to be a critical edge. 1631 assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() && 1632 "SCEV is not expected to evaluate a block terminator"); 1633 1634 // Reuse the IV increment that SCEVExpander created as long as it dominates 1635 // NarrowUse. 1636 Instruction *WideUse = nullptr; 1637 if (WideAddRec.first == WideIncExpr && 1638 Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) 1639 WideUse = WideInc; 1640 else { 1641 WideUse = cloneIVUser(DU, WideAddRec.first); 1642 if (!WideUse) 1643 return nullptr; 1644 } 1645 // Evaluation of WideAddRec ensured that the narrow expression could be 1646 // extended outside the loop without overflow. This suggests that the wide use 1647 // evaluates to the same expression as the extended narrow use, but doesn't 1648 // absolutely guarantee it. Hence the following failsafe check. In rare cases 1649 // where it fails, we simply throw away the newly created wide use. 1650 if (WideAddRec.first != SE->getSCEV(WideUse)) { 1651 LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " 1652 << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first 1653 << "\n"); 1654 DeadInsts.emplace_back(WideUse); 1655 return nullptr; 1656 } 1657 1658 ExtendKindMap[DU.NarrowUse] = WideAddRec.second; 1659 // Returning WideUse pushes it on the worklist. 1660 return WideUse; 1661 } 1662 1663 /// Add eligible users of NarrowDef to NarrowIVUsers. 1664 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { 1665 const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); 1666 bool NonNegativeDef = 1667 SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, 1668 SE->getConstant(NarrowSCEV->getType(), 0)); 1669 for (User *U : NarrowDef->users()) { 1670 Instruction *NarrowUser = cast<Instruction>(U); 1671 1672 // Handle data flow merges and bizarre phi cycles. 
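// A user reachable along several def-use paths (or through a phi cycle) is // only queued the first time it is seen; later visits are skipped below.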
1673 if (!Widened.insert(NarrowUser).second) 1674 continue; 1675 1676 bool NonNegativeUse = false; 1677 if (!NonNegativeDef) { 1678 // We might have a control-dependent range information for this context. 1679 if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser)) 1680 NonNegativeUse = RangeInfo->getSignedMin().isNonNegative(); 1681 } 1682 1683 NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, 1684 NonNegativeDef || NonNegativeUse); 1685 } 1686 } 1687 1688 /// Process a single induction variable. First use the SCEVExpander to create a 1689 /// wide induction variable that evaluates to the same recurrence as the 1690 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's 1691 /// def-use chain. After widenIVUse has processed all interesting IV users, the 1692 /// narrow IV will be isolated for removal by DeleteDeadPHIs. 1693 /// 1694 /// It would be simpler to delete uses as they are processed, but we must avoid 1695 /// invalidating SCEV expressions. 1696 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { 1697 // Is this phi an induction variable? 1698 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); 1699 if (!AddRec) 1700 return nullptr; 1701 1702 // Widen the induction variable expression. 1703 const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended 1704 ? SE->getSignExtendExpr(AddRec, WideType) 1705 : SE->getZeroExtendExpr(AddRec, WideType); 1706 1707 assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && 1708 "Expect the new IV expression to preserve its type"); 1709 1710 // Can the IV be extended outside the loop without overflow? 1711 AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); 1712 if (!AddRec || AddRec->getLoop() != L) 1713 return nullptr; 1714 1715 // An AddRec must have loop-invariant operands. Since this AddRec is 1716 // materialized by a loop header phi, the expression cannot have any post-loop 1717 // operands, so they must dominate the loop header. 1718 assert( 1719 SE->properlyDominates(AddRec->getStart(), L->getHeader()) && 1720 SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) && 1721 "Loop header phi recurrence inputs do not dominate the loop"); 1722 1723 // Iterate over IV uses (including transitive ones) looking for IV increments 1724 // of the form 'add nsw %iv, <const>'. For each increment and each use of 1725 // the increment calculate control-dependent range information basing on 1726 // dominating conditions inside of the loop (e.g. a range check inside of the 1727 // loop). Calculated ranges are stored in PostIncRangeInfos map. 1728 // 1729 // Control-dependent range information is later used to prove that a narrow 1730 // definition is not negative (see pushNarrowIVUsers). It's difficult to do 1731 // this on demand because when pushNarrowIVUsers needs this information some 1732 // of the dominating conditions might be already widened. 1733 if (UsePostIncrementRanges) 1734 calculatePostIncRanges(OrigPhi); 1735 1736 // The rewriter provides a value for the desired IV expression. This may 1737 // either find an existing phi or materialize a new one. Either way, we 1738 // expect a well-formed cyclic phi-with-increments. i.e. any operand not part 1739 // of the phi-SCC dominates the loop entry. 
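// Expanding the AddRec at the top of the header yields (or reuses) a header // phi for the widened recurrence.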
1740 Instruction *InsertPt = &L->getHeader()->front(); 1741 WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt)); 1742 1743 // Remembering the WideIV increment generated by SCEVExpander allows 1744 // widenIVUse to reuse it when widening the narrow IV's increment. We don't 1745 // employ a general reuse mechanism because the call above is the only call to 1746 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. 1747 if (BasicBlock *LatchBlock = L->getLoopLatch()) { 1748 WideInc = 1749 cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); 1750 WideIncExpr = SE->getSCEV(WideInc); 1751 // Propagate the debug location associated with the original loop increment 1752 // to the new (widened) increment. 1753 auto *OrigInc = 1754 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); 1755 WideInc->setDebugLoc(OrigInc->getDebugLoc()); 1756 } 1757 1758 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); 1759 ++NumWidened; 1760 1761 // Traverse the def-use chain using a worklist starting at the original IV. 1762 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); 1763 1764 Widened.insert(OrigPhi); 1765 pushNarrowIVUsers(OrigPhi, WidePhi); 1766 1767 while (!NarrowIVUsers.empty()) { 1768 NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); 1769 1770 // Process a def-use edge. This may replace the use, so don't hold a 1771 // use_iterator across it. 1772 Instruction *WideUse = widenIVUse(DU, Rewriter); 1773 1774 // Follow all def-use edges from the previous narrow use. 1775 if (WideUse) 1776 pushNarrowIVUsers(DU.NarrowUse, WideUse); 1777 1778 // widenIVUse may have removed the def-use edge. 1779 if (DU.NarrowDef->use_empty()) 1780 DeadInsts.emplace_back(DU.NarrowDef); 1781 } 1782 1783 // Attach any debug information to the new PHI. Since OrigPhi and WidePHI 1784 // evaluate the same recurrence, we can just copy the debug info over. 1785 SmallVector<DbgValueInst *, 1> DbgValues; 1786 llvm::findDbgValues(DbgValues, OrigPhi); 1787 auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(), 1788 ValueAsMetadata::get(WidePhi)); 1789 for (auto &DbgValue : DbgValues) 1790 DbgValue->setOperand(0, MDPhi); 1791 return WidePhi; 1792 } 1793 1794 /// Calculates control-dependent range for the given def at the given context 1795 /// by looking at dominating conditions inside of the loop 1796 void WidenIV::calculatePostIncRange(Instruction *NarrowDef, 1797 Instruction *NarrowUser) { 1798 using namespace llvm::PatternMatch; 1799 1800 Value *NarrowDefLHS; 1801 const APInt *NarrowDefRHS; 1802 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), 1803 m_APInt(NarrowDefRHS))) || 1804 !NarrowDefRHS->isNonNegative()) 1805 return; 1806 1807 auto UpdateRangeFromCondition = [&] (Value *Condition, 1808 bool TrueDest) { 1809 CmpInst::Predicate Pred; 1810 Value *CmpRHS; 1811 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), 1812 m_Value(CmpRHS)))) 1813 return; 1814 1815 CmpInst::Predicate P = 1816 TrueDest ? 
Pred : CmpInst::getInversePredicate(Pred); 1817 1818 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); 1819 auto CmpConstrainedLHSRange = 1820 ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); 1821 auto NarrowDefRange = 1822 CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS); 1823 1824 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); 1825 }; 1826 1827 auto UpdateRangeFromGuards = [&](Instruction *Ctx) { 1828 if (!HasGuards) 1829 return; 1830 1831 for (Instruction &I : make_range(Ctx->getIterator().getReverse(), 1832 Ctx->getParent()->rend())) { 1833 Value *C = nullptr; 1834 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) 1835 UpdateRangeFromCondition(C, /*TrueDest=*/true); 1836 } 1837 }; 1838 1839 UpdateRangeFromGuards(NarrowUser); 1840 1841 BasicBlock *NarrowUserBB = NarrowUser->getParent(); 1842 // If NarrowUserBB is statically unreachable asking dominator queries may 1843 // yield surprising results. (e.g. the block may not have a dom tree node) 1844 if (!DT->isReachableFromEntry(NarrowUserBB)) 1845 return; 1846 1847 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); 1848 L->contains(DTB->getBlock()); 1849 DTB = DTB->getIDom()) { 1850 auto *BB = DTB->getBlock(); 1851 auto *TI = BB->getTerminator(); 1852 UpdateRangeFromGuards(TI); 1853 1854 auto *BI = dyn_cast<BranchInst>(TI); 1855 if (!BI || !BI->isConditional()) 1856 continue; 1857 1858 auto *TrueSuccessor = BI->getSuccessor(0); 1859 auto *FalseSuccessor = BI->getSuccessor(1); 1860 1861 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { 1862 return BBE.isSingleEdge() && 1863 DT->dominates(BBE, NarrowUser->getParent()); 1864 }; 1865 1866 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) 1867 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); 1868 1869 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) 1870 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); 1871 } 1872 } 1873 1874 /// Calculates PostIncRangeInfos map for the given IV 1875 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { 1876 SmallPtrSet<Instruction *, 16> Visited; 1877 SmallVector<Instruction *, 6> Worklist; 1878 Worklist.push_back(OrigPhi); 1879 Visited.insert(OrigPhi); 1880 1881 while (!Worklist.empty()) { 1882 Instruction *NarrowDef = Worklist.pop_back_val(); 1883 1884 for (Use &U : NarrowDef->uses()) { 1885 auto *NarrowUser = cast<Instruction>(U.getUser()); 1886 1887 // Don't go looking outside the current loop. 1888 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; 1889 if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) 1890 continue; 1891 1892 if (!Visited.insert(NarrowUser).second) 1893 continue; 1894 1895 Worklist.push_back(NarrowUser); 1896 1897 calculatePostIncRange(NarrowDef, NarrowUser); 1898 } 1899 } 1900 } 1901 1902 //===----------------------------------------------------------------------===// 1903 // Live IV Reduction - Minimize IVs live across the loop. 1904 //===----------------------------------------------------------------------===// 1905 1906 //===----------------------------------------------------------------------===// 1907 // Simplification of IV users based on SCEV evaluation. 
1908 //===----------------------------------------------------------------------===// 1909 1910 namespace { 1911 1912 class IndVarSimplifyVisitor : public IVVisitor { 1913 ScalarEvolution *SE; 1914 const TargetTransformInfo *TTI; 1915 PHINode *IVPhi; 1916 1917 public: 1918 WideIVInfo WI; 1919 1920 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, 1921 const TargetTransformInfo *TTI, 1922 const DominatorTree *DTree) 1923 : SE(SCEV), TTI(TTI), IVPhi(IV) { 1924 DT = DTree; 1925 WI.NarrowIV = IVPhi; 1926 } 1927 1928 // Implement the interface used by simplifyUsersOfIV. 1929 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } 1930 }; 1931 1932 } // end anonymous namespace 1933 1934 /// Iteratively perform simplification on a worklist of IV users. Each 1935 /// successive simplification may push more users which may themselves be 1936 /// candidates for simplification. 1937 /// 1938 /// Sign/Zero extend elimination is interleaved with IV simplification. 1939 bool IndVarSimplify::simplifyAndExtend(Loop *L, 1940 SCEVExpander &Rewriter, 1941 LoopInfo *LI) { 1942 SmallVector<WideIVInfo, 8> WideIVs; 1943 1944 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction( 1945 Intrinsic::getName(Intrinsic::experimental_guard)); 1946 bool HasGuards = GuardDecl && !GuardDecl->use_empty(); 1947 1948 SmallVector<PHINode*, 8> LoopPhis; 1949 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 1950 LoopPhis.push_back(cast<PHINode>(I)); 1951 } 1952 // Each round of simplification iterates through the SimplifyIVUsers worklist 1953 // for all current phis, then determines whether any IVs can be 1954 // widened. Widening adds new phis to LoopPhis, inducing another round of 1955 // simplification on the wide IVs. 1956 bool Changed = false; 1957 while (!LoopPhis.empty()) { 1958 // Evaluate as many IV expressions as possible before widening any IVs. This 1959 // forces SCEV to set no-wrap flags before evaluating sign/zero 1960 // extension. The first time SCEV attempts to normalize sign/zero extension, 1961 // the result becomes final. So for the most predictable results, we delay 1962 // evaluation of sign/zero extends until needed, and avoid running 1963 // other SCEV based analysis prior to simplifyAndExtend. 1964 do { 1965 PHINode *CurrIV = LoopPhis.pop_back_val(); 1966 1967 // Information about sign/zero extensions of CurrIV. 1968 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); 1969 1970 Changed |= 1971 simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor); 1972 1973 if (Visitor.WI.WidestNativeType) { 1974 WideIVs.push_back(Visitor.WI); 1975 } 1976 } while(!LoopPhis.empty()); 1977 1978 for (; !WideIVs.empty(); WideIVs.pop_back()) { 1979 WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards); 1980 if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) { 1981 Changed = true; 1982 LoopPhis.push_back(WidePhi); 1983 } 1984 } 1985 } 1986 return Changed; 1987 } 1988 1989 //===----------------------------------------------------------------------===// 1990 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition. 1991 //===----------------------------------------------------------------------===// 1992 1993 /// Given a Value which is hoped to be part of an add recurrence in the given 1994 /// loop, return the associated Phi node if so. Otherwise, return null. Note 1995 /// that this is less general than SCEV's AddRec checking.
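/// For example (illustrative): given the header phi /// %iv = phi i32 [ %start, %preheader ], [ %iv.next, %latch ] /// and the increment '%iv.next = add nsw i32 %iv, 1', passing %iv.next returns /// %iv; if the step operand is not loop invariant, null is returned.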
1996 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) { 1997 Instruction *IncI = dyn_cast<Instruction>(IncV); 1998 if (!IncI) 1999 return nullptr; 2000 2001 switch (IncI->getOpcode()) { 2002 case Instruction::Add: 2003 case Instruction::Sub: 2004 break; 2005 case Instruction::GetElementPtr: 2006 // An IV counter must preserve its type. 2007 if (IncI->getNumOperands() == 2) 2008 break; 2009 LLVM_FALLTHROUGH; 2010 default: 2011 return nullptr; 2012 } 2013 2014 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0)); 2015 if (Phi && Phi->getParent() == L->getHeader()) { 2016 if (L->isLoopInvariant(IncI->getOperand(1))) 2017 return Phi; 2018 return nullptr; 2019 } 2020 if (IncI->getOpcode() == Instruction::GetElementPtr) 2021 return nullptr; 2022 2023 // Allow add/sub to be commuted. 2024 Phi = dyn_cast<PHINode>(IncI->getOperand(1)); 2025 if (Phi && Phi->getParent() == L->getHeader()) { 2026 if (L->isLoopInvariant(IncI->getOperand(0))) 2027 return Phi; 2028 } 2029 return nullptr; 2030 } 2031 2032 /// Whether the current loop exit test is based on this value. Currently this 2033 /// is limited to a direct use in the loop condition. 2034 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) { 2035 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2036 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition()); 2037 // TODO: Allow non-icmp loop test. 2038 if (!ICmp) 2039 return false; 2040 2041 // TODO: Allow indirect use. 2042 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V; 2043 } 2044 2045 /// linearFunctionTestReplace policy. Return true unless we can show that the 2046 /// current exit test is already sufficiently canonical. 2047 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) { 2048 assert(L->getLoopLatch() && "Must be in simplified form"); 2049 2050 // Avoid converting a constant or loop invariant test back to a runtime 2051 // test. This is critical for when SCEV's cached ExitCount is less precise 2052 // than the current IR (such as after we've proven a particular exit is 2053 // actually dead and thus the BE count never reaches our ExitCount.) 2054 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2055 if (L->isLoopInvariant(BI->getCondition())) 2056 return false; 2057 2058 // Do LFTR to simplify the exit condition to an ICMP. 2059 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); 2060 if (!Cond) 2061 return true; 2062 2063 // Do LFTR to simplify the exit ICMP to EQ/NE 2064 ICmpInst::Predicate Pred = Cond->getPredicate(); 2065 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ) 2066 return true; 2067 2068 // Look for a loop invariant RHS 2069 Value *LHS = Cond->getOperand(0); 2070 Value *RHS = Cond->getOperand(1); 2071 if (!L->isLoopInvariant(RHS)) { 2072 if (!L->isLoopInvariant(LHS)) 2073 return true; 2074 std::swap(LHS, RHS); 2075 } 2076 // Look for a simple IV counter LHS 2077 PHINode *Phi = dyn_cast<PHINode>(LHS); 2078 if (!Phi) 2079 Phi = getLoopPhiForCounter(LHS, L); 2080 2081 if (!Phi) 2082 return true; 2083 2084 // Do LFTR if PHI node is defined in the loop, but is *not* a counter. 2085 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch()); 2086 if (Idx < 0) 2087 return true; 2088 2089 // Do LFTR if the exit condition's IV is *not* a simple counter. 2090 Value *IncV = Phi->getIncomingValue(Idx); 2091 return Phi != getLoopPhiForCounter(IncV, L); 2092 } 2093 2094 /// Return true if undefined behavior would provable be executed on the path to 2095 /// OnPathTo if Root produced a posion result. 
Note that this doesn't say 2096 /// anything about whether OnPathTo is actually executed or whether Root is 2097 /// actually poison. This can be used to assess whether a new use of Root can 2098 /// be added at a location which is control equivalent with OnPathTo (such as 2099 /// immediately before it) without introducing UB which didn't previously 2100 /// exist. Note that a false result conveys no information. 2101 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, 2102 Instruction *OnPathTo, 2103 DominatorTree *DT) { 2104 // Basic approach is to assume Root is poison, propagate poison forward 2105 // through all users we can easily track, and then check whether any of those 2106 // users provably trigger UB and must execute before our exiting block might 2107 // exit. 2108 2109 // The set of all recursive users we've visited (which are assumed to all be 2110 // poison because of said visit) 2111 SmallSet<const Value *, 16> KnownPoison; 2112 SmallVector<const Instruction*, 16> Worklist; 2113 Worklist.push_back(Root); 2114 while (!Worklist.empty()) { 2115 const Instruction *I = Worklist.pop_back_val(); 2116 2117 // If we know this must trigger UB on a path leading to our target. 2118 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo)) 2119 return true; 2120 2121 // If we can't analyze propagation through this instruction, just skip it 2122 // and transitive users. Safe as false is a conservative result. 2123 if (!propagatesFullPoison(I) && I != Root) 2124 continue; 2125 2126 if (KnownPoison.insert(I).second) 2127 for (const User *User : I->users()) 2128 Worklist.push_back(cast<Instruction>(User)); 2129 } 2130 2131 // Might be non-UB, or might have a path we couldn't prove must execute on 2132 // the way to the exiting bb. 2133 return false; 2134 } 2135 2136 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils 2137 /// down to checking that all operands are constant and listing instructions 2138 /// that may hide undef. 2139 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited, 2140 unsigned Depth) { 2141 if (isa<Constant>(V)) 2142 return !isa<UndefValue>(V); 2143 2144 if (Depth >= 6) 2145 return false; 2146 2147 // Conservatively handle non-constant non-instructions. For example, Arguments 2148 // may be undef. 2149 Instruction *I = dyn_cast<Instruction>(V); 2150 if (!I) 2151 return false; 2152 2153 // Load and return values may be undef. 2154 if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I)) 2155 return false; 2156 2157 // Optimistically handle other instructions. 2158 for (Value *Op : I->operands()) { 2159 if (!Visited.insert(Op).second) 2160 continue; 2161 if (!hasConcreteDefImpl(Op, Visited, Depth+1)) 2162 return false; 2163 } 2164 return true; 2165 } 2166 2167 /// Return true if the given value is concrete. We must prove that undef can 2168 /// never reach it. 2169 /// 2170 /// TODO: If we decide that this is a good approach to checking for undef, we 2171 /// may factor it into a common location. 2172 static bool hasConcreteDef(Value *V) { 2173 SmallPtrSet<Value*, 8> Visited; 2174 Visited.insert(V); 2175 return hasConcreteDefImpl(V, Visited, 0); 2176 } 2177 2178 /// Return true if this IV has no uses other than the (soon to be rewritten) 2179 /// loop exit test and its own increment.
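/// For example, a counter whose phi and increment feed only the exit compare /// (and each other) is almost dead, while a counter that is also used to index /// memory inside the loop is not.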
2180 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { 2181 int LatchIdx = Phi->getBasicBlockIndex(LatchBlock); 2182 Value *IncV = Phi->getIncomingValue(LatchIdx); 2183 2184 for (User *U : Phi->users()) 2185 if (U != Cond && U != IncV) return false; 2186 2187 for (User *U : IncV->users()) 2188 if (U != Cond && U != Phi) return false; 2189 return true; 2190 } 2191 2192 /// Return true if the given phi is a "counter" in L. A counter is an 2193 /// add recurrence (of integer or pointer type) with an arbitrary start and a 2194 /// step of 1. Note that L must have exactly one latch. 2195 static bool isLoopCounter(PHINode* Phi, Loop *L, 2196 ScalarEvolution *SE) { 2197 assert(Phi->getParent() == L->getHeader()); 2198 assert(L->getLoopLatch()); 2199 2200 if (!SE->isSCEVable(Phi->getType())) 2201 return false; 2202 2203 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi)); 2204 if (!AR || AR->getLoop() != L || !AR->isAffine()) 2205 return false; 2206 2207 const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)); 2208 if (!Step || !Step->isOne()) 2209 return false; 2210 2211 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch()); 2212 Value *IncV = Phi->getIncomingValue(LatchIdx); 2213 return (getLoopPhiForCounter(IncV, L) == Phi); 2214 } 2215 2216 /// Search the loop header for a loop counter (an add rec with a step of one) 2217 /// suitable for use by LFTR. If multiple counters are available, select the 2218 /// "best" one based on profitability heuristics. 2219 /// 2220 /// BECount may be an i8* pointer type. The pointer difference is already a 2221 /// valid count without scaling the address stride, so it remains a pointer 2222 /// expression as far as SCEV is concerned. 2223 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB, 2224 const SCEV *BECount, 2225 ScalarEvolution *SE, DominatorTree *DT) { 2226 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType()); 2227 2228 Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition(); 2229 2230 // Loop over all of the PHI nodes, looking for a simple counter. 2231 PHINode *BestPhi = nullptr; 2232 const SCEV *BestInit = nullptr; 2233 BasicBlock *LatchBlock = L->getLoopLatch(); 2234 assert(LatchBlock && "Must be in simplified form"); 2235 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 2236 2237 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 2238 PHINode *Phi = cast<PHINode>(I); 2239 if (!isLoopCounter(Phi, L, SE)) 2240 continue; 2241 2242 // Avoid comparing an integer IV against a pointer Limit. 2243 if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy()) 2244 continue; 2245 2246 const auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi)); 2247 2248 // AR may be a pointer type, while BECount is an integer type. 2249 // AR may be wider than BECount. With eq/ne tests overflow is immaterial. 2250 // AR may not be a narrower type, or we may never exit. 2251 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType()); 2252 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth)) 2253 continue; 2254 2255 // Avoid reusing a potentially undef value to compute other values that may 2256 // have originally had a concrete definition. 2257 if (!hasConcreteDef(Phi)) { 2258 // We explicitly allow unknown phis as long as they are already used by 2259 // the loop exit test. This is legal since performing LFTR could not 2260 // increase the number of undef users.
2261 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock); 2262 if (!isLoopExitTestBasedOn(Phi, ExitingBB) && 2263 !isLoopExitTestBasedOn(IncPhi, ExitingBB)) 2264 continue; 2265 } 2266 2267 // Avoid introducing undefined behavior due to poison which didn't exist in 2268 // the original program. (Annoyingly, the rules for poison and undef 2269 // propagation are distinct, so this does NOT cover the undef case above.) 2270 // We have to ensure that we don't introduce UB by introducing a use on an 2271 // iteration where said IV produces poison. Our strategy here differs for 2272 // pointers and integer IVs. For integers, we strip and reinfer as needed, 2273 // see code in linearFunctionTestReplace. For pointers, we restrict 2274 // transforms as there is no good way to reinfer inbounds once lost. 2275 if (!Phi->getType()->isIntegerTy() && 2276 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) 2277 continue; 2278 2279 const SCEV *Init = AR->getStart(); 2280 2281 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { 2282 // Don't force a live loop counter if another IV can be used. 2283 if (AlmostDeadIV(Phi, LatchBlock, Cond)) 2284 continue; 2285 2286 // Prefer to count-from-zero. This is a more "canonical" counter form. It 2287 // also prefers integer to pointer IVs. 2288 if (BestInit->isZero() != Init->isZero()) { 2289 if (BestInit->isZero()) 2290 continue; 2291 } 2292 // If two IVs both count from zero or both count from nonzero then the 2293 // narrower is likely a dead phi that has been widened. Use the wider phi 2294 // to allow the other to be eliminated. 2295 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) 2296 continue; 2297 } 2298 BestPhi = Phi; 2299 BestInit = Init; 2300 } 2301 return BestPhi; 2302 } 2303 2304 /// Insert an IR expression which computes the value held by the IV IndVar 2305 /// (which must be an loop counter w/unit stride) after the backedge of loop L 2306 /// is taken ExitCount times. 2307 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, 2308 const SCEV *ExitCount, bool UsePostInc, Loop *L, 2309 SCEVExpander &Rewriter, ScalarEvolution *SE) { 2310 assert(isLoopCounter(IndVar, L, SE)); 2311 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 2312 const SCEV *IVInit = AR->getStart(); 2313 2314 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter 2315 // finds a valid pointer IV. Sign extend ExitCount in order to materialize a 2316 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing 2317 // the existing GEPs whenever possible. 2318 if (IndVar->getType()->isPointerTy() && 2319 !ExitCount->getType()->isPointerTy()) { 2320 // IVOffset will be the new GEP offset that is interpreted by GEP as a 2321 // signed value. ExitCount on the other hand represents the loop trip count, 2322 // which is an unsigned value. FindLoopCounter only allows induction 2323 // variables that have a positive unit stride of one. This means we don't 2324 // have to handle the case of negative offsets (yet) and just need to zero 2325 // extend ExitCount. 2326 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); 2327 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy); 2328 if (UsePostInc) 2329 IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy)); 2330 2331 // Expand the code for the iteration count. 
2332 assert(SE->isLoopInvariant(IVOffset, L) && 2333 "Computed iteration count is not loop invariant!"); 2334 2335 // We could handle pointer IVs other than i8*, but we need to compensate for 2336 // gep index scaling. 2337 assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), 2338 cast<PointerType>(IndVar->getType()) 2339 ->getElementType())->isOne() && 2340 "unit stride pointer IV must be i8*"); 2341 2342 const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset); 2343 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2344 return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI); 2345 } else { 2346 // In any other case, convert both IVInit and ExitCount to integers before 2347 // comparing. This may result in SCEV expansion of pointers, but in practice 2348 // SCEV will fold the pointer arithmetic away as such: 2349 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). 2350 // 2351 // Valid Cases: (1) both integers is most common; (2) both may be pointers 2352 // for simple memset-style loops. 2353 // 2354 // IVInit integer and ExitCount pointer would only occur if a canonical IV 2355 // were generated on top of case #2, which is not expected. 2356 2357 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 2358 // For unit stride, IVCount = Start + ExitCount with 2's complement 2359 // overflow. 2360 2361 // For integer IVs, truncate the IV before computing IVInit + BECount, 2362 // unless we know apriori that the limit must be a constant when evaluated 2363 // in the bitwidth of the IV. We prefer (potentially) keeping a truncate 2364 // of the IV in the loop over a (potentially) expensive expansion of the 2365 // widened exit count add(zext(add)) expression. 2366 if (SE->getTypeSizeInBits(IVInit->getType()) 2367 > SE->getTypeSizeInBits(ExitCount->getType())) { 2368 if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount)) 2369 ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType()); 2370 else 2371 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType()); 2372 } 2373 2374 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount); 2375 2376 if (UsePostInc) 2377 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); 2378 2379 // Expand the code for the iteration count. 2380 assert(SE->isLoopInvariant(IVLimit, L) && 2381 "Computed iteration count is not loop invariant!"); 2382 // Ensure that we generate the same type as IndVar, or a smaller integer 2383 // type. In the presence of null pointer values, we have an integer type 2384 // SCEV expression (IVInit) for a pointer type IV value (IndVar). 2385 Type *LimitTy = ExitCount->getType()->isPointerTy() ? 2386 IndVar->getType() : ExitCount->getType(); 2387 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2388 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); 2389 } 2390 } 2391 2392 /// This method rewrites the exit condition of the loop to be a canonical != 2393 /// comparison against the incremented loop induction variable. This pass is 2394 /// able to rewrite the exit tests of any loop where the SCEV analysis can 2395 /// determine a loop-invariant trip count of the loop, which is actually a much 2396 /// broader range than just linear tests. 
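/// For example (a sketch; names are illustrative): a latch exit governed by /// 'icmp slt i32 %iv, %n' whose trip count SCEV can compute is rewritten to /// exit on 'icmp ne i32 %iv.next, %limit', where %limit is the value produced /// by genLoopLimit from the exit count.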
2397 bool IndVarSimplify:: 2398 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 2399 const SCEV *ExitCount, 2400 PHINode *IndVar, SCEVExpander &Rewriter) { 2401 assert(L->getLoopLatch() && "Loop no longer in simplified form?"); 2402 assert(isLoopCounter(IndVar, L, SE)); 2403 Instruction * const IncVar = 2404 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch())); 2405 2406 // Initialize CmpIndVar to the preincremented IV. 2407 Value *CmpIndVar = IndVar; 2408 bool UsePostInc = false; 2409 2410 // If the exiting block is the same as the backedge block, we prefer to 2411 // compare against the post-incremented value, otherwise we must compare 2412 // against the preincremented value. 2413 if (ExitingBB == L->getLoopLatch()) { 2414 // For pointer IVs, we chose to not strip inbounds which requires us not 2415 // to add a potentially UB introducing use. We need to either a) show 2416 // the loop test we're modifying is already in post-inc form, or b) show 2417 // that adding a use must not introduce UB. 2418 bool SafeToPostInc = 2419 IndVar->getType()->isIntegerTy() || 2420 isLoopExitTestBasedOn(IncVar, ExitingBB) || 2421 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT); 2422 if (SafeToPostInc) { 2423 UsePostInc = true; 2424 CmpIndVar = IncVar; 2425 } 2426 } 2427 2428 // It may be necessary to drop nowrap flags on the incrementing instruction 2429 // if either LFTR moves from a pre-inc check to a post-inc check (in which 2430 // case the increment might have previously been poison on the last iteration 2431 // only) or if LFTR switches to a different IV that was previously dynamically 2432 // dead (and as such may be arbitrarily poison). We remove any nowrap flags 2433 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc 2434 // check), because the pre-inc addrec flags may be adopted from the original 2435 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags. 2436 // TODO: This handling is inaccurate for one case: If we switch to a 2437 // dynamically dead IV that wraps on the first loop iteration only, which is 2438 // not covered by the post-inc addrec. (If the new IV was not dynamically 2439 // dead, it could not be poison on the first iteration in the first place.) 2440 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) { 2441 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar)); 2442 if (BO->hasNoUnsignedWrap()) 2443 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap()); 2444 if (BO->hasNoSignedWrap()) 2445 BO->setHasNoSignedWrap(AR->hasNoSignedWrap()); 2446 } 2447 2448 Value *ExitCnt = genLoopLimit( 2449 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE); 2450 assert(ExitCnt->getType()->isPointerTy() == 2451 IndVar->getType()->isPointerTy() && 2452 "genLoopLimit missed a cast"); 2453 2454 // Insert a new icmp_ne or icmp_eq instruction before the branch. 2455 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2456 ICmpInst::Predicate P; 2457 if (L->contains(BI->getSuccessor(0))) 2458 P = ICmpInst::ICMP_NE; 2459 else 2460 P = ICmpInst::ICMP_EQ; 2461 2462 IRBuilder<> Builder(BI); 2463 2464 // The new loop exit condition should reuse the debug location of the 2465 // original loop exit condition. 
2466 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition())) 2467 Builder.SetCurrentDebugLocation(Cond->getDebugLoc()); 2468 2469 // For integer IVs, if we evaluated the limit in the narrower bitwidth to 2470 // avoid the expensive expansion of the limit expression in the wider type, 2471 // emit a truncate to narrow the IV to the ExitCount type. This is safe 2472 // since we know (from the exit count bitwidth), that we can't self-wrap in 2473 // the narrower type. 2474 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); 2475 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); 2476 if (CmpIndVarSize > ExitCntSize) { 2477 assert(!CmpIndVar->getType()->isPointerTy() && 2478 !ExitCnt->getType()->isPointerTy()); 2479 2480 // Before resorting to actually inserting the truncate, use the same 2481 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend 2482 // the other side of the comparison instead. We still evaluate the limit 2483 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to 2484 // a truncate within in. 2485 bool Extended = false; 2486 const SCEV *IV = SE->getSCEV(CmpIndVar); 2487 const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar), 2488 ExitCnt->getType()); 2489 const SCEV *ZExtTrunc = 2490 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType()); 2491 2492 if (ZExtTrunc == IV) { 2493 Extended = true; 2494 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(), 2495 "wide.trip.count"); 2496 } else { 2497 const SCEV *SExtTrunc = 2498 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType()); 2499 if (SExtTrunc == IV) { 2500 Extended = true; 2501 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(), 2502 "wide.trip.count"); 2503 } 2504 } 2505 2506 if (Extended) { 2507 bool Discard; 2508 L->makeLoopInvariant(ExitCnt, Discard); 2509 } else 2510 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), 2511 "lftr.wideiv"); 2512 } 2513 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 2514 << " LHS:" << *CmpIndVar << '\n' 2515 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==") 2516 << "\n" 2517 << " RHS:\t" << *ExitCnt << "\n" 2518 << "ExitCount:\t" << *ExitCount << "\n" 2519 << " was: " << *BI->getCondition() << "\n"); 2520 2521 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); 2522 Value *OrigCond = BI->getCondition(); 2523 // It's tempting to use replaceAllUsesWith here to fully replace the old 2524 // comparison, but that's not immediately safe, since users of the old 2525 // comparison may not be dominated by the new comparison. Instead, just 2526 // update the branch to use the new comparison; in the common case this 2527 // will make old comparison dead. 2528 BI->setCondition(Cond); 2529 DeadInsts.push_back(OrigCond); 2530 2531 ++NumLFTR; 2532 return true; 2533 } 2534 2535 //===----------------------------------------------------------------------===// 2536 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders. 2537 //===----------------------------------------------------------------------===// 2538 2539 /// If there's a single exit block, sink any loop-invariant values that 2540 /// were defined in the preheader but not used inside the loop into the 2541 /// exit block to reduce register pressure in the loop. 
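/// For example, an address computed in the preheader solely for use after the /// loop (say, a GEP feeding a store in the exit block) can be moved into the /// exit block so it does not tie up a register for the entire loop.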
2542 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { 2543 BasicBlock *ExitBlock = L->getExitBlock(); 2544 if (!ExitBlock) return false; 2545 2546 BasicBlock *Preheader = L->getLoopPreheader(); 2547 if (!Preheader) return false; 2548 2549 bool MadeAnyChanges = false; 2550 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); 2551 BasicBlock::iterator I(Preheader->getTerminator()); 2552 while (I != Preheader->begin()) { 2553 --I; 2554 // New instructions were inserted at the end of the preheader. 2555 if (isa<PHINode>(I)) 2556 break; 2557 2558 // Don't move instructions which might have side effects, since the side 2559 // effects need to complete before instructions inside the loop. Also don't 2560 // move instructions which might read memory, since the loop may modify 2561 // memory. Note that it's okay if the instruction might have undefined 2562 // behavior: LoopSimplify guarantees that the preheader dominates the exit 2563 // block. 2564 if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 2565 continue; 2566 2567 // Skip debug info intrinsics. 2568 if (isa<DbgInfoIntrinsic>(I)) 2569 continue; 2570 2571 // Skip eh pad instructions. 2572 if (I->isEHPad()) 2573 continue; 2574 2575 // Don't sink alloca: we never want to sink static alloca's out of the 2576 // entry block, and correctly sinking dynamic alloca's requires 2577 // checks for stacksave/stackrestore intrinsics. 2578 // FIXME: Refactor this check somehow? 2579 if (isa<AllocaInst>(I)) 2580 continue; 2581 2582 // Determine if there is a use in or before the loop (direct or 2583 // otherwise). 2584 bool UsedInLoop = false; 2585 for (Use &U : I->uses()) { 2586 Instruction *User = cast<Instruction>(U.getUser()); 2587 BasicBlock *UseBB = User->getParent(); 2588 if (PHINode *P = dyn_cast<PHINode>(User)) { 2589 unsigned i = 2590 PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 2591 UseBB = P->getIncomingBlock(i); 2592 } 2593 if (UseBB == Preheader || L->contains(UseBB)) { 2594 UsedInLoop = true; 2595 break; 2596 } 2597 } 2598 2599 // If there is, the def must remain in the preheader. 2600 if (UsedInLoop) 2601 continue; 2602 2603 // Otherwise, sink it to the exit block. 2604 Instruction *ToMove = &*I; 2605 bool Done = false; 2606 2607 if (I != Preheader->begin()) { 2608 // Skip debug info intrinsics. 2609 do { 2610 --I; 2611 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); 2612 2613 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) 2614 Done = true; 2615 } else { 2616 Done = true; 2617 } 2618 2619 MadeAnyChanges = true; 2620 ToMove->moveBefore(*ExitBlock, InsertPt); 2621 if (Done) break; 2622 InsertPt = ToMove->getIterator(); 2623 } 2624 2625 return MadeAnyChanges; 2626 } 2627 2628 bool IndVarSimplify::optimizeLoopExits(Loop *L) { 2629 SmallVector<BasicBlock*, 16> ExitingBlocks; 2630 L->getExitingBlocks(ExitingBlocks); 2631 2632 // Form an expression for the maximum exit count possible for this loop. We 2633 // merge the max and exact information to approximate a version of 2634 // getMaxBackedgeTakenInfo which isn't restricted to just constants. 2635 // TODO: factor this out as a version of getMaxBackedgeTakenCount which 2636 // isn't guaranteed to return a constant. 
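// E.g. with a constant max backedge-taken count of 100 and one computable // exit count %n, the bound formed below is umin(100, %n).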
2637 SmallVector<const SCEV*, 4> ExitCounts; 2638 const SCEV *MaxConstEC = SE->getMaxBackedgeTakenCount(L); 2639 if (!isa<SCEVCouldNotCompute>(MaxConstEC)) 2640 ExitCounts.push_back(MaxConstEC); 2641 for (BasicBlock *ExitingBB : ExitingBlocks) { 2642 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2643 if (!isa<SCEVCouldNotCompute>(ExitCount)) { 2644 assert(DT->dominates(ExitingBB, L->getLoopLatch()) && 2645 "We should only have known counts for exiting blocks that " 2646 "dominate latch!"); 2647 ExitCounts.push_back(ExitCount); 2648 } 2649 } 2650 if (ExitCounts.empty()) 2651 return false; 2652 const SCEV *MaxExitCount = SE->getUMinFromMismatchedTypes(ExitCounts); 2653 2654 bool Changed = false; 2655 for (BasicBlock *ExitingBB : ExitingBlocks) { 2656 // If our exitting block exits multiple loops, we can only rewrite the 2657 // innermost one. Otherwise, we're changing how many times the innermost 2658 // loop runs before it exits. 2659 if (LI->getLoopFor(ExitingBB) != L) 2660 continue; 2661 2662 // Can't rewrite non-branch yet. 2663 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 2664 if (!BI) 2665 continue; 2666 2667 // If already constant, nothing to do. 2668 if (isa<Constant>(BI->getCondition())) 2669 continue; 2670 2671 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2672 if (isa<SCEVCouldNotCompute>(ExitCount)) 2673 continue; 2674 2675 // If we know we'd exit on the first iteration, rewrite the exit to 2676 // reflect this. This does not imply the loop must exit through this 2677 // exit; there may be an earlier one taken on the first iteration. 2678 // TODO: Given we know the backedge can't be taken, we should go ahead 2679 // and break it. Or at least, kill all the header phis and simplify. 2680 if (ExitCount->isZero()) { 2681 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); 2682 auto *OldCond = BI->getCondition(); 2683 auto *NewCond = ExitIfTrue ? ConstantInt::getTrue(OldCond->getType()) : 2684 ConstantInt::getFalse(OldCond->getType()); 2685 BI->setCondition(NewCond); 2686 if (OldCond->use_empty()) 2687 DeadInsts.push_back(OldCond); 2688 Changed = true; 2689 continue; 2690 } 2691 2692 // If we end up with a pointer exit count, bail. Note that we can end up 2693 // with a pointer exit count for one exiting block, and not for another in 2694 // the same loop. 2695 if (!ExitCount->getType()->isIntegerTy() || 2696 !MaxExitCount->getType()->isIntegerTy()) 2697 continue; 2698 2699 Type *WiderType = 2700 SE->getWiderType(MaxExitCount->getType(), ExitCount->getType()); 2701 ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType); 2702 MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType); 2703 assert(MaxExitCount->getType() == ExitCount->getType()); 2704 2705 // Can we prove that some other exit must be taken strictly before this 2706 // one? TODO: handle cases where ule is known, and equality is covered 2707 // by a dominating exit 2708 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, 2709 MaxExitCount, ExitCount)) { 2710 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); 2711 auto *OldCond = BI->getCondition(); 2712 auto *NewCond = ExitIfTrue ? 
ConstantInt::getFalse(OldCond->getType()) : 2713 ConstantInt::getTrue(OldCond->getType()); 2714 BI->setCondition(NewCond); 2715 if (OldCond->use_empty()) 2716 DeadInsts.push_back(OldCond); 2717 Changed = true; 2718 continue; 2719 } 2720 2721 // TODO: If we can prove that the exiting iteration is equal to the exit 2722 // count for this exit and that no previous exit opportunities exist within 2723 // the loop, then we can discharge all other exits. (May fall out of 2724 // previous TODO.) 2725 2726 // TODO: If we can't prove any relation between our exit count and the 2727 // loop's exit count, but taking this exit doesn't require actually running 2728 // the loop (i.e. no side effects, no computed values used in exit), then 2729 // we can replace the exit test with a loop invariant test which exits on 2730 // the first iteration. 2731 } 2732 return Changed; 2733 } 2734 2735 //===----------------------------------------------------------------------===// 2736 // IndVarSimplify driver. Manage several subpasses of IV simplification. 2737 //===----------------------------------------------------------------------===// 2738 2739 bool IndVarSimplify::run(Loop *L) { 2740 // We need (and expect!) the incoming loop to be in LCSSA. 2741 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2742 "LCSSA required to run indvars!"); 2743 bool Changed = false; 2744 2745 // If LoopSimplify form is not available, stay out of trouble. Some notes: 2746 // - LSR currently only supports LoopSimplify-form loops. Indvars' 2747 // canonicalization can be a pessimization without LSR to "clean up" 2748 // afterwards. 2749 // - We depend on having a preheader; in particular, 2750 // Loop::getCanonicalInductionVariable only supports loops with preheaders, 2751 // and we're in trouble if we can't find the induction variable even when 2752 // we've manually inserted one. 2753 // - LFTR relies on having a single backedge. 2754 if (!L->isLoopSimplifyForm()) 2755 return false; 2756 2757 // If there are any floating-point recurrences, attempt to 2758 // transform them to use integer recurrences. 2759 Changed |= rewriteNonIntegerIVs(L); 2760 2761 #ifndef NDEBUG 2762 // Used below for a consistency check only 2763 const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); 2764 #endif 2765 2766 // Create a rewriter object which we'll use to transform the code with. 2767 SCEVExpander Rewriter(*SE, DL, "indvars"); 2768 #ifndef NDEBUG 2769 Rewriter.setDebugType(DEBUG_TYPE); 2770 #endif 2771 2772 // Eliminate redundant IV users. 2773 // 2774 // Simplification works best when run before other consumers of SCEV. We 2775 // attempt to avoid evaluating SCEVs for sign/zero extend operations until 2776 // other expressions involving loop IVs have been evaluated. This helps SCEV 2777 // set no-wrap flags before normalizing sign/zero extension. 2778 Rewriter.disableCanonicalMode(); 2779 Changed |= simplifyAndExtend(L, Rewriter, LI); 2780 2781 // Check to see if we can compute the final value of any expressions 2782 // that are recurrent in the loop, and substitute the exit values from the 2783 // loop into any instructions outside of the loop that use the final values 2784 // of the current expressions. 2785 if (ReplaceExitValue != NeverRepl) 2786 Changed |= rewriteLoopExitValues(L, Rewriter); 2787 2788 // Eliminate redundant IV cycles.
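// (For instance, two header phis stepping in lockstep from the same start // compute the same SCEV; SCEVExpander keeps one and queues the other as dead.)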
2789 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); 2790 2791 Changed |= optimizeLoopExits(L); 2792 2793 // If we have a trip count expression, rewrite the loop's exit condition 2794 // using it. 2795 if (!DisableLFTR) { 2796 SmallVector<BasicBlock*, 16> ExitingBlocks; 2797 L->getExitingBlocks(ExitingBlocks); 2798 for (BasicBlock *ExitingBB : ExitingBlocks) { 2799 // Can't rewrite non-branch yet. 2800 if (!isa<BranchInst>(ExitingBB->getTerminator())) 2801 continue; 2802 2803 // If our exitting block exits multiple loops, we can only rewrite the 2804 // innermost one. Otherwise, we're changing how many times the innermost 2805 // loop runs before it exits. 2806 if (LI->getLoopFor(ExitingBB) != L) 2807 continue; 2808 2809 if (!needsLFTR(L, ExitingBB)) 2810 continue; 2811 2812 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2813 if (isa<SCEVCouldNotCompute>(ExitCount)) 2814 continue; 2815 2816 // This was handled above, but as we form SCEVs, we can sometimes refine 2817 // existing ones; this allows exit counts to be folded to zero which 2818 // weren't when optimizeLoopExits saw them. Arguably, we should iterate 2819 // until stable to handle cases like this better. 2820 if (ExitCount->isZero()) 2821 continue; 2822 2823 PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT); 2824 if (!IndVar) 2825 continue; 2826 2827 // Avoid high cost expansions. Note: This heuristic is questionable in 2828 // that our definition of "high cost" is not exactly principled. 2829 if (Rewriter.isHighCostExpansion(ExitCount, L)) 2830 continue; 2831 2832 // Check preconditions for proper SCEVExpander operation. SCEV does not 2833 // express SCEVExpander's dependencies, such as LoopSimplify. Instead 2834 // any pass that uses the SCEVExpander must do it. This does not work 2835 // well for loop passes because SCEVExpander makes assumptions about 2836 // all loops, while LoopPassManager only forces the current loop to be 2837 // simplified. 2838 // 2839 // FIXME: SCEV expansion has no way to bail out, so the caller must 2840 // explicitly check any assumptions made by SCEV. Brittle. 2841 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount); 2842 if (!AR || AR->getLoop()->getLoopPreheader()) 2843 Changed |= linearFunctionTestReplace(L, ExitingBB, 2844 ExitCount, IndVar, 2845 Rewriter); 2846 } 2847 } 2848 // Clear the rewriter cache, because values that are in the rewriter's cache 2849 // can be deleted in the loop below, causing the AssertingVH in the cache to 2850 // trigger. 2851 Rewriter.clear(); 2852 2853 // Now that we're done iterating through lists, clean up any instructions 2854 // which are now dead. 2855 while (!DeadInsts.empty()) 2856 if (Instruction *Inst = 2857 dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val())) 2858 Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); 2859 2860 // The Rewriter may not be used from this point on. 2861 2862 // Loop-invariant instructions in the preheader that aren't used in the 2863 // loop may be sunk below the loop to reduce register pressure. 2864 Changed |= sinkUnusedInvariants(L); 2865 2866 // rewriteFirstIterationLoopExitValues does not rely on the computation of 2867 // trip count and therefore can further simplify exit values in addition to 2868 // rewriteLoopExitValues. 2869 Changed |= rewriteFirstIterationLoopExitValues(L); 2870 2871 // Clean up dead instructions. 2872 Changed |= DeleteDeadPHIs(L->getHeader(), TLI); 2873 2874 // Check a post-condition. 
2875 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2876 "Indvars did not preserve LCSSA!"); 2877 2878 // Verify that LFTR, and any other change have not interfered with SCEV's 2879 // ability to compute trip count. 2880 #ifndef NDEBUG 2881 if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { 2882 SE->forgetLoop(L); 2883 const SCEV *NewBECount = SE->getBackedgeTakenCount(L); 2884 if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) < 2885 SE->getTypeSizeInBits(NewBECount->getType())) 2886 NewBECount = SE->getTruncateOrNoop(NewBECount, 2887 BackedgeTakenCount->getType()); 2888 else 2889 BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, 2890 NewBECount->getType()); 2891 assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV"); 2892 } 2893 #endif 2894 2895 return Changed; 2896 } 2897 2898 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, 2899 LoopStandardAnalysisResults &AR, 2900 LPMUpdater &) { 2901 Function *F = L.getHeader()->getParent(); 2902 const DataLayout &DL = F->getParent()->getDataLayout(); 2903 2904 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI); 2905 if (!IVS.run(&L)) 2906 return PreservedAnalyses::all(); 2907 2908 auto PA = getLoopPassPreservedAnalyses(); 2909 PA.preserveSet<CFGAnalyses>(); 2910 return PA; 2911 } 2912 2913 namespace { 2914 2915 struct IndVarSimplifyLegacyPass : public LoopPass { 2916 static char ID; // Pass identification, replacement for typeid 2917 2918 IndVarSimplifyLegacyPass() : LoopPass(ID) { 2919 initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry()); 2920 } 2921 2922 bool runOnLoop(Loop *L, LPPassManager &LPM) override { 2923 if (skipLoop(L)) 2924 return false; 2925 2926 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 2927 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 2928 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2929 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); 2930 auto *TLI = TLIP ? &TLIP->getTLI() : nullptr; 2931 auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); 2932 auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr; 2933 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 2934 2935 IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI); 2936 return IVS.run(L); 2937 } 2938 2939 void getAnalysisUsage(AnalysisUsage &AU) const override { 2940 AU.setPreservesCFG(); 2941 getLoopAnalysisUsage(AU); 2942 } 2943 }; 2944 2945 } // end anonymous namespace 2946 2947 char IndVarSimplifyLegacyPass::ID = 0; 2948 2949 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", 2950 "Induction Variable Simplification", false, false) 2951 INITIALIZE_PASS_DEPENDENCY(LoopPass) 2952 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars", 2953 "Induction Variable Simplification", false, false) 2954 2955 Pass *llvm::createIndVarSimplifyPass() { 2956 return new IndVarSimplifyLegacyPass(); 2957 } 2958