//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
"llvm/Transforms/Utils/SimplifyIndVar.h" 84 #include <cassert> 85 #include <cstdint> 86 #include <utility> 87 88 using namespace llvm; 89 90 #define DEBUG_TYPE "indvars" 91 92 STATISTIC(NumWidened , "Number of indvars widened"); 93 STATISTIC(NumReplaced , "Number of exit values replaced"); 94 STATISTIC(NumLFTR , "Number of loop exit tests replaced"); 95 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); 96 STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); 97 98 // Trip count verification can be enabled by default under NDEBUG if we 99 // implement a strong expression equivalence checker in SCEV. Until then, we 100 // use the verify-indvars flag, which may assert in some cases. 101 static cl::opt<bool> VerifyIndvars( 102 "verify-indvars", cl::Hidden, 103 cl::desc("Verify the ScalarEvolution result after running indvars")); 104 105 enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl }; 106 107 static cl::opt<ReplaceExitVal> ReplaceExitValue( 108 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), 109 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), 110 cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), 111 clEnumValN(OnlyCheapRepl, "cheap", 112 "only replace exit value when the cost is cheap"), 113 clEnumValN(NoHardUse, "noharduse", 114 "only replace exit values when loop def likely dead"), 115 clEnumValN(AlwaysRepl, "always", 116 "always replace exit value whenever possible"))); 117 118 static cl::opt<bool> UsePostIncrementRanges( 119 "indvars-post-increment-ranges", cl::Hidden, 120 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), 121 cl::init(true)); 122 123 static cl::opt<bool> 124 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), 125 cl::desc("Disable Linear Function Test Replace optimization")); 126 127 static cl::opt<bool> 128 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(false), 129 cl::desc("Predicate conditions in read only loops")); 130 131 namespace { 132 133 struct RewritePhi; 134 135 class IndVarSimplify { 136 LoopInfo *LI; 137 ScalarEvolution *SE; 138 DominatorTree *DT; 139 const DataLayout &DL; 140 TargetLibraryInfo *TLI; 141 const TargetTransformInfo *TTI; 142 143 SmallVector<WeakTrackingVH, 16> DeadInsts; 144 145 bool isValidRewrite(Value *FromVal, Value *ToVal); 146 147 bool handleFloatingPointIV(Loop *L, PHINode *PH); 148 bool rewriteNonIntegerIVs(Loop *L); 149 150 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); 151 /// Try to eliminate loop exits based on analyzeable exit counts 152 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter); 153 /// Try to form loop invariant tests for loop exits by changing how many 154 /// iterations of the loop run when that is unobservable. 
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
  bool rewriteFirstIterationLoopExitValues(Loop *L);
  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}

  bool run(Loop *L);
};

} // end anonymous namespace

/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR.  Ideally, this function will only be
/// called for verification.
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
///   for (double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for (int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment.  Reject this PN if the increment operation is not an
  // add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
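  // For example, 'fadd double %iv, 1.0' is accepted, while 'fadd double
  // %iv, 0.5' is rejected because 0.5 does not convert exactly to an integer.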
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
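    // For example, 'for (double i = 0; i != 10; i += 3)' visits 0, 3, 6, 9,
    // 12, ... and never equals 10; the Leftover check below rejects such
    // equality exits rather than create an integer IV that wraps around.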
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;

  // Ith incoming value.
  unsigned Ith;

  // Exit value after expansion.
  Value *Val;

  // True if the expansion is expensive.
  bool HighCost;

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
  SmallPtrSet<const Instruction *, 8> Visited;
  SmallVector<const Instruction *, 8> WorkList;
  Visited.insert(I);
  WorkList.push_back(I);
  while (!WorkList.empty()) {
    const Instruction *Curr = WorkList.pop_back_val();
    // This use is outside the loop, nothing to do.
    if (!L->contains(Curr))
      continue;
    // Do we assume it is a "hard" use which will not be eliminated easily?
    if (Curr->mayHaveSideEffects())
      return true;
    // Otherwise, add all its users to worklist.
    for (auto U : Curr->users()) {
      auto *UI = cast<Instruction>(U);
      if (Visited.insert(UI).second)
        WorkList.push_back(UI);
    }
  }
  return false;
}

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not integer or is not defined
        // in the loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop.  Evaluate the value it
        // contains when the loop exits, if possible.  We prefer to start with
        // expressions which are true for all exits (so as to maximize
        // expression reuse by the SCEVExpander), but resort to per-exit
        // evaluation if that fails.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (isa<SCEVCouldNotCompute>(ExitValue) ||
            !SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE)) {
          // TODO: This should probably be sunk into SCEV in some way; maybe a
          // getSCEVForExit(SCEV*, L, ExitingBB)?  It can be generalized for
          // most SCEV expressions and other recurrence types (e.g. shift
          // recurrences).  Is there existing code we can reuse?
          const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
          if (isa<SCEVCouldNotCompute>(ExitCount))
            continue;
          if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
            if (AddRec->getLoop() == L)
              ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
          if (isa<SCEVCouldNotCompute>(ExitValue) ||
              !SE->isLoopInvariant(ExitValue, L) ||
              !isSafeToExpand(ExitValue, *SE))
            continue;
        }

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which can not be optimized
        // away.  Avoid doing so unless we know we have a value which computes
        // the ExitValue already. TODO: This should be merged into SCEV
        // expander to leverage its knowledge of existing expressions.
        if (ReplaceExitValue != AlwaysRepl &&
            !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
            hasHardUserWithinLoop(L, Inst))
          continue;

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);

        LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
                          << '\n'
                          << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

#ifndef NDEBUG
        // If we reuse an instruction from a loop which is neither L nor one of
        // its containing loops, we end up breaking LCSSA form for this loop by
        // creating a new use of its instruction.
        if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
          if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
            if (EVL != L)
              assert(EVL->contains(L) && "LCSSA breach detected!");
#endif

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  bool Changed = false;
  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, delete it. Don't do it now to avoid
    // invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

/// Check whether it is possible to delete the loop after rewriting exit
/// value. If it is possible, ignore ReplaceExitValue and do rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // The LoopDeletion pass can delete a loop even when ExitingBlocks.size() > 1;
  // we avoid the multiple-ExitingBlocks case here for simplicity.
  // TODO: If we see a testcase where multiple ExitingBlocks can be deleted
  // after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by [sz]ext
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetics on the wider indvar are more
  // expensive than those on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
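  // For example, if an add in the wide type is costlier on this target than an
  // add in the narrow type, eliminating the extends is not worth the more
  // expensive increment, so we bail out below and keep the narrow IV.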
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def. This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative.  Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo *LI;
  Loop *L;
  ScalarEvolution *SE;
  DominatorTree *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// 0 for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop? If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
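/// For example, a call that takes the narrow IV as an argument cannot be
/// widened; it simply gets fed a trunc of the wide IV and keeps its narrow
/// operand type.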
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension).  For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive.  Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}

/// If the narrow use is an instruction whose two operands are the defining
/// instruction of DU and a load instruction, then we have the following:
/// if the load is hoisted outside the loop, then we do not reach this function
/// as scalar evolution analysis works fine in widenIVUse with variables
/// hoisted outside the loop and efficient code is subsequently generated by
/// not emitting truncate instructions. But when the load is not hoisted
/// (whether due to limitation in alias analysis or due to a true legality
/// constraint), then scalar evolution can not proceed with loop variant values
/// and inefficient code is generated. This function handles the non-hoisted
/// load special case by making the optimization generate the same type of code
/// for hoisted and non-hoisted load (widen use and eliminate sign extend
/// instruction). This special case is important especially when the induction
/// variables are affecting addressing mode in code generation.
bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return false;

  // The operand that is not defined by NarrowDef of DU. Let's call it the
  // other operand.
  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
         "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(NarrowUse);
  ExtendKind ExtKind = getExtendKind(NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return false;

  // We are interested in the other operand being a load instruction.
  // But, we should look into relaxing this restriction later on.
  auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
  if (I && I->getOpcode() != Instruction::Load)
    return false;

  // Verifying that Defining operand is an AddRec
  const SCEV *Op1 = SE->getSCEV(WideDef);
  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
    return false;
  // Verifying that other operand is an Extend.
  if (ExtKind == SignExtended) {
    if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
      return false;
  } else {
    if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
      return false;
  }

  if (ExtKind == SignExtended) {
    for (Use &U : NarrowUse->uses()) {
      SExtInst *User = dyn_cast<SExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  } else { // ExtKind == ZeroExtended
    for (Use &U : NarrowUse->uses()) {
      ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  }

  return true;
}

/// Special Case for widening with variant Loads (see
/// WidenIV::widenWithVariantLoadUse). This is the code generation part.
void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  ExtendKind ExtKind = getExtendKind(NarrowDef);

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  // Generating a widening use instruction.
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      ExtKind, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
WideDef 1500 : createExtendInst(NarrowUse->getOperand(1), WideType, 1501 ExtKind, NarrowUse); 1502 1503 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 1504 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 1505 NarrowBO->getName()); 1506 IRBuilder<> Builder(NarrowUse); 1507 Builder.Insert(WideBO); 1508 WideBO->copyIRFlags(NarrowBO); 1509 1510 if (ExtKind == SignExtended) 1511 ExtendKindMap[NarrowUse] = SignExtended; 1512 else 1513 ExtendKindMap[NarrowUse] = ZeroExtended; 1514 1515 // Update the Use. 1516 if (ExtKind == SignExtended) { 1517 for (Use &U : NarrowUse->uses()) { 1518 SExtInst *User = dyn_cast<SExtInst>(U.getUser()); 1519 if (User && User->getType() == WideType) { 1520 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " 1521 << *WideBO << "\n"); 1522 ++NumElimExt; 1523 User->replaceAllUsesWith(WideBO); 1524 DeadInsts.emplace_back(User); 1525 } 1526 } 1527 } else { // ExtKind == ZeroExtended 1528 for (Use &U : NarrowUse->uses()) { 1529 ZExtInst *User = dyn_cast<ZExtInst>(U.getUser()); 1530 if (User && User->getType() == WideType) { 1531 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " 1532 << *WideBO << "\n"); 1533 ++NumElimExt; 1534 User->replaceAllUsesWith(WideBO); 1535 DeadInsts.emplace_back(User); 1536 } 1537 } 1538 } 1539 } 1540 1541 /// Determine whether an individual user of the narrow IV can be widened. If so, 1542 /// return the wide clone of the user. 1543 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { 1544 assert(ExtendKindMap.count(DU.NarrowDef) && 1545 "Should already know the kind of extension used to widen NarrowDef"); 1546 1547 // Stop traversing the def-use chain at inner-loop phis or post-loop phis. 1548 if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) { 1549 if (LI->getLoopFor(UsePhi->getParent()) != L) { 1550 // For LCSSA phis, sink the truncate outside the loop. 1551 // After SimplifyCFG most loop exit targets have a single predecessor. 1552 // Otherwise fall back to a truncate within the loop. 1553 if (UsePhi->getNumOperands() != 1) 1554 truncateIVUse(DU, DT, LI); 1555 else { 1556 // Widening the PHI requires us to insert a trunc. The logical place 1557 // for this trunc is in the same BB as the PHI. This is not possible if 1558 // the BB is terminated by a catchswitch. 1559 if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator())) 1560 return nullptr; 1561 1562 PHINode *WidePhi = 1563 PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", 1564 UsePhi); 1565 WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0)); 1566 IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt()); 1567 Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType()); 1568 UsePhi->replaceAllUsesWith(Trunc); 1569 DeadInsts.emplace_back(UsePhi); 1570 LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to " 1571 << *WidePhi << "\n"); 1572 } 1573 return nullptr; 1574 } 1575 } 1576 1577 // This narrow use can be widened by a sext if it's non-negative or its narrow 1578 // def was widended by a sext. Same for zext. 1579 auto canWidenBySExt = [&]() { 1580 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended; 1581 }; 1582 auto canWidenByZExt = [&]() { 1583 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended; 1584 }; 1585 1586 // Our raison d'etre! Eliminate sign and zero extension. 
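  // For instance (illustrative): once %iv has been widened to i64, a use such
  // as "%w = sext i32 %iv to i64" is replaced directly by the wide IV and the
  // sext becomes dead.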
  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
      (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
    Value *NewDef = DU.WideDef;
    if (DU.NarrowUse->getType() != WideType) {
      unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
      if (CastWidth < IVWidth) {
        // The cast isn't as wide as the IV, so insert a Trunc.
        IRBuilder<> Builder(DU.NarrowUse);
        NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
      } else {
        // A wider extend was hidden behind a narrower one. This may induce
        // another round of IV widening in which the intermediate IV becomes
        // dead. It should be very rare.
        LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
                          << " not wide enough to subsume " << *DU.NarrowUse
                          << "\n");
        DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
        NewDef = DU.NarrowUse;
      }
    }
    if (NewDef != DU.NarrowUse) {
      LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
                        << " replaced by " << *DU.WideDef << "\n");
      ++NumElimExt;
      DU.NarrowUse->replaceAllUsesWith(NewDef);
      DeadInsts.emplace_back(DU.NarrowUse);
    }
    // Now that the extend is gone, we want to expose its uses for potential
    // further simplification. We don't need to directly inform SimplifyIVUsers
    // of the new users, because their parent IV will be processed later as a
    // new loop phi. If we preserved IVUsers analysis, we would also want to
    // push the uses of WideDef here.

    // No further widening is needed. The deceased [sz]ext had done it for us.
    return nullptr;
  }

  // Does this user itself evaluate to a recurrence after widening?
  WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
  if (!WideAddRec.first)
    WideAddRec = getWideRecurrence(DU);

  assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
  if (!WideAddRec.first) {
    // If use is a loop condition, try to promote the condition instead of
    // truncating the IV first.
    if (widenLoopCompare(DU))
      return nullptr;

    // We are here about to generate a truncate instruction that may hurt
    // performance because the scalar evolution expression computed earlier
    // in WideAddRec.first does not indicate a polynomial induction expression.
    // In that case, look at the operands of the use instruction to determine
    // if we can still widen the use instead of truncating its operand.
    if (widenWithVariantLoadUse(DU)) {
      widenWithVariantLoadUseCodegen(DU);
      return nullptr;
    }

    // This user does not evaluate to a recurrence after widening, so don't
    // follow it. Instead insert a Trunc to kill off the original use,
    // eventually isolating the original narrow IV so it can be removed.
    truncateIVUse(DU, DT, LI);
    return nullptr;
  }
  // Assume block terminators cannot evaluate to a recurrence. We can't
  // insert a Trunc after a terminator if there happens to be a critical edge.
  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
         "SCEV is not expected to evaluate a block terminator");

  // Reuse the IV increment that SCEVExpander created as long as it dominates
  // NarrowUse.
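  // (Illustrative) If the narrow increment was "%iv.next = add nsw i32 %iv, 1",
  // the expander has already produced a congruent wide increment such as
  // "%iv.next.wide = add nsw i64 %iv.wide, 1"; reusing it avoids cloning a
  // second wide increment for the same recurrence.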
1661 Instruction *WideUse = nullptr; 1662 if (WideAddRec.first == WideIncExpr && 1663 Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) 1664 WideUse = WideInc; 1665 else { 1666 WideUse = cloneIVUser(DU, WideAddRec.first); 1667 if (!WideUse) 1668 return nullptr; 1669 } 1670 // Evaluation of WideAddRec ensured that the narrow expression could be 1671 // extended outside the loop without overflow. This suggests that the wide use 1672 // evaluates to the same expression as the extended narrow use, but doesn't 1673 // absolutely guarantee it. Hence the following failsafe check. In rare cases 1674 // where it fails, we simply throw away the newly created wide use. 1675 if (WideAddRec.first != SE->getSCEV(WideUse)) { 1676 LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " 1677 << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first 1678 << "\n"); 1679 DeadInsts.emplace_back(WideUse); 1680 return nullptr; 1681 } 1682 1683 // if we reached this point then we are going to replace 1684 // DU.NarrowUse with WideUse. Reattach DbgValue then. 1685 replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT); 1686 1687 ExtendKindMap[DU.NarrowUse] = WideAddRec.second; 1688 // Returning WideUse pushes it on the worklist. 1689 return WideUse; 1690 } 1691 1692 /// Add eligible users of NarrowDef to NarrowIVUsers. 1693 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { 1694 const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); 1695 bool NonNegativeDef = 1696 SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, 1697 SE->getConstant(NarrowSCEV->getType(), 0)); 1698 for (User *U : NarrowDef->users()) { 1699 Instruction *NarrowUser = cast<Instruction>(U); 1700 1701 // Handle data flow merges and bizarre phi cycles. 1702 if (!Widened.insert(NarrowUser).second) 1703 continue; 1704 1705 bool NonNegativeUse = false; 1706 if (!NonNegativeDef) { 1707 // We might have a control-dependent range information for this context. 1708 if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser)) 1709 NonNegativeUse = RangeInfo->getSignedMin().isNonNegative(); 1710 } 1711 1712 NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, 1713 NonNegativeDef || NonNegativeUse); 1714 } 1715 } 1716 1717 /// Process a single induction variable. First use the SCEVExpander to create a 1718 /// wide induction variable that evaluates to the same recurrence as the 1719 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's 1720 /// def-use chain. After widenIVUse has processed all interesting IV users, the 1721 /// narrow IV will be isolated for removal by DeleteDeadPHIs. 1722 /// 1723 /// It would be simpler to delete uses as they are processed, but we must avoid 1724 /// invalidating SCEV expressions. 1725 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { 1726 // Is this phi an induction variable? 1727 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); 1728 if (!AddRec) 1729 return nullptr; 1730 1731 // Widen the induction variable expression. 1732 const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended 1733 ? SE->getSignExtendExpr(AddRec, WideType) 1734 : SE->getZeroExtendExpr(AddRec, WideType); 1735 1736 assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && 1737 "Expect the new IV expression to preserve its type"); 1738 1739 // Can the IV be extended outside the loop without overflow? 
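  // (Illustrative) For a 32-bit IV {0,+,1}, the sext/zext of the AddRec folds
  // to a 64-bit AddRec only when SCEV can prove the extension cannot overflow;
  // otherwise WideIVExpr remains an explicit extend expression and we bail out
  // below.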
1740 AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); 1741 if (!AddRec || AddRec->getLoop() != L) 1742 return nullptr; 1743 1744 // An AddRec must have loop-invariant operands. Since this AddRec is 1745 // materialized by a loop header phi, the expression cannot have any post-loop 1746 // operands, so they must dominate the loop header. 1747 assert( 1748 SE->properlyDominates(AddRec->getStart(), L->getHeader()) && 1749 SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) && 1750 "Loop header phi recurrence inputs do not dominate the loop"); 1751 1752 // Iterate over IV uses (including transitive ones) looking for IV increments 1753 // of the form 'add nsw %iv, <const>'. For each increment and each use of 1754 // the increment calculate control-dependent range information basing on 1755 // dominating conditions inside of the loop (e.g. a range check inside of the 1756 // loop). Calculated ranges are stored in PostIncRangeInfos map. 1757 // 1758 // Control-dependent range information is later used to prove that a narrow 1759 // definition is not negative (see pushNarrowIVUsers). It's difficult to do 1760 // this on demand because when pushNarrowIVUsers needs this information some 1761 // of the dominating conditions might be already widened. 1762 if (UsePostIncrementRanges) 1763 calculatePostIncRanges(OrigPhi); 1764 1765 // The rewriter provides a value for the desired IV expression. This may 1766 // either find an existing phi or materialize a new one. Either way, we 1767 // expect a well-formed cyclic phi-with-increments. i.e. any operand not part 1768 // of the phi-SCC dominates the loop entry. 1769 Instruction *InsertPt = &L->getHeader()->front(); 1770 WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt)); 1771 1772 // Remembering the WideIV increment generated by SCEVExpander allows 1773 // widenIVUse to reuse it when widening the narrow IV's increment. We don't 1774 // employ a general reuse mechanism because the call above is the only call to 1775 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. 1776 if (BasicBlock *LatchBlock = L->getLoopLatch()) { 1777 WideInc = 1778 cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); 1779 WideIncExpr = SE->getSCEV(WideInc); 1780 // Propagate the debug location associated with the original loop increment 1781 // to the new (widened) increment. 1782 auto *OrigInc = 1783 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); 1784 WideInc->setDebugLoc(OrigInc->getDebugLoc()); 1785 } 1786 1787 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); 1788 ++NumWidened; 1789 1790 // Traverse the def-use chain using a worklist starting at the original IV. 1791 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); 1792 1793 Widened.insert(OrigPhi); 1794 pushNarrowIVUsers(OrigPhi, WidePhi); 1795 1796 while (!NarrowIVUsers.empty()) { 1797 NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); 1798 1799 // Process a def-use edge. This may replace the use, so don't hold a 1800 // use_iterator across it. 1801 Instruction *WideUse = widenIVUse(DU, Rewriter); 1802 1803 // Follow all def-use edges from the previous narrow use. 1804 if (WideUse) 1805 pushNarrowIVUsers(DU.NarrowUse, WideUse); 1806 1807 // widenIVUse may have removed the def-use edge. 1808 if (DU.NarrowDef->use_empty()) 1809 DeadInsts.emplace_back(DU.NarrowDef); 1810 } 1811 1812 // Attach any debug information to the new PHI. 
1813 replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT); 1814 1815 return WidePhi; 1816 } 1817 1818 /// Calculates control-dependent range for the given def at the given context 1819 /// by looking at dominating conditions inside of the loop 1820 void WidenIV::calculatePostIncRange(Instruction *NarrowDef, 1821 Instruction *NarrowUser) { 1822 using namespace llvm::PatternMatch; 1823 1824 Value *NarrowDefLHS; 1825 const APInt *NarrowDefRHS; 1826 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), 1827 m_APInt(NarrowDefRHS))) || 1828 !NarrowDefRHS->isNonNegative()) 1829 return; 1830 1831 auto UpdateRangeFromCondition = [&] (Value *Condition, 1832 bool TrueDest) { 1833 CmpInst::Predicate Pred; 1834 Value *CmpRHS; 1835 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), 1836 m_Value(CmpRHS)))) 1837 return; 1838 1839 CmpInst::Predicate P = 1840 TrueDest ? Pred : CmpInst::getInversePredicate(Pred); 1841 1842 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); 1843 auto CmpConstrainedLHSRange = 1844 ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); 1845 auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( 1846 *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); 1847 1848 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); 1849 }; 1850 1851 auto UpdateRangeFromGuards = [&](Instruction *Ctx) { 1852 if (!HasGuards) 1853 return; 1854 1855 for (Instruction &I : make_range(Ctx->getIterator().getReverse(), 1856 Ctx->getParent()->rend())) { 1857 Value *C = nullptr; 1858 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) 1859 UpdateRangeFromCondition(C, /*TrueDest=*/true); 1860 } 1861 }; 1862 1863 UpdateRangeFromGuards(NarrowUser); 1864 1865 BasicBlock *NarrowUserBB = NarrowUser->getParent(); 1866 // If NarrowUserBB is statically unreachable asking dominator queries may 1867 // yield surprising results. (e.g. the block may not have a dom tree node) 1868 if (!DT->isReachableFromEntry(NarrowUserBB)) 1869 return; 1870 1871 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); 1872 L->contains(DTB->getBlock()); 1873 DTB = DTB->getIDom()) { 1874 auto *BB = DTB->getBlock(); 1875 auto *TI = BB->getTerminator(); 1876 UpdateRangeFromGuards(TI); 1877 1878 auto *BI = dyn_cast<BranchInst>(TI); 1879 if (!BI || !BI->isConditional()) 1880 continue; 1881 1882 auto *TrueSuccessor = BI->getSuccessor(0); 1883 auto *FalseSuccessor = BI->getSuccessor(1); 1884 1885 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { 1886 return BBE.isSingleEdge() && 1887 DT->dominates(BBE, NarrowUser->getParent()); 1888 }; 1889 1890 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) 1891 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); 1892 1893 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) 1894 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); 1895 } 1896 } 1897 1898 /// Calculates PostIncRangeInfos map for the given IV 1899 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { 1900 SmallPtrSet<Instruction *, 16> Visited; 1901 SmallVector<Instruction *, 6> Worklist; 1902 Worklist.push_back(OrigPhi); 1903 Visited.insert(OrigPhi); 1904 1905 while (!Worklist.empty()) { 1906 Instruction *NarrowDef = Worklist.pop_back_val(); 1907 1908 for (Use &U : NarrowDef->uses()) { 1909 auto *NarrowUser = cast<Instruction>(U.getUser()); 1910 1911 // Don't go looking outside the current loop. 
1912 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; 1913 if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) 1914 continue; 1915 1916 if (!Visited.insert(NarrowUser).second) 1917 continue; 1918 1919 Worklist.push_back(NarrowUser); 1920 1921 calculatePostIncRange(NarrowDef, NarrowUser); 1922 } 1923 } 1924 } 1925 1926 //===----------------------------------------------------------------------===// 1927 // Live IV Reduction - Minimize IVs live across the loop. 1928 //===----------------------------------------------------------------------===// 1929 1930 //===----------------------------------------------------------------------===// 1931 // Simplification of IV users based on SCEV evaluation. 1932 //===----------------------------------------------------------------------===// 1933 1934 namespace { 1935 1936 class IndVarSimplifyVisitor : public IVVisitor { 1937 ScalarEvolution *SE; 1938 const TargetTransformInfo *TTI; 1939 PHINode *IVPhi; 1940 1941 public: 1942 WideIVInfo WI; 1943 1944 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, 1945 const TargetTransformInfo *TTI, 1946 const DominatorTree *DTree) 1947 : SE(SCEV), TTI(TTI), IVPhi(IV) { 1948 DT = DTree; 1949 WI.NarrowIV = IVPhi; 1950 } 1951 1952 // Implement the interface used by simplifyUsersOfIV. 1953 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } 1954 }; 1955 1956 } // end anonymous namespace 1957 1958 /// Iteratively perform simplification on a worklist of IV users. Each 1959 /// successive simplification may push more users which may themselves be 1960 /// candidates for simplification. 1961 /// 1962 /// Sign/Zero extend elimination is interleaved with IV simplification. 1963 bool IndVarSimplify::simplifyAndExtend(Loop *L, 1964 SCEVExpander &Rewriter, 1965 LoopInfo *LI) { 1966 SmallVector<WideIVInfo, 8> WideIVs; 1967 1968 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction( 1969 Intrinsic::getName(Intrinsic::experimental_guard)); 1970 bool HasGuards = GuardDecl && !GuardDecl->use_empty(); 1971 1972 SmallVector<PHINode*, 8> LoopPhis; 1973 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 1974 LoopPhis.push_back(cast<PHINode>(I)); 1975 } 1976 // Each round of simplification iterates through the SimplifyIVUsers worklist 1977 // for all current phis, then determines whether any IVs can be 1978 // widened. Widening adds new phis to LoopPhis, inducing another round of 1979 // simplification on the wide IVs. 1980 bool Changed = false; 1981 while (!LoopPhis.empty()) { 1982 // Evaluate as many IV expressions as possible before widening any IVs. This 1983 // forces SCEV to set no-wrap flags before evaluating sign/zero 1984 // extension. The first time SCEV attempts to normalize sign/zero extension, 1985 // the result becomes final. So for the most predictable results, we delay 1986 // evaluation of sign/zero extend evaluation until needed, and avoid running 1987 // other SCEV based analysis prior to simplifyAndExtend. 1988 do { 1989 PHINode *CurrIV = LoopPhis.pop_back_val(); 1990 1991 // Information about sign/zero extensions of CurrIV. 
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |=
          simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
      if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated Phi node if so. Otherwise, return null. Note
/// that this is less general than SCEV's AddRec checking.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    LLVM_FALLTHROUGH;
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value. Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test. This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result. Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison. This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist. Note that a false result conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // Basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit)
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction*, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading to our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users. Safe as false is a conservative result.
    if (!propagatesFullPoison(I) && I != Root)
      continue;

    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // the way to the exiting bb.
  return false;
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example, Arguments
  // may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value*, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if this IV has any uses other than the (soon to be rewritten)
/// loop exit test.
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
  Value *IncV = Phi->getIncomingValue(LatchIdx);

  for (User *U : Phi->users())
    if (U != Cond && U != IncV) return false;

  for (User *U : IncV->users())
    if (U != Cond && U != Phi) return false;
  return true;
}

/// Return true if the given phi is a "counter" in L. A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1. Note that L must have exactly one latch.
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi);
}

/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
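  // (Illustrative) A candidate counter looks like:
  //   %iv      = phi i64 [ %start, %preheader ], [ %iv.next, %latch ]
  //   %iv.next = add nsw i64 %iv, 1
  // i.e. an affine add recurrence with a step of one (see isLoopCounter);
  // block and value names here are hypothetical.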
2255 PHINode *BestPhi = nullptr; 2256 const SCEV *BestInit = nullptr; 2257 BasicBlock *LatchBlock = L->getLoopLatch(); 2258 assert(LatchBlock && "Must be in simplified form"); 2259 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 2260 2261 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 2262 PHINode *Phi = cast<PHINode>(I); 2263 if (!isLoopCounter(Phi, L, SE)) 2264 continue; 2265 2266 // Avoid comparing an integer IV against a pointer Limit. 2267 if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy()) 2268 continue; 2269 2270 const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi)); 2271 2272 // AR may be a pointer type, while BECount is an integer type. 2273 // AR may be wider than BECount. With eq/ne tests overflow is immaterial. 2274 // AR may not be a narrower type, or we may never exit. 2275 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType()); 2276 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth)) 2277 continue; 2278 2279 // Avoid reusing a potentially undef value to compute other values that may 2280 // have originally had a concrete definition. 2281 if (!hasConcreteDef(Phi)) { 2282 // We explicitly allow unknown phis as long as they are already used by 2283 // the loop exit test. This is legal since performing LFTR could not 2284 // increase the number of undef users. 2285 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock); 2286 if (!isLoopExitTestBasedOn(Phi, ExitingBB) && 2287 !isLoopExitTestBasedOn(IncPhi, ExitingBB)) 2288 continue; 2289 } 2290 2291 // Avoid introducing undefined behavior due to poison which didn't exist in 2292 // the original program. (Annoyingly, the rules for poison and undef 2293 // propagation are distinct, so this does NOT cover the undef case above.) 2294 // We have to ensure that we don't introduce UB by introducing a use on an 2295 // iteration where said IV produces poison. Our strategy here differs for 2296 // pointers and integer IVs. For integers, we strip and reinfer as needed, 2297 // see code in linearFunctionTestReplace. For pointers, we restrict 2298 // transforms as there is no good way to reinfer inbounds once lost. 2299 if (!Phi->getType()->isIntegerTy() && 2300 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) 2301 continue; 2302 2303 const SCEV *Init = AR->getStart(); 2304 2305 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { 2306 // Don't force a live loop counter if another IV can be used. 2307 if (AlmostDeadIV(Phi, LatchBlock, Cond)) 2308 continue; 2309 2310 // Prefer to count-from-zero. This is a more "canonical" counter form. It 2311 // also prefers integer to pointer IVs. 2312 if (BestInit->isZero() != Init->isZero()) { 2313 if (BestInit->isZero()) 2314 continue; 2315 } 2316 // If two IVs both count from zero or both count from nonzero then the 2317 // narrower is likely a dead phi that has been widened. Use the wider phi 2318 // to allow the other to be eliminated. 2319 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) 2320 continue; 2321 } 2322 BestPhi = Phi; 2323 BestInit = Init; 2324 } 2325 return BestPhi; 2326 } 2327 2328 /// Insert an IR expression which computes the value held by the IV IndVar 2329 /// (which must be an loop counter w/unit stride) after the backedge of loop L 2330 /// is taken ExitCount times. 
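/// For example (illustrative): for a unit-stride counter starting at %start
/// and an exit count of %n, the pre-increment limit is %start + %n and the
/// post-increment limit is %start + %n + 1; linearFunctionTestReplace then
/// compares the (possibly incremented) IV against that limit with eq/ne.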
2331 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, 2332 const SCEV *ExitCount, bool UsePostInc, Loop *L, 2333 SCEVExpander &Rewriter, ScalarEvolution *SE) { 2334 assert(isLoopCounter(IndVar, L, SE)); 2335 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 2336 const SCEV *IVInit = AR->getStart(); 2337 2338 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter 2339 // finds a valid pointer IV. Sign extend ExitCount in order to materialize a 2340 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing 2341 // the existing GEPs whenever possible. 2342 if (IndVar->getType()->isPointerTy() && 2343 !ExitCount->getType()->isPointerTy()) { 2344 // IVOffset will be the new GEP offset that is interpreted by GEP as a 2345 // signed value. ExitCount on the other hand represents the loop trip count, 2346 // which is an unsigned value. FindLoopCounter only allows induction 2347 // variables that have a positive unit stride of one. This means we don't 2348 // have to handle the case of negative offsets (yet) and just need to zero 2349 // extend ExitCount. 2350 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); 2351 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy); 2352 if (UsePostInc) 2353 IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy)); 2354 2355 // Expand the code for the iteration count. 2356 assert(SE->isLoopInvariant(IVOffset, L) && 2357 "Computed iteration count is not loop invariant!"); 2358 2359 // We could handle pointer IVs other than i8*, but we need to compensate for 2360 // gep index scaling. 2361 assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), 2362 cast<PointerType>(IndVar->getType()) 2363 ->getElementType())->isOne() && 2364 "unit stride pointer IV must be i8*"); 2365 2366 const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset); 2367 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2368 return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI); 2369 } else { 2370 // In any other case, convert both IVInit and ExitCount to integers before 2371 // comparing. This may result in SCEV expansion of pointers, but in practice 2372 // SCEV will fold the pointer arithmetic away as such: 2373 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). 2374 // 2375 // Valid Cases: (1) both integers is most common; (2) both may be pointers 2376 // for simple memset-style loops. 2377 // 2378 // IVInit integer and ExitCount pointer would only occur if a canonical IV 2379 // were generated on top of case #2, which is not expected. 2380 2381 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 2382 // For unit stride, IVCount = Start + ExitCount with 2's complement 2383 // overflow. 2384 2385 // For integer IVs, truncate the IV before computing IVInit + BECount, 2386 // unless we know apriori that the limit must be a constant when evaluated 2387 // in the bitwidth of the IV. We prefer (potentially) keeping a truncate 2388 // of the IV in the loop over a (potentially) expensive expansion of the 2389 // widened exit count add(zext(add)) expression. 
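    // (Illustrative) With an i64 IV and an i32 exit count, this keeps the
    // limit computation in i32 and lets linearFunctionTestReplace truncate the
    // IV (or extend the limit when provably safe) at the compare, rather than
    // expanding a zext-of-add chain in the preheader.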
2390 if (SE->getTypeSizeInBits(IVInit->getType()) 2391 > SE->getTypeSizeInBits(ExitCount->getType())) { 2392 if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount)) 2393 ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType()); 2394 else 2395 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType()); 2396 } 2397 2398 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount); 2399 2400 if (UsePostInc) 2401 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); 2402 2403 // Expand the code for the iteration count. 2404 assert(SE->isLoopInvariant(IVLimit, L) && 2405 "Computed iteration count is not loop invariant!"); 2406 // Ensure that we generate the same type as IndVar, or a smaller integer 2407 // type. In the presence of null pointer values, we have an integer type 2408 // SCEV expression (IVInit) for a pointer type IV value (IndVar). 2409 Type *LimitTy = ExitCount->getType()->isPointerTy() ? 2410 IndVar->getType() : ExitCount->getType(); 2411 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2412 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); 2413 } 2414 } 2415 2416 /// This method rewrites the exit condition of the loop to be a canonical != 2417 /// comparison against the incremented loop induction variable. This pass is 2418 /// able to rewrite the exit tests of any loop where the SCEV analysis can 2419 /// determine a loop-invariant trip count of the loop, which is actually a much 2420 /// broader range than just linear tests. 2421 bool IndVarSimplify:: 2422 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 2423 const SCEV *ExitCount, 2424 PHINode *IndVar, SCEVExpander &Rewriter) { 2425 assert(L->getLoopLatch() && "Loop no longer in simplified form?"); 2426 assert(isLoopCounter(IndVar, L, SE)); 2427 Instruction * const IncVar = 2428 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch())); 2429 2430 // Initialize CmpIndVar to the preincremented IV. 2431 Value *CmpIndVar = IndVar; 2432 bool UsePostInc = false; 2433 2434 // If the exiting block is the same as the backedge block, we prefer to 2435 // compare against the post-incremented value, otherwise we must compare 2436 // against the preincremented value. 2437 if (ExitingBB == L->getLoopLatch()) { 2438 // For pointer IVs, we chose to not strip inbounds which requires us not 2439 // to add a potentially UB introducing use. We need to either a) show 2440 // the loop test we're modifying is already in post-inc form, or b) show 2441 // that adding a use must not introduce UB. 2442 bool SafeToPostInc = 2443 IndVar->getType()->isIntegerTy() || 2444 isLoopExitTestBasedOn(IncVar, ExitingBB) || 2445 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT); 2446 if (SafeToPostInc) { 2447 UsePostInc = true; 2448 CmpIndVar = IncVar; 2449 } 2450 } 2451 2452 // It may be necessary to drop nowrap flags on the incrementing instruction 2453 // if either LFTR moves from a pre-inc check to a post-inc check (in which 2454 // case the increment might have previously been poison on the last iteration 2455 // only) or if LFTR switches to a different IV that was previously dynamically 2456 // dead (and as such may be arbitrarily poison). We remove any nowrap flags 2457 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc 2458 // check), because the pre-inc addrec flags may be adopted from the original 2459 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags. 
2460 // TODO: This handling is inaccurate for one case: If we switch to a 2461 // dynamically dead IV that wraps on the first loop iteration only, which is 2462 // not covered by the post-inc addrec. (If the new IV was not dynamically 2463 // dead, it could not be poison on the first iteration in the first place.) 2464 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) { 2465 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar)); 2466 if (BO->hasNoUnsignedWrap()) 2467 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap()); 2468 if (BO->hasNoSignedWrap()) 2469 BO->setHasNoSignedWrap(AR->hasNoSignedWrap()); 2470 } 2471 2472 Value *ExitCnt = genLoopLimit( 2473 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE); 2474 assert(ExitCnt->getType()->isPointerTy() == 2475 IndVar->getType()->isPointerTy() && 2476 "genLoopLimit missed a cast"); 2477 2478 // Insert a new icmp_ne or icmp_eq instruction before the branch. 2479 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2480 ICmpInst::Predicate P; 2481 if (L->contains(BI->getSuccessor(0))) 2482 P = ICmpInst::ICMP_NE; 2483 else 2484 P = ICmpInst::ICMP_EQ; 2485 2486 IRBuilder<> Builder(BI); 2487 2488 // The new loop exit condition should reuse the debug location of the 2489 // original loop exit condition. 2490 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition())) 2491 Builder.SetCurrentDebugLocation(Cond->getDebugLoc()); 2492 2493 // For integer IVs, if we evaluated the limit in the narrower bitwidth to 2494 // avoid the expensive expansion of the limit expression in the wider type, 2495 // emit a truncate to narrow the IV to the ExitCount type. This is safe 2496 // since we know (from the exit count bitwidth), that we can't self-wrap in 2497 // the narrower type. 2498 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); 2499 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); 2500 if (CmpIndVarSize > ExitCntSize) { 2501 assert(!CmpIndVar->getType()->isPointerTy() && 2502 !ExitCnt->getType()->isPointerTy()); 2503 2504 // Before resorting to actually inserting the truncate, use the same 2505 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend 2506 // the other side of the comparison instead. We still evaluate the limit 2507 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to 2508 // a truncate within in. 2509 bool Extended = false; 2510 const SCEV *IV = SE->getSCEV(CmpIndVar); 2511 const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar), 2512 ExitCnt->getType()); 2513 const SCEV *ZExtTrunc = 2514 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType()); 2515 2516 if (ZExtTrunc == IV) { 2517 Extended = true; 2518 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(), 2519 "wide.trip.count"); 2520 } else { 2521 const SCEV *SExtTrunc = 2522 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType()); 2523 if (SExtTrunc == IV) { 2524 Extended = true; 2525 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(), 2526 "wide.trip.count"); 2527 } 2528 } 2529 2530 if (Extended) { 2531 bool Discard; 2532 L->makeLoopInvariant(ExitCnt, Discard); 2533 } else 2534 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), 2535 "lftr.wideiv"); 2536 } 2537 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 2538 << " LHS:" << *CmpIndVar << '\n' 2539 << " op:\t" << (P == ICmpInst::ICMP_NE ? 
"!=" : "==") 2540 << "\n" 2541 << " RHS:\t" << *ExitCnt << "\n" 2542 << "ExitCount:\t" << *ExitCount << "\n" 2543 << " was: " << *BI->getCondition() << "\n"); 2544 2545 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); 2546 Value *OrigCond = BI->getCondition(); 2547 // It's tempting to use replaceAllUsesWith here to fully replace the old 2548 // comparison, but that's not immediately safe, since users of the old 2549 // comparison may not be dominated by the new comparison. Instead, just 2550 // update the branch to use the new comparison; in the common case this 2551 // will make old comparison dead. 2552 BI->setCondition(Cond); 2553 DeadInsts.push_back(OrigCond); 2554 2555 ++NumLFTR; 2556 return true; 2557 } 2558 2559 //===----------------------------------------------------------------------===// 2560 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders. 2561 //===----------------------------------------------------------------------===// 2562 2563 /// If there's a single exit block, sink any loop-invariant values that 2564 /// were defined in the preheader but not used inside the loop into the 2565 /// exit block to reduce register pressure in the loop. 2566 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { 2567 BasicBlock *ExitBlock = L->getExitBlock(); 2568 if (!ExitBlock) return false; 2569 2570 BasicBlock *Preheader = L->getLoopPreheader(); 2571 if (!Preheader) return false; 2572 2573 bool MadeAnyChanges = false; 2574 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); 2575 BasicBlock::iterator I(Preheader->getTerminator()); 2576 while (I != Preheader->begin()) { 2577 --I; 2578 // New instructions were inserted at the end of the preheader. 2579 if (isa<PHINode>(I)) 2580 break; 2581 2582 // Don't move instructions which might have side effects, since the side 2583 // effects need to complete before instructions inside the loop. Also don't 2584 // move instructions which might read memory, since the loop may modify 2585 // memory. Note that it's okay if the instruction might have undefined 2586 // behavior: LoopSimplify guarantees that the preheader dominates the exit 2587 // block. 2588 if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 2589 continue; 2590 2591 // Skip debug info intrinsics. 2592 if (isa<DbgInfoIntrinsic>(I)) 2593 continue; 2594 2595 // Skip eh pad instructions. 2596 if (I->isEHPad()) 2597 continue; 2598 2599 // Don't sink alloca: we never want to sink static alloca's out of the 2600 // entry block, and correctly sinking dynamic alloca's requires 2601 // checks for stacksave/stackrestore intrinsics. 2602 // FIXME: Refactor this check somehow? 2603 if (isa<AllocaInst>(I)) 2604 continue; 2605 2606 // Determine if there is a use in or before the loop (direct or 2607 // otherwise). 2608 bool UsedInLoop = false; 2609 for (Use &U : I->uses()) { 2610 Instruction *User = cast<Instruction>(U.getUser()); 2611 BasicBlock *UseBB = User->getParent(); 2612 if (PHINode *P = dyn_cast<PHINode>(User)) { 2613 unsigned i = 2614 PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 2615 UseBB = P->getIncomingBlock(i); 2616 } 2617 if (UseBB == Preheader || L->contains(UseBB)) { 2618 UsedInLoop = true; 2619 break; 2620 } 2621 } 2622 2623 // If there is, the def must remain in the preheader. 2624 if (UsedInLoop) 2625 continue; 2626 2627 // Otherwise, sink it to the exit block. 2628 Instruction *ToMove = &*I; 2629 bool Done = false; 2630 2631 if (I != Preheader->begin()) { 2632 // Skip debug info intrinsics. 
      do {
        --I;
      } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());

      if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}

/// Return a symbolic upper bound for the backedge taken count of the loop.
/// This is more general than getConstantMaxBackedgeTakenCount as it returns
/// an arbitrary expression as opposed to only constants.
/// TODO: Move into the ScalarEvolution class.
static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE,
                                            DominatorTree &DT, Loop *L) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Form an expression for the maximum exit count possible for this loop. We
  // merge the max and exact information to approximate a version of
  // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
  SmallVector<const SCEV*, 4> ExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount))
      ExitCount = SE.getExitCount(L, ExitingBB,
                                  ScalarEvolution::ConstantMaximum);
    if (!isa<SCEVCouldNotCompute>(ExitCount)) {
      assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
             "We should only have known counts for exiting blocks that "
             "dominate latch!");
      ExitCounts.push_back(ExitCount);
    }
  }
  if (ExitCounts.empty())
    return SE.getCouldNotCompute();
  return SE.getUMinFromMismatchedTypes(ExitCounts);
}

bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Remove all exits which aren't both rewritable and analyzable.
  auto NewEnd = llvm::remove_if(ExitingBlocks,
                                [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount))
      return true;
    return false;
  });
  ExitingBlocks.erase(NewEnd, ExitingBlocks.end());

  if (ExitingBlocks.empty())
    return false;

  // Get a symbolic upper bound on the loop backedge taken count.
  const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
  if (isa<SCEVCouldNotCompute>(MaxExitCount))
    return false;

  // Visit our exit blocks in order of dominance. We know from the fact that
  // all remaining exits are analyzable that there must be a total dominance
  // order between them as each must dominate the latch. The visit order only
  // matters for the provably equal case.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse of
               // the normal dominance relation.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               llvm_unreachable("expected total dominance order!");
             });
#ifndef NDEBUG
  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
    assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
  }
#endif

  auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
    auto *OldCond = BI->getCondition();
    auto *NewCond = ConstantInt::get(OldCond->getType(),
                                     IsTaken ? ExitIfTrue : !ExitIfTrue);
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.push_back(OldCond);
  };

  bool Changed = false;
  SmallSet<const SCEV*, 8> DominatingExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this. This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // TODO: Given we know the backedge can't be taken, we should go ahead
    // and break it. Or at least, kill all the header phis and simplify.
    if (ExitCount->isZero()) {
      FoldExit(ExitingBB, true);
      Changed = true;
      continue;
    }

    // If we end up with a pointer exit count, bail. Note that we can end up
    // with a pointer exit count for one exiting block, and not for another in
    // the same loop.
    if (!ExitCount->getType()->isIntegerTy() ||
        !MaxExitCount->getType()->isIntegerTy())
      continue;

    Type *WiderType =
      SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
    ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
    MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
    assert(MaxExitCount->getType() == ExitCount->getType());

    // Can we prove that some other exit must be taken strictly before this
    // one?
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
                                     MaxExitCount, ExitCount)) {
      FoldExit(ExitingBB, false);
      Changed = true;
      continue;
    }

    // As we run, keep track of which exit counts we've encountered. If we
    // find a duplicate, we've found an exit which would have exited on the
    // exiting iteration, but (from the visit order) strictly follows another
    // which does the same and is thus dead.
    if (!DominatingExitCounts.insert(ExitCount).second) {
      FoldExit(ExitingBB, false);
      Changed = true;
      continue;
    }

    // TODO: There might be another opportunity to leverage SCEV's reasoning
    // here. If we kept track of the min of dominating exits so far, we could
    // discharge exits with EC >= MDEC. This is less powerful than the existing
    // transform (since later exits aren't considered), but potentially more
    // powerful for any case where SCEV can prove a >=u b, but neither a == b
    // or a >u b. Such a case is not currently known.
  }
  return Changed;
}

bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  bool Changed = false;

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration. Note that this
  // doesn't either a) tell us the loop exits on the first iteration (unless
  // *all* exits are predicatable) or b) tell us *which* exit might be taken.
  // This transformation looks a lot like a restricted form of dead loop
  // elimination, but restricted to read-only loops and without necessarily
  // needing to kill the loop entirely.
  if (!LoopPredication)
    return Changed;

  if (!SE->hasLoopInvariantBackedgeTakenCount(L))
    return Changed;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow. We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) ||
      !SE->isLoopInvariant(ExactBTC, L) ||
      !isSafeToExpand(ExactBTC, *SE))
    return Changed;

  // If we end up with a pointer exit count, bail. It may be unsized.
  if (!ExactBTC->getType()->isIntegerTy())
    return Changed;

  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them. This assumes trivially lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
      BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
    if (!SE->isLoopInvariant(ExitCount, L) ||
        !isSafeToExpand(ExitCount, *SE))
      return true;

    // If we end up with a pointer exit count, bail. It may be unsized.
    if (!ExitCount->getType()->isIntegerTy())
      return true;

    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it. Consider
  // two exits (a) and (b) which would both exit on the same iteration. If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b).
  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it.  Consider
  // two exits (a) and (b) which would both exit on the same iteration.  If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b).  Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known to be
  // unequal.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse of
               // the normal dominance relation, plus a tie breaker for blocks
               // unordered by dominance.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               return A->getName() < B->getName();
             });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge).  If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return Changed;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return Changed;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count.  The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
           return DT->dominates(ExitingBB, L->getLoopLatch());
         }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable.  Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
  // suggestions on how to improve this?  I can obviously bail out for outer
  // loops, but that seems less than ideal.  MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO: isGuaranteedToTransfer
      if (I.mayHaveSideEffects() || I.mayThrow())
        return Changed;

  // Finally, do the actual predication for all predicatable blocks.  A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch.  Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic.  We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
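    // Illustrative sketch: if this exit's count is, say, (%n - 1) and the
    // exact backedge-taken count is %btc, the branch condition below becomes
    // "icmp eq/ne (%n - 1), %btc" (direction chosen so the in-loop successor
    // is kept), which is loop invariant even though the original condition
    // varied from iteration to iteration.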
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.push_back(OldCond);
    Changed = true;
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");
  bool Changed = false;

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with
  //    preheaders, and we're in trouble if we can't find the induction
  //    variable even when we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only
  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl)
    Changed |= rewriteLoopExitValues(L, Rewriter);

  // Eliminate redundant IV cycles.
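  // ("Congruent" here means phis that compute the same SCEV expression; e.g.
  // two separate {0,+,1} counters left over from earlier rewrites collapse
  // into one, with the redundant phi queued onto DeadInsts.)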
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  // Try to eliminate loop exits based on analyzable exit counts.
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty())
    if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
      Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
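  // For example (illustrative only): an instruction computed in the preheader
  // whose only users sit after the loop can be moved into the exit block,
  // shortening its live range across the loop body.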
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR and any other changes have not interfered with SCEV's
  // ability to compute trip count.  We may have *changed* the exit count, but
  // only by reducing it.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}
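
// Example usage (illustrative): the legacy pass above is the one run by
//   opt -indvars <input.ll>
// under the legacy pass manager, while IndVarSimplifyPass::run above is the
// new pass manager entry point, reachable via e.g.
//   opt -passes='loop(indvars)' <input.ll>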