//===- IndVarSimplify.cpp - Induction Variable Elimination ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
"llvm/Transforms/Utils/LoopUtils.h" 84 #include "llvm/Transforms/Utils/SimplifyIndVar.h" 85 #include <cassert> 86 #include <cstdint> 87 #include <utility> 88 89 using namespace llvm; 90 91 #define DEBUG_TYPE "indvars" 92 93 STATISTIC(NumWidened , "Number of indvars widened"); 94 STATISTIC(NumReplaced , "Number of exit values replaced"); 95 STATISTIC(NumLFTR , "Number of loop exit tests replaced"); 96 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); 97 STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); 98 99 // Trip count verification can be enabled by default under NDEBUG if we 100 // implement a strong expression equivalence checker in SCEV. Until then, we 101 // use the verify-indvars flag, which may assert in some cases. 102 static cl::opt<bool> VerifyIndvars( 103 "verify-indvars", cl::Hidden, 104 cl::desc("Verify the ScalarEvolution result after running indvars")); 105 106 enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl }; 107 108 static cl::opt<ReplaceExitVal> ReplaceExitValue( 109 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), 110 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), 111 cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), 112 clEnumValN(OnlyCheapRepl, "cheap", 113 "only replace exit value when the cost is cheap"), 114 clEnumValN(NoHardUse, "noharduse", 115 "only replace exit values when loop def likely dead"), 116 clEnumValN(AlwaysRepl, "always", 117 "always replace exit value whenever possible"))); 118 119 static cl::opt<bool> UsePostIncrementRanges( 120 "indvars-post-increment-ranges", cl::Hidden, 121 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), 122 cl::init(true)); 123 124 static cl::opt<bool> 125 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), 126 cl::desc("Disable Linear Function Test Replace optimization")); 127 128 static cl::opt<bool> 129 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), 130 cl::desc("Predicate conditions in read only loops")); 131 132 namespace { 133 134 struct RewritePhi; 135 136 class IndVarSimplify { 137 LoopInfo *LI; 138 ScalarEvolution *SE; 139 DominatorTree *DT; 140 const DataLayout &DL; 141 TargetLibraryInfo *TLI; 142 const TargetTransformInfo *TTI; 143 144 SmallVector<WeakTrackingVH, 16> DeadInsts; 145 146 bool isValidRewrite(Value *FromVal, Value *ToVal); 147 148 bool handleFloatingPointIV(Loop *L, PHINode *PH); 149 bool rewriteNonIntegerIVs(Loop *L); 150 151 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); 152 /// Try to eliminate loop exits based on analyzeable exit counts 153 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter); 154 /// Try to form loop invariant tests for loop exits by changing how many 155 /// iterations of the loop run when that is unobservable. 
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
  bool rewriteFirstIterationLoopExitValues(Loop *L);
  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}

  bool run(Loop *L);
};

} // end anonymous namespace

/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR.  Ideally, this function will only be
/// called for verification.
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks.
/// A nullptr can be returned if no viable location is found: it may happen if
/// User is a PHI and Def only comes to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // rewriting, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;

  // Ith incoming value.
  unsigned Ith;

  // Exit value after expansion.
  Value *Val;

  // True if expanding the exit value is considered high cost.
  bool HighCost;

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
  SmallPtrSet<const Instruction *, 8> Visited;
  SmallVector<const Instruction *, 8> WorkList;
  Visited.insert(I);
  WorkList.push_back(I);
  while (!WorkList.empty()) {
    const Instruction *Curr = WorkList.pop_back_val();
    // This use is outside the loop, nothing to do.
    if (!L->contains(Curr))
      continue;
    // Do we assume it is a "hard" use which will not be eliminated easily?
    if (Curr->mayHaveSideEffects())
      return true;
    // Otherwise, add all its users to worklist.
    for (auto U : Curr->users()) {
      auto *UI = cast<Instruction>(U);
      if (Visited.insert(UI).second)
        WorkList.push_back(UI);
    }
  }
  return false;
}
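
// Illustrative sketch (hypothetical example, not taken from the LLVM test
// suite): exit-value rewriting turns a use of the IV's final value such as
//
//   for (i = 0; i != n; ++i)
//     ...
//   use(i);        // LCSSA phi of the IV in the exit block
//
// into 'use(n)', computed outside the loop. If the loop's only purpose was to
// produce that value, the loop itself becomes dead.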

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not integer or is not defined
        // in the loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop.  Evaluate the value it
        // contains when the loop exits, if possible.  We prefer to start with
        // expressions which are true for all exits (so as to maximize
        // expression reuse by the SCEVExpander), but resort to per-exit
        // evaluation if that fails.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (isa<SCEVCouldNotCompute>(ExitValue) ||
            !SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE)) {
          // TODO: This should probably be sunk into SCEV in some way; maybe a
          // getSCEVForExit(SCEV*, L, ExitingBB)?  It can be generalized for
          // most SCEV expressions and other recurrence types (e.g. shift
          // recurrences).  Is there existing code we can reuse?
          const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
          if (isa<SCEVCouldNotCompute>(ExitCount))
            continue;
          if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
            if (AddRec->getLoop() == L)
              ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
          if (isa<SCEVCouldNotCompute>(ExitValue) ||
              !SE->isLoopInvariant(ExitValue, L) ||
              !isSafeToExpand(ExitValue, *SE))
            continue;
        }

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which can not be optimized
        // away.
        // Avoid doing so unless we know we have a value which computes
        // the ExitValue already.  TODO: This should be merged into SCEV
        // expander to leverage its knowledge of existing expressions.
        if (ReplaceExitValue != AlwaysRepl &&
            !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
            hasHardUserWithinLoop(L, Inst))
          continue;

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);

        LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
                          << '\n'
                          << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

#ifndef NDEBUG
        // If we reuse an instruction from a loop which is neither L nor one of
        // its containing loops, we end up breaking LCSSA form for this loop by
        // creating a new use of its instruction.
        if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
          if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
            if (EVL != L)
              assert(EVL->contains(L) && "LCSSA breach detected!");
#endif

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  bool Changed = false;
  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, delete it. Don't do it now to avoid
    // invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

/// Check whether it is possible to delete the loop after rewriting exit
/// value. If it is possible, ignore ReplaceExitValue and do rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // The LoopDeletion pass can delete loops even when ExitingBlocks.size() > 1,
  // but we restrict ourselves to the single-exiting-block case for simplicity.
  // TODO: If we see a testcase where a loop with multiple exiting blocks can be
  // deleted after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetics on the wider indvar are more
  // expensive than those on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def. This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative.  Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *, 16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}
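
// Illustrative sketch (hypothetical IR, not taken from an LLVM test): once the
// narrow IV '%iv' has been widened to '%iv.wide', a narrow arithmetic user such
// as
//   %use = add nsw i32 %iv, %x
// can be cloned as
//   %x.ext = sext i32 %x to i64
//   %use.wide = add nsw i64 %iv.wide, %x.ext
// provided SCEV agrees that the wide expression is the extension of the narrow
// one; the helpers below implement this cloning.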

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// 0 for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the no-wrap
  // behavior depends on. Non-control-equivalent instructions can be mapped to
  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
  // semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop? If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}
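
// Illustrative sketch (hypothetical IR, not taken from an LLVM test): a use
// that cannot be widened, e.g.
//   %use = urem i32 %iv, 7
// is instead rewritten against the wide IV as
//   %iv.trunc = trunc i64 %iv.wide to i32
//   %use = urem i32 %iv.trunc, 7
// which isolates the original narrow IV so it can eventually be removed.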

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension).  For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive.  Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}

/// If the narrow use is an instruction whose two operands are the defining
/// instruction of DU and a load instruction, then we have the following:
/// if the load is hoisted outside the loop, then we do not reach this function
/// as scalar evolution analysis works fine in widenIVUse with variables
/// hoisted outside the loop and efficient code is subsequently generated by
/// not emitting truncate instructions. But when the load is not hoisted
/// (whether due to a limitation in alias analysis or due to a true legality
/// constraint), then scalar evolution can not proceed with loop variant values
/// and inefficient code is generated. This function handles the non-hoisted
/// load special case by making the optimization generate the same type of code
/// for hoisted and non-hoisted load (widen use and eliminate sign extend
/// instruction). This special case is important especially when the induction
/// variables are affecting addressing mode in code generation.
bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return false;

  // The operand that is not defined by NarrowDef of DU. Let's call it the
  // other operand.
  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
         "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(NarrowUse);
  ExtendKind ExtKind = getExtendKind(NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return false;

  // We are interested in the other operand being a load instruction.
  // But, we should look into relaxing this restriction later on.
  auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
  if (I && I->getOpcode() != Instruction::Load)
    return false;

  // Verifying that Defining operand is an AddRec
  const SCEV *Op1 = SE->getSCEV(WideDef);
  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
    return false;
  // Verifying that other operand is an Extend.
  if (ExtKind == SignExtended) {
    if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
      return false;
  } else {
    if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
      return false;
  }

  if (ExtKind == SignExtended) {
    for (Use &U : NarrowUse->uses()) {
      SExtInst *User = dyn_cast<SExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  } else { // ExtKind == ZeroExtended
    for (Use &U : NarrowUse->uses()) {
      ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
      if (!User || User->getType() != WideType)
        return false;
    }
  }

  return true;
}

/// Special Case for widening with variant Loads (see
/// WidenIV::widenWithVariantLoadUse). This is the code generation part.
void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  ExtendKind ExtKind = getExtendKind(NarrowDef);

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  // Generating a widening use instruction.
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      ExtKind, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      ExtKind, NarrowUse);
WideDef 1501 : createExtendInst(NarrowUse->getOperand(1), WideType, 1502 ExtKind, NarrowUse); 1503 1504 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 1505 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 1506 NarrowBO->getName()); 1507 IRBuilder<> Builder(NarrowUse); 1508 Builder.Insert(WideBO); 1509 WideBO->copyIRFlags(NarrowBO); 1510 1511 assert(ExtKind != Unknown && "Unknown ExtKind not handled"); 1512 1513 ExtendKindMap[NarrowUse] = ExtKind; 1514 1515 for (Use &U : NarrowUse->uses()) { 1516 Instruction *User = nullptr; 1517 if (ExtKind == SignExtended) 1518 User = dyn_cast<SExtInst>(U.getUser()); 1519 else 1520 User = dyn_cast<ZExtInst>(U.getUser()); 1521 if (User && User->getType() == WideType) { 1522 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " 1523 << *WideBO << "\n"); 1524 ++NumElimExt; 1525 User->replaceAllUsesWith(WideBO); 1526 DeadInsts.emplace_back(User); 1527 } 1528 } 1529 } 1530 1531 /// Determine whether an individual user of the narrow IV can be widened. If so, 1532 /// return the wide clone of the user. 1533 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { 1534 assert(ExtendKindMap.count(DU.NarrowDef) && 1535 "Should already know the kind of extension used to widen NarrowDef"); 1536 1537 // Stop traversing the def-use chain at inner-loop phis or post-loop phis. 1538 if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) { 1539 if (LI->getLoopFor(UsePhi->getParent()) != L) { 1540 // For LCSSA phis, sink the truncate outside the loop. 1541 // After SimplifyCFG most loop exit targets have a single predecessor. 1542 // Otherwise fall back to a truncate within the loop. 1543 if (UsePhi->getNumOperands() != 1) 1544 truncateIVUse(DU, DT, LI); 1545 else { 1546 // Widening the PHI requires us to insert a trunc. The logical place 1547 // for this trunc is in the same BB as the PHI. This is not possible if 1548 // the BB is terminated by a catchswitch. 1549 if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator())) 1550 return nullptr; 1551 1552 PHINode *WidePhi = 1553 PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", 1554 UsePhi); 1555 WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0)); 1556 IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt()); 1557 Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType()); 1558 UsePhi->replaceAllUsesWith(Trunc); 1559 DeadInsts.emplace_back(UsePhi); 1560 LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to " 1561 << *WidePhi << "\n"); 1562 } 1563 return nullptr; 1564 } 1565 } 1566 1567 // This narrow use can be widened by a sext if it's non-negative or its narrow 1568 // def was widended by a sext. Same for zext. 1569 auto canWidenBySExt = [&]() { 1570 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended; 1571 }; 1572 auto canWidenByZExt = [&]() { 1573 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended; 1574 }; 1575 1576 // Our raison d'etre! Eliminate sign and zero extension. 1577 if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) || 1578 (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) { 1579 Value *NewDef = DU.WideDef; 1580 if (DU.NarrowUse->getType() != WideType) { 1581 unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType()); 1582 unsigned IVWidth = SE->getTypeSizeInBits(WideType); 1583 if (CastWidth < IVWidth) { 1584 // The cast isn't as wide as the IV, so insert a Trunc. 
1585 IRBuilder<> Builder(DU.NarrowUse);
1586 NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1587 }
1588 else {
1589 // A wider extend was hidden behind a narrower one. This may induce
1590 // another round of IV widening in which the intermediate IV becomes
1591 // dead. It should be very rare.
1592 LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1593 << " not wide enough to subsume " << *DU.NarrowUse
1594 << "\n");
1595 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1596 NewDef = DU.NarrowUse;
1597 }
1598 }
1599 if (NewDef != DU.NarrowUse) {
1600 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1601 << " replaced by " << *DU.WideDef << "\n");
1602 ++NumElimExt;
1603 DU.NarrowUse->replaceAllUsesWith(NewDef);
1604 DeadInsts.emplace_back(DU.NarrowUse);
1605 }
1606 // Now that the extend is gone, we want to expose its uses for potential
1607 // further simplification. We don't need to directly inform SimplifyIVUsers
1608 // of the new users, because their parent IV will be processed later as a
1609 // new loop phi. If we preserved IVUsers analysis, we would also want to
1610 // push the uses of WideDef here.
1611 
1612 // No further widening is needed. The deceased [sz]ext had done it for us.
1613 return nullptr;
1614 }
1615 
1616 // Does this user itself evaluate to a recurrence after widening?
1617 WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1618 if (!WideAddRec.first)
1619 WideAddRec = getWideRecurrence(DU);
1620 
1621 assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1622 if (!WideAddRec.first) {
1623 // If use is a loop condition, try to promote the condition instead of
1624 // truncating the IV first.
1625 if (widenLoopCompare(DU))
1626 return nullptr;
1627 
1628 // At this point we are about to generate a truncate instruction that may
1629 // hurt performance because the scalar evolution expression computed earlier
1630 // in WideAddRec.first does not indicate a polynomial induction expression.
1631 // In that case, look at the operands of the use instruction to determine
1632 // if we can still widen the use instead of truncating its operand.
1633 if (widenWithVariantLoadUse(DU)) {
1634 widenWithVariantLoadUseCodegen(DU);
1635 return nullptr;
1636 }
1637 
1638 // This user does not evaluate to a recurrence after widening, so don't
1639 // follow it. Instead insert a Trunc to kill off the original use,
1640 // eventually isolating the original narrow IV so it can be removed.
1641 truncateIVUse(DU, DT, LI);
1642 return nullptr;
1643 }
1644 // Assume block terminators cannot evaluate to a recurrence. We can't
1645 // insert a Trunc after a terminator if there happens to be a critical edge.
1646 assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1647 "SCEV is not expected to evaluate a block terminator");
1648 
1649 // Reuse the IV increment that SCEVExpander created as long as it dominates
1650 // NarrowUse.
1651 Instruction *WideUse = nullptr;
1652 if (WideAddRec.first == WideIncExpr &&
1653 Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1654 WideUse = WideInc;
1655 else {
1656 WideUse = cloneIVUser(DU, WideAddRec.first);
1657 if (!WideUse)
1658 return nullptr;
1659 }
1660 // Evaluation of WideAddRec ensured that the narrow expression could be
1661 // extended outside the loop without overflow. This suggests that the wide use
1662 // evaluates to the same expression as the extended narrow use, but doesn't
1663 // absolutely guarantee it. Hence the following failsafe check.
In rare cases 1664 // where it fails, we simply throw away the newly created wide use. 1665 if (WideAddRec.first != SE->getSCEV(WideUse)) { 1666 LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " 1667 << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first 1668 << "\n"); 1669 DeadInsts.emplace_back(WideUse); 1670 return nullptr; 1671 } 1672 1673 // if we reached this point then we are going to replace 1674 // DU.NarrowUse with WideUse. Reattach DbgValue then. 1675 replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT); 1676 1677 ExtendKindMap[DU.NarrowUse] = WideAddRec.second; 1678 // Returning WideUse pushes it on the worklist. 1679 return WideUse; 1680 } 1681 1682 /// Add eligible users of NarrowDef to NarrowIVUsers. 1683 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { 1684 const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); 1685 bool NonNegativeDef = 1686 SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, 1687 SE->getConstant(NarrowSCEV->getType(), 0)); 1688 for (User *U : NarrowDef->users()) { 1689 Instruction *NarrowUser = cast<Instruction>(U); 1690 1691 // Handle data flow merges and bizarre phi cycles. 1692 if (!Widened.insert(NarrowUser).second) 1693 continue; 1694 1695 bool NonNegativeUse = false; 1696 if (!NonNegativeDef) { 1697 // We might have a control-dependent range information for this context. 1698 if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser)) 1699 NonNegativeUse = RangeInfo->getSignedMin().isNonNegative(); 1700 } 1701 1702 NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, 1703 NonNegativeDef || NonNegativeUse); 1704 } 1705 } 1706 1707 /// Process a single induction variable. First use the SCEVExpander to create a 1708 /// wide induction variable that evaluates to the same recurrence as the 1709 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's 1710 /// def-use chain. After widenIVUse has processed all interesting IV users, the 1711 /// narrow IV will be isolated for removal by DeleteDeadPHIs. 1712 /// 1713 /// It would be simpler to delete uses as they are processed, but we must avoid 1714 /// invalidating SCEV expressions. 1715 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { 1716 // Is this phi an induction variable? 1717 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); 1718 if (!AddRec) 1719 return nullptr; 1720 1721 // Widen the induction variable expression. 1722 const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended 1723 ? SE->getSignExtendExpr(AddRec, WideType) 1724 : SE->getZeroExtendExpr(AddRec, WideType); 1725 1726 assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && 1727 "Expect the new IV expression to preserve its type"); 1728 1729 // Can the IV be extended outside the loop without overflow? 1730 AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); 1731 if (!AddRec || AddRec->getLoop() != L) 1732 return nullptr; 1733 1734 // An AddRec must have loop-invariant operands. Since this AddRec is 1735 // materialized by a loop header phi, the expression cannot have any post-loop 1736 // operands, so they must dominate the loop header. 1737 assert( 1738 SE->properlyDominates(AddRec->getStart(), L->getHeader()) && 1739 SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) && 1740 "Loop header phi recurrence inputs do not dominate the loop"); 1741 1742 // Iterate over IV uses (including transitive ones) looking for IV increments 1743 // of the form 'add nsw %iv, <const>'. 
For each increment and each use of 1744 // the increment calculate control-dependent range information basing on 1745 // dominating conditions inside of the loop (e.g. a range check inside of the 1746 // loop). Calculated ranges are stored in PostIncRangeInfos map. 1747 // 1748 // Control-dependent range information is later used to prove that a narrow 1749 // definition is not negative (see pushNarrowIVUsers). It's difficult to do 1750 // this on demand because when pushNarrowIVUsers needs this information some 1751 // of the dominating conditions might be already widened. 1752 if (UsePostIncrementRanges) 1753 calculatePostIncRanges(OrigPhi); 1754 1755 // The rewriter provides a value for the desired IV expression. This may 1756 // either find an existing phi or materialize a new one. Either way, we 1757 // expect a well-formed cyclic phi-with-increments. i.e. any operand not part 1758 // of the phi-SCC dominates the loop entry. 1759 Instruction *InsertPt = &L->getHeader()->front(); 1760 WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt)); 1761 1762 // Remembering the WideIV increment generated by SCEVExpander allows 1763 // widenIVUse to reuse it when widening the narrow IV's increment. We don't 1764 // employ a general reuse mechanism because the call above is the only call to 1765 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. 1766 if (BasicBlock *LatchBlock = L->getLoopLatch()) { 1767 WideInc = 1768 cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); 1769 WideIncExpr = SE->getSCEV(WideInc); 1770 // Propagate the debug location associated with the original loop increment 1771 // to the new (widened) increment. 1772 auto *OrigInc = 1773 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); 1774 WideInc->setDebugLoc(OrigInc->getDebugLoc()); 1775 } 1776 1777 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); 1778 ++NumWidened; 1779 1780 // Traverse the def-use chain using a worklist starting at the original IV. 1781 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); 1782 1783 Widened.insert(OrigPhi); 1784 pushNarrowIVUsers(OrigPhi, WidePhi); 1785 1786 while (!NarrowIVUsers.empty()) { 1787 NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); 1788 1789 // Process a def-use edge. This may replace the use, so don't hold a 1790 // use_iterator across it. 1791 Instruction *WideUse = widenIVUse(DU, Rewriter); 1792 1793 // Follow all def-use edges from the previous narrow use. 1794 if (WideUse) 1795 pushNarrowIVUsers(DU.NarrowUse, WideUse); 1796 1797 // widenIVUse may have removed the def-use edge. 1798 if (DU.NarrowDef->use_empty()) 1799 DeadInsts.emplace_back(DU.NarrowDef); 1800 } 1801 1802 // Attach any debug information to the new PHI. 
1803 replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT); 1804 1805 return WidePhi; 1806 } 1807 1808 /// Calculates control-dependent range for the given def at the given context 1809 /// by looking at dominating conditions inside of the loop 1810 void WidenIV::calculatePostIncRange(Instruction *NarrowDef, 1811 Instruction *NarrowUser) { 1812 using namespace llvm::PatternMatch; 1813 1814 Value *NarrowDefLHS; 1815 const APInt *NarrowDefRHS; 1816 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), 1817 m_APInt(NarrowDefRHS))) || 1818 !NarrowDefRHS->isNonNegative()) 1819 return; 1820 1821 auto UpdateRangeFromCondition = [&] (Value *Condition, 1822 bool TrueDest) { 1823 CmpInst::Predicate Pred; 1824 Value *CmpRHS; 1825 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), 1826 m_Value(CmpRHS)))) 1827 return; 1828 1829 CmpInst::Predicate P = 1830 TrueDest ? Pred : CmpInst::getInversePredicate(Pred); 1831 1832 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); 1833 auto CmpConstrainedLHSRange = 1834 ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); 1835 auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( 1836 *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); 1837 1838 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); 1839 }; 1840 1841 auto UpdateRangeFromGuards = [&](Instruction *Ctx) { 1842 if (!HasGuards) 1843 return; 1844 1845 for (Instruction &I : make_range(Ctx->getIterator().getReverse(), 1846 Ctx->getParent()->rend())) { 1847 Value *C = nullptr; 1848 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) 1849 UpdateRangeFromCondition(C, /*TrueDest=*/true); 1850 } 1851 }; 1852 1853 UpdateRangeFromGuards(NarrowUser); 1854 1855 BasicBlock *NarrowUserBB = NarrowUser->getParent(); 1856 // If NarrowUserBB is statically unreachable asking dominator queries may 1857 // yield surprising results. (e.g. the block may not have a dom tree node) 1858 if (!DT->isReachableFromEntry(NarrowUserBB)) 1859 return; 1860 1861 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); 1862 L->contains(DTB->getBlock()); 1863 DTB = DTB->getIDom()) { 1864 auto *BB = DTB->getBlock(); 1865 auto *TI = BB->getTerminator(); 1866 UpdateRangeFromGuards(TI); 1867 1868 auto *BI = dyn_cast<BranchInst>(TI); 1869 if (!BI || !BI->isConditional()) 1870 continue; 1871 1872 auto *TrueSuccessor = BI->getSuccessor(0); 1873 auto *FalseSuccessor = BI->getSuccessor(1); 1874 1875 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { 1876 return BBE.isSingleEdge() && 1877 DT->dominates(BBE, NarrowUser->getParent()); 1878 }; 1879 1880 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) 1881 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); 1882 1883 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) 1884 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); 1885 } 1886 } 1887 1888 /// Calculates PostIncRangeInfos map for the given IV 1889 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { 1890 SmallPtrSet<Instruction *, 16> Visited; 1891 SmallVector<Instruction *, 6> Worklist; 1892 Worklist.push_back(OrigPhi); 1893 Visited.insert(OrigPhi); 1894 1895 while (!Worklist.empty()) { 1896 Instruction *NarrowDef = Worklist.pop_back_val(); 1897 1898 for (Use &U : NarrowDef->uses()) { 1899 auto *NarrowUser = cast<Instruction>(U.getUser()); 1900 1901 // Don't go looking outside the current loop. 
1902 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; 1903 if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) 1904 continue; 1905 1906 if (!Visited.insert(NarrowUser).second) 1907 continue; 1908 1909 Worklist.push_back(NarrowUser); 1910 1911 calculatePostIncRange(NarrowDef, NarrowUser); 1912 } 1913 } 1914 } 1915 1916 //===----------------------------------------------------------------------===// 1917 // Live IV Reduction - Minimize IVs live across the loop. 1918 //===----------------------------------------------------------------------===// 1919 1920 //===----------------------------------------------------------------------===// 1921 // Simplification of IV users based on SCEV evaluation. 1922 //===----------------------------------------------------------------------===// 1923 1924 namespace { 1925 1926 class IndVarSimplifyVisitor : public IVVisitor { 1927 ScalarEvolution *SE; 1928 const TargetTransformInfo *TTI; 1929 PHINode *IVPhi; 1930 1931 public: 1932 WideIVInfo WI; 1933 1934 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, 1935 const TargetTransformInfo *TTI, 1936 const DominatorTree *DTree) 1937 : SE(SCEV), TTI(TTI), IVPhi(IV) { 1938 DT = DTree; 1939 WI.NarrowIV = IVPhi; 1940 } 1941 1942 // Implement the interface used by simplifyUsersOfIV. 1943 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } 1944 }; 1945 1946 } // end anonymous namespace 1947 1948 /// Iteratively perform simplification on a worklist of IV users. Each 1949 /// successive simplification may push more users which may themselves be 1950 /// candidates for simplification. 1951 /// 1952 /// Sign/Zero extend elimination is interleaved with IV simplification. 1953 bool IndVarSimplify::simplifyAndExtend(Loop *L, 1954 SCEVExpander &Rewriter, 1955 LoopInfo *LI) { 1956 SmallVector<WideIVInfo, 8> WideIVs; 1957 1958 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction( 1959 Intrinsic::getName(Intrinsic::experimental_guard)); 1960 bool HasGuards = GuardDecl && !GuardDecl->use_empty(); 1961 1962 SmallVector<PHINode*, 8> LoopPhis; 1963 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 1964 LoopPhis.push_back(cast<PHINode>(I)); 1965 } 1966 // Each round of simplification iterates through the SimplifyIVUsers worklist 1967 // for all current phis, then determines whether any IVs can be 1968 // widened. Widening adds new phis to LoopPhis, inducing another round of 1969 // simplification on the wide IVs. 1970 bool Changed = false; 1971 while (!LoopPhis.empty()) { 1972 // Evaluate as many IV expressions as possible before widening any IVs. This 1973 // forces SCEV to set no-wrap flags before evaluating sign/zero 1974 // extension. The first time SCEV attempts to normalize sign/zero extension, 1975 // the result becomes final. So for the most predictable results, we delay 1976 // evaluation of sign/zero extend evaluation until needed, and avoid running 1977 // other SCEV based analysis prior to simplifyAndExtend. 1978 do { 1979 PHINode *CurrIV = LoopPhis.pop_back_val(); 1980 1981 // Information about sign/zero extensions of CurrIV. 
1982 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1983 
1984 Changed |=
1985 simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1986 
1987 if (Visitor.WI.WidestNativeType) {
1988 WideIVs.push_back(Visitor.WI);
1989 }
1990 } while (!LoopPhis.empty());
1991 
1992 for (; !WideIVs.empty(); WideIVs.pop_back()) {
1993 WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1994 if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1995 Changed = true;
1996 LoopPhis.push_back(WidePhi);
1997 }
1998 }
1999 }
2000 return Changed;
2001 }
2002 
2003 //===----------------------------------------------------------------------===//
2004 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
2005 //===----------------------------------------------------------------------===//
2006 
2007 /// Given a Value which is hoped to be part of an add recurrence in the given
2008 /// loop, return the associated Phi node if so. Otherwise, return null. Note
2009 /// that this is less general than SCEV's AddRec checking.
2010 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
2011 Instruction *IncI = dyn_cast<Instruction>(IncV);
2012 if (!IncI)
2013 return nullptr;
2014 
2015 switch (IncI->getOpcode()) {
2016 case Instruction::Add:
2017 case Instruction::Sub:
2018 break;
2019 case Instruction::GetElementPtr:
2020 // An IV counter must preserve its type.
2021 if (IncI->getNumOperands() == 2)
2022 break;
2023 LLVM_FALLTHROUGH;
2024 default:
2025 return nullptr;
2026 }
2027 
2028 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2029 if (Phi && Phi->getParent() == L->getHeader()) {
2030 if (L->isLoopInvariant(IncI->getOperand(1)))
2031 return Phi;
2032 return nullptr;
2033 }
2034 if (IncI->getOpcode() == Instruction::GetElementPtr)
2035 return nullptr;
2036 
2037 // Allow add/sub to be commuted.
2038 Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2039 if (Phi && Phi->getParent() == L->getHeader()) {
2040 if (L->isLoopInvariant(IncI->getOperand(0)))
2041 return Phi;
2042 }
2043 return nullptr;
2044 }
2045 
2046 /// Whether the current loop exit test is based on this value. Currently this
2047 /// is limited to a direct use in the loop condition.
2048 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
2049 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2050 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
2051 // TODO: Allow non-icmp loop test.
2052 if (!ICmp)
2053 return false;
2054 
2055 // TODO: Allow indirect use.
2056 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
2057 }
2058 
2059 /// linearFunctionTestReplace policy. Return true unless we can show that the
2060 /// current exit test is already sufficiently canonical.
2061 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2062 assert(L->getLoopLatch() && "Must be in simplified form");
2063 
2064 // Avoid converting a constant or loop invariant test back to a runtime
2065 // test. This is critical for when SCEV's cached ExitCount is less precise
2066 // than the current IR (such as after we've proven a particular exit is
2067 // actually dead and thus the BE count never reaches our ExitCount.)
2068 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2069 if (L->isLoopInvariant(BI->getCondition()))
2070 return false;
2071 
2072 // Do LFTR to simplify the exit condition to an ICMP.
2073 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
2074 if (!Cond)
2075 return true;
2076 
2077 // Do LFTR to simplify the exit ICMP to EQ/NE
2078 ICmpInst::Predicate Pred = Cond->getPredicate();
2079 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2080 return true;
2081 
2082 // Look for a loop invariant RHS
2083 Value *LHS = Cond->getOperand(0);
2084 Value *RHS = Cond->getOperand(1);
2085 if (!L->isLoopInvariant(RHS)) {
2086 if (!L->isLoopInvariant(LHS))
2087 return true;
2088 std::swap(LHS, RHS);
2089 }
2090 // Look for a simple IV counter LHS
2091 PHINode *Phi = dyn_cast<PHINode>(LHS);
2092 if (!Phi)
2093 Phi = getLoopPhiForCounter(LHS, L);
2094 
2095 if (!Phi)
2096 return true;
2097 
2098 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2099 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2100 if (Idx < 0)
2101 return true;
2102 
2103 // Do LFTR if the exit condition's IV is *not* a simple counter.
2104 Value *IncV = Phi->getIncomingValue(Idx);
2105 return Phi != getLoopPhiForCounter(IncV, L);
2106 }
2107 
2108 /// Return true if undefined behavior would provably be executed on the path to
2109 /// OnPathTo if Root produced a poison result. Note that this doesn't say
2110 /// anything about whether OnPathTo is actually executed or whether Root is
2111 /// actually poison. This can be used to assess whether a new use of Root can
2112 /// be added at a location which is control equivalent with OnPathTo (such as
2113 /// immediately before it) without introducing UB which didn't previously
2114 /// exist. Note that a false result conveys no information.
2115 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
2116 Instruction *OnPathTo,
2117 DominatorTree *DT) {
2118 // The basic approach is to assume Root is poison, propagate poison forward
2119 // through all users we can easily track, and then check whether any of those
2120 // users are provably UB and must execute before our exiting block might
2121 // exit.
2122 
2123 // The set of all recursive users we've visited (which are assumed to all be
2124 // poison because of said visit).
2125 SmallSet<const Value *, 16> KnownPoison;
2126 SmallVector<const Instruction*, 16> Worklist;
2127 Worklist.push_back(Root);
2128 while (!Worklist.empty()) {
2129 const Instruction *I = Worklist.pop_back_val();
2130 
2131 // If we know this must trigger UB on a path leading to our target.
2132 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2133 return true;
2134 
2135 // If we can't analyze propagation through this instruction, just skip it
2136 // and transitive users. Safe as false is a conservative result.
2137 if (!propagatesFullPoison(I) && I != Root)
2138 continue;
2139 
2140 if (KnownPoison.insert(I).second)
2141 for (const User *User : I->users())
2142 Worklist.push_back(cast<Instruction>(User));
2143 }
2144 
2145 // Might be non-UB, or might have a path we couldn't prove must execute on
2146 // the way to the exiting bb.
2147 return false;
2148 }
2149 
2150 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2151 /// down to checking that all operands are constant and listing instructions
2152 /// that may hide undef.
2153 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2154 unsigned Depth) {
2155 if (isa<Constant>(V))
2156 return !isa<UndefValue>(V);
2157 
2158 if (Depth >= 6)
2159 return false;
2160 
2161 // Conservatively handle non-constant non-instructions. For example, Arguments
2162 // may be undef.
2163 Instruction *I = dyn_cast<Instruction>(V);
2164 if (!I)
2165 return false;
2166 
2167 // Load and return values may be undef.
2168 if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2169 return false;
2170 
2171 // Optimistically handle other instructions.
2172 for (Value *Op : I->operands()) {
2173 if (!Visited.insert(Op).second)
2174 continue;
2175 if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2176 return false;
2177 }
2178 return true;
2179 }
2180 
2181 /// Return true if the given value is concrete. We must prove that undef can
2182 /// never reach it.
2183 ///
2184 /// TODO: If we decide that this is a good approach to checking for undef, we
2185 /// may factor it into a common location.
2186 static bool hasConcreteDef(Value *V) {
2187 SmallPtrSet<Value*, 8> Visited;
2188 Visited.insert(V);
2189 return hasConcreteDefImpl(V, Visited, 0);
2190 }
2191 
2192 /// Return true if this IV has no uses other than the (soon to be rewritten)
2193 /// loop exit test.
2194 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2195 int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2196 Value *IncV = Phi->getIncomingValue(LatchIdx);
2197 
2198 for (User *U : Phi->users())
2199 if (U != Cond && U != IncV) return false;
2200 
2201 for (User *U : IncV->users())
2202 if (U != Cond && U != Phi) return false;
2203 return true;
2204 }
2205 
2206 /// Return true if the given phi is a "counter" in L. A counter is an
2207 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
2208 /// step of 1. Note that L must have exactly one latch.
2209 static bool isLoopCounter(PHINode* Phi, Loop *L,
2210 ScalarEvolution *SE) {
2211 assert(Phi->getParent() == L->getHeader());
2212 assert(L->getLoopLatch());
2213 
2214 if (!SE->isSCEVable(Phi->getType()))
2215 return false;
2216 
2217 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2218 if (!AR || AR->getLoop() != L || !AR->isAffine())
2219 return false;
2220 
2221 const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2222 if (!Step || !Step->isOne())
2223 return false;
2224 
2225 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2226 Value *IncV = Phi->getIncomingValue(LatchIdx);
2227 return (getLoopPhiForCounter(IncV, L) == Phi);
2228 }
2229 
2230 /// Search the loop header for a loop counter (an add rec with a step of one)
2231 /// suitable for use by LFTR. If multiple counters are available, select the
2232 /// "best" one based on profitability heuristics.
2233 ///
2234 /// BECount may be an i8* pointer type. The pointer difference is already a
2235 /// valid count without scaling the address stride, so it remains a pointer
2236 /// expression as far as SCEV is concerned.
2237 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2238 const SCEV *BECount,
2239 ScalarEvolution *SE, DominatorTree *DT) {
2240 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2241 
2242 Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2243 
2244 // Loop over all of the PHI nodes, looking for a simple counter.
2245 PHINode *BestPhi = nullptr; 2246 const SCEV *BestInit = nullptr; 2247 BasicBlock *LatchBlock = L->getLoopLatch(); 2248 assert(LatchBlock && "Must be in simplified form"); 2249 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 2250 2251 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 2252 PHINode *Phi = cast<PHINode>(I); 2253 if (!isLoopCounter(Phi, L, SE)) 2254 continue; 2255 2256 // Avoid comparing an integer IV against a pointer Limit. 2257 if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy()) 2258 continue; 2259 2260 const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi)); 2261 2262 // AR may be a pointer type, while BECount is an integer type. 2263 // AR may be wider than BECount. With eq/ne tests overflow is immaterial. 2264 // AR may not be a narrower type, or we may never exit. 2265 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType()); 2266 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth)) 2267 continue; 2268 2269 // Avoid reusing a potentially undef value to compute other values that may 2270 // have originally had a concrete definition. 2271 if (!hasConcreteDef(Phi)) { 2272 // We explicitly allow unknown phis as long as they are already used by 2273 // the loop exit test. This is legal since performing LFTR could not 2274 // increase the number of undef users. 2275 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock); 2276 if (!isLoopExitTestBasedOn(Phi, ExitingBB) && 2277 !isLoopExitTestBasedOn(IncPhi, ExitingBB)) 2278 continue; 2279 } 2280 2281 // Avoid introducing undefined behavior due to poison which didn't exist in 2282 // the original program. (Annoyingly, the rules for poison and undef 2283 // propagation are distinct, so this does NOT cover the undef case above.) 2284 // We have to ensure that we don't introduce UB by introducing a use on an 2285 // iteration where said IV produces poison. Our strategy here differs for 2286 // pointers and integer IVs. For integers, we strip and reinfer as needed, 2287 // see code in linearFunctionTestReplace. For pointers, we restrict 2288 // transforms as there is no good way to reinfer inbounds once lost. 2289 if (!Phi->getType()->isIntegerTy() && 2290 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) 2291 continue; 2292 2293 const SCEV *Init = AR->getStart(); 2294 2295 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { 2296 // Don't force a live loop counter if another IV can be used. 2297 if (AlmostDeadIV(Phi, LatchBlock, Cond)) 2298 continue; 2299 2300 // Prefer to count-from-zero. This is a more "canonical" counter form. It 2301 // also prefers integer to pointer IVs. 2302 if (BestInit->isZero() != Init->isZero()) { 2303 if (BestInit->isZero()) 2304 continue; 2305 } 2306 // If two IVs both count from zero or both count from nonzero then the 2307 // narrower is likely a dead phi that has been widened. Use the wider phi 2308 // to allow the other to be eliminated. 2309 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) 2310 continue; 2311 } 2312 BestPhi = Phi; 2313 BestInit = Init; 2314 } 2315 return BestPhi; 2316 } 2317 2318 /// Insert an IR expression which computes the value held by the IV IndVar 2319 /// (which must be an loop counter w/unit stride) after the backedge of loop L 2320 /// is taken ExitCount times. 
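///
/// A minimal illustrative example (values hypothetical): for a unit-stride
/// counter starting at %base with ExitCount %n, the pre-increment limit is
/// %base + %n and the post-increment limit is %base + %n + 1. For a pointer
/// IV the same offset is materialized as a GEP from the original i8* base.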
2321 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, 2322 const SCEV *ExitCount, bool UsePostInc, Loop *L, 2323 SCEVExpander &Rewriter, ScalarEvolution *SE) { 2324 assert(isLoopCounter(IndVar, L, SE)); 2325 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 2326 const SCEV *IVInit = AR->getStart(); 2327 2328 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter 2329 // finds a valid pointer IV. Sign extend ExitCount in order to materialize a 2330 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing 2331 // the existing GEPs whenever possible. 2332 if (IndVar->getType()->isPointerTy() && 2333 !ExitCount->getType()->isPointerTy()) { 2334 // IVOffset will be the new GEP offset that is interpreted by GEP as a 2335 // signed value. ExitCount on the other hand represents the loop trip count, 2336 // which is an unsigned value. FindLoopCounter only allows induction 2337 // variables that have a positive unit stride of one. This means we don't 2338 // have to handle the case of negative offsets (yet) and just need to zero 2339 // extend ExitCount. 2340 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); 2341 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy); 2342 if (UsePostInc) 2343 IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy)); 2344 2345 // Expand the code for the iteration count. 2346 assert(SE->isLoopInvariant(IVOffset, L) && 2347 "Computed iteration count is not loop invariant!"); 2348 2349 // We could handle pointer IVs other than i8*, but we need to compensate for 2350 // gep index scaling. 2351 assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), 2352 cast<PointerType>(IndVar->getType()) 2353 ->getElementType())->isOne() && 2354 "unit stride pointer IV must be i8*"); 2355 2356 const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset); 2357 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2358 return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI); 2359 } else { 2360 // In any other case, convert both IVInit and ExitCount to integers before 2361 // comparing. This may result in SCEV expansion of pointers, but in practice 2362 // SCEV will fold the pointer arithmetic away as such: 2363 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). 2364 // 2365 // Valid Cases: (1) both integers is most common; (2) both may be pointers 2366 // for simple memset-style loops. 2367 // 2368 // IVInit integer and ExitCount pointer would only occur if a canonical IV 2369 // were generated on top of case #2, which is not expected. 2370 2371 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 2372 // For unit stride, IVCount = Start + ExitCount with 2's complement 2373 // overflow. 2374 2375 // For integer IVs, truncate the IV before computing IVInit + BECount, 2376 // unless we know apriori that the limit must be a constant when evaluated 2377 // in the bitwidth of the IV. We prefer (potentially) keeping a truncate 2378 // of the IV in the loop over a (potentially) expensive expansion of the 2379 // widened exit count add(zext(add)) expression. 
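    // For example (illustrative types only): with an i64 IVInit and an i32
    // ExitCount, we truncate IVInit to i32 and compute the limit in i32;
    // only when both operands are constants do we zero extend ExitCount to
    // i64, since the wider limit then folds to a constant anyway.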
2380 if (SE->getTypeSizeInBits(IVInit->getType()) 2381 > SE->getTypeSizeInBits(ExitCount->getType())) { 2382 if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount)) 2383 ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType()); 2384 else 2385 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType()); 2386 } 2387 2388 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount); 2389 2390 if (UsePostInc) 2391 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); 2392 2393 // Expand the code for the iteration count. 2394 assert(SE->isLoopInvariant(IVLimit, L) && 2395 "Computed iteration count is not loop invariant!"); 2396 // Ensure that we generate the same type as IndVar, or a smaller integer 2397 // type. In the presence of null pointer values, we have an integer type 2398 // SCEV expression (IVInit) for a pointer type IV value (IndVar). 2399 Type *LimitTy = ExitCount->getType()->isPointerTy() ? 2400 IndVar->getType() : ExitCount->getType(); 2401 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2402 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); 2403 } 2404 } 2405 2406 /// This method rewrites the exit condition of the loop to be a canonical != 2407 /// comparison against the incremented loop induction variable. This pass is 2408 /// able to rewrite the exit tests of any loop where the SCEV analysis can 2409 /// determine a loop-invariant trip count of the loop, which is actually a much 2410 /// broader range than just linear tests. 2411 bool IndVarSimplify:: 2412 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 2413 const SCEV *ExitCount, 2414 PHINode *IndVar, SCEVExpander &Rewriter) { 2415 assert(L->getLoopLatch() && "Loop no longer in simplified form?"); 2416 assert(isLoopCounter(IndVar, L, SE)); 2417 Instruction * const IncVar = 2418 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch())); 2419 2420 // Initialize CmpIndVar to the preincremented IV. 2421 Value *CmpIndVar = IndVar; 2422 bool UsePostInc = false; 2423 2424 // If the exiting block is the same as the backedge block, we prefer to 2425 // compare against the post-incremented value, otherwise we must compare 2426 // against the preincremented value. 2427 if (ExitingBB == L->getLoopLatch()) { 2428 // For pointer IVs, we chose to not strip inbounds which requires us not 2429 // to add a potentially UB introducing use. We need to either a) show 2430 // the loop test we're modifying is already in post-inc form, or b) show 2431 // that adding a use must not introduce UB. 2432 bool SafeToPostInc = 2433 IndVar->getType()->isIntegerTy() || 2434 isLoopExitTestBasedOn(IncVar, ExitingBB) || 2435 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT); 2436 if (SafeToPostInc) { 2437 UsePostInc = true; 2438 CmpIndVar = IncVar; 2439 } 2440 } 2441 2442 // It may be necessary to drop nowrap flags on the incrementing instruction 2443 // if either LFTR moves from a pre-inc check to a post-inc check (in which 2444 // case the increment might have previously been poison on the last iteration 2445 // only) or if LFTR switches to a different IV that was previously dynamically 2446 // dead (and as such may be arbitrarily poison). We remove any nowrap flags 2447 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc 2448 // check), because the pre-inc addrec flags may be adopted from the original 2449 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags. 
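  // For example (illustrative): if the exit check is rewritten against the
  // post-incremented value and '%iv.next = add nsw i32 %iv, 1' could wrap on
  // the final iteration only, the nsw flag must be dropped unless SCEV can
  // prove the post-increment recurrence itself is nsw.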
2450 // TODO: This handling is inaccurate for one case: If we switch to a 2451 // dynamically dead IV that wraps on the first loop iteration only, which is 2452 // not covered by the post-inc addrec. (If the new IV was not dynamically 2453 // dead, it could not be poison on the first iteration in the first place.) 2454 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) { 2455 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar)); 2456 if (BO->hasNoUnsignedWrap()) 2457 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap()); 2458 if (BO->hasNoSignedWrap()) 2459 BO->setHasNoSignedWrap(AR->hasNoSignedWrap()); 2460 } 2461 2462 Value *ExitCnt = genLoopLimit( 2463 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE); 2464 assert(ExitCnt->getType()->isPointerTy() == 2465 IndVar->getType()->isPointerTy() && 2466 "genLoopLimit missed a cast"); 2467 2468 // Insert a new icmp_ne or icmp_eq instruction before the branch. 2469 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2470 ICmpInst::Predicate P; 2471 if (L->contains(BI->getSuccessor(0))) 2472 P = ICmpInst::ICMP_NE; 2473 else 2474 P = ICmpInst::ICMP_EQ; 2475 2476 IRBuilder<> Builder(BI); 2477 2478 // The new loop exit condition should reuse the debug location of the 2479 // original loop exit condition. 2480 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition())) 2481 Builder.SetCurrentDebugLocation(Cond->getDebugLoc()); 2482 2483 // For integer IVs, if we evaluated the limit in the narrower bitwidth to 2484 // avoid the expensive expansion of the limit expression in the wider type, 2485 // emit a truncate to narrow the IV to the ExitCount type. This is safe 2486 // since we know (from the exit count bitwidth), that we can't self-wrap in 2487 // the narrower type. 2488 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); 2489 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); 2490 if (CmpIndVarSize > ExitCntSize) { 2491 assert(!CmpIndVar->getType()->isPointerTy() && 2492 !ExitCnt->getType()->isPointerTy()); 2493 2494 // Before resorting to actually inserting the truncate, use the same 2495 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend 2496 // the other side of the comparison instead. We still evaluate the limit 2497 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to 2498 // a truncate within in. 2499 bool Extended = false; 2500 const SCEV *IV = SE->getSCEV(CmpIndVar); 2501 const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar), 2502 ExitCnt->getType()); 2503 const SCEV *ZExtTrunc = 2504 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType()); 2505 2506 if (ZExtTrunc == IV) { 2507 Extended = true; 2508 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(), 2509 "wide.trip.count"); 2510 } else { 2511 const SCEV *SExtTrunc = 2512 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType()); 2513 if (SExtTrunc == IV) { 2514 Extended = true; 2515 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(), 2516 "wide.trip.count"); 2517 } 2518 } 2519 2520 if (Extended) { 2521 bool Discard; 2522 L->makeLoopInvariant(ExitCnt, Discard); 2523 } else 2524 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), 2525 "lftr.wideiv"); 2526 } 2527 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 2528 << " LHS:" << *CmpIndVar << '\n' 2529 << " op:\t" << (P == ICmpInst::ICMP_NE ? 
"!=" : "==") 2530 << "\n" 2531 << " RHS:\t" << *ExitCnt << "\n" 2532 << "ExitCount:\t" << *ExitCount << "\n" 2533 << " was: " << *BI->getCondition() << "\n"); 2534 2535 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); 2536 Value *OrigCond = BI->getCondition(); 2537 // It's tempting to use replaceAllUsesWith here to fully replace the old 2538 // comparison, but that's not immediately safe, since users of the old 2539 // comparison may not be dominated by the new comparison. Instead, just 2540 // update the branch to use the new comparison; in the common case this 2541 // will make old comparison dead. 2542 BI->setCondition(Cond); 2543 DeadInsts.push_back(OrigCond); 2544 2545 ++NumLFTR; 2546 return true; 2547 } 2548 2549 //===----------------------------------------------------------------------===// 2550 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders. 2551 //===----------------------------------------------------------------------===// 2552 2553 /// If there's a single exit block, sink any loop-invariant values that 2554 /// were defined in the preheader but not used inside the loop into the 2555 /// exit block to reduce register pressure in the loop. 2556 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { 2557 BasicBlock *ExitBlock = L->getExitBlock(); 2558 if (!ExitBlock) return false; 2559 2560 BasicBlock *Preheader = L->getLoopPreheader(); 2561 if (!Preheader) return false; 2562 2563 bool MadeAnyChanges = false; 2564 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); 2565 BasicBlock::iterator I(Preheader->getTerminator()); 2566 while (I != Preheader->begin()) { 2567 --I; 2568 // New instructions were inserted at the end of the preheader. 2569 if (isa<PHINode>(I)) 2570 break; 2571 2572 // Don't move instructions which might have side effects, since the side 2573 // effects need to complete before instructions inside the loop. Also don't 2574 // move instructions which might read memory, since the loop may modify 2575 // memory. Note that it's okay if the instruction might have undefined 2576 // behavior: LoopSimplify guarantees that the preheader dominates the exit 2577 // block. 2578 if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 2579 continue; 2580 2581 // Skip debug info intrinsics. 2582 if (isa<DbgInfoIntrinsic>(I)) 2583 continue; 2584 2585 // Skip eh pad instructions. 2586 if (I->isEHPad()) 2587 continue; 2588 2589 // Don't sink alloca: we never want to sink static alloca's out of the 2590 // entry block, and correctly sinking dynamic alloca's requires 2591 // checks for stacksave/stackrestore intrinsics. 2592 // FIXME: Refactor this check somehow? 2593 if (isa<AllocaInst>(I)) 2594 continue; 2595 2596 // Determine if there is a use in or before the loop (direct or 2597 // otherwise). 2598 bool UsedInLoop = false; 2599 for (Use &U : I->uses()) { 2600 Instruction *User = cast<Instruction>(U.getUser()); 2601 BasicBlock *UseBB = User->getParent(); 2602 if (PHINode *P = dyn_cast<PHINode>(User)) { 2603 unsigned i = 2604 PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 2605 UseBB = P->getIncomingBlock(i); 2606 } 2607 if (UseBB == Preheader || L->contains(UseBB)) { 2608 UsedInLoop = true; 2609 break; 2610 } 2611 } 2612 2613 // If there is, the def must remain in the preheader. 2614 if (UsedInLoop) 2615 continue; 2616 2617 // Otherwise, sink it to the exit block. 2618 Instruction *ToMove = &*I; 2619 bool Done = false; 2620 2621 if (I != Preheader->begin()) { 2622 // Skip debug info intrinsics. 
2623 do {
2624 --I;
2625 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2626 
2627 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2628 Done = true;
2629 } else {
2630 Done = true;
2631 }
2632 
2633 MadeAnyChanges = true;
2634 ToMove->moveBefore(*ExitBlock, InsertPt);
2635 if (Done) break;
2636 InsertPt = ToMove->getIterator();
2637 }
2638 
2639 return MadeAnyChanges;
2640 }
2641 
2642 /// Return a symbolic upper bound for the backedge taken count of the loop.
2643 /// This is more general than getConstantMaxBackedgeTakenCount as it returns
2644 /// an arbitrary expression as opposed to only constants.
2645 /// TODO: Move into the ScalarEvolution class.
2646 static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE,
2647 DominatorTree &DT, Loop *L) {
2648 SmallVector<BasicBlock*, 16> ExitingBlocks;
2649 L->getExitingBlocks(ExitingBlocks);
2650 
2651 // Form an expression for the maximum exit count possible for this loop. We
2652 // merge the max and exact information to approximate a version of
2653 // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
2654 SmallVector<const SCEV*, 4> ExitCounts;
2655 for (BasicBlock *ExitingBB : ExitingBlocks) {
2656 const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
2657 if (isa<SCEVCouldNotCompute>(ExitCount))
2658 ExitCount = SE.getExitCount(L, ExitingBB,
2659 ScalarEvolution::ConstantMaximum);
2660 if (!isa<SCEVCouldNotCompute>(ExitCount)) {
2661 assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
2662 "We should only have known counts for exiting blocks that "
2663 "dominate latch!");
2664 ExitCounts.push_back(ExitCount);
2665 }
2666 }
2667 if (ExitCounts.empty())
2668 return SE.getCouldNotCompute();
2669 return SE.getUMinFromMismatchedTypes(ExitCounts);
2670 }
2671 
2672 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
2673 SmallVector<BasicBlock*, 16> ExitingBlocks;
2674 L->getExitingBlocks(ExitingBlocks);
2675 
2676 // Remove all exits which aren't both rewritable and analyzable.
2677 auto NewEnd = llvm::remove_if(ExitingBlocks,
2678 [&](BasicBlock *ExitingBB) {
2679 // If our exiting block exits multiple loops, we can only rewrite the
2680 // innermost one. Otherwise, we're changing how many times the innermost
2681 // loop runs before it exits.
2682 if (LI->getLoopFor(ExitingBB) != L)
2683 return true;
2684 
2685 // Can't rewrite non-branch yet.
2686 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2687 if (!BI)
2688 return true;
2689 
2690 // If already constant, nothing to do.
2691 if (isa<Constant>(BI->getCondition()))
2692 return true;
2693 
2694 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2695 if (isa<SCEVCouldNotCompute>(ExitCount))
2696 return true;
2697 return false;
2698 });
2699 ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
2700 
2701 if (ExitingBlocks.empty())
2702 return false;
2703 
2704 // Get a symbolic upper bound on the loop backedge taken count.
2705 const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
2706 if (isa<SCEVCouldNotCompute>(MaxExitCount))
2707 return false;
2708 
2709 // Visit our exit blocks in order of dominance. We know from the fact that
2710 // all remaining exits are analyzable that there must be a total dominance
2711 // order between them, as each must dominate the latch. The visit order only
2712 // matters for the provably equal case.
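  // A small illustrative example: if the loop body is a chain
  //   exit1 -> exit2 -> latch
  // then exit1 dominates exit2 and is visited first; if both have the same
  // analyzable exit count, exit2 can never be the first exit taken and is
  // folded to an untaken branch below.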
2713 llvm::sort(ExitingBlocks,
2714 [&](BasicBlock *A, BasicBlock *B) {
2715 // std::sort sorts in ascending order, so we want the inverse of
2716 // the normal dominance relation.
2717 if (DT->properlyDominates(A, B)) return true;
2718 if (DT->properlyDominates(B, A)) return false;
2719 llvm_unreachable("expected total dominance order!");
2720 });
2721 #ifndef NDEBUG
2722 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2723 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2724 }
2725 #endif
2726 
2727 auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
2728 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2729 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2730 auto *OldCond = BI->getCondition();
2731 auto *NewCond = ConstantInt::get(OldCond->getType(),
2732 IsTaken ? ExitIfTrue : !ExitIfTrue);
2733 BI->setCondition(NewCond);
2734 if (OldCond->use_empty())
2735 DeadInsts.push_back(OldCond);
2736 };
2737 
2738 bool Changed = false;
2739 SmallSet<const SCEV*, 8> DominatingExitCounts;
2740 for (BasicBlock *ExitingBB : ExitingBlocks) {
2741 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2742 assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");
2743 
2744 // If we know we'd exit on the first iteration, rewrite the exit to
2745 // reflect this. This does not imply the loop must exit through this
2746 // exit; there may be an earlier one taken on the first iteration.
2747 // TODO: Given we know the backedge can't be taken, we should go ahead
2748 // and break it. Or at least, kill all the header phis and simplify.
2749 if (ExitCount->isZero()) {
2750 FoldExit(ExitingBB, true);
2751 Changed = true;
2752 continue;
2753 }
2754 
2755 // If we end up with a pointer exit count, bail. Note that we can end up
2756 // with a pointer exit count for one exiting block, and not for another in
2757 // the same loop.
2758 if (!ExitCount->getType()->isIntegerTy() ||
2759 !MaxExitCount->getType()->isIntegerTy())
2760 continue;
2761 
2762 Type *WiderType =
2763 SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2764 ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2765 MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2766 assert(MaxExitCount->getType() == ExitCount->getType());
2767 
2768 // Can we prove that some other exit must be taken strictly before this
2769 // one?
2770 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2771 MaxExitCount, ExitCount)) {
2772 FoldExit(ExitingBB, false);
2773 Changed = true;
2774 continue;
2775 }
2776 
2777 // As we run, keep track of which exit counts we've encountered. If we
2778 // find a duplicate, we've found an exit which would have exited on the
2779 // exiting iteration, but (from the visit order) strictly follows another
2780 // which does the same and is thus dead.
2781 if (!DominatingExitCounts.insert(ExitCount).second) {
2782 FoldExit(ExitingBB, false);
2783 Changed = true;
2784 continue;
2785 }
2786 
2787 // TODO: There might be another opportunity to leverage SCEV's reasoning
2788 // here. If we kept track of the min of dominating exits so far, we could
2789 // discharge exits with EC >= MDEC. This is less powerful than the existing
2790 // transform (since later exits aren't considered), but potentially more
2791 // powerful for any case where SCEV can prove a >=u b, but neither a == b
2792 // nor a >u b. Such a case is not currently known.
2793 }
2794 return Changed;
2795 }
2796 
2797 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
2798 SmallVector<BasicBlock*, 16> ExitingBlocks;
2799 L->getExitingBlocks(ExitingBlocks);
2800 
2801 bool Changed = false;
2802 
2803 // Finally, see if we can rewrite our exit conditions into a loop invariant
2804 // form. If we have a read-only loop, and we can tell that we must exit down
2805 // a path which does not need any of the values computed within the loop, we
2806 // can rewrite the loop to exit on the first iteration. Note that this
2807 // doesn't either a) tell us the loop exits on the first iteration (unless
2808 // *all* exits are predicatable) or b) tell us *which* exit might be taken.
2809 // This transformation looks a lot like a restricted form of dead loop
2810 // elimination, but restricted to read-only loops and without necessarily
2811 // needing to kill the loop entirely.
2812 if (!LoopPredication)
2813 return Changed;
2814 
2815 if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2816 return Changed;
2817 
2818 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2819 // through *explicit* control flow. We have to eliminate the possibility of
2820 // implicit exits (see below) before we know it's truly exact.
2821 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2822 if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2823 !SE->isLoopInvariant(ExactBTC, L) ||
2824 !isSafeToExpand(ExactBTC, *SE))
2825 return Changed;
2826 
2827 // If we end up with a pointer exit count, bail. It may be unsized.
2828 if (!ExactBTC->getType()->isIntegerTy())
2829 return Changed;
2830 
2831 auto BadExit = [&](BasicBlock *ExitingBB) {
2832 // If our exiting block exits multiple loops, we can only rewrite the
2833 // innermost one. Otherwise, we're changing how many times the innermost
2834 // loop runs before it exits.
2835 if (LI->getLoopFor(ExitingBB) != L)
2836 return true;
2837 
2838 // Can't rewrite non-branch yet.
2839 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2840 if (!BI)
2841 return true;
2842 
2843 // If already constant, nothing to do.
2844 if (isa<Constant>(BI->getCondition()))
2845 return true;
2846 
2847 // If the exit block has phis, we need to be able to compute the values
2848 // within the loop which contains them. This assumes trivial LCSSA phis
2849 // have already been removed; TODO: generalize
2850 BasicBlock *ExitBlock =
2851 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
2852 if (!ExitBlock->phis().empty())
2853 return true;
2854 
2855 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2856 assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
2857 if (!SE->isLoopInvariant(ExitCount, L) ||
2858 !isSafeToExpand(ExitCount, *SE))
2859 return true;
2860 
2861 // If we end up with a pointer exit count, bail. It may be unsized.
2862 if (!ExitCount->getType()->isIntegerTy())
2863 return true;
2864 
2865 return false;
2866 };
2867 
2868 // If we have any exits which can't be predicated themselves, then we can't
2869 // predicate any exit which isn't guaranteed to execute before it. Consider
2870 // two exits (a) and (b) which would both exit on the same iteration. If we
2871 // can predicate (b), but not (a), and (a) precedes (b) along some path, then
2872 // we could convert a loop from exiting through (a) to one exiting through
2873 // (b).
  // Note that this problem exists only for exits with the same exit count,
  // and we could be more aggressive when exit counts are known inequal.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse of
               // the normal dominance relation, plus a tie breaker for blocks
               // unordered by dominance.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               return A->getName() < B->getName();
             });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge).  If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return Changed;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] with i < j.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return Changed;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count.  The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
        return DT->dominates(ExitingBB, L->getLoopLatch());
      }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable.  Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // TODO: As structured, this is O(I^2) for loop nests.  We could bail out
  // for outer loops, but that seems less than ideal.  MemorySSA can find
  // memory writes; is that enough to cover *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO: use isGuaranteedToTransferExecutionToSuccessor here
      if (I.mayHaveSideEffects() || I.mayThrow())
        return Changed;

  // Finally, do the actual predication for all predicatable blocks.  A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch.  Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic.  We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
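  // The rewritten conditions below are loop invariant: an exit is now taken
  // (on the first iteration) iff its exit count equals the exact backedge
  // taken count, i.e. iff it is an exit the original loop would have exited
  // through on its final iteration.  When the two counts are the same SCEV
  // the condition folds to a constant; otherwise we compare the expanded
  // counts, zero-extending to a common width if their types differ.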
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.push_back(OldCond);
    Changed = true;
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");
  bool Changed = false;

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with
  //    preheaders, and we're in trouble if we can't find the induction
  //    variable even when we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only
  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl)
    Changed |= rewriteLoopExitValues(L, Rewriter);

  // Eliminate redundant IV cycles.
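  // (Two IV phis are congruent when SCEV proves they compute the same value
  // on every iteration, e.g. an original IV and its widened copy;
  // replaceCongruentIVs keeps a single representative and queues the
  // redundant ones on DeadInsts for removal below.)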
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty())
    if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
      Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
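  // (For example, an address or product computed in the preheader whose only
  // uses are outside the loop can be sunk into the loop's exit block.)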
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR and any other changes have not interfered with SCEV's
  // ability to compute trip count.  We may have *changed* the exit count, but
  // only by reducing it.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ?
        &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}
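// The pass can be exercised in isolation with, e.g., 'opt -passes=indvars'
// under the new pass manager or 'opt -indvars' under the legacy pass manager.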