//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
"llvm/Transforms/Utils/SimplifyIndVar.h" 84 #include <cassert> 85 #include <cstdint> 86 #include <utility> 87 88 using namespace llvm; 89 90 #define DEBUG_TYPE "indvars" 91 92 STATISTIC(NumWidened , "Number of indvars widened"); 93 STATISTIC(NumReplaced , "Number of exit values replaced"); 94 STATISTIC(NumLFTR , "Number of loop exit tests replaced"); 95 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); 96 STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); 97 98 // Trip count verification can be enabled by default under NDEBUG if we 99 // implement a strong expression equivalence checker in SCEV. Until then, we 100 // use the verify-indvars flag, which may assert in some cases. 101 static cl::opt<bool> VerifyIndvars( 102 "verify-indvars", cl::Hidden, 103 cl::desc("Verify the ScalarEvolution result after running indvars")); 104 105 enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl }; 106 107 static cl::opt<ReplaceExitVal> ReplaceExitValue( 108 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), 109 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), 110 cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), 111 clEnumValN(OnlyCheapRepl, "cheap", 112 "only replace exit value when the cost is cheap"), 113 clEnumValN(NoHardUse, "noharduse", 114 "only replace exit values when loop def likely dead"), 115 clEnumValN(AlwaysRepl, "always", 116 "always replace exit value whenever possible"))); 117 118 static cl::opt<bool> UsePostIncrementRanges( 119 "indvars-post-increment-ranges", cl::Hidden, 120 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), 121 cl::init(true)); 122 123 static cl::opt<bool> 124 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), 125 cl::desc("Disable Linear Function Test Replace optimization")); 126 127 static cl::opt<bool> 128 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(false), 129 cl::desc("Predicate conditions in read only loops")); 130 131 132 namespace { 133 134 struct RewritePhi; 135 136 class IndVarSimplify { 137 LoopInfo *LI; 138 ScalarEvolution *SE; 139 DominatorTree *DT; 140 const DataLayout &DL; 141 TargetLibraryInfo *TLI; 142 const TargetTransformInfo *TTI; 143 144 SmallVector<WeakTrackingVH, 16> DeadInsts; 145 146 bool isValidRewrite(Value *FromVal, Value *ToVal); 147 148 bool handleFloatingPointIV(Loop *L, PHINode *PH); 149 bool rewriteNonIntegerIVs(Loop *L); 150 151 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); 152 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter); 153 154 bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet); 155 bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); 156 bool rewriteFirstIterationLoopExitValues(Loop *L); 157 bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const; 158 159 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 160 const SCEV *ExitCount, 161 PHINode *IndVar, SCEVExpander &Rewriter); 162 163 bool sinkUnusedInvariants(Loop *L); 164 165 public: 166 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, 167 const DataLayout &DL, TargetLibraryInfo *TLI, 168 TargetTransformInfo *TTI) 169 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {} 170 171 bool run(Loop *L); 172 }; 173 174 } // end anonymous namespace 175 176 /// Return true if the SCEV expansion generated by the rewriter can replace the 177 /// original value. 
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
///   for(double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for(int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if the increment operation is not an
  // add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;

  // Ith incoming value.
  unsigned Ith;

  // Exit value after expansion.
  Value *Val;

  // High Cost when expansion.
  bool HighCost;

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
  SmallPtrSet<const Instruction *, 8> Visited;
  SmallVector<const Instruction *, 8> WorkList;
  Visited.insert(I);
  WorkList.push_back(I);
  while (!WorkList.empty()) {
    const Instruction *Curr = WorkList.pop_back_val();
    // This use is outside the loop, nothing to do.
    if (!L->contains(Curr))
      continue;
    // Do we assume it is a "hard" use which will not be eliminated easily?
    if (Curr->mayHaveSideEffects())
      return true;
    // Otherwise, add all its users to worklist.
    for (auto U : Curr->users()) {
      auto *UI = cast<Instruction>(U);
      if (Visited.insert(UI).second)
        WorkList.push_back(UI);
    }
  }
  return false;
}

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
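    // (PHI nodes are required to be grouped at the top of a basic block, so
    // the scan below stops at the first non-PHI instruction.)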
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not integer or is not defined
        // in the loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop.  Evaluate the value it
        // contains when the loop exits, if possible.  We prefer to start with
        // expressions which are true for all exits (so as to maximize
        // expression reuse by the SCEVExpander), but resort to per-exit
        // evaluation if that fails.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (isa<SCEVCouldNotCompute>(ExitValue) ||
            !SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE)) {
          // TODO: This should probably be sunk into SCEV in some way; maybe a
          // getSCEVForExit(SCEV*, L, ExitingBB)?  It can be generalized for
          // most SCEV expressions and other recurrence types (e.g. shift
          // recurrences).  Is there existing code we can reuse?
          const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
          if (isa<SCEVCouldNotCompute>(ExitCount))
            continue;
          if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
            if (AddRec->getLoop() == L)
              ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
          if (isa<SCEVCouldNotCompute>(ExitValue) ||
              !SE->isLoopInvariant(ExitValue, L) ||
              !isSafeToExpand(ExitValue, *SE))
            continue;
        }

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which can not be optimized
        // away.  Avoid doing so unless we know we have a value which computes
        // the ExitValue already.  TODO: This should be merged into SCEV
        // expander to leverage its knowledge of existing expressions.
        if (ReplaceExitValue != AlwaysRepl &&
            !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
            hasHardUserWithinLoop(L, Inst))
          continue;

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);

        LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
                          << '\n'
                          << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

#ifndef NDEBUG
        // If we reuse an instruction from a loop which is neither L nor one of
        // its containing loops, we end up breaking LCSSA form for this loop by
        // creating a new use of its instruction.
        if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
          if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
            if (EVL != L)
              assert(EVL->contains(L) && "LCSSA breach detected!");
#endif

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  bool Changed = false;
  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, delete it. Don't do it now to avoid
    // invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
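    // (Because the loop is in LCSSA form, any value defined inside the loop
    // and used along this exit path appears as an incoming value of one of
    // these PHIs.)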
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

/// Check whether it is possible to delete the loop after rewriting exit
/// value. If it is possible, ignore ReplaceExitValue and do rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // In the LoopDeletion pass, a loop can be deleted even when
  // ExitingBlocks.size() > 1. We obviate the multiple-ExitingBlocks case here
  // for simplicity.
  // TODO: If we see a testcase with multiple ExitingBlocks that can be deleted
  // after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetics on the wider indvar are more
  // expensive than those on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def. This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative.  Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo *LI;
  Loop *L;
  ScalarEvolution *SE;
  DominatorTree *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// 0 for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);
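
  // Rebuild the operation at the wide type, reusing the original opcode and
  // copying over the IR flags (such as nsw/nuw or exact) from the narrow
  // operation.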
  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the no-wrap
  // behavior depends on. Non-control-equivalent instructions can be mapped to
  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
  // semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop? If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension).  For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive.  Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}

/// If the narrow use is an instruction whose two operands are the defining
/// instruction of DU and a load instruction, then we have the following:
/// if the load is hoisted outside the loop, then we do not reach this function
/// as scalar evolution analysis works fine in widenIVUse with variables
/// hoisted outside the loop and efficient code is subsequently generated by
/// not emitting truncate instructions. But when the load is not hoisted
/// (whether due to a limitation in alias analysis or due to a genuine legality
/// constraint), then scalar evolution can not proceed with loop variant values
/// and inefficient code is generated. This function handles the non-hoisted
/// load special case by making the optimization generate the same type of code
/// for hoisted and non-hoisted loads (widen the use and eliminate the sign
/// extend instruction). This special case is important especially when the
/// induction variables are affecting the addressing mode in code generation.
bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return false;

  // The operand that is not defined by NarrowDef of DU. Let's call it the
  // other operand.
  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
         "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(NarrowUse);
  ExtendKind ExtKind = getExtendKind(NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return false;

  // We are interested in the other operand being a load instruction.
1443 // But, we should look into relaxing this restriction later on. 1444 auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx)); 1445 if (I && I->getOpcode() != Instruction::Load) 1446 return false; 1447 1448 // Verifying that Defining operand is an AddRec 1449 const SCEV *Op1 = SE->getSCEV(WideDef); 1450 const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1); 1451 if (!AddRecOp1 || AddRecOp1->getLoop() != L) 1452 return false; 1453 // Verifying that other operand is an Extend. 1454 if (ExtKind == SignExtended) { 1455 if (!isa<SCEVSignExtendExpr>(ExtendOperExpr)) 1456 return false; 1457 } else { 1458 if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr)) 1459 return false; 1460 } 1461 1462 if (ExtKind == SignExtended) { 1463 for (Use &U : NarrowUse->uses()) { 1464 SExtInst *User = dyn_cast<SExtInst>(U.getUser()); 1465 if (!User || User->getType() != WideType) 1466 return false; 1467 } 1468 } else { // ExtKind == ZeroExtended 1469 for (Use &U : NarrowUse->uses()) { 1470 ZExtInst *User = dyn_cast<ZExtInst>(U.getUser()); 1471 if (!User || User->getType() != WideType) 1472 return false; 1473 } 1474 } 1475 1476 return true; 1477 } 1478 1479 /// Special Case for widening with variant Loads (see 1480 /// WidenIV::widenWithVariantLoadUse). This is the code generation part. 1481 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) { 1482 Instruction *NarrowUse = DU.NarrowUse; 1483 Instruction *NarrowDef = DU.NarrowDef; 1484 Instruction *WideDef = DU.WideDef; 1485 1486 ExtendKind ExtKind = getExtendKind(NarrowDef); 1487 1488 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); 1489 1490 // Generating a widening use instruction. 1491 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) 1492 ? WideDef 1493 : createExtendInst(NarrowUse->getOperand(0), WideType, 1494 ExtKind, NarrowUse); 1495 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) 1496 ? WideDef 1497 : createExtendInst(NarrowUse->getOperand(1), WideType, 1498 ExtKind, NarrowUse); 1499 1500 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 1501 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 1502 NarrowBO->getName()); 1503 IRBuilder<> Builder(NarrowUse); 1504 Builder.Insert(WideBO); 1505 WideBO->copyIRFlags(NarrowBO); 1506 1507 if (ExtKind == SignExtended) 1508 ExtendKindMap[NarrowUse] = SignExtended; 1509 else 1510 ExtendKindMap[NarrowUse] = ZeroExtended; 1511 1512 // Update the Use. 1513 if (ExtKind == SignExtended) { 1514 for (Use &U : NarrowUse->uses()) { 1515 SExtInst *User = dyn_cast<SExtInst>(U.getUser()); 1516 if (User && User->getType() == WideType) { 1517 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " 1518 << *WideBO << "\n"); 1519 ++NumElimExt; 1520 User->replaceAllUsesWith(WideBO); 1521 DeadInsts.emplace_back(User); 1522 } 1523 } 1524 } else { // ExtKind == ZeroExtended 1525 for (Use &U : NarrowUse->uses()) { 1526 ZExtInst *User = dyn_cast<ZExtInst>(U.getUser()); 1527 if (User && User->getType() == WideType) { 1528 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " 1529 << *WideBO << "\n"); 1530 ++NumElimExt; 1531 User->replaceAllUsesWith(WideBO); 1532 DeadInsts.emplace_back(User); 1533 } 1534 } 1535 } 1536 } 1537 1538 /// Determine whether an individual user of the narrow IV can be widened. If so, 1539 /// return the wide clone of the user. 
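///
/// Illustrative sketch (hypothetical IR, not drawn from a test case): with a
/// narrow i32 IV being widened to i64, a user such as
///   %add = add nsw i32 %iv, 1
///   %ext = sext i32 %add to i64
/// can be replaced by a wide clone 'add nsw i64 %iv.wide, 1'; the sext then
/// becomes redundant and is eventually queued in DeadInsts for removal.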
1540 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { 1541 assert(ExtendKindMap.count(DU.NarrowDef) && 1542 "Should already know the kind of extension used to widen NarrowDef"); 1543 1544 // Stop traversing the def-use chain at inner-loop phis or post-loop phis. 1545 if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) { 1546 if (LI->getLoopFor(UsePhi->getParent()) != L) { 1547 // For LCSSA phis, sink the truncate outside the loop. 1548 // After SimplifyCFG most loop exit targets have a single predecessor. 1549 // Otherwise fall back to a truncate within the loop. 1550 if (UsePhi->getNumOperands() != 1) 1551 truncateIVUse(DU, DT, LI); 1552 else { 1553 // Widening the PHI requires us to insert a trunc. The logical place 1554 // for this trunc is in the same BB as the PHI. This is not possible if 1555 // the BB is terminated by a catchswitch. 1556 if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator())) 1557 return nullptr; 1558 1559 PHINode *WidePhi = 1560 PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", 1561 UsePhi); 1562 WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0)); 1563 IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt()); 1564 Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType()); 1565 UsePhi->replaceAllUsesWith(Trunc); 1566 DeadInsts.emplace_back(UsePhi); 1567 LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to " 1568 << *WidePhi << "\n"); 1569 } 1570 return nullptr; 1571 } 1572 } 1573 1574 // This narrow use can be widened by a sext if it's non-negative or its narrow 1575 // def was widended by a sext. Same for zext. 1576 auto canWidenBySExt = [&]() { 1577 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended; 1578 }; 1579 auto canWidenByZExt = [&]() { 1580 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended; 1581 }; 1582 1583 // Our raison d'etre! Eliminate sign and zero extension. 1584 if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) || 1585 (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) { 1586 Value *NewDef = DU.WideDef; 1587 if (DU.NarrowUse->getType() != WideType) { 1588 unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType()); 1589 unsigned IVWidth = SE->getTypeSizeInBits(WideType); 1590 if (CastWidth < IVWidth) { 1591 // The cast isn't as wide as the IV, so insert a Trunc. 1592 IRBuilder<> Builder(DU.NarrowUse); 1593 NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType()); 1594 } 1595 else { 1596 // A wider extend was hidden behind a narrower one. This may induce 1597 // another round of IV widening in which the intermediate IV becomes 1598 // dead. It should be very rare. 1599 LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi 1600 << " not wide enough to subsume " << *DU.NarrowUse 1601 << "\n"); 1602 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); 1603 NewDef = DU.NarrowUse; 1604 } 1605 } 1606 if (NewDef != DU.NarrowUse) { 1607 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse 1608 << " replaced by " << *DU.WideDef << "\n"); 1609 ++NumElimExt; 1610 DU.NarrowUse->replaceAllUsesWith(NewDef); 1611 DeadInsts.emplace_back(DU.NarrowUse); 1612 } 1613 // Now that the extend is gone, we want to expose it's uses for potential 1614 // further simplification. We don't need to directly inform SimplifyIVUsers 1615 // of the new users, because their parent IV will be processed later as a 1616 // new loop phi. 
If we preserved IVUsers analysis, we would also want to 1617 // push the uses of WideDef here. 1618 1619 // No further widening is needed. The deceased [sz]ext had done it for us. 1620 return nullptr; 1621 } 1622 1623 // Does this user itself evaluate to a recurrence after widening? 1624 WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU); 1625 if (!WideAddRec.first) 1626 WideAddRec = getWideRecurrence(DU); 1627 1628 assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown)); 1629 if (!WideAddRec.first) { 1630 // If use is a loop condition, try to promote the condition instead of 1631 // truncating the IV first. 1632 if (widenLoopCompare(DU)) 1633 return nullptr; 1634 1635 // We are here about to generate a truncate instruction that may hurt 1636 // performance because the scalar evolution expression computed earlier 1637 // in WideAddRec.first does not indicate a polynomial induction expression. 1638 // In that case, look at the operands of the use instruction to determine 1639 // if we can still widen the use instead of truncating its operand. 1640 if (widenWithVariantLoadUse(DU)) { 1641 widenWithVariantLoadUseCodegen(DU); 1642 return nullptr; 1643 } 1644 1645 // This user does not evaluate to a recurrence after widening, so don't 1646 // follow it. Instead insert a Trunc to kill off the original use, 1647 // eventually isolating the original narrow IV so it can be removed. 1648 truncateIVUse(DU, DT, LI); 1649 return nullptr; 1650 } 1651 // Assume block terminators cannot evaluate to a recurrence. We can't to 1652 // insert a Trunc after a terminator if there happens to be a critical edge. 1653 assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() && 1654 "SCEV is not expected to evaluate a block terminator"); 1655 1656 // Reuse the IV increment that SCEVExpander created as long as it dominates 1657 // NarrowUse. 1658 Instruction *WideUse = nullptr; 1659 if (WideAddRec.first == WideIncExpr && 1660 Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) 1661 WideUse = WideInc; 1662 else { 1663 WideUse = cloneIVUser(DU, WideAddRec.first); 1664 if (!WideUse) 1665 return nullptr; 1666 } 1667 // Evaluation of WideAddRec ensured that the narrow expression could be 1668 // extended outside the loop without overflow. This suggests that the wide use 1669 // evaluates to the same expression as the extended narrow use, but doesn't 1670 // absolutely guarantee it. Hence the following failsafe check. In rare cases 1671 // where it fails, we simply throw away the newly created wide use. 1672 if (WideAddRec.first != SE->getSCEV(WideUse)) { 1673 LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " 1674 << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first 1675 << "\n"); 1676 DeadInsts.emplace_back(WideUse); 1677 return nullptr; 1678 } 1679 1680 // if we reached this point then we are going to replace 1681 // DU.NarrowUse with WideUse. Reattach DbgValue then. 1682 replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT); 1683 1684 ExtendKindMap[DU.NarrowUse] = WideAddRec.second; 1685 // Returning WideUse pushes it on the worklist. 1686 return WideUse; 1687 } 1688 1689 /// Add eligible users of NarrowDef to NarrowIVUsers. 
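///
/// For illustration (an assumed shape, not taken from the source): if the
/// narrow def is %i = phi i32 [ 0, %preheader ], [ %i.next, %latch ] whose
/// SCEV is {0,+,1}<nsw>, SCEV can typically prove %i >= 0, so each queued
/// def-use edge is marked NeverNegative and the use may later be widened with
/// either a sign or a zero extend.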
1690 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { 1691 const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); 1692 bool NonNegativeDef = 1693 SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, 1694 SE->getConstant(NarrowSCEV->getType(), 0)); 1695 for (User *U : NarrowDef->users()) { 1696 Instruction *NarrowUser = cast<Instruction>(U); 1697 1698 // Handle data flow merges and bizarre phi cycles. 1699 if (!Widened.insert(NarrowUser).second) 1700 continue; 1701 1702 bool NonNegativeUse = false; 1703 if (!NonNegativeDef) { 1704 // We might have a control-dependent range information for this context. 1705 if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser)) 1706 NonNegativeUse = RangeInfo->getSignedMin().isNonNegative(); 1707 } 1708 1709 NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, 1710 NonNegativeDef || NonNegativeUse); 1711 } 1712 } 1713 1714 /// Process a single induction variable. First use the SCEVExpander to create a 1715 /// wide induction variable that evaluates to the same recurrence as the 1716 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's 1717 /// def-use chain. After widenIVUse has processed all interesting IV users, the 1718 /// narrow IV will be isolated for removal by DeleteDeadPHIs. 1719 /// 1720 /// It would be simpler to delete uses as they are processed, but we must avoid 1721 /// invalidating SCEV expressions. 1722 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { 1723 // Is this phi an induction variable? 1724 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); 1725 if (!AddRec) 1726 return nullptr; 1727 1728 // Widen the induction variable expression. 1729 const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended 1730 ? SE->getSignExtendExpr(AddRec, WideType) 1731 : SE->getZeroExtendExpr(AddRec, WideType); 1732 1733 assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && 1734 "Expect the new IV expression to preserve its type"); 1735 1736 // Can the IV be extended outside the loop without overflow? 1737 AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); 1738 if (!AddRec || AddRec->getLoop() != L) 1739 return nullptr; 1740 1741 // An AddRec must have loop-invariant operands. Since this AddRec is 1742 // materialized by a loop header phi, the expression cannot have any post-loop 1743 // operands, so they must dominate the loop header. 1744 assert( 1745 SE->properlyDominates(AddRec->getStart(), L->getHeader()) && 1746 SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) && 1747 "Loop header phi recurrence inputs do not dominate the loop"); 1748 1749 // Iterate over IV uses (including transitive ones) looking for IV increments 1750 // of the form 'add nsw %iv, <const>'. For each increment and each use of 1751 // the increment calculate control-dependent range information basing on 1752 // dominating conditions inside of the loop (e.g. a range check inside of the 1753 // loop). Calculated ranges are stored in PostIncRangeInfos map. 1754 // 1755 // Control-dependent range information is later used to prove that a narrow 1756 // definition is not negative (see pushNarrowIVUsers). It's difficult to do 1757 // this on demand because when pushNarrowIVUsers needs this information some 1758 // of the dominating conditions might be already widened. 1759 if (UsePostIncrementRanges) 1760 calculatePostIncRanges(OrigPhi); 1761 1762 // The rewriter provides a value for the desired IV expression. 
This may 1763 // either find an existing phi or materialize a new one. Either way, we 1764 // expect a well-formed cyclic phi-with-increments. i.e. any operand not part 1765 // of the phi-SCC dominates the loop entry. 1766 Instruction *InsertPt = &L->getHeader()->front(); 1767 WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt)); 1768 1769 // Remembering the WideIV increment generated by SCEVExpander allows 1770 // widenIVUse to reuse it when widening the narrow IV's increment. We don't 1771 // employ a general reuse mechanism because the call above is the only call to 1772 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. 1773 if (BasicBlock *LatchBlock = L->getLoopLatch()) { 1774 WideInc = 1775 cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); 1776 WideIncExpr = SE->getSCEV(WideInc); 1777 // Propagate the debug location associated with the original loop increment 1778 // to the new (widened) increment. 1779 auto *OrigInc = 1780 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); 1781 WideInc->setDebugLoc(OrigInc->getDebugLoc()); 1782 } 1783 1784 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); 1785 ++NumWidened; 1786 1787 // Traverse the def-use chain using a worklist starting at the original IV. 1788 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); 1789 1790 Widened.insert(OrigPhi); 1791 pushNarrowIVUsers(OrigPhi, WidePhi); 1792 1793 while (!NarrowIVUsers.empty()) { 1794 NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); 1795 1796 // Process a def-use edge. This may replace the use, so don't hold a 1797 // use_iterator across it. 1798 Instruction *WideUse = widenIVUse(DU, Rewriter); 1799 1800 // Follow all def-use edges from the previous narrow use. 1801 if (WideUse) 1802 pushNarrowIVUsers(DU.NarrowUse, WideUse); 1803 1804 // widenIVUse may have removed the def-use edge. 1805 if (DU.NarrowDef->use_empty()) 1806 DeadInsts.emplace_back(DU.NarrowDef); 1807 } 1808 1809 // Attach any debug information to the new PHI. 1810 replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT); 1811 1812 return WidePhi; 1813 } 1814 1815 /// Calculates control-dependent range for the given def at the given context 1816 /// by looking at dominating conditions inside of the loop 1817 void WidenIV::calculatePostIncRange(Instruction *NarrowDef, 1818 Instruction *NarrowUser) { 1819 using namespace llvm::PatternMatch; 1820 1821 Value *NarrowDefLHS; 1822 const APInt *NarrowDefRHS; 1823 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), 1824 m_APInt(NarrowDefRHS))) || 1825 !NarrowDefRHS->isNonNegative()) 1826 return; 1827 1828 auto UpdateRangeFromCondition = [&] (Value *Condition, 1829 bool TrueDest) { 1830 CmpInst::Predicate Pred; 1831 Value *CmpRHS; 1832 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), 1833 m_Value(CmpRHS)))) 1834 return; 1835 1836 CmpInst::Predicate P = 1837 TrueDest ? 
Pred : CmpInst::getInversePredicate(Pred); 1838 1839 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); 1840 auto CmpConstrainedLHSRange = 1841 ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); 1842 auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( 1843 *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); 1844 1845 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); 1846 }; 1847 1848 auto UpdateRangeFromGuards = [&](Instruction *Ctx) { 1849 if (!HasGuards) 1850 return; 1851 1852 for (Instruction &I : make_range(Ctx->getIterator().getReverse(), 1853 Ctx->getParent()->rend())) { 1854 Value *C = nullptr; 1855 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) 1856 UpdateRangeFromCondition(C, /*TrueDest=*/true); 1857 } 1858 }; 1859 1860 UpdateRangeFromGuards(NarrowUser); 1861 1862 BasicBlock *NarrowUserBB = NarrowUser->getParent(); 1863 // If NarrowUserBB is statically unreachable asking dominator queries may 1864 // yield surprising results. (e.g. the block may not have a dom tree node) 1865 if (!DT->isReachableFromEntry(NarrowUserBB)) 1866 return; 1867 1868 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); 1869 L->contains(DTB->getBlock()); 1870 DTB = DTB->getIDom()) { 1871 auto *BB = DTB->getBlock(); 1872 auto *TI = BB->getTerminator(); 1873 UpdateRangeFromGuards(TI); 1874 1875 auto *BI = dyn_cast<BranchInst>(TI); 1876 if (!BI || !BI->isConditional()) 1877 continue; 1878 1879 auto *TrueSuccessor = BI->getSuccessor(0); 1880 auto *FalseSuccessor = BI->getSuccessor(1); 1881 1882 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { 1883 return BBE.isSingleEdge() && 1884 DT->dominates(BBE, NarrowUser->getParent()); 1885 }; 1886 1887 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) 1888 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); 1889 1890 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) 1891 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); 1892 } 1893 } 1894 1895 /// Calculates PostIncRangeInfos map for the given IV 1896 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { 1897 SmallPtrSet<Instruction *, 16> Visited; 1898 SmallVector<Instruction *, 6> Worklist; 1899 Worklist.push_back(OrigPhi); 1900 Visited.insert(OrigPhi); 1901 1902 while (!Worklist.empty()) { 1903 Instruction *NarrowDef = Worklist.pop_back_val(); 1904 1905 for (Use &U : NarrowDef->uses()) { 1906 auto *NarrowUser = cast<Instruction>(U.getUser()); 1907 1908 // Don't go looking outside the current loop. 1909 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; 1910 if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) 1911 continue; 1912 1913 if (!Visited.insert(NarrowUser).second) 1914 continue; 1915 1916 Worklist.push_back(NarrowUser); 1917 1918 calculatePostIncRange(NarrowDef, NarrowUser); 1919 } 1920 } 1921 } 1922 1923 //===----------------------------------------------------------------------===// 1924 // Live IV Reduction - Minimize IVs live across the loop. 1925 //===----------------------------------------------------------------------===// 1926 1927 //===----------------------------------------------------------------------===// 1928 // Simplification of IV users based on SCEV evaluation. 
1929 //===----------------------------------------------------------------------===//
1930
1931 namespace {
1932
1933 class IndVarSimplifyVisitor : public IVVisitor {
1934   ScalarEvolution *SE;
1935   const TargetTransformInfo *TTI;
1936   PHINode *IVPhi;
1937
1938 public:
1939   WideIVInfo WI;
1940
1941   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1942                         const TargetTransformInfo *TTI,
1943                         const DominatorTree *DTree)
1944     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1945     DT = DTree;
1946     WI.NarrowIV = IVPhi;
1947   }
1948
1949   // Implement the interface used by simplifyUsersOfIV.
1950   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1951 };
1952
1953 } // end anonymous namespace
1954
1955 /// Iteratively perform simplification on a worklist of IV users. Each
1956 /// successive simplification may push more users which may themselves be
1957 /// candidates for simplification.
1958 ///
1959 /// Sign/Zero extend elimination is interleaved with IV simplification.
1960 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1961                                        SCEVExpander &Rewriter,
1962                                        LoopInfo *LI) {
1963   SmallVector<WideIVInfo, 8> WideIVs;
1964
1965   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1966           Intrinsic::getName(Intrinsic::experimental_guard));
1967   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1968
1969   SmallVector<PHINode*, 8> LoopPhis;
1970   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1971     LoopPhis.push_back(cast<PHINode>(I));
1972   }
1973   // Each round of simplification iterates through the SimplifyIVUsers worklist
1974   // for all current phis, then determines whether any IVs can be
1975   // widened. Widening adds new phis to LoopPhis, inducing another round of
1976   // simplification on the wide IVs.
1977   bool Changed = false;
1978   while (!LoopPhis.empty()) {
1979     // Evaluate as many IV expressions as possible before widening any IVs. This
1980     // forces SCEV to set no-wrap flags before evaluating sign/zero
1981     // extension. The first time SCEV attempts to normalize sign/zero extension,
1982     // the result becomes final. So for the most predictable results, we delay
1983     // evaluation of sign/zero extensions until needed, and avoid running
1984     // other SCEV-based analyses prior to simplifyAndExtend.
1985     do {
1986       PHINode *CurrIV = LoopPhis.pop_back_val();
1987
1988       // Information about sign/zero extensions of CurrIV.
1989       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1990
1991       Changed |=
1992           simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1993
1994       if (Visitor.WI.WidestNativeType) {
1995         WideIVs.push_back(Visitor.WI);
1996       }
1997     } while(!LoopPhis.empty());
1998
1999     for (; !WideIVs.empty(); WideIVs.pop_back()) {
2000       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
2001       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
2002         Changed = true;
2003         LoopPhis.push_back(WidePhi);
2004       }
2005     }
2006   }
2007   return Changed;
2008 }
2009
2010 //===----------------------------------------------------------------------===//
2011 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
2012 //===----------------------------------------------------------------------===//
2013
2014 /// Given a Value which is hoped to be part of an add recurrence in the given
2015 /// loop, return the associated Phi node if so. Otherwise, return null. Note
2016 /// that this is less general than SCEV's AddRec checking.
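///
/// For example (a hypothetical snippet): given the increment
///   %iv.next = add nuw nsw i32 %iv, %step
/// where %iv is a phi in the loop header and %step is loop invariant, this
/// returns %iv; anything more complicated simply returns null.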
2017 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) { 2018 Instruction *IncI = dyn_cast<Instruction>(IncV); 2019 if (!IncI) 2020 return nullptr; 2021 2022 switch (IncI->getOpcode()) { 2023 case Instruction::Add: 2024 case Instruction::Sub: 2025 break; 2026 case Instruction::GetElementPtr: 2027 // An IV counter must preserve its type. 2028 if (IncI->getNumOperands() == 2) 2029 break; 2030 LLVM_FALLTHROUGH; 2031 default: 2032 return nullptr; 2033 } 2034 2035 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0)); 2036 if (Phi && Phi->getParent() == L->getHeader()) { 2037 if (L->isLoopInvariant(IncI->getOperand(1))) 2038 return Phi; 2039 return nullptr; 2040 } 2041 if (IncI->getOpcode() == Instruction::GetElementPtr) 2042 return nullptr; 2043 2044 // Allow add/sub to be commuted. 2045 Phi = dyn_cast<PHINode>(IncI->getOperand(1)); 2046 if (Phi && Phi->getParent() == L->getHeader()) { 2047 if (L->isLoopInvariant(IncI->getOperand(0))) 2048 return Phi; 2049 } 2050 return nullptr; 2051 } 2052 2053 /// Whether the current loop exit test is based on this value. Currently this 2054 /// is limited to a direct use in the loop condition. 2055 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) { 2056 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2057 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition()); 2058 // TODO: Allow non-icmp loop test. 2059 if (!ICmp) 2060 return false; 2061 2062 // TODO: Allow indirect use. 2063 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V; 2064 } 2065 2066 /// linearFunctionTestReplace policy. Return true unless we can show that the 2067 /// current exit test is already sufficiently canonical. 2068 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) { 2069 assert(L->getLoopLatch() && "Must be in simplified form"); 2070 2071 // Avoid converting a constant or loop invariant test back to a runtime 2072 // test. This is critical for when SCEV's cached ExitCount is less precise 2073 // than the current IR (such as after we've proven a particular exit is 2074 // actually dead and thus the BE count never reaches our ExitCount.) 2075 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2076 if (L->isLoopInvariant(BI->getCondition())) 2077 return false; 2078 2079 // Do LFTR to simplify the exit condition to an ICMP. 2080 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); 2081 if (!Cond) 2082 return true; 2083 2084 // Do LFTR to simplify the exit ICMP to EQ/NE 2085 ICmpInst::Predicate Pred = Cond->getPredicate(); 2086 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ) 2087 return true; 2088 2089 // Look for a loop invariant RHS 2090 Value *LHS = Cond->getOperand(0); 2091 Value *RHS = Cond->getOperand(1); 2092 if (!L->isLoopInvariant(RHS)) { 2093 if (!L->isLoopInvariant(LHS)) 2094 return true; 2095 std::swap(LHS, RHS); 2096 } 2097 // Look for a simple IV counter LHS 2098 PHINode *Phi = dyn_cast<PHINode>(LHS); 2099 if (!Phi) 2100 Phi = getLoopPhiForCounter(LHS, L); 2101 2102 if (!Phi) 2103 return true; 2104 2105 // Do LFTR if PHI node is defined in the loop, but is *not* a counter. 2106 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch()); 2107 if (Idx < 0) 2108 return true; 2109 2110 // Do LFTR if the exit condition's IV is *not* a simple counter. 2111 Value *IncV = Phi->getIncomingValue(Idx); 2112 return Phi != getLoopPhiForCounter(IncV, L); 2113 } 2114 2115 /// Return true if undefined behavior would provable be executed on the path to 2116 /// OnPathTo if Root produced a posion result. 
Note that this doesn't say
2117 /// anything about whether OnPathTo is actually executed or whether Root is
2118 /// actually poison. This can be used to assess whether a new use of Root can
2119 /// be added at a location which is control equivalent with OnPathTo (such as
2120 /// immediately before it) without introducing UB which didn't previously
2121 /// exist. Note that a false result conveys no information.
2122 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
2123                                           Instruction *OnPathTo,
2124                                           DominatorTree *DT) {
2125   // Basic approach is to assume Root is poison, propagate poison forward
2126   // through all users we can easily track, and then check whether any of those
2127   // users are provably UB and must execute before our exiting block might
2128   // exit.
2129
2130   // The set of all recursive users we've visited (which are assumed to all be
2131   // poison because of said visit).
2132   SmallSet<const Value *, 16> KnownPoison;
2133   SmallVector<const Instruction*, 16> Worklist;
2134   Worklist.push_back(Root);
2135   while (!Worklist.empty()) {
2136     const Instruction *I = Worklist.pop_back_val();
2137
2138     // If we know this must trigger UB on a path leading to our target.
2139     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2140       return true;
2141
2142     // If we can't analyze propagation through this instruction, just skip it
2143     // and transitive users. Safe as false is a conservative result.
2144     if (!propagatesFullPoison(I) && I != Root)
2145       continue;
2146
2147     if (KnownPoison.insert(I).second)
2148       for (const User *User : I->users())
2149         Worklist.push_back(cast<Instruction>(User));
2150   }
2151
2152   // Might be non-UB, or might have a path we couldn't prove must execute on
2153   // the way to the exiting bb.
2154   return false;
2155 }
2156
2157 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2158 /// down to checking that all operands are constant and listing instructions
2159 /// that may hide undef.
2160 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2161                                unsigned Depth) {
2162   if (isa<Constant>(V))
2163     return !isa<UndefValue>(V);
2164
2165   if (Depth >= 6)
2166     return false;
2167
2168   // Conservatively handle non-constant non-instructions. For example, Arguments
2169   // may be undef.
2170   Instruction *I = dyn_cast<Instruction>(V);
2171   if (!I)
2172     return false;
2173
2174   // Load and return values may be undef.
2175   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2176     return false;
2177
2178   // Optimistically handle other instructions.
2179   for (Value *Op : I->operands()) {
2180     if (!Visited.insert(Op).second)
2181       continue;
2182     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2183       return false;
2184   }
2185   return true;
2186 }
2187
2188 /// Return true if the given value is concrete. We must prove that undef can
2189 /// never reach it.
2190 ///
2191 /// TODO: If we decide that this is a good approach to checking for undef, we
2192 /// may factor it into a common location.
2193 static bool hasConcreteDef(Value *V) {
2194   SmallPtrSet<Value*, 8> Visited;
2195   Visited.insert(V);
2196   return hasConcreteDefImpl(V, Visited, 0);
2197 }
2198
2199 /// Return true if the only users of this IV and its increment are the (soon to
2200 /// be rewritten) loop exit test and the recurrence itself.
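///
/// For instance (an invented example): a counter
///   %i = phi i64 [ 0, %preheader ], [ %i.next, %latch ]
///   %i.next = add i64 %i, 1
/// whose only user outside the recurrence is the exit compare is almost dead:
/// if LFTR rewrites the exit test against a different IV, both instructions
/// can be deleted.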
2201 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2202   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2203   Value *IncV = Phi->getIncomingValue(LatchIdx);
2204
2205   for (User *U : Phi->users())
2206     if (U != Cond && U != IncV) return false;
2207
2208   for (User *U : IncV->users())
2209     if (U != Cond && U != Phi) return false;
2210   return true;
2211 }
2212
2213 /// Return true if the given phi is a "counter" in L. A counter is an
2214 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
2215 /// step of 1. Note that L must have exactly one latch.
2216 static bool isLoopCounter(PHINode* Phi, Loop *L,
2217                           ScalarEvolution *SE) {
2218   assert(Phi->getParent() == L->getHeader());
2219   assert(L->getLoopLatch());
2220
2221   if (!SE->isSCEVable(Phi->getType()))
2222     return false;
2223
2224   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2225   if (!AR || AR->getLoop() != L || !AR->isAffine())
2226     return false;
2227
2228   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2229   if (!Step || !Step->isOne())
2230     return false;
2231
2232   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2233   Value *IncV = Phi->getIncomingValue(LatchIdx);
2234   return (getLoopPhiForCounter(IncV, L) == Phi);
2235 }
2236
2237 /// Search the loop header for a loop counter (an add rec w/step of one)
2238 /// suitable for use by LFTR. If multiple counters are available, select the
2239 /// "best" one based on profitability heuristics.
2240 ///
2241 /// BECount may be an i8* pointer type. The pointer difference is already a
2242 /// valid count without scaling the address stride, so it remains a pointer
2243 /// expression as far as SCEV is concerned.
2244 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2245                                 const SCEV *BECount,
2246                                 ScalarEvolution *SE, DominatorTree *DT) {
2247   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2248
2249   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2250
2251   // Loop over all of the PHI nodes, looking for a simple counter.
2252   PHINode *BestPhi = nullptr;
2253   const SCEV *BestInit = nullptr;
2254   BasicBlock *LatchBlock = L->getLoopLatch();
2255   assert(LatchBlock && "Must be in simplified form");
2256   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2257
2258   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2259     PHINode *Phi = cast<PHINode>(I);
2260     if (!isLoopCounter(Phi, L, SE))
2261       continue;
2262
2263     // Avoid comparing an integer IV against a pointer Limit.
2264     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2265       continue;
2266
2267     const auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2268
2269     // AR may be a pointer type, while BECount is an integer type.
2270     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2271     // AR may not be a narrower type, or we may never exit.
2272     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2273     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2274       continue;
2275
2276     // Avoid reusing a potentially undef value to compute other values that may
2277     // have originally had a concrete definition.
2278     if (!hasConcreteDef(Phi)) {
2279       // We explicitly allow unknown phis as long as they are already used by
2280       // the loop exit test. This is legal since performing LFTR could not
2281       // increase the number of undef users.
2282 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock); 2283 if (!isLoopExitTestBasedOn(Phi, ExitingBB) && 2284 !isLoopExitTestBasedOn(IncPhi, ExitingBB)) 2285 continue; 2286 } 2287 2288 // Avoid introducing undefined behavior due to poison which didn't exist in 2289 // the original program. (Annoyingly, the rules for poison and undef 2290 // propagation are distinct, so this does NOT cover the undef case above.) 2291 // We have to ensure that we don't introduce UB by introducing a use on an 2292 // iteration where said IV produces poison. Our strategy here differs for 2293 // pointers and integer IVs. For integers, we strip and reinfer as needed, 2294 // see code in linearFunctionTestReplace. For pointers, we restrict 2295 // transforms as there is no good way to reinfer inbounds once lost. 2296 if (!Phi->getType()->isIntegerTy() && 2297 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) 2298 continue; 2299 2300 const SCEV *Init = AR->getStart(); 2301 2302 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { 2303 // Don't force a live loop counter if another IV can be used. 2304 if (AlmostDeadIV(Phi, LatchBlock, Cond)) 2305 continue; 2306 2307 // Prefer to count-from-zero. This is a more "canonical" counter form. It 2308 // also prefers integer to pointer IVs. 2309 if (BestInit->isZero() != Init->isZero()) { 2310 if (BestInit->isZero()) 2311 continue; 2312 } 2313 // If two IVs both count from zero or both count from nonzero then the 2314 // narrower is likely a dead phi that has been widened. Use the wider phi 2315 // to allow the other to be eliminated. 2316 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) 2317 continue; 2318 } 2319 BestPhi = Phi; 2320 BestInit = Init; 2321 } 2322 return BestPhi; 2323 } 2324 2325 /// Insert an IR expression which computes the value held by the IV IndVar 2326 /// (which must be an loop counter w/unit stride) after the backedge of loop L 2327 /// is taken ExitCount times. 2328 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, 2329 const SCEV *ExitCount, bool UsePostInc, Loop *L, 2330 SCEVExpander &Rewriter, ScalarEvolution *SE) { 2331 assert(isLoopCounter(IndVar, L, SE)); 2332 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 2333 const SCEV *IVInit = AR->getStart(); 2334 2335 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter 2336 // finds a valid pointer IV. Sign extend ExitCount in order to materialize a 2337 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing 2338 // the existing GEPs whenever possible. 2339 if (IndVar->getType()->isPointerTy() && 2340 !ExitCount->getType()->isPointerTy()) { 2341 // IVOffset will be the new GEP offset that is interpreted by GEP as a 2342 // signed value. ExitCount on the other hand represents the loop trip count, 2343 // which is an unsigned value. FindLoopCounter only allows induction 2344 // variables that have a positive unit stride of one. This means we don't 2345 // have to handle the case of negative offsets (yet) and just need to zero 2346 // extend ExitCount. 2347 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); 2348 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy); 2349 if (UsePostInc) 2350 IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy)); 2351 2352 // Expand the code for the iteration count. 
2353 assert(SE->isLoopInvariant(IVOffset, L) && 2354 "Computed iteration count is not loop invariant!"); 2355 2356 // We could handle pointer IVs other than i8*, but we need to compensate for 2357 // gep index scaling. 2358 assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), 2359 cast<PointerType>(IndVar->getType()) 2360 ->getElementType())->isOne() && 2361 "unit stride pointer IV must be i8*"); 2362 2363 const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset); 2364 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2365 return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI); 2366 } else { 2367 // In any other case, convert both IVInit and ExitCount to integers before 2368 // comparing. This may result in SCEV expansion of pointers, but in practice 2369 // SCEV will fold the pointer arithmetic away as such: 2370 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). 2371 // 2372 // Valid Cases: (1) both integers is most common; (2) both may be pointers 2373 // for simple memset-style loops. 2374 // 2375 // IVInit integer and ExitCount pointer would only occur if a canonical IV 2376 // were generated on top of case #2, which is not expected. 2377 2378 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 2379 // For unit stride, IVCount = Start + ExitCount with 2's complement 2380 // overflow. 2381 2382 // For integer IVs, truncate the IV before computing IVInit + BECount, 2383 // unless we know apriori that the limit must be a constant when evaluated 2384 // in the bitwidth of the IV. We prefer (potentially) keeping a truncate 2385 // of the IV in the loop over a (potentially) expensive expansion of the 2386 // widened exit count add(zext(add)) expression. 2387 if (SE->getTypeSizeInBits(IVInit->getType()) 2388 > SE->getTypeSizeInBits(ExitCount->getType())) { 2389 if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount)) 2390 ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType()); 2391 else 2392 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType()); 2393 } 2394 2395 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount); 2396 2397 if (UsePostInc) 2398 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); 2399 2400 // Expand the code for the iteration count. 2401 assert(SE->isLoopInvariant(IVLimit, L) && 2402 "Computed iteration count is not loop invariant!"); 2403 // Ensure that we generate the same type as IndVar, or a smaller integer 2404 // type. In the presence of null pointer values, we have an integer type 2405 // SCEV expression (IVInit) for a pointer type IV value (IndVar). 2406 Type *LimitTy = ExitCount->getType()->isPointerTy() ? 2407 IndVar->getType() : ExitCount->getType(); 2408 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2409 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); 2410 } 2411 } 2412 2413 /// This method rewrites the exit condition of the loop to be a canonical != 2414 /// comparison against the incremented loop induction variable. This pass is 2415 /// able to rewrite the exit tests of any loop where the SCEV analysis can 2416 /// determine a loop-invariant trip count of the loop, which is actually a much 2417 /// broader range than just linear tests. 
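///
/// Illustrative sketch (hypothetical IR): an exit test such as
///   %cmp = icmp slt i32 %iv, %limit
/// becomes an equality test against the expanded limit from genLoopLimit,
/// e.g. %cmp = icmp ne i32 %iv.next, %lftr.limit, where the post-incremented
/// IV is used when the exiting block is also the latch and doing so is safe.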
2418 bool IndVarSimplify:: 2419 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 2420 const SCEV *ExitCount, 2421 PHINode *IndVar, SCEVExpander &Rewriter) { 2422 assert(L->getLoopLatch() && "Loop no longer in simplified form?"); 2423 assert(isLoopCounter(IndVar, L, SE)); 2424 Instruction * const IncVar = 2425 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch())); 2426 2427 // Initialize CmpIndVar to the preincremented IV. 2428 Value *CmpIndVar = IndVar; 2429 bool UsePostInc = false; 2430 2431 // If the exiting block is the same as the backedge block, we prefer to 2432 // compare against the post-incremented value, otherwise we must compare 2433 // against the preincremented value. 2434 if (ExitingBB == L->getLoopLatch()) { 2435 // For pointer IVs, we chose to not strip inbounds which requires us not 2436 // to add a potentially UB introducing use. We need to either a) show 2437 // the loop test we're modifying is already in post-inc form, or b) show 2438 // that adding a use must not introduce UB. 2439 bool SafeToPostInc = 2440 IndVar->getType()->isIntegerTy() || 2441 isLoopExitTestBasedOn(IncVar, ExitingBB) || 2442 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT); 2443 if (SafeToPostInc) { 2444 UsePostInc = true; 2445 CmpIndVar = IncVar; 2446 } 2447 } 2448 2449 // It may be necessary to drop nowrap flags on the incrementing instruction 2450 // if either LFTR moves from a pre-inc check to a post-inc check (in which 2451 // case the increment might have previously been poison on the last iteration 2452 // only) or if LFTR switches to a different IV that was previously dynamically 2453 // dead (and as such may be arbitrarily poison). We remove any nowrap flags 2454 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc 2455 // check), because the pre-inc addrec flags may be adopted from the original 2456 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags. 2457 // TODO: This handling is inaccurate for one case: If we switch to a 2458 // dynamically dead IV that wraps on the first loop iteration only, which is 2459 // not covered by the post-inc addrec. (If the new IV was not dynamically 2460 // dead, it could not be poison on the first iteration in the first place.) 2461 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) { 2462 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar)); 2463 if (BO->hasNoUnsignedWrap()) 2464 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap()); 2465 if (BO->hasNoSignedWrap()) 2466 BO->setHasNoSignedWrap(AR->hasNoSignedWrap()); 2467 } 2468 2469 Value *ExitCnt = genLoopLimit( 2470 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE); 2471 assert(ExitCnt->getType()->isPointerTy() == 2472 IndVar->getType()->isPointerTy() && 2473 "genLoopLimit missed a cast"); 2474 2475 // Insert a new icmp_ne or icmp_eq instruction before the branch. 2476 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2477 ICmpInst::Predicate P; 2478 if (L->contains(BI->getSuccessor(0))) 2479 P = ICmpInst::ICMP_NE; 2480 else 2481 P = ICmpInst::ICMP_EQ; 2482 2483 IRBuilder<> Builder(BI); 2484 2485 // The new loop exit condition should reuse the debug location of the 2486 // original loop exit condition. 
2487 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition())) 2488 Builder.SetCurrentDebugLocation(Cond->getDebugLoc()); 2489 2490 // For integer IVs, if we evaluated the limit in the narrower bitwidth to 2491 // avoid the expensive expansion of the limit expression in the wider type, 2492 // emit a truncate to narrow the IV to the ExitCount type. This is safe 2493 // since we know (from the exit count bitwidth), that we can't self-wrap in 2494 // the narrower type. 2495 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); 2496 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); 2497 if (CmpIndVarSize > ExitCntSize) { 2498 assert(!CmpIndVar->getType()->isPointerTy() && 2499 !ExitCnt->getType()->isPointerTy()); 2500 2501 // Before resorting to actually inserting the truncate, use the same 2502 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend 2503 // the other side of the comparison instead. We still evaluate the limit 2504 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to 2505 // a truncate within in. 2506 bool Extended = false; 2507 const SCEV *IV = SE->getSCEV(CmpIndVar); 2508 const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar), 2509 ExitCnt->getType()); 2510 const SCEV *ZExtTrunc = 2511 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType()); 2512 2513 if (ZExtTrunc == IV) { 2514 Extended = true; 2515 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(), 2516 "wide.trip.count"); 2517 } else { 2518 const SCEV *SExtTrunc = 2519 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType()); 2520 if (SExtTrunc == IV) { 2521 Extended = true; 2522 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(), 2523 "wide.trip.count"); 2524 } 2525 } 2526 2527 if (Extended) { 2528 bool Discard; 2529 L->makeLoopInvariant(ExitCnt, Discard); 2530 } else 2531 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), 2532 "lftr.wideiv"); 2533 } 2534 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 2535 << " LHS:" << *CmpIndVar << '\n' 2536 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==") 2537 << "\n" 2538 << " RHS:\t" << *ExitCnt << "\n" 2539 << "ExitCount:\t" << *ExitCount << "\n" 2540 << " was: " << *BI->getCondition() << "\n"); 2541 2542 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); 2543 Value *OrigCond = BI->getCondition(); 2544 // It's tempting to use replaceAllUsesWith here to fully replace the old 2545 // comparison, but that's not immediately safe, since users of the old 2546 // comparison may not be dominated by the new comparison. Instead, just 2547 // update the branch to use the new comparison; in the common case this 2548 // will make old comparison dead. 2549 BI->setCondition(Cond); 2550 DeadInsts.push_back(OrigCond); 2551 2552 ++NumLFTR; 2553 return true; 2554 } 2555 2556 //===----------------------------------------------------------------------===// 2557 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders. 2558 //===----------------------------------------------------------------------===// 2559 2560 /// If there's a single exit block, sink any loop-invariant values that 2561 /// were defined in the preheader but not used inside the loop into the 2562 /// exit block to reduce register pressure in the loop. 
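///
/// For example (invented for illustration): a value such as
///   %bound = add i64 %a, %b
/// computed in the preheader (often by SCEVExpander) whose only users are in
/// the exit block gets moved into the exit block, so it no longer ties up a
/// register across the loop body.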
2563 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { 2564 BasicBlock *ExitBlock = L->getExitBlock(); 2565 if (!ExitBlock) return false; 2566 2567 BasicBlock *Preheader = L->getLoopPreheader(); 2568 if (!Preheader) return false; 2569 2570 bool MadeAnyChanges = false; 2571 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); 2572 BasicBlock::iterator I(Preheader->getTerminator()); 2573 while (I != Preheader->begin()) { 2574 --I; 2575 // New instructions were inserted at the end of the preheader. 2576 if (isa<PHINode>(I)) 2577 break; 2578 2579 // Don't move instructions which might have side effects, since the side 2580 // effects need to complete before instructions inside the loop. Also don't 2581 // move instructions which might read memory, since the loop may modify 2582 // memory. Note that it's okay if the instruction might have undefined 2583 // behavior: LoopSimplify guarantees that the preheader dominates the exit 2584 // block. 2585 if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 2586 continue; 2587 2588 // Skip debug info intrinsics. 2589 if (isa<DbgInfoIntrinsic>(I)) 2590 continue; 2591 2592 // Skip eh pad instructions. 2593 if (I->isEHPad()) 2594 continue; 2595 2596 // Don't sink alloca: we never want to sink static alloca's out of the 2597 // entry block, and correctly sinking dynamic alloca's requires 2598 // checks for stacksave/stackrestore intrinsics. 2599 // FIXME: Refactor this check somehow? 2600 if (isa<AllocaInst>(I)) 2601 continue; 2602 2603 // Determine if there is a use in or before the loop (direct or 2604 // otherwise). 2605 bool UsedInLoop = false; 2606 for (Use &U : I->uses()) { 2607 Instruction *User = cast<Instruction>(U.getUser()); 2608 BasicBlock *UseBB = User->getParent(); 2609 if (PHINode *P = dyn_cast<PHINode>(User)) { 2610 unsigned i = 2611 PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 2612 UseBB = P->getIncomingBlock(i); 2613 } 2614 if (UseBB == Preheader || L->contains(UseBB)) { 2615 UsedInLoop = true; 2616 break; 2617 } 2618 } 2619 2620 // If there is, the def must remain in the preheader. 2621 if (UsedInLoop) 2622 continue; 2623 2624 // Otherwise, sink it to the exit block. 2625 Instruction *ToMove = &*I; 2626 bool Done = false; 2627 2628 if (I != Preheader->begin()) { 2629 // Skip debug info intrinsics. 2630 do { 2631 --I; 2632 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); 2633 2634 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) 2635 Done = true; 2636 } else { 2637 Done = true; 2638 } 2639 2640 MadeAnyChanges = true; 2641 ToMove->moveBefore(*ExitBlock, InsertPt); 2642 if (Done) break; 2643 InsertPt = ToMove->getIterator(); 2644 } 2645 2646 return MadeAnyChanges; 2647 } 2648 2649 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { 2650 SmallVector<BasicBlock*, 16> ExitingBlocks; 2651 L->getExitingBlocks(ExitingBlocks); 2652 2653 // Form an expression for the maximum exit count possible for this loop. We 2654 // merge the max and exact information to approximate a version of 2655 // getConstantMaxBackedgeTakenCount which isn't restricted to just constants. 2656 // TODO: factor this out as a version of getConstantMaxBackedgeTakenCount which 2657 // isn't guaranteed to return a constant. 
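  // For instance (illustrative only): with one analyzable exit whose count is
  // %n and a constant max backedge-taken count of 100, the merged expression
  // built below is umin(%n, 100).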
2658 SmallVector<const SCEV*, 4> ExitCounts; 2659 const SCEV *MaxConstEC = SE->getConstantMaxBackedgeTakenCount(L); 2660 if (!isa<SCEVCouldNotCompute>(MaxConstEC)) 2661 ExitCounts.push_back(MaxConstEC); 2662 for (BasicBlock *ExitingBB : ExitingBlocks) { 2663 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2664 if (!isa<SCEVCouldNotCompute>(ExitCount)) { 2665 assert(DT->dominates(ExitingBB, L->getLoopLatch()) && 2666 "We should only have known counts for exiting blocks that " 2667 "dominate latch!"); 2668 ExitCounts.push_back(ExitCount); 2669 } 2670 } 2671 if (ExitCounts.empty()) 2672 return false; 2673 const SCEV *MaxExitCount = SE->getUMinFromMismatchedTypes(ExitCounts); 2674 2675 bool Changed = false; 2676 for (BasicBlock *ExitingBB : ExitingBlocks) { 2677 // If our exitting block exits multiple loops, we can only rewrite the 2678 // innermost one. Otherwise, we're changing how many times the innermost 2679 // loop runs before it exits. 2680 if (LI->getLoopFor(ExitingBB) != L) 2681 continue; 2682 2683 // Can't rewrite non-branch yet. 2684 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 2685 if (!BI) 2686 continue; 2687 2688 // If already constant, nothing to do. 2689 if (isa<Constant>(BI->getCondition())) 2690 continue; 2691 2692 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2693 if (isa<SCEVCouldNotCompute>(ExitCount)) 2694 continue; 2695 2696 // If we know we'd exit on the first iteration, rewrite the exit to 2697 // reflect this. This does not imply the loop must exit through this 2698 // exit; there may be an earlier one taken on the first iteration. 2699 // TODO: Given we know the backedge can't be taken, we should go ahead 2700 // and break it. Or at least, kill all the header phis and simplify. 2701 if (ExitCount->isZero()) { 2702 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); 2703 auto *OldCond = BI->getCondition(); 2704 auto *NewCond = ExitIfTrue ? ConstantInt::getTrue(OldCond->getType()) : 2705 ConstantInt::getFalse(OldCond->getType()); 2706 BI->setCondition(NewCond); 2707 if (OldCond->use_empty()) 2708 DeadInsts.push_back(OldCond); 2709 Changed = true; 2710 continue; 2711 } 2712 2713 // If we end up with a pointer exit count, bail. Note that we can end up 2714 // with a pointer exit count for one exiting block, and not for another in 2715 // the same loop. 2716 if (!ExitCount->getType()->isIntegerTy() || 2717 !MaxExitCount->getType()->isIntegerTy()) 2718 continue; 2719 2720 Type *WiderType = 2721 SE->getWiderType(MaxExitCount->getType(), ExitCount->getType()); 2722 ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType); 2723 MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType); 2724 assert(MaxExitCount->getType() == ExitCount->getType()); 2725 2726 // Can we prove that some other exit must be taken strictly before this 2727 // one? 2728 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, 2729 MaxExitCount, ExitCount)) { 2730 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); 2731 auto *OldCond = BI->getCondition(); 2732 auto *NewCond = ExitIfTrue ? ConstantInt::getFalse(OldCond->getType()) : 2733 ConstantInt::getTrue(OldCond->getType()); 2734 BI->setCondition(NewCond); 2735 if (OldCond->use_empty()) 2736 DeadInsts.push_back(OldCond); 2737 Changed = true; 2738 continue; 2739 } 2740 2741 // TODO: If we can prove that the exiting iteration is equal to the exit 2742 // count for this exit and that no previous exit oppurtunities exist within 2743 // the loop, then we can discharge all other exits. 
(May fall out of 2744 // previous TODO.) 2745 } 2746 2747 // Finally, see if we can rewrite our exit conditions into a loop invariant 2748 // form. If we have a read-only loop, and we can tell that we must exit down 2749 // a path which does not need any of the values computed within the loop, we 2750 // can rewrite the loop to exit on the first iteration. Note that this 2751 // doesn't either a) tell us the loop exits on the first iteration (unless 2752 // *all* exits are predicateable) or b) tell us *which* exit might be taken. 2753 // This transformation looks a lot like a restricted form of dead loop 2754 // elimination, but restricted to read-only loops and without neccesssarily 2755 // needing to kill the loop entirely. 2756 if (!LoopPredication) 2757 return Changed; 2758 2759 if (!SE->hasLoopInvariantBackedgeTakenCount(L)) 2760 return Changed; 2761 2762 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits 2763 // through *explicit* control flow. We have to eliminate the possibility of 2764 // implicit exits (see below) before we know it's truly exact. 2765 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L); 2766 if (isa<SCEVCouldNotCompute>(ExactBTC) || 2767 !SE->isLoopInvariant(ExactBTC, L) || 2768 !isSafeToExpand(ExactBTC, *SE)) 2769 return Changed; 2770 2771 auto BadExit = [&](BasicBlock *ExitingBB) { 2772 // If our exiting block exits multiple loops, we can only rewrite the 2773 // innermost one. Otherwise, we're changing how many times the innermost 2774 // loop runs before it exits. 2775 if (LI->getLoopFor(ExitingBB) != L) 2776 return true; 2777 2778 // Can't rewrite non-branch yet. 2779 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 2780 if (!BI) 2781 return true; 2782 2783 // If already constant, nothing to do. 2784 if (isa<Constant>(BI->getCondition())) 2785 return true; 2786 2787 // If the exit block has phis, we need to be able to compute the values 2788 // within the loop which contains them. This assumes trivially lcssa phis 2789 // have already been removed; TODO: generalize 2790 BasicBlock *ExitBlock = 2791 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0); 2792 if (!ExitBlock->phis().empty()) 2793 return true; 2794 2795 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2796 assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count"); 2797 if (!SE->isLoopInvariant(ExitCount, L) || 2798 !isSafeToExpand(ExitCount, *SE)) 2799 return true; 2800 2801 return false; 2802 }; 2803 2804 // If we have any exits which can't be predicated themselves, than we can't 2805 // predicate any exit which isn't guaranteed to execute before it. Consider 2806 // two exits (a) and (b) which would both exit on the same iteration. If we 2807 // can predicate (b), but not (a), and (a) preceeds (b) along some path, then 2808 // we could convert a loop from exiting through (a) to one exiting through 2809 // (b). Note that this problem exists only for exits with the same exit 2810 // count, and we could be more aggressive when exit counts are known inequal. 2811 llvm::sort(ExitingBlocks, 2812 [&](BasicBlock *A, BasicBlock *B) { 2813 // std::sort sorts in ascending order, so we want the inverse of 2814 // the normal dominance relation, plus a tie breaker for blocks 2815 // unordered by dominance. 2816 if (DT->properlyDominates(A, B)) return true; 2817 if (DT->properlyDominates(B, A)) return false; 2818 return A->getName() < B->getName(); 2819 }); 2820 // Check to see if our exit blocks are a total order (i.e. 
  // exits before the backedge).  If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return Changed;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return Changed;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count.  The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
        return DT->dominates(ExitingBB, L->getLoopLatch());
      }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable.  Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
  // suggestions on how to improve this?  I can obviously bail out for outer
  // loops, but that seems less than ideal.  MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO: isGuaranteedToTransfer
      if (I.mayHaveSideEffects() || I.mayThrow())
        return Changed;

  // Finally, do the actual predication for all predicatable blocks.  A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch.  Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic.  We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
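  // Concretely, for each remaining exit the new branch condition is a
  // comparison of that exit's count against ExactBTC (constant-folded when
  // the two are the same SCEV), zero-extending both operands to a common
  // width when their types differ.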
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // lazily generated if needed
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.push_back(OldCond);
    Changed = true;
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");
  bool Changed = false;

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

#ifndef NDEBUG
  // Used below for a consistency check only.
  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl)
    Changed |= rewriteLoopExitValues(L, Rewriter);

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  Changed |= optimizeLoopExits(L, Rewriter);

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty())
    if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
      Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR, and any other changes, have not interfered with SCEV's
  // ability to compute trip count.
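  // The check below recomputes the backedge-taken count from scratch (after
  // forgetting the loop) and compares it against the count computed on entry,
  // truncating to the narrower of the two types so that IV widening does not
  // cause a spurious mismatch.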
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
  }
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}
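// Note: the legacy pass manager reaches this code through
// createIndVarSimplifyPass() (the -indvars pass above), while the new pass
// manager uses IndVarSimplifyPass::run(), registered as the "indvars" loop
// pass.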