//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened , "Number of indvars widened");
STATISTIC(NumReplaced , "Number of exit values replaced");
STATISTIC(NumLFTR , "Number of loop exit tests replaced");
STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
             "effect in release builds. (Note: this adds additional SCEV "
             "queries potentially changing the analysis result)"));

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
    "indvars-post-increment-ranges", cl::Hidden,
    cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
    cl::init(true));

static cl::opt<bool>
    DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
                cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
    LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                    cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
    AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                    cl::desc("Allow widening of indvars to eliminate s/zext"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool WidenIndVars;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// See if we can convert an exit condition from signed to unsigned.
  /// (See inline comment about why this is duplicated from simplifyAndExtend.)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzable exit counts.
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t.
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
///   for (double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for (int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find the exit condition, which is an fcmp. If it doesn't exist, or if it
  // isn't only used by a branch, we can't transform it.
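  // Illustrative shape of the pattern being matched here (example IR, not
  // taken from a specific test):
  //   %iv = phi double [ 0.0, %preheader ], [ %iv.next, %latch ]
  //   %iv.next = fadd double %iv, 1.0
  //   %cmp = fcmp olt double %iv.next, 1.0e4
  //   br i1 %cmp, label %loop, label %exit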
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    //   while (i <= Exit)  or  until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
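    // Worked example (illustrative values): with ExitValue = 2147483646 and
    // IncValue = 4, the increment past the exit value computes
    // ExitValue + IncValue = 2147483650, which wraps to a negative i32. The
    // fp IV being replaced would simply have kept growing, so reject.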
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    //   while (i >= Exit)  or  until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all? (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.) Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get the condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
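        // Illustrative source-level example (assumed, not from a test): in
        //   for (i = start; ; ++i) {
        //     if (inv_cond) break;   // exit branch dominates the latch
        //     ...
        //   }
        // the exit, if taken at all, is taken on the first iteration, so the
        // exit PHI can only observe i's preheader value, i.e. 'start'.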
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than that on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

//===----------------------------------------------------------------------===//
// Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
      : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode*, 8> LoopPhis;
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    LoopPhis.push_back(cast<PHINode>(I));
  }
  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extension until needed, and avoid running
    // other SCEV-based analyses prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
                                   &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated Phi node if so. Otherwise, return null.
/// Note that this is less general than SCEV's AddRec checking.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    LLVM_FALLTHROUGH;
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value. Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test. This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE.
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS.
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS.
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result.
/// Note that this doesn't say anything about whether OnPathTo is actually
/// executed or whether Root is actually poison. This can be used to assess
/// whether a new use of Root can be added at a location which is control
/// equivalent with OnPathTo (such as immediately before it) without
/// introducing UB which didn't previously exist. Note that a false result
/// conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // The basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit).
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction*, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading to our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users. Safe as false is a conservative result.
    if (!propagatesPoison(cast<Operator>(I)) && I != Root)
      continue;

    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // the way to the exiting bb.
  return false;
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example,
  // Arguments may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value*, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if this IV has any uses other than the (soon to be rewritten)
/// loop exit test.
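/// Illustrative example (example IR, not from a test): in
///   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add i32 %iv, 1
///   %cmp = icmp ne i32 %iv.next, %n
/// the only users of %iv and %iv.next are each other and the exit compare,
/// so the IV is "almost dead" once the exit test is rewritten.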
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
  Value *IncV = Phi->getIncomingValue(LatchIdx);

  for (User *U : Phi->users())
    if (U != Cond && U != IncV) return false;

  for (User *U : IncV->users())
    if (U != Cond && U != Phi) return false;
  return true;
}

/// Return true if the given phi is a "counter" in L. A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1. Note that L must have exactly one latch.
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}

/// Search the loop header for a loop counter (an add rec with a step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison. Our strategy here differs for
    // pointers and integer IVs. For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace. For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter with unit stride) after the backedge of loop
/// L is taken ExitCount times.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip count,
    // which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive unit stride. This means we don't have to
    // handle the case of negative offsets (yet) and just need to zero extend
    // ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    // We could handle pointer IVs other than i8*, but we need to compensate for
    // gep index scaling.
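    // Illustrative example of the scaling problem: for an i32* IV, each step
    // advances the pointer by 4 bytes, so an exit count of N corresponds to a
    // byte offset of 4*N; without dividing out that element size, the
    // computed limit would be wrong. Hence the i8* (unit element size)
    // restriction asserted below.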
    assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
                             cast<PointerType>(IndVar->getType())
                                 ->getElementType())->isOne() &&
           "unit stride pointer IV must be i8*");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in practice
    // SCEV will fold the pointer arithmetic away as such:
    //   BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know a priori that the limit must be a constant when evaluated
    // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType())
        > SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
        IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}

/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable. This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction * const IncVar =
      cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
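  // Illustrative distinction (assumed loop 'for (i = 0; i != n; ++i)'): the
  // pre-incremented value is the header PHI 'i', while the post-incremented
  // value is 'i + 1' computed in the latch. Comparing against the
  // post-incremented value requires a limit one greater, which genLoopLimit
  // accounts for via UsePostInc.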
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds, which requires us not
    // to add a potentially UB-introducing use. We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type. This is safe
  // since we know (from the exit count bitwidth) that we can't self-wrap in
  // the narrower type.
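  // Sketch of the two shapes (illustrative, i64 IV with an i32 exit count):
  //   truncate in loop:  icmp ne i32 (trunc i64 %iv to i32), %exitcnt
  //   extend outside:    icmp ne i64 %iv, (zext i32 %exitcnt to i64)
  // The code below prefers the second form when SCEV can prove the IV equals
  // its own zext/sext-of-trunc.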
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead. We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
        SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
          SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make the old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}

//===----------------------------------------------------------------------===//
// sinkUnusedInvariants. A late subpass to clean up loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop. Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
            PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}

static void replaceExitCond(BranchInst *BI, Value *NewCond,
                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  auto *OldCond = BI->getCondition();
  BI->setCondition(NewCond);
  if (OldCond->use_empty())
    DeadInsts.emplace_back(OldCond);
}

static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  auto *OldCond = BI->getCondition();
  auto *NewCond =
      ConstantInt::get(OldCond->getType(), IsTaken ? ExitIfTrue : !ExitIfTrue);
  replaceExitCond(BI, NewCond, DeadInsts);
}

static void replaceLoopPHINodesWithPreheaderValues(
    Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
  auto *LoopPreheader = L->getLoopPreheader();
  auto *LoopHeader = L->getHeader();
  for (auto &PN : LoopHeader->phis()) {
    auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
    PN.replaceAllUsesWith(PreheaderIncoming);
    DeadInsts.emplace_back(&PN);
  }
}

static void replaceWithInvariantCond(
    const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
    const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  Rewriter.setInsertPoint(BI);
  auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
  auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  if (ExitIfTrue)
    InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
  IRBuilder<> Builder(BI);
  auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
                                     BI->getCondition()->getName());
  replaceExitCond(BI, NewCond, DeadInsts);
}

static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
    const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
    ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  ICmpInst::Predicate Pred;
  Value *LHS, *RHS;
  BasicBlock *TrueSucc, *FalseSucc;
  if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
                      m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
    return false;

  assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
         "Not a loop exit!");

  // 'LHS pred RHS' should now mean that we stay in the loop.
  if (L->contains(FalseSucc))
    Pred = CmpInst::getInversePredicate(Pred);

  // If we are proving loop exit, invert the predicate.
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
    foldExit(L, ExitingBB, Inverted, DeadInsts);
    return true;
  }
  // Further logic works for non-inverted condition only.
  if (Inverted)
    return false;

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    const SCEV *One = SE->getOne(MaxIter->getType());
    MaxIter = SE->getMinusSCEV(MaxIter, One);
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
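  // Illustrative example (assumed shapes): if LHSS is an increasing AddRec
  // {start,+,1} and the check is 'LHSS slt RHSS' with invariant RHSS, then
  // the check holds on each of the first MaxIter iterations iff it holds on
  // the last one, i.e. iff 'start + (MaxIter - 1) slt RHSS', which is
  // loop-invariant.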
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return false;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    foldExit(L, ExitingBB, Inverted, DeadInsts);
  else
    replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
                             Rewriter, DeadInsts);

  return true;
}

bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part of SimplifyIndVars reasoning.
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags.  The alternative would be to extend the set of
  // "interesting" IV users to include the icmp, but doing that regresses
  // results in practice by querying SCEVs before the trip counts which rely
  // on them, which results in SCEV caching sub-optimal answers.  The concern
  // about caching sub-optimal results is why we only query SCEVs of the
  // loop invariant RHS here.

  auto *ExitingBB = L->getExitingBlock();
  if (!ExitingBB)
    return false;
  auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
  if (!BI)
    return false;
  assert(BI->isConditional() && "exit branch must be conditional");

  auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  if (!ICmp)
    return false;

  auto *LHS = ICmp->getOperand(0);
  auto *RHS = ICmp->getOperand(1);
  // Avoid computing SCEVs in the loop to avoid poisoning the cache with
  // sub-optimal results.
  if (!L->isLoopInvariant(RHS))
    return false;

  // Match (icmp signed-cond zext, RHS)
  Value *LHSOp = nullptr;
  if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
    return false;

  const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
  const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
  const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
  auto FullCR = ConstantRange::getFull(InnerBitWidth);
  FullCR = FullCR.zeroExtend(OuterBitWidth);
  if (!FullCR.contains(SE->getUnsignedRange(SE->getSCEV(RHS))))
    return false;

  // We have now matched icmp signed-cond zext(X), RHS where RHS is known to
  // fit the zero-extended range of the inner type (i.e. it is expressible as
  // zext(Y') for some narrower Y'), so both operands are non-negative and we
  // can replace the signed condition with its unsigned version.
  ICmp->setPredicate(ICmp->getUnsignedPredicate());
  return true;
}

bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Remove all exits which aren't both rewriteable and guaranteed to execute
  // on every iteration.
  llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // The loop latch must be dominated by the exiting BB.
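    // (An exiting block which doesn't dominate the latch needn't execute on
    // every iteration, so its exit count says nothing about the iterations
    // on which it is skipped.)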
    if (!DT->dominates(ExitingBB, L->getLoopLatch()))
      return true;

    return false;
  });

  if (ExitingBlocks.empty())
    return false;

  // Get a symbolic upper bound on the loop backedge taken count.
  const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(MaxExitCount))
    return false;

  // Visit our exit blocks in order of dominance.  Since all remaining exits
  // dominate the latch, there is a total dominance order between them.
  llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
    // std::sort sorts in ascending order, so we want the inverse of
    // the normal dominance relation.
    if (A == B) return false;
    if (DT->properlyDominates(A, B))
      return true;
    assert(DT->properlyDominates(B, A) &&
           "expected total dominance order!");
    return false;
  });
#ifndef NDEBUG
  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
    assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
  }
#endif

  bool Changed = false;
  bool SkipLastIter = false;
  SmallSet<const SCEV*, 8> DominatingExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount)) {
      // Okay, we do not know the exit count here.  Can we at least prove
      // that it will remain the same within the iteration space?
      auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
      auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
        return optimizeLoopExitWithUnknownExitCount(
            L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE,
            Rewriter, DeadInsts);
      };

      // TODO: We might have proved that we can skip the last iteration for
      // this check.  In this case, we only want to check the condition on
      // the pre-last iteration (MaxExitCount - 1).  However, there is a
      // nasty corner case:
      //
      //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
      //
      // If we could not prove that len != 0, then we also could not prove
      // that (len - 1) is not UINT_MAX.  If we simply query (len - 1), then
      // OptimizeCond will likely not prove anything for it, even if it could
      // prove the same fact for len.
      //
      // As a temporary solution, we query both the last and the pre-last
      // iterations in the hope that we will be able to prove triviality for
      // at least one of them.  We can stop querying MaxExitCount for this
      // case once SCEV understands that (MaxExitCount - 1) will not overflow
      // here.
      if (OptimizeCond(false, false) || OptimizeCond(true, false))
        Changed = true;
      else if (SkipLastIter)
        if (OptimizeCond(false, true) || OptimizeCond(true, true))
          Changed = true;
      continue;
    }

    if (MaxExitCount == ExitCount)
      // If the loop has more than 1 iteration, all further checks will be
      // executed 1 iteration less.
      SkipLastIter = true;

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this.  This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // We know that the backedge can't be taken, so we replace all
    // the header PHIs with values coming from the preheader.
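    // Illustrative example (hypothetical, for exposition only): given
    //   for (i = 0; ; i++) { if (i uge 0) break; ... }
    // SCEV computes this exit's count as zero, so the branch below folds to
    // an unconditional exit and each header phi (here, i) is replaced by its
    // incoming value from the preheader (here, 0).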
    if (ExitCount->isZero()) {
      foldExit(L, ExitingBB, true, DeadInsts);
      replaceLoopPHINodesWithPreheaderValues(L, DeadInsts);
      Changed = true;
      continue;
    }

    assert(ExitCount->getType()->isIntegerTy() &&
           MaxExitCount->getType()->isIntegerTy() &&
           "Exit counts must be integers");

    Type *WiderType =
        SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
    ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
    MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
    assert(MaxExitCount->getType() == ExitCount->getType());

    // Can we prove that some other exit must be taken strictly before this
    // one?
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
                                     MaxExitCount, ExitCount)) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // As we run, keep track of which exit counts we've encountered.  If we
    // find a duplicate, we've found an exit which would have exited on the
    // exiting iteration, but (from the visit order) strictly follows another
    // which does the same and is thus dead.
    if (!DominatingExitCounts.insert(ExitCount).second) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // TODO: There might be another opportunity to leverage SCEV's reasoning
    // here.  If we kept track of the min of the dominating exits seen so
    // far, we could discharge exits with EC >= MDEC.  This is less powerful
    // than the existing transform (since later exits aren't considered), but
    // potentially more powerful for any case where SCEV can prove a >=u b,
    // but neither a == b nor a >u b.  Such a case is not currently known.
  }
  return Changed;
}

bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // See if we can rewrite our exit conditions into a loop invariant form.
  // If we have a read-only loop, and we can tell that we must exit down a
  // path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration.  Note that this
  // neither a) tells us the loop exits on the first iteration (unless *all*
  // exits are predicatable) nor b) tells us *which* exit might be taken.
  // This transformation looks a lot like a restricted form of dead loop
  // elimination, but restricted to read-only loops and without necessarily
  // needing to kill the loop entirely.
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow.  We have to eliminate the possibility
  // of implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them.  This assumes trivially lcssa
    // phis have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
        BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // If we have any exits which can't be predicated themselves, then we
  // can't predicate any exit which isn't guaranteed to execute before it.
  // Consider two exits (a) and (b) which would both exit on the same
  // iteration.  If we can predicate (b), but not (a), and (a) precedes (b)
  // along some path, then we could convert a loop from exiting through (a)
  // to one exiting through (b).  Note that this problem exists only for
  // exits with the same exit count, and we could be more aggressive when
  // exit counts are known to be unequal.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse
               // of the normal dominance relation, plus a tie breaker for
               // blocks unordered by dominance.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               return A->getName() < B->getName();
             });
  // Check to see if our exit blocks are a total order (i.e. a linear chain
  // of exits before the backedge).  If they aren't, reasoning about
  // reachability is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count.  The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    return DT->dominates(ExitingBB, L->getLoopLatch());
  }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable.  Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
  // suggestions on how to improve this?
  // I can obviously bail out for outer loops, but that seems less than
  // ideal.  MemorySSA can find memory writes; is that enough for *all*
  // side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO: isGuaranteedToTransfer
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks.  A
  // couple of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch.  Hoisting introduces
  //    additional legality constraints and we leave that to dedicated logic.
  //    We want to predicate even if we can't insert a loop invariant
  //    expression as peeling or unrolling will likely reduce the cost of the
  //    otherwise loop varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble.  Some notes:
  //  - LSR currently only supports LoopSimplify-form loops.  Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with
  //    preheaders, and we're in trouble if we can't find the induction
  //    variable even when we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only.
  // Note: Since the result returned by ScalarEvolution may depend on the
  // order in which previous results are added to its cache, the call to
  // getBackedgeTakenCount() may change following SCEV queries.
  const SCEV *BackedgeTakenCount;
  if (VerifyIndvars)
    BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV.  We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated.  This helps
  // SCEV set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  if (canonicalizeExitCondition(L))
    // We've changed the predicate, but have not changed exit counts, or the
    // values which can flow through any SCEV, i.e. no invalidation needed.
    Changed = true;

  // Try to eliminate loop exits based on analyzable exit counts.
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.  Some nested loops may
    // share the same folded exit basic block, so we need to notify the
    // topmost loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();
    BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the
      // innermost loop runs before it exits.
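      // (For instance, if a block in an inner loop exits both that loop and
      // this one, rewriting its condition against this loop's trip count
      // would change how many iterations the inner loop runs.)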
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes
      // refine existing ones; this allows exit counts to be folded to zero
      // which weren't when optimizeLoopExits saw them.  Arguably, we should
      // iterate until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeaderBR))
        continue;

      // Check preconditions for proper SCEVExpander operation.  SCEV does
      // not express SCEVExpander's dependencies, such as LoopSimplify.
      // Instead, any pass that uses the SCEVExpander must do it.  This does
      // not work well for loop passes because SCEVExpander makes assumptions
      // about all loops, while LoopPassManager only forces the current loop
      // to be simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV.  Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's
  // cache can be deleted in the loop below, causing the AssertingVH in the
  // cache to trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition
  // to rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR and any other changes have not interfered with SCEV's
  // ability to compute trip count.  We may have *changed* the exit count,
  // but only by reducing it.
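  // (The old and new counts may have different bit widths, so the check
  // below truncates the wider one to the narrower type before asserting
  // that the old count is not provably unsigned-less-than the new one.)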
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
                     WidenIndVars && AllowIVWidening);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}

namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
    auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
    MemorySSA *MSSA = nullptr;
    if (MSSAAnalysis)
      MSSA = &MSSAAnalysis->getMSSA();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreserved<MemorySSAWrapperPass>();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}