//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
             "effect in release builds. (Note: this adds additional SCEV "
             "queries potentially changing the analysis result)"));

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
    "indvars-post-increment-ranges", cl::Hidden,
    cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
    cl::init(true));

static cl::opt<bool>
    DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
                cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
    LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                    cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
    AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                    cl::desc("Allow widening of indvars to eliminate s/zext"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool WidenIndVars;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to eliminate loop exits based on analyzable exit counts.
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
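  /// For example (illustrative only): in a read-only loop such as
  ///   for (i = 0; i != n; i++) { if (a[i] == 0) break; }
  /// whose computed values are unused after the loop, the early-exit test may
  /// be replaced with a loop-invariant condition, since the exact number of
  /// iterations that execute is unobservable.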
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t.
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
///   for(double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for(int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if the increment operation is not an
  // add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
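  // For example (illustrative), the pattern being matched looks like:
  //   %cmp = fcmp olt double %incr, 1.000000e+04
  //   br i1 %cmp, label %loop, label %exit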
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison cannot overflow in a way that
  // the equivalent integer operations would fail to detect. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
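    // (Illustrative: with InitValue == 0, IncValue == 2 and
    // ExitValue == INT32_MAX under a signed '<' test, the last increment
    // would wrap from INT32_MAX-1 to INT32_MIN and the loop would never
    // exit, unlike the original fp IV.)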
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
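  // (SCEV itself has no notion of floating-point recurrences, so a fp IV
  // typically blocks trip count computation until it has been rewritten; see
  // the forgetLoop call below.)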
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // rewriting, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all? (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.) Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
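        // (On the first iteration, a header PHI still holds its incoming
        // value from the preheader, which is therefore its value along this
        // exit.)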
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than that on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

//===----------------------------------------------------------------------===//
//  Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
      : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode*, 8> LoopPhis;
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    LoopPhis.push_back(cast<PHINode>(I));
  }
  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // sign/zero extend evaluation until needed, and avoid running other SCEV
    // based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
                                   &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated Phi node if so. Otherwise, return null.
/// Note that this is less general than SCEV's AddRec checking.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    LLVM_FALLTHROUGH;
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value. Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test. This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE.
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS.
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS.
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result.
/// Note that this doesn't say anything about whether OnPathTo is actually
/// executed or whether Root is actually poison. This can be used to assess
/// whether a new use of Root can be added at a location which is control
/// equivalent with OnPathTo (such as immediately before it) without
/// introducing UB which didn't previously exist. Note that a false result
/// conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // Basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit).
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction*, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading to our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users. Safe as false is a conservative result.
    if (!propagatesPoison(cast<Operator>(I)) && I != Root)
      continue;

    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // the way to the exiting bb.
  return false;
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example,
  // Arguments may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value*, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if this IV has no uses other than the (soon to be rewritten)
/// loop exit test.
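/// (For example, the common pattern 'Phi = phi [Init, ph], [IncV, latch];
/// IncV = add Phi, 1; Cond = icmp IncV, N' qualifies when Phi and IncV have
/// no other users.)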
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
  Value *IncV = Phi->getIncomingValue(LatchIdx);

  for (User *U : Phi->users())
    if (U != Cond && U != IncV) return false;

  for (User *U : IncV->users())
    if (U != Cond && U != Phi) return false;
  return true;
}

/// Return true if the given phi is a "counter" in L. A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1. Note that L must have exactly one latch.
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}

/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling by the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison. Our strategy here differs for
    // pointers and integer IVs. For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace. For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count from zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/unit stride) after the backedge of loop L
/// is taken ExitCount times.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip count,
    // which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive stride of one. This means we don't
    // have to handle the case of negative offsets (yet) and just need to zero
    // extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    // We could handle pointer IVs other than i8*, but we need to compensate for
    // gep index scaling.
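    // (E.g., an i32* IV advances four bytes per GEP index, so the
    // iteration-count offset computed above would be off by that scale
    // factor.)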
    assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
                             cast<PointerType>(IndVar->getType())
                                 ->getElementType())->isOne() &&
           "unit stride pointer IV must be i8*");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in practice
    // SCEV will fold the pointer arithmetic away as such:
    // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know a priori that the limit must be a constant when evaluated
    // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType())
        > SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
        IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}

/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable. This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction * const IncVar =
      cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds, which requires us not
    // to add a potentially UB introducing use. We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type. This is safe
  // since we know (from the exit count bitwidth) that we can't self-wrap in
  // the narrower type.
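  // For example (illustrative): when comparing a 64-bit CmpIndVar against a
  // 32-bit ExitCnt, we first try to prove zext/sext(trunc(IV)) == IV so that
  // the extension of ExitCnt can be hoisted outside the loop; otherwise we
  // truncate CmpIndVar inside the loop.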
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead. We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
        SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
          SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make the old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}

//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop. Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink allocas: we never want to sink static allocas out of the
    // entry block, and correctly sinking dynamic allocas requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
            PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}

static void replaceExitCond(BranchInst *BI, Value *NewCond,
                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  auto *OldCond = BI->getCondition();
  BI->setCondition(NewCond);
  if (OldCond->use_empty())
    DeadInsts.emplace_back(OldCond);
}

static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  auto *OldCond = BI->getCondition();
  auto *NewCond = ConstantInt::get(OldCond->getType(),
                                   IsTaken ? ExitIfTrue : !ExitIfTrue);
  replaceExitCond(BI, NewCond, DeadInsts);
}

static void replaceLoopPHINodesWithPreheaderValues(
    Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
  auto *LoopPreheader = L->getLoopPreheader();
  auto *LoopHeader = L->getHeader();
  for (auto &PN : LoopHeader->phis()) {
    auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
    PN.replaceAllUsesWith(PreheaderIncoming);
    DeadInsts.emplace_back(&PN);
  }
}

static void replaceWithInvariantCond(
    const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
    const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  Rewriter.setInsertPoint(BI);
  auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
  auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  if (ExitIfTrue)
    InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
  IRBuilder<> Builder(BI);
  auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
                                     BI->getCondition()->getName());
  replaceExitCond(BI, NewCond, DeadInsts);
}

static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
    const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
    ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  ICmpInst::Predicate Pred;
  Value *LHS, *RHS;
  using namespace PatternMatch;
  BasicBlock *TrueSucc, *FalseSucc;
  if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
                      m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
    return false;

  assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
         "Not a loop exit!");

  // 'LHS pred RHS' should now mean that we stay in the loop.
  if (L->contains(FalseSucc))
    Pred = CmpInst::getInversePredicate(Pred);

  // If we are proving loop exit, invert the predicate.
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
    foldExit(L, ExitingBB, Inverted, DeadInsts);
    return true;
  }
  // Further logic works for non-inverted condition only.
  if (Inverted)
    return false;

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    const SCEV *One = SE->getOne(MaxIter->getType());
    MaxIter = SE->getMinusSCEV(MaxIter, One);
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
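  // (Illustrative sketch: if LHS is a monotonically increasing AddRec, a
  // condition such as 'LHS <u RHS' that holds on the last of the first
  // MaxIter iterations also holds on every earlier one, so a single
  // loop-invariant test can stand in for it.)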
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return false;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    foldExit(L, ExitingBB, Inverted, DeadInsts);
  else
    replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
                             Rewriter, DeadInsts);

  return true;
}

bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Remove all exits which aren't both rewritable and executed on every
  // iteration.
  llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // Likewise, the loop latch must be dominated by the exiting BB.
    if (!DT->dominates(ExitingBB, L->getLoopLatch()))
      return true;

    return false;
  });

  if (ExitingBlocks.empty())
    return false;

  // Get a symbolic upper bound on the loop backedge taken count.
  const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(MaxExitCount))
    return false;

  // Visit our exit blocks in order of dominance. Since all exits must
  // dominate the latch, there is a total dominance order between them.
  llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
    // std::sort sorts in ascending order, so we want the inverse of
    // the normal dominance relation.
    if (A == B) return false;
    if (DT->properlyDominates(A, B))
      return true;
    else {
      assert(DT->properlyDominates(B, A) &&
             "expected total dominance order!");
      return false;
    }
  });
#ifndef NDEBUG
  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
    assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
  }
#endif

  bool Changed = false;
  bool SkipLastIter = false;
  SmallSet<const SCEV*, 8> DominatingExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount)) {
      // Okay, we do not know the exit count here. Can we at least prove that it
      // will remain the same within iteration space?
      auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
      auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
        return optimizeLoopExitWithUnknownExitCount(
            L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE,
            Rewriter, DeadInsts);
      };

      // TODO: We might have proved that we can skip the last iteration for
      // this check. In this case, we only want to check the condition on the
      // pre-last iteration (MaxExitCount - 1). However, there is a nasty
      // corner case:
      //
      //   for (i = len; i != 0; i--) { ... check (i ult X) ... }

      // TODO: We might have proved that we can skip the last iteration for
      // this check.  In this case, we only want to check the condition on the
      // pre-last iteration (MaxExitCount - 1).  However, there is a nasty
      // corner case:
      //
      //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
      //
      // If we could not prove that len != 0, then we also could not prove
      // that (len - 1) is not a UINT_MAX.  If we simply query (len - 1), then
      // OptimizeCond will likely not prove anything for it, even if it could
      // prove the same fact for len.
      //
      // As a temporary solution, we query both last and pre-last iterations
      // in hope that we will be able to prove triviality for at least one of
      // them.  We can stop querying MaxExitCount for this case once SCEV
      // understands that (MaxExitCount - 1) will not overflow here.
      if (OptimizeCond(false, false) || OptimizeCond(true, false))
        Changed = true;
      else if (SkipLastIter)
        if (OptimizeCond(false, true) || OptimizeCond(true, true))
          Changed = true;
      continue;
    }

    if (MaxExitCount == ExitCount)
      // If the loop has more than 1 iteration, all further checks will be
      // executed 1 iteration less.
      SkipLastIter = true;

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this.  This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // We know that the backedge can't be taken, so we replace all
    // the header PHIs with values coming from the preheader.
    if (ExitCount->isZero()) {
      foldExit(L, ExitingBB, true, DeadInsts);
      replaceLoopPHINodesWithPreheaderValues(L, DeadInsts);
      Changed = true;
      continue;
    }

    assert(ExitCount->getType()->isIntegerTy() &&
           MaxExitCount->getType()->isIntegerTy() &&
           "Exit counts must be integers");

    Type *WiderType =
        SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
    ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
    MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
    assert(MaxExitCount->getType() == ExitCount->getType());

    // Can we prove that some other exit must be taken strictly before this
    // one?
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
                                     MaxExitCount, ExitCount)) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // As we run, keep track of which exit counts we've encountered.  If we
    // find a duplicate, we've found an exit which would have exited on the
    // exiting iteration, but (from the visit order) strictly follows another
    // which does the same and is thus dead.
    if (!DominatingExitCounts.insert(ExitCount).second) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // TODO: There might be another opportunity to leverage SCEV's reasoning
    // here.  If we kept track of the min of the dominating exit counts so
    // far, we could discharge exits with EC >= MDEC.  This is less powerful
    // than the existing transform (since later exits aren't considered), but
    // potentially more powerful for any case where SCEV can prove a >=u b,
    // but neither a == b nor a >u b.  Such a case is not currently known.
  }
  return Changed;
}
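
// A hedged sketch of predicateLoopExits' overall effect on a loop it can
// handle (side-effect free, all exit counts computable, no values live out):
// each exit test becomes the loop-invariant question "is this the exit with
// the limiting exit count?", so the rewritten loop exits on its very first
// iteration, through the same exit the original loop would eventually have
// taken.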
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form.  If we have a read-only loop, and we can tell that we must exit
  // down a path which does not need any of the values computed within the
  // loop, we can rewrite the loop to exit on the first iteration.  Note that
  // this doesn't either a) tell us the loop exits on the first iteration
  // (unless *all* exits are predicatable) or b) tell us *which* exit might be
  // taken.  This transformation looks a lot like a restricted form of dead
  // loop elimination, but restricted to read-only loops and without
  // necessarily needing to kill the loop entirely.
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow.  We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them.  This assumes trivial LCSSA phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
        BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it.  Consider
  // two exits (a) and (b) which would both exit on the same iteration.  If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b).  Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known to be
  // unequal.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse of
               // the normal dominance relation, plus a tie breaker for blocks
               // unordered by dominance.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               return A->getName() < B->getName();
             });
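  // (If two exiting blocks are unordered by dominance, e.g. they sit in the
  // two arms of a diamond, they are ordered by name here; the total-order
  // check below then rejects such loops.)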
  // Check to see if our exit blocks are a total order (i.e. a linear chain
  // of exits before the backedge).  If they aren't, reasoning about
  // reachability is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count.  The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    return DT->dominates(ExitingBB, L->getLoopLatch());
  }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable.  Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
  // suggestions on how to improve this?  I can obviously bail out for outer
  // loops, but that seems less than ideal.  MemorySSA can find memory
  // writes, is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO: isGuaranteedToTransfer
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks.  A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch.  Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic.  We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
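  // What the loop below emits, as a sketch: each exit's loop-varying
  // condition is replaced by a comparison of two loop-invariant quantities,
  // conceptually "exit iff (this exit's count == ExactBTC)".  Exits whose
  // counts are not the limiting one fold to "stay in loop"; the limiting
  // exit fires on the first iteration.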
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble.  Some notes:
  //  - LSR currently only supports LoopSimplify-form loops.  Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with
  //    preheaders, and we're in trouble if we can't find the induction
  //    variable even when we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only.
  // Note: Since the result returned by ScalarEvolution may depend on the
  // order in which previous results are added to its cache, the call to
  // getBackedgeTakenCount() may change the results of subsequent SCEV
  // queries.
  const SCEV *BackedgeTakenCount;
  if (VerifyIndvars)
    BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV.  We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated.  This helps
  // SCEV set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  // Try to eliminate loop exits based on analyzable exit counts.
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    // Some nested loops may share the same folded exit basic block, so we
    // need to notify the topmost loop.
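    // (For example, when an inner and an outer loop share an exit reached
    // through a branch we just folded, the outer loop's cached exit counts
    // may be stale as well.)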
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();
    BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the
      // innermost loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeaderBR))
        continue;

      // Check preconditions for proper SCEVExpander operation.  SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify.  Instead
      // any pass that uses the SCEVExpander must do it.  This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV.  Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's
  // cache can be deleted in the loop below, causing the AssertingVH in the
  // cache to trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);
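  // (Illustrative only: a value computed in the preheader but used solely
  // after the loop, say an address feeding a post-loop store, no longer ties
  // up a register across the whole loop body once it has been sunk.)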

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR and any other changes have not interfered with SCEV's
  // ability to compute trip count.  We may have *changed* the exit count, but
  // only by reducing it.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
                     WidenIndVars && AllowIVWidening);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}

namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
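    // (TLI and TTI are optional here, via getAnalysisIfAvailable, unlike the
    // new pass manager path where LoopStandardAnalysisResults always
    // provides them.)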
    auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
    auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
    MemorySSA *MSSA = nullptr;
    if (MSSAAnalysis)
      MSSA = &MSSAAnalysis->getMSSA();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreserved<MemorySSAWrapperPass>();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}