1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This transformation analyzes and transforms the induction variables (and 10 // computations derived from them) into simpler forms suitable for subsequent 11 // analysis and transformation. 12 // 13 // If the trip count of a loop is computable, this pass also makes the following 14 // changes: 15 // 1. The exit condition for the loop is canonicalized to compare the 16 // induction value against the exit value. This turns loops like: 17 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)' 18 // 2. Any use outside of the loop of an expression derived from the indvar 19 // is changed to compute the derived value outside of the loop, eliminating 20 // the dependence on the exit value of the induction variable. If the only 21 // purpose of the loop is to compute the exit value of some derived 22 // expression, this transformation will make the loop dead. 23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "llvm/Transforms/Scalar/IndVarSimplify.h" 27 #include "llvm/ADT/APFloat.h" 28 #include "llvm/ADT/APInt.h" 29 #include "llvm/ADT/ArrayRef.h" 30 #include "llvm/ADT/DenseMap.h" 31 #include "llvm/ADT/None.h" 32 #include "llvm/ADT/Optional.h" 33 #include "llvm/ADT/STLExtras.h" 34 #include "llvm/ADT/SmallPtrSet.h" 35 #include "llvm/ADT/SmallSet.h" 36 #include "llvm/ADT/SmallVector.h" 37 #include "llvm/ADT/Statistic.h" 38 #include "llvm/ADT/iterator_range.h" 39 #include "llvm/Analysis/LoopInfo.h" 40 #include "llvm/Analysis/LoopPass.h" 41 #include "llvm/Analysis/MemorySSA.h" 42 #include "llvm/Analysis/MemorySSAUpdater.h" 43 #include "llvm/Analysis/ScalarEvolution.h" 44 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 45 #include "llvm/Analysis/TargetLibraryInfo.h" 46 #include "llvm/Analysis/TargetTransformInfo.h" 47 #include "llvm/Analysis/ValueTracking.h" 48 #include "llvm/IR/BasicBlock.h" 49 #include "llvm/IR/Constant.h" 50 #include "llvm/IR/ConstantRange.h" 51 #include "llvm/IR/Constants.h" 52 #include "llvm/IR/DataLayout.h" 53 #include "llvm/IR/DerivedTypes.h" 54 #include "llvm/IR/Dominators.h" 55 #include "llvm/IR/Function.h" 56 #include "llvm/IR/IRBuilder.h" 57 #include "llvm/IR/InstrTypes.h" 58 #include "llvm/IR/Instruction.h" 59 #include "llvm/IR/Instructions.h" 60 #include "llvm/IR/IntrinsicInst.h" 61 #include "llvm/IR/Intrinsics.h" 62 #include "llvm/IR/Module.h" 63 #include "llvm/IR/Operator.h" 64 #include "llvm/IR/PassManager.h" 65 #include "llvm/IR/PatternMatch.h" 66 #include "llvm/IR/Type.h" 67 #include "llvm/IR/Use.h" 68 #include "llvm/IR/User.h" 69 #include "llvm/IR/Value.h" 70 #include "llvm/IR/ValueHandle.h" 71 #include "llvm/InitializePasses.h" 72 #include "llvm/Pass.h" 73 #include "llvm/Support/Casting.h" 74 #include "llvm/Support/CommandLine.h" 75 #include "llvm/Support/Compiler.h" 76 #include "llvm/Support/Debug.h" 77 #include "llvm/Support/ErrorHandling.h" 78 #include "llvm/Support/MathExtras.h" 79 #include "llvm/Support/raw_ostream.h" 80 #include "llvm/Transforms/Scalar.h" 81 #include "llvm/Transforms/Scalar/LoopPassManager.h" 82 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 83 #include "llvm/Transforms/Utils/Local.h" 84 #include "llvm/Transforms/Utils/LoopUtils.h" 85 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 86 #include "llvm/Transforms/Utils/SimplifyIndVar.h" 87 #include <cassert> 88 #include <cstdint> 89 #include <utility> 90 91 using namespace llvm; 92 93 #define DEBUG_TYPE "indvars" 94 95 STATISTIC(NumWidened , "Number of indvars widened"); 96 STATISTIC(NumReplaced , "Number of exit values replaced"); 97 STATISTIC(NumLFTR , "Number of loop exit tests replaced"); 98 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); 99 STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); 100 101 // Trip count verification can be enabled by default under NDEBUG if we 102 // implement a strong expression equivalence checker in SCEV. Until then, we 103 // use the verify-indvars flag, which may assert in some cases. 104 static cl::opt<bool> VerifyIndvars( 105 "verify-indvars", cl::Hidden, 106 cl::desc("Verify the ScalarEvolution result after running indvars. Has no " 107 "effect in release builds. (Note: this adds additional SCEV " 108 "queries potentially changing the analysis result)")); 109 110 static cl::opt<ReplaceExitVal> ReplaceExitValue( 111 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), 112 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), 113 cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), 114 clEnumValN(OnlyCheapRepl, "cheap", 115 "only replace exit value when the cost is cheap"), 116 clEnumValN(NoHardUse, "noharduse", 117 "only replace exit values when loop def likely dead"), 118 clEnumValN(AlwaysRepl, "always", 119 "always replace exit value whenever possible"))); 120 121 static cl::opt<bool> UsePostIncrementRanges( 122 "indvars-post-increment-ranges", cl::Hidden, 123 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), 124 cl::init(true)); 125 126 static cl::opt<bool> 127 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), 128 cl::desc("Disable Linear Function Test Replace optimization")); 129 130 static cl::opt<bool> 131 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), 132 cl::desc("Predicate conditions in read only loops")); 133 134 static cl::opt<bool> 135 AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), 136 cl::desc("Allow widening of indvars to eliminate s/zext")); 137 138 namespace { 139 140 struct RewritePhi; 141 142 class IndVarSimplify { 143 LoopInfo *LI; 144 ScalarEvolution *SE; 145 DominatorTree *DT; 146 const DataLayout &DL; 147 TargetLibraryInfo *TLI; 148 const TargetTransformInfo *TTI; 149 std::unique_ptr<MemorySSAUpdater> MSSAU; 150 151 SmallVector<WeakTrackingVH, 16> DeadInsts; 152 153 bool handleFloatingPointIV(Loop *L, PHINode *PH); 154 bool rewriteNonIntegerIVs(Loop *L); 155 156 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); 157 /// Try to eliminate loop exits based on analyzeable exit counts 158 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter); 159 /// Try to form loop invariant tests for loop exits by changing how many 160 /// iterations of the loop run when that is unobservable. 161 bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter); 162 163 bool rewriteFirstIterationLoopExitValues(Loop *L); 164 165 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 166 const SCEV *ExitCount, 167 PHINode *IndVar, SCEVExpander &Rewriter); 168 169 bool sinkUnusedInvariants(Loop *L); 170 171 public: 172 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, 173 const DataLayout &DL, TargetLibraryInfo *TLI, 174 TargetTransformInfo *TTI, MemorySSA *MSSA) 175 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) { 176 if (MSSA) 177 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); 178 } 179 180 bool run(Loop *L); 181 }; 182 183 } // end anonymous namespace 184 185 //===----------------------------------------------------------------------===// 186 // rewriteNonIntegerIVs and helpers. Prefer integer IVs. 187 //===----------------------------------------------------------------------===// 188 189 /// Convert APF to an integer, if possible. 190 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) { 191 bool isExact = false; 192 // See if we can convert this to an int64_t 193 uint64_t UIntVal; 194 if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true, 195 APFloat::rmTowardZero, &isExact) != APFloat::opOK || 196 !isExact) 197 return false; 198 IntVal = UIntVal; 199 return true; 200 } 201 202 /// If the loop has floating induction variable then insert corresponding 203 /// integer induction variable if possible. 204 /// For example, 205 /// for(double i = 0; i < 10000; ++i) 206 /// bar(i) 207 /// is converted into 208 /// for(int i = 0; i < 10000; ++i) 209 /// bar((double)i); 210 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { 211 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); 212 unsigned BackEdge = IncomingEdge^1; 213 214 // Check incoming value. 215 auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge)); 216 217 int64_t InitValue; 218 if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue)) 219 return false; 220 221 // Check IV increment. Reject this PN if increment operation is not 222 // an add or increment value can not be represented by an integer. 223 auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); 224 if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false; 225 226 // If this is not an add of the PHI with a constantfp, or if the constant fp 227 // is not an integer, bail out. 228 ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); 229 int64_t IncValue; 230 if (IncValueVal == nullptr || Incr->getOperand(0) != PN || 231 !ConvertToSInt(IncValueVal->getValueAPF(), IncValue)) 232 return false; 233 234 // Check Incr uses. One user is PN and the other user is an exit condition 235 // used by the conditional terminator. 236 Value::user_iterator IncrUse = Incr->user_begin(); 237 Instruction *U1 = cast<Instruction>(*IncrUse++); 238 if (IncrUse == Incr->user_end()) return false; 239 Instruction *U2 = cast<Instruction>(*IncrUse++); 240 if (IncrUse != Incr->user_end()) return false; 241 242 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't 243 // only used by a branch, we can't transform it. 244 FCmpInst *Compare = dyn_cast<FCmpInst>(U1); 245 if (!Compare) 246 Compare = dyn_cast<FCmpInst>(U2); 247 if (!Compare || !Compare->hasOneUse() || 248 !isa<BranchInst>(Compare->user_back())) 249 return false; 250 251 BranchInst *TheBr = cast<BranchInst>(Compare->user_back()); 252 253 // We need to verify that the branch actually controls the iteration count 254 // of the loop. If not, the new IV can overflow and no one will notice. 255 // The branch block must be in the loop and one of the successors must be out 256 // of the loop. 257 assert(TheBr->isConditional() && "Can't use fcmp if not conditional"); 258 if (!L->contains(TheBr->getParent()) || 259 (L->contains(TheBr->getSuccessor(0)) && 260 L->contains(TheBr->getSuccessor(1)))) 261 return false; 262 263 // If it isn't a comparison with an integer-as-fp (the exit value), we can't 264 // transform it. 265 ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); 266 int64_t ExitValue; 267 if (ExitValueVal == nullptr || 268 !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) 269 return false; 270 271 // Find new predicate for integer comparison. 272 CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; 273 switch (Compare->getPredicate()) { 274 default: return false; // Unknown comparison. 275 case CmpInst::FCMP_OEQ: 276 case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; 277 case CmpInst::FCMP_ONE: 278 case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break; 279 case CmpInst::FCMP_OGT: 280 case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break; 281 case CmpInst::FCMP_OGE: 282 case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break; 283 case CmpInst::FCMP_OLT: 284 case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break; 285 case CmpInst::FCMP_OLE: 286 case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; 287 } 288 289 // We convert the floating point induction variable to a signed i32 value if 290 // we can. This is only safe if the comparison will not overflow in a way 291 // that won't be trapped by the integer equivalent operations. Check for this 292 // now. 293 // TODO: We could use i64 if it is native and the range requires it. 294 295 // The start/stride/exit values must all fit in signed i32. 296 if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) 297 return false; 298 299 // If not actually striding (add x, 0.0), avoid touching the code. 300 if (IncValue == 0) 301 return false; 302 303 // Positive and negative strides have different safety conditions. 304 if (IncValue > 0) { 305 // If we have a positive stride, we require the init to be less than the 306 // exit value. 307 if (InitValue >= ExitValue) 308 return false; 309 310 uint32_t Range = uint32_t(ExitValue-InitValue); 311 // Check for infinite loop, either: 312 // while (i <= Exit) or until (i > Exit) 313 if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) { 314 if (++Range == 0) return false; // Range overflows. 315 } 316 317 unsigned Leftover = Range % uint32_t(IncValue); 318 319 // If this is an equality comparison, we require that the strided value 320 // exactly land on the exit value, otherwise the IV condition will wrap 321 // around and do things the fp IV wouldn't. 322 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 323 Leftover != 0) 324 return false; 325 326 // If the stride would wrap around the i32 before exiting, we can't 327 // transform the IV. 328 if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) 329 return false; 330 } else { 331 // If we have a negative stride, we require the init to be greater than the 332 // exit value. 333 if (InitValue <= ExitValue) 334 return false; 335 336 uint32_t Range = uint32_t(InitValue-ExitValue); 337 // Check for infinite loop, either: 338 // while (i >= Exit) or until (i < Exit) 339 if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) { 340 if (++Range == 0) return false; // Range overflows. 341 } 342 343 unsigned Leftover = Range % uint32_t(-IncValue); 344 345 // If this is an equality comparison, we require that the strided value 346 // exactly land on the exit value, otherwise the IV condition will wrap 347 // around and do things the fp IV wouldn't. 348 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 349 Leftover != 0) 350 return false; 351 352 // If the stride would wrap around the i32 before exiting, we can't 353 // transform the IV. 354 if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) 355 return false; 356 } 357 358 IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); 359 360 // Insert new integer induction variable. 361 PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); 362 NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), 363 PN->getIncomingBlock(IncomingEdge)); 364 365 Value *NewAdd = 366 BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue), 367 Incr->getName()+".int", Incr); 368 NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge)); 369 370 ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd, 371 ConstantInt::get(Int32Ty, ExitValue), 372 Compare->getName()); 373 374 // In the following deletions, PN may become dead and may be deleted. 375 // Use a WeakTrackingVH to observe whether this happens. 376 WeakTrackingVH WeakPH = PN; 377 378 // Delete the old floating point exit comparison. The branch starts using the 379 // new comparison. 380 NewCompare->takeName(Compare); 381 Compare->replaceAllUsesWith(NewCompare); 382 RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get()); 383 384 // Delete the old floating point increment. 385 Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); 386 RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get()); 387 388 // If the FP induction variable still has uses, this is because something else 389 // in the loop uses its value. In order to canonicalize the induction 390 // variable, we chose to eliminate the IV and rewrite it in terms of an 391 // int->fp cast. 392 // 393 // We give preference to sitofp over uitofp because it is faster on most 394 // platforms. 395 if (WeakPH) { 396 Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", 397 &*PN->getParent()->getFirstInsertionPt()); 398 PN->replaceAllUsesWith(Conv); 399 RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get()); 400 } 401 return true; 402 } 403 404 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) { 405 // First step. Check to see if there are any floating-point recurrences. 406 // If there are, change them into integer recurrences, permitting analysis by 407 // the SCEV routines. 408 BasicBlock *Header = L->getHeader(); 409 410 SmallVector<WeakTrackingVH, 8> PHIs; 411 for (PHINode &PN : Header->phis()) 412 PHIs.push_back(&PN); 413 414 bool Changed = false; 415 for (unsigned i = 0, e = PHIs.size(); i != e; ++i) 416 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i])) 417 Changed |= handleFloatingPointIV(L, PN); 418 419 // If the loop previously had floating-point IV, ScalarEvolution 420 // may not have been able to compute a trip count. Now that we've done some 421 // re-writing, the trip count may be computable. 422 if (Changed) 423 SE->forgetLoop(L); 424 return Changed; 425 } 426 427 //===---------------------------------------------------------------------===// 428 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know 429 // they will exit at the first iteration. 430 //===---------------------------------------------------------------------===// 431 432 /// Check to see if this loop has loop invariant conditions which lead to loop 433 /// exits. If so, we know that if the exit path is taken, it is at the first 434 /// loop iteration. This lets us predict exit values of PHI nodes that live in 435 /// loop header. 436 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) { 437 // Verify the input to the pass is already in LCSSA form. 438 assert(L->isLCSSAForm(*DT)); 439 440 SmallVector<BasicBlock *, 8> ExitBlocks; 441 L->getUniqueExitBlocks(ExitBlocks); 442 443 bool MadeAnyChanges = false; 444 for (auto *ExitBB : ExitBlocks) { 445 // If there are no more PHI nodes in this exit block, then no more 446 // values defined inside the loop are used on this path. 447 for (PHINode &PN : ExitBB->phis()) { 448 for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues(); 449 IncomingValIdx != E; ++IncomingValIdx) { 450 auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx); 451 452 // Can we prove that the exit must run on the first iteration if it 453 // runs at all? (i.e. early exits are fine for our purposes, but 454 // traces which lead to this exit being taken on the 2nd iteration 455 // aren't.) Note that this is about whether the exit branch is 456 // executed, not about whether it is taken. 457 if (!L->getLoopLatch() || 458 !DT->dominates(IncomingBB, L->getLoopLatch())) 459 continue; 460 461 // Get condition that leads to the exit path. 462 auto *TermInst = IncomingBB->getTerminator(); 463 464 Value *Cond = nullptr; 465 if (auto *BI = dyn_cast<BranchInst>(TermInst)) { 466 // Must be a conditional branch, otherwise the block 467 // should not be in the loop. 468 Cond = BI->getCondition(); 469 } else if (auto *SI = dyn_cast<SwitchInst>(TermInst)) 470 Cond = SI->getCondition(); 471 else 472 continue; 473 474 if (!L->isLoopInvariant(Cond)) 475 continue; 476 477 auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx)); 478 479 // Only deal with PHIs in the loop header. 480 if (!ExitVal || ExitVal->getParent() != L->getHeader()) 481 continue; 482 483 // If ExitVal is a PHI on the loop header, then we know its 484 // value along this exit because the exit can only be taken 485 // on the first iteration. 486 auto *LoopPreheader = L->getLoopPreheader(); 487 assert(LoopPreheader && "Invalid loop"); 488 int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader); 489 if (PreheaderIdx != -1) { 490 assert(ExitVal->getParent() == L->getHeader() && 491 "ExitVal must be in loop header"); 492 MadeAnyChanges = true; 493 PN.setIncomingValue(IncomingValIdx, 494 ExitVal->getIncomingValue(PreheaderIdx)); 495 } 496 } 497 } 498 } 499 return MadeAnyChanges; 500 } 501 502 //===----------------------------------------------------------------------===// 503 // IV Widening - Extend the width of an IV to cover its widest uses. 504 //===----------------------------------------------------------------------===// 505 506 /// Update information about the induction variable that is extended by this 507 /// sign or zero extend operation. This is used to determine the final width of 508 /// the IV before actually widening it. 509 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, 510 ScalarEvolution *SE, 511 const TargetTransformInfo *TTI) { 512 bool IsSigned = Cast->getOpcode() == Instruction::SExt; 513 if (!IsSigned && Cast->getOpcode() != Instruction::ZExt) 514 return; 515 516 Type *Ty = Cast->getType(); 517 uint64_t Width = SE->getTypeSizeInBits(Ty); 518 if (!Cast->getModule()->getDataLayout().isLegalInteger(Width)) 519 return; 520 521 // Check that `Cast` actually extends the induction variable (we rely on this 522 // later). This takes care of cases where `Cast` is extending a truncation of 523 // the narrow induction variable, and thus can end up being narrower than the 524 // "narrow" induction variable. 525 uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType()); 526 if (NarrowIVWidth >= Width) 527 return; 528 529 // Cast is either an sext or zext up to this point. 530 // We should not widen an indvar if arithmetics on the wider indvar are more 531 // expensive than those on the narrower indvar. We check only the cost of ADD 532 // because at least an ADD is required to increment the induction variable. We 533 // could compute more comprehensively the cost of all instructions on the 534 // induction variable when necessary. 535 if (TTI && 536 TTI->getArithmeticInstrCost(Instruction::Add, Ty) > 537 TTI->getArithmeticInstrCost(Instruction::Add, 538 Cast->getOperand(0)->getType())) { 539 return; 540 } 541 542 if (!WI.WidestNativeType) { 543 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); 544 WI.IsSigned = IsSigned; 545 return; 546 } 547 548 // We extend the IV to satisfy the sign of its first user, arbitrarily. 549 if (WI.IsSigned != IsSigned) 550 return; 551 552 if (Width > SE->getTypeSizeInBits(WI.WidestNativeType)) 553 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); 554 } 555 556 //===----------------------------------------------------------------------===// 557 // Live IV Reduction - Minimize IVs live across the loop. 558 //===----------------------------------------------------------------------===// 559 560 //===----------------------------------------------------------------------===// 561 // Simplification of IV users based on SCEV evaluation. 562 //===----------------------------------------------------------------------===// 563 564 namespace { 565 566 class IndVarSimplifyVisitor : public IVVisitor { 567 ScalarEvolution *SE; 568 const TargetTransformInfo *TTI; 569 PHINode *IVPhi; 570 571 public: 572 WideIVInfo WI; 573 574 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, 575 const TargetTransformInfo *TTI, 576 const DominatorTree *DTree) 577 : SE(SCEV), TTI(TTI), IVPhi(IV) { 578 DT = DTree; 579 WI.NarrowIV = IVPhi; 580 } 581 582 // Implement the interface used by simplifyUsersOfIV. 583 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } 584 }; 585 586 } // end anonymous namespace 587 588 /// Iteratively perform simplification on a worklist of IV users. Each 589 /// successive simplification may push more users which may themselves be 590 /// candidates for simplification. 591 /// 592 /// Sign/Zero extend elimination is interleaved with IV simplification. 593 bool IndVarSimplify::simplifyAndExtend(Loop *L, 594 SCEVExpander &Rewriter, 595 LoopInfo *LI) { 596 SmallVector<WideIVInfo, 8> WideIVs; 597 598 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction( 599 Intrinsic::getName(Intrinsic::experimental_guard)); 600 bool HasGuards = GuardDecl && !GuardDecl->use_empty(); 601 602 SmallVector<PHINode*, 8> LoopPhis; 603 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 604 LoopPhis.push_back(cast<PHINode>(I)); 605 } 606 // Each round of simplification iterates through the SimplifyIVUsers worklist 607 // for all current phis, then determines whether any IVs can be 608 // widened. Widening adds new phis to LoopPhis, inducing another round of 609 // simplification on the wide IVs. 610 bool Changed = false; 611 while (!LoopPhis.empty()) { 612 // Evaluate as many IV expressions as possible before widening any IVs. This 613 // forces SCEV to set no-wrap flags before evaluating sign/zero 614 // extension. The first time SCEV attempts to normalize sign/zero extension, 615 // the result becomes final. So for the most predictable results, we delay 616 // evaluation of sign/zero extend evaluation until needed, and avoid running 617 // other SCEV based analysis prior to simplifyAndExtend. 618 do { 619 PHINode *CurrIV = LoopPhis.pop_back_val(); 620 621 // Information about sign/zero extensions of CurrIV. 622 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); 623 624 Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter, 625 &Visitor); 626 627 if (Visitor.WI.WidestNativeType) { 628 WideIVs.push_back(Visitor.WI); 629 } 630 } while(!LoopPhis.empty()); 631 632 // Continue if we disallowed widening. 633 if (!AllowIVWidening) 634 continue; 635 636 for (; !WideIVs.empty(); WideIVs.pop_back()) { 637 unsigned ElimExt; 638 unsigned Widened; 639 if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter, 640 DT, DeadInsts, ElimExt, Widened, 641 HasGuards, UsePostIncrementRanges)) { 642 NumElimExt += ElimExt; 643 NumWidened += Widened; 644 Changed = true; 645 LoopPhis.push_back(WidePhi); 646 } 647 } 648 } 649 return Changed; 650 } 651 652 //===----------------------------------------------------------------------===// 653 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition. 654 //===----------------------------------------------------------------------===// 655 656 /// Given an Value which is hoped to be part of an add recurance in the given 657 /// loop, return the associated Phi node if so. Otherwise, return null. Note 658 /// that this is less general than SCEVs AddRec checking. 659 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) { 660 Instruction *IncI = dyn_cast<Instruction>(IncV); 661 if (!IncI) 662 return nullptr; 663 664 switch (IncI->getOpcode()) { 665 case Instruction::Add: 666 case Instruction::Sub: 667 break; 668 case Instruction::GetElementPtr: 669 // An IV counter must preserve its type. 670 if (IncI->getNumOperands() == 2) 671 break; 672 LLVM_FALLTHROUGH; 673 default: 674 return nullptr; 675 } 676 677 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0)); 678 if (Phi && Phi->getParent() == L->getHeader()) { 679 if (L->isLoopInvariant(IncI->getOperand(1))) 680 return Phi; 681 return nullptr; 682 } 683 if (IncI->getOpcode() == Instruction::GetElementPtr) 684 return nullptr; 685 686 // Allow add/sub to be commuted. 687 Phi = dyn_cast<PHINode>(IncI->getOperand(1)); 688 if (Phi && Phi->getParent() == L->getHeader()) { 689 if (L->isLoopInvariant(IncI->getOperand(0))) 690 return Phi; 691 } 692 return nullptr; 693 } 694 695 /// Whether the current loop exit test is based on this value. Currently this 696 /// is limited to a direct use in the loop condition. 697 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) { 698 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 699 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition()); 700 // TODO: Allow non-icmp loop test. 701 if (!ICmp) 702 return false; 703 704 // TODO: Allow indirect use. 705 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V; 706 } 707 708 /// linearFunctionTestReplace policy. Return true unless we can show that the 709 /// current exit test is already sufficiently canonical. 710 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) { 711 assert(L->getLoopLatch() && "Must be in simplified form"); 712 713 // Avoid converting a constant or loop invariant test back to a runtime 714 // test. This is critical for when SCEV's cached ExitCount is less precise 715 // than the current IR (such as after we've proven a particular exit is 716 // actually dead and thus the BE count never reaches our ExitCount.) 717 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 718 if (L->isLoopInvariant(BI->getCondition())) 719 return false; 720 721 // Do LFTR to simplify the exit condition to an ICMP. 722 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); 723 if (!Cond) 724 return true; 725 726 // Do LFTR to simplify the exit ICMP to EQ/NE 727 ICmpInst::Predicate Pred = Cond->getPredicate(); 728 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ) 729 return true; 730 731 // Look for a loop invariant RHS 732 Value *LHS = Cond->getOperand(0); 733 Value *RHS = Cond->getOperand(1); 734 if (!L->isLoopInvariant(RHS)) { 735 if (!L->isLoopInvariant(LHS)) 736 return true; 737 std::swap(LHS, RHS); 738 } 739 // Look for a simple IV counter LHS 740 PHINode *Phi = dyn_cast<PHINode>(LHS); 741 if (!Phi) 742 Phi = getLoopPhiForCounter(LHS, L); 743 744 if (!Phi) 745 return true; 746 747 // Do LFTR if PHI node is defined in the loop, but is *not* a counter. 748 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch()); 749 if (Idx < 0) 750 return true; 751 752 // Do LFTR if the exit condition's IV is *not* a simple counter. 753 Value *IncV = Phi->getIncomingValue(Idx); 754 return Phi != getLoopPhiForCounter(IncV, L); 755 } 756 757 /// Return true if undefined behavior would provable be executed on the path to 758 /// OnPathTo if Root produced a posion result. Note that this doesn't say 759 /// anything about whether OnPathTo is actually executed or whether Root is 760 /// actually poison. This can be used to assess whether a new use of Root can 761 /// be added at a location which is control equivalent with OnPathTo (such as 762 /// immediately before it) without introducing UB which didn't previously 763 /// exist. Note that a false result conveys no information. 764 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, 765 Instruction *OnPathTo, 766 DominatorTree *DT) { 767 // Basic approach is to assume Root is poison, propagate poison forward 768 // through all users we can easily track, and then check whether any of those 769 // users are provable UB and must execute before out exiting block might 770 // exit. 771 772 // The set of all recursive users we've visited (which are assumed to all be 773 // poison because of said visit) 774 SmallSet<const Value *, 16> KnownPoison; 775 SmallVector<const Instruction*, 16> Worklist; 776 Worklist.push_back(Root); 777 while (!Worklist.empty()) { 778 const Instruction *I = Worklist.pop_back_val(); 779 780 // If we know this must trigger UB on a path leading our target. 781 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo)) 782 return true; 783 784 // If we can't analyze propagation through this instruction, just skip it 785 // and transitive users. Safe as false is a conservative result. 786 if (!propagatesPoison(cast<Operator>(I)) && I != Root) 787 continue; 788 789 if (KnownPoison.insert(I).second) 790 for (const User *User : I->users()) 791 Worklist.push_back(cast<Instruction>(User)); 792 } 793 794 // Might be non-UB, or might have a path we couldn't prove must execute on 795 // way to exiting bb. 796 return false; 797 } 798 799 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils 800 /// down to checking that all operands are constant and listing instructions 801 /// that may hide undef. 802 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited, 803 unsigned Depth) { 804 if (isa<Constant>(V)) 805 return !isa<UndefValue>(V); 806 807 if (Depth >= 6) 808 return false; 809 810 // Conservatively handle non-constant non-instructions. For example, Arguments 811 // may be undef. 812 Instruction *I = dyn_cast<Instruction>(V); 813 if (!I) 814 return false; 815 816 // Load and return values may be undef. 817 if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I)) 818 return false; 819 820 // Optimistically handle other instructions. 821 for (Value *Op : I->operands()) { 822 if (!Visited.insert(Op).second) 823 continue; 824 if (!hasConcreteDefImpl(Op, Visited, Depth+1)) 825 return false; 826 } 827 return true; 828 } 829 830 /// Return true if the given value is concrete. We must prove that undef can 831 /// never reach it. 832 /// 833 /// TODO: If we decide that this is a good approach to checking for undef, we 834 /// may factor it into a common location. 835 static bool hasConcreteDef(Value *V) { 836 SmallPtrSet<Value*, 8> Visited; 837 Visited.insert(V); 838 return hasConcreteDefImpl(V, Visited, 0); 839 } 840 841 /// Return true if this IV has any uses other than the (soon to be rewritten) 842 /// loop exit test. 843 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { 844 int LatchIdx = Phi->getBasicBlockIndex(LatchBlock); 845 Value *IncV = Phi->getIncomingValue(LatchIdx); 846 847 for (User *U : Phi->users()) 848 if (U != Cond && U != IncV) return false; 849 850 for (User *U : IncV->users()) 851 if (U != Cond && U != Phi) return false; 852 return true; 853 } 854 855 /// Return true if the given phi is a "counter" in L. A counter is an 856 /// add recurance (of integer or pointer type) with an arbitrary start, and a 857 /// step of 1. Note that L must have exactly one latch. 858 static bool isLoopCounter(PHINode* Phi, Loop *L, 859 ScalarEvolution *SE) { 860 assert(Phi->getParent() == L->getHeader()); 861 assert(L->getLoopLatch()); 862 863 if (!SE->isSCEVable(Phi->getType())) 864 return false; 865 866 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi)); 867 if (!AR || AR->getLoop() != L || !AR->isAffine()) 868 return false; 869 870 const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)); 871 if (!Step || !Step->isOne()) 872 return false; 873 874 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch()); 875 Value *IncV = Phi->getIncomingValue(LatchIdx); 876 return (getLoopPhiForCounter(IncV, L) == Phi); 877 } 878 879 /// Search the loop header for a loop counter (anadd rec w/step of one) 880 /// suitable for use by LFTR. If multiple counters are available, select the 881 /// "best" one based profitable heuristics. 882 /// 883 /// BECount may be an i8* pointer type. The pointer difference is already 884 /// valid count without scaling the address stride, so it remains a pointer 885 /// expression as far as SCEV is concerned. 886 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB, 887 const SCEV *BECount, 888 ScalarEvolution *SE, DominatorTree *DT) { 889 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType()); 890 891 Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition(); 892 893 // Loop over all of the PHI nodes, looking for a simple counter. 894 PHINode *BestPhi = nullptr; 895 const SCEV *BestInit = nullptr; 896 BasicBlock *LatchBlock = L->getLoopLatch(); 897 assert(LatchBlock && "Must be in simplified form"); 898 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 899 900 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 901 PHINode *Phi = cast<PHINode>(I); 902 if (!isLoopCounter(Phi, L, SE)) 903 continue; 904 905 // Avoid comparing an integer IV against a pointer Limit. 906 if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy()) 907 continue; 908 909 const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi)); 910 911 // AR may be a pointer type, while BECount is an integer type. 912 // AR may be wider than BECount. With eq/ne tests overflow is immaterial. 913 // AR may not be a narrower type, or we may never exit. 914 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType()); 915 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth)) 916 continue; 917 918 // Avoid reusing a potentially undef value to compute other values that may 919 // have originally had a concrete definition. 920 if (!hasConcreteDef(Phi)) { 921 // We explicitly allow unknown phis as long as they are already used by 922 // the loop exit test. This is legal since performing LFTR could not 923 // increase the number of undef users. 924 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock); 925 if (!isLoopExitTestBasedOn(Phi, ExitingBB) && 926 !isLoopExitTestBasedOn(IncPhi, ExitingBB)) 927 continue; 928 } 929 930 // Avoid introducing undefined behavior due to poison which didn't exist in 931 // the original program. (Annoyingly, the rules for poison and undef 932 // propagation are distinct, so this does NOT cover the undef case above.) 933 // We have to ensure that we don't introduce UB by introducing a use on an 934 // iteration where said IV produces poison. Our strategy here differs for 935 // pointers and integer IVs. For integers, we strip and reinfer as needed, 936 // see code in linearFunctionTestReplace. For pointers, we restrict 937 // transforms as there is no good way to reinfer inbounds once lost. 938 if (!Phi->getType()->isIntegerTy() && 939 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) 940 continue; 941 942 const SCEV *Init = AR->getStart(); 943 944 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { 945 // Don't force a live loop counter if another IV can be used. 946 if (AlmostDeadIV(Phi, LatchBlock, Cond)) 947 continue; 948 949 // Prefer to count-from-zero. This is a more "canonical" counter form. It 950 // also prefers integer to pointer IVs. 951 if (BestInit->isZero() != Init->isZero()) { 952 if (BestInit->isZero()) 953 continue; 954 } 955 // If two IVs both count from zero or both count from nonzero then the 956 // narrower is likely a dead phi that has been widened. Use the wider phi 957 // to allow the other to be eliminated. 958 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) 959 continue; 960 } 961 BestPhi = Phi; 962 BestInit = Init; 963 } 964 return BestPhi; 965 } 966 967 /// Insert an IR expression which computes the value held by the IV IndVar 968 /// (which must be an loop counter w/unit stride) after the backedge of loop L 969 /// is taken ExitCount times. 970 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, 971 const SCEV *ExitCount, bool UsePostInc, Loop *L, 972 SCEVExpander &Rewriter, ScalarEvolution *SE) { 973 assert(isLoopCounter(IndVar, L, SE)); 974 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 975 const SCEV *IVInit = AR->getStart(); 976 977 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter 978 // finds a valid pointer IV. Sign extend ExitCount in order to materialize a 979 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing 980 // the existing GEPs whenever possible. 981 if (IndVar->getType()->isPointerTy() && 982 !ExitCount->getType()->isPointerTy()) { 983 // IVOffset will be the new GEP offset that is interpreted by GEP as a 984 // signed value. ExitCount on the other hand represents the loop trip count, 985 // which is an unsigned value. FindLoopCounter only allows induction 986 // variables that have a positive unit stride of one. This means we don't 987 // have to handle the case of negative offsets (yet) and just need to zero 988 // extend ExitCount. 989 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); 990 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy); 991 if (UsePostInc) 992 IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy)); 993 994 // Expand the code for the iteration count. 995 assert(SE->isLoopInvariant(IVOffset, L) && 996 "Computed iteration count is not loop invariant!"); 997 998 // We could handle pointer IVs other than i8*, but we need to compensate for 999 // gep index scaling. 1000 assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), 1001 cast<PointerType>(IndVar->getType()) 1002 ->getElementType())->isOne() && 1003 "unit stride pointer IV must be i8*"); 1004 1005 const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset); 1006 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1007 return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI); 1008 } else { 1009 // In any other case, convert both IVInit and ExitCount to integers before 1010 // comparing. This may result in SCEV expansion of pointers, but in practice 1011 // SCEV will fold the pointer arithmetic away as such: 1012 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). 1013 // 1014 // Valid Cases: (1) both integers is most common; (2) both may be pointers 1015 // for simple memset-style loops. 1016 // 1017 // IVInit integer and ExitCount pointer would only occur if a canonical IV 1018 // were generated on top of case #2, which is not expected. 1019 1020 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 1021 // For unit stride, IVCount = Start + ExitCount with 2's complement 1022 // overflow. 1023 1024 // For integer IVs, truncate the IV before computing IVInit + BECount, 1025 // unless we know apriori that the limit must be a constant when evaluated 1026 // in the bitwidth of the IV. We prefer (potentially) keeping a truncate 1027 // of the IV in the loop over a (potentially) expensive expansion of the 1028 // widened exit count add(zext(add)) expression. 1029 if (SE->getTypeSizeInBits(IVInit->getType()) 1030 > SE->getTypeSizeInBits(ExitCount->getType())) { 1031 if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount)) 1032 ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType()); 1033 else 1034 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType()); 1035 } 1036 1037 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount); 1038 1039 if (UsePostInc) 1040 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); 1041 1042 // Expand the code for the iteration count. 1043 assert(SE->isLoopInvariant(IVLimit, L) && 1044 "Computed iteration count is not loop invariant!"); 1045 // Ensure that we generate the same type as IndVar, or a smaller integer 1046 // type. In the presence of null pointer values, we have an integer type 1047 // SCEV expression (IVInit) for a pointer type IV value (IndVar). 1048 Type *LimitTy = ExitCount->getType()->isPointerTy() ? 1049 IndVar->getType() : ExitCount->getType(); 1050 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1051 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); 1052 } 1053 } 1054 1055 /// This method rewrites the exit condition of the loop to be a canonical != 1056 /// comparison against the incremented loop induction variable. This pass is 1057 /// able to rewrite the exit tests of any loop where the SCEV analysis can 1058 /// determine a loop-invariant trip count of the loop, which is actually a much 1059 /// broader range than just linear tests. 1060 bool IndVarSimplify:: 1061 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 1062 const SCEV *ExitCount, 1063 PHINode *IndVar, SCEVExpander &Rewriter) { 1064 assert(L->getLoopLatch() && "Loop no longer in simplified form?"); 1065 assert(isLoopCounter(IndVar, L, SE)); 1066 Instruction * const IncVar = 1067 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch())); 1068 1069 // Initialize CmpIndVar to the preincremented IV. 1070 Value *CmpIndVar = IndVar; 1071 bool UsePostInc = false; 1072 1073 // If the exiting block is the same as the backedge block, we prefer to 1074 // compare against the post-incremented value, otherwise we must compare 1075 // against the preincremented value. 1076 if (ExitingBB == L->getLoopLatch()) { 1077 // For pointer IVs, we chose to not strip inbounds which requires us not 1078 // to add a potentially UB introducing use. We need to either a) show 1079 // the loop test we're modifying is already in post-inc form, or b) show 1080 // that adding a use must not introduce UB. 1081 bool SafeToPostInc = 1082 IndVar->getType()->isIntegerTy() || 1083 isLoopExitTestBasedOn(IncVar, ExitingBB) || 1084 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT); 1085 if (SafeToPostInc) { 1086 UsePostInc = true; 1087 CmpIndVar = IncVar; 1088 } 1089 } 1090 1091 // It may be necessary to drop nowrap flags on the incrementing instruction 1092 // if either LFTR moves from a pre-inc check to a post-inc check (in which 1093 // case the increment might have previously been poison on the last iteration 1094 // only) or if LFTR switches to a different IV that was previously dynamically 1095 // dead (and as such may be arbitrarily poison). We remove any nowrap flags 1096 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc 1097 // check), because the pre-inc addrec flags may be adopted from the original 1098 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags. 1099 // TODO: This handling is inaccurate for one case: If we switch to a 1100 // dynamically dead IV that wraps on the first loop iteration only, which is 1101 // not covered by the post-inc addrec. (If the new IV was not dynamically 1102 // dead, it could not be poison on the first iteration in the first place.) 1103 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) { 1104 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar)); 1105 if (BO->hasNoUnsignedWrap()) 1106 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap()); 1107 if (BO->hasNoSignedWrap()) 1108 BO->setHasNoSignedWrap(AR->hasNoSignedWrap()); 1109 } 1110 1111 Value *ExitCnt = genLoopLimit( 1112 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE); 1113 assert(ExitCnt->getType()->isPointerTy() == 1114 IndVar->getType()->isPointerTy() && 1115 "genLoopLimit missed a cast"); 1116 1117 // Insert a new icmp_ne or icmp_eq instruction before the branch. 1118 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1119 ICmpInst::Predicate P; 1120 if (L->contains(BI->getSuccessor(0))) 1121 P = ICmpInst::ICMP_NE; 1122 else 1123 P = ICmpInst::ICMP_EQ; 1124 1125 IRBuilder<> Builder(BI); 1126 1127 // The new loop exit condition should reuse the debug location of the 1128 // original loop exit condition. 1129 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition())) 1130 Builder.SetCurrentDebugLocation(Cond->getDebugLoc()); 1131 1132 // For integer IVs, if we evaluated the limit in the narrower bitwidth to 1133 // avoid the expensive expansion of the limit expression in the wider type, 1134 // emit a truncate to narrow the IV to the ExitCount type. This is safe 1135 // since we know (from the exit count bitwidth), that we can't self-wrap in 1136 // the narrower type. 1137 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); 1138 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); 1139 if (CmpIndVarSize > ExitCntSize) { 1140 assert(!CmpIndVar->getType()->isPointerTy() && 1141 !ExitCnt->getType()->isPointerTy()); 1142 1143 // Before resorting to actually inserting the truncate, use the same 1144 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend 1145 // the other side of the comparison instead. We still evaluate the limit 1146 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to 1147 // a truncate within in. 1148 bool Extended = false; 1149 const SCEV *IV = SE->getSCEV(CmpIndVar); 1150 const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar), 1151 ExitCnt->getType()); 1152 const SCEV *ZExtTrunc = 1153 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType()); 1154 1155 if (ZExtTrunc == IV) { 1156 Extended = true; 1157 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(), 1158 "wide.trip.count"); 1159 } else { 1160 const SCEV *SExtTrunc = 1161 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType()); 1162 if (SExtTrunc == IV) { 1163 Extended = true; 1164 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(), 1165 "wide.trip.count"); 1166 } 1167 } 1168 1169 if (Extended) { 1170 bool Discard; 1171 L->makeLoopInvariant(ExitCnt, Discard); 1172 } else 1173 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), 1174 "lftr.wideiv"); 1175 } 1176 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 1177 << " LHS:" << *CmpIndVar << '\n' 1178 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==") 1179 << "\n" 1180 << " RHS:\t" << *ExitCnt << "\n" 1181 << "ExitCount:\t" << *ExitCount << "\n" 1182 << " was: " << *BI->getCondition() << "\n"); 1183 1184 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); 1185 Value *OrigCond = BI->getCondition(); 1186 // It's tempting to use replaceAllUsesWith here to fully replace the old 1187 // comparison, but that's not immediately safe, since users of the old 1188 // comparison may not be dominated by the new comparison. Instead, just 1189 // update the branch to use the new comparison; in the common case this 1190 // will make old comparison dead. 1191 BI->setCondition(Cond); 1192 DeadInsts.emplace_back(OrigCond); 1193 1194 ++NumLFTR; 1195 return true; 1196 } 1197 1198 //===----------------------------------------------------------------------===// 1199 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders. 1200 //===----------------------------------------------------------------------===// 1201 1202 /// If there's a single exit block, sink any loop-invariant values that 1203 /// were defined in the preheader but not used inside the loop into the 1204 /// exit block to reduce register pressure in the loop. 1205 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { 1206 BasicBlock *ExitBlock = L->getExitBlock(); 1207 if (!ExitBlock) return false; 1208 1209 BasicBlock *Preheader = L->getLoopPreheader(); 1210 if (!Preheader) return false; 1211 1212 bool MadeAnyChanges = false; 1213 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); 1214 BasicBlock::iterator I(Preheader->getTerminator()); 1215 while (I != Preheader->begin()) { 1216 --I; 1217 // New instructions were inserted at the end of the preheader. 1218 if (isa<PHINode>(I)) 1219 break; 1220 1221 // Don't move instructions which might have side effects, since the side 1222 // effects need to complete before instructions inside the loop. Also don't 1223 // move instructions which might read memory, since the loop may modify 1224 // memory. Note that it's okay if the instruction might have undefined 1225 // behavior: LoopSimplify guarantees that the preheader dominates the exit 1226 // block. 1227 if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 1228 continue; 1229 1230 // Skip debug info intrinsics. 1231 if (isa<DbgInfoIntrinsic>(I)) 1232 continue; 1233 1234 // Skip eh pad instructions. 1235 if (I->isEHPad()) 1236 continue; 1237 1238 // Don't sink alloca: we never want to sink static alloca's out of the 1239 // entry block, and correctly sinking dynamic alloca's requires 1240 // checks for stacksave/stackrestore intrinsics. 1241 // FIXME: Refactor this check somehow? 1242 if (isa<AllocaInst>(I)) 1243 continue; 1244 1245 // Determine if there is a use in or before the loop (direct or 1246 // otherwise). 1247 bool UsedInLoop = false; 1248 for (Use &U : I->uses()) { 1249 Instruction *User = cast<Instruction>(U.getUser()); 1250 BasicBlock *UseBB = User->getParent(); 1251 if (PHINode *P = dyn_cast<PHINode>(User)) { 1252 unsigned i = 1253 PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 1254 UseBB = P->getIncomingBlock(i); 1255 } 1256 if (UseBB == Preheader || L->contains(UseBB)) { 1257 UsedInLoop = true; 1258 break; 1259 } 1260 } 1261 1262 // If there is, the def must remain in the preheader. 1263 if (UsedInLoop) 1264 continue; 1265 1266 // Otherwise, sink it to the exit block. 1267 Instruction *ToMove = &*I; 1268 bool Done = false; 1269 1270 if (I != Preheader->begin()) { 1271 // Skip debug info intrinsics. 1272 do { 1273 --I; 1274 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); 1275 1276 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) 1277 Done = true; 1278 } else { 1279 Done = true; 1280 } 1281 1282 MadeAnyChanges = true; 1283 ToMove->moveBefore(*ExitBlock, InsertPt); 1284 if (Done) break; 1285 InsertPt = ToMove->getIterator(); 1286 } 1287 1288 return MadeAnyChanges; 1289 } 1290 1291 enum ExitCondAnalysisResult { 1292 CanBeRemoved, 1293 CannotOptimize 1294 }; 1295 1296 /// If the condition of BI is trivially true during at least first MaxIter 1297 /// iterations, return CanBeRemoved. 1298 /// Otherwise, return CannotOptimize. 1299 static ExitCondAnalysisResult analyzeCond(const Loop *L, BranchInst *BI, 1300 ScalarEvolution *SE, 1301 bool ProvingLoopExit, 1302 const SCEV *MaxIter) { 1303 ICmpInst::Predicate Pred; 1304 Value *LHS, *RHS; 1305 using namespace PatternMatch; 1306 BasicBlock *TrueSucc, *FalseSucc; 1307 if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), 1308 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) 1309 return CannotOptimize; 1310 1311 assert((L->contains(TrueSucc) != L->contains(FalseSucc)) && 1312 "Not a loop exit!"); 1313 1314 // 'LHS pred RHS' should now mean that we stay in loop. 1315 if (L->contains(FalseSucc)) 1316 Pred = CmpInst::getInversePredicate(Pred); 1317 1318 // If we are proving loop exit, invert the predicate. 1319 if (ProvingLoopExit) 1320 Pred = CmpInst::getInversePredicate(Pred); 1321 1322 const SCEV *LHSS = SE->getSCEVAtScope(LHS, L); 1323 const SCEV *RHSS = SE->getSCEVAtScope(RHS, L); 1324 // Can we prove it to be trivially true? 1325 if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) 1326 return CanBeRemoved; 1327 1328 if (ProvingLoopExit) 1329 return CannotOptimize; 1330 1331 ICmpInst::Predicate InvariantPred; 1332 const SCEV *InvariantLHS, *InvariantRHS; 1333 1334 // Check if there is a loop-invariant predicate equivalent to our check. 1335 if (!SE->isLoopInvariantExitCondDuringFirstIterations( 1336 Pred, LHSS, RHSS, L, BI, MaxIter, InvariantPred, InvariantLHS, 1337 InvariantRHS)) 1338 return CannotOptimize; 1339 1340 // Can we prove it to be trivially true? 1341 if (SE->isKnownPredicateAt(InvariantPred, InvariantLHS, InvariantRHS, BI)) 1342 return CanBeRemoved; 1343 return CannotOptimize; 1344 } 1345 1346 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { 1347 SmallVector<BasicBlock*, 16> ExitingBlocks; 1348 L->getExitingBlocks(ExitingBlocks); 1349 1350 // Remove all exits which aren't both rewriteable and execute on every 1351 // iteration. 1352 auto NewEnd = llvm::remove_if(ExitingBlocks, [&](BasicBlock *ExitingBB) { 1353 // If our exitting block exits multiple loops, we can only rewrite the 1354 // innermost one. Otherwise, we're changing how many times the innermost 1355 // loop runs before it exits. 1356 if (LI->getLoopFor(ExitingBB) != L) 1357 return true; 1358 1359 // Can't rewrite non-branch yet. 1360 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 1361 if (!BI) 1362 return true; 1363 1364 // If already constant, nothing to do. 1365 if (isa<Constant>(BI->getCondition())) 1366 return true; 1367 1368 // Likewise, the loop latch must be dominated by the exiting BB. 1369 if (!DT->dominates(ExitingBB, L->getLoopLatch())) 1370 return true; 1371 1372 return false; 1373 }); 1374 ExitingBlocks.erase(NewEnd, ExitingBlocks.end()); 1375 1376 if (ExitingBlocks.empty()) 1377 return false; 1378 1379 // Get a symbolic upper bound on the loop backedge taken count. 1380 const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L); 1381 if (isa<SCEVCouldNotCompute>(MaxExitCount)) 1382 return false; 1383 1384 // Visit our exit blocks in order of dominance. We know from the fact that 1385 // all exits must dominate the latch, so there is a total dominance order 1386 // between them. 1387 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) { 1388 // std::sort sorts in ascending order, so we want the inverse of 1389 // the normal dominance relation. 1390 if (A == B) return false; 1391 if (DT->properlyDominates(A, B)) 1392 return true; 1393 else { 1394 assert(DT->properlyDominates(B, A) && 1395 "expected total dominance order!"); 1396 return false; 1397 } 1398 }); 1399 #ifdef ASSERT 1400 for (unsigned i = 1; i < ExitingBlocks.size(); i++) { 1401 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])); 1402 } 1403 #endif 1404 1405 auto ReplaceExitCond = [&](BranchInst *BI, Value *NewCond) { 1406 auto *OldCond = BI->getCondition(); 1407 BI->setCondition(NewCond); 1408 if (OldCond->use_empty()) 1409 DeadInsts.emplace_back(OldCond); 1410 }; 1411 1412 auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) { 1413 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1414 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); 1415 auto *OldCond = BI->getCondition(); 1416 auto *NewCond = ConstantInt::get(OldCond->getType(), 1417 IsTaken ? ExitIfTrue : !ExitIfTrue); 1418 ReplaceExitCond(BI, NewCond); 1419 }; 1420 1421 bool Changed = false; 1422 bool SkipLastIter = false; 1423 SmallSet<const SCEV*, 8> DominatingExitCounts; 1424 for (BasicBlock *ExitingBB : ExitingBlocks) { 1425 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 1426 if (isa<SCEVCouldNotCompute>(ExitCount)) { 1427 // Okay, we do not know the exit count here. Can we at least prove that it 1428 // will remain the same within iteration space? 1429 auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1430 auto OptimizeCond = [this, L, BI, ExitingBB, MaxExitCount, &FoldExit]( 1431 bool Inverted, bool SkipLastIter) { 1432 const SCEV *MaxIter = MaxExitCount; 1433 if (SkipLastIter) { 1434 const SCEV *One = SE->getOne(MaxIter->getType()); 1435 MaxIter = SE->getMinusSCEV(MaxIter, One); 1436 } 1437 switch (analyzeCond(L, BI, SE, Inverted, MaxIter)) { 1438 case CanBeRemoved: 1439 FoldExit(ExitingBB, Inverted); 1440 return true; 1441 case CannotOptimize: 1442 return false; 1443 } 1444 llvm_unreachable("Unknown case!"); 1445 }; 1446 1447 // TODO: We might have proved that we can skip the last iteration for 1448 // this check. In this case, we only want to check the condition on the 1449 // pre-last iteration (MaxExitCount - 1). However, there is a nasty 1450 // corner case: 1451 // 1452 // for (i = len; i != 0; i--) { ... check (i ult X) ... } 1453 // 1454 // If we could not prove that len != 0, then we also could not prove that 1455 // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then 1456 // OptimizeCond will likely not prove anything for it, even if it could 1457 // prove the same fact for len. 1458 // 1459 // As a temporary solution, we query both last and pre-last iterations in 1460 // hope that we will be able to prove triviality for at least one of 1461 // them. We can stop querying MaxExitCount for this case once SCEV 1462 // understands that (MaxExitCount - 1) will not overflow here. 1463 if (OptimizeCond(false, false) || OptimizeCond(true, false)) 1464 Changed = true; 1465 else if (SkipLastIter) 1466 if (OptimizeCond(false, true) || OptimizeCond(true, true)) 1467 Changed = true; 1468 continue; 1469 } 1470 1471 if (MaxExitCount == ExitCount) 1472 // If the loop has more than 1 iteration, all further checks will be 1473 // executed 1 iteration less. 1474 SkipLastIter = true; 1475 1476 // If we know we'd exit on the first iteration, rewrite the exit to 1477 // reflect this. This does not imply the loop must exit through this 1478 // exit; there may be an earlier one taken on the first iteration. 1479 // TODO: Given we know the backedge can't be taken, we should go ahead 1480 // and break it. Or at least, kill all the header phis and simplify. 1481 if (ExitCount->isZero()) { 1482 FoldExit(ExitingBB, true); 1483 Changed = true; 1484 continue; 1485 } 1486 1487 // If we end up with a pointer exit count, bail. Note that we can end up 1488 // with a pointer exit count for one exiting block, and not for another in 1489 // the same loop. 1490 if (!ExitCount->getType()->isIntegerTy() || 1491 !MaxExitCount->getType()->isIntegerTy()) 1492 continue; 1493 1494 Type *WiderType = 1495 SE->getWiderType(MaxExitCount->getType(), ExitCount->getType()); 1496 ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType); 1497 MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType); 1498 assert(MaxExitCount->getType() == ExitCount->getType()); 1499 1500 // Can we prove that some other exit must be taken strictly before this 1501 // one? 1502 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, 1503 MaxExitCount, ExitCount)) { 1504 FoldExit(ExitingBB, false); 1505 Changed = true; 1506 continue; 1507 } 1508 1509 // As we run, keep track of which exit counts we've encountered. If we 1510 // find a duplicate, we've found an exit which would have exited on the 1511 // exiting iteration, but (from the visit order) strictly follows another 1512 // which does the same and is thus dead. 1513 if (!DominatingExitCounts.insert(ExitCount).second) { 1514 FoldExit(ExitingBB, false); 1515 Changed = true; 1516 continue; 1517 } 1518 1519 // TODO: There might be another oppurtunity to leverage SCEV's reasoning 1520 // here. If we kept track of the min of dominanting exits so far, we could 1521 // discharge exits with EC >= MDEC. This is less powerful than the existing 1522 // transform (since later exits aren't considered), but potentially more 1523 // powerful for any case where SCEV can prove a >=u b, but neither a == b 1524 // or a >u b. Such a case is not currently known. 1525 } 1526 return Changed; 1527 } 1528 1529 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { 1530 SmallVector<BasicBlock*, 16> ExitingBlocks; 1531 L->getExitingBlocks(ExitingBlocks); 1532 1533 // Finally, see if we can rewrite our exit conditions into a loop invariant 1534 // form. If we have a read-only loop, and we can tell that we must exit down 1535 // a path which does not need any of the values computed within the loop, we 1536 // can rewrite the loop to exit on the first iteration. Note that this 1537 // doesn't either a) tell us the loop exits on the first iteration (unless 1538 // *all* exits are predicateable) or b) tell us *which* exit might be taken. 1539 // This transformation looks a lot like a restricted form of dead loop 1540 // elimination, but restricted to read-only loops and without neccesssarily 1541 // needing to kill the loop entirely. 1542 if (!LoopPredication) 1543 return false; 1544 1545 if (!SE->hasLoopInvariantBackedgeTakenCount(L)) 1546 return false; 1547 1548 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits 1549 // through *explicit* control flow. We have to eliminate the possibility of 1550 // implicit exits (see below) before we know it's truly exact. 1551 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L); 1552 if (isa<SCEVCouldNotCompute>(ExactBTC) || 1553 !SE->isLoopInvariant(ExactBTC, L) || 1554 !isSafeToExpand(ExactBTC, *SE)) 1555 return false; 1556 1557 // If we end up with a pointer exit count, bail. It may be unsized. 1558 if (!ExactBTC->getType()->isIntegerTy()) 1559 return false; 1560 1561 auto BadExit = [&](BasicBlock *ExitingBB) { 1562 // If our exiting block exits multiple loops, we can only rewrite the 1563 // innermost one. Otherwise, we're changing how many times the innermost 1564 // loop runs before it exits. 1565 if (LI->getLoopFor(ExitingBB) != L) 1566 return true; 1567 1568 // Can't rewrite non-branch yet. 1569 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 1570 if (!BI) 1571 return true; 1572 1573 // If already constant, nothing to do. 1574 if (isa<Constant>(BI->getCondition())) 1575 return true; 1576 1577 // If the exit block has phis, we need to be able to compute the values 1578 // within the loop which contains them. This assumes trivially lcssa phis 1579 // have already been removed; TODO: generalize 1580 BasicBlock *ExitBlock = 1581 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0); 1582 if (!ExitBlock->phis().empty()) 1583 return true; 1584 1585 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 1586 assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count"); 1587 if (!SE->isLoopInvariant(ExitCount, L) || 1588 !isSafeToExpand(ExitCount, *SE)) 1589 return true; 1590 1591 // If we end up with a pointer exit count, bail. It may be unsized. 1592 if (!ExitCount->getType()->isIntegerTy()) 1593 return true; 1594 1595 return false; 1596 }; 1597 1598 // If we have any exits which can't be predicated themselves, than we can't 1599 // predicate any exit which isn't guaranteed to execute before it. Consider 1600 // two exits (a) and (b) which would both exit on the same iteration. If we 1601 // can predicate (b), but not (a), and (a) preceeds (b) along some path, then 1602 // we could convert a loop from exiting through (a) to one exiting through 1603 // (b). Note that this problem exists only for exits with the same exit 1604 // count, and we could be more aggressive when exit counts are known inequal. 1605 llvm::sort(ExitingBlocks, 1606 [&](BasicBlock *A, BasicBlock *B) { 1607 // std::sort sorts in ascending order, so we want the inverse of 1608 // the normal dominance relation, plus a tie breaker for blocks 1609 // unordered by dominance. 1610 if (DT->properlyDominates(A, B)) return true; 1611 if (DT->properlyDominates(B, A)) return false; 1612 return A->getName() < B->getName(); 1613 }); 1614 // Check to see if our exit blocks are a total order (i.e. a linear chain of 1615 // exits before the backedge). If they aren't, reasoning about reachability 1616 // is complicated and we choose not to for now. 1617 for (unsigned i = 1; i < ExitingBlocks.size(); i++) 1618 if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])) 1619 return false; 1620 1621 // Given our sorted total order, we know that exit[j] must be evaluated 1622 // after all exit[i] such j > i. 1623 for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++) 1624 if (BadExit(ExitingBlocks[i])) { 1625 ExitingBlocks.resize(i); 1626 break; 1627 } 1628 1629 if (ExitingBlocks.empty()) 1630 return false; 1631 1632 // We rely on not being able to reach an exiting block on a later iteration 1633 // then it's statically compute exit count. The implementaton of 1634 // getExitCount currently has this invariant, but assert it here so that 1635 // breakage is obvious if this ever changes.. 1636 assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) { 1637 return DT->dominates(ExitingBB, L->getLoopLatch()); 1638 })); 1639 1640 // At this point, ExitingBlocks consists of only those blocks which are 1641 // predicatable. Given that, we know we have at least one exit we can 1642 // predicate if the loop is doesn't have side effects and doesn't have any 1643 // implicit exits (because then our exact BTC isn't actually exact). 1644 // @Reviewers - As structured, this is O(I^2) for loop nests. Any 1645 // suggestions on how to improve this? I can obviously bail out for outer 1646 // loops, but that seems less than ideal. MemorySSA can find memory writes, 1647 // is that enough for *all* side effects? 1648 for (BasicBlock *BB : L->blocks()) 1649 for (auto &I : *BB) 1650 // TODO:isGuaranteedToTransfer 1651 if (I.mayHaveSideEffects() || I.mayThrow()) 1652 return false; 1653 1654 bool Changed = false; 1655 // Finally, do the actual predication for all predicatable blocks. A couple 1656 // of notes here: 1657 // 1) We don't bother to constant fold dominated exits with identical exit 1658 // counts; that's simply a form of CSE/equality propagation and we leave 1659 // it for dedicated passes. 1660 // 2) We insert the comparison at the branch. Hoisting introduces additional 1661 // legality constraints and we leave that to dedicated logic. We want to 1662 // predicate even if we can't insert a loop invariant expression as 1663 // peeling or unrolling will likely reduce the cost of the otherwise loop 1664 // varying check. 1665 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator()); 1666 IRBuilder<> B(L->getLoopPreheader()->getTerminator()); 1667 Value *ExactBTCV = nullptr; // Lazily generated if needed. 1668 for (BasicBlock *ExitingBB : ExitingBlocks) { 1669 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 1670 1671 auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1672 Value *NewCond; 1673 if (ExitCount == ExactBTC) { 1674 NewCond = L->contains(BI->getSuccessor(0)) ? 1675 B.getFalse() : B.getTrue(); 1676 } else { 1677 Value *ECV = Rewriter.expandCodeFor(ExitCount); 1678 if (!ExactBTCV) 1679 ExactBTCV = Rewriter.expandCodeFor(ExactBTC); 1680 Value *RHS = ExactBTCV; 1681 if (ECV->getType() != RHS->getType()) { 1682 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType()); 1683 ECV = B.CreateZExt(ECV, WiderTy); 1684 RHS = B.CreateZExt(RHS, WiderTy); 1685 } 1686 auto Pred = L->contains(BI->getSuccessor(0)) ? 1687 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; 1688 NewCond = B.CreateICmp(Pred, ECV, RHS); 1689 } 1690 Value *OldCond = BI->getCondition(); 1691 BI->setCondition(NewCond); 1692 if (OldCond->use_empty()) 1693 DeadInsts.emplace_back(OldCond); 1694 Changed = true; 1695 } 1696 1697 return Changed; 1698 } 1699 1700 //===----------------------------------------------------------------------===// 1701 // IndVarSimplify driver. Manage several subpasses of IV simplification. 1702 //===----------------------------------------------------------------------===// 1703 1704 bool IndVarSimplify::run(Loop *L) { 1705 // We need (and expect!) the incoming loop to be in LCSSA. 1706 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 1707 "LCSSA required to run indvars!"); 1708 1709 // If LoopSimplify form is not available, stay out of trouble. Some notes: 1710 // - LSR currently only supports LoopSimplify-form loops. Indvars' 1711 // canonicalization can be a pessimization without LSR to "clean up" 1712 // afterwards. 1713 // - We depend on having a preheader; in particular, 1714 // Loop::getCanonicalInductionVariable only supports loops with preheaders, 1715 // and we're in trouble if we can't find the induction variable even when 1716 // we've manually inserted one. 1717 // - LFTR relies on having a single backedge. 1718 if (!L->isLoopSimplifyForm()) 1719 return false; 1720 1721 #ifndef NDEBUG 1722 // Used below for a consistency check only 1723 // Note: Since the result returned by ScalarEvolution may depend on the order 1724 // in which previous results are added to its cache, the call to 1725 // getBackedgeTakenCount() may change following SCEV queries. 1726 const SCEV *BackedgeTakenCount; 1727 if (VerifyIndvars) 1728 BackedgeTakenCount = SE->getBackedgeTakenCount(L); 1729 #endif 1730 1731 bool Changed = false; 1732 // If there are any floating-point recurrences, attempt to 1733 // transform them to use integer recurrences. 1734 Changed |= rewriteNonIntegerIVs(L); 1735 1736 // Create a rewriter object which we'll use to transform the code with. 1737 SCEVExpander Rewriter(*SE, DL, "indvars"); 1738 #ifndef NDEBUG 1739 Rewriter.setDebugType(DEBUG_TYPE); 1740 #endif 1741 1742 // Eliminate redundant IV users. 1743 // 1744 // Simplification works best when run before other consumers of SCEV. We 1745 // attempt to avoid evaluating SCEVs for sign/zero extend operations until 1746 // other expressions involving loop IVs have been evaluated. This helps SCEV 1747 // set no-wrap flags before normalizing sign/zero extension. 1748 Rewriter.disableCanonicalMode(); 1749 Changed |= simplifyAndExtend(L, Rewriter, LI); 1750 1751 // Check to see if we can compute the final value of any expressions 1752 // that are recurrent in the loop, and substitute the exit values from the 1753 // loop into any instructions outside of the loop that use the final values 1754 // of the current expressions. 1755 if (ReplaceExitValue != NeverRepl) { 1756 if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT, 1757 ReplaceExitValue, DeadInsts)) { 1758 NumReplaced += Rewrites; 1759 Changed = true; 1760 } 1761 } 1762 1763 // Eliminate redundant IV cycles. 1764 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); 1765 1766 // Try to eliminate loop exits based on analyzeable exit counts 1767 if (optimizeLoopExits(L, Rewriter)) { 1768 Changed = true; 1769 // Given we've changed exit counts, notify SCEV 1770 SE->forgetLoop(L); 1771 } 1772 1773 // Try to form loop invariant tests for loop exits by changing how many 1774 // iterations of the loop run when that is unobservable. 1775 if (predicateLoopExits(L, Rewriter)) { 1776 Changed = true; 1777 // Given we've changed exit counts, notify SCEV 1778 SE->forgetLoop(L); 1779 } 1780 1781 // If we have a trip count expression, rewrite the loop's exit condition 1782 // using it. 1783 if (!DisableLFTR) { 1784 BasicBlock *PreHeader = L->getLoopPreheader(); 1785 BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); 1786 1787 SmallVector<BasicBlock*, 16> ExitingBlocks; 1788 L->getExitingBlocks(ExitingBlocks); 1789 for (BasicBlock *ExitingBB : ExitingBlocks) { 1790 // Can't rewrite non-branch yet. 1791 if (!isa<BranchInst>(ExitingBB->getTerminator())) 1792 continue; 1793 1794 // If our exitting block exits multiple loops, we can only rewrite the 1795 // innermost one. Otherwise, we're changing how many times the innermost 1796 // loop runs before it exits. 1797 if (LI->getLoopFor(ExitingBB) != L) 1798 continue; 1799 1800 if (!needsLFTR(L, ExitingBB)) 1801 continue; 1802 1803 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 1804 if (isa<SCEVCouldNotCompute>(ExitCount)) 1805 continue; 1806 1807 // This was handled above, but as we form SCEVs, we can sometimes refine 1808 // existing ones; this allows exit counts to be folded to zero which 1809 // weren't when optimizeLoopExits saw them. Arguably, we should iterate 1810 // until stable to handle cases like this better. 1811 if (ExitCount->isZero()) 1812 continue; 1813 1814 PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT); 1815 if (!IndVar) 1816 continue; 1817 1818 // Avoid high cost expansions. Note: This heuristic is questionable in 1819 // that our definition of "high cost" is not exactly principled. 1820 if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget, 1821 TTI, PreHeaderBR)) 1822 continue; 1823 1824 // Check preconditions for proper SCEVExpander operation. SCEV does not 1825 // express SCEVExpander's dependencies, such as LoopSimplify. Instead 1826 // any pass that uses the SCEVExpander must do it. This does not work 1827 // well for loop passes because SCEVExpander makes assumptions about 1828 // all loops, while LoopPassManager only forces the current loop to be 1829 // simplified. 1830 // 1831 // FIXME: SCEV expansion has no way to bail out, so the caller must 1832 // explicitly check any assumptions made by SCEV. Brittle. 1833 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount); 1834 if (!AR || AR->getLoop()->getLoopPreheader()) 1835 Changed |= linearFunctionTestReplace(L, ExitingBB, 1836 ExitCount, IndVar, 1837 Rewriter); 1838 } 1839 } 1840 // Clear the rewriter cache, because values that are in the rewriter's cache 1841 // can be deleted in the loop below, causing the AssertingVH in the cache to 1842 // trigger. 1843 Rewriter.clear(); 1844 1845 // Now that we're done iterating through lists, clean up any instructions 1846 // which are now dead. 1847 while (!DeadInsts.empty()) 1848 if (Instruction *Inst = 1849 dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val())) 1850 Changed |= 1851 RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get()); 1852 1853 // The Rewriter may not be used from this point on. 1854 1855 // Loop-invariant instructions in the preheader that aren't used in the 1856 // loop may be sunk below the loop to reduce register pressure. 1857 Changed |= sinkUnusedInvariants(L); 1858 1859 // rewriteFirstIterationLoopExitValues does not rely on the computation of 1860 // trip count and therefore can further simplify exit values in addition to 1861 // rewriteLoopExitValues. 1862 Changed |= rewriteFirstIterationLoopExitValues(L); 1863 1864 // Clean up dead instructions. 1865 Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get()); 1866 1867 // Check a post-condition. 1868 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 1869 "Indvars did not preserve LCSSA!"); 1870 1871 // Verify that LFTR, and any other change have not interfered with SCEV's 1872 // ability to compute trip count. We may have *changed* the exit count, but 1873 // only by reducing it. 1874 #ifndef NDEBUG 1875 if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { 1876 SE->forgetLoop(L); 1877 const SCEV *NewBECount = SE->getBackedgeTakenCount(L); 1878 if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) < 1879 SE->getTypeSizeInBits(NewBECount->getType())) 1880 NewBECount = SE->getTruncateOrNoop(NewBECount, 1881 BackedgeTakenCount->getType()); 1882 else 1883 BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, 1884 NewBECount->getType()); 1885 assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount, 1886 NewBECount) && "indvars must preserve SCEV"); 1887 } 1888 if (VerifyMemorySSA && MSSAU) 1889 MSSAU->getMemorySSA()->verifyMemorySSA(); 1890 #endif 1891 1892 return Changed; 1893 } 1894 1895 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, 1896 LoopStandardAnalysisResults &AR, 1897 LPMUpdater &) { 1898 Function *F = L.getHeader()->getParent(); 1899 const DataLayout &DL = F->getParent()->getDataLayout(); 1900 1901 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA); 1902 if (!IVS.run(&L)) 1903 return PreservedAnalyses::all(); 1904 1905 auto PA = getLoopPassPreservedAnalyses(); 1906 PA.preserveSet<CFGAnalyses>(); 1907 if (AR.MSSA) 1908 PA.preserve<MemorySSAAnalysis>(); 1909 return PA; 1910 } 1911 1912 namespace { 1913 1914 struct IndVarSimplifyLegacyPass : public LoopPass { 1915 static char ID; // Pass identification, replacement for typeid 1916 1917 IndVarSimplifyLegacyPass() : LoopPass(ID) { 1918 initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry()); 1919 } 1920 1921 bool runOnLoop(Loop *L, LPPassManager &LPM) override { 1922 if (skipLoop(L)) 1923 return false; 1924 1925 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1926 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 1927 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1928 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); 1929 auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr; 1930 auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); 1931 auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr; 1932 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 1933 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>(); 1934 MemorySSA *MSSA = nullptr; 1935 if (MSSAAnalysis) 1936 MSSA = &MSSAAnalysis->getMSSA(); 1937 1938 IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA); 1939 return IVS.run(L); 1940 } 1941 1942 void getAnalysisUsage(AnalysisUsage &AU) const override { 1943 AU.setPreservesCFG(); 1944 AU.addPreserved<MemorySSAWrapperPass>(); 1945 getLoopAnalysisUsage(AU); 1946 } 1947 }; 1948 1949 } // end anonymous namespace 1950 1951 char IndVarSimplifyLegacyPass::ID = 0; 1952 1953 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", 1954 "Induction Variable Simplification", false, false) 1955 INITIALIZE_PASS_DEPENDENCY(LoopPass) 1956 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars", 1957 "Induction Variable Simplification", false, false) 1958 1959 Pass *llvm::createIndVarSimplifyPass() { 1960 return new IndVarSimplifyLegacyPass(); 1961 } 1962