//===- IndVarSimplify.cpp - Induction Variable Elimination ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the
// following changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop,
//      eliminating the dependence on the exit value of the induction
//      variable. If the only purpose of the loop is to compute the exit
//      value of some derived expression, this transformation will make the
//      loop dead.
//
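// As an illustrative sketch of change 2: in
//   for (i = 0; i != 10; ++i) {}
//   use(i * 4);
// the use outside the loop can be rewritten to read the constant 40
// directly, after which the now-unused loop is dead and removable by later
// passes.
//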
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened, "Number of indvars widened");
STATISTIC(NumReplaced, "Number of exit values replaced");
STATISTIC(NumLFTR, "Number of loop exit tests replaced");
STATISTIC(NumElimExt, "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV, "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
             "effect in release builds. (Note: this adds additional SCEV "
             "queries potentially changing the analysis result)"));

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(
        clEnumValN(NeverRepl, "never", "never replace exit value"),
        clEnumValN(OnlyCheapRepl, "cheap",
                   "only replace exit value when the cost is cheap"),
        clEnumValN(
            UnusedIndVarInLoop, "unusedindvarinloop",
            "only replace exit value when it is an unused "
            "induction variable in the loop and has cheap replacement cost"),
        clEnumValN(NoHardUse, "noharduse",
                   "only replace exit values when loop def likely dead"),
        clEnumValN(AlwaysRepl, "always",
                   "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
    "indvars-post-increment-ranges", cl::Hidden,
    cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
    cl::init(true));

static cl::opt<bool>
    DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
                cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
    LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                    cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
    AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                    cl::desc("Allow widening of indvars to eliminate s/zext"));

namespace {

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool WidenIndVars;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);

  /// Try to improve our exit conditions by converting condition from signed
  /// to unsigned or rotating computation out of the loop.
  /// (See inline comment about why this is duplicated from simplifyAndExtend)
  bool canonicalizeExitCondition(Loop *L);

  /// Try to eliminate loop exits based on analyzable exit counts.
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);

  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount, PHINode *IndVar,
                                 SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
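/// As an illustrative example, 42.0 converts exactly to 42, while values
/// such as 0.5 (not an integer) or 1e30 (outside the i64 range) are
/// rejected.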
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t.
  uint64_t UIntVal;
  if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
///   for(double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for(int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge ^ 1;

  // Check the incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value can not be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a ConstantFP, or if the constant
  // fp is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find the exit condition, which is an fcmp. If it doesn't exist, or if it
  // isn't only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be
  // out of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we
  // can't transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find the new predicate for the integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false; // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value
  // if we can. This is only safe if the comparison will not overflow in a
  // way that won't be trapped by the integer equivalent operations. Check
  // for this now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

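  // As an illustrative example of the checks below: with InitValue = 0,
  // IncValue = 3, ExitValue = 10 and an equality exit test, Leftover =
  // 10 % 3 = 1 != 0, so the integer IV would step over the exit value and
  // wrap where the FP IV would not; such cases are rejected.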
  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue - InitValue);
    // Check for an infinite loop, either:
    //   while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false; // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue + IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than
    // the exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue - ExitValue);
    // Check for an infinite loop, either:
    //   while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false; // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue + IncValue) > ExitValue)
      return false;
  }

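  // From here on we materialize the integer IV. Schematically (illustrative
  // IR, names simplified), the code below produces:
  //   %iv.int      = phi i32 [ <init>, %preheader ], [ %iv.int.next, %latch ]
  //   %iv.int.next = add i32 %iv.int, <step>
  //   %exitcond    = icmp <pred> i32 %iv.int.next, <exit>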
  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert the new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName() + ".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using
  // the new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something
  // else in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis
  // by the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution may not
  // have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to
/// loop exits. If so, we know that if the exit path is taken, it is at the
/// first loop iteration. This lets us predict exit values of PHI nodes that
/// live in the loop header.
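/// As an illustrative sketch: if the header PHI is
///   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
/// and a loop-invariant condition exits from a block dominating the latch,
/// then an LCSSA PHI in the exit block receiving %iv can instead receive the
/// preheader value 0, since that exit can only be taken on the first
/// iteration.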
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify that the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all? (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.) Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get the condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width
/// of the IV before actually widening it.
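/// As an illustrative example, a use such as
///   %wide = sext i32 %iv to i64
/// suggests widening %iv to i64 so the extend can be eliminated, provided
/// i64 is a legal integer type for the target.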
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on
  // this later). This takes care of cases where `Cast` is extending a
  // truncation of the narrow induction variable, and thus can end up being
  // narrower than the "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable.
  // We could compute more comprehensively the cost of all instructions on
  // the induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType ||
      Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its user(s), or 'signed' if
  // there are multiple users with both sign- and zero extensions, in order
  // not to introduce nondeterministic behaviour based on the unspecified
  // order of a PHI node's users-iterator.
  WI.IsSigned |= IsSigned;
}

//===----------------------------------------------------------------------===//
// Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
      : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode *, 8> LoopPhis;
  for (PHINode &PN : L->getHeader()->phis())
    LoopPhis.push_back(&PN);

  // Each round of simplification iterates through the SimplifyIVUsers
  // worklist for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs.
    // This forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero
    // extension, the result becomes final. So for the most predictable
    // results, we delay evaluation of sign/zero extension until needed, and
    // avoid running other SCEV based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts,
                                   Rewriter, &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated PHI node if so. Otherwise, return null. Note
/// that this is less general than SCEV's AddRec checking.
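/// As an illustrative example, given
///   %phi = phi i32 [ %start, %preheader ], [ %inc, %latch ]
///   %inc = add i32 %phi, %stride   ; %stride loop-invariant
/// passing %inc returns %phi. Add and sub may be commuted; a GEP increment
/// qualifies only when the PHI is its pointer operand.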
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    [[fallthrough]];
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value. Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
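/// As an illustrative example, "icmp ne i32 %iv.next, %limit" with a
/// loop-invariant %limit and %iv a simple counter is already canonical and
/// needs no LFTR, whereas "icmp slt i32 %iv, %n" does.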
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test. This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE.
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS.
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS.
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if the PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Return true if undefined behavior would provably be executed on the path
/// to OnPathTo if Root produced a poison result. Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison. This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist. Note that a false result conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // The basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of
  // those users are provably UB and must execute before our exiting block
  // might exit.

  // The set of all recursive users we've visited (which are assumed to all
  // be poison because of said visit).
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction *, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading to our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users. Safe as false is a conservative result.
    if (I != Root && !any_of(I->operands(), [&KnownPoison](const Use &U) {
          return KnownPoison.contains(U) && propagatesPoison(U);
        }))
      continue;

    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // the way to the exiting bb.
  return false;
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently
/// boils down to checking that all operands are constant and listing
/// instructions that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value *> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example,
  // Arguments may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth + 1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value *, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if this IV has no uses other than the (soon to be rewritten)
/// loop exit test.
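/// As an illustrative example: if the only users of %phi are the exit
/// compare and its own increment, and the increment's only users are the
/// compare and %phi itself, the IV is almost dead and will die once the exit
/// test is rewritten.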
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
  Value *IncV = Phi->getIncomingValue(LatchIdx);

  for (User *U : Phi->users())
    if (U != Cond && U != IncV) return false;

  for (User *U : IncV->users())
    if (U != Cond && U != Phi) return false;
  return true;
}

/// Return true if the given phi is a "counter" in L. A counter is an add
/// recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1. Note that L must have exactly one latch.
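/// As an illustrative example, %iv below is a counter:
///   %iv = phi i64 [ %start, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add nuw nsw i64 %iv, 1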
static bool isLoopCounter(PHINode *Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}

/// Search the loop header for a loop counter (an add rec with a step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I);
       ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that
    // may have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist
    // in the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case
    // above.) We have to ensure that we don't introduce UB by introducing a
    // use on an iteration where said IV produces poison. Our strategy here
    // differs for pointers and integer IVs. For integers, we strip and
    // reinfer as needed, see code in linearFunctionTestReplace. For
    // pointers, we restrict transforms as there is no good way to reinfer
    // inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form.
      // It also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider
      // phi to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter with unit stride) after the backedge of
/// loop L is taken ExitCount times.
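/// As an illustrative example: for a counter starting at %start with unit
/// stride and exit count EC, the pre-increment limit is %start + EC and the
/// post-increment limit is %start + EC + 1.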
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();
  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");

  // IVInit may be a pointer while ExitCount is an integer when
  // FindLoopCounter finds a valid pointer IV. Sign extend ExitCount in order
  // to materialize a GEP. Avoid running SCEVExpander on a new pointer value,
  // instead reusing the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip
    // count, which is an unsigned value. FindLoopCounter only allows
    // induction variables with a positive unit stride. This means we don't
    // have to handle the case of negative offsets (yet) and just need to
    // zero extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers
    // before comparing. This may result in SCEV expansion of pointers, but
    // in practice SCEV will fold the pointer arithmetic away as such:
    //   BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be
    // pointers for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical
    // IV were generated on top of case #2, which is not expected.

    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know a priori that the limit must be a constant when
    // evaluated in the bitwidth of the IV. We prefer (potentially) keeping a
    // truncate of the IV in the loop over a (potentially) expensive
    // expansion of the widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType()) >
        SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
        IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}

/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable. This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a
/// much broader range than just linear tests.
bool IndVarSimplify::linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                               const SCEV *ExitCount,
                                               PHINode *IndVar,
                                               SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction *const IncVar =
      cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use. We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last
  // iteration only) or if LFTR switches to a different IV that was
  // previously dynamically dead (and as such may be arbitrarily poison). We
  // remove any nowrap flags that SCEV didn't infer for the post-inc addrec
  // (even if we use a pre-inc check), because the pre-inc addrec flags may
  // be adopted from the original instruction, while SCEV has to explicitly
  // prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which
  // is not covered by the post-inc addrec. (If the new IV was not
  // dynamically dead, it could not be poison on the first iteration in the
  // first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type. This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can
    // extend the other side of the comparison instead. We still evaluate the
    // limit in the narrower bitwidth, we just prefer a zext/sext outside the
    // loop to a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
        SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
          SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make the old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}

//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
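/// As an illustrative example, a value such as
///   %inv = mul i32 %a, %b
/// defined in the preheader but used only in the exit block can be moved
/// into the exit block, shortening its live range across the loop body.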
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop. Also
    // don't move instructions which might read memory, since the loop may
    // modify memory. Note that it's okay if the instruction might have
    // undefined behavior: LoopSimplify guarantees that the preheader
    // dominates the exit block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
            PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    SE->forgetValue(ToMove);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}

static void replaceExitCond(BranchInst *BI, Value *NewCond,
                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  auto *OldCond = BI->getCondition();
  LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
                    << " with " << *NewCond << "\n");
  BI->setCondition(NewCond);
  if (OldCond->use_empty())
    DeadInsts.emplace_back(OldCond);
}

static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
                                      bool IsTaken) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  auto *OldCond = BI->getCondition();
  return ConstantInt::get(OldCond->getType(),
                          IsTaken ? ExitIfTrue : !ExitIfTrue);
}

static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
  replaceExitCond(BI, NewCond, DeadInsts);
}

static void replaceLoopPHINodesWithPreheaderValues(
    LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
    ScalarEvolution &SE) {
  assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
  auto *LoopPreheader = L->getLoopPreheader();
  auto *LoopHeader = L->getHeader();
  SmallVector<Instruction *> Worklist;
  for (auto &PN : LoopHeader->phis()) {
    auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
    for (User *U : PN.users())
      Worklist.push_back(cast<Instruction>(U));
    SE.forgetValue(&PN);
    PN.replaceAllUsesWith(PreheaderIncoming);
    DeadInsts.emplace_back(&PN);
  }

  // Replacing with the preheader value will often allow IV users to simplify
  // (especially if the preheader value is a constant).
  SmallPtrSet<Instruction *, 16> Visited;
  while (!Worklist.empty()) {
    auto *I = cast<Instruction>(Worklist.pop_back_val());
    if (!Visited.insert(I).second)
      continue;

    // Don't simplify instructions outside the loop.
    if (!L->contains(I))
      continue;

    Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
    if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
      for (User *U : I->users())
        Worklist.push_back(cast<Instruction>(U));
      I->replaceAllUsesWith(Res);
      DeadInsts.emplace_back(I);
    }
  }
}

static Value *
createInvariantCond(const Loop *L, BasicBlock *ExitingBB,
                    const ScalarEvolution::LoopInvariantPredicate &LIP,
                    SCEVExpander &Rewriter) {
  ICmpInst::Predicate InvariantPred = LIP.Pred;
  BasicBlock *Preheader = L->getLoopPreheader();
  assert(Preheader && "Preheader doesn't exist");
  Rewriter.setInsertPoint(Preheader->getTerminator());
  auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
  auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  if (ExitIfTrue)
    InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
  IRBuilder<> Builder(Preheader->getTerminator());
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
                            BI->getCondition()->getName());
}

static std::optional<Value *>
createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
                  const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
                  ScalarEvolution *SE, SCEVExpander &Rewriter) {
  ICmpInst::Predicate Pred = ICmp->getPredicate();
  Value *LHS = ICmp->getOperand(0);
  Value *RHS = ICmp->getOperand(1);

  // 'LHS pred RHS' should now mean that we stay in the loop.
  auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true or false?
  if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter,
                               BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

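  // Illustrative note for the rewrite below: given MaxIter = umin(%a, %b),
  // we form umin(%a - 1, %b - 1) rather than umin(%a, %b) - 1, since the
  // latter rarely simplifies.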
  if (SkipLastIter) {
    // Semantically, skipping the last iteration means "subtract 1, do not
    // bother about unsigned wrap".
    // getLoopInvariantExitCondDuringFirstIterations knows how to deal with
    // umin in a smart way, but umin(a, b) - 1 will likely not simplify.
    // So we manually construct umin(a - 1, b - 1).
    SmallVector<const SCEV *, 4> Elements;
    if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
      for (auto *Op : UMin->operands())
        Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
      MaxIter = SE->getUMinFromMismatchedTypes(Elements);
    } else
      MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS,
                                                               RHSS, L, BI,
                                                               MaxIter);
  if (!LIP)
    return std::nullopt;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
  else
    return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
}

static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
    bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  assert(
      (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
      "Not a loop exit!");

  // For a branch that stays in the loop by TRUE condition, go through AND.
  // For a branch that stays in the loop by FALSE condition, go through OR.
  // Both give similar logic: "stay in loop iff all conditions are
  // true(false)".
  bool Inverted = L->contains(BI->getSuccessor(1));
  SmallVector<ICmpInst *, 4> LeafConditions;
  SmallVector<Value *, 4> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  Value *OldCond = BI->getCondition();
  Visited.insert(OldCond);
  Worklist.push_back(OldCond);

  auto GoThrough = [&](Value *V) {
    Value *LHS = nullptr, *RHS = nullptr;
    if (Inverted) {
      if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
        return false;
    } else {
      if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
        return false;
    }
    if (Visited.insert(LHS).second)
      Worklist.push_back(LHS);
    if (Visited.insert(RHS).second)
      Worklist.push_back(RHS);
    return true;
  };

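  // As an illustrative example: for "br i1 %c" with
  // %c = and(%c1, and(%c2, %c3)) staying in the loop on true, the leaf
  // conditions collected below are %c1, %c2 and %c3 (assuming each has a
  // single use).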
    if (Curr->hasOneUse())
      if (!GoThrough(Curr))
        if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
          LeafConditions.push_back(ICmp);
  } while (!Worklist.empty());

  // If the current basic block has the same exit count as the whole loop, and
  // it consists of multiple icmp's, try to collect all icmp's that give the
  // exact same exit count. For all other icmp's, we could use one less
  // iteration, because their value on the last iteration doesn't really
  // matter.
  SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
  if (!SkipLastIter && LeafConditions.size() > 1 &&
      SE->getExitCount(L, ExitingBB,
                       ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
          MaxIter)
    for (auto *ICmp : LeafConditions) {
      auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
                                             /*ControlsExit*/ false);
      auto *ExitMax = EL.SymbolicMaxNotTaken;
      if (isa<SCEVCouldNotCompute>(ExitMax))
        continue;
      // They could be of different types (specifically this happens after
      // IV widening).
      auto *WiderType =
          SE->getWiderType(ExitMax->getType(), MaxIter->getType());
      auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
      auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
      if (WideExitMax == WideMaxIter)
        ICmpsFailingOnLastIter.insert(ICmp);
    }

  bool Changed = false;
  for (auto *OldCond : LeafConditions) {
    // Skip the last iteration for this icmp under one of two conditions:
    // - We do it for all conditions;
    // - There is another ICmp that would fail on the last iteration, so this
    //   one doesn't really matter.
    bool OptimisticSkipLastIter = SkipLastIter;
    if (!OptimisticSkipLastIter) {
      if (ICmpsFailingOnLastIter.size() > 1)
        OptimisticSkipLastIter = true;
      else if (ICmpsFailingOnLastIter.size() == 1)
        OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
    }
    if (auto Replaced =
            createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
                              OptimisticSkipLastIter, SE, Rewriter)) {
      Changed = true;
      auto *NewCond = *Replaced;
      if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
        NCI->setName(OldCond->getName() + ".first_iter");
      }
      LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
                        << " with " << *NewCond << "\n");
      assert(OldCond->hasOneUse() && "Must be!");
      OldCond->replaceAllUsesWith(NewCond);
      DeadInsts.push_back(OldCond);
      // Make sure we no longer consider this condition as failing on the
      // last iteration.
      ICmpsFailingOnLastIter.erase(OldCond);
    }
  }
  return Changed;
}

bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This duplicates a particular part of SimplifyIndVars' reasoning.
  // We need to duplicate it because, given 'icmp zext(small-iv), C', IVUsers
  // never reaches the icmp, since the zext doesn't fold to an AddRec unless
  // it already has flags. The alternative would be to extend the set of
  // "interesting" IV users to include the icmp, but doing that regresses
  // results in practice by querying SCEVs before the trip counts which rely
  // on them, causing SCEV to cache sub-optimal answers. That concern about
  // caching sub-optimal results is why we only query SCEVs of the loop
  // invariant RHS here.
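  // For example (illustrative): given a narrow IV %iv of type i8 and a loop
  // invariant i32 %n,
  //   %ext = zext i8 %iv to i32
  //   %cmp = icmp slt i32 %ext, %n
  // the first loop below turns the compare into 'icmp ult' once %n is known
  // to lie in [0, 256), and the second loop can then rotate the extend out
  // by comparing 'icmp ult i8 %iv, trunc(%n)' instead.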
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning the cache with sub-optimal results. For the must-execute
    // case, this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case.
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed. We've changed the predicate, but
      // have not changed the exit counts or the values produced by the
      // compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate.
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle the corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the
    // number of instructions. The exception is when we have a zext(add-rec).
    // The reason for allowing this exception is that we know we need to get
    // rid of the zext for SCEV to be able to compute a trip count for said
    // loops; we consider the new trip count valuable enough to increase the
    // instruction count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given an 'icmp unsigned-cond zext(Op), RHS' where zext(trunc(RHS)) ==
    // RHS, replace it with an 'icmp unsigned-cond Op, trunc(RHS)' when the
    // zext is loop varying and RHS is loop invariant. This converts loop
    // varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      auto *NewRHS =
          CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
                           L->getLoopPreheader()->getTerminator());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };

    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note: we are leaving SCEV in an unfortunately imprecise state here,
      // as rotation tends to reveal information about trip counts that was
      // not previously visible.
      continue;
    }
  }

  return Changed;
}

bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Remove all exits which aren't both rewriteable and executed on every
  // iteration.
  llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we'd be changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // Likewise, the loop latch must be dominated by the exiting BB.
    if (!DT->dominates(ExitingBB, L->getLoopLatch()))
      return true;

    if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
      // If already constant, nothing to do. However, if this is an
      // unconditional exit, we can still replace header phis with their
      // preheader value.
      if (!L->contains(BI->getSuccessor(CI->isNullValue())))
        replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
      return true;
    }

    return false;
  });

  if (ExitingBlocks.empty())
    return false;

  // Get a symbolic upper bound on the loop backedge taken count.
  const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(MaxBECount))
    return false;

  // Visit our exit blocks in order of dominance. Since all exits dominate
  // the latch, there is a total dominance order between them.
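  // For example (illustrative), with exiting blocks E1 and E2 where E1
  // dominates E2, we visit E1 first; an exit count proven for E1 can then
  // discharge the dominated test in E2 below.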
  llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
    // std::sort sorts in ascending order, so we want the inverse of
    // the normal dominance relation.
    if (A == B) return false;
    if (DT->properlyDominates(A, B))
      return true;
    assert(DT->properlyDominates(B, A) && "expected total dominance order!");
    return false;
  });
#ifndef NDEBUG
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    assert(DT->dominates(ExitingBlocks[i - 1], ExitingBlocks[i]));
#endif

  bool Changed = false;
  bool SkipLastIter = false;
  const SCEV *CurrMaxExit = SE->getCouldNotCompute();
  auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
    if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
      return;
    if (isa<SCEVCouldNotCompute>(CurrMaxExit))
      CurrMaxExit = MaxExitCount;
    else
      CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
    // If the loop has more than 1 iteration, all further checks will be
    // executed 1 iteration less.
    if (CurrMaxExit == MaxBECount)
      SkipLastIter = true;
  };
  SmallSet<const SCEV *, 8> DominatingExactExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
    const SCEV *MaxExitCount = SE->getExitCount(
        L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
    if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
      // Okay, we do not know the exit count here. Can we at least prove that
      // it will remain the same within the iteration space?
      auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
      auto OptimizeCond = [&](bool SkipLastIter) {
        return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
                                                    MaxBECount, SkipLastIter,
                                                    SE, Rewriter, DeadInsts);
      };

      // TODO: We might have proved that we can skip the last iteration for
      // this check. In this case, we only want to check the condition on the
      // pre-last iteration (MaxBECount - 1). However, there is a nasty
      // corner case:
      //
      //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
      //
      // If we could not prove that len != 0, then we also could not prove
      // that (len - 1) is not UINT_MAX. If we simply query (len - 1), then
      // OptimizeCond will likely not prove anything for it, even if it could
      // prove the same fact for len.
      //
      // As a temporary solution, we query both the last and the pre-last
      // iterations in the hope that we will be able to prove triviality for
      // at least one of them. We can stop querying MaxBECount for this case
      // once SCEV understands that (MaxBECount - 1) will not overflow here.
      if (OptimizeCond(false))
        Changed = true;
      else if (SkipLastIter && OptimizeCond(true))
        Changed = true;
      UpdateSkipLastIter(MaxExitCount);
      continue;
    }

    UpdateSkipLastIter(ExactExitCount);

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this. This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // We know that the backedge can't be taken, so we replace all
    // the header PHIs with values coming from the preheader.
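    // For example (illustrative): in 'for (i = 0; i != n; ++i)', if the loop
    // is entered under a guard proving n == 0, the exact exit count folds to
    // zero and the exit branch can be rewritten to be taken unconditionally.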
    if (ExactExitCount->isZero()) {
      foldExit(L, ExitingBB, true, DeadInsts);
      replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
      Changed = true;
      continue;
    }

    assert(ExactExitCount->getType()->isIntegerTy() &&
           MaxBECount->getType()->isIntegerTy() &&
           "Exit counts must be integers");

    Type *WiderType =
        SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
    ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
    MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
    assert(MaxBECount->getType() == ExactExitCount->getType());

    // Can we prove that some other exit must be taken strictly before this
    // one?
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
                                     ExactExitCount)) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // As we run, keep track of which exit counts we've encountered. If we
    // find a duplicate, we've found an exit which would have exited on the
    // exiting iteration, but (from the visit order) strictly follows another
    // which does the same and is thus dead.
    if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // TODO: There might be another opportunity to leverage SCEV's reasoning
    // here. If we kept track of the min of dominating exits so far, we could
    // discharge exits with EC >= MDEC. This is less powerful than the
    // existing transform (since later exits aren't considered), but
    // potentially more powerful for any case where SCEV can prove a >=u b,
    // but neither a == b nor a >u b. Such a case is not currently known.
  }
  return Changed;
}

bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration. Note that this
  // neither a) tells us the loop exits on the first iteration (unless *all*
  // exits are predicatable) nor b) tells us *which* exit might be taken.
  // This transformation looks a lot like a restricted form of dead loop
  // elimination, but restricted to read-only loops and without necessarily
  // needing to kill the loop entirely.
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow. We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.
    // Otherwise, we'd be changing how many times the innermost loop runs
    // before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them. This assumes trivially LCSSA phis
    // have already been removed. TODO: generalize.
    BasicBlock *ExitBlock =
        BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) ||
        !Rewriter.isSafeToExpand(ExitCount))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it. Consider
  // two exits (a) and (b) which would both exit on the same iteration. If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path,
  // then we could convert a loop from exiting through (a) to one exiting
  // through (b). Note that this problem exists only for exits with the same
  // exit count, and we could be more aggressive when exit counts are known
  // to be unequal.
  llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
    // std::sort sorts in ascending order, so we want the inverse of
    // the normal dominance relation, plus a tie breaker for blocks
    // unordered by dominance.
    if (DT->properlyDominates(A, B)) return true;
    if (DT->properlyDominates(B, A)) return false;
    return A->getName() < B->getName();
  });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge). If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i - 1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count. The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    return DT->dominates(ExitingBB, L->getLoopLatch());
  }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable. Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests. Any
  // suggestions on how to improve this? I can obviously bail out for outer
  // loops, but that seems less than ideal. MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO: isGuaranteedToTransferExecutionToSuccessor
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks. A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch. Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic. We want to
  //    predicate even if we can't insert a loop invariant expression, as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with
  //    preheaders, and we're in trouble if we can't find the induction
  //    variable even when we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only.
  // Note: Since the result returned by ScalarEvolution may depend on the
  // order in which previous results are added to its cache, the call to
  // getBackedgeTakenCount() may change the results of following SCEV queries.
  const SCEV *BackedgeTakenCount;
  if (VerifyIndvars)
    BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop. Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts.
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV. Some nested loops may
    // share the same folded exit basic block, so we need to notify the
    // topmost loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one. Otherwise, we'd be changing how many times the
      // innermost loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them. Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions. Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeader->getTerminator()))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead,
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR, and any other changes, have not interfered with SCEV's
  // ability to compute trip count. We may have *changed* the exit count, but
  // only by reducing it.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
                     WidenIndVars && AllowIVWidening);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}