//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened, "Number of indvars widened");
STATISTIC(NumReplaced, "Number of exit values replaced");
STATISTIC(NumLFTR, "Number of loop exit tests replaced");
STATISTIC(NumElimExt, "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV, "Number of congruent IVs eliminated");

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(
        clEnumValN(NeverRepl, "never", "never replace exit value"),
        clEnumValN(OnlyCheapRepl, "cheap",
                   "only replace exit value when the cost is cheap"),
        clEnumValN(
            UnusedIndVarInLoop, "unusedindvarinloop",
            "only replace exit value when it is an unused "
            "induction variable in the loop and has cheap replacement cost"),
        clEnumValN(NoHardUse, "noharduse",
                   "only replace exit values when loop def likely dead"),
        clEnumValN(AlwaysRepl, "always",
                   "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
    "indvars-post-increment-ranges", cl::Hidden,
    cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
    cl::init(true));

static cl::opt<bool>
    DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
                cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
    LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                    cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
    AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                    cl::desc("Allow widening of indvars to eliminate s/zext"));

namespace {

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool WidenIndVars;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);

  /// Try to improve our exit conditions by converting condition from signed
  /// to unsigned or rotating computation out of the loop.
  /// (See inline comment about why this is duplicated from simplifyAndExtend.)
  bool canonicalizeExitCondition(Loop *L);

  /// Try to eliminate loop exits based on analyzable exit counts.
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);

  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount, PHINode *IndVar,
                                 SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t.
  uint64_t UIntVal;
  if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, insert a corresponding
/// integer induction variable if possible.
/// For example,
///   for (double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for (int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge ^ 1;

  // Check the incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd)
    return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find the exit condition, which is an fcmp. If it doesn't exist, or if it
  // isn't only used by a branch, we can't transform it.
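  // For example (illustrative IR), the pattern being matched looks like:
  //   %cmp = fcmp olt double %incr, 1.000000e+04
  //   br i1 %cmp, label %loop, label %exit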
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false; // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue - InitValue);
    // Check for infinite loop, either:
    //   while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false; // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue + IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue - ExitValue);
    // Check for infinite loop, either:
    //   while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false; // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue + IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName() + ".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
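  // (Illustrative.) A header phi such as
  //   %iv = phi double [ 0.0, %preheader ], [ %iv.next, %latch ]
  // may be replaced by an i32 recurrence, with a sitofp covering any remaining
  // floating-point uses.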
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop-invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all? (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.) Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get the condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
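        // (Illustrative.) For a header phi such as
        //   %iv = phi i32 [ %start, %preheader ], [ %iv.next, %latch ]
        // the value observed along a first-iteration-only exit is %start.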
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType ||
      Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its user(s), or 'signed'
  // if there are multiple users with both sign- and zero extensions,
  // in order not to introduce nondeterministic behaviour based on the
  // unspecified order of a PHI node's users-iterator.
  WI.IsSigned |= IsSigned;
}

//===----------------------------------------------------------------------===//
// Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
      : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode *, 8> LoopPhis;
  for (PHINode &PN : L->getHeader()->phis())
    LoopPhis.push_back(&PN);

  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs.
    // This forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extension until needed, and avoid running
    // other SCEV-based analyses prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
                                   &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated PHI node if so. Otherwise, return null. Note
/// that this is less general than SCEV's AddRec checking.
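/// For example (illustrative), given `%inc = add i32 %phi, %step` where `%phi`
/// is a phi in the header of L and `%step` is loop-invariant, this returns
/// `%phi`.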
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    [[fallthrough]];
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value. Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop-invariant test back to a runtime
  // test. This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE.
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop-invariant RHS.
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS.
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value *> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example,
  // Arguments may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth + 1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value *, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if the given phi is a "counter" in L. A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1. Note that L must have exactly one latch.
static bool isLoopCounter(PHINode *Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}

/// Search the loop header for a loop counter (an add rec with a step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison. Our strategy here differs for
    // pointers and integer IVs. For integers, we strip and reinfer as needed;
    // see the code in linearFunctionTestReplace. For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (isAlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter with unit stride) after the backedge of loop L
/// is taken ExitCount times.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();
  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
  // GEP.
  // Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount, on the other hand, represents the loop trip
    // count, which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive unit stride of one. This means we don't
    // have to handle the case of negative offsets (yet) and just need to zero
    // extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in
    // practice SCEV will fold the pointer arithmetic away as such:
    //   BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know a priori that the limit must be a constant when evaluated
    // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType()) >
        SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}

/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable.
/// This pass is able to rewrite the exit tests of any loop where the SCEV
/// analysis can determine a loop-invariant trip count of the loop, which is
/// actually a much broader range than just linear tests.
bool IndVarSimplify::linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                               const SCEV *ExitCount,
                                               PHINode *IndVar,
                                               SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction *const IncVar =
      cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use. We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt =
      genLoopLimit(IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type. This is safe
  // since we know (from the exit count bitwidth) that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead. We still evaluate the limit
    // in the narrower bitwidth; we just prefer a zext/sext outside the loop to
    // a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
    const SCEV *ZExtTrunc =
        SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
          SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make the old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}

//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
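/// For example (illustrative), an address computation materialized in the
/// preheader whose only users sit after the loop can be moved into the exit
/// block.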
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop. Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
            PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    SE->forgetValue(ToMove);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}

static void replaceExitCond(BranchInst *BI, Value *NewCond,
                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  auto *OldCond = BI->getCondition();
  LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
                    << " with " << *NewCond << "\n");
  BI->setCondition(NewCond);
  if (OldCond->use_empty())
    DeadInsts.emplace_back(OldCond);
}

static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
                                      bool IsTaken) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  auto *OldCond = BI->getCondition();
  return ConstantInt::get(OldCond->getType(),
                          IsTaken ? ExitIfTrue : !ExitIfTrue);
}

static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
  replaceExitCond(BI, NewCond, DeadInsts);
}

static void replaceLoopPHINodesWithPreheaderValues(
    LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
    ScalarEvolution &SE) {
  assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
  auto *LoopPreheader = L->getLoopPreheader();
  auto *LoopHeader = L->getHeader();
  SmallVector<Instruction *> Worklist;
  for (auto &PN : LoopHeader->phis()) {
    auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
    for (User *U : PN.users())
      Worklist.push_back(cast<Instruction>(U));
    SE.forgetValue(&PN);
    PN.replaceAllUsesWith(PreheaderIncoming);
    DeadInsts.emplace_back(&PN);
  }

  // Replacing with the preheader value will often allow IV users to simplify
  // (especially if the preheader value is a constant).
  SmallPtrSet<Instruction *, 16> Visited;
  while (!Worklist.empty()) {
    auto *I = cast<Instruction>(Worklist.pop_back_val());
    if (!Visited.insert(I).second)
      continue;

    // Don't simplify instructions outside the loop.
    if (!L->contains(I))
      continue;

    Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
    if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
      for (User *U : I->users())
        Worklist.push_back(cast<Instruction>(U));
      I->replaceAllUsesWith(Res);
      DeadInsts.emplace_back(I);
    }
  }
}

static Value *
createInvariantCond(const Loop *L, BasicBlock *ExitingBB,
                    const ScalarEvolution::LoopInvariantPredicate &LIP,
                    SCEVExpander &Rewriter) {
  ICmpInst::Predicate InvariantPred = LIP.Pred;
  BasicBlock *Preheader = L->getLoopPreheader();
  assert(Preheader && "Preheader doesn't exist");
  Rewriter.setInsertPoint(Preheader->getTerminator());
  auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
  auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  if (ExitIfTrue)
    InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
  IRBuilder<> Builder(Preheader->getTerminator());
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
                            BI->getCondition()->getName());
}

static std::optional<Value *>
createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
                  const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
                  ScalarEvolution *SE, SCEVExpander &Rewriter) {
  ICmpInst::Predicate Pred = ICmp->getPredicate();
  Value *LHS = ICmp->getOperand(0);
  Value *RHS = ICmp->getOperand(1);

  // 'LHS pred RHS' should now mean that we stay in loop.
  auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true or false?
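  // (Illustrative.) If, say, LHSS is known to lie in [0, 10) and RHSS is the
  // constant 100, a `ult` predicate between them is trivially true and the
  // exit condition folds to a constant.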
  if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    // Semantically, skipping the last iteration is "subtract 1, do not bother
    // about unsigned wrap". getLoopInvariantExitCondDuringFirstIterations knows
    // how to deal with umin in a smart way, but umin(a, b) - 1 will likely not
    // simplify. So we manually construct umin(a - 1, b - 1).
    SmallVector<const SCEV *, 4> Elements;
    if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
      for (auto *Op : UMin->operands())
        Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
      MaxIter = SE->getUMinFromMismatchedTypes(Elements);
    } else
      MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return std::nullopt;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
  else
    return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
}

static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
    bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  assert(
      (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
      "Not a loop exit!");

  // For a branch that stays in the loop by TRUE condition, go through AND. For
  // a branch that stays in the loop by FALSE condition, go through OR. Both
  // give similar logic: "stay in loop iff all conditions are true(false)".
  bool Inverted = L->contains(BI->getSuccessor(1));
  SmallVector<ICmpInst *, 4> LeafConditions;
  SmallVector<Value *, 4> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  Value *OldCond = BI->getCondition();
  Visited.insert(OldCond);
  Worklist.push_back(OldCond);

  auto GoThrough = [&](Value *V) {
    Value *LHS = nullptr, *RHS = nullptr;
    if (Inverted) {
      if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
        return false;
    } else {
      if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
        return false;
    }
    if (Visited.insert(LHS).second)
      Worklist.push_back(LHS);
    if (Visited.insert(RHS).second)
      Worklist.push_back(RHS);
    return true;
  };

  do {
    Value *Curr = Worklist.pop_back_val();
    // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
    // those with one use, to avoid instruction duplication.
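    // (Illustrative.) For a branch that stays in the loop on its TRUE edge,
    // a condition such as
    //   %cond = and i1 %c1, %c2
    // decomposes into the leaf conditions %c1 and %c2.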
    if (Curr->hasOneUse())
      if (!GoThrough(Curr))
        if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
          LeafConditions.push_back(ICmp);
  } while (!Worklist.empty());

  // If the current basic block has the same exit count as the whole loop, and
  // it consists of multiple icmp's, try to collect all icmp's that give the
  // exact same exit count. For all other icmp's, we could use one less
  // iteration, because their value on the last iteration doesn't really
  // matter.
  SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
  if (!SkipLastIter && LeafConditions.size() > 1 &&
      SE->getExitCount(L, ExitingBB,
                       ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
          MaxIter)
    for (auto *ICmp : LeafConditions) {
      auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
                                             /*ControlsExit*/ false);
      auto *ExitMax = EL.SymbolicMaxNotTaken;
      if (isa<SCEVCouldNotCompute>(ExitMax))
        continue;
      // They could be of different types (specifically this happens after
      // IV widening).
      auto *WiderType =
          SE->getWiderType(ExitMax->getType(), MaxIter->getType());
      auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
      auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
      if (WideExitMax == WideMaxIter)
        ICmpsFailingOnLastIter.insert(ICmp);
    }

  bool Changed = false;
  for (auto *OldCond : LeafConditions) {
    // Skip the last iteration for this icmp under one of two conditions:
    // - We do it for all conditions;
    // - There is another ICmp that would fail on the last iter, so this one
    //   doesn't really matter.
    bool OptimisticSkipLastIter = SkipLastIter;
    if (!OptimisticSkipLastIter) {
      if (ICmpsFailingOnLastIter.size() > 1)
        OptimisticSkipLastIter = true;
      else if (ICmpsFailingOnLastIter.size() == 1)
        OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
    }
    if (auto Replaced =
            createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
                              OptimisticSkipLastIter, SE, Rewriter)) {
      Changed = true;
      auto *NewCond = *Replaced;
      if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
        NCI->setName(OldCond->getName() + ".first_iter");
      }
      LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
                        << " with " << *NewCond << "\n");
      assert(OldCond->hasOneUse() && "Must be!");
      OldCond->replaceAllUsesWith(NewCond);
      DeadInsts.push_back(OldCond);
      // Make sure we no longer consider this condition as failing on the
      // last iteration.
      ICmpsFailingOnLastIter.erase(OldCond);
    }
  }
  return Changed;
}

bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This duplicates a particular part of SimplifyIndVars' reasoning.
  // We need to duplicate it because, given 'icmp zext(small-iv), C', IVUsers
  // never reaches the icmp, since the zext doesn't fold to an AddRec unless
  // it already has flags. The alternative would be to extend the set of
  // "interesting" IV users to include the icmp, but doing that regresses
  // results in practice: it queries SCEVs before the trip counts that rely
  // on them, which leads SCEV to cache sub-optimal answers. That concern
  // about caching sub-optimal results is why we only query SCEVs of the
  // loop-invariant RHS here.
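  // As an illustrative sketch of the two transforms below: for a narrow IV
  // %iv and a loop-invariant %len whose unsigned range provably fits in
  // [0, 255],
  //   icmp slt (zext i8 %iv to i32), %len
  // is first canonicalized to 'icmp ult', and can then be rotated to
  //   icmp ult i8 %iv, (trunc i32 %len to i8)
  // with the trunc placed in the preheader, moving the extension out of the
  // loop.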
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning the cache with sub-optimal results. For the must-execute
    // case, this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case.
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed. We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate.
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle the corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the
    // number of instructions. The exception is when we have a zext(add-rec).
    // The reason for allowing this exception is that we know we need to get
    // rid of the zext for SCEV to be able to compute a trip count for said
    // loops; we consider the new trip count valuable enough to increase the
    // instruction count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given an icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS,
    // replace it with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when the zext is loop varying and RHS is loop invariant. This converts
    // loop-varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      auto *NewRHS =
          CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
                           L->getLoopPreheader()->getTerminator());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };

    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note: we are leaving SCEV in an unfortunately imprecise state here,
      // as rotation tends to reveal information about trip counts that was
      // not previously visible.
      continue;
    }
  }

  return Changed;
}

bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Remove all exits which aren't both rewriteable and executed on every
  // iteration.
  llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // Likewise, the loop latch must be dominated by the exiting BB.
    if (!DT->dominates(ExitingBB, L->getLoopLatch()))
      return true;

    if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
      // If already constant, nothing to do. However, if this is an
      // unconditional exit, we can still replace header phis with their
      // preheader value.
      if (!L->contains(BI->getSuccessor(CI->isNullValue())))
        replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
      return true;
    }

    return false;
  });

  if (ExitingBlocks.empty())
    return false;

  // Get a symbolic upper bound on the loop backedge taken count.
  const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(MaxBECount))
    return false;

  // Visit our exit blocks in order of dominance. Since all exits must
  // dominate the latch, there is a total dominance order between them.
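  // For instance (an illustrative sketch), if the loop body is a straight
  // line BB1 -> BB2 -> latch with exits out of both BB1 and BB2, then BB1
  // properly dominates BB2 and is visited first.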
  llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
    // std::sort sorts in ascending order, so we want the inverse of
    // the normal dominance relation.
    if (A == B) return false;
    if (DT->properlyDominates(A, B))
      return true;
    else {
      assert(DT->properlyDominates(B, A) &&
             "expected total dominance order!");
      return false;
    }
  });
#ifndef NDEBUG
  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
    assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
  }
#endif

  bool Changed = false;
  bool SkipLastIter = false;
  const SCEV *CurrMaxExit = SE->getCouldNotCompute();
  auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
    if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
      return;
    if (isa<SCEVCouldNotCompute>(CurrMaxExit))
      CurrMaxExit = MaxExitCount;
    else
      CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
    // If the loop has more than 1 iteration, all further checks will be
    // executed 1 iteration less.
    if (CurrMaxExit == MaxBECount)
      SkipLastIter = true;
  };
  SmallSet<const SCEV *, 8> DominatingExactExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
    const SCEV *MaxExitCount = SE->getExitCount(
        L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
    if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
      // Okay, we do not know the exit count here. Can we at least prove that
      // it will remain the same within the iteration space?
      auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
      auto OptimizeCond = [&](bool SkipLastIter) {
        return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
                                                    MaxBECount, SkipLastIter,
                                                    SE, Rewriter, DeadInsts);
      };

      // TODO: We might have proved that we can skip the last iteration for
      // this check. In this case, we only want to check the condition on the
      // pre-last iteration (MaxBECount - 1). However, there is a nasty
      // corner case:
      //
      //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
      //
      // If we could not prove that len != 0, then we also could not prove
      // that (len - 1) is not UINT_MAX. If we simply query (len - 1), then
      // OptimizeCond will likely not prove anything for it, even if it could
      // prove the same fact for len.
      //
      // As a temporary solution, we query both the last and the pre-last
      // iterations in the hope that we will be able to prove triviality for
      // at least one of them. We can stop querying MaxBECount for this case
      // once SCEV understands that (MaxBECount - 1) will not overflow here.
      if (OptimizeCond(false))
        Changed = true;
      else if (SkipLastIter && OptimizeCond(true))
        Changed = true;
      UpdateSkipLastIter(MaxExitCount);
      continue;
    }

    UpdateSkipLastIter(ExactExitCount);

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this. This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // We know that the backedge can't be taken, so we replace all
    // the header PHIs with values coming from the preheader.
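    // Illustrative sketch: for an exit controlled by 'icmp eq %i, 0' with %i
    // starting at 0, the exit count is zero, so the exit must be taken before
    // the backedge on the first iteration, and each header phi can be
    // replaced by its preheader input.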
    if (ExactExitCount->isZero()) {
      foldExit(L, ExitingBB, true, DeadInsts);
      replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
      Changed = true;
      continue;
    }

    assert(ExactExitCount->getType()->isIntegerTy() &&
           MaxBECount->getType()->isIntegerTy() &&
           "Exit counts must be integers");

    Type *WiderType =
        SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
    ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
    MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
    assert(MaxBECount->getType() == ExactExitCount->getType());

    // Can we prove that some other exit must be taken strictly before this
    // one?
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
                                     ExactExitCount)) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // As we run, keep track of which exit counts we've encountered. If we
    // find a duplicate, we've found an exit which would have exited on the
    // exiting iteration, but (from the visit order) strictly follows another
    // which does the same and is thus dead.
    if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // TODO: There might be another opportunity to leverage SCEV's reasoning
    // here. If we kept track of the min of dominating exits so far, we could
    // discharge exits with EC >= MDEC. This is less powerful than the
    // existing transform (since later exits aren't considered), but
    // potentially more powerful for any case where SCEV can prove a >=u b,
    // but neither a == b nor a >u b. Such a case is not currently known.
  }
  return Changed;
}

bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration. Note that this
  // doesn't either a) tell us the loop exits on the first iteration (unless
  // *all* exits are predicatable) or b) tell us *which* exit might be taken.
  // This transformation looks a lot like a restricted form of dead loop
  // elimination, but restricted to read-only loops and without necessarily
  // needing to kill the loop entirely.
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow. We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.
    // Otherwise, we're changing how many times the innermost loop runs
    // before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them. This assumes trivially lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
        BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) ||
        !Rewriter.isSafeToExpand(ExitCount))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it. Consider
  // two exits (a) and (b) which would both exit on the same iteration. If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path,
  // then we could convert a loop from exiting through (a) to one exiting
  // through (b). Note that this problem exists only for exits with the same
  // exit count, and we could be more aggressive when exit counts are known
  // to be unequal.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse of
               // the normal dominance relation, plus a tie breaker for blocks
               // unordered by dominance.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               return A->getName() < B->getName();
             });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge). If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count. The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    return DT->dominates(ExitingBB, L->getLoopLatch());
  }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable. Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
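  // Illustrative sketch of the transform below: in a read-only search loop
  //   for (i = 0; i != n; i++) { if (a[i] == key) break; }
  // the loop-varying test 'a[i] == key' can be replaced by a loop-invariant
  // comparison of that exit's exit count against the exact backedge-taken
  // count; with no side effects, which iteration we actually leave on is
  // unobservable.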
  // @Reviewers - As structured, this is O(I^2) for loop nests. Any
  // suggestions on how to improve this? I can obviously bail out for outer
  // loops, but that seems less than ideal. MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO: use isGuaranteedToTransferExecutionToSuccessor
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks. A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch. Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic. We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with
  //    preheaders, and we're in trouble if we can't find the induction
  //    variable even when we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
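  // Illustrative sketch: a loop such as
  //   for (float f = 0.0f; f < 100.0f; f += 1.0f)
  // can typically be rewritten to iterate over an integer IV, with float
  // uses of the IV recomputed from the integer value where needed.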
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop. Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts.
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV. Some nested loops may
    // share the same folded exit basic block, thus we need to notify the
    // topmost loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one. Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them. Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.
      // Note: This heuristic is questionable in that our definition of
      // "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeader->getTerminator()))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
                     WidenIndVars && AllowIVWidening);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}