//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(
        clEnumValN(NeverRepl, "never", "never replace exit value"),
        clEnumValN(OnlyCheapRepl, "cheap",
                   "only replace exit value when the cost is cheap"),
        clEnumValN(
            UnusedIndVarInLoop, "unusedindvarinloop",
            "only replace exit value when it is an unused "
            "induction variable in the loop and has cheap replacement cost"),
        clEnumValN(NoHardUse, "noharduse",
                   "only replace exit values when loop def likely dead"),
        clEnumValN(AlwaysRepl, "always",
                   "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
    "indvars-post-increment-ranges", cl::Hidden,
    cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
    cl::init(true));

static cl::opt<bool>
    DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
                cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
    LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                    cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
    AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                    cl::desc("Allow widening of indvars to eliminate s/zext"));

namespace {

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool WidenIndVars;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to improve our exit conditions by converting condition from signed
  /// to unsigned or rotating computation out of the loop.
  /// (See inline comment about why this is duplicated from simplifyAndExtend)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzable exit counts.
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
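  /// For example (an illustrative sketch, not a case taken from this file):
  /// in a read-only loop such as
  ///   while (i < len) { sum += a[i]; ++i; }
  /// the precise iteration at which we exit can be unobservable, so the exit
  /// test may be replaced with a loop-invariant one.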
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
///   for(double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for(int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find the exit condition, which is an fcmp. If it doesn't exist, or if it
  // isn't only used by a branch, we can't transform it.
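  // Illustrative IR shape being matched at this point (a hypothetical
  // example, not from any particular test):
  //   %iv = phi double [ 0.0, %preheader ], [ %iv.next, %backedge ]
  //   %iv.next = fadd double %iv, 1.0
  //   %cmp = fcmp olt double %iv.next, 1.0e4
  //   br i1 %cmp, label %loop, label %exit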
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find the new predicate for the integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
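  //
  // E.g. (an illustrative sketch): a header PHI such as
  //   %d = phi double [ 0.0, %preheader ], [ %d.next, %latch ]
  // becomes an i32 recurrence, with a sitofp inserted for any remaining FP
  // uses of %d.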
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
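        // E.g. (an illustrative sketch): for a header PHI
        //   %h = phi i32 [ %init, %preheader ], [ %next, %latch ]
        // whose value flows out along such an exit, the exit value must be
        // %init, since the backedge has not yet been taken.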
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetics on the wider indvar are more
  // expensive than those on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType ||
      Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its user(s), or 'signed'
  // if there are multiple users with both sign- and zero extensions,
  // in order not to introduce nondeterministic behaviour based on the
  // unspecified order of a PHI node's users-iterator.
  WI.IsSigned |= IsSigned;
}

//===----------------------------------------------------------------------===//
//  Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
      : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode *, 8> LoopPhis;
  for (PHINode &PN : L->getHeader()->phis())
    LoopPhis.push_back(&PN);

  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extend evaluation until needed, and avoid running
    // other SCEV based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
                                   &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated PHI node if so. Otherwise, return null. Note
/// that this is less general than SCEV's AddRec checking.
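/// E.g. (an illustrative sketch): given
///   %iv.next = add i32 %iv, %invariant.step
/// where %iv is a PHI in the loop header, this returns %iv.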
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    [[fallthrough]];
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value. Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test.  This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
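/// E.g. (an illustrative sketch): `add i32 %x, 1` is concrete when %x is
/// proven concrete, while loads, call results, and Arguments are
/// conservatively treated as possibly-undef.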
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example,
  // Arguments may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value*, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if the given phi is a "counter" in L. A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1. Note that L must have exactly one latch.
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}

/// Search the loop header for a loop counter (an add rec w/ step of one)
/// suitable for use by LFTR.  If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program.  (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison.  Our strategy here differs for
    // pointers and integer IVs.  For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace.  For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (isAlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/ unit stride) after the backedge of loop L
/// is taken ExitCount times.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();
  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV.
  if (IndVar->getType()->isPointerTy()) {
    const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
    const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(),
                                  ExitingBB->getTerminator());
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing.  This may result in SCEV expansion of pointers, but in
    // practice SCEV will fold the pointer arithmetic away as such:
    //   BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know a priori that the limit must be a constant when evaluated
    // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType())
        > SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}

/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable.  This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction * const IncVar =
      cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
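  // Illustrative post-LFTR shape (a hypothetical example): when post-inc is
  // safe, the rewritten exit in the latch looks like
  //   %iv.next = add i64 %iv, 1
  //   %exitcond = icmp ne i64 %iv.next, %limit
  //   br i1 %exitcond, label %loop, label %exit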
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use.  We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead.  We still evaluate the limit
    // in the narrower bitwidth; we just prefer a zext/sext outside the loop to
    // a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
    const SCEV *ZExtTrunc =
        SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
          SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make the old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}

//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop.  Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
            PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    SE->forgetValue(ToMove);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}

static void replaceExitCond(BranchInst *BI, Value *NewCond,
                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  auto *OldCond = BI->getCondition();
  LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
                    << " with " << *NewCond << "\n");
  BI->setCondition(NewCond);
  if (OldCond->use_empty())
    DeadInsts.emplace_back(OldCond);
}

static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
                                      bool IsTaken) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  auto *OldCond = BI->getCondition();
  return ConstantInt::get(OldCond->getType(),
                          IsTaken ? ExitIfTrue : !ExitIfTrue);
}

static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
  replaceExitCond(BI, NewCond, DeadInsts);
}

static void replaceLoopPHINodesWithPreheaderValues(
    LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
    ScalarEvolution &SE) {
  assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
  auto *LoopPreheader = L->getLoopPreheader();
  auto *LoopHeader = L->getHeader();
  SmallVector<Instruction *> Worklist;
  for (auto &PN : LoopHeader->phis()) {
    auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
    for (User *U : PN.users())
      Worklist.push_back(cast<Instruction>(U));
    SE.forgetValue(&PN);
    PN.replaceAllUsesWith(PreheaderIncoming);
    DeadInsts.emplace_back(&PN);
  }

  // Replacing with the preheader value will often allow IV users to simplify
  // (especially if the preheader value is a constant).
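  // E.g. (an illustrative sketch): once %iv is replaced by a constant start
  // value, a user such as
  //   %t = add i32 %iv, 1
  // constant-folds via simplifyInstruction below, and %t's users are
  // revisited in turn.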
  SmallPtrSet<Instruction *, 16> Visited;
  while (!Worklist.empty()) {
    auto *I = cast<Instruction>(Worklist.pop_back_val());
    if (!Visited.insert(I).second)
      continue;

    // Don't simplify instructions outside the loop.
    if (!L->contains(I))
      continue;

    Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
    if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
      for (User *U : I->users())
        Worklist.push_back(cast<Instruction>(U));
      I->replaceAllUsesWith(Res);
      DeadInsts.emplace_back(I);
    }
  }
}

static Value *
createInvariantCond(const Loop *L, BasicBlock *ExitingBB,
                    const ScalarEvolution::LoopInvariantPredicate &LIP,
                    SCEVExpander &Rewriter) {
  ICmpInst::Predicate InvariantPred = LIP.Pred;
  BasicBlock *Preheader = L->getLoopPreheader();
  assert(Preheader && "Preheader doesn't exist");
  Rewriter.setInsertPoint(Preheader->getTerminator());
  auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
  auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  if (ExitIfTrue)
    InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
  IRBuilder<> Builder(Preheader->getTerminator());
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
                            BI->getCondition()->getName());
}

static std::optional<Value *>
createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
                  const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
                  ScalarEvolution *SE, SCEVExpander &Rewriter) {
  ICmpInst::Predicate Pred = ICmp->getPredicate();
  Value *LHS = ICmp->getOperand(0);
  Value *RHS = ICmp->getOperand(1);

  // 'LHS pred RHS' should now mean that we stay in loop.
  auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true or false?
  if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    // Semantically, skipping the last iteration means "subtract 1, do not
    // bother about unsigned wrap".
    // getLoopInvariantExitCondDuringFirstIterations knows how to deal with
    // umin in a smart way, but umin(a, b) - 1 will likely not simplify.
    // So we manually construct umin(a - 1, b - 1).
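    // I.e. (illustrative): MaxIter = umin(%a, %b) is rewritten below as
    //   umin(%a - 1, %b - 1)
    // rather than umin(%a, %b) - 1, which would rarely fold further.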
1325 SmallVector<const SCEV *, 4> Elements; 1326 if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) { 1327 for (auto *Op : UMin->operands()) 1328 Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType()))); 1329 MaxIter = SE->getUMinFromMismatchedTypes(Elements); 1330 } else 1331 MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType())); 1332 } 1333 1334 // Check if there is a loop-invariant predicate equivalent to our check. 1335 auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS, 1336 L, BI, MaxIter); 1337 if (!LIP) 1338 return std::nullopt; 1339 1340 // Can we prove it to be trivially true? 1341 if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI)) 1342 return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false); 1343 else 1344 return createInvariantCond(L, ExitingBB, *LIP, Rewriter); 1345 } 1346 1347 static bool optimizeLoopExitWithUnknownExitCount( 1348 const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter, 1349 bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter, 1350 SmallVectorImpl<WeakTrackingVH> &DeadInsts) { 1351 assert( 1352 (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) && 1353 "Not a loop exit!"); 1354 1355 // For branch that stays in loop by TRUE condition, go through AND. For branch 1356 // that stays in loop by FALSE condition, go through OR. Both gives the 1357 // similar logic: "stay in loop iff all conditions are true(false)". 1358 bool Inverted = L->contains(BI->getSuccessor(1)); 1359 SmallVector<ICmpInst *, 4> LeafConditions; 1360 SmallVector<Value *, 4> Worklist; 1361 SmallPtrSet<Value *, 4> Visited; 1362 Value *OldCond = BI->getCondition(); 1363 Visited.insert(OldCond); 1364 Worklist.push_back(OldCond); 1365 1366 auto GoThrough = [&](Value *V) { 1367 Value *LHS = nullptr, *RHS = nullptr; 1368 if (Inverted) { 1369 if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS)))) 1370 return false; 1371 } else { 1372 if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS)))) 1373 return false; 1374 } 1375 if (Visited.insert(LHS).second) 1376 Worklist.push_back(LHS); 1377 if (Visited.insert(RHS).second) 1378 Worklist.push_back(RHS); 1379 return true; 1380 }; 1381 1382 do { 1383 Value *Curr = Worklist.pop_back_val(); 1384 // Go through AND/OR conditions. Collect leaf ICMPs. We only care about 1385 // those with one use, to avoid instruction duplication. 1386 if (Curr->hasOneUse()) 1387 if (!GoThrough(Curr)) 1388 if (auto *ICmp = dyn_cast<ICmpInst>(Curr)) 1389 LeafConditions.push_back(ICmp); 1390 } while (!Worklist.empty()); 1391 1392 // If the current basic block has the same exit count as the whole loop, and 1393 // it consists of multiple icmp's, try to collect all icmp's that give exact 1394 // same exit count. For all other icmp's, we could use one less iteration, 1395 // because their value on the last iteration doesn't really matter. 1396 SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter; 1397 if (!SkipLastIter && LeafConditions.size() > 1 && 1398 SE->getExitCount(L, ExitingBB, 1399 ScalarEvolution::ExitCountKind::SymbolicMaximum) == 1400 MaxIter) 1401 for (auto *ICmp : LeafConditions) { 1402 auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted, 1403 /*ControlsExit*/ false); 1404 auto *ExitMax = EL.SymbolicMaxNotTaken; 1405 if (isa<SCEVCouldNotCompute>(ExitMax)) 1406 continue; 1407 // They could be of different types (specifically this happens after 1408 // IV widening). 
      auto *WiderType =
          SE->getWiderType(ExitMax->getType(), MaxIter->getType());
      auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
      auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
      if (WideExitMax == WideMaxIter)
        ICmpsFailingOnLastIter.insert(ICmp);
    }

  bool Changed = false;
  for (auto *OldCond : LeafConditions) {
    // Skip the last iteration for this icmp under one of two conditions:
    // - We do it for all conditions;
    // - There is another ICmp that would fail on the last iter, so this one
    //   doesn't really matter.
    bool OptimisticSkipLastIter = SkipLastIter;
    if (!OptimisticSkipLastIter) {
      if (ICmpsFailingOnLastIter.size() > 1)
        OptimisticSkipLastIter = true;
      else if (ICmpsFailingOnLastIter.size() == 1)
        OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
    }
    if (auto Replaced =
            createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
                              OptimisticSkipLastIter, SE, Rewriter)) {
      Changed = true;
      auto *NewCond = *Replaced;
      if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
        NCI->setName(OldCond->getName() + ".first_iter");
      }
      LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
                        << " with " << *NewCond << "\n");
      assert(OldCond->hasOneUse() && "Must be!");
      OldCond->replaceAllUsesWith(NewCond);
      DeadInsts.push_back(OldCond);
      // Make sure we no longer consider this condition as failing on the
      // last iteration.
      ICmpsFailingOnLastIter.erase(OldCond);
    }
  }
  return Changed;
}

bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This duplicates a particular part of SimplifyIndVars' reasoning.
  // We need to duplicate it because, given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp: the zext doesn't fold to an AddRec unless it
  // already has flags. The alternative would be extending the set of
  // "interesting" IV users to include the icmp, but doing that regresses
  // results in practice: it queries SCEVs before the trip counts which rely
  // on them, which leads SCEV to cache sub-optimal answers. That concern
  // about caching sub-optimal results is why we only query SCEVs of the loop
  // invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning the cache with sub-optimal results. For the must-execute
    // case, this is a necessary precondition for correctness.
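    // The pattern we are looking for is, e.g. (illustrative):
    //   %wide = zext i8 %narrow to i32
    //   %cmp  = icmp slt i32 %wide, %inv   ; %inv loop-invariant
    // If the unsigned range of %inv fits within the zext'd full range of the
    // narrow type, the signed predicate can be replaced by its unsigned
    // counterpart, since both operands are then known non-negative.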
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case.
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed. We've changed the predicate, but
      // have not changed the exit counts or the values produced by the
      // compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate.
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the
    // number of instructions. The exception is when we have a zext(add-rec):
    // we know we need to get rid of the zext for SCEV to be able to compute
    // a trip count for such loops; we consider the new trip count valuable
    // enough to increase the instruction count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given an icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS,
    // replace it with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when the zext is loop varying and RHS is loop invariant. This converts
    // loop varying work to loop-invariant work.
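    // For example (illustrative):
    //   %wide = zext i32 %iv to i64        ; loop-varying
    //   %cmp  = icmp ult i64 %wide, %inv   ; %inv loop-invariant
    // becomes, with %inv.tr = trunc i64 %inv to i32 hoisted to the
    // preheader:
    //   %cmp  = icmp ult i32 %iv, %inv.tr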
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      auto *NewRHS =
          CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
                           L->getLoopPreheader()->getTerminator());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };

    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note, we are leaving SCEV in an unfortunately imprecise state here,
      // as rotation tends to reveal information about trip counts not
      // previously visible.
      continue;
    }
  }

  return Changed;
}

bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Remove all exits which aren't both rewriteable and execute on every
  // iteration.
  llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // Likewise, the loop latch must be dominated by the exiting BB.
    if (!DT->dominates(ExitingBB, L->getLoopLatch()))
      return true;

    if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
      // If already constant, nothing to do. However, if this is an
      // unconditional exit, we can still replace header phis with their
      // preheader value.
      if (!L->contains(BI->getSuccessor(CI->isNullValue())))
        replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
      return true;
    }

    return false;
  });

  if (ExitingBlocks.empty())
    return false;

  // Get a symbolic upper bound on the loop backedge taken count.
  const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(MaxBECount))
    return false;

  // Visit our exit blocks in order of dominance. We know from the fact that
  // all exits must dominate the latch that there is a total dominance order
  // between them.
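  // For example (illustrative): if the loop body is straight-line code
  // through %bb1 -> %bb2 -> %latch, with exits in %bb1 and %bb2, the sorted
  // order is [%bb1, %bb2]; %bb1's exit is evaluated first on every
  // iteration.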
  llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
    // std::sort sorts in ascending order, so we want the inverse of
    // the normal dominance relation.
    if (A == B) return false;
    if (DT->properlyDominates(A, B))
      return true;
    else {
      assert(DT->properlyDominates(B, A) &&
             "expected total dominance order!");
      return false;
    }
  });
#ifndef NDEBUG
  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
    assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
  }
#endif

  bool Changed = false;
  bool SkipLastIter = false;
  const SCEV *CurrMaxExit = SE->getCouldNotCompute();
  auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
    if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
      return;
    if (isa<SCEVCouldNotCompute>(CurrMaxExit))
      CurrMaxExit = MaxExitCount;
    else
      CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
    // If the loop has more than 1 iteration, all further checks will be
    // executed 1 iteration less.
    if (CurrMaxExit == MaxBECount)
      SkipLastIter = true;
  };
  SmallSet<const SCEV *, 8> DominatingExactExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
    const SCEV *MaxExitCount = SE->getExitCount(
        L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
    if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
      // Okay, we do not know the exit count here. Can we at least prove that
      // it will remain the same within the iteration space?
      auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
      auto OptimizeCond = [&](bool SkipLastIter) {
        return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
                                                    MaxBECount, SkipLastIter,
                                                    SE, Rewriter, DeadInsts);
      };

      // TODO: We might have proved that we can skip the last iteration for
      // this check. In this case, we only want to check the condition on the
      // pre-last iteration (MaxBECount - 1). However, there is a nasty
      // corner case:
      //
      //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
      //
      // If we could not prove that len != 0, then we also could not prove
      // that (len - 1) is not UINT_MAX. If we simply query (len - 1), then
      // OptimizeCond will likely not prove anything for it, even if it could
      // prove the same fact for len.
      //
      // As a temporary solution, we query both last and pre-last iterations
      // in the hope that we will be able to prove triviality for at least
      // one of them. We can stop querying MaxBECount for this case once SCEV
      // understands that (MaxBECount - 1) will not overflow here.
      if (OptimizeCond(false))
        Changed = true;
      else if (SkipLastIter && OptimizeCond(true))
        Changed = true;
      UpdateSkipLastIter(MaxExitCount);
      continue;
    }

    UpdateSkipLastIter(ExactExitCount);

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this. This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // We know that the backedge can't be taken, so we replace all
    // the header PHIs with values coming from the preheader.
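    // For example (illustrative): an exit guarded by 'icmp ult i32 %iv, %n'
    // where SCEV can prove %n == 0 at loop entry has an exact exit count of
    // zero, so the loop exits before the backedge is ever taken.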
    if (ExactExitCount->isZero()) {
      foldExit(L, ExitingBB, true, DeadInsts);
      replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
      Changed = true;
      continue;
    }

    assert(ExactExitCount->getType()->isIntegerTy() &&
           MaxBECount->getType()->isIntegerTy() &&
           "Exit counts must be integers");

    Type *WiderType =
        SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
    ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
    MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
    assert(MaxBECount->getType() == ExactExitCount->getType());

    // Can we prove that some other exit must be taken strictly before this
    // one?
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
                                     ExactExitCount)) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // As we run, keep track of which exit counts we've encountered. If we
    // find a duplicate, we've found an exit which would have exited on the
    // exiting iteration, but (from the visit order) strictly follows another
    // which does the same and is thus dead.
    if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // TODO: There might be another opportunity to leverage SCEV's reasoning
    // here. If we kept track of the min of dominating exits so far, we could
    // discharge exits with EC >= MDEC. This is less powerful than the
    // existing transform (since later exits aren't considered), but
    // potentially more powerful for any case where SCEV can prove a >=u b,
    // but neither a == b nor a >u b. Such a case is not currently known.
  }
  return Changed;
}

bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration. Note that this
  // neither a) tells us the loop exits on the first iteration (unless *all*
  // exits are predicateable) nor b) tells us *which* exit might be taken.
  // This transformation looks a lot like a form of dead loop elimination, but
  // restricted to read-only loops and without necessarily needing to kill the
  // loop entirely.
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow. We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
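  // For example (illustrative): with two analyzable explicit exits of counts
  // %n and %m, ExactBTC is umin(%n, %m). An *implicit* exit would be e.g. a
  // call that may throw or not return; the side-effect scan below rules
  // those out.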
  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them. This assumes trivial LCSSA phis
    // have already been removed; TODO: generalize.
    BasicBlock *ExitBlock =
        BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) ||
        !Rewriter.isSafeToExpand(ExitCount))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it. Consider
  // two exits (a) and (b) which would both exit on the same iteration. If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path,
  // then we could convert a loop from exiting through (a) to one exiting
  // through (b). Note that this problem exists only for exits with the same
  // exit count, and we could be more aggressive when exit counts are known
  // inequal.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse of
               // the normal dominance relation, plus a tie breaker for blocks
               // unordered by dominance.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               return A->getName() < B->getName();
             });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge). If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] with i < j.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count. The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    return DT->dominates(ExitingBB, L->getLoopLatch());
  }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable. Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
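  // If the loop had side effects (e.g. a store), changing which exit is
  // taken, or on which iteration, could be observable; that is why the scan
  // below rejects any loop containing a side-effecting instruction.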
  // @Reviewers - As structured, this is O(I^2) for loop nests. Any
  // suggestions on how to improve this? I can obviously bail out for outer
  // loops, but that seems less than ideal. MemorySSA can find memory writes;
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO: isGuaranteedToTransfer
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks. A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch. Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic. We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //   - LSR currently only supports LoopSimplify-form loops. Indvars'
  //     canonicalization can be a pessimization without LSR to "clean up"
  //     afterwards.
  //   - We depend on having a preheader; in particular,
  //     Loop::getCanonicalInductionVariable only supports loops with
  //     preheaders, and we're in trouble if we can't find the induction
  //     variable even when we've manually inserted one.
  //   - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
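  // For example (illustrative), a loop like
  //   for (float f = 0.0f; f != 100.0f; f += 1.0f) { ... }
  // has an exactly representable integer-valued recurrence and can use an
  // i32 induction variable instead.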
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop. Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts.
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV. Some nested loops may
    // share the same folded exit basic block, so we need to notify the
    // topmost loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one. Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them. Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions. Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeader->getTerminator()))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
                     WidenIndVars && AllowIVWidening);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}