1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This transformation analyzes and transforms the induction variables (and 10 // computations derived from them) into simpler forms suitable for subsequent 11 // analysis and transformation. 12 // 13 // If the trip count of a loop is computable, this pass also makes the following 14 // changes: 15 // 1. The exit condition for the loop is canonicalized to compare the 16 // induction value against the exit value. This turns loops like: 17 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)' 18 // 2. Any use outside of the loop of an expression derived from the indvar 19 // is changed to compute the derived value outside of the loop, eliminating 20 // the dependence on the exit value of the induction variable. If the only 21 // purpose of the loop is to compute the exit value of some derived 22 // expression, this transformation will make the loop dead. 23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "llvm/Transforms/Scalar/IndVarSimplify.h" 27 #include "llvm/ADT/APFloat.h" 28 #include "llvm/ADT/ArrayRef.h" 29 #include "llvm/ADT/STLExtras.h" 30 #include "llvm/ADT/SmallPtrSet.h" 31 #include "llvm/ADT/SmallSet.h" 32 #include "llvm/ADT/SmallVector.h" 33 #include "llvm/ADT/Statistic.h" 34 #include "llvm/ADT/iterator_range.h" 35 #include "llvm/Analysis/LoopInfo.h" 36 #include "llvm/Analysis/LoopPass.h" 37 #include "llvm/Analysis/MemorySSA.h" 38 #include "llvm/Analysis/MemorySSAUpdater.h" 39 #include "llvm/Analysis/ScalarEvolution.h" 40 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 41 #include "llvm/Analysis/TargetLibraryInfo.h" 42 #include "llvm/Analysis/TargetTransformInfo.h" 43 #include "llvm/Analysis/ValueTracking.h" 44 #include "llvm/IR/BasicBlock.h" 45 #include "llvm/IR/Constant.h" 46 #include "llvm/IR/ConstantRange.h" 47 #include "llvm/IR/Constants.h" 48 #include "llvm/IR/DataLayout.h" 49 #include "llvm/IR/DerivedTypes.h" 50 #include "llvm/IR/Dominators.h" 51 #include "llvm/IR/Function.h" 52 #include "llvm/IR/IRBuilder.h" 53 #include "llvm/IR/InstrTypes.h" 54 #include "llvm/IR/Instruction.h" 55 #include "llvm/IR/Instructions.h" 56 #include "llvm/IR/IntrinsicInst.h" 57 #include "llvm/IR/Intrinsics.h" 58 #include "llvm/IR/Module.h" 59 #include "llvm/IR/Operator.h" 60 #include "llvm/IR/PassManager.h" 61 #include "llvm/IR/PatternMatch.h" 62 #include "llvm/IR/Type.h" 63 #include "llvm/IR/Use.h" 64 #include "llvm/IR/User.h" 65 #include "llvm/IR/Value.h" 66 #include "llvm/IR/ValueHandle.h" 67 #include "llvm/InitializePasses.h" 68 #include "llvm/Pass.h" 69 #include "llvm/Support/Casting.h" 70 #include "llvm/Support/CommandLine.h" 71 #include "llvm/Support/Compiler.h" 72 #include "llvm/Support/Debug.h" 73 #include "llvm/Support/MathExtras.h" 74 #include "llvm/Support/raw_ostream.h" 75 #include "llvm/Transforms/Scalar.h" 76 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 77 #include "llvm/Transforms/Utils/Local.h" 78 #include "llvm/Transforms/Utils/LoopUtils.h" 79 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 80 #include "llvm/Transforms/Utils/SimplifyIndVar.h" 81 #include <cassert> 82 #include <cstdint> 83 #include <utility> 84 85 using namespace llvm; 86 using namespace PatternMatch; 87 88 #define DEBUG_TYPE "indvars" 89 90 STATISTIC(NumWidened , "Number of indvars widened"); 91 STATISTIC(NumReplaced , "Number of exit values replaced"); 92 STATISTIC(NumLFTR , "Number of loop exit tests replaced"); 93 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); 94 STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); 95 96 static cl::opt<ReplaceExitVal> ReplaceExitValue( 97 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), 98 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), 99 cl::values( 100 clEnumValN(NeverRepl, "never", "never replace exit value"), 101 clEnumValN(OnlyCheapRepl, "cheap", 102 "only replace exit value when the cost is cheap"), 103 clEnumValN( 104 UnusedIndVarInLoop, "unusedindvarinloop", 105 "only replace exit value when it is an unused " 106 "induction variable in the loop and has cheap replacement cost"), 107 clEnumValN(NoHardUse, "noharduse", 108 "only replace exit values when loop def likely dead"), 109 clEnumValN(AlwaysRepl, "always", 110 "always replace exit value whenever possible"))); 111 112 static cl::opt<bool> UsePostIncrementRanges( 113 "indvars-post-increment-ranges", cl::Hidden, 114 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), 115 cl::init(true)); 116 117 static cl::opt<bool> 118 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), 119 cl::desc("Disable Linear Function Test Replace optimization")); 120 121 static cl::opt<bool> 122 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), 123 cl::desc("Predicate conditions in read only loops")); 124 125 static cl::opt<bool> 126 AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), 127 cl::desc("Allow widening of indvars to eliminate s/zext")); 128 129 namespace { 130 131 class IndVarSimplify { 132 LoopInfo *LI; 133 ScalarEvolution *SE; 134 DominatorTree *DT; 135 const DataLayout &DL; 136 TargetLibraryInfo *TLI; 137 const TargetTransformInfo *TTI; 138 std::unique_ptr<MemorySSAUpdater> MSSAU; 139 140 SmallVector<WeakTrackingVH, 16> DeadInsts; 141 bool WidenIndVars; 142 143 bool handleFloatingPointIV(Loop *L, PHINode *PH); 144 bool rewriteNonIntegerIVs(Loop *L); 145 146 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); 147 /// Try to improve our exit conditions by converting condition from signed 148 /// to unsigned or rotating computation out of the loop. 149 /// (See inline comment about why this is duplicated from simplifyAndExtend) 150 bool canonicalizeExitCondition(Loop *L); 151 /// Try to eliminate loop exits based on analyzeable exit counts 152 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter); 153 /// Try to form loop invariant tests for loop exits by changing how many 154 /// iterations of the loop run when that is unobservable. 155 bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter); 156 157 bool rewriteFirstIterationLoopExitValues(Loop *L); 158 159 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 160 const SCEV *ExitCount, 161 PHINode *IndVar, SCEVExpander &Rewriter); 162 163 bool sinkUnusedInvariants(Loop *L); 164 165 public: 166 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, 167 const DataLayout &DL, TargetLibraryInfo *TLI, 168 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars) 169 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI), 170 WidenIndVars(WidenIndVars) { 171 if (MSSA) 172 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); 173 } 174 175 bool run(Loop *L); 176 }; 177 178 } // end anonymous namespace 179 180 //===----------------------------------------------------------------------===// 181 // rewriteNonIntegerIVs and helpers. Prefer integer IVs. 182 //===----------------------------------------------------------------------===// 183 184 /// Convert APF to an integer, if possible. 185 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) { 186 bool isExact = false; 187 // See if we can convert this to an int64_t 188 uint64_t UIntVal; 189 if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true, 190 APFloat::rmTowardZero, &isExact) != APFloat::opOK || 191 !isExact) 192 return false; 193 IntVal = UIntVal; 194 return true; 195 } 196 197 /// If the loop has floating induction variable then insert corresponding 198 /// integer induction variable if possible. 199 /// For example, 200 /// for(double i = 0; i < 10000; ++i) 201 /// bar(i) 202 /// is converted into 203 /// for(int i = 0; i < 10000; ++i) 204 /// bar((double)i); 205 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { 206 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); 207 unsigned BackEdge = IncomingEdge^1; 208 209 // Check incoming value. 210 auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge)); 211 212 int64_t InitValue; 213 if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue)) 214 return false; 215 216 // Check IV increment. Reject this PN if increment operation is not 217 // an add or increment value can not be represented by an integer. 218 auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); 219 if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false; 220 221 // If this is not an add of the PHI with a constantfp, or if the constant fp 222 // is not an integer, bail out. 223 ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); 224 int64_t IncValue; 225 if (IncValueVal == nullptr || Incr->getOperand(0) != PN || 226 !ConvertToSInt(IncValueVal->getValueAPF(), IncValue)) 227 return false; 228 229 // Check Incr uses. One user is PN and the other user is an exit condition 230 // used by the conditional terminator. 231 Value::user_iterator IncrUse = Incr->user_begin(); 232 Instruction *U1 = cast<Instruction>(*IncrUse++); 233 if (IncrUse == Incr->user_end()) return false; 234 Instruction *U2 = cast<Instruction>(*IncrUse++); 235 if (IncrUse != Incr->user_end()) return false; 236 237 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't 238 // only used by a branch, we can't transform it. 239 FCmpInst *Compare = dyn_cast<FCmpInst>(U1); 240 if (!Compare) 241 Compare = dyn_cast<FCmpInst>(U2); 242 if (!Compare || !Compare->hasOneUse() || 243 !isa<BranchInst>(Compare->user_back())) 244 return false; 245 246 BranchInst *TheBr = cast<BranchInst>(Compare->user_back()); 247 248 // We need to verify that the branch actually controls the iteration count 249 // of the loop. If not, the new IV can overflow and no one will notice. 250 // The branch block must be in the loop and one of the successors must be out 251 // of the loop. 252 assert(TheBr->isConditional() && "Can't use fcmp if not conditional"); 253 if (!L->contains(TheBr->getParent()) || 254 (L->contains(TheBr->getSuccessor(0)) && 255 L->contains(TheBr->getSuccessor(1)))) 256 return false; 257 258 // If it isn't a comparison with an integer-as-fp (the exit value), we can't 259 // transform it. 260 ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); 261 int64_t ExitValue; 262 if (ExitValueVal == nullptr || 263 !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) 264 return false; 265 266 // Find new predicate for integer comparison. 267 CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; 268 switch (Compare->getPredicate()) { 269 default: return false; // Unknown comparison. 270 case CmpInst::FCMP_OEQ: 271 case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; 272 case CmpInst::FCMP_ONE: 273 case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break; 274 case CmpInst::FCMP_OGT: 275 case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break; 276 case CmpInst::FCMP_OGE: 277 case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break; 278 case CmpInst::FCMP_OLT: 279 case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break; 280 case CmpInst::FCMP_OLE: 281 case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; 282 } 283 284 // We convert the floating point induction variable to a signed i32 value if 285 // we can. This is only safe if the comparison will not overflow in a way 286 // that won't be trapped by the integer equivalent operations. Check for this 287 // now. 288 // TODO: We could use i64 if it is native and the range requires it. 289 290 // The start/stride/exit values must all fit in signed i32. 291 if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) 292 return false; 293 294 // If not actually striding (add x, 0.0), avoid touching the code. 295 if (IncValue == 0) 296 return false; 297 298 // Positive and negative strides have different safety conditions. 299 if (IncValue > 0) { 300 // If we have a positive stride, we require the init to be less than the 301 // exit value. 302 if (InitValue >= ExitValue) 303 return false; 304 305 uint32_t Range = uint32_t(ExitValue-InitValue); 306 // Check for infinite loop, either: 307 // while (i <= Exit) or until (i > Exit) 308 if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) { 309 if (++Range == 0) return false; // Range overflows. 310 } 311 312 unsigned Leftover = Range % uint32_t(IncValue); 313 314 // If this is an equality comparison, we require that the strided value 315 // exactly land on the exit value, otherwise the IV condition will wrap 316 // around and do things the fp IV wouldn't. 317 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 318 Leftover != 0) 319 return false; 320 321 // If the stride would wrap around the i32 before exiting, we can't 322 // transform the IV. 323 if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) 324 return false; 325 } else { 326 // If we have a negative stride, we require the init to be greater than the 327 // exit value. 328 if (InitValue <= ExitValue) 329 return false; 330 331 uint32_t Range = uint32_t(InitValue-ExitValue); 332 // Check for infinite loop, either: 333 // while (i >= Exit) or until (i < Exit) 334 if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) { 335 if (++Range == 0) return false; // Range overflows. 336 } 337 338 unsigned Leftover = Range % uint32_t(-IncValue); 339 340 // If this is an equality comparison, we require that the strided value 341 // exactly land on the exit value, otherwise the IV condition will wrap 342 // around and do things the fp IV wouldn't. 343 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 344 Leftover != 0) 345 return false; 346 347 // If the stride would wrap around the i32 before exiting, we can't 348 // transform the IV. 349 if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) 350 return false; 351 } 352 353 IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); 354 355 // Insert new integer induction variable. 356 PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); 357 NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), 358 PN->getIncomingBlock(IncomingEdge)); 359 360 Value *NewAdd = 361 BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue), 362 Incr->getName()+".int", Incr); 363 NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge)); 364 365 ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd, 366 ConstantInt::get(Int32Ty, ExitValue), 367 Compare->getName()); 368 369 // In the following deletions, PN may become dead and may be deleted. 370 // Use a WeakTrackingVH to observe whether this happens. 371 WeakTrackingVH WeakPH = PN; 372 373 // Delete the old floating point exit comparison. The branch starts using the 374 // new comparison. 375 NewCompare->takeName(Compare); 376 Compare->replaceAllUsesWith(NewCompare); 377 RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get()); 378 379 // Delete the old floating point increment. 380 Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType())); 381 RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get()); 382 383 // If the FP induction variable still has uses, this is because something else 384 // in the loop uses its value. In order to canonicalize the induction 385 // variable, we chose to eliminate the IV and rewrite it in terms of an 386 // int->fp cast. 387 // 388 // We give preference to sitofp over uitofp because it is faster on most 389 // platforms. 390 if (WeakPH) { 391 Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", 392 &*PN->getParent()->getFirstInsertionPt()); 393 PN->replaceAllUsesWith(Conv); 394 RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get()); 395 } 396 return true; 397 } 398 399 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) { 400 // First step. Check to see if there are any floating-point recurrences. 401 // If there are, change them into integer recurrences, permitting analysis by 402 // the SCEV routines. 403 BasicBlock *Header = L->getHeader(); 404 405 SmallVector<WeakTrackingVH, 8> PHIs; 406 for (PHINode &PN : Header->phis()) 407 PHIs.push_back(&PN); 408 409 bool Changed = false; 410 for (unsigned i = 0, e = PHIs.size(); i != e; ++i) 411 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i])) 412 Changed |= handleFloatingPointIV(L, PN); 413 414 // If the loop previously had floating-point IV, ScalarEvolution 415 // may not have been able to compute a trip count. Now that we've done some 416 // re-writing, the trip count may be computable. 417 if (Changed) 418 SE->forgetLoop(L); 419 return Changed; 420 } 421 422 //===---------------------------------------------------------------------===// 423 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know 424 // they will exit at the first iteration. 425 //===---------------------------------------------------------------------===// 426 427 /// Check to see if this loop has loop invariant conditions which lead to loop 428 /// exits. If so, we know that if the exit path is taken, it is at the first 429 /// loop iteration. This lets us predict exit values of PHI nodes that live in 430 /// loop header. 431 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) { 432 // Verify the input to the pass is already in LCSSA form. 433 assert(L->isLCSSAForm(*DT)); 434 435 SmallVector<BasicBlock *, 8> ExitBlocks; 436 L->getUniqueExitBlocks(ExitBlocks); 437 438 bool MadeAnyChanges = false; 439 for (auto *ExitBB : ExitBlocks) { 440 // If there are no more PHI nodes in this exit block, then no more 441 // values defined inside the loop are used on this path. 442 for (PHINode &PN : ExitBB->phis()) { 443 for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues(); 444 IncomingValIdx != E; ++IncomingValIdx) { 445 auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx); 446 447 // Can we prove that the exit must run on the first iteration if it 448 // runs at all? (i.e. early exits are fine for our purposes, but 449 // traces which lead to this exit being taken on the 2nd iteration 450 // aren't.) Note that this is about whether the exit branch is 451 // executed, not about whether it is taken. 452 if (!L->getLoopLatch() || 453 !DT->dominates(IncomingBB, L->getLoopLatch())) 454 continue; 455 456 // Get condition that leads to the exit path. 457 auto *TermInst = IncomingBB->getTerminator(); 458 459 Value *Cond = nullptr; 460 if (auto *BI = dyn_cast<BranchInst>(TermInst)) { 461 // Must be a conditional branch, otherwise the block 462 // should not be in the loop. 463 Cond = BI->getCondition(); 464 } else if (auto *SI = dyn_cast<SwitchInst>(TermInst)) 465 Cond = SI->getCondition(); 466 else 467 continue; 468 469 if (!L->isLoopInvariant(Cond)) 470 continue; 471 472 auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx)); 473 474 // Only deal with PHIs in the loop header. 475 if (!ExitVal || ExitVal->getParent() != L->getHeader()) 476 continue; 477 478 // If ExitVal is a PHI on the loop header, then we know its 479 // value along this exit because the exit can only be taken 480 // on the first iteration. 481 auto *LoopPreheader = L->getLoopPreheader(); 482 assert(LoopPreheader && "Invalid loop"); 483 int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader); 484 if (PreheaderIdx != -1) { 485 assert(ExitVal->getParent() == L->getHeader() && 486 "ExitVal must be in loop header"); 487 MadeAnyChanges = true; 488 PN.setIncomingValue(IncomingValIdx, 489 ExitVal->getIncomingValue(PreheaderIdx)); 490 SE->forgetValue(&PN); 491 } 492 } 493 } 494 } 495 return MadeAnyChanges; 496 } 497 498 //===----------------------------------------------------------------------===// 499 // IV Widening - Extend the width of an IV to cover its widest uses. 500 //===----------------------------------------------------------------------===// 501 502 /// Update information about the induction variable that is extended by this 503 /// sign or zero extend operation. This is used to determine the final width of 504 /// the IV before actually widening it. 505 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, 506 ScalarEvolution *SE, 507 const TargetTransformInfo *TTI) { 508 bool IsSigned = Cast->getOpcode() == Instruction::SExt; 509 if (!IsSigned && Cast->getOpcode() != Instruction::ZExt) 510 return; 511 512 Type *Ty = Cast->getType(); 513 uint64_t Width = SE->getTypeSizeInBits(Ty); 514 if (!Cast->getModule()->getDataLayout().isLegalInteger(Width)) 515 return; 516 517 // Check that `Cast` actually extends the induction variable (we rely on this 518 // later). This takes care of cases where `Cast` is extending a truncation of 519 // the narrow induction variable, and thus can end up being narrower than the 520 // "narrow" induction variable. 521 uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType()); 522 if (NarrowIVWidth >= Width) 523 return; 524 525 // Cast is either an sext or zext up to this point. 526 // We should not widen an indvar if arithmetics on the wider indvar are more 527 // expensive than those on the narrower indvar. We check only the cost of ADD 528 // because at least an ADD is required to increment the induction variable. We 529 // could compute more comprehensively the cost of all instructions on the 530 // induction variable when necessary. 531 if (TTI && 532 TTI->getArithmeticInstrCost(Instruction::Add, Ty) > 533 TTI->getArithmeticInstrCost(Instruction::Add, 534 Cast->getOperand(0)->getType())) { 535 return; 536 } 537 538 if (!WI.WidestNativeType || 539 Width > SE->getTypeSizeInBits(WI.WidestNativeType)) { 540 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); 541 WI.IsSigned = IsSigned; 542 return; 543 } 544 545 // We extend the IV to satisfy the sign of its user(s), or 'signed' 546 // if there are multiple users with both sign- and zero extensions, 547 // in order not to introduce nondeterministic behaviour based on the 548 // unspecified order of a PHI nodes' users-iterator. 549 WI.IsSigned |= IsSigned; 550 } 551 552 //===----------------------------------------------------------------------===// 553 // Live IV Reduction - Minimize IVs live across the loop. 554 //===----------------------------------------------------------------------===// 555 556 //===----------------------------------------------------------------------===// 557 // Simplification of IV users based on SCEV evaluation. 558 //===----------------------------------------------------------------------===// 559 560 namespace { 561 562 class IndVarSimplifyVisitor : public IVVisitor { 563 ScalarEvolution *SE; 564 const TargetTransformInfo *TTI; 565 PHINode *IVPhi; 566 567 public: 568 WideIVInfo WI; 569 570 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, 571 const TargetTransformInfo *TTI, 572 const DominatorTree *DTree) 573 : SE(SCEV), TTI(TTI), IVPhi(IV) { 574 DT = DTree; 575 WI.NarrowIV = IVPhi; 576 } 577 578 // Implement the interface used by simplifyUsersOfIV. 579 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } 580 }; 581 582 } // end anonymous namespace 583 584 /// Iteratively perform simplification on a worklist of IV users. Each 585 /// successive simplification may push more users which may themselves be 586 /// candidates for simplification. 587 /// 588 /// Sign/Zero extend elimination is interleaved with IV simplification. 589 bool IndVarSimplify::simplifyAndExtend(Loop *L, 590 SCEVExpander &Rewriter, 591 LoopInfo *LI) { 592 SmallVector<WideIVInfo, 8> WideIVs; 593 594 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction( 595 Intrinsic::getName(Intrinsic::experimental_guard)); 596 bool HasGuards = GuardDecl && !GuardDecl->use_empty(); 597 598 SmallVector<PHINode *, 8> LoopPhis; 599 for (PHINode &PN : L->getHeader()->phis()) 600 LoopPhis.push_back(&PN); 601 602 // Each round of simplification iterates through the SimplifyIVUsers worklist 603 // for all current phis, then determines whether any IVs can be 604 // widened. Widening adds new phis to LoopPhis, inducing another round of 605 // simplification on the wide IVs. 606 bool Changed = false; 607 while (!LoopPhis.empty()) { 608 // Evaluate as many IV expressions as possible before widening any IVs. This 609 // forces SCEV to set no-wrap flags before evaluating sign/zero 610 // extension. The first time SCEV attempts to normalize sign/zero extension, 611 // the result becomes final. So for the most predictable results, we delay 612 // evaluation of sign/zero extend evaluation until needed, and avoid running 613 // other SCEV based analysis prior to simplifyAndExtend. 614 do { 615 PHINode *CurrIV = LoopPhis.pop_back_val(); 616 617 // Information about sign/zero extensions of CurrIV. 618 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); 619 620 Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter, 621 &Visitor); 622 623 if (Visitor.WI.WidestNativeType) { 624 WideIVs.push_back(Visitor.WI); 625 } 626 } while(!LoopPhis.empty()); 627 628 // Continue if we disallowed widening. 629 if (!WidenIndVars) 630 continue; 631 632 for (; !WideIVs.empty(); WideIVs.pop_back()) { 633 unsigned ElimExt; 634 unsigned Widened; 635 if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter, 636 DT, DeadInsts, ElimExt, Widened, 637 HasGuards, UsePostIncrementRanges)) { 638 NumElimExt += ElimExt; 639 NumWidened += Widened; 640 Changed = true; 641 LoopPhis.push_back(WidePhi); 642 } 643 } 644 } 645 return Changed; 646 } 647 648 //===----------------------------------------------------------------------===// 649 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition. 650 //===----------------------------------------------------------------------===// 651 652 /// Given an Value which is hoped to be part of an add recurance in the given 653 /// loop, return the associated Phi node if so. Otherwise, return null. Note 654 /// that this is less general than SCEVs AddRec checking. 655 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) { 656 Instruction *IncI = dyn_cast<Instruction>(IncV); 657 if (!IncI) 658 return nullptr; 659 660 switch (IncI->getOpcode()) { 661 case Instruction::Add: 662 case Instruction::Sub: 663 break; 664 case Instruction::GetElementPtr: 665 // An IV counter must preserve its type. 666 if (IncI->getNumOperands() == 2) 667 break; 668 [[fallthrough]]; 669 default: 670 return nullptr; 671 } 672 673 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0)); 674 if (Phi && Phi->getParent() == L->getHeader()) { 675 if (L->isLoopInvariant(IncI->getOperand(1))) 676 return Phi; 677 return nullptr; 678 } 679 if (IncI->getOpcode() == Instruction::GetElementPtr) 680 return nullptr; 681 682 // Allow add/sub to be commuted. 683 Phi = dyn_cast<PHINode>(IncI->getOperand(1)); 684 if (Phi && Phi->getParent() == L->getHeader()) { 685 if (L->isLoopInvariant(IncI->getOperand(0))) 686 return Phi; 687 } 688 return nullptr; 689 } 690 691 /// Whether the current loop exit test is based on this value. Currently this 692 /// is limited to a direct use in the loop condition. 693 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) { 694 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 695 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition()); 696 // TODO: Allow non-icmp loop test. 697 if (!ICmp) 698 return false; 699 700 // TODO: Allow indirect use. 701 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V; 702 } 703 704 /// linearFunctionTestReplace policy. Return true unless we can show that the 705 /// current exit test is already sufficiently canonical. 706 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) { 707 assert(L->getLoopLatch() && "Must be in simplified form"); 708 709 // Avoid converting a constant or loop invariant test back to a runtime 710 // test. This is critical for when SCEV's cached ExitCount is less precise 711 // than the current IR (such as after we've proven a particular exit is 712 // actually dead and thus the BE count never reaches our ExitCount.) 713 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 714 if (L->isLoopInvariant(BI->getCondition())) 715 return false; 716 717 // Do LFTR to simplify the exit condition to an ICMP. 718 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); 719 if (!Cond) 720 return true; 721 722 // Do LFTR to simplify the exit ICMP to EQ/NE 723 ICmpInst::Predicate Pred = Cond->getPredicate(); 724 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ) 725 return true; 726 727 // Look for a loop invariant RHS 728 Value *LHS = Cond->getOperand(0); 729 Value *RHS = Cond->getOperand(1); 730 if (!L->isLoopInvariant(RHS)) { 731 if (!L->isLoopInvariant(LHS)) 732 return true; 733 std::swap(LHS, RHS); 734 } 735 // Look for a simple IV counter LHS 736 PHINode *Phi = dyn_cast<PHINode>(LHS); 737 if (!Phi) 738 Phi = getLoopPhiForCounter(LHS, L); 739 740 if (!Phi) 741 return true; 742 743 // Do LFTR if PHI node is defined in the loop, but is *not* a counter. 744 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch()); 745 if (Idx < 0) 746 return true; 747 748 // Do LFTR if the exit condition's IV is *not* a simple counter. 749 Value *IncV = Phi->getIncomingValue(Idx); 750 return Phi != getLoopPhiForCounter(IncV, L); 751 } 752 753 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils 754 /// down to checking that all operands are constant and listing instructions 755 /// that may hide undef. 756 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited, 757 unsigned Depth) { 758 if (isa<Constant>(V)) 759 return !isa<UndefValue>(V); 760 761 if (Depth >= 6) 762 return false; 763 764 // Conservatively handle non-constant non-instructions. For example, Arguments 765 // may be undef. 766 Instruction *I = dyn_cast<Instruction>(V); 767 if (!I) 768 return false; 769 770 // Load and return values may be undef. 771 if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I)) 772 return false; 773 774 // Optimistically handle other instructions. 775 for (Value *Op : I->operands()) { 776 if (!Visited.insert(Op).second) 777 continue; 778 if (!hasConcreteDefImpl(Op, Visited, Depth+1)) 779 return false; 780 } 781 return true; 782 } 783 784 /// Return true if the given value is concrete. We must prove that undef can 785 /// never reach it. 786 /// 787 /// TODO: If we decide that this is a good approach to checking for undef, we 788 /// may factor it into a common location. 789 static bool hasConcreteDef(Value *V) { 790 SmallPtrSet<Value*, 8> Visited; 791 Visited.insert(V); 792 return hasConcreteDefImpl(V, Visited, 0); 793 } 794 795 /// Return true if the given phi is a "counter" in L. A counter is an 796 /// add recurance (of integer or pointer type) with an arbitrary start, and a 797 /// step of 1. Note that L must have exactly one latch. 798 static bool isLoopCounter(PHINode* Phi, Loop *L, 799 ScalarEvolution *SE) { 800 assert(Phi->getParent() == L->getHeader()); 801 assert(L->getLoopLatch()); 802 803 if (!SE->isSCEVable(Phi->getType())) 804 return false; 805 806 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi)); 807 if (!AR || AR->getLoop() != L || !AR->isAffine()) 808 return false; 809 810 const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)); 811 if (!Step || !Step->isOne()) 812 return false; 813 814 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch()); 815 Value *IncV = Phi->getIncomingValue(LatchIdx); 816 return (getLoopPhiForCounter(IncV, L) == Phi && 817 isa<SCEVAddRecExpr>(SE->getSCEV(IncV))); 818 } 819 820 /// Search the loop header for a loop counter (anadd rec w/step of one) 821 /// suitable for use by LFTR. If multiple counters are available, select the 822 /// "best" one based profitable heuristics. 823 /// 824 /// BECount may be an i8* pointer type. The pointer difference is already 825 /// valid count without scaling the address stride, so it remains a pointer 826 /// expression as far as SCEV is concerned. 827 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB, 828 const SCEV *BECount, 829 ScalarEvolution *SE, DominatorTree *DT) { 830 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType()); 831 832 Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition(); 833 834 // Loop over all of the PHI nodes, looking for a simple counter. 835 PHINode *BestPhi = nullptr; 836 const SCEV *BestInit = nullptr; 837 BasicBlock *LatchBlock = L->getLoopLatch(); 838 assert(LatchBlock && "Must be in simplified form"); 839 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 840 841 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 842 PHINode *Phi = cast<PHINode>(I); 843 if (!isLoopCounter(Phi, L, SE)) 844 continue; 845 846 const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi)); 847 848 // AR may be a pointer type, while BECount is an integer type. 849 // AR may be wider than BECount. With eq/ne tests overflow is immaterial. 850 // AR may not be a narrower type, or we may never exit. 851 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType()); 852 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth)) 853 continue; 854 855 // Avoid reusing a potentially undef value to compute other values that may 856 // have originally had a concrete definition. 857 if (!hasConcreteDef(Phi)) { 858 // We explicitly allow unknown phis as long as they are already used by 859 // the loop exit test. This is legal since performing LFTR could not 860 // increase the number of undef users. 861 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock); 862 if (!isLoopExitTestBasedOn(Phi, ExitingBB) && 863 !isLoopExitTestBasedOn(IncPhi, ExitingBB)) 864 continue; 865 } 866 867 // Avoid introducing undefined behavior due to poison which didn't exist in 868 // the original program. (Annoyingly, the rules for poison and undef 869 // propagation are distinct, so this does NOT cover the undef case above.) 870 // We have to ensure that we don't introduce UB by introducing a use on an 871 // iteration where said IV produces poison. Our strategy here differs for 872 // pointers and integer IVs. For integers, we strip and reinfer as needed, 873 // see code in linearFunctionTestReplace. For pointers, we restrict 874 // transforms as there is no good way to reinfer inbounds once lost. 875 if (!Phi->getType()->isIntegerTy() && 876 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) 877 continue; 878 879 const SCEV *Init = AR->getStart(); 880 881 if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) { 882 // Don't force a live loop counter if another IV can be used. 883 if (isAlmostDeadIV(Phi, LatchBlock, Cond)) 884 continue; 885 886 // Prefer to count-from-zero. This is a more "canonical" counter form. It 887 // also prefers integer to pointer IVs. 888 if (BestInit->isZero() != Init->isZero()) { 889 if (BestInit->isZero()) 890 continue; 891 } 892 // If two IVs both count from zero or both count from nonzero then the 893 // narrower is likely a dead phi that has been widened. Use the wider phi 894 // to allow the other to be eliminated. 895 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) 896 continue; 897 } 898 BestPhi = Phi; 899 BestInit = Init; 900 } 901 return BestPhi; 902 } 903 904 /// Insert an IR expression which computes the value held by the IV IndVar 905 /// (which must be an loop counter w/unit stride) after the backedge of loop L 906 /// is taken ExitCount times. 907 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, 908 const SCEV *ExitCount, bool UsePostInc, Loop *L, 909 SCEVExpander &Rewriter, ScalarEvolution *SE) { 910 assert(isLoopCounter(IndVar, L, SE)); 911 assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer"); 912 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 913 const SCEV *IVInit = AR->getStart(); 914 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 915 916 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter 917 // finds a valid pointer IV. 918 if (IndVar->getType()->isPointerTy()) { 919 const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR; 920 const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE); 921 assert(SE->isLoopInvariant(IVLimit, L) && 922 "Computed iteration count is not loop invariant!"); 923 return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), 924 ExitingBB->getTerminator()); 925 } else { 926 // In any other case, convert both IVInit and ExitCount to integers before 927 // comparing. This may result in SCEV expansion of pointers, but in practice 928 // SCEV will fold the pointer arithmetic away as such: 929 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). 930 // 931 // Valid Cases: (1) both integers is most common; (2) both may be pointers 932 // for simple memset-style loops. 933 // 934 // IVInit integer and ExitCount pointer would only occur if a canonical IV 935 // were generated on top of case #2, which is not expected. 936 937 // For unit stride, IVCount = Start + ExitCount with 2's complement 938 // overflow. 939 940 // For integer IVs, truncate the IV before computing IVInit + BECount, 941 // unless we know apriori that the limit must be a constant when evaluated 942 // in the bitwidth of the IV. We prefer (potentially) keeping a truncate 943 // of the IV in the loop over a (potentially) expensive expansion of the 944 // widened exit count add(zext(add)) expression. 945 if (SE->getTypeSizeInBits(IVInit->getType()) 946 > SE->getTypeSizeInBits(ExitCount->getType())) { 947 if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount)) 948 ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType()); 949 else 950 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType()); 951 } 952 953 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount); 954 955 if (UsePostInc) 956 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); 957 958 // Expand the code for the iteration count. 959 assert(SE->isLoopInvariant(IVLimit, L) && 960 "Computed iteration count is not loop invariant!"); 961 // Ensure that we generate the same type as IndVar, or a smaller integer 962 // type. In the presence of null pointer values, we have an integer type 963 // SCEV expression (IVInit) for a pointer type IV value (IndVar). 964 return Rewriter.expandCodeFor(IVLimit, ExitCount->getType(), 965 ExitingBB->getTerminator()); 966 } 967 } 968 969 /// This method rewrites the exit condition of the loop to be a canonical != 970 /// comparison against the incremented loop induction variable. This pass is 971 /// able to rewrite the exit tests of any loop where the SCEV analysis can 972 /// determine a loop-invariant trip count of the loop, which is actually a much 973 /// broader range than just linear tests. 974 bool IndVarSimplify:: 975 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 976 const SCEV *ExitCount, 977 PHINode *IndVar, SCEVExpander &Rewriter) { 978 assert(L->getLoopLatch() && "Loop no longer in simplified form?"); 979 assert(isLoopCounter(IndVar, L, SE)); 980 Instruction * const IncVar = 981 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch())); 982 983 // Initialize CmpIndVar to the preincremented IV. 984 Value *CmpIndVar = IndVar; 985 bool UsePostInc = false; 986 987 // If the exiting block is the same as the backedge block, we prefer to 988 // compare against the post-incremented value, otherwise we must compare 989 // against the preincremented value. 990 if (ExitingBB == L->getLoopLatch()) { 991 // For pointer IVs, we chose to not strip inbounds which requires us not 992 // to add a potentially UB introducing use. We need to either a) show 993 // the loop test we're modifying is already in post-inc form, or b) show 994 // that adding a use must not introduce UB. 995 bool SafeToPostInc = 996 IndVar->getType()->isIntegerTy() || 997 isLoopExitTestBasedOn(IncVar, ExitingBB) || 998 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT); 999 if (SafeToPostInc) { 1000 UsePostInc = true; 1001 CmpIndVar = IncVar; 1002 } 1003 } 1004 1005 // It may be necessary to drop nowrap flags on the incrementing instruction 1006 // if either LFTR moves from a pre-inc check to a post-inc check (in which 1007 // case the increment might have previously been poison on the last iteration 1008 // only) or if LFTR switches to a different IV that was previously dynamically 1009 // dead (and as such may be arbitrarily poison). We remove any nowrap flags 1010 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc 1011 // check), because the pre-inc addrec flags may be adopted from the original 1012 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags. 1013 // TODO: This handling is inaccurate for one case: If we switch to a 1014 // dynamically dead IV that wraps on the first loop iteration only, which is 1015 // not covered by the post-inc addrec. (If the new IV was not dynamically 1016 // dead, it could not be poison on the first iteration in the first place.) 1017 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) { 1018 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar)); 1019 if (BO->hasNoUnsignedWrap()) 1020 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap()); 1021 if (BO->hasNoSignedWrap()) 1022 BO->setHasNoSignedWrap(AR->hasNoSignedWrap()); 1023 } 1024 1025 Value *ExitCnt = genLoopLimit( 1026 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE); 1027 assert(ExitCnt->getType()->isPointerTy() == 1028 IndVar->getType()->isPointerTy() && 1029 "genLoopLimit missed a cast"); 1030 1031 // Insert a new icmp_ne or icmp_eq instruction before the branch. 1032 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1033 ICmpInst::Predicate P; 1034 if (L->contains(BI->getSuccessor(0))) 1035 P = ICmpInst::ICMP_NE; 1036 else 1037 P = ICmpInst::ICMP_EQ; 1038 1039 IRBuilder<> Builder(BI); 1040 1041 // The new loop exit condition should reuse the debug location of the 1042 // original loop exit condition. 1043 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition())) 1044 Builder.SetCurrentDebugLocation(Cond->getDebugLoc()); 1045 1046 // For integer IVs, if we evaluated the limit in the narrower bitwidth to 1047 // avoid the expensive expansion of the limit expression in the wider type, 1048 // emit a truncate to narrow the IV to the ExitCount type. This is safe 1049 // since we know (from the exit count bitwidth), that we can't self-wrap in 1050 // the narrower type. 1051 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); 1052 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); 1053 if (CmpIndVarSize > ExitCntSize) { 1054 assert(!CmpIndVar->getType()->isPointerTy() && 1055 !ExitCnt->getType()->isPointerTy()); 1056 1057 // Before resorting to actually inserting the truncate, use the same 1058 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend 1059 // the other side of the comparison instead. We still evaluate the limit 1060 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to 1061 // a truncate within in. 1062 bool Extended = false; 1063 const SCEV *IV = SE->getSCEV(CmpIndVar); 1064 const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType()); 1065 const SCEV *ZExtTrunc = 1066 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType()); 1067 1068 if (ZExtTrunc == IV) { 1069 Extended = true; 1070 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(), 1071 "wide.trip.count"); 1072 } else { 1073 const SCEV *SExtTrunc = 1074 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType()); 1075 if (SExtTrunc == IV) { 1076 Extended = true; 1077 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(), 1078 "wide.trip.count"); 1079 } 1080 } 1081 1082 if (Extended) { 1083 bool Discard; 1084 L->makeLoopInvariant(ExitCnt, Discard); 1085 } else 1086 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), 1087 "lftr.wideiv"); 1088 } 1089 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 1090 << " LHS:" << *CmpIndVar << '\n' 1091 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==") 1092 << "\n" 1093 << " RHS:\t" << *ExitCnt << "\n" 1094 << "ExitCount:\t" << *ExitCount << "\n" 1095 << " was: " << *BI->getCondition() << "\n"); 1096 1097 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); 1098 Value *OrigCond = BI->getCondition(); 1099 // It's tempting to use replaceAllUsesWith here to fully replace the old 1100 // comparison, but that's not immediately safe, since users of the old 1101 // comparison may not be dominated by the new comparison. Instead, just 1102 // update the branch to use the new comparison; in the common case this 1103 // will make old comparison dead. 1104 BI->setCondition(Cond); 1105 DeadInsts.emplace_back(OrigCond); 1106 1107 ++NumLFTR; 1108 return true; 1109 } 1110 1111 //===----------------------------------------------------------------------===// 1112 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders. 1113 //===----------------------------------------------------------------------===// 1114 1115 /// If there's a single exit block, sink any loop-invariant values that 1116 /// were defined in the preheader but not used inside the loop into the 1117 /// exit block to reduce register pressure in the loop. 1118 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { 1119 BasicBlock *ExitBlock = L->getExitBlock(); 1120 if (!ExitBlock) return false; 1121 1122 BasicBlock *Preheader = L->getLoopPreheader(); 1123 if (!Preheader) return false; 1124 1125 bool MadeAnyChanges = false; 1126 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); 1127 BasicBlock::iterator I(Preheader->getTerminator()); 1128 while (I != Preheader->begin()) { 1129 --I; 1130 // New instructions were inserted at the end of the preheader. 1131 if (isa<PHINode>(I)) 1132 break; 1133 1134 // Don't move instructions which might have side effects, since the side 1135 // effects need to complete before instructions inside the loop. Also don't 1136 // move instructions which might read memory, since the loop may modify 1137 // memory. Note that it's okay if the instruction might have undefined 1138 // behavior: LoopSimplify guarantees that the preheader dominates the exit 1139 // block. 1140 if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 1141 continue; 1142 1143 // Skip debug info intrinsics. 1144 if (isa<DbgInfoIntrinsic>(I)) 1145 continue; 1146 1147 // Skip eh pad instructions. 1148 if (I->isEHPad()) 1149 continue; 1150 1151 // Don't sink alloca: we never want to sink static alloca's out of the 1152 // entry block, and correctly sinking dynamic alloca's requires 1153 // checks for stacksave/stackrestore intrinsics. 1154 // FIXME: Refactor this check somehow? 1155 if (isa<AllocaInst>(I)) 1156 continue; 1157 1158 // Determine if there is a use in or before the loop (direct or 1159 // otherwise). 1160 bool UsedInLoop = false; 1161 for (Use &U : I->uses()) { 1162 Instruction *User = cast<Instruction>(U.getUser()); 1163 BasicBlock *UseBB = User->getParent(); 1164 if (PHINode *P = dyn_cast<PHINode>(User)) { 1165 unsigned i = 1166 PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 1167 UseBB = P->getIncomingBlock(i); 1168 } 1169 if (UseBB == Preheader || L->contains(UseBB)) { 1170 UsedInLoop = true; 1171 break; 1172 } 1173 } 1174 1175 // If there is, the def must remain in the preheader. 1176 if (UsedInLoop) 1177 continue; 1178 1179 // Otherwise, sink it to the exit block. 1180 Instruction *ToMove = &*I; 1181 bool Done = false; 1182 1183 if (I != Preheader->begin()) { 1184 // Skip debug info intrinsics. 1185 do { 1186 --I; 1187 } while (I->isDebugOrPseudoInst() && I != Preheader->begin()); 1188 1189 if (I->isDebugOrPseudoInst() && I == Preheader->begin()) 1190 Done = true; 1191 } else { 1192 Done = true; 1193 } 1194 1195 MadeAnyChanges = true; 1196 ToMove->moveBefore(*ExitBlock, InsertPt); 1197 SE->forgetValue(ToMove); 1198 if (Done) break; 1199 InsertPt = ToMove->getIterator(); 1200 } 1201 1202 return MadeAnyChanges; 1203 } 1204 1205 static void replaceExitCond(BranchInst *BI, Value *NewCond, 1206 SmallVectorImpl<WeakTrackingVH> &DeadInsts) { 1207 auto *OldCond = BI->getCondition(); 1208 LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI 1209 << " with " << *NewCond << "\n"); 1210 BI->setCondition(NewCond); 1211 if (OldCond->use_empty()) 1212 DeadInsts.emplace_back(OldCond); 1213 } 1214 1215 static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB, 1216 bool IsTaken) { 1217 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1218 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); 1219 auto *OldCond = BI->getCondition(); 1220 return ConstantInt::get(OldCond->getType(), 1221 IsTaken ? ExitIfTrue : !ExitIfTrue); 1222 } 1223 1224 static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken, 1225 SmallVectorImpl<WeakTrackingVH> &DeadInsts) { 1226 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1227 auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken); 1228 replaceExitCond(BI, NewCond, DeadInsts); 1229 } 1230 1231 static void replaceLoopPHINodesWithPreheaderValues( 1232 LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts, 1233 ScalarEvolution &SE) { 1234 assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!"); 1235 auto *LoopPreheader = L->getLoopPreheader(); 1236 auto *LoopHeader = L->getHeader(); 1237 SmallVector<Instruction *> Worklist; 1238 for (auto &PN : LoopHeader->phis()) { 1239 auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader); 1240 for (User *U : PN.users()) 1241 Worklist.push_back(cast<Instruction>(U)); 1242 SE.forgetValue(&PN); 1243 PN.replaceAllUsesWith(PreheaderIncoming); 1244 DeadInsts.emplace_back(&PN); 1245 } 1246 1247 // Replacing with the preheader value will often allow IV users to simplify 1248 // (especially if the preheader value is a constant). 1249 SmallPtrSet<Instruction *, 16> Visited; 1250 while (!Worklist.empty()) { 1251 auto *I = cast<Instruction>(Worklist.pop_back_val()); 1252 if (!Visited.insert(I).second) 1253 continue; 1254 1255 // Don't simplify instructions outside the loop. 1256 if (!L->contains(I)) 1257 continue; 1258 1259 Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout()); 1260 if (Res && LI->replacementPreservesLCSSAForm(I, Res)) { 1261 for (User *U : I->users()) 1262 Worklist.push_back(cast<Instruction>(U)); 1263 I->replaceAllUsesWith(Res); 1264 DeadInsts.emplace_back(I); 1265 } 1266 } 1267 } 1268 1269 static Value * 1270 createInvariantCond(const Loop *L, BasicBlock *ExitingBB, 1271 const ScalarEvolution::LoopInvariantPredicate &LIP, 1272 SCEVExpander &Rewriter) { 1273 ICmpInst::Predicate InvariantPred = LIP.Pred; 1274 BasicBlock *Preheader = L->getLoopPreheader(); 1275 assert(Preheader && "Preheader doesn't exist"); 1276 Rewriter.setInsertPoint(Preheader->getTerminator()); 1277 auto *LHSV = Rewriter.expandCodeFor(LIP.LHS); 1278 auto *RHSV = Rewriter.expandCodeFor(LIP.RHS); 1279 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); 1280 if (ExitIfTrue) 1281 InvariantPred = ICmpInst::getInversePredicate(InvariantPred); 1282 IRBuilder<> Builder(Preheader->getTerminator()); 1283 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1284 return Builder.CreateICmp(InvariantPred, LHSV, RHSV, 1285 BI->getCondition()->getName()); 1286 } 1287 1288 static std::optional<Value *> 1289 createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB, 1290 const SCEV *MaxIter, bool Inverted, bool SkipLastIter, 1291 ScalarEvolution *SE, SCEVExpander &Rewriter) { 1292 ICmpInst::Predicate Pred = ICmp->getPredicate(); 1293 Value *LHS = ICmp->getOperand(0); 1294 Value *RHS = ICmp->getOperand(1); 1295 1296 // 'LHS pred RHS' should now mean that we stay in loop. 1297 auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1298 if (Inverted) 1299 Pred = CmpInst::getInversePredicate(Pred); 1300 1301 const SCEV *LHSS = SE->getSCEVAtScope(LHS, L); 1302 const SCEV *RHSS = SE->getSCEVAtScope(RHS, L); 1303 // Can we prove it to be trivially true or false? 1304 if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI)) 1305 return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV); 1306 1307 auto *ARTy = LHSS->getType(); 1308 auto *MaxIterTy = MaxIter->getType(); 1309 // If possible, adjust types. 1310 if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy)) 1311 MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy); 1312 else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) { 1313 const SCEV *MinusOne = SE->getMinusOne(ARTy); 1314 auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy); 1315 if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI)) 1316 MaxIter = SE->getTruncateExpr(MaxIter, ARTy); 1317 } 1318 1319 if (SkipLastIter) { 1320 // Semantically skip last iter is "subtract 1, do not bother about unsigned 1321 // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal 1322 // with umin in a smart way, but umin(a, b) - 1 will likely not simplify. 1323 // So we manually construct umin(a - 1, b - 1). 1324 SmallVector<const SCEV *, 4> Elements; 1325 if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) { 1326 for (auto *Op : UMin->operands()) 1327 Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType()))); 1328 MaxIter = SE->getUMinFromMismatchedTypes(Elements); 1329 } else 1330 MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType())); 1331 } 1332 1333 // Check if there is a loop-invariant predicate equivalent to our check. 1334 auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS, 1335 L, BI, MaxIter); 1336 if (!LIP) 1337 return std::nullopt; 1338 1339 // Can we prove it to be trivially true? 1340 if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI)) 1341 return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false); 1342 else 1343 return createInvariantCond(L, ExitingBB, *LIP, Rewriter); 1344 } 1345 1346 static bool optimizeLoopExitWithUnknownExitCount( 1347 const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter, 1348 bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter, 1349 SmallVectorImpl<WeakTrackingVH> &DeadInsts) { 1350 assert( 1351 (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) && 1352 "Not a loop exit!"); 1353 1354 // For branch that stays in loop by TRUE condition, go through AND. For branch 1355 // that stays in loop by FALSE condition, go through OR. Both gives the 1356 // similar logic: "stay in loop iff all conditions are true(false)". 1357 bool Inverted = L->contains(BI->getSuccessor(1)); 1358 SmallVector<ICmpInst *, 4> LeafConditions; 1359 SmallVector<Value *, 4> Worklist; 1360 SmallPtrSet<Value *, 4> Visited; 1361 Value *OldCond = BI->getCondition(); 1362 Visited.insert(OldCond); 1363 Worklist.push_back(OldCond); 1364 1365 auto GoThrough = [&](Value *V) { 1366 Value *LHS = nullptr, *RHS = nullptr; 1367 if (Inverted) { 1368 if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS)))) 1369 return false; 1370 } else { 1371 if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS)))) 1372 return false; 1373 } 1374 if (Visited.insert(LHS).second) 1375 Worklist.push_back(LHS); 1376 if (Visited.insert(RHS).second) 1377 Worklist.push_back(RHS); 1378 return true; 1379 }; 1380 1381 do { 1382 Value *Curr = Worklist.pop_back_val(); 1383 // Go through AND/OR conditions. Collect leaf ICMPs. We only care about 1384 // those with one use, to avoid instruction duplication. 1385 if (Curr->hasOneUse()) 1386 if (!GoThrough(Curr)) 1387 if (auto *ICmp = dyn_cast<ICmpInst>(Curr)) 1388 LeafConditions.push_back(ICmp); 1389 } while (!Worklist.empty()); 1390 1391 // If the current basic block has the same exit count as the whole loop, and 1392 // it consists of multiple icmp's, try to collect all icmp's that give exact 1393 // same exit count. For all other icmp's, we could use one less iteration, 1394 // because their value on the last iteration doesn't really matter. 1395 SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter; 1396 if (!SkipLastIter && LeafConditions.size() > 1 && 1397 SE->getExitCount(L, ExitingBB, 1398 ScalarEvolution::ExitCountKind::SymbolicMaximum) == 1399 MaxIter) 1400 for (auto *ICmp : LeafConditions) { 1401 auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted, 1402 /*ControlsExit*/ false); 1403 auto *ExitMax = EL.SymbolicMaxNotTaken; 1404 if (isa<SCEVCouldNotCompute>(ExitMax)) 1405 continue; 1406 // They could be of different types (specifically this happens after 1407 // IV widening). 1408 auto *WiderType = 1409 SE->getWiderType(ExitMax->getType(), MaxIter->getType()); 1410 auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType); 1411 auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType); 1412 if (WideExitMax == WideMaxIter) 1413 ICmpsFailingOnLastIter.insert(ICmp); 1414 } 1415 1416 bool Changed = false; 1417 for (auto *OldCond : LeafConditions) { 1418 // Skip last iteration for this icmp under one of two conditions: 1419 // - We do it for all conditions; 1420 // - There is another ICmp that would fail on last iter, so this one doesn't 1421 // really matter. 1422 bool OptimisticSkipLastIter = SkipLastIter; 1423 if (!OptimisticSkipLastIter) { 1424 if (ICmpsFailingOnLastIter.size() > 1) 1425 OptimisticSkipLastIter = true; 1426 else if (ICmpsFailingOnLastIter.size() == 1) 1427 OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond); 1428 } 1429 if (auto Replaced = 1430 createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted, 1431 OptimisticSkipLastIter, SE, Rewriter)) { 1432 Changed = true; 1433 auto *NewCond = *Replaced; 1434 if (auto *NCI = dyn_cast<Instruction>(NewCond)) { 1435 NCI->setName(OldCond->getName() + ".first_iter"); 1436 } 1437 LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond 1438 << " with " << *NewCond << "\n"); 1439 assert(OldCond->hasOneUse() && "Must be!"); 1440 OldCond->replaceAllUsesWith(NewCond); 1441 DeadInsts.push_back(OldCond); 1442 // Make sure we no longer consider this condition as failing on last 1443 // iteration. 1444 ICmpsFailingOnLastIter.erase(OldCond); 1445 } 1446 } 1447 return Changed; 1448 } 1449 1450 bool IndVarSimplify::canonicalizeExitCondition(Loop *L) { 1451 // Note: This is duplicating a particular part on SimplifyIndVars reasoning. 1452 // We need to duplicate it because given icmp zext(small-iv), C, IVUsers 1453 // never reaches the icmp since the zext doesn't fold to an AddRec unless 1454 // it already has flags. The alternative to this would be to extending the 1455 // set of "interesting" IV users to include the icmp, but doing that 1456 // regresses results in practice by querying SCEVs before trip counts which 1457 // rely on them which results in SCEV caching sub-optimal answers. The 1458 // concern about caching sub-optimal results is why we only query SCEVs of 1459 // the loop invariant RHS here. 1460 SmallVector<BasicBlock*, 16> ExitingBlocks; 1461 L->getExitingBlocks(ExitingBlocks); 1462 bool Changed = false; 1463 for (auto *ExitingBB : ExitingBlocks) { 1464 auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 1465 if (!BI) 1466 continue; 1467 assert(BI->isConditional() && "exit branch must be conditional"); 1468 1469 auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition()); 1470 if (!ICmp || !ICmp->hasOneUse()) 1471 continue; 1472 1473 auto *LHS = ICmp->getOperand(0); 1474 auto *RHS = ICmp->getOperand(1); 1475 // For the range reasoning, avoid computing SCEVs in the loop to avoid 1476 // poisoning cache with sub-optimal results. For the must-execute case, 1477 // this is a neccessary precondition for correctness. 1478 if (!L->isLoopInvariant(RHS)) { 1479 if (!L->isLoopInvariant(LHS)) 1480 continue; 1481 // Same logic applies for the inverse case 1482 std::swap(LHS, RHS); 1483 } 1484 1485 // Match (icmp signed-cond zext, RHS) 1486 Value *LHSOp = nullptr; 1487 if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned()) 1488 continue; 1489 1490 const DataLayout &DL = ExitingBB->getModule()->getDataLayout(); 1491 const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType()); 1492 const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType()); 1493 auto FullCR = ConstantRange::getFull(InnerBitWidth); 1494 FullCR = FullCR.zeroExtend(OuterBitWidth); 1495 auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L)); 1496 if (FullCR.contains(RHSCR)) { 1497 // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus 1498 // replace the signed condition with the unsigned version. 1499 ICmp->setPredicate(ICmp->getUnsignedPredicate()); 1500 Changed = true; 1501 // Note: No SCEV invalidation needed. We've changed the predicate, but 1502 // have not changed exit counts, or the values produced by the compare. 1503 continue; 1504 } 1505 } 1506 1507 // Now that we've canonicalized the condition to match the extend, 1508 // see if we can rotate the extend out of the loop. 1509 for (auto *ExitingBB : ExitingBlocks) { 1510 auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 1511 if (!BI) 1512 continue; 1513 assert(BI->isConditional() && "exit branch must be conditional"); 1514 1515 auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition()); 1516 if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned()) 1517 continue; 1518 1519 bool Swapped = false; 1520 auto *LHS = ICmp->getOperand(0); 1521 auto *RHS = ICmp->getOperand(1); 1522 if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS)) 1523 // Nothing to rotate 1524 continue; 1525 if (L->isLoopInvariant(LHS)) { 1526 // Same logic applies for the inverse case until we actually pick 1527 // which operand of the compare to update. 1528 Swapped = true; 1529 std::swap(LHS, RHS); 1530 } 1531 assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS)); 1532 1533 // Match (icmp unsigned-cond zext, RHS) 1534 // TODO: Extend to handle corresponding sext/signed-cmp case 1535 // TODO: Extend to other invertible functions 1536 Value *LHSOp = nullptr; 1537 if (!match(LHS, m_ZExt(m_Value(LHSOp)))) 1538 continue; 1539 1540 // In general, we only rotate if we can do so without increasing the number 1541 // of instructions. The exception is when we have an zext(add-rec). The 1542 // reason for allowing this exception is that we know we need to get rid 1543 // of the zext for SCEV to be able to compute a trip count for said loops; 1544 // we consider the new trip count valuable enough to increase instruction 1545 // count by one. 1546 if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp))) 1547 continue; 1548 1549 // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS 1550 // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS) 1551 // when zext is loop varying and RHS is loop invariant. This converts 1552 // loop varying work to loop-invariant work. 1553 auto doRotateTransform = [&]() { 1554 assert(ICmp->isUnsigned() && "must have proven unsigned already"); 1555 auto *NewRHS = 1556 CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "", 1557 L->getLoopPreheader()->getTerminator()); 1558 ICmp->setOperand(Swapped ? 1 : 0, LHSOp); 1559 ICmp->setOperand(Swapped ? 0 : 1, NewRHS); 1560 if (LHS->use_empty()) 1561 DeadInsts.push_back(LHS); 1562 }; 1563 1564 1565 const DataLayout &DL = ExitingBB->getModule()->getDataLayout(); 1566 const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType()); 1567 const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType()); 1568 auto FullCR = ConstantRange::getFull(InnerBitWidth); 1569 FullCR = FullCR.zeroExtend(OuterBitWidth); 1570 auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L)); 1571 if (FullCR.contains(RHSCR)) { 1572 doRotateTransform(); 1573 Changed = true; 1574 // Note, we are leaving SCEV in an unfortunately imprecise case here 1575 // as rotation tends to reveal information about trip counts not 1576 // previously visible. 1577 continue; 1578 } 1579 } 1580 1581 return Changed; 1582 } 1583 1584 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { 1585 SmallVector<BasicBlock*, 16> ExitingBlocks; 1586 L->getExitingBlocks(ExitingBlocks); 1587 1588 // Remove all exits which aren't both rewriteable and execute on every 1589 // iteration. 1590 llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) { 1591 // If our exitting block exits multiple loops, we can only rewrite the 1592 // innermost one. Otherwise, we're changing how many times the innermost 1593 // loop runs before it exits. 1594 if (LI->getLoopFor(ExitingBB) != L) 1595 return true; 1596 1597 // Can't rewrite non-branch yet. 1598 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 1599 if (!BI) 1600 return true; 1601 1602 // Likewise, the loop latch must be dominated by the exiting BB. 1603 if (!DT->dominates(ExitingBB, L->getLoopLatch())) 1604 return true; 1605 1606 if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { 1607 // If already constant, nothing to do. However, if this is an 1608 // unconditional exit, we can still replace header phis with their 1609 // preheader value. 1610 if (!L->contains(BI->getSuccessor(CI->isNullValue()))) 1611 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE); 1612 return true; 1613 } 1614 1615 return false; 1616 }); 1617 1618 if (ExitingBlocks.empty()) 1619 return false; 1620 1621 // Get a symbolic upper bound on the loop backedge taken count. 1622 const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L); 1623 if (isa<SCEVCouldNotCompute>(MaxBECount)) 1624 return false; 1625 1626 // Visit our exit blocks in order of dominance. We know from the fact that 1627 // all exits must dominate the latch, so there is a total dominance order 1628 // between them. 1629 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) { 1630 // std::sort sorts in ascending order, so we want the inverse of 1631 // the normal dominance relation. 1632 if (A == B) return false; 1633 if (DT->properlyDominates(A, B)) 1634 return true; 1635 else { 1636 assert(DT->properlyDominates(B, A) && 1637 "expected total dominance order!"); 1638 return false; 1639 } 1640 }); 1641 #ifdef ASSERT 1642 for (unsigned i = 1; i < ExitingBlocks.size(); i++) { 1643 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])); 1644 } 1645 #endif 1646 1647 bool Changed = false; 1648 bool SkipLastIter = false; 1649 const SCEV *CurrMaxExit = SE->getCouldNotCompute(); 1650 auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) { 1651 if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount)) 1652 return; 1653 if (isa<SCEVCouldNotCompute>(CurrMaxExit)) 1654 CurrMaxExit = MaxExitCount; 1655 else 1656 CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount); 1657 // If the loop has more than 1 iteration, all further checks will be 1658 // executed 1 iteration less. 1659 if (CurrMaxExit == MaxBECount) 1660 SkipLastIter = true; 1661 }; 1662 SmallSet<const SCEV *, 8> DominatingExactExitCounts; 1663 for (BasicBlock *ExitingBB : ExitingBlocks) { 1664 const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB); 1665 const SCEV *MaxExitCount = SE->getExitCount( 1666 L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum); 1667 if (isa<SCEVCouldNotCompute>(ExactExitCount)) { 1668 // Okay, we do not know the exit count here. Can we at least prove that it 1669 // will remain the same within iteration space? 1670 auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1671 auto OptimizeCond = [&](bool SkipLastIter) { 1672 return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB, 1673 MaxBECount, SkipLastIter, 1674 SE, Rewriter, DeadInsts); 1675 }; 1676 1677 // TODO: We might have proved that we can skip the last iteration for 1678 // this check. In this case, we only want to check the condition on the 1679 // pre-last iteration (MaxBECount - 1). However, there is a nasty 1680 // corner case: 1681 // 1682 // for (i = len; i != 0; i--) { ... check (i ult X) ... } 1683 // 1684 // If we could not prove that len != 0, then we also could not prove that 1685 // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then 1686 // OptimizeCond will likely not prove anything for it, even if it could 1687 // prove the same fact for len. 1688 // 1689 // As a temporary solution, we query both last and pre-last iterations in 1690 // hope that we will be able to prove triviality for at least one of 1691 // them. We can stop querying MaxBECount for this case once SCEV 1692 // understands that (MaxBECount - 1) will not overflow here. 1693 if (OptimizeCond(false)) 1694 Changed = true; 1695 else if (SkipLastIter && OptimizeCond(true)) 1696 Changed = true; 1697 UpdateSkipLastIter(MaxExitCount); 1698 continue; 1699 } 1700 1701 UpdateSkipLastIter(ExactExitCount); 1702 1703 // If we know we'd exit on the first iteration, rewrite the exit to 1704 // reflect this. This does not imply the loop must exit through this 1705 // exit; there may be an earlier one taken on the first iteration. 1706 // We know that the backedge can't be taken, so we replace all 1707 // the header PHIs with values coming from the preheader. 1708 if (ExactExitCount->isZero()) { 1709 foldExit(L, ExitingBB, true, DeadInsts); 1710 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE); 1711 Changed = true; 1712 continue; 1713 } 1714 1715 assert(ExactExitCount->getType()->isIntegerTy() && 1716 MaxBECount->getType()->isIntegerTy() && 1717 "Exit counts must be integers"); 1718 1719 Type *WiderType = 1720 SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType()); 1721 ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType); 1722 MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType); 1723 assert(MaxBECount->getType() == ExactExitCount->getType()); 1724 1725 // Can we prove that some other exit must be taken strictly before this 1726 // one? 1727 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount, 1728 ExactExitCount)) { 1729 foldExit(L, ExitingBB, false, DeadInsts); 1730 Changed = true; 1731 continue; 1732 } 1733 1734 // As we run, keep track of which exit counts we've encountered. If we 1735 // find a duplicate, we've found an exit which would have exited on the 1736 // exiting iteration, but (from the visit order) strictly follows another 1737 // which does the same and is thus dead. 1738 if (!DominatingExactExitCounts.insert(ExactExitCount).second) { 1739 foldExit(L, ExitingBB, false, DeadInsts); 1740 Changed = true; 1741 continue; 1742 } 1743 1744 // TODO: There might be another oppurtunity to leverage SCEV's reasoning 1745 // here. If we kept track of the min of dominanting exits so far, we could 1746 // discharge exits with EC >= MDEC. This is less powerful than the existing 1747 // transform (since later exits aren't considered), but potentially more 1748 // powerful for any case where SCEV can prove a >=u b, but neither a == b 1749 // or a >u b. Such a case is not currently known. 1750 } 1751 return Changed; 1752 } 1753 1754 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { 1755 SmallVector<BasicBlock*, 16> ExitingBlocks; 1756 L->getExitingBlocks(ExitingBlocks); 1757 1758 // Finally, see if we can rewrite our exit conditions into a loop invariant 1759 // form. If we have a read-only loop, and we can tell that we must exit down 1760 // a path which does not need any of the values computed within the loop, we 1761 // can rewrite the loop to exit on the first iteration. Note that this 1762 // doesn't either a) tell us the loop exits on the first iteration (unless 1763 // *all* exits are predicateable) or b) tell us *which* exit might be taken. 1764 // This transformation looks a lot like a restricted form of dead loop 1765 // elimination, but restricted to read-only loops and without neccesssarily 1766 // needing to kill the loop entirely. 1767 if (!LoopPredication) 1768 return false; 1769 1770 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits 1771 // through *explicit* control flow. We have to eliminate the possibility of 1772 // implicit exits (see below) before we know it's truly exact. 1773 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L); 1774 if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC)) 1775 return false; 1776 1777 assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant"); 1778 assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer"); 1779 1780 auto BadExit = [&](BasicBlock *ExitingBB) { 1781 // If our exiting block exits multiple loops, we can only rewrite the 1782 // innermost one. Otherwise, we're changing how many times the innermost 1783 // loop runs before it exits. 1784 if (LI->getLoopFor(ExitingBB) != L) 1785 return true; 1786 1787 // Can't rewrite non-branch yet. 1788 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 1789 if (!BI) 1790 return true; 1791 1792 // If already constant, nothing to do. 1793 if (isa<Constant>(BI->getCondition())) 1794 return true; 1795 1796 // If the exit block has phis, we need to be able to compute the values 1797 // within the loop which contains them. This assumes trivially lcssa phis 1798 // have already been removed; TODO: generalize 1799 BasicBlock *ExitBlock = 1800 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0); 1801 if (!ExitBlock->phis().empty()) 1802 return true; 1803 1804 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 1805 if (isa<SCEVCouldNotCompute>(ExitCount) || 1806 !Rewriter.isSafeToExpand(ExitCount)) 1807 return true; 1808 1809 assert(SE->isLoopInvariant(ExitCount, L) && 1810 "Exit count must be loop invariant"); 1811 assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer"); 1812 return false; 1813 }; 1814 1815 // If we have any exits which can't be predicated themselves, than we can't 1816 // predicate any exit which isn't guaranteed to execute before it. Consider 1817 // two exits (a) and (b) which would both exit on the same iteration. If we 1818 // can predicate (b), but not (a), and (a) preceeds (b) along some path, then 1819 // we could convert a loop from exiting through (a) to one exiting through 1820 // (b). Note that this problem exists only for exits with the same exit 1821 // count, and we could be more aggressive when exit counts are known inequal. 1822 llvm::sort(ExitingBlocks, 1823 [&](BasicBlock *A, BasicBlock *B) { 1824 // std::sort sorts in ascending order, so we want the inverse of 1825 // the normal dominance relation, plus a tie breaker for blocks 1826 // unordered by dominance. 1827 if (DT->properlyDominates(A, B)) return true; 1828 if (DT->properlyDominates(B, A)) return false; 1829 return A->getName() < B->getName(); 1830 }); 1831 // Check to see if our exit blocks are a total order (i.e. a linear chain of 1832 // exits before the backedge). If they aren't, reasoning about reachability 1833 // is complicated and we choose not to for now. 1834 for (unsigned i = 1; i < ExitingBlocks.size(); i++) 1835 if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i])) 1836 return false; 1837 1838 // Given our sorted total order, we know that exit[j] must be evaluated 1839 // after all exit[i] such j > i. 1840 for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++) 1841 if (BadExit(ExitingBlocks[i])) { 1842 ExitingBlocks.resize(i); 1843 break; 1844 } 1845 1846 if (ExitingBlocks.empty()) 1847 return false; 1848 1849 // We rely on not being able to reach an exiting block on a later iteration 1850 // then it's statically compute exit count. The implementaton of 1851 // getExitCount currently has this invariant, but assert it here so that 1852 // breakage is obvious if this ever changes.. 1853 assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) { 1854 return DT->dominates(ExitingBB, L->getLoopLatch()); 1855 })); 1856 1857 // At this point, ExitingBlocks consists of only those blocks which are 1858 // predicatable. Given that, we know we have at least one exit we can 1859 // predicate if the loop is doesn't have side effects and doesn't have any 1860 // implicit exits (because then our exact BTC isn't actually exact). 1861 // @Reviewers - As structured, this is O(I^2) for loop nests. Any 1862 // suggestions on how to improve this? I can obviously bail out for outer 1863 // loops, but that seems less than ideal. MemorySSA can find memory writes, 1864 // is that enough for *all* side effects? 1865 for (BasicBlock *BB : L->blocks()) 1866 for (auto &I : *BB) 1867 // TODO:isGuaranteedToTransfer 1868 if (I.mayHaveSideEffects()) 1869 return false; 1870 1871 bool Changed = false; 1872 // Finally, do the actual predication for all predicatable blocks. A couple 1873 // of notes here: 1874 // 1) We don't bother to constant fold dominated exits with identical exit 1875 // counts; that's simply a form of CSE/equality propagation and we leave 1876 // it for dedicated passes. 1877 // 2) We insert the comparison at the branch. Hoisting introduces additional 1878 // legality constraints and we leave that to dedicated logic. We want to 1879 // predicate even if we can't insert a loop invariant expression as 1880 // peeling or unrolling will likely reduce the cost of the otherwise loop 1881 // varying check. 1882 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator()); 1883 IRBuilder<> B(L->getLoopPreheader()->getTerminator()); 1884 Value *ExactBTCV = nullptr; // Lazily generated if needed. 1885 for (BasicBlock *ExitingBB : ExitingBlocks) { 1886 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 1887 1888 auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1889 Value *NewCond; 1890 if (ExitCount == ExactBTC) { 1891 NewCond = L->contains(BI->getSuccessor(0)) ? 1892 B.getFalse() : B.getTrue(); 1893 } else { 1894 Value *ECV = Rewriter.expandCodeFor(ExitCount); 1895 if (!ExactBTCV) 1896 ExactBTCV = Rewriter.expandCodeFor(ExactBTC); 1897 Value *RHS = ExactBTCV; 1898 if (ECV->getType() != RHS->getType()) { 1899 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType()); 1900 ECV = B.CreateZExt(ECV, WiderTy); 1901 RHS = B.CreateZExt(RHS, WiderTy); 1902 } 1903 auto Pred = L->contains(BI->getSuccessor(0)) ? 1904 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; 1905 NewCond = B.CreateICmp(Pred, ECV, RHS); 1906 } 1907 Value *OldCond = BI->getCondition(); 1908 BI->setCondition(NewCond); 1909 if (OldCond->use_empty()) 1910 DeadInsts.emplace_back(OldCond); 1911 Changed = true; 1912 } 1913 1914 return Changed; 1915 } 1916 1917 //===----------------------------------------------------------------------===// 1918 // IndVarSimplify driver. Manage several subpasses of IV simplification. 1919 //===----------------------------------------------------------------------===// 1920 1921 bool IndVarSimplify::run(Loop *L) { 1922 // We need (and expect!) the incoming loop to be in LCSSA. 1923 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 1924 "LCSSA required to run indvars!"); 1925 1926 // If LoopSimplify form is not available, stay out of trouble. Some notes: 1927 // - LSR currently only supports LoopSimplify-form loops. Indvars' 1928 // canonicalization can be a pessimization without LSR to "clean up" 1929 // afterwards. 1930 // - We depend on having a preheader; in particular, 1931 // Loop::getCanonicalInductionVariable only supports loops with preheaders, 1932 // and we're in trouble if we can't find the induction variable even when 1933 // we've manually inserted one. 1934 // - LFTR relies on having a single backedge. 1935 if (!L->isLoopSimplifyForm()) 1936 return false; 1937 1938 bool Changed = false; 1939 // If there are any floating-point recurrences, attempt to 1940 // transform them to use integer recurrences. 1941 Changed |= rewriteNonIntegerIVs(L); 1942 1943 // Create a rewriter object which we'll use to transform the code with. 1944 SCEVExpander Rewriter(*SE, DL, "indvars"); 1945 #ifndef NDEBUG 1946 Rewriter.setDebugType(DEBUG_TYPE); 1947 #endif 1948 1949 // Eliminate redundant IV users. 1950 // 1951 // Simplification works best when run before other consumers of SCEV. We 1952 // attempt to avoid evaluating SCEVs for sign/zero extend operations until 1953 // other expressions involving loop IVs have been evaluated. This helps SCEV 1954 // set no-wrap flags before normalizing sign/zero extension. 1955 Rewriter.disableCanonicalMode(); 1956 Changed |= simplifyAndExtend(L, Rewriter, LI); 1957 1958 // Check to see if we can compute the final value of any expressions 1959 // that are recurrent in the loop, and substitute the exit values from the 1960 // loop into any instructions outside of the loop that use the final values 1961 // of the current expressions. 1962 if (ReplaceExitValue != NeverRepl) { 1963 if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT, 1964 ReplaceExitValue, DeadInsts)) { 1965 NumReplaced += Rewrites; 1966 Changed = true; 1967 } 1968 } 1969 1970 // Eliminate redundant IV cycles. 1971 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI); 1972 1973 // Try to convert exit conditions to unsigned and rotate computation 1974 // out of the loop. Note: Handles invalidation internally if needed. 1975 Changed |= canonicalizeExitCondition(L); 1976 1977 // Try to eliminate loop exits based on analyzeable exit counts 1978 if (optimizeLoopExits(L, Rewriter)) { 1979 Changed = true; 1980 // Given we've changed exit counts, notify SCEV 1981 // Some nested loops may share same folded exit basic block, 1982 // thus we need to notify top most loop. 1983 SE->forgetTopmostLoop(L); 1984 } 1985 1986 // Try to form loop invariant tests for loop exits by changing how many 1987 // iterations of the loop run when that is unobservable. 1988 if (predicateLoopExits(L, Rewriter)) { 1989 Changed = true; 1990 // Given we've changed exit counts, notify SCEV 1991 SE->forgetLoop(L); 1992 } 1993 1994 // If we have a trip count expression, rewrite the loop's exit condition 1995 // using it. 1996 if (!DisableLFTR) { 1997 BasicBlock *PreHeader = L->getLoopPreheader(); 1998 1999 SmallVector<BasicBlock*, 16> ExitingBlocks; 2000 L->getExitingBlocks(ExitingBlocks); 2001 for (BasicBlock *ExitingBB : ExitingBlocks) { 2002 // Can't rewrite non-branch yet. 2003 if (!isa<BranchInst>(ExitingBB->getTerminator())) 2004 continue; 2005 2006 // If our exitting block exits multiple loops, we can only rewrite the 2007 // innermost one. Otherwise, we're changing how many times the innermost 2008 // loop runs before it exits. 2009 if (LI->getLoopFor(ExitingBB) != L) 2010 continue; 2011 2012 if (!needsLFTR(L, ExitingBB)) 2013 continue; 2014 2015 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2016 if (isa<SCEVCouldNotCompute>(ExitCount)) 2017 continue; 2018 2019 // This was handled above, but as we form SCEVs, we can sometimes refine 2020 // existing ones; this allows exit counts to be folded to zero which 2021 // weren't when optimizeLoopExits saw them. Arguably, we should iterate 2022 // until stable to handle cases like this better. 2023 if (ExitCount->isZero()) 2024 continue; 2025 2026 PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT); 2027 if (!IndVar) 2028 continue; 2029 2030 // Avoid high cost expansions. Note: This heuristic is questionable in 2031 // that our definition of "high cost" is not exactly principled. 2032 if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget, 2033 TTI, PreHeader->getTerminator())) 2034 continue; 2035 2036 // Check preconditions for proper SCEVExpander operation. SCEV does not 2037 // express SCEVExpander's dependencies, such as LoopSimplify. Instead 2038 // any pass that uses the SCEVExpander must do it. This does not work 2039 // well for loop passes because SCEVExpander makes assumptions about 2040 // all loops, while LoopPassManager only forces the current loop to be 2041 // simplified. 2042 // 2043 // FIXME: SCEV expansion has no way to bail out, so the caller must 2044 // explicitly check any assumptions made by SCEV. Brittle. 2045 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount); 2046 if (!AR || AR->getLoop()->getLoopPreheader()) 2047 Changed |= linearFunctionTestReplace(L, ExitingBB, 2048 ExitCount, IndVar, 2049 Rewriter); 2050 } 2051 } 2052 // Clear the rewriter cache, because values that are in the rewriter's cache 2053 // can be deleted in the loop below, causing the AssertingVH in the cache to 2054 // trigger. 2055 Rewriter.clear(); 2056 2057 // Now that we're done iterating through lists, clean up any instructions 2058 // which are now dead. 2059 while (!DeadInsts.empty()) { 2060 Value *V = DeadInsts.pop_back_val(); 2061 2062 if (PHINode *PHI = dyn_cast_or_null<PHINode>(V)) 2063 Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get()); 2064 else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V)) 2065 Changed |= 2066 RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get()); 2067 } 2068 2069 // The Rewriter may not be used from this point on. 2070 2071 // Loop-invariant instructions in the preheader that aren't used in the 2072 // loop may be sunk below the loop to reduce register pressure. 2073 Changed |= sinkUnusedInvariants(L); 2074 2075 // rewriteFirstIterationLoopExitValues does not rely on the computation of 2076 // trip count and therefore can further simplify exit values in addition to 2077 // rewriteLoopExitValues. 2078 Changed |= rewriteFirstIterationLoopExitValues(L); 2079 2080 // Clean up dead instructions. 2081 Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get()); 2082 2083 // Check a post-condition. 2084 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2085 "Indvars did not preserve LCSSA!"); 2086 if (VerifyMemorySSA && MSSAU) 2087 MSSAU->getMemorySSA()->verifyMemorySSA(); 2088 2089 return Changed; 2090 } 2091 2092 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, 2093 LoopStandardAnalysisResults &AR, 2094 LPMUpdater &) { 2095 Function *F = L.getHeader()->getParent(); 2096 const DataLayout &DL = F->getParent()->getDataLayout(); 2097 2098 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA, 2099 WidenIndVars && AllowIVWidening); 2100 if (!IVS.run(&L)) 2101 return PreservedAnalyses::all(); 2102 2103 auto PA = getLoopPassPreservedAnalyses(); 2104 PA.preserveSet<CFGAnalyses>(); 2105 if (AR.MSSA) 2106 PA.preserve<MemorySSAAnalysis>(); 2107 return PA; 2108 } 2109