//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "indvars"

// Pass statistics (reported via the STATISTIC machinery, e.g. with -stats).
STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Command-line knobs that enable/disable or tune the individual
// sub-transforms performed by this pass.
static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(
        clEnumValN(NeverRepl, "never", "never replace exit value"),
        clEnumValN(OnlyCheapRepl, "cheap",
                   "only replace exit value when the cost is cheap"),
        clEnumValN(
            UnusedIndVarInLoop, "unusedindvarinloop",
            "only replace exit value when it is an unused "
            "induction variable in the loop and has cheap replacement cost"),
        clEnumValN(NoHardUse, "noharduse",
                   "only replace exit values when loop def likely dead"),
        clEnumValN(AlwaysRepl, "always",
                   "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                cl::desc("Allow widening of indvars to eliminate s/zext"));

namespace {

/// Driver object for the indvars transformation on a single loop. Holds the
/// analyses the sub-transforms consult and the options chosen at construction.
class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  // Instructions collected for deferred deletion by the sub-transforms
  // (e.g. simplifyAndExtend feeds this via simplifyUsersOfIV/createWideIV).
  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool WidenIndVars;

  // Set when a simplification exposed an opportunity for loop unswitching;
  // queried by callers through runUnswitching().
  bool RunUnswitching = false;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to improve our exit conditions by converting condition from signed
  /// to unsigned or rotating computation out of the loop.
  /// (See inline comment about why this is duplicated from simplifyAndExtend)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzeable exit counts
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    // MemorySSA is optional; only build an updater when it is provided.
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  /// Run all indvar transforms on loop \p L; returns true if IR changed.
  bool run(Loop *L);

  bool runUnswitching() const { return RunUnswitching; }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible. Returns false (leaving \p IntVal
/// untouched) when the value is not exactly representable as an int64_t.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has floating induction variable then insert corresponding
/// integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  // Loop::contains returns 0/1, so this selects the incoming index whose
  // block lies inside the loop; the other index is the preheader edge.
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if increment operation is not
  // an add or increment value can not be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI =
      PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator());
  NewPHI->addIncoming(ConstantInt::getSigned(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));
  NewPHI->setDebugLoc(PN->getDebugLoc());

  Instruction *NewAdd = BinaryOperator::CreateAdd(
      NewPHI, ConstantInt::getSigned(Int32Ty, IncValue),
      Incr->getName() + ".int", Incr->getIterator());
  NewAdd->setDebugLoc(Incr->getDebugLoc());
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(
      TheBr->getIterator(), NewPred, NewAdd,
      ConstantInt::getSigned(Int32Ty, ExitValue), Compare->getName());
  NewCompare->setDebugLoc(Compare->getDebugLoc());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Instruction *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                       PN->getParent()->getFirstInsertionPt());
    Conv->setDebugLoc(PN->getDebugLoc());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  // Snapshot the header phis with weak handles: handleFloatingPointIV may
  // delete phis, and the weak handle lets us skip any that die.
  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (WeakTrackingVH &PHI : PHIs)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHI))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  // Only widen to a type the target supports natively.
  if (!Cast->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetics on the wider indvar are more
  // expensive than those on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType ||
      Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its user(s), or 'signed'
  // if there are multiple users with both sign- and zero extensions,
  // in order not to introduce nondeterministic behaviour based on the
  // unspecified order of a PHI nodes' users-iterator.
  WI.IsSigned |= IsSigned;
}

//===----------------------------------------------------------------------===//
//  Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

/// IVVisitor that records sign/zero-extend information about the visited IV's
/// casts into a WideIVInfo, for use by the widening logic.
class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
      : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = Intrinsic::getDeclarationIfExists(
      L->getBlocks()[0]->getModule(), Intrinsic::experimental_guard);
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode *, 8> LoopPhis;
  for (PHINode &PN : L->getHeader()->phis())
    LoopPhis.push_back(&PN);

  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extend evaluation until needed, and avoid running
    // other SCEV based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts,
                                             Rewriter, &Visitor);

      Changed |= C;
      RunUnswitching |= U;
      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while(!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        // Newly created wide phis are themselves candidates for another
        // round of simplification.
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated Phi node if so. Otherwise, return null. Note
/// that this is less general than SCEVs AddRec checking.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    [[fallthrough]];
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value.  Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test.  This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  // Bound the recursion to keep this check cheap.
  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example, Arguments
  // may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value*, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if the given phi is a "counter" in L.  A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1.  Note that L must have exactly one latch.
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}

/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR.  If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
/// Returns the best counter phi found in the loop header, or nullptr if no
/// suitable counter exists. See the heuristic preferences inline below.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison. Our strategy here differs for
    // pointers and integer IVs. For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace. For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (isAlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/unit stride) after the backedge of loop L
/// is taken ExitCount times.
913 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, 914 const SCEV *ExitCount, bool UsePostInc, Loop *L, 915 SCEVExpander &Rewriter, ScalarEvolution *SE) { 916 assert(isLoopCounter(IndVar, L, SE)); 917 assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer"); 918 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 919 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 920 921 // For integer IVs, truncate the IV before computing the limit unless we 922 // know apriori that the limit must be a constant when evaluated in the 923 // bitwidth of the IV. We prefer (potentially) keeping a truncate of the 924 // IV in the loop over a (potentially) expensive expansion of the widened 925 // exit count add(zext(add)) expression. 926 if (IndVar->getType()->isIntegerTy() && 927 SE->getTypeSizeInBits(AR->getType()) > 928 SE->getTypeSizeInBits(ExitCount->getType())) { 929 const SCEV *IVInit = AR->getStart(); 930 if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount)) 931 AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType())); 932 } 933 934 const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR; 935 const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE); 936 assert(SE->isLoopInvariant(IVLimit, L) && 937 "Computed iteration count is not loop invariant!"); 938 return Rewriter.expandCodeFor(IVLimit, ARBase->getType(), 939 ExitingBB->getTerminator()); 940 } 941 942 /// This method rewrites the exit condition of the loop to be a canonical != 943 /// comparison against the incremented loop induction variable. This pass is 944 /// able to rewrite the exit tests of any loop where the SCEV analysis can 945 /// determine a loop-invariant trip count of the loop, which is actually a much 946 /// broader range than just linear tests. 
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  // The value of the counter after the latch, i.e. the increment instruction.
  Instruction * const IncVar =
    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use. We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously
  // dynamically dead (and as such may be arbitrarily poison). We remove any
  // nowrap flags that SCEV didn't infer for the post-inc addrec (even if we
  // use a pre-inc check), because the pre-inc addrec flags may be adopted from
  // the original instruction, while SCEV has to explicitly prove the post-inc
  // nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type. This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead. We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within in.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
    const SCEV *ZExtTrunc =
        SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      // zext(trunc(IV)) == IV, so widening the limit with zext is lossless.
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
          SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << " LHS:" << *CmpIndVar << '\n'
                    << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << " RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << " was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}

//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  // Walk the preheader backwards from its terminator.
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop.
    // Also don't move instructions which might read memory, since the loop
    // may modify memory. Note that it's okay if the instruction might have
    // undefined behavior: LoopSimplify guarantees that the preheader
    // dominates the exit block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      // A use through a phi counts as a use in the corresponding
      // incoming block, not in the phi's own block.
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    // Advance the iterator before moving ToMove, skipping over debug/pseudo
    // instructions; Done marks that we've reached the top of the preheader.
    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    SE->forgetValue(ToMove);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}

/// Replace the condition of the loop-exiting branch BI with NewCond, queueing
/// the old condition for deletion if it became dead.
static void replaceExitCond(BranchInst *BI, Value *NewCond,
                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  auto *OldCond = BI->getCondition();
  LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
                    << " with " << *NewCond << "\n");
  BI->setCondition(NewCond);
  if (OldCond->use_empty())
    DeadInsts.emplace_back(OldCond);
}

/// Build the boolean constant that, used as ExitingBB's branch condition,
/// makes the exit taken (IsTaken) or not taken (!IsTaken).
static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
                                      bool IsTaken) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  auto *OldCond = BI->getCondition();
  return ConstantInt::get(OldCond->getType(),
                          IsTaken ?
                              ExitIfTrue : !ExitIfTrue);
}

/// Fold the exit out of ExitingBB by forcing its branch condition to a
/// constant (taken or not taken).
static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
  replaceExitCond(BI, NewCond, DeadInsts);
}

/// Replace every header phi of L with its preheader incoming value (valid
/// when the backedge is known never taken), then iteratively simplify the
/// in-loop users that became constant-foldable.
static void replaceLoopPHINodesWithPreheaderValues(
    LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
    ScalarEvolution &SE) {
  assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
  auto *LoopPreheader = L->getLoopPreheader();
  auto *LoopHeader = L->getHeader();
  SmallVector<Instruction *> Worklist;
  for (auto &PN : LoopHeader->phis()) {
    auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
    for (User *U : PN.users())
      Worklist.push_back(cast<Instruction>(U));
    SE.forgetValue(&PN);
    PN.replaceAllUsesWith(PreheaderIncoming);
    DeadInsts.emplace_back(&PN);
  }

  // Replacing with the preheader value will often allow IV users to simplify
  // (especially if the preheader value is a constant).
  SmallPtrSet<Instruction *, 16> Visited;
  while (!Worklist.empty()) {
    auto *I = cast<Instruction>(Worklist.pop_back_val());
    if (!Visited.insert(I).second)
      continue;

    // Don't simplify instructions outside the loop.
    if (!L->contains(I))
      continue;

    Value *Res = simplifyInstruction(I, I->getDataLayout());
    if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
      // Users of a simplified instruction may simplify in turn.
      for (User *U : I->users())
        Worklist.push_back(cast<Instruction>(U));
      I->replaceAllUsesWith(Res);
      DeadInsts.emplace_back(I);
    }
  }
}

/// Materialize the loop-invariant predicate LIP in the preheader and return
/// the resulting compare, inverted as needed so it can directly replace the
/// exit condition of ExitingBB.
static Value *
createInvariantCond(const Loop *L, BasicBlock *ExitingBB,
                    const ScalarEvolution::LoopInvariantPredicate &LIP,
                    SCEVExpander &Rewriter) {
  ICmpInst::Predicate InvariantPred = LIP.Pred;
  BasicBlock *Preheader = L->getLoopPreheader();
  assert(Preheader && "Preheader doesn't exist");
  Rewriter.setInsertPoint(Preheader->getTerminator());
  auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
  auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
  // The predicate was computed in "stay in loop" form; invert it if the
  // branch exits on true.
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  if (ExitIfTrue)
    InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
  IRBuilder<> Builder(Preheader->getTerminator());
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
                            BI->getCondition()->getName());
}

/// Try to compute a loop-invariant replacement for the exit check ICmp,
/// returning std::nullopt if no equivalent invariant condition is found.
static std::optional<Value *>
createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
                  const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
                  ScalarEvolution *SE, SCEVExpander &Rewriter) {
  ICmpInst::Predicate Pred = ICmp->getPredicate();
  Value *LHS = ICmp->getOperand(0);
  Value *RHS = ICmp->getOperand(1);

  // 'LHS pred RHS' should now mean that we stay in loop.
  auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true or false?
  if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types so MaxIter matches the compared type: widen
  // MaxIter freely, but only narrow it when it provably fits.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    const SCEV *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    // Semantically skip last iter is "subtract 1, do not bother about unsigned
    // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
    // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
    // So we manually construct umin(a - 1, b - 1).
    SmallVector<const SCEV *, 4> Elements;
    if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
      for (const SCEV *Op : UMin->operands())
        Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
      MaxIter = SE->getUMinFromMismatchedTypes(Elements);
    } else
      MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return std::nullopt;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
  else
    return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
}

/// Try to replace the (unknown-exit-count) exit condition of ExitingBB with
/// conditions that are invariant during the first MaxIter iterations. Returns
/// true if any leaf compare of the condition was replaced.
static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
    bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  assert(
      (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
      "Not a loop exit!");

  // For branch that stays in loop by TRUE condition, go through AND. For
  // branch that stays in loop by FALSE condition, go through OR. Both gives
  // the similar logic: "stay in loop iff all conditions are true(false)".
  bool Inverted = L->contains(BI->getSuccessor(1));
  SmallVector<ICmpInst *, 4> LeafConditions;
  SmallVector<Value *, 4> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  Value *OldCond = BI->getCondition();
  Visited.insert(OldCond);
  Worklist.push_back(OldCond);

  // Descend through one level of logical and/or, pushing the operands onto
  // the worklist; returns false if V is not the matching logical op.
  auto GoThrough = [&](Value *V) {
    Value *LHS = nullptr, *RHS = nullptr;
    if (Inverted) {
      if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
        return false;
    } else {
      if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
        return false;
    }
    if (Visited.insert(LHS).second)
      Worklist.push_back(LHS);
    if (Visited.insert(RHS).second)
      Worklist.push_back(RHS);
    return true;
  };

  do {
    Value *Curr = Worklist.pop_back_val();
    // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
    // those with one use, to avoid instruction duplication.
    if (Curr->hasOneUse())
      if (!GoThrough(Curr))
        if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
          LeafConditions.push_back(ICmp);
  } while (!Worklist.empty());

  // If the current basic block has the same exit count as the whole loop, and
  // it consists of multiple icmp's, try to collect all icmp's that give exact
  // same exit count. For all other icmp's, we could use one less iteration,
  // because their value on the last iteration doesn't really matter.
  SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
  if (!SkipLastIter && LeafConditions.size() > 1 &&
      SE->getExitCount(L, ExitingBB,
                       ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
          MaxIter)
    for (auto *ICmp : LeafConditions) {
      auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
                                             /*ControlsExit*/ false);
      const SCEV *ExitMax = EL.SymbolicMaxNotTaken;
      if (isa<SCEVCouldNotCompute>(ExitMax))
        continue;
      // They could be of different types (specifically this happens after
      // IV widening).
      auto *WiderType =
          SE->getWiderType(ExitMax->getType(), MaxIter->getType());
      const SCEV *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
      const SCEV *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
      if (WideExitMax == WideMaxIter)
        ICmpsFailingOnLastIter.insert(ICmp);
    }

  bool Changed = false;
  for (auto *OldCond : LeafConditions) {
    // Skip last iteration for this icmp under one of two conditions:
    // - We do it for all conditions;
    // - There is another ICmp that would fail on last iter, so this one
    //   doesn't really matter.
    bool OptimisticSkipLastIter = SkipLastIter;
    if (!OptimisticSkipLastIter) {
      if (ICmpsFailingOnLastIter.size() > 1)
        OptimisticSkipLastIter = true;
      else if (ICmpsFailingOnLastIter.size() == 1)
        OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
    }
    if (auto Replaced =
            createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
                              OptimisticSkipLastIter, SE, Rewriter)) {
      Changed = true;
      auto *NewCond = *Replaced;
      if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
        NCI->setName(OldCond->getName() + ".first_iter");
      }
      LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
                        << " with " << *NewCond << "\n");
      assert(OldCond->hasOneUse() && "Must be!");
      OldCond->replaceAllUsesWith(NewCond);
      DeadInsts.push_back(OldCond);
      // Make sure we no longer consider this condition as failing on last
      // iteration.
      ICmpsFailingOnLastIter.erase(OldCond);
    }
  }
  return Changed;
}

bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags. The alternative to this would be to extending the
  // set of "interesting" IV users to include the icmp, but doing that
  // regresses results in practice by querying SCEVs before trip counts which
  // rely on them which results in SCEV caching sub-optimal answers. The
  // concern about caching sub-optimal results is why we only query SCEVs of
  // the loop invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  // Pass 1: turn signed compares of zext(X) against a small-enough invariant
  // RHS into the equivalent unsigned compares.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning cache with sub-optimal results. For the must-execute case,
    // this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    const DataLayout &DL = ExitingBB->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    // RHS must lie in the zero-extension image of the narrow type, i.e. its
    // sign bit (in the wide type) is known clear.
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed. We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the
    // number of instructions. The exception is when we have an zext(add-rec).
    // The reason for allowing this exception is that we know we need to get
    // rid of the zext for SCEV to be able to compute a trip count for said
    // loops; we consider the new trip count valuable enough to increase
    // instruction count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
    // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when zext is loop varying and RHS is loop invariant. This converts
    // loop varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      // The truncate is inserted in the preheader, so it is loop invariant.
      auto *NewRHS = CastInst::Create(
          Instruction::Trunc, RHS, LHSOp->getType(), "",
          L->getLoopPreheader()->getTerminator()->getIterator());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };


    const DataLayout &DL = ExitingBB->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note, we are leaving SCEV in an unfortunately imprecise case here
      // as rotation tends to reveal information about trip counts not
      // previously visible.
      continue;
    }
  }

  return Changed;
}

bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Remove all exits which aren't both rewriteable and execute on every
  // iteration.
  llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // Likewise, the loop latch must be dominated by the exiting BB.
    if (!DT->dominates(ExitingBB, L->getLoopLatch()))
      return true;

    if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
      // If already constant, nothing to do. However, if this is an
      // unconditional exit, we can still replace header phis with their
      // preheader value.
      if (!L->contains(BI->getSuccessor(CI->isNullValue())))
        replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
      return true;
    }

    return false;
  });

  if (ExitingBlocks.empty())
    return false;

  // Get a symbolic upper bound on the loop backedge taken count.
  const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(MaxBECount))
    return false;

  // Visit our exit blocks in order of dominance. We know from the fact that
  // all exits must dominate the latch, so there is a total dominance order
  // between them.
  llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
    // std::sort sorts in ascending order, so we want the inverse of
    // the normal dominance relation.
    if (A == B) return false;
    if (DT->properlyDominates(A, B))
      return true;
    else {
      assert(DT->properlyDominates(B, A) &&
             "expected total dominance order!");
      return false;
    }
  });
// NOTE(review): "ASSERT" is not a macro the LLVM build ever defines, so this
// sanity-check loop is dead code; the guard was presumably meant to be
// "#ifndef NDEBUG". TODO: confirm and fix the guard.
#ifdef ASSERT
  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
    assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
  }
#endif

  bool Changed = false;
  bool SkipLastIter = false;
  // Running umin of the max exit counts seen so far, used to detect when the
  // remaining checks are known to execute one fewer iteration.
  const SCEV *CurrMaxExit = SE->getCouldNotCompute();
  auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
    if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
      return;
    if (isa<SCEVCouldNotCompute>(CurrMaxExit))
      CurrMaxExit = MaxExitCount;
    else
      CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
    // If the loop has more than 1 iteration, all further checks will be
    // executed 1 iteration less.
    if (CurrMaxExit == MaxBECount)
      SkipLastIter = true;
  };
  // NOTE(review): DominatingExactExitCounts appears unused in this visible
  // region; its uses (if any) are below this chunk — confirm against the
  // full file.
  SmallSet<const SCEV *, 8> DominatingExactExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
    const SCEV *MaxExitCount = SE->getExitCount(
        L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
    if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
      // Okay, we do not know the exit count here. Can we at least prove that
      // it will remain the same within iteration space?
      auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
      auto OptimizeCond = [&](bool SkipLastIter) {
        return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
                                                    MaxBECount, SkipLastIter,
                                                    SE, Rewriter, DeadInsts);
      };

      // TODO: We might have proved that we can skip the last iteration for
      // this check. In this case, we only want to check the condition on the
      // pre-last iteration (MaxBECount - 1). However, there is a nasty
      // corner case:
      //
      //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
      //
      // If we could not prove that len != 0, then we also could not prove
      // that (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
      // OptimizeCond will likely not prove anything for it, even if it could
      // prove the same fact for len.
      //
      // As a temporary solution, we query both last and pre-last iterations
      // in hope that we will be able to prove triviality for at least one of
      // them. We can stop querying MaxBECount for this case once SCEV
      // understands that (MaxBECount - 1) will not overflow here.
      if (OptimizeCond(false))
        Changed = true;
      else if (SkipLastIter && OptimizeCond(true))
        Changed = true;
      UpdateSkipLastIter(MaxExitCount);
      continue;
    }

    UpdateSkipLastIter(ExactExitCount);

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this. This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // We know that the backedge can't be taken, so we replace all
    // the header PHIs with values coming from the preheader.
1681 if (ExactExitCount->isZero()) { 1682 foldExit(L, ExitingBB, true, DeadInsts); 1683 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE); 1684 Changed = true; 1685 continue; 1686 } 1687 1688 assert(ExactExitCount->getType()->isIntegerTy() && 1689 MaxBECount->getType()->isIntegerTy() && 1690 "Exit counts must be integers"); 1691 1692 Type *WiderType = 1693 SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType()); 1694 ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType); 1695 MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType); 1696 assert(MaxBECount->getType() == ExactExitCount->getType()); 1697 1698 // Can we prove that some other exit must be taken strictly before this 1699 // one? 1700 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount, 1701 ExactExitCount)) { 1702 foldExit(L, ExitingBB, false, DeadInsts); 1703 Changed = true; 1704 continue; 1705 } 1706 1707 // As we run, keep track of which exit counts we've encountered. If we 1708 // find a duplicate, we've found an exit which would have exited on the 1709 // exiting iteration, but (from the visit order) strictly follows another 1710 // which does the same and is thus dead. 1711 if (!DominatingExactExitCounts.insert(ExactExitCount).second) { 1712 foldExit(L, ExitingBB, false, DeadInsts); 1713 Changed = true; 1714 continue; 1715 } 1716 1717 // TODO: There might be another oppurtunity to leverage SCEV's reasoning 1718 // here. If we kept track of the min of dominanting exits so far, we could 1719 // discharge exits with EC >= MDEC. This is less powerful than the existing 1720 // transform (since later exits aren't considered), but potentially more 1721 // powerful for any case where SCEV can prove a >=u b, but neither a == b 1722 // or a >u b. Such a case is not currently known. 
1723 } 1724 return Changed; 1725 } 1726 1727 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { 1728 SmallVector<BasicBlock*, 16> ExitingBlocks; 1729 L->getExitingBlocks(ExitingBlocks); 1730 1731 // Finally, see if we can rewrite our exit conditions into a loop invariant 1732 // form. If we have a read-only loop, and we can tell that we must exit down 1733 // a path which does not need any of the values computed within the loop, we 1734 // can rewrite the loop to exit on the first iteration. Note that this 1735 // doesn't either a) tell us the loop exits on the first iteration (unless 1736 // *all* exits are predicateable) or b) tell us *which* exit might be taken. 1737 // This transformation looks a lot like a restricted form of dead loop 1738 // elimination, but restricted to read-only loops and without neccesssarily 1739 // needing to kill the loop entirely. 1740 if (!LoopPredication) 1741 return false; 1742 1743 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits 1744 // through *explicit* control flow. We have to eliminate the possibility of 1745 // implicit exits (see below) before we know it's truly exact. 1746 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L); 1747 if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC)) 1748 return false; 1749 1750 assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant"); 1751 assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer"); 1752 1753 auto BadExit = [&](BasicBlock *ExitingBB) { 1754 // If our exiting block exits multiple loops, we can only rewrite the 1755 // innermost one. Otherwise, we're changing how many times the innermost 1756 // loop runs before it exits. 1757 if (LI->getLoopFor(ExitingBB) != L) 1758 return true; 1759 1760 // Can't rewrite non-branch yet. 1761 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 1762 if (!BI) 1763 return true; 1764 1765 // If already constant, nothing to do. 
1766 if (isa<Constant>(BI->getCondition())) 1767 return true; 1768 1769 // If the exit block has phis, we need to be able to compute the values 1770 // within the loop which contains them. This assumes trivially lcssa phis 1771 // have already been removed; TODO: generalize 1772 BasicBlock *ExitBlock = 1773 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0); 1774 if (!ExitBlock->phis().empty()) 1775 return true; 1776 1777 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 1778 if (isa<SCEVCouldNotCompute>(ExitCount) || 1779 !Rewriter.isSafeToExpand(ExitCount)) 1780 return true; 1781 1782 assert(SE->isLoopInvariant(ExitCount, L) && 1783 "Exit count must be loop invariant"); 1784 assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer"); 1785 return false; 1786 }; 1787 1788 // Make sure all exits dominate the latch. This means there is a linear chain 1789 // of exits. We check this before sorting so we have a total order. 1790 BasicBlock *Latch = L->getLoopLatch(); 1791 for (BasicBlock *ExitingBB : ExitingBlocks) 1792 if (!DT->dominates(ExitingBB, Latch)) 1793 return false; 1794 1795 // If we have any exits which can't be predicated themselves, than we can't 1796 // predicate any exit which isn't guaranteed to execute before it. Consider 1797 // two exits (a) and (b) which would both exit on the same iteration. If we 1798 // can predicate (b), but not (a), and (a) preceeds (b) along some path, then 1799 // we could convert a loop from exiting through (a) to one exiting through 1800 // (b). Note that this problem exists only for exits with the same exit 1801 // count, and we could be more aggressive when exit counts are known inequal. 1802 llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) { 1803 // llvm::sort sorts in ascending order, so we want the inverse of 1804 // the normal dominance relation. 
1805 if (A == B) 1806 return false; 1807 if (DT->properlyDominates(A, B)) 1808 return true; 1809 if (DT->properlyDominates(B, A)) 1810 return false; 1811 llvm_unreachable("Should have total dominance order"); 1812 }); 1813 1814 // Make sure our exit blocks are really a total order (i.e. a linear chain of 1815 // exits before the backedge). 1816 for (unsigned i = 1; i < ExitingBlocks.size(); i++) 1817 assert(DT->dominates(ExitingBlocks[i - 1], ExitingBlocks[i]) && 1818 "Not sorted by dominance"); 1819 1820 // Given our sorted total order, we know that exit[j] must be evaluated 1821 // after all exit[i] such j > i. 1822 for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++) 1823 if (BadExit(ExitingBlocks[i])) { 1824 ExitingBlocks.resize(i); 1825 break; 1826 } 1827 1828 if (ExitingBlocks.empty()) 1829 return false; 1830 1831 // At this point, ExitingBlocks consists of only those blocks which are 1832 // predicatable. Given that, we know we have at least one exit we can 1833 // predicate if the loop is doesn't have side effects and doesn't have any 1834 // implicit exits (because then our exact BTC isn't actually exact). 1835 // @Reviewers - As structured, this is O(I^2) for loop nests. Any 1836 // suggestions on how to improve this? I can obviously bail out for outer 1837 // loops, but that seems less than ideal. MemorySSA can find memory writes, 1838 // is that enough for *all* side effects? 1839 for (BasicBlock *BB : L->blocks()) 1840 for (auto &I : *BB) 1841 // TODO:isGuaranteedToTransfer 1842 if (I.mayHaveSideEffects()) 1843 return false; 1844 1845 bool Changed = false; 1846 // Finally, do the actual predication for all predicatable blocks. A couple 1847 // of notes here: 1848 // 1) We don't bother to constant fold dominated exits with identical exit 1849 // counts; that's simply a form of CSE/equality propagation and we leave 1850 // it for dedicated passes. 1851 // 2) We insert the comparison at the branch. 
Hoisting introduces additional 1852 // legality constraints and we leave that to dedicated logic. We want to 1853 // predicate even if we can't insert a loop invariant expression as 1854 // peeling or unrolling will likely reduce the cost of the otherwise loop 1855 // varying check. 1856 Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator()); 1857 IRBuilder<> B(L->getLoopPreheader()->getTerminator()); 1858 Value *ExactBTCV = nullptr; // Lazily generated if needed. 1859 for (BasicBlock *ExitingBB : ExitingBlocks) { 1860 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 1861 1862 auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); 1863 Value *NewCond; 1864 if (ExitCount == ExactBTC) { 1865 NewCond = L->contains(BI->getSuccessor(0)) ? 1866 B.getFalse() : B.getTrue(); 1867 } else { 1868 Value *ECV = Rewriter.expandCodeFor(ExitCount); 1869 if (!ExactBTCV) 1870 ExactBTCV = Rewriter.expandCodeFor(ExactBTC); 1871 Value *RHS = ExactBTCV; 1872 if (ECV->getType() != RHS->getType()) { 1873 Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType()); 1874 ECV = B.CreateZExt(ECV, WiderTy); 1875 RHS = B.CreateZExt(RHS, WiderTy); 1876 } 1877 auto Pred = L->contains(BI->getSuccessor(0)) ? 1878 ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; 1879 NewCond = B.CreateICmp(Pred, ECV, RHS); 1880 } 1881 Value *OldCond = BI->getCondition(); 1882 BI->setCondition(NewCond); 1883 if (OldCond->use_empty()) 1884 DeadInsts.emplace_back(OldCond); 1885 Changed = true; 1886 RunUnswitching = true; 1887 } 1888 1889 return Changed; 1890 } 1891 1892 //===----------------------------------------------------------------------===// 1893 // IndVarSimplify driver. Manage several subpasses of IV simplification. 1894 //===----------------------------------------------------------------------===// 1895 1896 bool IndVarSimplify::run(Loop *L) { 1897 // We need (and expect!) the incoming loop to be in LCSSA. 
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop.  Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    // Some nested loops may share same folded exit basic block,
    // thus we need to notify top most loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it (linear function test replacement, LFTR).
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one. Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them. Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeader->getTerminator()))
        continue;

      if (!Rewriter.isSafeToExpand(ExitCount))
        continue;

      Changed |= linearFunctionTestReplace(L, ExitingBB,
                                           ExitCount, IndVar,
                                           Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  return Changed;
}

/// New pass manager entry point: run IndVarSimplify on loop \p L and report
/// which analyses are preserved.
PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
                     WidenIndVars && AllowIVWidening);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  // The transform does not modify the CFG (exits are folded, not removed).
  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  if (IVS.runUnswitching()) {
    // predicateLoopExits created loop-invariant exit conditions; ask the pass
    // pipeline to schedule an extra SimpleLoopUnswitch run to exploit them.
    AM.getResult<ShouldRunExtraSimpleLoopUnswitch>(L, AR);
    PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
  }

  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}