1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This transformation analyzes and transforms the induction variables (and 10 // computations derived from them) into simpler forms suitable for subsequent 11 // analysis and transformation. 12 // 13 // If the trip count of a loop is computable, this pass also makes the following 14 // changes: 15 // 1. The exit condition for the loop is canonicalized to compare the 16 // induction value against the exit value. This turns loops like: 17 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)' 18 // 2. Any use outside of the loop of an expression derived from the indvar 19 // is changed to compute the derived value outside of the loop, eliminating 20 // the dependence on the exit value of the induction variable. If the only 21 // purpose of the loop is to compute the exit value of some derived 22 // expression, this transformation will make the loop dead. 
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Strategy used when rewriting a use of an IV outside the loop in terms of its
// exit value (see the overview comment at the top of the file).
static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(
        clEnumValN(NeverRepl, "never", "never replace exit value"),
        clEnumValN(OnlyCheapRepl, "cheap",
                   "only replace exit value when the cost is cheap"),
        clEnumValN(
            UnusedIndVarInLoop, "unusedindvarinloop",
            "only replace exit value when it is an unused "
            "induction variable in the loop and has cheap replacement cost"),
        clEnumValN(NoHardUse, "noharduse",
                   "only replace exit values when loop def likely dead"),
        clEnumValN(AlwaysRepl, "always",
                   "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                cl::desc("Allow widening of indvars to eliminate s/zext"));

namespace {

/// Driver object for the induction-variable simplification pass. Holds the
/// analyses the transformation consults and the state accumulated while
/// rewriting a single loop.
class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  // Only populated when a MemorySSA analysis was supplied to the constructor;
  // kept up to date as instructions are deleted.
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  // Instructions made trivially dead during simplification/widening; handed to
  // simplifyUsersOfIV/createWideIV so they can queue additional dead values.
  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool WidenIndVars;

  // Set when simplification exposed an opportunity that loop unswitching
  // should follow up on; exposed via runUnswitching().
  bool RunUnswitching = false;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to improve our exit conditions by converting condition from signed
  /// to unsigned or rotating computation out of the loop.
  /// (See inline comment about why this is duplicated from simplifyAndExtend)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzeable exit counts
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);

  bool runUnswitching() const { return RunUnswitching; }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible. Returns true and sets \p IntVal
/// only when \p APF is exactly representable as a signed 64-bit integer.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has floating induction variable then insert corresponding
/// integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  // L->contains(...) yields 0 or 1, so this picks which of the phi's two
  // operands is the preheader edge; the other is the backedge.
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if increment operation is not
  // an add or increment value can not be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison. Both the ordered and unordered
  // fp predicates map to the same signed integer predicate; NaN operands are
  // impossible here because all three values were integer-valued constants.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI =
      PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator());
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName() + ".int", Incr->getIterator());
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare =
      new ICmpInst(TheBr->getIterator(), NewPred, NewAdd,
                   ConstantInt::get(Int32Ty, ExitValue), Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  // Snapshot the header phis first: handleFloatingPointIV mutates and may
  // delete phis, so we must not iterate the phi list directly.
  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (WeakTrackingVH &PHI : PHIs)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHI))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        // A loop-variant condition could be taken on a later iteration.
        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          // On the first iteration the header phi holds its preheader value.
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  // Only consider widening to a type the target supports natively.
  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetics on the wider indvar are more
  // expensive than those on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType ||
      Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its user(s), or 'signed'
  // if there are multiple users with both sign- and zero extensions,
  // in order not to introduce nondeterministic behaviour based on the
  // unspecified order of a PHI nodes' users-iterator.
  WI.IsSigned |= IsSigned;
}

//===----------------------------------------------------------------------===//
//  Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

/// IVVisitor that records, via visitIVCast, the widest native type to which
/// users extend the narrow IV (WI); consumed later by IV widening.
class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
    : SE(SCEV), TTI(TTI), IVPhi(IV) {
    // DT is a member of the IVVisitor base class.
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  // Guard intrinsics constrain IV ranges; note whether any are present in the
  // module so widening can take them into account.
  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
          Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode *, 8> LoopPhis;
  for (PHINode &PN : L->getHeader()->phis())
    LoopPhis.push_back(&PN);

  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extend evaluation until needed, and avoid running
    // other SCEV based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts,
                                             Rewriter, &Visitor);

      Changed |= C;
      RunUnswitching |= U;
      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while(!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        // The wide phi's users may themselves be simplifiable: next round.
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated Phi node if so. Otherwise, return null. Note
/// that this is less general than SCEVs AddRec checking.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    [[fallthrough]];
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  // GEPs have their pointer operand first; only add/sub may be commuted.
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value.  Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test.  This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS: either the phi itself or its increment.
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  // Bound the recursion to keep the walk cheap.
  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example, Arguments
  // may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value*, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if the given phi is a "counter" in L.  A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1.  Note that L must have exactly one latch.
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  // The backedge value must be a simple increment of this phi (and SCEVable),
  // so LFTR can reason about the counter syntactically as well.
  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}

/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR.  If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
832 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB, 833 const SCEV *BECount, 834 ScalarEvolution *SE, DominatorTree *DT) { 835 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType()); 836 837 Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition(); 838 839 // Loop over all of the PHI nodes, looking for a simple counter. 840 PHINode *BestPhi = nullptr; 841 const SCEV *BestInit = nullptr; 842 BasicBlock *LatchBlock = L->getLoopLatch(); 843 assert(LatchBlock && "Must be in simplified form"); 844 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 845 846 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 847 PHINode *Phi = cast<PHINode>(I); 848 if (!isLoopCounter(Phi, L, SE)) 849 continue; 850 851 const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi)); 852 853 // AR may be a pointer type, while BECount is an integer type. 854 // AR may be wider than BECount. With eq/ne tests overflow is immaterial. 855 // AR may not be a narrower type, or we may never exit. 856 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType()); 857 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth)) 858 continue; 859 860 // Avoid reusing a potentially undef value to compute other values that may 861 // have originally had a concrete definition. 862 if (!hasConcreteDef(Phi)) { 863 // We explicitly allow unknown phis as long as they are already used by 864 // the loop exit test. This is legal since performing LFTR could not 865 // increase the number of undef users. 866 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock); 867 if (!isLoopExitTestBasedOn(Phi, ExitingBB) && 868 !isLoopExitTestBasedOn(IncPhi, ExitingBB)) 869 continue; 870 } 871 872 // Avoid introducing undefined behavior due to poison which didn't exist in 873 // the original program. (Annoyingly, the rules for poison and undef 874 // propagation are distinct, so this does NOT cover the undef case above.) 
875 // We have to ensure that we don't introduce UB by introducing a use on an 876 // iteration where said IV produces poison. Our strategy here differs for 877 // pointers and integer IVs. For integers, we strip and reinfer as needed, 878 // see code in linearFunctionTestReplace. For pointers, we restrict 879 // transforms as there is no good way to reinfer inbounds once lost. 880 if (!Phi->getType()->isIntegerTy() && 881 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) 882 continue; 883 884 const SCEV *Init = AR->getStart(); 885 886 if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) { 887 // Don't force a live loop counter if another IV can be used. 888 if (isAlmostDeadIV(Phi, LatchBlock, Cond)) 889 continue; 890 891 // Prefer to count-from-zero. This is a more "canonical" counter form. It 892 // also prefers integer to pointer IVs. 893 if (BestInit->isZero() != Init->isZero()) { 894 if (BestInit->isZero()) 895 continue; 896 } 897 // If two IVs both count from zero or both count from nonzero then the 898 // narrower is likely a dead phi that has been widened. Use the wider phi 899 // to allow the other to be eliminated. 900 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) 901 continue; 902 } 903 BestPhi = Phi; 904 BestInit = Init; 905 } 906 return BestPhi; 907 } 908 909 /// Insert an IR expression which computes the value held by the IV IndVar 910 /// (which must be an loop counter w/unit stride) after the backedge of loop L 911 /// is taken ExitCount times. 
912 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, 913 const SCEV *ExitCount, bool UsePostInc, Loop *L, 914 SCEVExpander &Rewriter, ScalarEvolution *SE) { 915 assert(isLoopCounter(IndVar, L, SE)); 916 assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer"); 917 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 918 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 919 920 // For integer IVs, truncate the IV before computing the limit unless we 921 // know apriori that the limit must be a constant when evaluated in the 922 // bitwidth of the IV. We prefer (potentially) keeping a truncate of the 923 // IV in the loop over a (potentially) expensive expansion of the widened 924 // exit count add(zext(add)) expression. 925 if (IndVar->getType()->isIntegerTy() && 926 SE->getTypeSizeInBits(AR->getType()) > 927 SE->getTypeSizeInBits(ExitCount->getType())) { 928 const SCEV *IVInit = AR->getStart(); 929 if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount)) 930 AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType())); 931 } 932 933 const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR; 934 const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE); 935 assert(SE->isLoopInvariant(IVLimit, L) && 936 "Computed iteration count is not loop invariant!"); 937 return Rewriter.expandCodeFor(IVLimit, ARBase->getType(), 938 ExitingBB->getTerminator()); 939 } 940 941 /// This method rewrites the exit condition of the loop to be a canonical != 942 /// comparison against the incremented loop induction variable. This pass is 943 /// able to rewrite the exit tests of any loop where the SCEV analysis can 944 /// determine a loop-invariant trip count of the loop, which is actually a much 945 /// broader range than just linear tests. 
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction * const IncVar =
      cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use. We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  // Exit when the condition is false if the true successor stays in the
  // loop, and vice versa.
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type. This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead. We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
    const SCEV *ZExtTrunc =
        SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
          SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}

//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  // Walk the preheader bottom-up so uses are visited before defs.
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop. Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      // For phi uses, the effective use site is the incoming block, not the
      // block containing the phi.
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
            PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    // Back the iterator up before moving ToMove, since moving invalidates
    // the current position.
    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    SE->forgetValue(ToMove);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}

/// Replace the condition of the given exiting branch with NewCond, queueing
/// the old condition for deletion if it becomes unused.
static void replaceExitCond(BranchInst *BI, Value *NewCond,
                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  auto *OldCond = BI->getCondition();
  LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
                    << " with " << *NewCond << "\n");
  BI->setCondition(NewCond);
  if (OldCond->use_empty())
    DeadInsts.emplace_back(OldCond);
}

/// Return the boolean constant that, used as the condition of ExitingBB's
/// branch, makes the exit taken (IsTaken) or not taken (!IsTaken).
static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
                                      bool IsTaken) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  // The first successor leaving the loop means "exit on true".
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  auto *OldCond = BI->getCondition();
  return ConstantInt::get(OldCond->getType(),
                          IsTaken ? ExitIfTrue : !ExitIfTrue);
}

/// Fold the exit branch of ExitingBB to a constant so the exit is always
/// (IsTaken) or never (!IsTaken) taken.
static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
  replaceExitCond(BI, NewCond, DeadInsts);
}

/// Replace all header phis of L with their preheader incoming values (valid
/// when the backedge is known never taken) and simplify the affected users.
static void replaceLoopPHINodesWithPreheaderValues(
    LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
    ScalarEvolution &SE) {
  assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
  auto *LoopPreheader = L->getLoopPreheader();
  auto *LoopHeader = L->getHeader();
  SmallVector<Instruction *> Worklist;
  for (auto &PN : LoopHeader->phis()) {
    auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
    for (User *U : PN.users())
      Worklist.push_back(cast<Instruction>(U));
    SE.forgetValue(&PN);
    PN.replaceAllUsesWith(PreheaderIncoming);
    DeadInsts.emplace_back(&PN);
  }

  // Replacing with the preheader value will often allow IV users to simplify
  // (especially if the preheader value is a constant).
  SmallPtrSet<Instruction *, 16> Visited;
  while (!Worklist.empty()) {
    auto *I = cast<Instruction>(Worklist.pop_back_val());
    if (!Visited.insert(I).second)
      continue;

    // Don't simplify instructions outside the loop.
    if (!L->contains(I))
      continue;

    Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
    if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
      for (User *U : I->users())
        Worklist.push_back(cast<Instruction>(U));
      I->replaceAllUsesWith(Res);
      DeadInsts.emplace_back(I);
    }
  }
}

/// Materialize the loop-invariant predicate LIP in the preheader, adjusted
/// for the branch polarity of ExitingBB, and return the new condition.
static Value *
createInvariantCond(const Loop *L, BasicBlock *ExitingBB,
                    const ScalarEvolution::LoopInvariantPredicate &LIP,
                    SCEVExpander &Rewriter) {
  ICmpInst::Predicate InvariantPred = LIP.Pred;
  BasicBlock *Preheader = L->getLoopPreheader();
  assert(Preheader && "Preheader doesn't exist");
  Rewriter.setInsertPoint(Preheader->getTerminator());
  auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
  auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  // LIP is phrased as "stay in loop"; invert if the branch exits on true.
  if (ExitIfTrue)
    InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
  IRBuilder<> Builder(Preheader->getTerminator());
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
                            BI->getCondition()->getName());
}

/// Try to replace the exit comparison ICmp with either a constant (when it
/// is provably true/false) or an equivalent loop-invariant condition that
/// holds during the first MaxIter iterations. Returns std::nullopt if no
/// replacement was found.
static std::optional<Value *>
createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
                  const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
                  ScalarEvolution *SE, SCEVExpander &Rewriter) {
  ICmpInst::Predicate Pred = ICmp->getPredicate();
  Value *LHS = ICmp->getOperand(0);
  Value *RHS = ICmp->getOperand(1);

  // 'LHS pred RHS' should now mean that we stay in loop.
  auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true or false?
  if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    // Truncating MaxIter is only safe if it provably fits in the narrower
    // type.
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    // Semantically skip last iter is "subtract 1, do not bother about unsigned
    // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
    // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
    // So we manually construct umin(a - 1, b - 1).
    SmallVector<const SCEV *, 4> Elements;
    if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
      for (auto *Op : UMin->operands())
        Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
      MaxIter = SE->getUMinFromMismatchedTypes(Elements);
    } else
      MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return std::nullopt;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
  else
    return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
}

/// Try to simplify the condition of the exiting branch BI even though its
/// exact exit count is unknown, by proving its leaf icmp conditions either
/// trivial or loop-invariant during the first MaxIter iterations.
static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
    bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  assert(
      (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
      "Not a loop exit!");

  // For branch that stays in loop by TRUE condition, go through AND. For branch
  // that stays in loop by FALSE condition, go through OR. Both gives the
  // similar logic: "stay in loop iff all conditions are true(false)".
  bool Inverted = L->contains(BI->getSuccessor(1));
  SmallVector<ICmpInst *, 4> LeafConditions;
  SmallVector<Value *, 4> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  Value *OldCond = BI->getCondition();
  Visited.insert(OldCond);
  Worklist.push_back(OldCond);

  auto GoThrough = [&](Value *V) {
    Value *LHS = nullptr, *RHS = nullptr;
    if (Inverted) {
      if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
        return false;
    } else {
      if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
        return false;
    }
    if (Visited.insert(LHS).second)
      Worklist.push_back(LHS);
    if (Visited.insert(RHS).second)
      Worklist.push_back(RHS);
    return true;
  };

  do {
    Value *Curr = Worklist.pop_back_val();
    // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
    // those with one use, to avoid instruction duplication.
    if (Curr->hasOneUse())
      if (!GoThrough(Curr))
        if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
          LeafConditions.push_back(ICmp);
  } while (!Worklist.empty());

  // If the current basic block has the same exit count as the whole loop, and
  // it consists of multiple icmp's, try to collect all icmp's that give exact
  // same exit count. For all other icmp's, we could use one less iteration,
  // because their value on the last iteration doesn't really matter.
  SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
  if (!SkipLastIter && LeafConditions.size() > 1 &&
      SE->getExitCount(L, ExitingBB,
                       ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
          MaxIter)
    for (auto *ICmp : LeafConditions) {
      auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
                                             /*ControlsExit*/ false);
      auto *ExitMax = EL.SymbolicMaxNotTaken;
      if (isa<SCEVCouldNotCompute>(ExitMax))
        continue;
      // They could be of different types (specifically this happens after
      // IV widening).
      auto *WiderType =
          SE->getWiderType(ExitMax->getType(), MaxIter->getType());
      auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
      auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
      if (WideExitMax == WideMaxIter)
        ICmpsFailingOnLastIter.insert(ICmp);
    }

  bool Changed = false;
  for (auto *OldCond : LeafConditions) {
    // Skip last iteration for this icmp under one of two conditions:
    // - We do it for all conditions;
    // - There is another ICmp that would fail on last iter, so this one doesn't
    // really matter.
    bool OptimisticSkipLastIter = SkipLastIter;
    if (!OptimisticSkipLastIter) {
      if (ICmpsFailingOnLastIter.size() > 1)
        OptimisticSkipLastIter = true;
      else if (ICmpsFailingOnLastIter.size() == 1)
        OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
    }
    if (auto Replaced =
            createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
                              OptimisticSkipLastIter, SE, Rewriter)) {
      Changed = true;
      auto *NewCond = *Replaced;
      if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
        NCI->setName(OldCond->getName() + ".first_iter");
      }
      LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
                        << " with " << *NewCond << "\n");
      assert(OldCond->hasOneUse() && "Must be!");
      OldCond->replaceAllUsesWith(NewCond);
      DeadInsts.push_back(OldCond);
      // Make sure we no longer consider this condition as failing on last
      // iteration.
      ICmpsFailingOnLastIter.erase(OldCond);
    }
  }
  return Changed;
}

bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags. The alternative to this would be to extending the
  // set of "interesting" IV users to include the icmp, but doing that
  // regresses results in practice by querying SCEVs before trip counts which
  // rely on them which results in SCEV caching sub-optimal answers. The
  // concern about caching sub-optimal results is why we only query SCEVs of
  // the loop invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning cache with sub-optimal results. For the must-execute case,
    // this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    // The signed compare can be replaced by its unsigned equivalent when the
    // invariant RHS is provably within the zero-extended range of the
    // narrower type.
    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed. We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the number
    // of instructions. The exception is when we have an zext(add-rec). The
    // reason for allowing this exception is that we know we need to get rid
    // of the zext for SCEV to be able to compute a trip count for said loops;
    // we consider the new trip count valuable enough to increase instruction
    // count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
    // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when zext is loop varying and RHS is loop invariant. This converts
    // loop varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      auto *NewRHS = CastInst::Create(
          Instruction::Trunc, RHS, LHSOp->getType(), "",
          L->getLoopPreheader()->getTerminator()->getIterator());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };


    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note, we are leaving SCEV in an unfortunately imprecise case here
      // as rotation tends to reveal information about trip counts not
      // previously visible.
      continue;
    }
  }

  return Changed;
}

bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Remove all exits which aren't both rewriteable and execute on every
  // iteration.
  llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // Likewise, the loop latch must be dominated by the exiting BB.
    if (!DT->dominates(ExitingBB, L->getLoopLatch()))
      return true;

    if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
      // If already constant, nothing to do. However, if this is an
      // unconditional exit, we can still replace header phis with their
      // preheader value.
      if (!L->contains(BI->getSuccessor(CI->isNullValue())))
        replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
      return true;
    }

    return false;
  });

  if (ExitingBlocks.empty())
    return false;

  // Get a symbolic upper bound on the loop backedge taken count.
  const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(MaxBECount))
    return false;

  // Visit our exit blocks in order of dominance. We know from the fact that
  // all exits must dominate the latch, so there is a total dominance order
  // between them.
  llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
    // std::sort sorts in ascending order, so we want the inverse of
    // the normal dominance relation.
    if (A == B) return false;
    if (DT->properlyDominates(A, B))
      return true;
    else {
      assert(DT->properlyDominates(B, A) &&
             "expected total dominance order!");
      return false;
    }
  });
#ifdef ASSERT
  // NOTE(review): `ASSERT` does not appear to be a macro this file (or LLVM's
  // build) defines, so this verification block is likely never compiled.
  // Presumably `#ifndef NDEBUG` was intended -- confirm against the build
  // configuration.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
    assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
  }
#endif

  bool Changed = false;
  bool SkipLastIter = false;
  const SCEV *CurrMaxExit = SE->getCouldNotCompute();
  // Accumulate the umin of the exit counts seen so far; once it matches the
  // loop's max backedge count, later exits can skip their last iteration.
  auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
    if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
      return;
    if (isa<SCEVCouldNotCompute>(CurrMaxExit))
      CurrMaxExit = MaxExitCount;
    else
      CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
    // If the loop has more than 1 iteration, all further checks will be
    // executed 1 iteration less.
    if (CurrMaxExit == MaxBECount)
      SkipLastIter = true;
  };
  SmallSet<const SCEV *, 8> DominatingExactExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
    const SCEV *MaxExitCount = SE->getExitCount(
        L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
    if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
      // Okay, we do not know the exit count here. Can we at least prove that it
      // will remain the same within iteration space?
      auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
      auto OptimizeCond = [&](bool SkipLastIter) {
        return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
                                                    MaxBECount, SkipLastIter,
                                                    SE, Rewriter, DeadInsts);
      };

      // TODO: We might have proved that we can skip the last iteration for
      // this check. In this case, we only want to check the condition on the
      // pre-last iteration (MaxBECount - 1). However, there is a nasty
      // corner case:
      //
      //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
      //
      // If we could not prove that len != 0, then we also could not prove that
      // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
      // OptimizeCond will likely not prove anything for it, even if it could
      // prove the same fact for len.
      //
      // As a temporary solution, we query both last and pre-last iterations in
      // hope that we will be able to prove triviality for at least one of
      // them. We can stop querying MaxBECount for this case once SCEV
      // understands that (MaxBECount - 1) will not overflow here.
      if (OptimizeCond(false))
        Changed = true;
      else if (SkipLastIter && OptimizeCond(true))
        Changed = true;
      UpdateSkipLastIter(MaxExitCount);
      continue;
    }

    UpdateSkipLastIter(ExactExitCount);

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this. This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // We know that the backedge can't be taken, so we replace all
    // the header PHIs with values coming from the preheader.
1680 if (ExactExitCount->isZero()) { 1681 foldExit(L, ExitingBB, true, DeadInsts); 1682 replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE); 1683 Changed = true; 1684 continue; 1685 } 1686 1687 assert(ExactExitCount->getType()->isIntegerTy() && 1688 MaxBECount->getType()->isIntegerTy() && 1689 "Exit counts must be integers"); 1690 1691 Type *WiderType = 1692 SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType()); 1693 ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType); 1694 MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType); 1695 assert(MaxBECount->getType() == ExactExitCount->getType()); 1696 1697 // Can we prove that some other exit must be taken strictly before this 1698 // one? 1699 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount, 1700 ExactExitCount)) { 1701 foldExit(L, ExitingBB, false, DeadInsts); 1702 Changed = true; 1703 continue; 1704 } 1705 1706 // As we run, keep track of which exit counts we've encountered. If we 1707 // find a duplicate, we've found an exit which would have exited on the 1708 // exiting iteration, but (from the visit order) strictly follows another 1709 // which does the same and is thus dead. 1710 if (!DominatingExactExitCounts.insert(ExactExitCount).second) { 1711 foldExit(L, ExitingBB, false, DeadInsts); 1712 Changed = true; 1713 continue; 1714 } 1715 1716 // TODO: There might be another oppurtunity to leverage SCEV's reasoning 1717 // here. If we kept track of the min of dominanting exits so far, we could 1718 // discharge exits with EC >= MDEC. This is less powerful than the existing 1719 // transform (since later exits aren't considered), but potentially more 1720 // powerful for any case where SCEV can prove a >=u b, but neither a == b 1721 // or a >u b. Such a case is not currently known. 
1722 } 1723 return Changed; 1724 } 1725 1726 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { 1727 SmallVector<BasicBlock*, 16> ExitingBlocks; 1728 L->getExitingBlocks(ExitingBlocks); 1729 1730 // Finally, see if we can rewrite our exit conditions into a loop invariant 1731 // form. If we have a read-only loop, and we can tell that we must exit down 1732 // a path which does not need any of the values computed within the loop, we 1733 // can rewrite the loop to exit on the first iteration. Note that this 1734 // doesn't either a) tell us the loop exits on the first iteration (unless 1735 // *all* exits are predicateable) or b) tell us *which* exit might be taken. 1736 // This transformation looks a lot like a restricted form of dead loop 1737 // elimination, but restricted to read-only loops and without neccesssarily 1738 // needing to kill the loop entirely. 1739 if (!LoopPredication) 1740 return false; 1741 1742 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits 1743 // through *explicit* control flow. We have to eliminate the possibility of 1744 // implicit exits (see below) before we know it's truly exact. 1745 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L); 1746 if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC)) 1747 return false; 1748 1749 assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant"); 1750 assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer"); 1751 1752 auto BadExit = [&](BasicBlock *ExitingBB) { 1753 // If our exiting block exits multiple loops, we can only rewrite the 1754 // innermost one. Otherwise, we're changing how many times the innermost 1755 // loop runs before it exits. 1756 if (LI->getLoopFor(ExitingBB) != L) 1757 return true; 1758 1759 // Can't rewrite non-branch yet. 1760 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 1761 if (!BI) 1762 return true; 1763 1764 // If already constant, nothing to do. 
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them. This assumes trivially lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
        BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    // The per-exit count must itself be computable and expandable, or we
    // can't form the loop-invariant comparison below.
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) ||
        !Rewriter.isSafeToExpand(ExitCount))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // If we have any exits which can't be predicated themselves, than we can't
  // predicate any exit which isn't guaranteed to execute before it. Consider
  // two exits (a) and (b) which would both exit on the same iteration. If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b). Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known inequal.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse of
               // the normal dominance relation, plus a tie breaker for blocks
               // unordered by dominance.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               return A->getName() < B->getName();
             });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge). If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      // Keep only the prefix of predicatable exits; everything at or after
      // the first bad exit is dropped (see ordering argument above).
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count. The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
        return DT->dominates(ExitingBB, L->getLoopLatch());
      }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable. Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests. Any
  // suggestions on how to improve this? I can obviously bail out for outer
  // loops, but that seems less than ideal. MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO:isGuaranteedToTransfer
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks. A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch. Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic. We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      // This exit is taken on the very last iteration (if at all); the
      // condition folds to "never exit early through this block".
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      // Replace the loop-varying condition with the loop-invariant test
      // "ExitCount == ExactBTC" (with the polarity matching which successor
      // stays in the loop).
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
    // New loop-invariant conditions are unswitching opportunities.
    RunUnswitching = true;
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop. Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    // Some nested loops may share same folded exit basic block,
    // thus we need to notify top most loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one. Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them. Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions. Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeader->getTerminator()))
        continue;

      if (!Rewriter.isSafeToExpand(ExitCount))
        continue;

      Changed |= linearFunctionTestReplace(L, ExitingBB,
                                           ExitCount, IndVar,
                                           Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
                     WidenIndVars && AllowIVWidening);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  // If predication created new loop-invariant conditions, ask the pass
  // manager to schedule an extra SimpleLoopUnswitch run to exploit them.
  if (IVS.runUnswitching()) {
    AM.getResult<ShouldRunExtraSimpleLoopUnswitch>(L, AR);
    PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
  }

  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}