1 //===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "llvm/ADT/DenseMap.h" 11 #include "llvm/ADT/Sequence.h" 12 #include "llvm/ADT/SetVector.h" 13 #include "llvm/ADT/SmallPtrSet.h" 14 #include "llvm/ADT/SmallVector.h" 15 #include "llvm/ADT/Statistic.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/ADT/Twine.h" 18 #include "llvm/Analysis/AssumptionCache.h" 19 #include "llvm/Analysis/LoopAnalysisManager.h" 20 #include "llvm/Analysis/LoopInfo.h" 21 #include "llvm/Analysis/LoopPass.h" 22 #include "llvm/IR/BasicBlock.h" 23 #include "llvm/IR/Constant.h" 24 #include "llvm/IR/Constants.h" 25 #include "llvm/IR/Dominators.h" 26 #include "llvm/IR/Function.h" 27 #include "llvm/IR/InstrTypes.h" 28 #include "llvm/IR/Instruction.h" 29 #include "llvm/IR/Instructions.h" 30 #include "llvm/IR/Use.h" 31 #include "llvm/IR/Value.h" 32 #include "llvm/Pass.h" 33 #include "llvm/Support/Casting.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/ErrorHandling.h" 36 #include "llvm/Support/GenericDomTree.h" 37 #include "llvm/Support/raw_ostream.h" 38 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 39 #include "llvm/Transforms/Utils/LoopUtils.h" 40 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" 41 #include <algorithm> 42 #include <cassert> 43 #include <iterator> 44 #include <utility> 45 46 #define DEBUG_TYPE "simple-loop-unswitch" 47 48 using namespace llvm; 49 50 STATISTIC(NumBranches, "Number of branches unswitched"); 51 STATISTIC(NumSwitches, "Number of switches unswitched"); 52 STATISTIC(NumTrivial, "Number of unswitches that are trivial"); 53 54 static void replaceLoopUsesWithConstant(Loop &L, Value &LIC, 55 Constant &Replacement) { 56 assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?"); 57 58 // Replace uses of LIC in the loop with the given constant. 59 for (auto UI = LIC.use_begin(), UE = LIC.use_end(); UI != UE;) { 60 // Grab the use and walk past it so we can clobber it in the use list. 61 Use *U = &*UI++; 62 Instruction *UserI = dyn_cast<Instruction>(U->getUser()); 63 if (!UserI || !L.contains(UserI)) 64 continue; 65 66 // Replace this use within the loop body. 67 *U = &Replacement; 68 } 69 } 70 71 /// Update the dominator tree after removing one exiting predecessor of a loop 72 /// exit block. 73 static void updateLoopExitIDom(BasicBlock *LoopExitBB, Loop &L, 74 DominatorTree &DT) { 75 assert(pred_begin(LoopExitBB) != pred_end(LoopExitBB) && 76 "Cannot have empty predecessors of the loop exit block if we split " 77 "off a block to unswitch!"); 78 79 BasicBlock *IDom = *pred_begin(LoopExitBB); 80 // Walk all of the other predecessors finding the nearest common dominator 81 // until all predecessors are covered or we reach the loop header. The loop 82 // header necessarily dominates all loop exit blocks in loop simplified form 83 // so we can early-exit the moment we hit that block. 84 for (auto PI = std::next(pred_begin(LoopExitBB)), PE = pred_end(LoopExitBB); 85 PI != PE && IDom != L.getHeader(); ++PI) 86 IDom = DT.findNearestCommonDominator(IDom, *PI); 87 88 DT.changeImmediateDominator(LoopExitBB, IDom); 89 } 90 91 /// Update the dominator tree after unswitching a particular former exit block. 92 /// 93 /// This handles the full update of the dominator tree after hoisting a block 94 /// that previously was an exit block (or split off of an exit block) up to be 95 /// reached from the new immediate dominator of the preheader. 96 /// 97 /// The common case is simple -- we just move the unswitched block to have an 98 /// immediate dominator of the old preheader. But in complex cases, there may 99 /// be other blocks reachable from the unswitched block that are immediately 100 /// dominated by some node between the unswitched one and the old preheader. 101 /// All of these also need to be hoisted in the dominator tree. We also want to 102 /// minimize queries to the dominator tree because each step of this 103 /// invalidates any DFS numbers that would make queries fast. 104 static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH, 105 DominatorTree &DT) { 106 DomTreeNode *OldPHNode = DT[OldPH]; 107 DomTreeNode *UnswitchedNode = DT[UnswitchedBB]; 108 // If the dominator tree has already been updated for this unswitched node, 109 // we're done. This makes it easier to use this routine if there are multiple 110 // paths to the same unswitched destination. 111 if (UnswitchedNode->getIDom() == OldPHNode) 112 return; 113 114 // First collect the domtree nodes that we are hoisting over. These are the 115 // set of nodes which may have children that need to be hoisted as well. 116 SmallPtrSet<DomTreeNode *, 4> DomChain; 117 for (auto *IDom = UnswitchedNode->getIDom(); IDom != OldPHNode; 118 IDom = IDom->getIDom()) 119 DomChain.insert(IDom); 120 121 // The unswitched block ends up immediately dominated by the old preheader -- 122 // regardless of whether it is the loop exit block or split off of the loop 123 // exit block. 124 DT.changeImmediateDominator(UnswitchedNode, OldPHNode); 125 126 // Blocks reachable from the unswitched block may need to change their IDom 127 // as well. 128 SmallSetVector<BasicBlock *, 4> Worklist; 129 for (auto *SuccBB : successors(UnswitchedBB)) 130 Worklist.insert(SuccBB); 131 132 // Walk the worklist. We grow the list in the loop and so must recompute size. 133 for (int i = 0; i < (int)Worklist.size(); ++i) { 134 auto *BB = Worklist[i]; 135 136 DomTreeNode *Node = DT[BB]; 137 assert(!DomChain.count(Node) && 138 "Cannot be dominated by a block you can reach!"); 139 // If this block doesn't have an immediate dominator somewhere in the chain 140 // we hoisted over, then its position in the domtree hasn't changed. Either 141 // it is above the region hoisted and still valid, or it is below the 142 // hoisted block and so was trivially updated. This also applies to 143 // everything reachable from this block so we're completely done with the 144 // it. 145 if (!DomChain.count(Node->getIDom())) 146 continue; 147 148 // We need to change the IDom for this node but also walk its successors 149 // which could have similar dominance position. 150 DT.changeImmediateDominator(Node, OldPHNode); 151 for (auto *SuccBB : successors(BB)) 152 Worklist.insert(SuccBB); 153 } 154 } 155 156 /// Check that all the LCSSA PHI nodes in the loop exit block have trivial 157 /// incoming values along this edge. 158 static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB, 159 BasicBlock &ExitBB) { 160 for (Instruction &I : ExitBB) { 161 auto *PN = dyn_cast<PHINode>(&I); 162 if (!PN) 163 // No more PHIs to check. 164 return true; 165 166 // If the incoming value for this edge isn't loop invariant the unswitch 167 // won't be trivial. 168 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB))) 169 return false; 170 } 171 llvm_unreachable("Basic blocks should never be empty!"); 172 } 173 174 /// Rewrite the PHI nodes in an unswitched loop exit basic block. 175 /// 176 /// Requires that the loop exit and unswitched basic block are the same, and 177 /// that the exiting block was a unique predecessor of that block. Rewrites the 178 /// PHI nodes in that block such that what were LCSSA PHI nodes become trivial 179 /// PHI nodes from the old preheader that now contains the unswitched 180 /// terminator. 181 static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, 182 BasicBlock &OldExitingBB, 183 BasicBlock &OldPH) { 184 for (Instruction &I : UnswitchedBB) { 185 auto *PN = dyn_cast<PHINode>(&I); 186 if (!PN) 187 // No more PHIs to check. 188 break; 189 190 // When the loop exit is directly unswitched we just need to update the 191 // incoming basic block. We loop to handle weird cases with repeated 192 // incoming blocks, but expect to typically only have one operand here. 193 for (auto i : seq<int>(0, PN->getNumOperands())) { 194 assert(PN->getIncomingBlock(i) == &OldExitingBB && 195 "Found incoming block different from unique predecessor!"); 196 PN->setIncomingBlock(i, &OldPH); 197 } 198 } 199 } 200 201 /// Rewrite the PHI nodes in the loop exit basic block and the split off 202 /// unswitched block. 203 /// 204 /// Because the exit block remains an exit from the loop, this rewrites the 205 /// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI 206 /// nodes into the unswitched basic block to select between the value in the 207 /// old preheader and the loop exit. 208 static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, 209 BasicBlock &UnswitchedBB, 210 BasicBlock &OldExitingBB, 211 BasicBlock &OldPH) { 212 assert(&ExitBB != &UnswitchedBB && 213 "Must have different loop exit and unswitched blocks!"); 214 Instruction *InsertPt = &*UnswitchedBB.begin(); 215 for (Instruction &I : ExitBB) { 216 auto *PN = dyn_cast<PHINode>(&I); 217 if (!PN) 218 // No more PHIs to check. 219 break; 220 221 auto *NewPN = PHINode::Create(PN->getType(), /*NumReservedValues*/ 2, 222 PN->getName() + ".split", InsertPt); 223 224 // Walk backwards over the old PHI node's inputs to minimize the cost of 225 // removing each one. We have to do this weird loop manually so that we 226 // create the same number of new incoming edges in the new PHI as we expect 227 // each case-based edge to be included in the unswitched switch in some 228 // cases. 229 // FIXME: This is really, really gross. It would be much cleaner if LLVM 230 // allowed us to create a single entry for a predecessor block without 231 // having separate entries for each "edge" even though these edges are 232 // required to produce identical results. 233 for (int i = PN->getNumIncomingValues() - 1; i >= 0; --i) { 234 if (PN->getIncomingBlock(i) != &OldExitingBB) 235 continue; 236 237 Value *Incoming = PN->removeIncomingValue(i); 238 NewPN->addIncoming(Incoming, &OldPH); 239 } 240 241 // Now replace the old PHI with the new one and wire the old one in as an 242 // input to the new one. 243 PN->replaceAllUsesWith(NewPN); 244 NewPN->addIncoming(PN, &ExitBB); 245 } 246 } 247 248 /// Unswitch a trivial branch if the condition is loop invariant. 249 /// 250 /// This routine should only be called when loop code leading to the branch has 251 /// been validated as trivial (no side effects). This routine checks if the 252 /// condition is invariant and one of the successors is a loop exit. This 253 /// allows us to unswitch without duplicating the loop, making it trivial. 254 /// 255 /// If this routine fails to unswitch the branch it returns false. 256 /// 257 /// If the branch can be unswitched, this routine splits the preheader and 258 /// hoists the branch above that split. Preserves loop simplified form 259 /// (splitting the exit block as necessary). It simplifies the branch within 260 /// the loop to an unconditional branch but doesn't remove it entirely. Further 261 /// cleanup can be done with some simplify-cfg like pass. 262 static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, 263 LoopInfo &LI) { 264 assert(BI.isConditional() && "Can only unswitch a conditional branch!"); 265 DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n"); 266 267 Value *LoopCond = BI.getCondition(); 268 269 // Need a trivial loop condition to unswitch. 270 if (!L.isLoopInvariant(LoopCond)) 271 return false; 272 273 // FIXME: We should compute this once at the start and update it! 274 SmallVector<BasicBlock *, 16> ExitBlocks; 275 L.getExitBlocks(ExitBlocks); 276 SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(), 277 ExitBlocks.end()); 278 279 // Check to see if a successor of the branch is guaranteed to 280 // exit through a unique exit block without having any 281 // side-effects. If so, determine the value of Cond that causes 282 // it to do this. 283 ConstantInt *CondVal = ConstantInt::getTrue(BI.getContext()); 284 ConstantInt *Replacement = ConstantInt::getFalse(BI.getContext()); 285 int LoopExitSuccIdx = 0; 286 auto *LoopExitBB = BI.getSuccessor(0); 287 if (!ExitBlockSet.count(LoopExitBB)) { 288 std::swap(CondVal, Replacement); 289 LoopExitSuccIdx = 1; 290 LoopExitBB = BI.getSuccessor(1); 291 if (!ExitBlockSet.count(LoopExitBB)) 292 return false; 293 } 294 auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx); 295 assert(L.contains(ContinueBB) && 296 "Cannot have both successors exit and still be in the loop!"); 297 298 auto *ParentBB = BI.getParent(); 299 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) 300 return false; 301 302 DEBUG(dbgs() << " unswitching trivial branch when: " << CondVal 303 << " == " << LoopCond << "\n"); 304 305 // Split the preheader, so that we know that there is a safe place to insert 306 // the conditional branch. We will change the preheader to have a conditional 307 // branch on LoopCond. 308 BasicBlock *OldPH = L.getLoopPreheader(); 309 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI); 310 311 // Now that we have a place to insert the conditional branch, create a place 312 // to branch to: this is the exit block out of the loop that we are 313 // unswitching. We need to split this if there are other loop predecessors. 314 // Because the loop is in simplified form, *any* other predecessor is enough. 315 BasicBlock *UnswitchedBB; 316 if (BasicBlock *PredBB = LoopExitBB->getUniquePredecessor()) { 317 (void)PredBB; 318 assert(PredBB == BI.getParent() && 319 "A branch's parent isn't a predecessor!"); 320 UnswitchedBB = LoopExitBB; 321 } else { 322 UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI); 323 } 324 325 // Now splice the branch to gate reaching the new preheader and re-point its 326 // successors. 327 OldPH->getInstList().splice(std::prev(OldPH->end()), 328 BI.getParent()->getInstList(), BI); 329 OldPH->getTerminator()->eraseFromParent(); 330 BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB); 331 BI.setSuccessor(1 - LoopExitSuccIdx, NewPH); 332 333 // Create a new unconditional branch that will continue the loop as a new 334 // terminator. 335 BranchInst::Create(ContinueBB, ParentBB); 336 337 // Rewrite the relevant PHI nodes. 338 if (UnswitchedBB == LoopExitBB) 339 rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH); 340 else 341 rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB, 342 *ParentBB, *OldPH); 343 344 // Now we need to update the dominator tree. 345 updateDTAfterUnswitch(UnswitchedBB, OldPH, DT); 346 // But if we split something off of the loop exit block then we also removed 347 // one of the predecessors for the loop exit block and may need to update its 348 // idom. 349 if (UnswitchedBB != LoopExitBB) 350 updateLoopExitIDom(LoopExitBB, L, DT); 351 352 // Since this is an i1 condition we can also trivially replace uses of it 353 // within the loop with a constant. 354 replaceLoopUsesWithConstant(L, *LoopCond, *Replacement); 355 356 ++NumTrivial; 357 ++NumBranches; 358 return true; 359 } 360 361 /// Unswitch a trivial switch if the condition is loop invariant. 362 /// 363 /// This routine should only be called when loop code leading to the switch has 364 /// been validated as trivial (no side effects). This routine checks if the 365 /// condition is invariant and that at least one of the successors is a loop 366 /// exit. This allows us to unswitch without duplicating the loop, making it 367 /// trivial. 368 /// 369 /// If this routine fails to unswitch the switch it returns false. 370 /// 371 /// If the switch can be unswitched, this routine splits the preheader and 372 /// copies the switch above that split. If the default case is one of the 373 /// exiting cases, it copies the non-exiting cases and points them at the new 374 /// preheader. If the default case is not exiting, it copies the exiting cases 375 /// and points the default at the preheader. It preserves loop simplified form 376 /// (splitting the exit blocks as necessary). It simplifies the switch within 377 /// the loop by removing now-dead cases. If the default case is one of those 378 /// unswitched, it replaces its destination with a new basic block containing 379 /// only unreachable. Such basic blocks, while technically loop exits, are not 380 /// considered for unswitching so this is a stable transform and the same 381 /// switch will not be revisited. If after unswitching there is only a single 382 /// in-loop successor, the switch is further simplified to an unconditional 383 /// branch. Still more cleanup can be done with some simplify-cfg like pass. 384 static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, 385 LoopInfo &LI) { 386 DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n"); 387 Value *LoopCond = SI.getCondition(); 388 389 // If this isn't switching on an invariant condition, we can't unswitch it. 390 if (!L.isLoopInvariant(LoopCond)) 391 return false; 392 393 auto *ParentBB = SI.getParent(); 394 395 // FIXME: We should compute this once at the start and update it! 396 SmallVector<BasicBlock *, 16> ExitBlocks; 397 L.getExitBlocks(ExitBlocks); 398 SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(), 399 ExitBlocks.end()); 400 401 SmallVector<int, 4> ExitCaseIndices; 402 for (auto Case : SI.cases()) { 403 auto *SuccBB = Case.getCaseSuccessor(); 404 if (ExitBlockSet.count(SuccBB) && 405 areLoopExitPHIsLoopInvariant(L, *ParentBB, *SuccBB)) 406 ExitCaseIndices.push_back(Case.getCaseIndex()); 407 } 408 BasicBlock *DefaultExitBB = nullptr; 409 if (ExitBlockSet.count(SI.getDefaultDest()) && 410 areLoopExitPHIsLoopInvariant(L, *ParentBB, *SI.getDefaultDest()) && 411 !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator())) 412 DefaultExitBB = SI.getDefaultDest(); 413 else if (ExitCaseIndices.empty()) 414 return false; 415 416 DEBUG(dbgs() << " unswitching trivial cases...\n"); 417 418 SmallVector<std::pair<ConstantInt *, BasicBlock *>, 4> ExitCases; 419 ExitCases.reserve(ExitCaseIndices.size()); 420 // We walk the case indices backwards so that we remove the last case first 421 // and don't disrupt the earlier indices. 422 for (unsigned Index : reverse(ExitCaseIndices)) { 423 auto CaseI = SI.case_begin() + Index; 424 // Save the value of this case. 425 ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()}); 426 // Delete the unswitched cases. 427 SI.removeCase(CaseI); 428 } 429 430 // Check if after this all of the remaining cases point at the same 431 // successor. 432 BasicBlock *CommonSuccBB = nullptr; 433 if (SI.getNumCases() > 0 && 434 std::all_of(std::next(SI.case_begin()), SI.case_end(), 435 [&SI](const SwitchInst::CaseHandle &Case) { 436 return Case.getCaseSuccessor() == 437 SI.case_begin()->getCaseSuccessor(); 438 })) 439 CommonSuccBB = SI.case_begin()->getCaseSuccessor(); 440 441 if (DefaultExitBB) { 442 // We can't remove the default edge so replace it with an edge to either 443 // the single common remaining successor (if we have one) or an unreachable 444 // block. 445 if (CommonSuccBB) { 446 SI.setDefaultDest(CommonSuccBB); 447 } else { 448 BasicBlock *UnreachableBB = BasicBlock::Create( 449 ParentBB->getContext(), 450 Twine(ParentBB->getName()) + ".unreachable_default", 451 ParentBB->getParent()); 452 new UnreachableInst(ParentBB->getContext(), UnreachableBB); 453 SI.setDefaultDest(UnreachableBB); 454 DT.addNewBlock(UnreachableBB, ParentBB); 455 } 456 } else { 457 // If we're not unswitching the default, we need it to match any cases to 458 // have a common successor or if we have no cases it is the common 459 // successor. 460 if (SI.getNumCases() == 0) 461 CommonSuccBB = SI.getDefaultDest(); 462 else if (SI.getDefaultDest() != CommonSuccBB) 463 CommonSuccBB = nullptr; 464 } 465 466 // Split the preheader, so that we know that there is a safe place to insert 467 // the switch. 468 BasicBlock *OldPH = L.getLoopPreheader(); 469 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI); 470 OldPH->getTerminator()->eraseFromParent(); 471 472 // Now add the unswitched switch. 473 auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH); 474 475 // Rewrite the IR for the unswitched basic blocks. This requires two steps. 476 // First, we split any exit blocks with remaining in-loop predecessors. Then 477 // we update the PHIs in one of two ways depending on if there was a split. 478 // We walk in reverse so that we split in the same order as the cases 479 // appeared. This is purely for convenience of reading the resulting IR, but 480 // it doesn't cost anything really. 481 SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs; 482 SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap; 483 // Handle the default exit if necessary. 484 // FIXME: It'd be great if we could merge this with the loop below but LLVM's 485 // ranges aren't quite powerful enough yet. 486 if (DefaultExitBB) { 487 if (pred_empty(DefaultExitBB)) { 488 UnswitchedExitBBs.insert(DefaultExitBB); 489 rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH); 490 } else { 491 auto *SplitBB = 492 SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI); 493 rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB, 494 *ParentBB, *OldPH); 495 updateLoopExitIDom(DefaultExitBB, L, DT); 496 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB; 497 } 498 } 499 // Note that we must use a reference in the for loop so that we update the 500 // container. 501 for (auto &CasePair : reverse(ExitCases)) { 502 // Grab a reference to the exit block in the pair so that we can update it. 503 BasicBlock *ExitBB = CasePair.second; 504 505 // If this case is the last edge into the exit block, we can simply reuse it 506 // as it will no longer be a loop exit. No mapping necessary. 507 if (pred_empty(ExitBB)) { 508 // Only rewrite once. 509 if (UnswitchedExitBBs.insert(ExitBB).second) 510 rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH); 511 continue; 512 } 513 514 // Otherwise we need to split the exit block so that we retain an exit 515 // block from the loop and a target for the unswitched condition. 516 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB]; 517 if (!SplitExitBB) { 518 // If this is the first time we see this, do the split and remember it. 519 SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI); 520 rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB, 521 *ParentBB, *OldPH); 522 updateLoopExitIDom(ExitBB, L, DT); 523 } 524 // Update the case pair to point to the split block. 525 CasePair.second = SplitExitBB; 526 } 527 528 // Now add the unswitched cases. We do this in reverse order as we built them 529 // in reverse order. 530 for (auto CasePair : reverse(ExitCases)) { 531 ConstantInt *CaseVal = CasePair.first; 532 BasicBlock *UnswitchedBB = CasePair.second; 533 534 NewSI->addCase(CaseVal, UnswitchedBB); 535 updateDTAfterUnswitch(UnswitchedBB, OldPH, DT); 536 } 537 538 // If the default was unswitched, re-point it and add explicit cases for 539 // entering the loop. 540 if (DefaultExitBB) { 541 NewSI->setDefaultDest(DefaultExitBB); 542 updateDTAfterUnswitch(DefaultExitBB, OldPH, DT); 543 544 // We removed all the exit cases, so we just copy the cases to the 545 // unswitched switch. 546 for (auto Case : SI.cases()) 547 NewSI->addCase(Case.getCaseValue(), NewPH); 548 } 549 550 // If we ended up with a common successor for every path through the switch 551 // after unswitching, rewrite it to an unconditional branch to make it easy 552 // to recognize. Otherwise we potentially have to recognize the default case 553 // pointing at unreachable and other complexity. 554 if (CommonSuccBB) { 555 BasicBlock *BB = SI.getParent(); 556 SI.eraseFromParent(); 557 BranchInst::Create(CommonSuccBB, BB); 558 } 559 560 DT.verifyDomTree(); 561 ++NumTrivial; 562 ++NumSwitches; 563 return true; 564 } 565 566 /// This routine scans the loop to find a branch or switch which occurs before 567 /// any side effects occur. These can potentially be unswitched without 568 /// duplicating the loop. If a branch or switch is successfully unswitched the 569 /// scanning continues to see if subsequent branches or switches have become 570 /// trivial. Once all trivial candidates have been unswitched, this routine 571 /// returns. 572 /// 573 /// The return value indicates whether anything was unswitched (and therefore 574 /// changed). 575 static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, 576 LoopInfo &LI) { 577 bool Changed = false; 578 579 // If loop header has only one reachable successor we should keep looking for 580 // trivial condition candidates in the successor as well. An alternative is 581 // to constant fold conditions and merge successors into loop header (then we 582 // only need to check header's terminator). The reason for not doing this in 583 // LoopUnswitch pass is that it could potentially break LoopPassManager's 584 // invariants. Folding dead branches could either eliminate the current loop 585 // or make other loops unreachable. LCSSA form might also not be preserved 586 // after deleting branches. The following code keeps traversing loop header's 587 // successors until it finds the trivial condition candidate (condition that 588 // is not a constant). Since unswitching generates branches with constant 589 // conditions, this scenario could be very common in practice. 590 BasicBlock *CurrentBB = L.getHeader(); 591 SmallPtrSet<BasicBlock *, 8> Visited; 592 Visited.insert(CurrentBB); 593 do { 594 // Check if there are any side-effecting instructions (e.g. stores, calls, 595 // volatile loads) in the part of the loop that the code *would* execute 596 // without unswitching. 597 if (llvm::any_of(*CurrentBB, 598 [](Instruction &I) { return I.mayHaveSideEffects(); })) 599 return Changed; 600 601 TerminatorInst *CurrentTerm = CurrentBB->getTerminator(); 602 603 if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) { 604 // Don't bother trying to unswitch past a switch with a constant 605 // condition. This should be removed prior to running this pass by 606 // simplify-cfg. 607 if (isa<Constant>(SI->getCondition())) 608 return Changed; 609 610 if (!unswitchTrivialSwitch(L, *SI, DT, LI)) 611 // Coludn't unswitch this one so we're done. 612 return Changed; 613 614 // Mark that we managed to unswitch something. 615 Changed = true; 616 617 // If unswitching turned the terminator into an unconditional branch then 618 // we can continue. The unswitching logic specifically works to fold any 619 // cases it can into an unconditional branch to make it easier to 620 // recognize here. 621 auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator()); 622 if (!BI || BI->isConditional()) 623 return Changed; 624 625 CurrentBB = BI->getSuccessor(0); 626 continue; 627 } 628 629 auto *BI = dyn_cast<BranchInst>(CurrentTerm); 630 if (!BI) 631 // We do not understand other terminator instructions. 632 return Changed; 633 634 // Don't bother trying to unswitch past an unconditional branch or a branch 635 // with a constant value. These should be removed by simplify-cfg prior to 636 // running this pass. 637 if (!BI->isConditional() || isa<Constant>(BI->getCondition())) 638 return Changed; 639 640 // Found a trivial condition candidate: non-foldable conditional branch. If 641 // we fail to unswitch this, we can't do anything else that is trivial. 642 if (!unswitchTrivialBranch(L, *BI, DT, LI)) 643 return Changed; 644 645 // Mark that we managed to unswitch something. 646 Changed = true; 647 648 // We unswitched the branch. This should always leave us with an 649 // unconditional branch that we can follow now. 650 BI = cast<BranchInst>(CurrentBB->getTerminator()); 651 assert(!BI->isConditional() && 652 "Cannot form a conditional branch by unswitching1"); 653 CurrentBB = BI->getSuccessor(0); 654 655 // When continuing, if we exit the loop or reach a previous visited block, 656 // then we can not reach any trivial condition candidates (unfoldable 657 // branch instructions or switch instructions) and no unswitch can happen. 658 } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second); 659 660 return Changed; 661 } 662 663 /// Unswitch control flow predicated on loop invariant conditions. 664 /// 665 /// This first hoists all branches or switches which are trivial (IE, do not 666 /// require duplicating any part of the loop) out of the loop body. It then 667 /// looks at other loop invariant control flows and tries to unswitch those as 668 /// well by cloning the loop if the result is small enough. 669 static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, 670 AssumptionCache &AC) { 671 assert(L.isLCSSAForm(DT) && 672 "Loops must be in LCSSA form before unswitching."); 673 bool Changed = false; 674 675 // Must be in loop simplified form: we need a preheader and dedicated exits. 676 if (!L.isLoopSimplifyForm()) 677 return false; 678 679 // Try trivial unswitch first before loop over other basic blocks in the loop. 680 Changed |= unswitchAllTrivialConditions(L, DT, LI); 681 682 // FIXME: Add support for non-trivial unswitching by cloning the loop. 683 684 return Changed; 685 } 686 687 PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, 688 LoopStandardAnalysisResults &AR, 689 LPMUpdater &U) { 690 Function &F = *L.getHeader()->getParent(); 691 (void)F; 692 693 DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L << "\n"); 694 695 if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC)) 696 return PreservedAnalyses::all(); 697 698 #ifndef NDEBUG 699 // Historically this pass has had issues with the dominator tree so verify it 700 // in asserts builds. 701 AR.DT.verifyDomTree(); 702 #endif 703 return getLoopPassPreservedAnalyses(); 704 } 705 706 namespace { 707 708 class SimpleLoopUnswitchLegacyPass : public LoopPass { 709 public: 710 static char ID; // Pass ID, replacement for typeid 711 712 explicit SimpleLoopUnswitchLegacyPass() : LoopPass(ID) { 713 initializeSimpleLoopUnswitchLegacyPassPass( 714 *PassRegistry::getPassRegistry()); 715 } 716 717 bool runOnLoop(Loop *L, LPPassManager &LPM) override; 718 719 void getAnalysisUsage(AnalysisUsage &AU) const override { 720 AU.addRequired<AssumptionCacheTracker>(); 721 getLoopAnalysisUsage(AU); 722 } 723 }; 724 725 } // end anonymous namespace 726 727 bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { 728 if (skipLoop(L)) 729 return false; 730 731 Function &F = *L->getHeader()->getParent(); 732 733 DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L << "\n"); 734 735 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 736 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 737 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 738 739 bool Changed = unswitchLoop(*L, DT, LI, AC); 740 741 #ifndef NDEBUG 742 // Historically this pass has had issues with the dominator tree so verify it 743 // in asserts builds. 744 DT.verifyDomTree(); 745 #endif 746 return Changed; 747 } 748 749 char SimpleLoopUnswitchLegacyPass::ID = 0; 750 INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", 751 "Simple unswitch loops", false, false) 752 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 753 INITIALIZE_PASS_DEPENDENCY(LoopPass) 754 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 755 INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", 756 "Simple unswitch loops", false, false) 757 758 Pass *llvm::createSimpleLoopUnswitchLegacyPass() { 759 return new SimpleLoopUnswitchLegacyPass(); 760 } 761