1 //===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "llvm/ADT/DenseMap.h" 11 #include "llvm/ADT/Sequence.h" 12 #include "llvm/ADT/SetVector.h" 13 #include "llvm/ADT/SmallPtrSet.h" 14 #include "llvm/ADT/SmallVector.h" 15 #include "llvm/ADT/Statistic.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/ADT/Twine.h" 18 #include "llvm/Analysis/AssumptionCache.h" 19 #include "llvm/Analysis/LoopAnalysisManager.h" 20 #include "llvm/Analysis/LoopInfo.h" 21 #include "llvm/Analysis/LoopPass.h" 22 #include "llvm/IR/BasicBlock.h" 23 #include "llvm/IR/Constant.h" 24 #include "llvm/IR/Constants.h" 25 #include "llvm/IR/Dominators.h" 26 #include "llvm/IR/Function.h" 27 #include "llvm/IR/InstrTypes.h" 28 #include "llvm/IR/Instruction.h" 29 #include "llvm/IR/Instructions.h" 30 #include "llvm/IR/Use.h" 31 #include "llvm/IR/Value.h" 32 #include "llvm/Pass.h" 33 #include "llvm/Support/Casting.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/ErrorHandling.h" 36 #include "llvm/Support/GenericDomTree.h" 37 #include "llvm/Support/raw_ostream.h" 38 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 39 #include "llvm/Transforms/Utils/LoopUtils.h" 40 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" 41 #include <algorithm> 42 #include <cassert> 43 #include <iterator> 44 #include <utility> 45 46 #define DEBUG_TYPE "simple-loop-unswitch" 47 48 using namespace llvm; 49 50 STATISTIC(NumBranches, "Number of branches unswitched"); 51 STATISTIC(NumSwitches, "Number of switches unswitched"); 52 STATISTIC(NumTrivial, "Number of unswitches that are trivial"); 53 54 static void replaceLoopUsesWithConstant(Loop &L, Value &LIC, 55 Constant &Replacement) { 56 assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?"); 57 58 // Replace uses of LIC in the loop with the given constant. 59 for (auto UI = LIC.use_begin(), UE = LIC.use_end(); UI != UE;) { 60 // Grab the use and walk past it so we can clobber it in the use list. 61 Use *U = &*UI++; 62 Instruction *UserI = dyn_cast<Instruction>(U->getUser()); 63 if (!UserI || !L.contains(UserI)) 64 continue; 65 66 // Replace this use within the loop body. 67 *U = &Replacement; 68 } 69 } 70 71 /// Update the dominator tree after removing one exiting predecessor of a loop 72 /// exit block. 73 static void updateLoopExitIDom(BasicBlock *LoopExitBB, Loop &L, 74 DominatorTree &DT) { 75 assert(pred_begin(LoopExitBB) != pred_end(LoopExitBB) && 76 "Cannot have empty predecessors of the loop exit block if we split " 77 "off a block to unswitch!"); 78 79 BasicBlock *IDom = *pred_begin(LoopExitBB); 80 // Walk all of the other predecessors finding the nearest common dominator 81 // until all predecessors are covered or we reach the loop header. The loop 82 // header necessarily dominates all loop exit blocks in loop simplified form 83 // so we can early-exit the moment we hit that block. 84 for (auto PI = std::next(pred_begin(LoopExitBB)), PE = pred_end(LoopExitBB); 85 PI != PE && IDom != L.getHeader(); ++PI) 86 IDom = DT.findNearestCommonDominator(IDom, *PI); 87 88 DT.changeImmediateDominator(LoopExitBB, IDom); 89 } 90 91 /// Update the dominator tree after unswitching a particular former exit block. 92 /// 93 /// This handles the full update of the dominator tree after hoisting a block 94 /// that previously was an exit block (or split off of an exit block) up to be 95 /// reached from the new immediate dominator of the preheader. 96 /// 97 /// The common case is simple -- we just move the unswitched block to have an 98 /// immediate dominator of the old preheader. But in complex cases, there may 99 /// be other blocks reachable from the unswitched block that are immediately 100 /// dominated by some node between the unswitched one and the old preheader. 101 /// All of these also need to be hoisted in the dominator tree. We also want to 102 /// minimize queries to the dominator tree because each step of this 103 /// invalidates any DFS numbers that would make queries fast. 104 static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH, 105 DominatorTree &DT) { 106 DomTreeNode *OldPHNode = DT[OldPH]; 107 DomTreeNode *UnswitchedNode = DT[UnswitchedBB]; 108 // If the dominator tree has already been updated for this unswitched node, 109 // we're done. This makes it easier to use this routine if there are multiple 110 // paths to the same unswitched destination. 111 if (UnswitchedNode->getIDom() == OldPHNode) 112 return; 113 114 // First collect the domtree nodes that we are hoisting over. These are the 115 // set of nodes which may have children that need to be hoisted as well. 116 SmallPtrSet<DomTreeNode *, 4> DomChain; 117 for (auto *IDom = UnswitchedNode->getIDom(); IDom != OldPHNode; 118 IDom = IDom->getIDom()) 119 DomChain.insert(IDom); 120 121 // The unswitched block ends up immediately dominated by the old preheader -- 122 // regardless of whether it is the loop exit block or split off of the loop 123 // exit block. 124 DT.changeImmediateDominator(UnswitchedNode, OldPHNode); 125 126 // For everything that moves up the dominator tree, we need to examine the 127 // dominator frontier to see if it additionally should move up the dominator 128 // tree. This lambda appends the dominator frontier for a node on the 129 // worklist. 130 // 131 // Note that we don't currently use the IDFCalculator here for two reasons: 132 // 1) It computes dominator tree levels for the entire function on each run 133 // of 'compute'. While this isn't terrible, given that we expect to update 134 // relatively small subtrees of the domtree, it isn't necessarily the right 135 // tradeoff. 136 // 2) The interface doesn't fit this usage well. It doesn't operate in 137 // append-only, and builds several sets that we don't need. 138 // 139 // FIXME: Neither of these issues are a big deal and could be addressed with 140 // some amount of refactoring of IDFCalculator. That would allow us to share 141 // the core logic here (which is solving the same core problem). 142 SmallSetVector<BasicBlock *, 4> Worklist; 143 SmallVector<DomTreeNode *, 4> DomNodes; 144 SmallPtrSet<BasicBlock *, 4> DomSet; 145 auto AppendDomFrontier = [&](DomTreeNode *Node) { 146 assert(DomNodes.empty() && "Must start with no dominator nodes."); 147 assert(DomSet.empty() && "Must start with an empty dominator set."); 148 149 // First flatten this subtree into sequence of nodes by doing a pre-order 150 // walk. 151 DomNodes.push_back(Node); 152 // We intentionally re-evaluate the size as each node can add new children. 153 // Because this is a tree walk, this cannot add any duplicates. 154 for (int i = 0; i < (int)DomNodes.size(); ++i) 155 DomNodes.insert(DomNodes.end(), DomNodes[i]->begin(), DomNodes[i]->end()); 156 157 // Now create a set of the basic blocks so we can quickly test for 158 // dominated successors. We could in theory use the DFS numbers of the 159 // dominator tree for this, but we want this to remain predictably fast 160 // even while we mutate the dominator tree in ways that would invalidate 161 // the DFS numbering. 162 for (DomTreeNode *InnerN : DomNodes) 163 DomSet.insert(InnerN->getBlock()); 164 165 // Now re-walk the nodes, appending every successor of every node that isn't 166 // in the set. Note that we don't append the node itself, even though if it 167 // is a successor it does not strictly dominate itself and thus it would be 168 // part of the dominance frontier. The reason we don't append it is that 169 // the node passed in came *from* the worklist and so it has already been 170 // processed. 171 for (DomTreeNode *InnerN : DomNodes) 172 for (BasicBlock *SuccBB : successors(InnerN->getBlock())) 173 if (!DomSet.count(SuccBB)) 174 Worklist.insert(SuccBB); 175 176 DomNodes.clear(); 177 DomSet.clear(); 178 }; 179 180 // Append the initial dom frontier nodes. 181 AppendDomFrontier(UnswitchedNode); 182 183 // Walk the worklist. We grow the list in the loop and so must recompute size. 184 for (int i = 0; i < (int)Worklist.size(); ++i) { 185 auto *BB = Worklist[i]; 186 187 DomTreeNode *Node = DT[BB]; 188 assert(!DomChain.count(Node) && 189 "Cannot be dominated by a block you can reach!"); 190 191 // If this block had an immediate dominator somewhere in the chain 192 // we hoisted over, then its position in the domtree needs to move as it is 193 // reachable from a node hoisted over this chain. 194 if (!DomChain.count(Node->getIDom())) 195 continue; 196 197 DT.changeImmediateDominator(Node, OldPHNode); 198 199 // Now add this node's dominator frontier to the worklist as well. 200 AppendDomFrontier(Node); 201 } 202 } 203 204 /// Check that all the LCSSA PHI nodes in the loop exit block have trivial 205 /// incoming values along this edge. 206 static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB, 207 BasicBlock &ExitBB) { 208 for (Instruction &I : ExitBB) { 209 auto *PN = dyn_cast<PHINode>(&I); 210 if (!PN) 211 // No more PHIs to check. 212 return true; 213 214 // If the incoming value for this edge isn't loop invariant the unswitch 215 // won't be trivial. 216 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB))) 217 return false; 218 } 219 llvm_unreachable("Basic blocks should never be empty!"); 220 } 221 222 /// Rewrite the PHI nodes in an unswitched loop exit basic block. 223 /// 224 /// Requires that the loop exit and unswitched basic block are the same, and 225 /// that the exiting block was a unique predecessor of that block. Rewrites the 226 /// PHI nodes in that block such that what were LCSSA PHI nodes become trivial 227 /// PHI nodes from the old preheader that now contains the unswitched 228 /// terminator. 229 static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, 230 BasicBlock &OldExitingBB, 231 BasicBlock &OldPH) { 232 for (Instruction &I : UnswitchedBB) { 233 auto *PN = dyn_cast<PHINode>(&I); 234 if (!PN) 235 // No more PHIs to check. 236 break; 237 238 // When the loop exit is directly unswitched we just need to update the 239 // incoming basic block. We loop to handle weird cases with repeated 240 // incoming blocks, but expect to typically only have one operand here. 241 for (auto i : seq<int>(0, PN->getNumOperands())) { 242 assert(PN->getIncomingBlock(i) == &OldExitingBB && 243 "Found incoming block different from unique predecessor!"); 244 PN->setIncomingBlock(i, &OldPH); 245 } 246 } 247 } 248 249 /// Rewrite the PHI nodes in the loop exit basic block and the split off 250 /// unswitched block. 251 /// 252 /// Because the exit block remains an exit from the loop, this rewrites the 253 /// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI 254 /// nodes into the unswitched basic block to select between the value in the 255 /// old preheader and the loop exit. 256 static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, 257 BasicBlock &UnswitchedBB, 258 BasicBlock &OldExitingBB, 259 BasicBlock &OldPH) { 260 assert(&ExitBB != &UnswitchedBB && 261 "Must have different loop exit and unswitched blocks!"); 262 Instruction *InsertPt = &*UnswitchedBB.begin(); 263 for (Instruction &I : ExitBB) { 264 auto *PN = dyn_cast<PHINode>(&I); 265 if (!PN) 266 // No more PHIs to check. 267 break; 268 269 auto *NewPN = PHINode::Create(PN->getType(), /*NumReservedValues*/ 2, 270 PN->getName() + ".split", InsertPt); 271 272 // Walk backwards over the old PHI node's inputs to minimize the cost of 273 // removing each one. We have to do this weird loop manually so that we 274 // create the same number of new incoming edges in the new PHI as we expect 275 // each case-based edge to be included in the unswitched switch in some 276 // cases. 277 // FIXME: This is really, really gross. It would be much cleaner if LLVM 278 // allowed us to create a single entry for a predecessor block without 279 // having separate entries for each "edge" even though these edges are 280 // required to produce identical results. 281 for (int i = PN->getNumIncomingValues() - 1; i >= 0; --i) { 282 if (PN->getIncomingBlock(i) != &OldExitingBB) 283 continue; 284 285 Value *Incoming = PN->removeIncomingValue(i); 286 NewPN->addIncoming(Incoming, &OldPH); 287 } 288 289 // Now replace the old PHI with the new one and wire the old one in as an 290 // input to the new one. 291 PN->replaceAllUsesWith(NewPN); 292 NewPN->addIncoming(PN, &ExitBB); 293 } 294 } 295 296 /// Unswitch a trivial branch if the condition is loop invariant. 297 /// 298 /// This routine should only be called when loop code leading to the branch has 299 /// been validated as trivial (no side effects). This routine checks if the 300 /// condition is invariant and one of the successors is a loop exit. This 301 /// allows us to unswitch without duplicating the loop, making it trivial. 302 /// 303 /// If this routine fails to unswitch the branch it returns false. 304 /// 305 /// If the branch can be unswitched, this routine splits the preheader and 306 /// hoists the branch above that split. Preserves loop simplified form 307 /// (splitting the exit block as necessary). It simplifies the branch within 308 /// the loop to an unconditional branch but doesn't remove it entirely. Further 309 /// cleanup can be done with some simplify-cfg like pass. 310 static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, 311 LoopInfo &LI) { 312 assert(BI.isConditional() && "Can only unswitch a conditional branch!"); 313 DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n"); 314 315 Value *LoopCond = BI.getCondition(); 316 317 // Need a trivial loop condition to unswitch. 318 if (!L.isLoopInvariant(LoopCond)) 319 return false; 320 321 // FIXME: We should compute this once at the start and update it! 322 SmallVector<BasicBlock *, 16> ExitBlocks; 323 L.getExitBlocks(ExitBlocks); 324 SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(), 325 ExitBlocks.end()); 326 327 // Check to see if a successor of the branch is guaranteed to 328 // exit through a unique exit block without having any 329 // side-effects. If so, determine the value of Cond that causes 330 // it to do this. 331 ConstantInt *CondVal = ConstantInt::getTrue(BI.getContext()); 332 ConstantInt *Replacement = ConstantInt::getFalse(BI.getContext()); 333 int LoopExitSuccIdx = 0; 334 auto *LoopExitBB = BI.getSuccessor(0); 335 if (!ExitBlockSet.count(LoopExitBB)) { 336 std::swap(CondVal, Replacement); 337 LoopExitSuccIdx = 1; 338 LoopExitBB = BI.getSuccessor(1); 339 if (!ExitBlockSet.count(LoopExitBB)) 340 return false; 341 } 342 auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx); 343 assert(L.contains(ContinueBB) && 344 "Cannot have both successors exit and still be in the loop!"); 345 346 auto *ParentBB = BI.getParent(); 347 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) 348 return false; 349 350 DEBUG(dbgs() << " unswitching trivial branch when: " << CondVal 351 << " == " << LoopCond << "\n"); 352 353 // Split the preheader, so that we know that there is a safe place to insert 354 // the conditional branch. We will change the preheader to have a conditional 355 // branch on LoopCond. 356 BasicBlock *OldPH = L.getLoopPreheader(); 357 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI); 358 359 // Now that we have a place to insert the conditional branch, create a place 360 // to branch to: this is the exit block out of the loop that we are 361 // unswitching. We need to split this if there are other loop predecessors. 362 // Because the loop is in simplified form, *any* other predecessor is enough. 363 BasicBlock *UnswitchedBB; 364 if (BasicBlock *PredBB = LoopExitBB->getUniquePredecessor()) { 365 (void)PredBB; 366 assert(PredBB == BI.getParent() && 367 "A branch's parent isn't a predecessor!"); 368 UnswitchedBB = LoopExitBB; 369 } else { 370 UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI); 371 } 372 373 // Now splice the branch to gate reaching the new preheader and re-point its 374 // successors. 375 OldPH->getInstList().splice(std::prev(OldPH->end()), 376 BI.getParent()->getInstList(), BI); 377 OldPH->getTerminator()->eraseFromParent(); 378 BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB); 379 BI.setSuccessor(1 - LoopExitSuccIdx, NewPH); 380 381 // Create a new unconditional branch that will continue the loop as a new 382 // terminator. 383 BranchInst::Create(ContinueBB, ParentBB); 384 385 // Rewrite the relevant PHI nodes. 386 if (UnswitchedBB == LoopExitBB) 387 rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH); 388 else 389 rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB, 390 *ParentBB, *OldPH); 391 392 // Now we need to update the dominator tree. 393 updateDTAfterUnswitch(UnswitchedBB, OldPH, DT); 394 // But if we split something off of the loop exit block then we also removed 395 // one of the predecessors for the loop exit block and may need to update its 396 // idom. 397 if (UnswitchedBB != LoopExitBB) 398 updateLoopExitIDom(LoopExitBB, L, DT); 399 400 // Since this is an i1 condition we can also trivially replace uses of it 401 // within the loop with a constant. 402 replaceLoopUsesWithConstant(L, *LoopCond, *Replacement); 403 404 ++NumTrivial; 405 ++NumBranches; 406 return true; 407 } 408 409 /// Unswitch a trivial switch if the condition is loop invariant. 410 /// 411 /// This routine should only be called when loop code leading to the switch has 412 /// been validated as trivial (no side effects). This routine checks if the 413 /// condition is invariant and that at least one of the successors is a loop 414 /// exit. This allows us to unswitch without duplicating the loop, making it 415 /// trivial. 416 /// 417 /// If this routine fails to unswitch the switch it returns false. 418 /// 419 /// If the switch can be unswitched, this routine splits the preheader and 420 /// copies the switch above that split. If the default case is one of the 421 /// exiting cases, it copies the non-exiting cases and points them at the new 422 /// preheader. If the default case is not exiting, it copies the exiting cases 423 /// and points the default at the preheader. It preserves loop simplified form 424 /// (splitting the exit blocks as necessary). It simplifies the switch within 425 /// the loop by removing now-dead cases. If the default case is one of those 426 /// unswitched, it replaces its destination with a new basic block containing 427 /// only unreachable. Such basic blocks, while technically loop exits, are not 428 /// considered for unswitching so this is a stable transform and the same 429 /// switch will not be revisited. If after unswitching there is only a single 430 /// in-loop successor, the switch is further simplified to an unconditional 431 /// branch. Still more cleanup can be done with some simplify-cfg like pass. 432 static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, 433 LoopInfo &LI) { 434 DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n"); 435 Value *LoopCond = SI.getCondition(); 436 437 // If this isn't switching on an invariant condition, we can't unswitch it. 438 if (!L.isLoopInvariant(LoopCond)) 439 return false; 440 441 auto *ParentBB = SI.getParent(); 442 443 // FIXME: We should compute this once at the start and update it! 444 SmallVector<BasicBlock *, 16> ExitBlocks; 445 L.getExitBlocks(ExitBlocks); 446 SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(), 447 ExitBlocks.end()); 448 449 SmallVector<int, 4> ExitCaseIndices; 450 for (auto Case : SI.cases()) { 451 auto *SuccBB = Case.getCaseSuccessor(); 452 if (ExitBlockSet.count(SuccBB) && 453 areLoopExitPHIsLoopInvariant(L, *ParentBB, *SuccBB)) 454 ExitCaseIndices.push_back(Case.getCaseIndex()); 455 } 456 BasicBlock *DefaultExitBB = nullptr; 457 if (ExitBlockSet.count(SI.getDefaultDest()) && 458 areLoopExitPHIsLoopInvariant(L, *ParentBB, *SI.getDefaultDest()) && 459 !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator())) 460 DefaultExitBB = SI.getDefaultDest(); 461 else if (ExitCaseIndices.empty()) 462 return false; 463 464 DEBUG(dbgs() << " unswitching trivial cases...\n"); 465 466 SmallVector<std::pair<ConstantInt *, BasicBlock *>, 4> ExitCases; 467 ExitCases.reserve(ExitCaseIndices.size()); 468 // We walk the case indices backwards so that we remove the last case first 469 // and don't disrupt the earlier indices. 470 for (unsigned Index : reverse(ExitCaseIndices)) { 471 auto CaseI = SI.case_begin() + Index; 472 // Save the value of this case. 473 ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()}); 474 // Delete the unswitched cases. 475 SI.removeCase(CaseI); 476 } 477 478 // Check if after this all of the remaining cases point at the same 479 // successor. 480 BasicBlock *CommonSuccBB = nullptr; 481 if (SI.getNumCases() > 0 && 482 std::all_of(std::next(SI.case_begin()), SI.case_end(), 483 [&SI](const SwitchInst::CaseHandle &Case) { 484 return Case.getCaseSuccessor() == 485 SI.case_begin()->getCaseSuccessor(); 486 })) 487 CommonSuccBB = SI.case_begin()->getCaseSuccessor(); 488 489 if (DefaultExitBB) { 490 // We can't remove the default edge so replace it with an edge to either 491 // the single common remaining successor (if we have one) or an unreachable 492 // block. 493 if (CommonSuccBB) { 494 SI.setDefaultDest(CommonSuccBB); 495 } else { 496 BasicBlock *UnreachableBB = BasicBlock::Create( 497 ParentBB->getContext(), 498 Twine(ParentBB->getName()) + ".unreachable_default", 499 ParentBB->getParent()); 500 new UnreachableInst(ParentBB->getContext(), UnreachableBB); 501 SI.setDefaultDest(UnreachableBB); 502 DT.addNewBlock(UnreachableBB, ParentBB); 503 } 504 } else { 505 // If we're not unswitching the default, we need it to match any cases to 506 // have a common successor or if we have no cases it is the common 507 // successor. 508 if (SI.getNumCases() == 0) 509 CommonSuccBB = SI.getDefaultDest(); 510 else if (SI.getDefaultDest() != CommonSuccBB) 511 CommonSuccBB = nullptr; 512 } 513 514 // Split the preheader, so that we know that there is a safe place to insert 515 // the switch. 516 BasicBlock *OldPH = L.getLoopPreheader(); 517 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI); 518 OldPH->getTerminator()->eraseFromParent(); 519 520 // Now add the unswitched switch. 521 auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH); 522 523 // Rewrite the IR for the unswitched basic blocks. This requires two steps. 524 // First, we split any exit blocks with remaining in-loop predecessors. Then 525 // we update the PHIs in one of two ways depending on if there was a split. 526 // We walk in reverse so that we split in the same order as the cases 527 // appeared. This is purely for convenience of reading the resulting IR, but 528 // it doesn't cost anything really. 529 SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs; 530 SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap; 531 // Handle the default exit if necessary. 532 // FIXME: It'd be great if we could merge this with the loop below but LLVM's 533 // ranges aren't quite powerful enough yet. 534 if (DefaultExitBB) { 535 if (pred_empty(DefaultExitBB)) { 536 UnswitchedExitBBs.insert(DefaultExitBB); 537 rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH); 538 } else { 539 auto *SplitBB = 540 SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI); 541 rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB, 542 *ParentBB, *OldPH); 543 updateLoopExitIDom(DefaultExitBB, L, DT); 544 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB; 545 } 546 } 547 // Note that we must use a reference in the for loop so that we update the 548 // container. 549 for (auto &CasePair : reverse(ExitCases)) { 550 // Grab a reference to the exit block in the pair so that we can update it. 551 BasicBlock *ExitBB = CasePair.second; 552 553 // If this case is the last edge into the exit block, we can simply reuse it 554 // as it will no longer be a loop exit. No mapping necessary. 555 if (pred_empty(ExitBB)) { 556 // Only rewrite once. 557 if (UnswitchedExitBBs.insert(ExitBB).second) 558 rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH); 559 continue; 560 } 561 562 // Otherwise we need to split the exit block so that we retain an exit 563 // block from the loop and a target for the unswitched condition. 564 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB]; 565 if (!SplitExitBB) { 566 // If this is the first time we see this, do the split and remember it. 567 SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI); 568 rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB, 569 *ParentBB, *OldPH); 570 updateLoopExitIDom(ExitBB, L, DT); 571 } 572 // Update the case pair to point to the split block. 573 CasePair.second = SplitExitBB; 574 } 575 576 // Now add the unswitched cases. We do this in reverse order as we built them 577 // in reverse order. 578 for (auto CasePair : reverse(ExitCases)) { 579 ConstantInt *CaseVal = CasePair.first; 580 BasicBlock *UnswitchedBB = CasePair.second; 581 582 NewSI->addCase(CaseVal, UnswitchedBB); 583 updateDTAfterUnswitch(UnswitchedBB, OldPH, DT); 584 } 585 586 // If the default was unswitched, re-point it and add explicit cases for 587 // entering the loop. 588 if (DefaultExitBB) { 589 NewSI->setDefaultDest(DefaultExitBB); 590 updateDTAfterUnswitch(DefaultExitBB, OldPH, DT); 591 592 // We removed all the exit cases, so we just copy the cases to the 593 // unswitched switch. 594 for (auto Case : SI.cases()) 595 NewSI->addCase(Case.getCaseValue(), NewPH); 596 } 597 598 // If we ended up with a common successor for every path through the switch 599 // after unswitching, rewrite it to an unconditional branch to make it easy 600 // to recognize. Otherwise we potentially have to recognize the default case 601 // pointing at unreachable and other complexity. 602 if (CommonSuccBB) { 603 BasicBlock *BB = SI.getParent(); 604 SI.eraseFromParent(); 605 BranchInst::Create(CommonSuccBB, BB); 606 } 607 608 DT.verifyDomTree(); 609 ++NumTrivial; 610 ++NumSwitches; 611 return true; 612 } 613 614 /// This routine scans the loop to find a branch or switch which occurs before 615 /// any side effects occur. These can potentially be unswitched without 616 /// duplicating the loop. If a branch or switch is successfully unswitched the 617 /// scanning continues to see if subsequent branches or switches have become 618 /// trivial. Once all trivial candidates have been unswitched, this routine 619 /// returns. 620 /// 621 /// The return value indicates whether anything was unswitched (and therefore 622 /// changed). 623 static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, 624 LoopInfo &LI) { 625 bool Changed = false; 626 627 // If loop header has only one reachable successor we should keep looking for 628 // trivial condition candidates in the successor as well. An alternative is 629 // to constant fold conditions and merge successors into loop header (then we 630 // only need to check header's terminator). The reason for not doing this in 631 // LoopUnswitch pass is that it could potentially break LoopPassManager's 632 // invariants. Folding dead branches could either eliminate the current loop 633 // or make other loops unreachable. LCSSA form might also not be preserved 634 // after deleting branches. The following code keeps traversing loop header's 635 // successors until it finds the trivial condition candidate (condition that 636 // is not a constant). Since unswitching generates branches with constant 637 // conditions, this scenario could be very common in practice. 638 BasicBlock *CurrentBB = L.getHeader(); 639 SmallPtrSet<BasicBlock *, 8> Visited; 640 Visited.insert(CurrentBB); 641 do { 642 // Check if there are any side-effecting instructions (e.g. stores, calls, 643 // volatile loads) in the part of the loop that the code *would* execute 644 // without unswitching. 645 if (llvm::any_of(*CurrentBB, 646 [](Instruction &I) { return I.mayHaveSideEffects(); })) 647 return Changed; 648 649 TerminatorInst *CurrentTerm = CurrentBB->getTerminator(); 650 651 if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) { 652 // Don't bother trying to unswitch past a switch with a constant 653 // condition. This should be removed prior to running this pass by 654 // simplify-cfg. 655 if (isa<Constant>(SI->getCondition())) 656 return Changed; 657 658 if (!unswitchTrivialSwitch(L, *SI, DT, LI)) 659 // Coludn't unswitch this one so we're done. 660 return Changed; 661 662 // Mark that we managed to unswitch something. 663 Changed = true; 664 665 // If unswitching turned the terminator into an unconditional branch then 666 // we can continue. The unswitching logic specifically works to fold any 667 // cases it can into an unconditional branch to make it easier to 668 // recognize here. 669 auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator()); 670 if (!BI || BI->isConditional()) 671 return Changed; 672 673 CurrentBB = BI->getSuccessor(0); 674 continue; 675 } 676 677 auto *BI = dyn_cast<BranchInst>(CurrentTerm); 678 if (!BI) 679 // We do not understand other terminator instructions. 680 return Changed; 681 682 // Don't bother trying to unswitch past an unconditional branch or a branch 683 // with a constant value. These should be removed by simplify-cfg prior to 684 // running this pass. 685 if (!BI->isConditional() || isa<Constant>(BI->getCondition())) 686 return Changed; 687 688 // Found a trivial condition candidate: non-foldable conditional branch. If 689 // we fail to unswitch this, we can't do anything else that is trivial. 690 if (!unswitchTrivialBranch(L, *BI, DT, LI)) 691 return Changed; 692 693 // Mark that we managed to unswitch something. 694 Changed = true; 695 696 // We unswitched the branch. This should always leave us with an 697 // unconditional branch that we can follow now. 698 BI = cast<BranchInst>(CurrentBB->getTerminator()); 699 assert(!BI->isConditional() && 700 "Cannot form a conditional branch by unswitching1"); 701 CurrentBB = BI->getSuccessor(0); 702 703 // When continuing, if we exit the loop or reach a previous visited block, 704 // then we can not reach any trivial condition candidates (unfoldable 705 // branch instructions or switch instructions) and no unswitch can happen. 706 } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second); 707 708 return Changed; 709 } 710 711 /// Unswitch control flow predicated on loop invariant conditions. 712 /// 713 /// This first hoists all branches or switches which are trivial (IE, do not 714 /// require duplicating any part of the loop) out of the loop body. It then 715 /// looks at other loop invariant control flows and tries to unswitch those as 716 /// well by cloning the loop if the result is small enough. 717 static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, 718 AssumptionCache &AC) { 719 assert(L.isLCSSAForm(DT) && 720 "Loops must be in LCSSA form before unswitching."); 721 bool Changed = false; 722 723 // Must be in loop simplified form: we need a preheader and dedicated exits. 724 if (!L.isLoopSimplifyForm()) 725 return false; 726 727 // Try trivial unswitch first before loop over other basic blocks in the loop. 728 Changed |= unswitchAllTrivialConditions(L, DT, LI); 729 730 // FIXME: Add support for non-trivial unswitching by cloning the loop. 731 732 return Changed; 733 } 734 735 PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, 736 LoopStandardAnalysisResults &AR, 737 LPMUpdater &U) { 738 Function &F = *L.getHeader()->getParent(); 739 (void)F; 740 741 DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L << "\n"); 742 743 if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC)) 744 return PreservedAnalyses::all(); 745 746 #ifndef NDEBUG 747 // Historically this pass has had issues with the dominator tree so verify it 748 // in asserts builds. 749 AR.DT.verifyDomTree(); 750 #endif 751 return getLoopPassPreservedAnalyses(); 752 } 753 754 namespace { 755 756 class SimpleLoopUnswitchLegacyPass : public LoopPass { 757 public: 758 static char ID; // Pass ID, replacement for typeid 759 760 explicit SimpleLoopUnswitchLegacyPass() : LoopPass(ID) { 761 initializeSimpleLoopUnswitchLegacyPassPass( 762 *PassRegistry::getPassRegistry()); 763 } 764 765 bool runOnLoop(Loop *L, LPPassManager &LPM) override; 766 767 void getAnalysisUsage(AnalysisUsage &AU) const override { 768 AU.addRequired<AssumptionCacheTracker>(); 769 getLoopAnalysisUsage(AU); 770 } 771 }; 772 773 } // end anonymous namespace 774 775 bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { 776 if (skipLoop(L)) 777 return false; 778 779 Function &F = *L->getHeader()->getParent(); 780 781 DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L << "\n"); 782 783 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 784 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 785 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 786 787 bool Changed = unswitchLoop(*L, DT, LI, AC); 788 789 #ifndef NDEBUG 790 // Historically this pass has had issues with the dominator tree so verify it 791 // in asserts builds. 792 DT.verifyDomTree(); 793 #endif 794 return Changed; 795 } 796 797 char SimpleLoopUnswitchLegacyPass::ID = 0; 798 INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", 799 "Simple unswitch loops", false, false) 800 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 801 INITIALIZE_PASS_DEPENDENCY(LoopPass) 802 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 803 INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", 804 "Simple unswitch loops", false, false) 805 806 Pass *llvm::createSimpleLoopUnswitchLegacyPass() { 807 return new SimpleLoopUnswitchLegacyPass(); 808 } 809