1 //===- StructurizeCFG.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/ADT/DenseMap.h" 10 #include "llvm/ADT/MapVector.h" 11 #include "llvm/ADT/PostOrderIterator.h" 12 #include "llvm/ADT/STLExtras.h" 13 #include "llvm/ADT/SmallPtrSet.h" 14 #include "llvm/ADT/SmallVector.h" 15 #include "llvm/Analysis/InstructionSimplify.h" 16 #include "llvm/Analysis/LegacyDivergenceAnalysis.h" 17 #include "llvm/Analysis/LoopInfo.h" 18 #include "llvm/Analysis/RegionInfo.h" 19 #include "llvm/Analysis/RegionIterator.h" 20 #include "llvm/Analysis/RegionPass.h" 21 #include "llvm/IR/Argument.h" 22 #include "llvm/IR/BasicBlock.h" 23 #include "llvm/IR/CFG.h" 24 #include "llvm/IR/Constant.h" 25 #include "llvm/IR/Constants.h" 26 #include "llvm/IR/Dominators.h" 27 #include "llvm/IR/Function.h" 28 #include "llvm/IR/InstrTypes.h" 29 #include "llvm/IR/Instruction.h" 30 #include "llvm/IR/Instructions.h" 31 #include "llvm/IR/Metadata.h" 32 #include "llvm/IR/PatternMatch.h" 33 #include "llvm/IR/Type.h" 34 #include "llvm/IR/Use.h" 35 #include "llvm/IR/User.h" 36 #include "llvm/IR/Value.h" 37 #include "llvm/IR/ValueHandle.h" 38 #include "llvm/InitializePasses.h" 39 #include "llvm/Pass.h" 40 #include "llvm/Support/Casting.h" 41 #include "llvm/Support/CommandLine.h" 42 #include "llvm/Support/Debug.h" 43 #include "llvm/Support/ErrorHandling.h" 44 #include "llvm/Support/raw_ostream.h" 45 #include "llvm/Transforms/Scalar.h" 46 #include "llvm/Transforms/Utils.h" 47 #include "llvm/Transforms/Utils/Local.h" 48 #include "llvm/Transforms/Utils/SSAUpdater.h" 49 #include <algorithm> 50 #include <cassert> 51 #include <utility> 52 53 using namespace llvm; 54 using namespace llvm::PatternMatch; 55 56 #define DEBUG_TYPE "structurizecfg" 57 58 // The name for newly created blocks. 59 static const char *const FlowBlockName = "Flow"; 60 61 namespace { 62 63 static cl::opt<bool> ForceSkipUniformRegions( 64 "structurizecfg-skip-uniform-regions", 65 cl::Hidden, 66 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"), 67 cl::init(false)); 68 69 static cl::opt<bool> 70 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden, 71 cl::desc("Allow relaxed uniform region checks"), 72 cl::init(true)); 73 74 // Definition of the complex types used in this pass. 75 76 using BBValuePair = std::pair<BasicBlock *, Value *>; 77 78 using RNVector = SmallVector<RegionNode *, 8>; 79 using BBVector = SmallVector<BasicBlock *, 8>; 80 using BranchVector = SmallVector<BranchInst *, 8>; 81 using BBValueVector = SmallVector<BBValuePair, 2>; 82 83 using BBSet = SmallPtrSet<BasicBlock *, 8>; 84 85 using PhiMap = MapVector<PHINode *, BBValueVector>; 86 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>; 87 88 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>; 89 using BBPredicates = DenseMap<BasicBlock *, Value *>; 90 using PredMap = DenseMap<BasicBlock *, BBPredicates>; 91 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>; 92 93 /// Finds the nearest common dominator of a set of BasicBlocks. 94 /// 95 /// For every BB you add to the set, you can specify whether we "remember" the 96 /// block. When you get the common dominator, you can also ask whether it's one 97 /// of the blocks we remembered. 98 class NearestCommonDominator { 99 DominatorTree *DT; 100 BasicBlock *Result = nullptr; 101 bool ResultIsRemembered = false; 102 103 /// Add BB to the resulting dominator. 104 void addBlock(BasicBlock *BB, bool Remember) { 105 if (!Result) { 106 Result = BB; 107 ResultIsRemembered = Remember; 108 return; 109 } 110 111 BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB); 112 if (NewResult != Result) 113 ResultIsRemembered = false; 114 if (NewResult == BB) 115 ResultIsRemembered |= Remember; 116 Result = NewResult; 117 } 118 119 public: 120 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {} 121 122 void addBlock(BasicBlock *BB) { 123 addBlock(BB, /* Remember = */ false); 124 } 125 126 void addAndRememberBlock(BasicBlock *BB) { 127 addBlock(BB, /* Remember = */ true); 128 } 129 130 /// Get the nearest common dominator of all the BBs added via addBlock() and 131 /// addAndRememberBlock(). 132 BasicBlock *result() { return Result; } 133 134 /// Is the BB returned by getResult() one of the blocks we added to the set 135 /// with addAndRememberBlock()? 136 bool resultIsRememberedBlock() { return ResultIsRemembered; } 137 }; 138 139 /// Transforms the control flow graph on one single entry/exit region 140 /// at a time. 141 /// 142 /// After the transform all "If"/"Then"/"Else" style control flow looks like 143 /// this: 144 /// 145 /// \verbatim 146 /// 1 147 /// || 148 /// | | 149 /// 2 | 150 /// | / 151 /// |/ 152 /// 3 153 /// || Where: 154 /// | | 1 = "If" block, calculates the condition 155 /// 4 | 2 = "Then" subregion, runs if the condition is true 156 /// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow 157 /// |/ 4 = "Else" optional subregion, runs if the condition is false 158 /// 5 5 = "End" block, also rejoins the control flow 159 /// \endverbatim 160 /// 161 /// Control flow is expressed as a branch where the true exit goes into the 162 /// "Then"/"Else" region, while the false exit skips the region 163 /// The condition for the optional "Else" region is expressed as a PHI node. 164 /// The incoming values of the PHI node are true for the "If" edge and false 165 /// for the "Then" edge. 166 /// 167 /// Additionally to that even complicated loops look like this: 168 /// 169 /// \verbatim 170 /// 1 171 /// || 172 /// | | 173 /// 2 ^ Where: 174 /// | / 1 = "Entry" block 175 /// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block 176 /// 3 3 = "Flow" block, with back edge to entry block 177 /// | 178 /// \endverbatim 179 /// 180 /// The back edge of the "Flow" block is always on the false side of the branch 181 /// while the true side continues the general flow. So the loop condition 182 /// consist of a network of PHI nodes where the true incoming values expresses 183 /// breaks and the false values expresses continue states. 184 class StructurizeCFG : public RegionPass { 185 bool SkipUniformRegions; 186 187 Type *Boolean; 188 ConstantInt *BoolTrue; 189 ConstantInt *BoolFalse; 190 UndefValue *BoolUndef; 191 192 Function *Func; 193 Region *ParentRegion; 194 195 LegacyDivergenceAnalysis *DA; 196 DominatorTree *DT; 197 LoopInfo *LI; 198 199 SmallVector<RegionNode *, 8> Order; 200 BBSet Visited; 201 202 SmallVector<WeakVH, 8> AffectedPhis; 203 BBPhiMap DeletedPhis; 204 BB2BBVecMap AddedPhis; 205 206 PredMap Predicates; 207 BranchVector Conditions; 208 209 BB2BBMap Loops; 210 PredMap LoopPreds; 211 BranchVector LoopConds; 212 213 RegionNode *PrevNode; 214 215 void orderNodes(); 216 217 Loop *getAdjustedLoop(RegionNode *RN); 218 unsigned getAdjustedLoopDepth(RegionNode *RN); 219 220 void analyzeLoops(RegionNode *N); 221 222 Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert); 223 224 void gatherPredicates(RegionNode *N); 225 226 void collectInfos(); 227 228 void insertConditions(bool Loops); 229 230 void delPhiValues(BasicBlock *From, BasicBlock *To); 231 232 void addPhiValues(BasicBlock *From, BasicBlock *To); 233 234 void setPhiValues(); 235 236 void simplifyAffectedPhis(); 237 238 void killTerminator(BasicBlock *BB); 239 240 void changeExit(RegionNode *Node, BasicBlock *NewExit, 241 bool IncludeDominator); 242 243 BasicBlock *getNextFlow(BasicBlock *Dominator); 244 245 BasicBlock *needPrefix(bool NeedEmpty); 246 247 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed); 248 249 void setPrevNode(BasicBlock *BB); 250 251 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node); 252 253 bool isPredictableTrue(RegionNode *Node); 254 255 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd); 256 257 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd); 258 259 void createFlow(); 260 261 void rebuildSSA(); 262 263 public: 264 static char ID; 265 266 explicit StructurizeCFG(bool SkipUniformRegions_ = false) 267 : RegionPass(ID), 268 SkipUniformRegions(SkipUniformRegions_) { 269 if (ForceSkipUniformRegions.getNumOccurrences()) 270 SkipUniformRegions = ForceSkipUniformRegions.getValue(); 271 initializeStructurizeCFGPass(*PassRegistry::getPassRegistry()); 272 } 273 274 bool doInitialization(Region *R, RGPassManager &RGM) override; 275 276 bool runOnRegion(Region *R, RGPassManager &RGM) override; 277 278 StringRef getPassName() const override { return "Structurize control flow"; } 279 280 void getAnalysisUsage(AnalysisUsage &AU) const override { 281 if (SkipUniformRegions) 282 AU.addRequired<LegacyDivergenceAnalysis>(); 283 AU.addRequiredID(LowerSwitchID); 284 AU.addRequired<DominatorTreeWrapperPass>(); 285 AU.addRequired<LoopInfoWrapperPass>(); 286 287 AU.addPreserved<DominatorTreeWrapperPass>(); 288 RegionPass::getAnalysisUsage(AU); 289 } 290 }; 291 292 } // end anonymous namespace 293 294 char StructurizeCFG::ID = 0; 295 296 INITIALIZE_PASS_BEGIN(StructurizeCFG, "structurizecfg", "Structurize the CFG", 297 false, false) 298 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis) 299 INITIALIZE_PASS_DEPENDENCY(LowerSwitch) 300 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 301 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 302 INITIALIZE_PASS_END(StructurizeCFG, "structurizecfg", "Structurize the CFG", 303 false, false) 304 305 /// Initialize the types and constants used in the pass 306 bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) { 307 LLVMContext &Context = R->getEntry()->getContext(); 308 309 Boolean = Type::getInt1Ty(Context); 310 BoolTrue = ConstantInt::getTrue(Context); 311 BoolFalse = ConstantInt::getFalse(Context); 312 BoolUndef = UndefValue::get(Boolean); 313 314 return false; 315 } 316 317 /// Use the exit block to determine the loop if RN is a SubRegion. 318 Loop *StructurizeCFG::getAdjustedLoop(RegionNode *RN) { 319 if (RN->isSubRegion()) { 320 Region *SubRegion = RN->getNodeAs<Region>(); 321 return LI->getLoopFor(SubRegion->getExit()); 322 } 323 324 return LI->getLoopFor(RN->getEntry()); 325 } 326 327 /// Use the exit block to determine the loop depth if RN is a SubRegion. 328 unsigned StructurizeCFG::getAdjustedLoopDepth(RegionNode *RN) { 329 if (RN->isSubRegion()) { 330 Region *SubR = RN->getNodeAs<Region>(); 331 return LI->getLoopDepth(SubR->getExit()); 332 } 333 334 return LI->getLoopDepth(RN->getEntry()); 335 } 336 337 /// Build up the general order of nodes 338 void StructurizeCFG::orderNodes() { 339 ReversePostOrderTraversal<Region*> RPOT(ParentRegion); 340 SmallDenseMap<Loop*, unsigned, 8> LoopBlocks; 341 342 // The reverse post-order traversal of the list gives us an ordering close 343 // to what we want. The only problem with it is that sometimes backedges 344 // for outer loops will be visited before backedges for inner loops. 345 for (RegionNode *RN : RPOT) { 346 Loop *Loop = getAdjustedLoop(RN); 347 ++LoopBlocks[Loop]; 348 } 349 350 unsigned CurrentLoopDepth = 0; 351 Loop *CurrentLoop = nullptr; 352 for (auto I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { 353 RegionNode *RN = cast<RegionNode>(*I); 354 unsigned LoopDepth = getAdjustedLoopDepth(RN); 355 356 if (is_contained(Order, *I)) 357 continue; 358 359 if (LoopDepth < CurrentLoopDepth) { 360 // Make sure we have visited all blocks in this loop before moving back to 361 // the outer loop. 362 363 auto LoopI = I; 364 while (unsigned &BlockCount = LoopBlocks[CurrentLoop]) { 365 LoopI++; 366 if (getAdjustedLoop(cast<RegionNode>(*LoopI)) == CurrentLoop) { 367 --BlockCount; 368 Order.push_back(*LoopI); 369 } 370 } 371 } 372 373 CurrentLoop = getAdjustedLoop(RN); 374 if (CurrentLoop) 375 LoopBlocks[CurrentLoop]--; 376 377 CurrentLoopDepth = LoopDepth; 378 Order.push_back(*I); 379 } 380 381 // This pass originally used a post-order traversal and then operated on 382 // the list in reverse. Now that we are using a reverse post-order traversal 383 // rather than re-working the whole pass to operate on the list in order, 384 // we just reverse the list and continue to operate on it in reverse. 385 std::reverse(Order.begin(), Order.end()); 386 } 387 388 /// Determine the end of the loops 389 void StructurizeCFG::analyzeLoops(RegionNode *N) { 390 if (N->isSubRegion()) { 391 // Test for exit as back edge 392 BasicBlock *Exit = N->getNodeAs<Region>()->getExit(); 393 if (Visited.count(Exit)) 394 Loops[Exit] = N->getEntry(); 395 396 } else { 397 // Test for successors as back edge 398 BasicBlock *BB = N->getNodeAs<BasicBlock>(); 399 BranchInst *Term = cast<BranchInst>(BB->getTerminator()); 400 401 for (BasicBlock *Succ : Term->successors()) 402 if (Visited.count(Succ)) 403 Loops[Succ] = BB; 404 } 405 } 406 407 /// Build the condition for one edge 408 Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, 409 bool Invert) { 410 Value *Cond = Invert ? BoolFalse : BoolTrue; 411 if (Term->isConditional()) { 412 Cond = Term->getCondition(); 413 414 if (Idx != (unsigned)Invert) 415 Cond = invertCondition(Cond); 416 } 417 return Cond; 418 } 419 420 /// Analyze the predecessors of each block and build up predicates 421 void StructurizeCFG::gatherPredicates(RegionNode *N) { 422 RegionInfo *RI = ParentRegion->getRegionInfo(); 423 BasicBlock *BB = N->getEntry(); 424 BBPredicates &Pred = Predicates[BB]; 425 BBPredicates &LPred = LoopPreds[BB]; 426 427 for (BasicBlock *P : predecessors(BB)) { 428 // Ignore it if it's a branch from outside into our region entry 429 if (!ParentRegion->contains(P)) 430 continue; 431 432 Region *R = RI->getRegionFor(P); 433 if (R == ParentRegion) { 434 // It's a top level block in our region 435 BranchInst *Term = cast<BranchInst>(P->getTerminator()); 436 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { 437 BasicBlock *Succ = Term->getSuccessor(i); 438 if (Succ != BB) 439 continue; 440 441 if (Visited.count(P)) { 442 // Normal forward edge 443 if (Term->isConditional()) { 444 // Try to treat it like an ELSE block 445 BasicBlock *Other = Term->getSuccessor(!i); 446 if (Visited.count(Other) && !Loops.count(Other) && 447 !Pred.count(Other) && !Pred.count(P)) { 448 449 Pred[Other] = BoolFalse; 450 Pred[P] = BoolTrue; 451 continue; 452 } 453 } 454 Pred[P] = buildCondition(Term, i, false); 455 } else { 456 // Back edge 457 LPred[P] = buildCondition(Term, i, true); 458 } 459 } 460 } else { 461 // It's an exit from a sub region 462 while (R->getParent() != ParentRegion) 463 R = R->getParent(); 464 465 // Edge from inside a subregion to its entry, ignore it 466 if (*R == *N) 467 continue; 468 469 BasicBlock *Entry = R->getEntry(); 470 if (Visited.count(Entry)) 471 Pred[Entry] = BoolTrue; 472 else 473 LPred[Entry] = BoolFalse; 474 } 475 } 476 } 477 478 /// Collect various loop and predicate infos 479 void StructurizeCFG::collectInfos() { 480 // Reset predicate 481 Predicates.clear(); 482 483 // and loop infos 484 Loops.clear(); 485 LoopPreds.clear(); 486 487 // Reset the visited nodes 488 Visited.clear(); 489 490 for (RegionNode *RN : reverse(Order)) { 491 LLVM_DEBUG(dbgs() << "Visiting: " 492 << (RN->isSubRegion() ? "SubRegion with entry: " : "") 493 << RN->getEntry()->getName() << " Loop Depth: " 494 << LI->getLoopDepth(RN->getEntry()) << "\n"); 495 496 // Analyze all the conditions leading to a node 497 gatherPredicates(RN); 498 499 // Remember that we've seen this node 500 Visited.insert(RN->getEntry()); 501 502 // Find the last back edges 503 analyzeLoops(RN); 504 } 505 } 506 507 /// Insert the missing branch conditions 508 void StructurizeCFG::insertConditions(bool Loops) { 509 BranchVector &Conds = Loops ? LoopConds : Conditions; 510 Value *Default = Loops ? BoolTrue : BoolFalse; 511 SSAUpdater PhiInserter; 512 513 for (BranchInst *Term : Conds) { 514 assert(Term->isConditional()); 515 516 BasicBlock *Parent = Term->getParent(); 517 BasicBlock *SuccTrue = Term->getSuccessor(0); 518 BasicBlock *SuccFalse = Term->getSuccessor(1); 519 520 PhiInserter.Initialize(Boolean, ""); 521 PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default); 522 PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default); 523 524 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue]; 525 526 NearestCommonDominator Dominator(DT); 527 Dominator.addBlock(Parent); 528 529 Value *ParentValue = nullptr; 530 for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) { 531 BasicBlock *BB = BBAndPred.first; 532 Value *Pred = BBAndPred.second; 533 534 if (BB == Parent) { 535 ParentValue = Pred; 536 break; 537 } 538 PhiInserter.AddAvailableValue(BB, Pred); 539 Dominator.addAndRememberBlock(BB); 540 } 541 542 if (ParentValue) { 543 Term->setCondition(ParentValue); 544 } else { 545 if (!Dominator.resultIsRememberedBlock()) 546 PhiInserter.AddAvailableValue(Dominator.result(), Default); 547 548 Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent)); 549 } 550 } 551 } 552 553 /// Remove all PHI values coming from "From" into "To" and remember 554 /// them in DeletedPhis 555 void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { 556 PhiMap &Map = DeletedPhis[To]; 557 for (PHINode &Phi : To->phis()) { 558 bool Recorded = false; 559 while (Phi.getBasicBlockIndex(From) != -1) { 560 Value *Deleted = Phi.removeIncomingValue(From, false); 561 Map[&Phi].push_back(std::make_pair(From, Deleted)); 562 if (!Recorded) { 563 AffectedPhis.push_back(&Phi); 564 Recorded = true; 565 } 566 } 567 } 568 } 569 570 /// Add a dummy PHI value as soon as we knew the new predecessor 571 void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { 572 for (PHINode &Phi : To->phis()) { 573 Value *Undef = UndefValue::get(Phi.getType()); 574 Phi.addIncoming(Undef, From); 575 } 576 AddedPhis[To].push_back(From); 577 } 578 579 /// Add the real PHI value as soon as everything is set up 580 void StructurizeCFG::setPhiValues() { 581 SmallVector<PHINode *, 8> InsertedPhis; 582 SSAUpdater Updater(&InsertedPhis); 583 for (const auto &AddedPhi : AddedPhis) { 584 BasicBlock *To = AddedPhi.first; 585 const BBVector &From = AddedPhi.second; 586 587 if (!DeletedPhis.count(To)) 588 continue; 589 590 PhiMap &Map = DeletedPhis[To]; 591 for (const auto &PI : Map) { 592 PHINode *Phi = PI.first; 593 Value *Undef = UndefValue::get(Phi->getType()); 594 Updater.Initialize(Phi->getType(), ""); 595 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 596 Updater.AddAvailableValue(To, Undef); 597 598 NearestCommonDominator Dominator(DT); 599 Dominator.addBlock(To); 600 for (const auto &VI : PI.second) { 601 Updater.AddAvailableValue(VI.first, VI.second); 602 Dominator.addAndRememberBlock(VI.first); 603 } 604 605 if (!Dominator.resultIsRememberedBlock()) 606 Updater.AddAvailableValue(Dominator.result(), Undef); 607 608 for (BasicBlock *FI : From) 609 Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI)); 610 AffectedPhis.push_back(Phi); 611 } 612 613 DeletedPhis.erase(To); 614 } 615 assert(DeletedPhis.empty()); 616 617 AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end()); 618 } 619 620 void StructurizeCFG::simplifyAffectedPhis() { 621 bool Changed; 622 do { 623 Changed = false; 624 SimplifyQuery Q(Func->getParent()->getDataLayout()); 625 Q.DT = DT; 626 for (WeakVH VH : AffectedPhis) { 627 if (auto Phi = dyn_cast_or_null<PHINode>(VH)) { 628 if (auto NewValue = SimplifyInstruction(Phi, Q)) { 629 Phi->replaceAllUsesWith(NewValue); 630 Phi->eraseFromParent(); 631 Changed = true; 632 } 633 } 634 } 635 } while (Changed); 636 } 637 638 /// Remove phi values from all successors and then remove the terminator. 639 void StructurizeCFG::killTerminator(BasicBlock *BB) { 640 Instruction *Term = BB->getTerminator(); 641 if (!Term) 642 return; 643 644 for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); 645 SI != SE; ++SI) 646 delPhiValues(BB, *SI); 647 648 if (DA) 649 DA->removeValue(Term); 650 Term->eraseFromParent(); 651 } 652 653 /// Let node exit(s) point to NewExit 654 void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, 655 bool IncludeDominator) { 656 if (Node->isSubRegion()) { 657 Region *SubRegion = Node->getNodeAs<Region>(); 658 BasicBlock *OldExit = SubRegion->getExit(); 659 BasicBlock *Dominator = nullptr; 660 661 // Find all the edges from the sub region to the exit 662 for (auto BBI = pred_begin(OldExit), E = pred_end(OldExit); BBI != E;) { 663 // Incrememt BBI before mucking with BB's terminator. 664 BasicBlock *BB = *BBI++; 665 666 if (!SubRegion->contains(BB)) 667 continue; 668 669 // Modify the edges to point to the new exit 670 delPhiValues(BB, OldExit); 671 BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit); 672 addPhiValues(BB, NewExit); 673 674 // Find the new dominator (if requested) 675 if (IncludeDominator) { 676 if (!Dominator) 677 Dominator = BB; 678 else 679 Dominator = DT->findNearestCommonDominator(Dominator, BB); 680 } 681 } 682 683 // Change the dominator (if requested) 684 if (Dominator) 685 DT->changeImmediateDominator(NewExit, Dominator); 686 687 // Update the region info 688 SubRegion->replaceExit(NewExit); 689 } else { 690 BasicBlock *BB = Node->getNodeAs<BasicBlock>(); 691 killTerminator(BB); 692 BranchInst::Create(NewExit, BB); 693 addPhiValues(BB, NewExit); 694 if (IncludeDominator) 695 DT->changeImmediateDominator(NewExit, BB); 696 } 697 } 698 699 /// Create a new flow node and update dominator tree and region info 700 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) { 701 LLVMContext &Context = Func->getContext(); 702 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() : 703 Order.back()->getEntry(); 704 BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, 705 Func, Insert); 706 DT->addNewBlock(Flow, Dominator); 707 ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); 708 return Flow; 709 } 710 711 /// Create a new or reuse the previous node as flow node 712 BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) { 713 BasicBlock *Entry = PrevNode->getEntry(); 714 715 if (!PrevNode->isSubRegion()) { 716 killTerminator(Entry); 717 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end()) 718 return Entry; 719 } 720 721 // create a new flow node 722 BasicBlock *Flow = getNextFlow(Entry); 723 724 // and wire it up 725 changeExit(PrevNode, Flow, true); 726 PrevNode = ParentRegion->getBBNode(Flow); 727 return Flow; 728 } 729 730 /// Returns the region exit if possible, otherwise just a new flow node 731 BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow, 732 bool ExitUseAllowed) { 733 if (!Order.empty() || !ExitUseAllowed) 734 return getNextFlow(Flow); 735 736 BasicBlock *Exit = ParentRegion->getExit(); 737 DT->changeImmediateDominator(Exit, Flow); 738 addPhiValues(Flow, Exit); 739 return Exit; 740 } 741 742 /// Set the previous node 743 void StructurizeCFG::setPrevNode(BasicBlock *BB) { 744 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) 745 : nullptr; 746 } 747 748 /// Does BB dominate all the predicates of Node? 749 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) { 750 BBPredicates &Preds = Predicates[Node->getEntry()]; 751 return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) { 752 return DT->dominates(BB, Pred.first); 753 }); 754 } 755 756 /// Can we predict that this node will always be called? 757 bool StructurizeCFG::isPredictableTrue(RegionNode *Node) { 758 BBPredicates &Preds = Predicates[Node->getEntry()]; 759 bool Dominated = false; 760 761 // Regionentry is always true 762 if (!PrevNode) 763 return true; 764 765 for (std::pair<BasicBlock*, Value*> Pred : Preds) { 766 BasicBlock *BB = Pred.first; 767 Value *V = Pred.second; 768 769 if (V != BoolTrue) 770 return false; 771 772 if (!Dominated && DT->dominates(BB, PrevNode->getEntry())) 773 Dominated = true; 774 } 775 776 // TODO: The dominator check is too strict 777 return Dominated; 778 } 779 780 /// Take one node from the order vector and wire it up 781 void StructurizeCFG::wireFlow(bool ExitUseAllowed, 782 BasicBlock *LoopEnd) { 783 RegionNode *Node = Order.pop_back_val(); 784 Visited.insert(Node->getEntry()); 785 786 if (isPredictableTrue(Node)) { 787 // Just a linear flow 788 if (PrevNode) { 789 changeExit(PrevNode, Node->getEntry(), true); 790 } 791 PrevNode = Node; 792 } else { 793 // Insert extra prefix node (or reuse last one) 794 BasicBlock *Flow = needPrefix(false); 795 796 // Insert extra postfix node (or use exit instead) 797 BasicBlock *Entry = Node->getEntry(); 798 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed); 799 800 // let it point to entry and next block 801 Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow)); 802 addPhiValues(Flow, Entry); 803 DT->changeImmediateDominator(Entry, Flow); 804 805 PrevNode = Node; 806 while (!Order.empty() && !Visited.count(LoopEnd) && 807 dominatesPredicates(Entry, Order.back())) { 808 handleLoops(false, LoopEnd); 809 } 810 811 changeExit(PrevNode, Next, false); 812 setPrevNode(Next); 813 } 814 } 815 816 void StructurizeCFG::handleLoops(bool ExitUseAllowed, 817 BasicBlock *LoopEnd) { 818 RegionNode *Node = Order.back(); 819 BasicBlock *LoopStart = Node->getEntry(); 820 821 if (!Loops.count(LoopStart)) { 822 wireFlow(ExitUseAllowed, LoopEnd); 823 return; 824 } 825 826 if (!isPredictableTrue(Node)) 827 LoopStart = needPrefix(true); 828 829 LoopEnd = Loops[Node->getEntry()]; 830 wireFlow(false, LoopEnd); 831 while (!Visited.count(LoopEnd)) { 832 handleLoops(false, LoopEnd); 833 } 834 835 // If the start of the loop is the entry block, we can't branch to it so 836 // insert a new dummy entry block. 837 Function *LoopFunc = LoopStart->getParent(); 838 if (LoopStart == &LoopFunc->getEntryBlock()) { 839 LoopStart->setName("entry.orig"); 840 841 BasicBlock *NewEntry = 842 BasicBlock::Create(LoopStart->getContext(), 843 "entry", 844 LoopFunc, 845 LoopStart); 846 BranchInst::Create(LoopStart, NewEntry); 847 DT->setNewRoot(NewEntry); 848 } 849 850 // Create an extra loop end node 851 LoopEnd = needPrefix(false); 852 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed); 853 LoopConds.push_back(BranchInst::Create(Next, LoopStart, 854 BoolUndef, LoopEnd)); 855 addPhiValues(LoopEnd, LoopStart); 856 setPrevNode(Next); 857 } 858 859 /// After this function control flow looks like it should be, but 860 /// branches and PHI nodes only have undefined conditions. 861 void StructurizeCFG::createFlow() { 862 BasicBlock *Exit = ParentRegion->getExit(); 863 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit); 864 865 AffectedPhis.clear(); 866 DeletedPhis.clear(); 867 AddedPhis.clear(); 868 Conditions.clear(); 869 LoopConds.clear(); 870 871 PrevNode = nullptr; 872 Visited.clear(); 873 874 while (!Order.empty()) { 875 handleLoops(EntryDominatesExit, nullptr); 876 } 877 878 if (PrevNode) 879 changeExit(PrevNode, Exit, EntryDominatesExit); 880 else 881 assert(EntryDominatesExit); 882 } 883 884 /// Handle a rare case where the disintegrated nodes instructions 885 /// no longer dominate all their uses. Not sure if this is really necessary 886 void StructurizeCFG::rebuildSSA() { 887 SSAUpdater Updater; 888 for (BasicBlock *BB : ParentRegion->blocks()) 889 for (Instruction &I : *BB) { 890 bool Initialized = false; 891 // We may modify the use list as we iterate over it, so be careful to 892 // compute the next element in the use list at the top of the loop. 893 for (auto UI = I.use_begin(), E = I.use_end(); UI != E;) { 894 Use &U = *UI++; 895 Instruction *User = cast<Instruction>(U.getUser()); 896 if (User->getParent() == BB) { 897 continue; 898 } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 899 if (UserPN->getIncomingBlock(U) == BB) 900 continue; 901 } 902 903 if (DT->dominates(&I, User)) 904 continue; 905 906 if (!Initialized) { 907 Value *Undef = UndefValue::get(I.getType()); 908 Updater.Initialize(I.getType(), ""); 909 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 910 Updater.AddAvailableValue(BB, &I); 911 Initialized = true; 912 } 913 Updater.RewriteUseAfterInsertions(U); 914 } 915 } 916 } 917 918 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID, 919 const LegacyDivergenceAnalysis &DA) { 920 // Bool for if all sub-regions are uniform. 921 bool SubRegionsAreUniform = true; 922 // Count of how many direct children are conditional. 923 unsigned ConditionalDirectChildren = 0; 924 925 for (auto E : R->elements()) { 926 if (!E->isSubRegion()) { 927 auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator()); 928 if (!Br || !Br->isConditional()) 929 continue; 930 931 if (!DA.isUniform(Br)) 932 return false; 933 934 // One of our direct children is conditional. 935 ConditionalDirectChildren++; 936 937 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName() 938 << " has uniform terminator\n"); 939 } else { 940 // Explicitly refuse to treat regions as uniform if they have non-uniform 941 // subregions. We cannot rely on DivergenceAnalysis for branches in 942 // subregions because those branches may have been removed and re-created, 943 // so we look for our metadata instead. 944 // 945 // Warning: It would be nice to treat regions as uniform based only on 946 // their direct child basic blocks' terminators, regardless of whether 947 // subregions are uniform or not. However, this requires a very careful 948 // look at SIAnnotateControlFlow to make sure nothing breaks there. 949 for (auto BB : E->getNodeAs<Region>()->blocks()) { 950 auto Br = dyn_cast<BranchInst>(BB->getTerminator()); 951 if (!Br || !Br->isConditional()) 952 continue; 953 954 if (!Br->getMetadata(UniformMDKindID)) { 955 // Early exit if we cannot have relaxed uniform regions. 956 if (!RelaxedUniformRegions) 957 return false; 958 959 SubRegionsAreUniform = false; 960 break; 961 } 962 } 963 } 964 } 965 966 // Our region is uniform if: 967 // 1. All conditional branches that are direct children are uniform (checked 968 // above). 969 // 2. And either: 970 // a. All sub-regions are uniform. 971 // b. There is one or less conditional branches among the direct children. 972 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1); 973 } 974 975 /// Run the transformation for each region found 976 bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) { 977 if (R->isTopLevelRegion()) 978 return false; 979 980 DA = nullptr; 981 982 if (SkipUniformRegions) { 983 // TODO: We could probably be smarter here with how we handle sub-regions. 984 // We currently rely on the fact that metadata is set by earlier invocations 985 // of the pass on sub-regions, and that this metadata doesn't get lost -- 986 // but we shouldn't rely on metadata for correctness! 987 unsigned UniformMDKindID = 988 R->getEntry()->getContext().getMDKindID("structurizecfg.uniform"); 989 DA = &getAnalysis<LegacyDivergenceAnalysis>(); 990 991 if (hasOnlyUniformBranches(R, UniformMDKindID, *DA)) { 992 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R 993 << '\n'); 994 995 // Mark all direct child block terminators as having been treated as 996 // uniform. To account for a possible future in which non-uniform 997 // sub-regions are treated more cleverly, indirect children are not 998 // marked as uniform. 999 MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {}); 1000 for (RegionNode *E : R->elements()) { 1001 if (E->isSubRegion()) 1002 continue; 1003 1004 if (Instruction *Term = E->getEntry()->getTerminator()) 1005 Term->setMetadata(UniformMDKindID, MD); 1006 } 1007 1008 return false; 1009 } 1010 } 1011 1012 Func = R->getEntry()->getParent(); 1013 ParentRegion = R; 1014 1015 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1016 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1017 1018 orderNodes(); 1019 collectInfos(); 1020 createFlow(); 1021 insertConditions(false); 1022 insertConditions(true); 1023 setPhiValues(); 1024 simplifyAffectedPhis(); 1025 rebuildSSA(); 1026 1027 // Cleanup 1028 Order.clear(); 1029 Visited.clear(); 1030 DeletedPhis.clear(); 1031 AddedPhis.clear(); 1032 Predicates.clear(); 1033 Conditions.clear(); 1034 Loops.clear(); 1035 LoopPreds.clear(); 1036 LoopConds.clear(); 1037 1038 return true; 1039 } 1040 1041 Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) { 1042 return new StructurizeCFG(SkipUniformRegions); 1043 } 1044