1 //===- StructurizeCFG.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/ADT/DenseMap.h" 10 #include "llvm/ADT/MapVector.h" 11 #include "llvm/ADT/SCCIterator.h" 12 #include "llvm/ADT/STLExtras.h" 13 #include "llvm/ADT/SmallPtrSet.h" 14 #include "llvm/ADT/SmallVector.h" 15 #include "llvm/Analysis/InstructionSimplify.h" 16 #include "llvm/Analysis/LegacyDivergenceAnalysis.h" 17 #include "llvm/Analysis/RegionInfo.h" 18 #include "llvm/Analysis/RegionIterator.h" 19 #include "llvm/Analysis/RegionPass.h" 20 #include "llvm/IR/Argument.h" 21 #include "llvm/IR/BasicBlock.h" 22 #include "llvm/IR/CFG.h" 23 #include "llvm/IR/Constant.h" 24 #include "llvm/IR/Constants.h" 25 #include "llvm/IR/Dominators.h" 26 #include "llvm/IR/Function.h" 27 #include "llvm/IR/InstrTypes.h" 28 #include "llvm/IR/Instruction.h" 29 #include "llvm/IR/Instructions.h" 30 #include "llvm/IR/Metadata.h" 31 #include "llvm/IR/Module.h" 32 #include "llvm/IR/PatternMatch.h" 33 #include "llvm/IR/Type.h" 34 #include "llvm/IR/Use.h" 35 #include "llvm/IR/User.h" 36 #include "llvm/IR/Value.h" 37 #include "llvm/IR/ValueHandle.h" 38 #include "llvm/InitializePasses.h" 39 #include "llvm/Pass.h" 40 #include "llvm/Support/Casting.h" 41 #include "llvm/Support/CommandLine.h" 42 #include "llvm/Support/Debug.h" 43 #include "llvm/Support/ErrorHandling.h" 44 #include "llvm/Support/raw_ostream.h" 45 #include "llvm/Transforms/Scalar.h" 46 #include "llvm/Transforms/Utils.h" 47 #include "llvm/Transforms/Utils/Local.h" 48 #include "llvm/Transforms/Utils/SSAUpdater.h" 49 #include <algorithm> 50 #include <cassert> 51 #include <utility> 52 53 using namespace llvm; 54 using namespace llvm::PatternMatch; 55 56 #define DEBUG_TYPE "structurizecfg" 57 58 // The name for newly created blocks. 59 static const char *const FlowBlockName = "Flow"; 60 61 namespace { 62 63 static cl::opt<bool> ForceSkipUniformRegions( 64 "structurizecfg-skip-uniform-regions", 65 cl::Hidden, 66 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"), 67 cl::init(false)); 68 69 static cl::opt<bool> 70 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden, 71 cl::desc("Allow relaxed uniform region checks"), 72 cl::init(true)); 73 74 // Definition of the complex types used in this pass. 75 76 using BBValuePair = std::pair<BasicBlock *, Value *>; 77 78 using RNVector = SmallVector<RegionNode *, 8>; 79 using BBVector = SmallVector<BasicBlock *, 8>; 80 using BranchVector = SmallVector<BranchInst *, 8>; 81 using BBValueVector = SmallVector<BBValuePair, 2>; 82 83 using BBSet = SmallPtrSet<BasicBlock *, 8>; 84 85 using PhiMap = MapVector<PHINode *, BBValueVector>; 86 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>; 87 88 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>; 89 using BBPredicates = DenseMap<BasicBlock *, Value *>; 90 using PredMap = DenseMap<BasicBlock *, BBPredicates>; 91 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>; 92 93 // A traits type that is intended to be used in graph algorithms. The graph 94 // traits starts at an entry node, and traverses the RegionNodes that are in 95 // the Nodes set. 96 struct SubGraphTraits { 97 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>; 98 using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType; 99 100 // This wraps a set of Nodes into the iterator, so we know which edges to 101 // filter out. 102 class WrappedSuccIterator 103 : public iterator_adaptor_base< 104 WrappedSuccIterator, BaseSuccIterator, 105 typename std::iterator_traits<BaseSuccIterator>::iterator_category, 106 NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> { 107 SmallDenseSet<RegionNode *> *Nodes; 108 109 public: 110 WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes) 111 : iterator_adaptor_base(It), Nodes(Nodes) {} 112 113 NodeRef operator*() const { return {*I, Nodes}; } 114 }; 115 116 static bool filterAll(const NodeRef &N) { return true; } 117 static bool filterSet(const NodeRef &N) { return N.second->count(N.first); } 118 119 using ChildIteratorType = 120 filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>; 121 122 static NodeRef getEntryNode(Region *R) { 123 return {GraphTraits<Region *>::getEntryNode(R), nullptr}; 124 } 125 126 static NodeRef getEntryNode(NodeRef N) { return N; } 127 128 static iterator_range<ChildIteratorType> children(const NodeRef &N) { 129 auto *filter = N.second ? &filterSet : &filterAll; 130 return make_filter_range( 131 make_range<WrappedSuccIterator>( 132 {GraphTraits<RegionNode *>::child_begin(N.first), N.second}, 133 {GraphTraits<RegionNode *>::child_end(N.first), N.second}), 134 filter); 135 } 136 137 static ChildIteratorType child_begin(const NodeRef &N) { 138 return children(N).begin(); 139 } 140 141 static ChildIteratorType child_end(const NodeRef &N) { 142 return children(N).end(); 143 } 144 }; 145 146 /// Finds the nearest common dominator of a set of BasicBlocks. 147 /// 148 /// For every BB you add to the set, you can specify whether we "remember" the 149 /// block. When you get the common dominator, you can also ask whether it's one 150 /// of the blocks we remembered. 151 class NearestCommonDominator { 152 DominatorTree *DT; 153 BasicBlock *Result = nullptr; 154 bool ResultIsRemembered = false; 155 156 /// Add BB to the resulting dominator. 157 void addBlock(BasicBlock *BB, bool Remember) { 158 if (!Result) { 159 Result = BB; 160 ResultIsRemembered = Remember; 161 return; 162 } 163 164 BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB); 165 if (NewResult != Result) 166 ResultIsRemembered = false; 167 if (NewResult == BB) 168 ResultIsRemembered |= Remember; 169 Result = NewResult; 170 } 171 172 public: 173 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {} 174 175 void addBlock(BasicBlock *BB) { 176 addBlock(BB, /* Remember = */ false); 177 } 178 179 void addAndRememberBlock(BasicBlock *BB) { 180 addBlock(BB, /* Remember = */ true); 181 } 182 183 /// Get the nearest common dominator of all the BBs added via addBlock() and 184 /// addAndRememberBlock(). 185 BasicBlock *result() { return Result; } 186 187 /// Is the BB returned by getResult() one of the blocks we added to the set 188 /// with addAndRememberBlock()? 189 bool resultIsRememberedBlock() { return ResultIsRemembered; } 190 }; 191 192 /// Transforms the control flow graph on one single entry/exit region 193 /// at a time. 194 /// 195 /// After the transform all "If"/"Then"/"Else" style control flow looks like 196 /// this: 197 /// 198 /// \verbatim 199 /// 1 200 /// || 201 /// | | 202 /// 2 | 203 /// | / 204 /// |/ 205 /// 3 206 /// || Where: 207 /// | | 1 = "If" block, calculates the condition 208 /// 4 | 2 = "Then" subregion, runs if the condition is true 209 /// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow 210 /// |/ 4 = "Else" optional subregion, runs if the condition is false 211 /// 5 5 = "End" block, also rejoins the control flow 212 /// \endverbatim 213 /// 214 /// Control flow is expressed as a branch where the true exit goes into the 215 /// "Then"/"Else" region, while the false exit skips the region 216 /// The condition for the optional "Else" region is expressed as a PHI node. 217 /// The incoming values of the PHI node are true for the "If" edge and false 218 /// for the "Then" edge. 219 /// 220 /// Additionally to that even complicated loops look like this: 221 /// 222 /// \verbatim 223 /// 1 224 /// || 225 /// | | 226 /// 2 ^ Where: 227 /// | / 1 = "Entry" block 228 /// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block 229 /// 3 3 = "Flow" block, with back edge to entry block 230 /// | 231 /// \endverbatim 232 /// 233 /// The back edge of the "Flow" block is always on the false side of the branch 234 /// while the true side continues the general flow. So the loop condition 235 /// consist of a network of PHI nodes where the true incoming values expresses 236 /// breaks and the false values expresses continue states. 237 class StructurizeCFG : public RegionPass { 238 bool SkipUniformRegions; 239 240 Type *Boolean; 241 ConstantInt *BoolTrue; 242 ConstantInt *BoolFalse; 243 UndefValue *BoolUndef; 244 245 Function *Func; 246 Region *ParentRegion; 247 248 LegacyDivergenceAnalysis *DA; 249 DominatorTree *DT; 250 251 SmallVector<RegionNode *, 8> Order; 252 BBSet Visited; 253 254 SmallVector<WeakVH, 8> AffectedPhis; 255 BBPhiMap DeletedPhis; 256 BB2BBVecMap AddedPhis; 257 258 PredMap Predicates; 259 BranchVector Conditions; 260 261 BB2BBMap Loops; 262 PredMap LoopPreds; 263 BranchVector LoopConds; 264 265 RegionNode *PrevNode; 266 267 void orderNodes(); 268 269 void analyzeLoops(RegionNode *N); 270 271 Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert); 272 273 void gatherPredicates(RegionNode *N); 274 275 void collectInfos(); 276 277 void insertConditions(bool Loops); 278 279 void delPhiValues(BasicBlock *From, BasicBlock *To); 280 281 void addPhiValues(BasicBlock *From, BasicBlock *To); 282 283 void setPhiValues(); 284 285 void simplifyAffectedPhis(); 286 287 void killTerminator(BasicBlock *BB); 288 289 void changeExit(RegionNode *Node, BasicBlock *NewExit, 290 bool IncludeDominator); 291 292 BasicBlock *getNextFlow(BasicBlock *Dominator); 293 294 BasicBlock *needPrefix(bool NeedEmpty); 295 296 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed); 297 298 void setPrevNode(BasicBlock *BB); 299 300 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node); 301 302 bool isPredictableTrue(RegionNode *Node); 303 304 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd); 305 306 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd); 307 308 void createFlow(); 309 310 void rebuildSSA(); 311 312 public: 313 static char ID; 314 315 explicit StructurizeCFG(bool SkipUniformRegions_ = false) 316 : RegionPass(ID), 317 SkipUniformRegions(SkipUniformRegions_) { 318 if (ForceSkipUniformRegions.getNumOccurrences()) 319 SkipUniformRegions = ForceSkipUniformRegions.getValue(); 320 initializeStructurizeCFGPass(*PassRegistry::getPassRegistry()); 321 } 322 323 bool doInitialization(Region *R, RGPassManager &RGM) override; 324 325 bool runOnRegion(Region *R, RGPassManager &RGM) override; 326 327 StringRef getPassName() const override { return "Structurize control flow"; } 328 329 void getAnalysisUsage(AnalysisUsage &AU) const override { 330 if (SkipUniformRegions) 331 AU.addRequired<LegacyDivergenceAnalysis>(); 332 AU.addRequiredID(LowerSwitchID); 333 AU.addRequired<DominatorTreeWrapperPass>(); 334 335 AU.addPreserved<DominatorTreeWrapperPass>(); 336 RegionPass::getAnalysisUsage(AU); 337 } 338 }; 339 340 } // end anonymous namespace 341 342 char StructurizeCFG::ID = 0; 343 344 INITIALIZE_PASS_BEGIN(StructurizeCFG, "structurizecfg", "Structurize the CFG", 345 false, false) 346 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis) 347 INITIALIZE_PASS_DEPENDENCY(LowerSwitch) 348 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 349 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 350 INITIALIZE_PASS_END(StructurizeCFG, "structurizecfg", "Structurize the CFG", 351 false, false) 352 353 /// Initialize the types and constants used in the pass 354 bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) { 355 LLVMContext &Context = R->getEntry()->getContext(); 356 357 Boolean = Type::getInt1Ty(Context); 358 BoolTrue = ConstantInt::getTrue(Context); 359 BoolFalse = ConstantInt::getFalse(Context); 360 BoolUndef = UndefValue::get(Boolean); 361 362 return false; 363 } 364 365 /// Build up the general order of nodes, by performing a topological sort of the 366 /// parent region's nodes, while ensuring that there is no outer cycle node 367 /// between any two inner cycle nodes. 368 void StructurizeCFG::orderNodes() { 369 Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion), 370 GraphTraits<Region *>::nodes_end(ParentRegion))); 371 if (Order.empty()) 372 return; 373 374 SmallDenseSet<RegionNode *> Nodes; 375 auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion); 376 377 // A list of range indices of SCCs in Order, to be processed. 378 SmallVector<std::pair<unsigned, unsigned>, 8> WorkList; 379 unsigned I = 0, E = Order.size(); 380 while (true) { 381 // Run through all the SCCs in the subgraph starting with Entry. 382 for (auto SCCI = 383 scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin( 384 EntryNode); 385 !SCCI.isAtEnd(); ++SCCI) { 386 auto &SCC = *SCCI; 387 388 // An SCC up to the size of 2, can be reduced to an entry (the last node), 389 // and a possible additional node. Therefore, it is already in order, and 390 // there is no need to add it to the work-list. 391 unsigned Size = SCC.size(); 392 if (Size > 2) 393 WorkList.emplace_back(I, I + Size); 394 395 // Add the SCC nodes to the Order array. 396 for (auto &N : SCC) { 397 assert(I < E && "SCC size mismatch!"); 398 Order[I++] = N.first; 399 } 400 } 401 assert(I == E && "SCC size mismatch!"); 402 403 // If there are no more SCCs to order, then we are done. 404 if (WorkList.empty()) 405 break; 406 407 std::tie(I, E) = WorkList.pop_back_val(); 408 409 // Collect the set of nodes in the SCC's subgraph. These are only the 410 // possible child nodes; we do not add the entry (last node) otherwise we 411 // will have the same exact SCC all over again. 412 Nodes.clear(); 413 Nodes.insert(Order.begin() + I, Order.begin() + E - 1); 414 415 // Update the entry node. 416 EntryNode.first = Order[E - 1]; 417 EntryNode.second = &Nodes; 418 } 419 } 420 421 /// Determine the end of the loops 422 void StructurizeCFG::analyzeLoops(RegionNode *N) { 423 if (N->isSubRegion()) { 424 // Test for exit as back edge 425 BasicBlock *Exit = N->getNodeAs<Region>()->getExit(); 426 if (Visited.count(Exit)) 427 Loops[Exit] = N->getEntry(); 428 429 } else { 430 // Test for successors as back edge 431 BasicBlock *BB = N->getNodeAs<BasicBlock>(); 432 BranchInst *Term = cast<BranchInst>(BB->getTerminator()); 433 434 for (BasicBlock *Succ : Term->successors()) 435 if (Visited.count(Succ)) 436 Loops[Succ] = BB; 437 } 438 } 439 440 /// Build the condition for one edge 441 Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, 442 bool Invert) { 443 Value *Cond = Invert ? BoolFalse : BoolTrue; 444 if (Term->isConditional()) { 445 Cond = Term->getCondition(); 446 447 if (Idx != (unsigned)Invert) 448 Cond = invertCondition(Cond); 449 } 450 return Cond; 451 } 452 453 /// Analyze the predecessors of each block and build up predicates 454 void StructurizeCFG::gatherPredicates(RegionNode *N) { 455 RegionInfo *RI = ParentRegion->getRegionInfo(); 456 BasicBlock *BB = N->getEntry(); 457 BBPredicates &Pred = Predicates[BB]; 458 BBPredicates &LPred = LoopPreds[BB]; 459 460 for (BasicBlock *P : predecessors(BB)) { 461 // Ignore it if it's a branch from outside into our region entry 462 if (!ParentRegion->contains(P)) 463 continue; 464 465 Region *R = RI->getRegionFor(P); 466 if (R == ParentRegion) { 467 // It's a top level block in our region 468 BranchInst *Term = cast<BranchInst>(P->getTerminator()); 469 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { 470 BasicBlock *Succ = Term->getSuccessor(i); 471 if (Succ != BB) 472 continue; 473 474 if (Visited.count(P)) { 475 // Normal forward edge 476 if (Term->isConditional()) { 477 // Try to treat it like an ELSE block 478 BasicBlock *Other = Term->getSuccessor(!i); 479 if (Visited.count(Other) && !Loops.count(Other) && 480 !Pred.count(Other) && !Pred.count(P)) { 481 482 Pred[Other] = BoolFalse; 483 Pred[P] = BoolTrue; 484 continue; 485 } 486 } 487 Pred[P] = buildCondition(Term, i, false); 488 } else { 489 // Back edge 490 LPred[P] = buildCondition(Term, i, true); 491 } 492 } 493 } else { 494 // It's an exit from a sub region 495 while (R->getParent() != ParentRegion) 496 R = R->getParent(); 497 498 // Edge from inside a subregion to its entry, ignore it 499 if (*R == *N) 500 continue; 501 502 BasicBlock *Entry = R->getEntry(); 503 if (Visited.count(Entry)) 504 Pred[Entry] = BoolTrue; 505 else 506 LPred[Entry] = BoolFalse; 507 } 508 } 509 } 510 511 /// Collect various loop and predicate infos 512 void StructurizeCFG::collectInfos() { 513 // Reset predicate 514 Predicates.clear(); 515 516 // and loop infos 517 Loops.clear(); 518 LoopPreds.clear(); 519 520 // Reset the visited nodes 521 Visited.clear(); 522 523 for (RegionNode *RN : reverse(Order)) { 524 LLVM_DEBUG(dbgs() << "Visiting: " 525 << (RN->isSubRegion() ? "SubRegion with entry: " : "") 526 << RN->getEntry()->getName() << "\n"); 527 528 // Analyze all the conditions leading to a node 529 gatherPredicates(RN); 530 531 // Remember that we've seen this node 532 Visited.insert(RN->getEntry()); 533 534 // Find the last back edges 535 analyzeLoops(RN); 536 } 537 } 538 539 /// Insert the missing branch conditions 540 void StructurizeCFG::insertConditions(bool Loops) { 541 BranchVector &Conds = Loops ? LoopConds : Conditions; 542 Value *Default = Loops ? BoolTrue : BoolFalse; 543 SSAUpdater PhiInserter; 544 545 for (BranchInst *Term : Conds) { 546 assert(Term->isConditional()); 547 548 BasicBlock *Parent = Term->getParent(); 549 BasicBlock *SuccTrue = Term->getSuccessor(0); 550 BasicBlock *SuccFalse = Term->getSuccessor(1); 551 552 PhiInserter.Initialize(Boolean, ""); 553 PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default); 554 PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default); 555 556 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue]; 557 558 NearestCommonDominator Dominator(DT); 559 Dominator.addBlock(Parent); 560 561 Value *ParentValue = nullptr; 562 for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) { 563 BasicBlock *BB = BBAndPred.first; 564 Value *Pred = BBAndPred.second; 565 566 if (BB == Parent) { 567 ParentValue = Pred; 568 break; 569 } 570 PhiInserter.AddAvailableValue(BB, Pred); 571 Dominator.addAndRememberBlock(BB); 572 } 573 574 if (ParentValue) { 575 Term->setCondition(ParentValue); 576 } else { 577 if (!Dominator.resultIsRememberedBlock()) 578 PhiInserter.AddAvailableValue(Dominator.result(), Default); 579 580 Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent)); 581 } 582 } 583 } 584 585 /// Remove all PHI values coming from "From" into "To" and remember 586 /// them in DeletedPhis 587 void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { 588 PhiMap &Map = DeletedPhis[To]; 589 for (PHINode &Phi : To->phis()) { 590 bool Recorded = false; 591 while (Phi.getBasicBlockIndex(From) != -1) { 592 Value *Deleted = Phi.removeIncomingValue(From, false); 593 Map[&Phi].push_back(std::make_pair(From, Deleted)); 594 if (!Recorded) { 595 AffectedPhis.push_back(&Phi); 596 Recorded = true; 597 } 598 } 599 } 600 } 601 602 /// Add a dummy PHI value as soon as we knew the new predecessor 603 void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { 604 for (PHINode &Phi : To->phis()) { 605 Value *Undef = UndefValue::get(Phi.getType()); 606 Phi.addIncoming(Undef, From); 607 } 608 AddedPhis[To].push_back(From); 609 } 610 611 /// Add the real PHI value as soon as everything is set up 612 void StructurizeCFG::setPhiValues() { 613 SmallVector<PHINode *, 8> InsertedPhis; 614 SSAUpdater Updater(&InsertedPhis); 615 for (const auto &AddedPhi : AddedPhis) { 616 BasicBlock *To = AddedPhi.first; 617 const BBVector &From = AddedPhi.second; 618 619 if (!DeletedPhis.count(To)) 620 continue; 621 622 PhiMap &Map = DeletedPhis[To]; 623 for (const auto &PI : Map) { 624 PHINode *Phi = PI.first; 625 Value *Undef = UndefValue::get(Phi->getType()); 626 Updater.Initialize(Phi->getType(), ""); 627 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 628 Updater.AddAvailableValue(To, Undef); 629 630 NearestCommonDominator Dominator(DT); 631 Dominator.addBlock(To); 632 for (const auto &VI : PI.second) { 633 Updater.AddAvailableValue(VI.first, VI.second); 634 Dominator.addAndRememberBlock(VI.first); 635 } 636 637 if (!Dominator.resultIsRememberedBlock()) 638 Updater.AddAvailableValue(Dominator.result(), Undef); 639 640 for (BasicBlock *FI : From) 641 Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI)); 642 AffectedPhis.push_back(Phi); 643 } 644 645 DeletedPhis.erase(To); 646 } 647 assert(DeletedPhis.empty()); 648 649 AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end()); 650 } 651 652 void StructurizeCFG::simplifyAffectedPhis() { 653 bool Changed; 654 do { 655 Changed = false; 656 SimplifyQuery Q(Func->getParent()->getDataLayout()); 657 Q.DT = DT; 658 for (WeakVH VH : AffectedPhis) { 659 if (auto Phi = dyn_cast_or_null<PHINode>(VH)) { 660 if (auto NewValue = SimplifyInstruction(Phi, Q)) { 661 Phi->replaceAllUsesWith(NewValue); 662 Phi->eraseFromParent(); 663 Changed = true; 664 } 665 } 666 } 667 } while (Changed); 668 } 669 670 /// Remove phi values from all successors and then remove the terminator. 671 void StructurizeCFG::killTerminator(BasicBlock *BB) { 672 Instruction *Term = BB->getTerminator(); 673 if (!Term) 674 return; 675 676 for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); 677 SI != SE; ++SI) 678 delPhiValues(BB, *SI); 679 680 if (DA) 681 DA->removeValue(Term); 682 Term->eraseFromParent(); 683 } 684 685 /// Let node exit(s) point to NewExit 686 void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, 687 bool IncludeDominator) { 688 if (Node->isSubRegion()) { 689 Region *SubRegion = Node->getNodeAs<Region>(); 690 BasicBlock *OldExit = SubRegion->getExit(); 691 BasicBlock *Dominator = nullptr; 692 693 // Find all the edges from the sub region to the exit 694 for (auto BBI = pred_begin(OldExit), E = pred_end(OldExit); BBI != E;) { 695 // Incrememt BBI before mucking with BB's terminator. 696 BasicBlock *BB = *BBI++; 697 698 if (!SubRegion->contains(BB)) 699 continue; 700 701 // Modify the edges to point to the new exit 702 delPhiValues(BB, OldExit); 703 BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit); 704 addPhiValues(BB, NewExit); 705 706 // Find the new dominator (if requested) 707 if (IncludeDominator) { 708 if (!Dominator) 709 Dominator = BB; 710 else 711 Dominator = DT->findNearestCommonDominator(Dominator, BB); 712 } 713 } 714 715 // Change the dominator (if requested) 716 if (Dominator) 717 DT->changeImmediateDominator(NewExit, Dominator); 718 719 // Update the region info 720 SubRegion->replaceExit(NewExit); 721 } else { 722 BasicBlock *BB = Node->getNodeAs<BasicBlock>(); 723 killTerminator(BB); 724 BranchInst::Create(NewExit, BB); 725 addPhiValues(BB, NewExit); 726 if (IncludeDominator) 727 DT->changeImmediateDominator(NewExit, BB); 728 } 729 } 730 731 /// Create a new flow node and update dominator tree and region info 732 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) { 733 LLVMContext &Context = Func->getContext(); 734 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() : 735 Order.back()->getEntry(); 736 BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, 737 Func, Insert); 738 DT->addNewBlock(Flow, Dominator); 739 ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); 740 return Flow; 741 } 742 743 /// Create a new or reuse the previous node as flow node 744 BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) { 745 BasicBlock *Entry = PrevNode->getEntry(); 746 747 if (!PrevNode->isSubRegion()) { 748 killTerminator(Entry); 749 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end()) 750 return Entry; 751 } 752 753 // create a new flow node 754 BasicBlock *Flow = getNextFlow(Entry); 755 756 // and wire it up 757 changeExit(PrevNode, Flow, true); 758 PrevNode = ParentRegion->getBBNode(Flow); 759 return Flow; 760 } 761 762 /// Returns the region exit if possible, otherwise just a new flow node 763 BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow, 764 bool ExitUseAllowed) { 765 if (!Order.empty() || !ExitUseAllowed) 766 return getNextFlow(Flow); 767 768 BasicBlock *Exit = ParentRegion->getExit(); 769 DT->changeImmediateDominator(Exit, Flow); 770 addPhiValues(Flow, Exit); 771 return Exit; 772 } 773 774 /// Set the previous node 775 void StructurizeCFG::setPrevNode(BasicBlock *BB) { 776 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) 777 : nullptr; 778 } 779 780 /// Does BB dominate all the predicates of Node? 781 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) { 782 BBPredicates &Preds = Predicates[Node->getEntry()]; 783 return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) { 784 return DT->dominates(BB, Pred.first); 785 }); 786 } 787 788 /// Can we predict that this node will always be called? 789 bool StructurizeCFG::isPredictableTrue(RegionNode *Node) { 790 BBPredicates &Preds = Predicates[Node->getEntry()]; 791 bool Dominated = false; 792 793 // Regionentry is always true 794 if (!PrevNode) 795 return true; 796 797 for (std::pair<BasicBlock*, Value*> Pred : Preds) { 798 BasicBlock *BB = Pred.first; 799 Value *V = Pred.second; 800 801 if (V != BoolTrue) 802 return false; 803 804 if (!Dominated && DT->dominates(BB, PrevNode->getEntry())) 805 Dominated = true; 806 } 807 808 // TODO: The dominator check is too strict 809 return Dominated; 810 } 811 812 /// Take one node from the order vector and wire it up 813 void StructurizeCFG::wireFlow(bool ExitUseAllowed, 814 BasicBlock *LoopEnd) { 815 RegionNode *Node = Order.pop_back_val(); 816 Visited.insert(Node->getEntry()); 817 818 if (isPredictableTrue(Node)) { 819 // Just a linear flow 820 if (PrevNode) { 821 changeExit(PrevNode, Node->getEntry(), true); 822 } 823 PrevNode = Node; 824 } else { 825 // Insert extra prefix node (or reuse last one) 826 BasicBlock *Flow = needPrefix(false); 827 828 // Insert extra postfix node (or use exit instead) 829 BasicBlock *Entry = Node->getEntry(); 830 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed); 831 832 // let it point to entry and next block 833 Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow)); 834 addPhiValues(Flow, Entry); 835 DT->changeImmediateDominator(Entry, Flow); 836 837 PrevNode = Node; 838 while (!Order.empty() && !Visited.count(LoopEnd) && 839 dominatesPredicates(Entry, Order.back())) { 840 handleLoops(false, LoopEnd); 841 } 842 843 changeExit(PrevNode, Next, false); 844 setPrevNode(Next); 845 } 846 } 847 848 void StructurizeCFG::handleLoops(bool ExitUseAllowed, 849 BasicBlock *LoopEnd) { 850 RegionNode *Node = Order.back(); 851 BasicBlock *LoopStart = Node->getEntry(); 852 853 if (!Loops.count(LoopStart)) { 854 wireFlow(ExitUseAllowed, LoopEnd); 855 return; 856 } 857 858 if (!isPredictableTrue(Node)) 859 LoopStart = needPrefix(true); 860 861 LoopEnd = Loops[Node->getEntry()]; 862 wireFlow(false, LoopEnd); 863 while (!Visited.count(LoopEnd)) { 864 handleLoops(false, LoopEnd); 865 } 866 867 // If the start of the loop is the entry block, we can't branch to it so 868 // insert a new dummy entry block. 869 Function *LoopFunc = LoopStart->getParent(); 870 if (LoopStart == &LoopFunc->getEntryBlock()) { 871 LoopStart->setName("entry.orig"); 872 873 BasicBlock *NewEntry = 874 BasicBlock::Create(LoopStart->getContext(), 875 "entry", 876 LoopFunc, 877 LoopStart); 878 BranchInst::Create(LoopStart, NewEntry); 879 DT->setNewRoot(NewEntry); 880 } 881 882 // Create an extra loop end node 883 LoopEnd = needPrefix(false); 884 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed); 885 LoopConds.push_back(BranchInst::Create(Next, LoopStart, 886 BoolUndef, LoopEnd)); 887 addPhiValues(LoopEnd, LoopStart); 888 setPrevNode(Next); 889 } 890 891 /// After this function control flow looks like it should be, but 892 /// branches and PHI nodes only have undefined conditions. 893 void StructurizeCFG::createFlow() { 894 BasicBlock *Exit = ParentRegion->getExit(); 895 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit); 896 897 AffectedPhis.clear(); 898 DeletedPhis.clear(); 899 AddedPhis.clear(); 900 Conditions.clear(); 901 LoopConds.clear(); 902 903 PrevNode = nullptr; 904 Visited.clear(); 905 906 while (!Order.empty()) { 907 handleLoops(EntryDominatesExit, nullptr); 908 } 909 910 if (PrevNode) 911 changeExit(PrevNode, Exit, EntryDominatesExit); 912 else 913 assert(EntryDominatesExit); 914 } 915 916 /// Handle a rare case where the disintegrated nodes instructions 917 /// no longer dominate all their uses. Not sure if this is really necessary 918 void StructurizeCFG::rebuildSSA() { 919 SSAUpdater Updater; 920 for (BasicBlock *BB : ParentRegion->blocks()) 921 for (Instruction &I : *BB) { 922 bool Initialized = false; 923 // We may modify the use list as we iterate over it, so be careful to 924 // compute the next element in the use list at the top of the loop. 925 for (auto UI = I.use_begin(), E = I.use_end(); UI != E;) { 926 Use &U = *UI++; 927 Instruction *User = cast<Instruction>(U.getUser()); 928 if (User->getParent() == BB) { 929 continue; 930 } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 931 if (UserPN->getIncomingBlock(U) == BB) 932 continue; 933 } 934 935 if (DT->dominates(&I, User)) 936 continue; 937 938 if (!Initialized) { 939 Value *Undef = UndefValue::get(I.getType()); 940 Updater.Initialize(I.getType(), ""); 941 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 942 Updater.AddAvailableValue(BB, &I); 943 Initialized = true; 944 } 945 Updater.RewriteUseAfterInsertions(U); 946 } 947 } 948 } 949 950 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID, 951 const LegacyDivergenceAnalysis &DA) { 952 // Bool for if all sub-regions are uniform. 953 bool SubRegionsAreUniform = true; 954 // Count of how many direct children are conditional. 955 unsigned ConditionalDirectChildren = 0; 956 957 for (auto E : R->elements()) { 958 if (!E->isSubRegion()) { 959 auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator()); 960 if (!Br || !Br->isConditional()) 961 continue; 962 963 if (!DA.isUniform(Br)) 964 return false; 965 966 // One of our direct children is conditional. 967 ConditionalDirectChildren++; 968 969 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName() 970 << " has uniform terminator\n"); 971 } else { 972 // Explicitly refuse to treat regions as uniform if they have non-uniform 973 // subregions. We cannot rely on DivergenceAnalysis for branches in 974 // subregions because those branches may have been removed and re-created, 975 // so we look for our metadata instead. 976 // 977 // Warning: It would be nice to treat regions as uniform based only on 978 // their direct child basic blocks' terminators, regardless of whether 979 // subregions are uniform or not. However, this requires a very careful 980 // look at SIAnnotateControlFlow to make sure nothing breaks there. 981 for (auto BB : E->getNodeAs<Region>()->blocks()) { 982 auto Br = dyn_cast<BranchInst>(BB->getTerminator()); 983 if (!Br || !Br->isConditional()) 984 continue; 985 986 if (!Br->getMetadata(UniformMDKindID)) { 987 // Early exit if we cannot have relaxed uniform regions. 988 if (!RelaxedUniformRegions) 989 return false; 990 991 SubRegionsAreUniform = false; 992 break; 993 } 994 } 995 } 996 } 997 998 // Our region is uniform if: 999 // 1. All conditional branches that are direct children are uniform (checked 1000 // above). 1001 // 2. And either: 1002 // a. All sub-regions are uniform. 1003 // b. There is one or less conditional branches among the direct children. 1004 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1); 1005 } 1006 1007 /// Run the transformation for each region found 1008 bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) { 1009 if (R->isTopLevelRegion()) 1010 return false; 1011 1012 DA = nullptr; 1013 1014 if (SkipUniformRegions) { 1015 // TODO: We could probably be smarter here with how we handle sub-regions. 1016 // We currently rely on the fact that metadata is set by earlier invocations 1017 // of the pass on sub-regions, and that this metadata doesn't get lost -- 1018 // but we shouldn't rely on metadata for correctness! 1019 unsigned UniformMDKindID = 1020 R->getEntry()->getContext().getMDKindID("structurizecfg.uniform"); 1021 DA = &getAnalysis<LegacyDivergenceAnalysis>(); 1022 1023 if (hasOnlyUniformBranches(R, UniformMDKindID, *DA)) { 1024 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R 1025 << '\n'); 1026 1027 // Mark all direct child block terminators as having been treated as 1028 // uniform. To account for a possible future in which non-uniform 1029 // sub-regions are treated more cleverly, indirect children are not 1030 // marked as uniform. 1031 MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {}); 1032 for (RegionNode *E : R->elements()) { 1033 if (E->isSubRegion()) 1034 continue; 1035 1036 if (Instruction *Term = E->getEntry()->getTerminator()) 1037 Term->setMetadata(UniformMDKindID, MD); 1038 } 1039 1040 return false; 1041 } 1042 } 1043 1044 Func = R->getEntry()->getParent(); 1045 ParentRegion = R; 1046 1047 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1048 1049 orderNodes(); 1050 collectInfos(); 1051 createFlow(); 1052 insertConditions(false); 1053 insertConditions(true); 1054 setPhiValues(); 1055 simplifyAffectedPhis(); 1056 rebuildSSA(); 1057 1058 // Cleanup 1059 Order.clear(); 1060 Visited.clear(); 1061 DeletedPhis.clear(); 1062 AddedPhis.clear(); 1063 Predicates.clear(); 1064 Conditions.clear(); 1065 Loops.clear(); 1066 LoopPreds.clear(); 1067 LoopConds.clear(); 1068 1069 return true; 1070 } 1071 1072 Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) { 1073 return new StructurizeCFG(SkipUniformRegions); 1074 } 1075