1 //===- StructurizeCFG.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Transforms/Scalar/StructurizeCFG.h" 10 #include "llvm/ADT/DenseMap.h" 11 #include "llvm/ADT/MapVector.h" 12 #include "llvm/ADT/SCCIterator.h" 13 #include "llvm/ADT/STLExtras.h" 14 #include "llvm/ADT/SmallPtrSet.h" 15 #include "llvm/ADT/SmallSet.h" 16 #include "llvm/ADT/SmallVector.h" 17 #include "llvm/Analysis/InstructionSimplify.h" 18 #include "llvm/Analysis/RegionInfo.h" 19 #include "llvm/Analysis/RegionIterator.h" 20 #include "llvm/Analysis/RegionPass.h" 21 #include "llvm/Analysis/UniformityAnalysis.h" 22 #include "llvm/IR/BasicBlock.h" 23 #include "llvm/IR/CFG.h" 24 #include "llvm/IR/Constants.h" 25 #include "llvm/IR/Dominators.h" 26 #include "llvm/IR/Function.h" 27 #include "llvm/IR/InstrTypes.h" 28 #include "llvm/IR/Instruction.h" 29 #include "llvm/IR/Instructions.h" 30 #include "llvm/IR/Metadata.h" 31 #include "llvm/IR/PassManager.h" 32 #include "llvm/IR/PatternMatch.h" 33 #include "llvm/IR/Type.h" 34 #include "llvm/IR/Use.h" 35 #include "llvm/IR/Value.h" 36 #include "llvm/IR/ValueHandle.h" 37 #include "llvm/InitializePasses.h" 38 #include "llvm/Pass.h" 39 #include "llvm/Support/Casting.h" 40 #include "llvm/Support/CommandLine.h" 41 #include "llvm/Support/Debug.h" 42 #include "llvm/Support/raw_ostream.h" 43 #include "llvm/Transforms/Scalar.h" 44 #include "llvm/Transforms/Utils.h" 45 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 46 #include "llvm/Transforms/Utils/Local.h" 47 #include "llvm/Transforms/Utils/SSAUpdater.h" 48 #include <algorithm> 49 #include <cassert> 50 #include <utility> 51 52 using namespace llvm; 53 using namespace llvm::PatternMatch; 54 55 #define DEBUG_TYPE "structurizecfg" 56 57 // The name for newly created blocks. 58 const char FlowBlockName[] = "Flow"; 59 60 namespace { 61 62 static cl::opt<bool> ForceSkipUniformRegions( 63 "structurizecfg-skip-uniform-regions", 64 cl::Hidden, 65 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"), 66 cl::init(false)); 67 68 static cl::opt<bool> 69 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden, 70 cl::desc("Allow relaxed uniform region checks"), 71 cl::init(true)); 72 73 // Definition of the complex types used in this pass. 74 75 using BBValuePair = std::pair<BasicBlock *, Value *>; 76 77 using RNVector = SmallVector<RegionNode *, 8>; 78 using BBVector = SmallVector<BasicBlock *, 8>; 79 using BranchVector = SmallVector<BranchInst *, 8>; 80 using BBValueVector = SmallVector<BBValuePair, 2>; 81 82 using BBSet = SmallPtrSet<BasicBlock *, 8>; 83 84 using PhiMap = MapVector<PHINode *, BBValueVector>; 85 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>; 86 87 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>; 88 using BBPredicates = DenseMap<BasicBlock *, Value *>; 89 using PredMap = DenseMap<BasicBlock *, BBPredicates>; 90 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>; 91 92 using BranchDebugLocMap = DenseMap<BasicBlock *, DebugLoc>; 93 94 // A traits type that is intended to be used in graph algorithms. The graph 95 // traits starts at an entry node, and traverses the RegionNodes that are in 96 // the Nodes set. 97 struct SubGraphTraits { 98 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>; 99 using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType; 100 101 // This wraps a set of Nodes into the iterator, so we know which edges to 102 // filter out. 103 class WrappedSuccIterator 104 : public iterator_adaptor_base< 105 WrappedSuccIterator, BaseSuccIterator, 106 typename std::iterator_traits<BaseSuccIterator>::iterator_category, 107 NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> { 108 SmallDenseSet<RegionNode *> *Nodes; 109 110 public: 111 WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes) 112 : iterator_adaptor_base(It), Nodes(Nodes) {} 113 114 NodeRef operator*() const { return {*I, Nodes}; } 115 }; 116 117 static bool filterAll(const NodeRef &N) { return true; } 118 static bool filterSet(const NodeRef &N) { return N.second->count(N.first); } 119 120 using ChildIteratorType = 121 filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>; 122 123 static NodeRef getEntryNode(Region *R) { 124 return {GraphTraits<Region *>::getEntryNode(R), nullptr}; 125 } 126 127 static NodeRef getEntryNode(NodeRef N) { return N; } 128 129 static iterator_range<ChildIteratorType> children(const NodeRef &N) { 130 auto *filter = N.second ? &filterSet : &filterAll; 131 return make_filter_range( 132 make_range<WrappedSuccIterator>( 133 {GraphTraits<RegionNode *>::child_begin(N.first), N.second}, 134 {GraphTraits<RegionNode *>::child_end(N.first), N.second}), 135 filter); 136 } 137 138 static ChildIteratorType child_begin(const NodeRef &N) { 139 return children(N).begin(); 140 } 141 142 static ChildIteratorType child_end(const NodeRef &N) { 143 return children(N).end(); 144 } 145 }; 146 147 /// Finds the nearest common dominator of a set of BasicBlocks. 148 /// 149 /// For every BB you add to the set, you can specify whether we "remember" the 150 /// block. When you get the common dominator, you can also ask whether it's one 151 /// of the blocks we remembered. 152 class NearestCommonDominator { 153 DominatorTree *DT; 154 BasicBlock *Result = nullptr; 155 bool ResultIsRemembered = false; 156 157 /// Add BB to the resulting dominator. 158 void addBlock(BasicBlock *BB, bool Remember) { 159 if (!Result) { 160 Result = BB; 161 ResultIsRemembered = Remember; 162 return; 163 } 164 165 BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB); 166 if (NewResult != Result) 167 ResultIsRemembered = false; 168 if (NewResult == BB) 169 ResultIsRemembered |= Remember; 170 Result = NewResult; 171 } 172 173 public: 174 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {} 175 176 void addBlock(BasicBlock *BB) { 177 addBlock(BB, /* Remember = */ false); 178 } 179 180 void addAndRememberBlock(BasicBlock *BB) { 181 addBlock(BB, /* Remember = */ true); 182 } 183 184 /// Get the nearest common dominator of all the BBs added via addBlock() and 185 /// addAndRememberBlock(). 186 BasicBlock *result() { return Result; } 187 188 /// Is the BB returned by getResult() one of the blocks we added to the set 189 /// with addAndRememberBlock()? 190 bool resultIsRememberedBlock() { return ResultIsRemembered; } 191 }; 192 193 /// Transforms the control flow graph on one single entry/exit region 194 /// at a time. 195 /// 196 /// After the transform all "If"/"Then"/"Else" style control flow looks like 197 /// this: 198 /// 199 /// \verbatim 200 /// 1 201 /// || 202 /// | | 203 /// 2 | 204 /// | / 205 /// |/ 206 /// 3 207 /// || Where: 208 /// | | 1 = "If" block, calculates the condition 209 /// 4 | 2 = "Then" subregion, runs if the condition is true 210 /// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow 211 /// |/ 4 = "Else" optional subregion, runs if the condition is false 212 /// 5 5 = "End" block, also rejoins the control flow 213 /// \endverbatim 214 /// 215 /// Control flow is expressed as a branch where the true exit goes into the 216 /// "Then"/"Else" region, while the false exit skips the region 217 /// The condition for the optional "Else" region is expressed as a PHI node. 218 /// The incoming values of the PHI node are true for the "If" edge and false 219 /// for the "Then" edge. 220 /// 221 /// Additionally to that even complicated loops look like this: 222 /// 223 /// \verbatim 224 /// 1 225 /// || 226 /// | | 227 /// 2 ^ Where: 228 /// | / 1 = "Entry" block 229 /// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block 230 /// 3 3 = "Flow" block, with back edge to entry block 231 /// | 232 /// \endverbatim 233 /// 234 /// The back edge of the "Flow" block is always on the false side of the branch 235 /// while the true side continues the general flow. So the loop condition 236 /// consist of a network of PHI nodes where the true incoming values expresses 237 /// breaks and the false values expresses continue states. 238 239 class StructurizeCFG { 240 Type *Boolean; 241 ConstantInt *BoolTrue; 242 ConstantInt *BoolFalse; 243 Value *BoolPoison; 244 245 Function *Func; 246 Region *ParentRegion; 247 248 UniformityInfo *UA = nullptr; 249 DominatorTree *DT; 250 251 SmallVector<RegionNode *, 8> Order; 252 BBSet Visited; 253 BBSet FlowSet; 254 255 SmallVector<WeakVH, 8> AffectedPhis; 256 BBPhiMap DeletedPhis; 257 BB2BBVecMap AddedPhis; 258 259 PredMap Predicates; 260 BranchVector Conditions; 261 262 BB2BBMap Loops; 263 PredMap LoopPreds; 264 BranchVector LoopConds; 265 266 BranchDebugLocMap TermDL; 267 268 RegionNode *PrevNode; 269 270 void orderNodes(); 271 272 void analyzeLoops(RegionNode *N); 273 274 Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert); 275 276 void gatherPredicates(RegionNode *N); 277 278 void collectInfos(); 279 280 void insertConditions(bool Loops); 281 282 void simplifyConditions(); 283 284 void delPhiValues(BasicBlock *From, BasicBlock *To); 285 286 void addPhiValues(BasicBlock *From, BasicBlock *To); 287 288 void findUndefBlocks(BasicBlock *PHIBlock, 289 const SmallSet<BasicBlock *, 8> &Incomings, 290 SmallVector<BasicBlock *> &UndefBlks) const; 291 void setPhiValues(); 292 293 void simplifyAffectedPhis(); 294 295 void killTerminator(BasicBlock *BB); 296 297 void changeExit(RegionNode *Node, BasicBlock *NewExit, 298 bool IncludeDominator); 299 300 BasicBlock *getNextFlow(BasicBlock *Dominator); 301 302 BasicBlock *needPrefix(bool NeedEmpty); 303 304 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed); 305 306 void setPrevNode(BasicBlock *BB); 307 308 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node); 309 310 bool isPredictableTrue(RegionNode *Node); 311 312 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd); 313 314 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd); 315 316 void createFlow(); 317 318 void rebuildSSA(); 319 320 public: 321 void init(Region *R); 322 bool run(Region *R, DominatorTree *DT); 323 bool makeUniformRegion(Region *R, UniformityInfo &UA); 324 }; 325 326 class StructurizeCFGLegacyPass : public RegionPass { 327 bool SkipUniformRegions; 328 329 public: 330 static char ID; 331 332 explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false) 333 : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) { 334 if (ForceSkipUniformRegions.getNumOccurrences()) 335 SkipUniformRegions = ForceSkipUniformRegions.getValue(); 336 initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry()); 337 } 338 339 bool runOnRegion(Region *R, RGPassManager &RGM) override { 340 StructurizeCFG SCFG; 341 SCFG.init(R); 342 if (SkipUniformRegions) { 343 UniformityInfo &UA = 344 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo(); 345 if (SCFG.makeUniformRegion(R, UA)) 346 return false; 347 } 348 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 349 return SCFG.run(R, DT); 350 } 351 352 StringRef getPassName() const override { return "Structurize control flow"; } 353 354 void getAnalysisUsage(AnalysisUsage &AU) const override { 355 if (SkipUniformRegions) 356 AU.addRequired<UniformityInfoWrapperPass>(); 357 AU.addRequired<DominatorTreeWrapperPass>(); 358 359 AU.addPreserved<DominatorTreeWrapperPass>(); 360 RegionPass::getAnalysisUsage(AU); 361 } 362 }; 363 364 } // end anonymous namespace 365 366 char StructurizeCFGLegacyPass::ID = 0; 367 368 INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg", 369 "Structurize the CFG", false, false) 370 INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) 371 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 372 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 373 INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg", 374 "Structurize the CFG", false, false) 375 376 /// Build up the general order of nodes, by performing a topological sort of the 377 /// parent region's nodes, while ensuring that there is no outer cycle node 378 /// between any two inner cycle nodes. 379 void StructurizeCFG::orderNodes() { 380 Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion), 381 GraphTraits<Region *>::nodes_end(ParentRegion))); 382 if (Order.empty()) 383 return; 384 385 SmallDenseSet<RegionNode *> Nodes; 386 auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion); 387 388 // A list of range indices of SCCs in Order, to be processed. 389 SmallVector<std::pair<unsigned, unsigned>, 8> WorkList; 390 unsigned I = 0, E = Order.size(); 391 while (true) { 392 // Run through all the SCCs in the subgraph starting with Entry. 393 for (auto SCCI = 394 scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin( 395 EntryNode); 396 !SCCI.isAtEnd(); ++SCCI) { 397 auto &SCC = *SCCI; 398 399 // An SCC up to the size of 2, can be reduced to an entry (the last node), 400 // and a possible additional node. Therefore, it is already in order, and 401 // there is no need to add it to the work-list. 402 unsigned Size = SCC.size(); 403 if (Size > 2) 404 WorkList.emplace_back(I, I + Size); 405 406 // Add the SCC nodes to the Order array. 407 for (const auto &N : SCC) { 408 assert(I < E && "SCC size mismatch!"); 409 Order[I++] = N.first; 410 } 411 } 412 assert(I == E && "SCC size mismatch!"); 413 414 // If there are no more SCCs to order, then we are done. 415 if (WorkList.empty()) 416 break; 417 418 std::tie(I, E) = WorkList.pop_back_val(); 419 420 // Collect the set of nodes in the SCC's subgraph. These are only the 421 // possible child nodes; we do not add the entry (last node) otherwise we 422 // will have the same exact SCC all over again. 423 Nodes.clear(); 424 Nodes.insert(Order.begin() + I, Order.begin() + E - 1); 425 426 // Update the entry node. 427 EntryNode.first = Order[E - 1]; 428 EntryNode.second = &Nodes; 429 } 430 } 431 432 /// Determine the end of the loops 433 void StructurizeCFG::analyzeLoops(RegionNode *N) { 434 if (N->isSubRegion()) { 435 // Test for exit as back edge 436 BasicBlock *Exit = N->getNodeAs<Region>()->getExit(); 437 if (Visited.count(Exit)) 438 Loops[Exit] = N->getEntry(); 439 440 } else { 441 // Test for successors as back edge 442 BasicBlock *BB = N->getNodeAs<BasicBlock>(); 443 BranchInst *Term = cast<BranchInst>(BB->getTerminator()); 444 445 for (BasicBlock *Succ : Term->successors()) 446 if (Visited.count(Succ)) 447 Loops[Succ] = BB; 448 } 449 } 450 451 /// Build the condition for one edge 452 Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, 453 bool Invert) { 454 Value *Cond = Invert ? BoolFalse : BoolTrue; 455 if (Term->isConditional()) { 456 Cond = Term->getCondition(); 457 458 if (Idx != (unsigned)Invert) 459 Cond = invertCondition(Cond); 460 } 461 return Cond; 462 } 463 464 /// Analyze the predecessors of each block and build up predicates 465 void StructurizeCFG::gatherPredicates(RegionNode *N) { 466 RegionInfo *RI = ParentRegion->getRegionInfo(); 467 BasicBlock *BB = N->getEntry(); 468 BBPredicates &Pred = Predicates[BB]; 469 BBPredicates &LPred = LoopPreds[BB]; 470 471 for (BasicBlock *P : predecessors(BB)) { 472 // Ignore it if it's a branch from outside into our region entry 473 if (!ParentRegion->contains(P)) 474 continue; 475 476 Region *R = RI->getRegionFor(P); 477 if (R == ParentRegion) { 478 // It's a top level block in our region 479 BranchInst *Term = cast<BranchInst>(P->getTerminator()); 480 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { 481 BasicBlock *Succ = Term->getSuccessor(i); 482 if (Succ != BB) 483 continue; 484 485 if (Visited.count(P)) { 486 // Normal forward edge 487 if (Term->isConditional()) { 488 // Try to treat it like an ELSE block 489 BasicBlock *Other = Term->getSuccessor(!i); 490 if (Visited.count(Other) && !Loops.count(Other) && 491 !Pred.count(Other) && !Pred.count(P)) { 492 493 Pred[Other] = BoolFalse; 494 Pred[P] = BoolTrue; 495 continue; 496 } 497 } 498 Pred[P] = buildCondition(Term, i, false); 499 } else { 500 // Back edge 501 LPred[P] = buildCondition(Term, i, true); 502 } 503 } 504 } else { 505 // It's an exit from a sub region 506 while (R->getParent() != ParentRegion) 507 R = R->getParent(); 508 509 // Edge from inside a subregion to its entry, ignore it 510 if (*R == *N) 511 continue; 512 513 BasicBlock *Entry = R->getEntry(); 514 if (Visited.count(Entry)) 515 Pred[Entry] = BoolTrue; 516 else 517 LPred[Entry] = BoolFalse; 518 } 519 } 520 } 521 522 /// Collect various loop and predicate infos 523 void StructurizeCFG::collectInfos() { 524 // Reset predicate 525 Predicates.clear(); 526 527 // and loop infos 528 Loops.clear(); 529 LoopPreds.clear(); 530 531 // Reset the visited nodes 532 Visited.clear(); 533 534 for (RegionNode *RN : reverse(Order)) { 535 LLVM_DEBUG(dbgs() << "Visiting: " 536 << (RN->isSubRegion() ? "SubRegion with entry: " : "") 537 << RN->getEntry()->getName() << "\n"); 538 539 // Analyze all the conditions leading to a node 540 gatherPredicates(RN); 541 542 // Remember that we've seen this node 543 Visited.insert(RN->getEntry()); 544 545 // Find the last back edges 546 analyzeLoops(RN); 547 } 548 549 // Reset the collected term debug locations 550 TermDL.clear(); 551 552 for (BasicBlock &BB : *Func) { 553 if (const DebugLoc &DL = BB.getTerminator()->getDebugLoc()) 554 TermDL[&BB] = DL; 555 } 556 } 557 558 /// Insert the missing branch conditions 559 void StructurizeCFG::insertConditions(bool Loops) { 560 BranchVector &Conds = Loops ? LoopConds : Conditions; 561 Value *Default = Loops ? BoolTrue : BoolFalse; 562 SSAUpdater PhiInserter; 563 564 for (BranchInst *Term : Conds) { 565 assert(Term->isConditional()); 566 567 BasicBlock *Parent = Term->getParent(); 568 BasicBlock *SuccTrue = Term->getSuccessor(0); 569 BasicBlock *SuccFalse = Term->getSuccessor(1); 570 571 PhiInserter.Initialize(Boolean, ""); 572 PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default); 573 PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default); 574 575 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue]; 576 577 NearestCommonDominator Dominator(DT); 578 Dominator.addBlock(Parent); 579 580 Value *ParentValue = nullptr; 581 for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) { 582 BasicBlock *BB = BBAndPred.first; 583 Value *Pred = BBAndPred.second; 584 585 if (BB == Parent) { 586 ParentValue = Pred; 587 break; 588 } 589 PhiInserter.AddAvailableValue(BB, Pred); 590 Dominator.addAndRememberBlock(BB); 591 } 592 593 if (ParentValue) { 594 Term->setCondition(ParentValue); 595 } else { 596 if (!Dominator.resultIsRememberedBlock()) 597 PhiInserter.AddAvailableValue(Dominator.result(), Default); 598 599 Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent)); 600 } 601 } 602 } 603 604 /// Simplify any inverted conditions that were built by buildConditions. 605 void StructurizeCFG::simplifyConditions() { 606 SmallVector<Instruction *> InstToErase; 607 for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) { 608 auto &Preds = I.second; 609 for (auto &J : Preds) { 610 auto &Cond = J.second; 611 Instruction *Inverted; 612 if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) && 613 !Cond->use_empty()) { 614 if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) { 615 InvertedCmp->setPredicate(InvertedCmp->getInversePredicate()); 616 Cond->replaceAllUsesWith(InvertedCmp); 617 InstToErase.push_back(cast<Instruction>(Cond)); 618 } 619 } 620 } 621 } 622 for (auto *I : InstToErase) 623 I->eraseFromParent(); 624 } 625 626 /// Remove all PHI values coming from "From" into "To" and remember 627 /// them in DeletedPhis 628 void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { 629 PhiMap &Map = DeletedPhis[To]; 630 for (PHINode &Phi : To->phis()) { 631 bool Recorded = false; 632 while (Phi.getBasicBlockIndex(From) != -1) { 633 Value *Deleted = Phi.removeIncomingValue(From, false); 634 Map[&Phi].push_back(std::make_pair(From, Deleted)); 635 if (!Recorded) { 636 AffectedPhis.push_back(&Phi); 637 Recorded = true; 638 } 639 } 640 } 641 } 642 643 /// Add a dummy PHI value as soon as we knew the new predecessor 644 void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { 645 for (PHINode &Phi : To->phis()) { 646 Value *Undef = UndefValue::get(Phi.getType()); 647 Phi.addIncoming(Undef, From); 648 } 649 AddedPhis[To].push_back(From); 650 } 651 652 /// When we are reconstructing a PHI inside \p PHIBlock with incoming values 653 /// from predecessors \p Incomings, we have a chance to mark the available value 654 /// from some blocks as undefined. The function will find out all such blocks 655 /// and return in \p UndefBlks. 656 void StructurizeCFG::findUndefBlocks( 657 BasicBlock *PHIBlock, const SmallSet<BasicBlock *, 8> &Incomings, 658 SmallVector<BasicBlock *> &UndefBlks) const { 659 // We may get a post-structured CFG like below: 660 // 661 // | P1 662 // |/ 663 // F1 664 // |\ 665 // | N 666 // |/ 667 // F2 668 // |\ 669 // | P2 670 // |/ 671 // F3 672 // |\ 673 // B 674 // 675 // B is the block that has a PHI being reconstructed. P1/P2 are predecessors 676 // of B before structurization. F1/F2/F3 are flow blocks inserted during 677 // structurization process. Block N is not a predecessor of B before 678 // structurization, but are placed between the predecessors(P1/P2) of B after 679 // structurization. This usually means that threads went to N never take the 680 // path N->F2->F3->B. For example, the threads take the branch F1->N may 681 // always take the branch F2->P2. So, when we are reconstructing a PHI 682 // originally in B, we can safely say the incoming value from N is undefined. 683 SmallSet<BasicBlock *, 8> VisitedBlock; 684 SmallVector<BasicBlock *, 8> Stack; 685 if (PHIBlock == ParentRegion->getExit()) { 686 for (auto P : predecessors(PHIBlock)) { 687 if (ParentRegion->contains(P)) 688 Stack.push_back(P); 689 } 690 } else { 691 append_range(Stack, predecessors(PHIBlock)); 692 } 693 694 // Do a backward traversal over the CFG, and stop further searching if 695 // the block is not a Flow. If a block is neither flow block nor the 696 // incoming predecessor, then the incoming value from the block is 697 // undefined value for the PHI being reconstructed. 698 while (!Stack.empty()) { 699 BasicBlock *Current = Stack.pop_back_val(); 700 if (!VisitedBlock.insert(Current).second) 701 continue; 702 703 if (FlowSet.contains(Current)) { 704 for (auto P : predecessors(Current)) 705 Stack.push_back(P); 706 } else if (!Incomings.contains(Current)) { 707 UndefBlks.push_back(Current); 708 } 709 } 710 } 711 712 /// Add the real PHI value as soon as everything is set up 713 void StructurizeCFG::setPhiValues() { 714 SmallVector<PHINode *, 8> InsertedPhis; 715 SSAUpdater Updater(&InsertedPhis); 716 for (const auto &AddedPhi : AddedPhis) { 717 BasicBlock *To = AddedPhi.first; 718 const BBVector &From = AddedPhi.second; 719 720 if (!DeletedPhis.count(To)) 721 continue; 722 723 SmallVector<BasicBlock *> UndefBlks; 724 bool CachedUndefs = false; 725 PhiMap &Map = DeletedPhis[To]; 726 for (const auto &PI : Map) { 727 PHINode *Phi = PI.first; 728 Value *Undef = UndefValue::get(Phi->getType()); 729 Updater.Initialize(Phi->getType(), ""); 730 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 731 Updater.AddAvailableValue(To, Undef); 732 733 SmallSet<BasicBlock *, 8> Incomings; 734 SmallVector<BasicBlock *> ConstantPreds; 735 for (const auto &VI : PI.second) { 736 Incomings.insert(VI.first); 737 Updater.AddAvailableValue(VI.first, VI.second); 738 if (isa<Constant>(VI.second)) 739 ConstantPreds.push_back(VI.first); 740 } 741 742 if (!CachedUndefs) { 743 findUndefBlocks(To, Incomings, UndefBlks); 744 CachedUndefs = true; 745 } 746 747 for (auto UB : UndefBlks) { 748 // If this undef block is dominated by any predecessor(before 749 // structurization) of reconstructed PHI with constant incoming value, 750 // don't mark the available value as undefined. Setting undef to such 751 // block will stop us from getting optimal phi insertion. 752 if (any_of(ConstantPreds, 753 [&](BasicBlock *CP) { return DT->dominates(CP, UB); })) 754 continue; 755 Updater.AddAvailableValue(UB, Undef); 756 } 757 758 for (BasicBlock *FI : From) 759 Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI)); 760 AffectedPhis.push_back(Phi); 761 } 762 763 DeletedPhis.erase(To); 764 } 765 assert(DeletedPhis.empty()); 766 767 AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end()); 768 } 769 770 void StructurizeCFG::simplifyAffectedPhis() { 771 bool Changed; 772 do { 773 Changed = false; 774 SimplifyQuery Q(Func->getDataLayout()); 775 Q.DT = DT; 776 // Setting CanUseUndef to true might extend value liveness, set it to false 777 // to achieve better register pressure. 778 Q.CanUseUndef = false; 779 for (WeakVH VH : AffectedPhis) { 780 if (auto Phi = dyn_cast_or_null<PHINode>(VH)) { 781 if (auto NewValue = simplifyInstruction(Phi, Q)) { 782 Phi->replaceAllUsesWith(NewValue); 783 Phi->eraseFromParent(); 784 Changed = true; 785 } 786 } 787 } 788 } while (Changed); 789 } 790 791 /// Remove phi values from all successors and then remove the terminator. 792 void StructurizeCFG::killTerminator(BasicBlock *BB) { 793 Instruction *Term = BB->getTerminator(); 794 if (!Term) 795 return; 796 797 for (BasicBlock *Succ : successors(BB)) 798 delPhiValues(BB, Succ); 799 800 Term->eraseFromParent(); 801 } 802 803 /// Let node exit(s) point to NewExit 804 void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, 805 bool IncludeDominator) { 806 if (Node->isSubRegion()) { 807 Region *SubRegion = Node->getNodeAs<Region>(); 808 BasicBlock *OldExit = SubRegion->getExit(); 809 BasicBlock *Dominator = nullptr; 810 811 // Find all the edges from the sub region to the exit. 812 // We use make_early_inc_range here because we modify BB's terminator. 813 for (BasicBlock *BB : llvm::make_early_inc_range(predecessors(OldExit))) { 814 if (!SubRegion->contains(BB)) 815 continue; 816 817 // Modify the edges to point to the new exit 818 delPhiValues(BB, OldExit); 819 BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit); 820 addPhiValues(BB, NewExit); 821 822 // Find the new dominator (if requested) 823 if (IncludeDominator) { 824 if (!Dominator) 825 Dominator = BB; 826 else 827 Dominator = DT->findNearestCommonDominator(Dominator, BB); 828 } 829 } 830 831 // Change the dominator (if requested) 832 if (Dominator) 833 DT->changeImmediateDominator(NewExit, Dominator); 834 835 // Update the region info 836 SubRegion->replaceExit(NewExit); 837 } else { 838 BasicBlock *BB = Node->getNodeAs<BasicBlock>(); 839 killTerminator(BB); 840 BranchInst *Br = BranchInst::Create(NewExit, BB); 841 Br->setDebugLoc(TermDL[BB]); 842 addPhiValues(BB, NewExit); 843 if (IncludeDominator) 844 DT->changeImmediateDominator(NewExit, BB); 845 } 846 } 847 848 /// Create a new flow node and update dominator tree and region info 849 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) { 850 LLVMContext &Context = Func->getContext(); 851 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() : 852 Order.back()->getEntry(); 853 BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, 854 Func, Insert); 855 FlowSet.insert(Flow); 856 857 // use a temporary variable to avoid a use-after-free if the map's storage is 858 // reallocated 859 DebugLoc DL = TermDL[Dominator]; 860 TermDL[Flow] = std::move(DL); 861 862 DT->addNewBlock(Flow, Dominator); 863 ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); 864 return Flow; 865 } 866 867 /// Create a new or reuse the previous node as flow node 868 BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) { 869 BasicBlock *Entry = PrevNode->getEntry(); 870 871 if (!PrevNode->isSubRegion()) { 872 killTerminator(Entry); 873 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end()) 874 return Entry; 875 } 876 877 // create a new flow node 878 BasicBlock *Flow = getNextFlow(Entry); 879 880 // and wire it up 881 changeExit(PrevNode, Flow, true); 882 PrevNode = ParentRegion->getBBNode(Flow); 883 return Flow; 884 } 885 886 /// Returns the region exit if possible, otherwise just a new flow node 887 BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow, 888 bool ExitUseAllowed) { 889 if (!Order.empty() || !ExitUseAllowed) 890 return getNextFlow(Flow); 891 892 BasicBlock *Exit = ParentRegion->getExit(); 893 DT->changeImmediateDominator(Exit, Flow); 894 addPhiValues(Flow, Exit); 895 return Exit; 896 } 897 898 /// Set the previous node 899 void StructurizeCFG::setPrevNode(BasicBlock *BB) { 900 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) 901 : nullptr; 902 } 903 904 /// Does BB dominate all the predicates of Node? 905 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) { 906 BBPredicates &Preds = Predicates[Node->getEntry()]; 907 return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) { 908 return DT->dominates(BB, Pred.first); 909 }); 910 } 911 912 /// Can we predict that this node will always be called? 913 bool StructurizeCFG::isPredictableTrue(RegionNode *Node) { 914 BBPredicates &Preds = Predicates[Node->getEntry()]; 915 bool Dominated = false; 916 917 // Regionentry is always true 918 if (!PrevNode) 919 return true; 920 921 for (std::pair<BasicBlock*, Value*> Pred : Preds) { 922 BasicBlock *BB = Pred.first; 923 Value *V = Pred.second; 924 925 if (V != BoolTrue) 926 return false; 927 928 if (!Dominated && DT->dominates(BB, PrevNode->getEntry())) 929 Dominated = true; 930 } 931 932 // TODO: The dominator check is too strict 933 return Dominated; 934 } 935 936 /// Take one node from the order vector and wire it up 937 void StructurizeCFG::wireFlow(bool ExitUseAllowed, 938 BasicBlock *LoopEnd) { 939 RegionNode *Node = Order.pop_back_val(); 940 Visited.insert(Node->getEntry()); 941 942 if (isPredictableTrue(Node)) { 943 // Just a linear flow 944 if (PrevNode) { 945 changeExit(PrevNode, Node->getEntry(), true); 946 } 947 PrevNode = Node; 948 } else { 949 // Insert extra prefix node (or reuse last one) 950 BasicBlock *Flow = needPrefix(false); 951 952 // Insert extra postfix node (or use exit instead) 953 BasicBlock *Entry = Node->getEntry(); 954 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed); 955 956 // let it point to entry and next block 957 BranchInst *Br = BranchInst::Create(Entry, Next, BoolPoison, Flow); 958 Br->setDebugLoc(TermDL[Flow]); 959 Conditions.push_back(Br); 960 addPhiValues(Flow, Entry); 961 DT->changeImmediateDominator(Entry, Flow); 962 963 PrevNode = Node; 964 while (!Order.empty() && !Visited.count(LoopEnd) && 965 dominatesPredicates(Entry, Order.back())) { 966 handleLoops(false, LoopEnd); 967 } 968 969 changeExit(PrevNode, Next, false); 970 setPrevNode(Next); 971 } 972 } 973 974 void StructurizeCFG::handleLoops(bool ExitUseAllowed, 975 BasicBlock *LoopEnd) { 976 RegionNode *Node = Order.back(); 977 BasicBlock *LoopStart = Node->getEntry(); 978 979 if (!Loops.count(LoopStart)) { 980 wireFlow(ExitUseAllowed, LoopEnd); 981 return; 982 } 983 984 if (!isPredictableTrue(Node)) 985 LoopStart = needPrefix(true); 986 987 LoopEnd = Loops[Node->getEntry()]; 988 wireFlow(false, LoopEnd); 989 while (!Visited.count(LoopEnd)) { 990 handleLoops(false, LoopEnd); 991 } 992 993 assert(LoopStart != &LoopStart->getParent()->getEntryBlock()); 994 995 // Create an extra loop end node 996 LoopEnd = needPrefix(false); 997 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed); 998 BranchInst *Br = BranchInst::Create(Next, LoopStart, BoolPoison, LoopEnd); 999 Br->setDebugLoc(TermDL[LoopEnd]); 1000 LoopConds.push_back(Br); 1001 addPhiValues(LoopEnd, LoopStart); 1002 setPrevNode(Next); 1003 } 1004 1005 /// After this function control flow looks like it should be, but 1006 /// branches and PHI nodes only have undefined conditions. 1007 void StructurizeCFG::createFlow() { 1008 BasicBlock *Exit = ParentRegion->getExit(); 1009 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit); 1010 1011 AffectedPhis.clear(); 1012 DeletedPhis.clear(); 1013 AddedPhis.clear(); 1014 Conditions.clear(); 1015 LoopConds.clear(); 1016 1017 PrevNode = nullptr; 1018 Visited.clear(); 1019 1020 while (!Order.empty()) { 1021 handleLoops(EntryDominatesExit, nullptr); 1022 } 1023 1024 if (PrevNode) 1025 changeExit(PrevNode, Exit, EntryDominatesExit); 1026 else 1027 assert(EntryDominatesExit); 1028 } 1029 1030 /// Handle a rare case where the disintegrated nodes instructions 1031 /// no longer dominate all their uses. Not sure if this is really necessary 1032 void StructurizeCFG::rebuildSSA() { 1033 SSAUpdater Updater; 1034 for (BasicBlock *BB : ParentRegion->blocks()) 1035 for (Instruction &I : *BB) { 1036 bool Initialized = false; 1037 // We may modify the use list as we iterate over it, so we use 1038 // make_early_inc_range. 1039 for (Use &U : llvm::make_early_inc_range(I.uses())) { 1040 Instruction *User = cast<Instruction>(U.getUser()); 1041 if (User->getParent() == BB) { 1042 continue; 1043 } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 1044 if (UserPN->getIncomingBlock(U) == BB) 1045 continue; 1046 } 1047 1048 if (DT->dominates(&I, User)) 1049 continue; 1050 1051 if (!Initialized) { 1052 Value *Undef = UndefValue::get(I.getType()); 1053 Updater.Initialize(I.getType(), ""); 1054 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 1055 Updater.AddAvailableValue(BB, &I); 1056 Initialized = true; 1057 } 1058 Updater.RewriteUseAfterInsertions(U); 1059 } 1060 } 1061 } 1062 1063 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID, 1064 const UniformityInfo &UA) { 1065 // Bool for if all sub-regions are uniform. 1066 bool SubRegionsAreUniform = true; 1067 // Count of how many direct children are conditional. 1068 unsigned ConditionalDirectChildren = 0; 1069 1070 for (auto *E : R->elements()) { 1071 if (!E->isSubRegion()) { 1072 auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator()); 1073 if (!Br || !Br->isConditional()) 1074 continue; 1075 1076 if (!UA.isUniform(Br)) 1077 return false; 1078 1079 // One of our direct children is conditional. 1080 ConditionalDirectChildren++; 1081 1082 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName() 1083 << " has uniform terminator\n"); 1084 } else { 1085 // Explicitly refuse to treat regions as uniform if they have non-uniform 1086 // subregions. We cannot rely on UniformityAnalysis for branches in 1087 // subregions because those branches may have been removed and re-created, 1088 // so we look for our metadata instead. 1089 // 1090 // Warning: It would be nice to treat regions as uniform based only on 1091 // their direct child basic blocks' terminators, regardless of whether 1092 // subregions are uniform or not. However, this requires a very careful 1093 // look at SIAnnotateControlFlow to make sure nothing breaks there. 1094 for (auto *BB : E->getNodeAs<Region>()->blocks()) { 1095 auto Br = dyn_cast<BranchInst>(BB->getTerminator()); 1096 if (!Br || !Br->isConditional()) 1097 continue; 1098 1099 if (!Br->getMetadata(UniformMDKindID)) { 1100 // Early exit if we cannot have relaxed uniform regions. 1101 if (!RelaxedUniformRegions) 1102 return false; 1103 1104 SubRegionsAreUniform = false; 1105 break; 1106 } 1107 } 1108 } 1109 } 1110 1111 // Our region is uniform if: 1112 // 1. All conditional branches that are direct children are uniform (checked 1113 // above). 1114 // 2. And either: 1115 // a. All sub-regions are uniform. 1116 // b. There is one or less conditional branches among the direct children. 1117 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1); 1118 } 1119 1120 void StructurizeCFG::init(Region *R) { 1121 LLVMContext &Context = R->getEntry()->getContext(); 1122 1123 Boolean = Type::getInt1Ty(Context); 1124 BoolTrue = ConstantInt::getTrue(Context); 1125 BoolFalse = ConstantInt::getFalse(Context); 1126 BoolPoison = PoisonValue::get(Boolean); 1127 1128 this->UA = nullptr; 1129 } 1130 1131 bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) { 1132 if (R->isTopLevelRegion()) 1133 return false; 1134 1135 this->UA = &UA; 1136 1137 // TODO: We could probably be smarter here with how we handle sub-regions. 1138 // We currently rely on the fact that metadata is set by earlier invocations 1139 // of the pass on sub-regions, and that this metadata doesn't get lost -- 1140 // but we shouldn't rely on metadata for correctness! 1141 unsigned UniformMDKindID = 1142 R->getEntry()->getContext().getMDKindID("structurizecfg.uniform"); 1143 1144 if (hasOnlyUniformBranches(R, UniformMDKindID, UA)) { 1145 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R 1146 << '\n'); 1147 1148 // Mark all direct child block terminators as having been treated as 1149 // uniform. To account for a possible future in which non-uniform 1150 // sub-regions are treated more cleverly, indirect children are not 1151 // marked as uniform. 1152 MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {}); 1153 for (RegionNode *E : R->elements()) { 1154 if (E->isSubRegion()) 1155 continue; 1156 1157 if (Instruction *Term = E->getEntry()->getTerminator()) 1158 Term->setMetadata(UniformMDKindID, MD); 1159 } 1160 1161 return true; 1162 } 1163 return false; 1164 } 1165 1166 /// Run the transformation for each region found 1167 bool StructurizeCFG::run(Region *R, DominatorTree *DT) { 1168 if (R->isTopLevelRegion()) 1169 return false; 1170 1171 this->DT = DT; 1172 1173 Func = R->getEntry()->getParent(); 1174 assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator."); 1175 1176 ParentRegion = R; 1177 1178 orderNodes(); 1179 collectInfos(); 1180 createFlow(); 1181 insertConditions(false); 1182 insertConditions(true); 1183 setPhiValues(); 1184 simplifyConditions(); 1185 simplifyAffectedPhis(); 1186 rebuildSSA(); 1187 1188 // Cleanup 1189 Order.clear(); 1190 Visited.clear(); 1191 DeletedPhis.clear(); 1192 AddedPhis.clear(); 1193 Predicates.clear(); 1194 Conditions.clear(); 1195 Loops.clear(); 1196 LoopPreds.clear(); 1197 LoopConds.clear(); 1198 FlowSet.clear(); 1199 TermDL.clear(); 1200 1201 return true; 1202 } 1203 1204 Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) { 1205 return new StructurizeCFGLegacyPass(SkipUniformRegions); 1206 } 1207 1208 static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) { 1209 Regions.push_back(&R); 1210 for (const auto &E : R) 1211 addRegionIntoQueue(*E, Regions); 1212 } 1213 1214 StructurizeCFGPass::StructurizeCFGPass(bool SkipUniformRegions_) 1215 : SkipUniformRegions(SkipUniformRegions_) { 1216 if (ForceSkipUniformRegions.getNumOccurrences()) 1217 SkipUniformRegions = ForceSkipUniformRegions.getValue(); 1218 } 1219 1220 void StructurizeCFGPass::printPipeline( 1221 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { 1222 static_cast<PassInfoMixin<StructurizeCFGPass> *>(this)->printPipeline( 1223 OS, MapClassName2PassName); 1224 if (SkipUniformRegions) 1225 OS << "<skip-uniform-regions>"; 1226 } 1227 1228 PreservedAnalyses StructurizeCFGPass::run(Function &F, 1229 FunctionAnalysisManager &AM) { 1230 1231 bool Changed = false; 1232 DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); 1233 auto &RI = AM.getResult<RegionInfoAnalysis>(F); 1234 1235 UniformityInfo *UI = nullptr; 1236 if (SkipUniformRegions) 1237 UI = &AM.getResult<UniformityInfoAnalysis>(F); 1238 1239 std::vector<Region *> Regions; 1240 addRegionIntoQueue(*RI.getTopLevelRegion(), Regions); 1241 while (!Regions.empty()) { 1242 Region *R = Regions.back(); 1243 Regions.pop_back(); 1244 1245 StructurizeCFG SCFG; 1246 SCFG.init(R); 1247 1248 if (SkipUniformRegions && SCFG.makeUniformRegion(R, *UI)) { 1249 Changed = true; // May have added metadata. 1250 continue; 1251 } 1252 1253 Changed |= SCFG.run(R, DT); 1254 } 1255 if (!Changed) 1256 return PreservedAnalyses::all(); 1257 PreservedAnalyses PA; 1258 PA.preserve<DominatorTreeAnalysis>(); 1259 return PA; 1260 } 1261