1 //===- StructurizeCFG.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Transforms/Scalar/StructurizeCFG.h" 10 #include "llvm/ADT/DenseMap.h" 11 #include "llvm/ADT/EquivalenceClasses.h" 12 #include "llvm/ADT/MapVector.h" 13 #include "llvm/ADT/SCCIterator.h" 14 #include "llvm/ADT/STLExtras.h" 15 #include "llvm/ADT/SmallPtrSet.h" 16 #include "llvm/ADT/SmallSet.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/Analysis/InstructionSimplify.h" 19 #include "llvm/Analysis/RegionInfo.h" 20 #include "llvm/Analysis/RegionIterator.h" 21 #include "llvm/Analysis/RegionPass.h" 22 #include "llvm/Analysis/UniformityAnalysis.h" 23 #include "llvm/IR/BasicBlock.h" 24 #include "llvm/IR/CFG.h" 25 #include "llvm/IR/Constants.h" 26 #include "llvm/IR/Dominators.h" 27 #include "llvm/IR/Function.h" 28 #include "llvm/IR/InstrTypes.h" 29 #include "llvm/IR/Instruction.h" 30 #include "llvm/IR/Instructions.h" 31 #include "llvm/IR/Metadata.h" 32 #include "llvm/IR/PassManager.h" 33 #include "llvm/IR/PatternMatch.h" 34 #include "llvm/IR/ProfDataUtils.h" 35 #include "llvm/IR/Type.h" 36 #include "llvm/IR/Use.h" 37 #include "llvm/IR/Value.h" 38 #include "llvm/IR/ValueHandle.h" 39 #include "llvm/InitializePasses.h" 40 #include "llvm/Pass.h" 41 #include "llvm/Support/Casting.h" 42 #include "llvm/Support/CommandLine.h" 43 #include "llvm/Support/Debug.h" 44 #include "llvm/Support/raw_ostream.h" 45 #include "llvm/Transforms/Scalar.h" 46 #include "llvm/Transforms/Utils.h" 47 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 48 #include "llvm/Transforms/Utils/Local.h" 49 #include "llvm/Transforms/Utils/SSAUpdater.h" 50 #include <cassert> 51 #include <utility> 52 53 using namespace llvm; 54 using namespace llvm::PatternMatch; 55 56 #define DEBUG_TYPE "structurizecfg" 57 58 // The name for newly created blocks. 59 const char FlowBlockName[] = "Flow"; 60 61 namespace { 62 63 static cl::opt<bool> ForceSkipUniformRegions( 64 "structurizecfg-skip-uniform-regions", 65 cl::Hidden, 66 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"), 67 cl::init(false)); 68 69 static cl::opt<bool> 70 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden, 71 cl::desc("Allow relaxed uniform region checks"), 72 cl::init(true)); 73 74 // Definition of the complex types used in this pass. 75 76 using BBValuePair = std::pair<BasicBlock *, Value *>; 77 78 using RNVector = SmallVector<RegionNode *, 8>; 79 using BBVector = SmallVector<BasicBlock *, 8>; 80 using BranchVector = SmallVector<BranchInst *, 8>; 81 using BBValueVector = SmallVector<BBValuePair, 2>; 82 83 using BBSet = SmallPtrSet<BasicBlock *, 8>; 84 85 using PhiMap = MapVector<PHINode *, BBValueVector>; 86 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>; 87 88 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>; 89 90 using MaybeCondBranchWeights = std::optional<class CondBranchWeights>; 91 92 class CondBranchWeights { 93 uint32_t TrueWeight; 94 uint32_t FalseWeight; 95 96 CondBranchWeights(uint32_t T, uint32_t F) : TrueWeight(T), FalseWeight(F) {} 97 98 public: 99 static MaybeCondBranchWeights tryParse(const BranchInst &Br) { 100 assert(Br.isConditional()); 101 102 uint64_t T, F; 103 if (!extractBranchWeights(Br, T, F)) 104 return std::nullopt; 105 106 return CondBranchWeights(T, F); 107 } 108 109 static void setMetadata(BranchInst &Br, 110 const MaybeCondBranchWeights &Weights) { 111 assert(Br.isConditional()); 112 if (!Weights) 113 return; 114 uint32_t Arr[] = {Weights->TrueWeight, Weights->FalseWeight}; 115 setBranchWeights(Br, Arr, false); 116 } 117 118 CondBranchWeights invert() const { 119 return CondBranchWeights{FalseWeight, TrueWeight}; 120 } 121 }; 122 123 struct PredInfo { 124 Value *Pred; 125 MaybeCondBranchWeights Weights; 126 }; 127 128 using BBPredicates = DenseMap<BasicBlock *, PredInfo>; 129 using PredMap = DenseMap<BasicBlock *, BBPredicates>; 130 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>; 131 132 using BranchDebugLocMap = DenseMap<BasicBlock *, DebugLoc>; 133 134 // A traits type that is intended to be used in graph algorithms. The graph 135 // traits starts at an entry node, and traverses the RegionNodes that are in 136 // the Nodes set. 137 struct SubGraphTraits { 138 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>; 139 using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType; 140 141 // This wraps a set of Nodes into the iterator, so we know which edges to 142 // filter out. 143 class WrappedSuccIterator 144 : public iterator_adaptor_base< 145 WrappedSuccIterator, BaseSuccIterator, 146 typename std::iterator_traits<BaseSuccIterator>::iterator_category, 147 NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> { 148 SmallDenseSet<RegionNode *> *Nodes; 149 150 public: 151 WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes) 152 : iterator_adaptor_base(It), Nodes(Nodes) {} 153 154 NodeRef operator*() const { return {*I, Nodes}; } 155 }; 156 157 static bool filterAll(const NodeRef &N) { return true; } 158 static bool filterSet(const NodeRef &N) { return N.second->count(N.first); } 159 160 using ChildIteratorType = 161 filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>; 162 163 static NodeRef getEntryNode(Region *R) { 164 return {GraphTraits<Region *>::getEntryNode(R), nullptr}; 165 } 166 167 static NodeRef getEntryNode(NodeRef N) { return N; } 168 169 static iterator_range<ChildIteratorType> children(const NodeRef &N) { 170 auto *filter = N.second ? &filterSet : &filterAll; 171 return make_filter_range( 172 make_range<WrappedSuccIterator>( 173 {GraphTraits<RegionNode *>::child_begin(N.first), N.second}, 174 {GraphTraits<RegionNode *>::child_end(N.first), N.second}), 175 filter); 176 } 177 178 static ChildIteratorType child_begin(const NodeRef &N) { 179 return children(N).begin(); 180 } 181 182 static ChildIteratorType child_end(const NodeRef &N) { 183 return children(N).end(); 184 } 185 }; 186 187 /// Finds the nearest common dominator of a set of BasicBlocks. 188 /// 189 /// For every BB you add to the set, you can specify whether we "remember" the 190 /// block. When you get the common dominator, you can also ask whether it's one 191 /// of the blocks we remembered. 192 class NearestCommonDominator { 193 DominatorTree *DT; 194 BasicBlock *Result = nullptr; 195 bool ResultIsRemembered = false; 196 197 /// Add BB to the resulting dominator. 198 void addBlock(BasicBlock *BB, bool Remember) { 199 if (!Result) { 200 Result = BB; 201 ResultIsRemembered = Remember; 202 return; 203 } 204 205 BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB); 206 if (NewResult != Result) 207 ResultIsRemembered = false; 208 if (NewResult == BB) 209 ResultIsRemembered |= Remember; 210 Result = NewResult; 211 } 212 213 public: 214 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {} 215 216 void addBlock(BasicBlock *BB) { 217 addBlock(BB, /* Remember = */ false); 218 } 219 220 void addAndRememberBlock(BasicBlock *BB) { 221 addBlock(BB, /* Remember = */ true); 222 } 223 224 /// Get the nearest common dominator of all the BBs added via addBlock() and 225 /// addAndRememberBlock(). 226 BasicBlock *result() { return Result; } 227 228 /// Is the BB returned by getResult() one of the blocks we added to the set 229 /// with addAndRememberBlock()? 230 bool resultIsRememberedBlock() { return ResultIsRemembered; } 231 }; 232 233 /// Transforms the control flow graph on one single entry/exit region 234 /// at a time. 235 /// 236 /// After the transform all "If"/"Then"/"Else" style control flow looks like 237 /// this: 238 /// 239 /// \verbatim 240 /// 1 241 /// || 242 /// | | 243 /// 2 | 244 /// | / 245 /// |/ 246 /// 3 247 /// || Where: 248 /// | | 1 = "If" block, calculates the condition 249 /// 4 | 2 = "Then" subregion, runs if the condition is true 250 /// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow 251 /// |/ 4 = "Else" optional subregion, runs if the condition is false 252 /// 5 5 = "End" block, also rejoins the control flow 253 /// \endverbatim 254 /// 255 /// Control flow is expressed as a branch where the true exit goes into the 256 /// "Then"/"Else" region, while the false exit skips the region 257 /// The condition for the optional "Else" region is expressed as a PHI node. 258 /// The incoming values of the PHI node are true for the "If" edge and false 259 /// for the "Then" edge. 260 /// 261 /// Additionally to that even complicated loops look like this: 262 /// 263 /// \verbatim 264 /// 1 265 /// || 266 /// | | 267 /// 2 ^ Where: 268 /// | / 1 = "Entry" block 269 /// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block 270 /// 3 3 = "Flow" block, with back edge to entry block 271 /// | 272 /// \endverbatim 273 /// 274 /// The back edge of the "Flow" block is always on the false side of the branch 275 /// while the true side continues the general flow. So the loop condition 276 /// consist of a network of PHI nodes where the true incoming values expresses 277 /// breaks and the false values expresses continue states. 278 279 class StructurizeCFG { 280 Type *Boolean; 281 ConstantInt *BoolTrue; 282 ConstantInt *BoolFalse; 283 Value *BoolPoison; 284 285 Function *Func; 286 Region *ParentRegion; 287 288 UniformityInfo *UA = nullptr; 289 DominatorTree *DT; 290 291 SmallVector<RegionNode *, 8> Order; 292 BBSet Visited; 293 BBSet FlowSet; 294 295 SmallVector<WeakVH, 8> AffectedPhis; 296 BBPhiMap DeletedPhis; 297 BB2BBVecMap AddedPhis; 298 299 PredMap Predicates; 300 BranchVector Conditions; 301 302 BB2BBMap Loops; 303 PredMap LoopPreds; 304 BranchVector LoopConds; 305 306 BranchDebugLocMap TermDL; 307 308 RegionNode *PrevNode; 309 310 void orderNodes(); 311 312 void analyzeLoops(RegionNode *N); 313 314 PredInfo buildCondition(BranchInst *Term, unsigned Idx, bool Invert); 315 316 void gatherPredicates(RegionNode *N); 317 318 void collectInfos(); 319 320 void insertConditions(bool Loops); 321 322 void simplifyConditions(); 323 324 void delPhiValues(BasicBlock *From, BasicBlock *To); 325 326 void addPhiValues(BasicBlock *From, BasicBlock *To); 327 328 void findUndefBlocks(BasicBlock *PHIBlock, 329 const SmallSet<BasicBlock *, 8> &Incomings, 330 SmallVector<BasicBlock *> &UndefBlks) const; 331 332 void mergeIfCompatible(EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A, 333 PHINode *B); 334 335 void setPhiValues(); 336 337 void simplifyAffectedPhis(); 338 339 void killTerminator(BasicBlock *BB); 340 341 void changeExit(RegionNode *Node, BasicBlock *NewExit, 342 bool IncludeDominator); 343 344 BasicBlock *getNextFlow(BasicBlock *Dominator); 345 346 BasicBlock *needPrefix(bool NeedEmpty); 347 348 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed); 349 350 void setPrevNode(BasicBlock *BB); 351 352 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node); 353 354 bool isPredictableTrue(RegionNode *Node); 355 356 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd); 357 358 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd); 359 360 void createFlow(); 361 362 void rebuildSSA(); 363 364 public: 365 void init(Region *R); 366 bool run(Region *R, DominatorTree *DT); 367 bool makeUniformRegion(Region *R, UniformityInfo &UA); 368 }; 369 370 class StructurizeCFGLegacyPass : public RegionPass { 371 bool SkipUniformRegions; 372 373 public: 374 static char ID; 375 376 explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false) 377 : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) { 378 if (ForceSkipUniformRegions.getNumOccurrences()) 379 SkipUniformRegions = ForceSkipUniformRegions.getValue(); 380 initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry()); 381 } 382 383 bool runOnRegion(Region *R, RGPassManager &RGM) override { 384 StructurizeCFG SCFG; 385 SCFG.init(R); 386 if (SkipUniformRegions) { 387 UniformityInfo &UA = 388 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo(); 389 if (SCFG.makeUniformRegion(R, UA)) 390 return false; 391 } 392 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 393 return SCFG.run(R, DT); 394 } 395 396 StringRef getPassName() const override { return "Structurize control flow"; } 397 398 void getAnalysisUsage(AnalysisUsage &AU) const override { 399 if (SkipUniformRegions) 400 AU.addRequired<UniformityInfoWrapperPass>(); 401 AU.addRequired<DominatorTreeWrapperPass>(); 402 403 AU.addPreserved<DominatorTreeWrapperPass>(); 404 RegionPass::getAnalysisUsage(AU); 405 } 406 }; 407 408 } // end anonymous namespace 409 410 char StructurizeCFGLegacyPass::ID = 0; 411 412 INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg", 413 "Structurize the CFG", false, false) 414 INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) 415 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 416 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 417 INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg", 418 "Structurize the CFG", false, false) 419 420 /// Build up the general order of nodes, by performing a topological sort of the 421 /// parent region's nodes, while ensuring that there is no outer cycle node 422 /// between any two inner cycle nodes. 423 void StructurizeCFG::orderNodes() { 424 Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion), 425 GraphTraits<Region *>::nodes_end(ParentRegion))); 426 if (Order.empty()) 427 return; 428 429 SmallDenseSet<RegionNode *> Nodes; 430 auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion); 431 432 // A list of range indices of SCCs in Order, to be processed. 433 SmallVector<std::pair<unsigned, unsigned>, 8> WorkList; 434 unsigned I = 0, E = Order.size(); 435 while (true) { 436 // Run through all the SCCs in the subgraph starting with Entry. 437 for (auto SCCI = 438 scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin( 439 EntryNode); 440 !SCCI.isAtEnd(); ++SCCI) { 441 auto &SCC = *SCCI; 442 443 // An SCC up to the size of 2, can be reduced to an entry (the last node), 444 // and a possible additional node. Therefore, it is already in order, and 445 // there is no need to add it to the work-list. 446 unsigned Size = SCC.size(); 447 if (Size > 2) 448 WorkList.emplace_back(I, I + Size); 449 450 // Add the SCC nodes to the Order array. 451 for (const auto &N : SCC) { 452 assert(I < E && "SCC size mismatch!"); 453 Order[I++] = N.first; 454 } 455 } 456 assert(I == E && "SCC size mismatch!"); 457 458 // If there are no more SCCs to order, then we are done. 459 if (WorkList.empty()) 460 break; 461 462 std::tie(I, E) = WorkList.pop_back_val(); 463 464 // Collect the set of nodes in the SCC's subgraph. These are only the 465 // possible child nodes; we do not add the entry (last node) otherwise we 466 // will have the same exact SCC all over again. 467 Nodes.clear(); 468 Nodes.insert(Order.begin() + I, Order.begin() + E - 1); 469 470 // Update the entry node. 471 EntryNode.first = Order[E - 1]; 472 EntryNode.second = &Nodes; 473 } 474 } 475 476 /// Determine the end of the loops 477 void StructurizeCFG::analyzeLoops(RegionNode *N) { 478 if (N->isSubRegion()) { 479 // Test for exit as back edge 480 BasicBlock *Exit = N->getNodeAs<Region>()->getExit(); 481 if (Visited.count(Exit)) 482 Loops[Exit] = N->getEntry(); 483 484 } else { 485 // Test for successors as back edge 486 BasicBlock *BB = N->getNodeAs<BasicBlock>(); 487 BranchInst *Term = cast<BranchInst>(BB->getTerminator()); 488 489 for (BasicBlock *Succ : Term->successors()) 490 if (Visited.count(Succ)) 491 Loops[Succ] = BB; 492 } 493 } 494 495 /// Build the condition for one edge 496 PredInfo StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, 497 bool Invert) { 498 Value *Cond = Invert ? BoolFalse : BoolTrue; 499 MaybeCondBranchWeights Weights; 500 501 if (Term->isConditional()) { 502 Cond = Term->getCondition(); 503 Weights = CondBranchWeights::tryParse(*Term); 504 505 if (Idx != (unsigned)Invert) { 506 Cond = invertCondition(Cond); 507 if (Weights) 508 Weights = Weights->invert(); 509 } 510 } 511 return {Cond, Weights}; 512 } 513 514 /// Analyze the predecessors of each block and build up predicates 515 void StructurizeCFG::gatherPredicates(RegionNode *N) { 516 RegionInfo *RI = ParentRegion->getRegionInfo(); 517 BasicBlock *BB = N->getEntry(); 518 BBPredicates &Pred = Predicates[BB]; 519 BBPredicates &LPred = LoopPreds[BB]; 520 521 for (BasicBlock *P : predecessors(BB)) { 522 // Ignore it if it's a branch from outside into our region entry 523 if (!ParentRegion->contains(P)) 524 continue; 525 526 Region *R = RI->getRegionFor(P); 527 if (R == ParentRegion) { 528 // It's a top level block in our region 529 BranchInst *Term = cast<BranchInst>(P->getTerminator()); 530 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { 531 BasicBlock *Succ = Term->getSuccessor(i); 532 if (Succ != BB) 533 continue; 534 535 if (Visited.count(P)) { 536 // Normal forward edge 537 if (Term->isConditional()) { 538 // Try to treat it like an ELSE block 539 BasicBlock *Other = Term->getSuccessor(!i); 540 if (Visited.count(Other) && !Loops.count(Other) && 541 !Pred.count(Other) && !Pred.count(P)) { 542 543 Pred[Other] = {BoolFalse, std::nullopt}; 544 Pred[P] = {BoolTrue, std::nullopt}; 545 continue; 546 } 547 } 548 Pred[P] = buildCondition(Term, i, false); 549 } else { 550 // Back edge 551 LPred[P] = buildCondition(Term, i, true); 552 } 553 } 554 } else { 555 // It's an exit from a sub region 556 while (R->getParent() != ParentRegion) 557 R = R->getParent(); 558 559 // Edge from inside a subregion to its entry, ignore it 560 if (*R == *N) 561 continue; 562 563 BasicBlock *Entry = R->getEntry(); 564 if (Visited.count(Entry)) 565 Pred[Entry] = {BoolTrue, std::nullopt}; 566 else 567 LPred[Entry] = {BoolFalse, std::nullopt}; 568 } 569 } 570 } 571 572 /// Collect various loop and predicate infos 573 void StructurizeCFG::collectInfos() { 574 // Reset predicate 575 Predicates.clear(); 576 577 // and loop infos 578 Loops.clear(); 579 LoopPreds.clear(); 580 581 // Reset the visited nodes 582 Visited.clear(); 583 584 for (RegionNode *RN : reverse(Order)) { 585 LLVM_DEBUG(dbgs() << "Visiting: " 586 << (RN->isSubRegion() ? "SubRegion with entry: " : "") 587 << RN->getEntry()->getName() << "\n"); 588 589 // Analyze all the conditions leading to a node 590 gatherPredicates(RN); 591 592 // Remember that we've seen this node 593 Visited.insert(RN->getEntry()); 594 595 // Find the last back edges 596 analyzeLoops(RN); 597 } 598 599 // Reset the collected term debug locations 600 TermDL.clear(); 601 602 for (BasicBlock &BB : *Func) { 603 if (const DebugLoc &DL = BB.getTerminator()->getDebugLoc()) 604 TermDL[&BB] = DL; 605 } 606 } 607 608 /// Insert the missing branch conditions 609 void StructurizeCFG::insertConditions(bool Loops) { 610 BranchVector &Conds = Loops ? LoopConds : Conditions; 611 Value *Default = Loops ? BoolTrue : BoolFalse; 612 SSAUpdater PhiInserter; 613 614 for (BranchInst *Term : Conds) { 615 assert(Term->isConditional()); 616 617 BasicBlock *Parent = Term->getParent(); 618 BasicBlock *SuccTrue = Term->getSuccessor(0); 619 BasicBlock *SuccFalse = Term->getSuccessor(1); 620 621 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue]; 622 623 if (Preds.size() == 1 && Preds.begin()->first == Parent) { 624 auto &PI = Preds.begin()->second; 625 Term->setCondition(PI.Pred); 626 CondBranchWeights::setMetadata(*Term, PI.Weights); 627 } else { 628 PhiInserter.Initialize(Boolean, ""); 629 PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default); 630 631 NearestCommonDominator Dominator(DT); 632 Dominator.addBlock(Parent); 633 634 for (auto [BB, PI] : Preds) { 635 assert(BB != Parent); 636 PhiInserter.AddAvailableValue(BB, PI.Pred); 637 Dominator.addAndRememberBlock(BB); 638 } 639 640 if (!Dominator.resultIsRememberedBlock()) 641 PhiInserter.AddAvailableValue(Dominator.result(), Default); 642 643 Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent)); 644 } 645 } 646 } 647 648 /// Simplify any inverted conditions that were built by buildConditions. 649 void StructurizeCFG::simplifyConditions() { 650 SmallVector<Instruction *> InstToErase; 651 for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) { 652 auto &Preds = I.second; 653 for (auto [BB, PI] : Preds) { 654 Instruction *Inverted; 655 if (match(PI.Pred, m_Not(m_OneUse(m_Instruction(Inverted)))) && 656 !PI.Pred->use_empty()) { 657 if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) { 658 InvertedCmp->setPredicate(InvertedCmp->getInversePredicate()); 659 PI.Pred->replaceAllUsesWith(InvertedCmp); 660 InstToErase.push_back(cast<Instruction>(PI.Pred)); 661 } 662 } 663 } 664 } 665 for (auto *I : InstToErase) 666 I->eraseFromParent(); 667 } 668 669 /// Remove all PHI values coming from "From" into "To" and remember 670 /// them in DeletedPhis 671 void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { 672 PhiMap &Map = DeletedPhis[To]; 673 for (PHINode &Phi : To->phis()) { 674 bool Recorded = false; 675 while (Phi.getBasicBlockIndex(From) != -1) { 676 Value *Deleted = Phi.removeIncomingValue(From, false); 677 Map[&Phi].push_back(std::make_pair(From, Deleted)); 678 if (!Recorded) { 679 AffectedPhis.push_back(&Phi); 680 Recorded = true; 681 } 682 } 683 } 684 } 685 686 /// Add a dummy PHI value as soon as we knew the new predecessor 687 void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { 688 for (PHINode &Phi : To->phis()) { 689 Value *Poison = PoisonValue::get(Phi.getType()); 690 Phi.addIncoming(Poison, From); 691 } 692 AddedPhis[To].push_back(From); 693 } 694 695 /// When we are reconstructing a PHI inside \p PHIBlock with incoming values 696 /// from predecessors \p Incomings, we have a chance to mark the available value 697 /// from some blocks as undefined. The function will find out all such blocks 698 /// and return in \p UndefBlks. 699 void StructurizeCFG::findUndefBlocks( 700 BasicBlock *PHIBlock, const SmallSet<BasicBlock *, 8> &Incomings, 701 SmallVector<BasicBlock *> &UndefBlks) const { 702 // We may get a post-structured CFG like below: 703 // 704 // | P1 705 // |/ 706 // F1 707 // |\ 708 // | N 709 // |/ 710 // F2 711 // |\ 712 // | P2 713 // |/ 714 // F3 715 // |\ 716 // B 717 // 718 // B is the block that has a PHI being reconstructed. P1/P2 are predecessors 719 // of B before structurization. F1/F2/F3 are flow blocks inserted during 720 // structurization process. Block N is not a predecessor of B before 721 // structurization, but are placed between the predecessors(P1/P2) of B after 722 // structurization. This usually means that threads went to N never take the 723 // path N->F2->F3->B. For example, the threads take the branch F1->N may 724 // always take the branch F2->P2. So, when we are reconstructing a PHI 725 // originally in B, we can safely say the incoming value from N is undefined. 726 SmallSet<BasicBlock *, 8> VisitedBlock; 727 SmallVector<BasicBlock *, 8> Stack; 728 if (PHIBlock == ParentRegion->getExit()) { 729 for (auto P : predecessors(PHIBlock)) { 730 if (ParentRegion->contains(P)) 731 Stack.push_back(P); 732 } 733 } else { 734 append_range(Stack, predecessors(PHIBlock)); 735 } 736 737 // Do a backward traversal over the CFG, and stop further searching if 738 // the block is not a Flow. If a block is neither flow block nor the 739 // incoming predecessor, then the incoming value from the block is 740 // undefined value for the PHI being reconstructed. 741 while (!Stack.empty()) { 742 BasicBlock *Current = Stack.pop_back_val(); 743 if (!VisitedBlock.insert(Current).second) 744 continue; 745 746 if (FlowSet.contains(Current)) { 747 for (auto P : predecessors(Current)) 748 Stack.push_back(P); 749 } else if (!Incomings.contains(Current)) { 750 UndefBlks.push_back(Current); 751 } 752 } 753 } 754 755 // If two phi nodes have compatible incoming values (for each 756 // incoming block, either they have the same incoming value or only one phi 757 // node has an incoming value), let them share the merged incoming values. The 758 // merge process is guided by the equivalence information from \p PhiClasses. 759 // The function will possibly update the incoming values of leader phi in 760 // DeletedPhis. 761 void StructurizeCFG::mergeIfCompatible( 762 EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A, PHINode *B) { 763 auto ItA = PhiClasses.findLeader(PhiClasses.insert(A)); 764 auto ItB = PhiClasses.findLeader(PhiClasses.insert(B)); 765 // They are already in the same class, no work needed. 766 if (ItA == ItB) 767 return; 768 769 PHINode *LeaderA = *ItA; 770 PHINode *LeaderB = *ItB; 771 BBValueVector &IncomingA = DeletedPhis[LeaderA->getParent()][LeaderA]; 772 BBValueVector &IncomingB = DeletedPhis[LeaderB->getParent()][LeaderB]; 773 774 DenseMap<BasicBlock *, Value *> Mergeable(IncomingA.begin(), IncomingA.end()); 775 for (auto [BB, V] : IncomingB) { 776 auto BBIt = Mergeable.find(BB); 777 if (BBIt != Mergeable.end() && BBIt->second != V) 778 return; 779 // Either IncomingA does not have this value or IncomingA has the same 780 // value. 781 Mergeable.insert({BB, V}); 782 } 783 784 // Update the incoming value of leaderA. 785 IncomingA.assign(Mergeable.begin(), Mergeable.end()); 786 PhiClasses.unionSets(ItA, ItB); 787 } 788 789 /// Add the real PHI value as soon as everything is set up 790 void StructurizeCFG::setPhiValues() { 791 SmallVector<PHINode *, 8> InsertedPhis; 792 SSAUpdater Updater(&InsertedPhis); 793 DenseMap<BasicBlock *, SmallVector<BasicBlock *>> UndefBlksMap; 794 795 // Find phi nodes that have compatible incoming values (either they have 796 // the same value for the same block or only one phi node has an incoming 797 // value, see example below). We only search again the phi's that are 798 // referenced by another phi, which is the case we care about. 799 // 800 // For example (-- means no incoming value): 801 // phi1 : BB1:phi2 BB2:v BB3:-- 802 // phi2: BB1:-- BB2:v BB3:w 803 // 804 // Then we can merge these incoming values and let phi1, phi2 use the 805 // same set of incoming values: 806 // 807 // phi1&phi2: BB1:phi2 BB2:v BB3:w 808 // 809 // By doing this, phi1 and phi2 would share more intermediate phi nodes. 810 // This would help reduce the number of phi nodes during SSA reconstruction 811 // and ultimately result in fewer COPY instructions. 812 // 813 // This should be correct, because if a phi node does not have incoming 814 // value from certain block, this means the block is not the predecessor 815 // of the parent block, so we actually don't care about its incoming value. 816 EquivalenceClasses<PHINode *> PhiClasses; 817 for (const auto &[To, From] : AddedPhis) { 818 auto OldPhiIt = DeletedPhis.find(To); 819 if (OldPhiIt == DeletedPhis.end()) 820 continue; 821 822 PhiMap &BlkPhis = OldPhiIt->second; 823 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To]; 824 SmallSet<BasicBlock *, 8> Incomings; 825 826 // Get the undefined blocks shared by all the phi nodes. 827 if (!BlkPhis.empty()) { 828 for (const auto &VI : BlkPhis.front().second) 829 Incomings.insert(VI.first); 830 findUndefBlocks(To, Incomings, UndefBlks); 831 } 832 833 for (const auto &[Phi, Incomings] : OldPhiIt->second) { 834 SmallVector<PHINode *> IncomingPHIs; 835 for (const auto &[BB, V] : Incomings) { 836 // First, for each phi, check whether it has incoming value which is 837 // another phi. 838 if (PHINode *P = dyn_cast<PHINode>(V)) 839 IncomingPHIs.push_back(P); 840 } 841 842 for (auto *OtherPhi : IncomingPHIs) { 843 // Skip phis that are unrelated to the phi reconstruction for now. 844 if (!DeletedPhis.contains(OtherPhi->getParent())) 845 continue; 846 mergeIfCompatible(PhiClasses, Phi, OtherPhi); 847 } 848 } 849 } 850 851 for (const auto &AddedPhi : AddedPhis) { 852 BasicBlock *To = AddedPhi.first; 853 const BBVector &From = AddedPhi.second; 854 855 if (!DeletedPhis.count(To)) 856 continue; 857 858 PhiMap &Map = DeletedPhis[To]; 859 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To]; 860 for (const auto &[Phi, Incoming] : Map) { 861 Value *Undef = UndefValue::get(Phi->getType()); 862 Updater.Initialize(Phi->getType(), ""); 863 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 864 Updater.AddAvailableValue(To, Undef); 865 866 // Use leader phi's incoming if there is. 867 auto LeaderIt = PhiClasses.findLeader(Phi); 868 bool UseIncomingOfLeader = 869 LeaderIt != PhiClasses.member_end() && *LeaderIt != Phi; 870 const auto &IncomingMap = 871 UseIncomingOfLeader ? DeletedPhis[(*LeaderIt)->getParent()][*LeaderIt] 872 : Incoming; 873 874 SmallVector<BasicBlock *> ConstantPreds; 875 for (const auto &[BB, V] : IncomingMap) { 876 Updater.AddAvailableValue(BB, V); 877 if (isa<Constant>(V)) 878 ConstantPreds.push_back(BB); 879 } 880 881 for (auto UB : UndefBlks) { 882 // If this undef block is dominated by any predecessor(before 883 // structurization) of reconstructed PHI with constant incoming value, 884 // don't mark the available value as undefined. Setting undef to such 885 // block will stop us from getting optimal phi insertion. 886 if (any_of(ConstantPreds, 887 [&](BasicBlock *CP) { return DT->dominates(CP, UB); })) 888 continue; 889 // Maybe already get a value through sharing with other phi nodes. 890 if (Updater.HasValueForBlock(UB)) 891 continue; 892 893 Updater.AddAvailableValue(UB, Undef); 894 } 895 896 for (BasicBlock *FI : From) 897 Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI)); 898 AffectedPhis.push_back(Phi); 899 } 900 } 901 902 AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end()); 903 } 904 905 void StructurizeCFG::simplifyAffectedPhis() { 906 bool Changed; 907 do { 908 Changed = false; 909 SimplifyQuery Q(Func->getDataLayout()); 910 Q.DT = DT; 911 // Setting CanUseUndef to true might extend value liveness, set it to false 912 // to achieve better register pressure. 913 Q.CanUseUndef = false; 914 for (WeakVH VH : AffectedPhis) { 915 if (auto Phi = dyn_cast_or_null<PHINode>(VH)) { 916 if (auto NewValue = simplifyInstruction(Phi, Q)) { 917 Phi->replaceAllUsesWith(NewValue); 918 Phi->eraseFromParent(); 919 Changed = true; 920 } 921 } 922 } 923 } while (Changed); 924 } 925 926 /// Remove phi values from all successors and then remove the terminator. 927 void StructurizeCFG::killTerminator(BasicBlock *BB) { 928 Instruction *Term = BB->getTerminator(); 929 if (!Term) 930 return; 931 932 for (BasicBlock *Succ : successors(BB)) 933 delPhiValues(BB, Succ); 934 935 Term->eraseFromParent(); 936 } 937 938 /// Let node exit(s) point to NewExit 939 void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, 940 bool IncludeDominator) { 941 if (Node->isSubRegion()) { 942 Region *SubRegion = Node->getNodeAs<Region>(); 943 BasicBlock *OldExit = SubRegion->getExit(); 944 BasicBlock *Dominator = nullptr; 945 946 // Find all the edges from the sub region to the exit. 947 // We use make_early_inc_range here because we modify BB's terminator. 948 for (BasicBlock *BB : llvm::make_early_inc_range(predecessors(OldExit))) { 949 if (!SubRegion->contains(BB)) 950 continue; 951 952 // Modify the edges to point to the new exit 953 delPhiValues(BB, OldExit); 954 BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit); 955 addPhiValues(BB, NewExit); 956 957 // Find the new dominator (if requested) 958 if (IncludeDominator) { 959 if (!Dominator) 960 Dominator = BB; 961 else 962 Dominator = DT->findNearestCommonDominator(Dominator, BB); 963 } 964 } 965 966 // Change the dominator (if requested) 967 if (Dominator) 968 DT->changeImmediateDominator(NewExit, Dominator); 969 970 // Update the region info 971 SubRegion->replaceExit(NewExit); 972 } else { 973 BasicBlock *BB = Node->getNodeAs<BasicBlock>(); 974 killTerminator(BB); 975 BranchInst *Br = BranchInst::Create(NewExit, BB); 976 Br->setDebugLoc(TermDL[BB]); 977 addPhiValues(BB, NewExit); 978 if (IncludeDominator) 979 DT->changeImmediateDominator(NewExit, BB); 980 } 981 } 982 983 /// Create a new flow node and update dominator tree and region info 984 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) { 985 LLVMContext &Context = Func->getContext(); 986 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() : 987 Order.back()->getEntry(); 988 BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, 989 Func, Insert); 990 FlowSet.insert(Flow); 991 992 // use a temporary variable to avoid a use-after-free if the map's storage is 993 // reallocated 994 DebugLoc DL = TermDL[Dominator]; 995 TermDL[Flow] = std::move(DL); 996 997 DT->addNewBlock(Flow, Dominator); 998 ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); 999 return Flow; 1000 } 1001 1002 /// Create a new or reuse the previous node as flow node 1003 BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) { 1004 BasicBlock *Entry = PrevNode->getEntry(); 1005 1006 if (!PrevNode->isSubRegion()) { 1007 killTerminator(Entry); 1008 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end()) 1009 return Entry; 1010 } 1011 1012 // create a new flow node 1013 BasicBlock *Flow = getNextFlow(Entry); 1014 1015 // and wire it up 1016 changeExit(PrevNode, Flow, true); 1017 PrevNode = ParentRegion->getBBNode(Flow); 1018 return Flow; 1019 } 1020 1021 /// Returns the region exit if possible, otherwise just a new flow node 1022 BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow, 1023 bool ExitUseAllowed) { 1024 if (!Order.empty() || !ExitUseAllowed) 1025 return getNextFlow(Flow); 1026 1027 BasicBlock *Exit = ParentRegion->getExit(); 1028 DT->changeImmediateDominator(Exit, Flow); 1029 addPhiValues(Flow, Exit); 1030 return Exit; 1031 } 1032 1033 /// Set the previous node 1034 void StructurizeCFG::setPrevNode(BasicBlock *BB) { 1035 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) 1036 : nullptr; 1037 } 1038 1039 /// Does BB dominate all the predicates of Node? 1040 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) { 1041 BBPredicates &Preds = Predicates[Node->getEntry()]; 1042 return llvm::all_of(Preds, [&](std::pair<BasicBlock *, PredInfo> Pred) { 1043 return DT->dominates(BB, Pred.first); 1044 }); 1045 } 1046 1047 /// Can we predict that this node will always be called? 1048 bool StructurizeCFG::isPredictableTrue(RegionNode *Node) { 1049 BBPredicates &Preds = Predicates[Node->getEntry()]; 1050 bool Dominated = false; 1051 1052 // Regionentry is always true 1053 if (!PrevNode) 1054 return true; 1055 1056 for (auto [BB, PI] : Preds) { 1057 if (PI.Pred != BoolTrue) 1058 return false; 1059 1060 if (!Dominated && DT->dominates(BB, PrevNode->getEntry())) 1061 Dominated = true; 1062 } 1063 1064 // TODO: The dominator check is too strict 1065 return Dominated; 1066 } 1067 1068 /// Take one node from the order vector and wire it up 1069 void StructurizeCFG::wireFlow(bool ExitUseAllowed, 1070 BasicBlock *LoopEnd) { 1071 RegionNode *Node = Order.pop_back_val(); 1072 Visited.insert(Node->getEntry()); 1073 1074 if (isPredictableTrue(Node)) { 1075 // Just a linear flow 1076 if (PrevNode) { 1077 changeExit(PrevNode, Node->getEntry(), true); 1078 } 1079 PrevNode = Node; 1080 } else { 1081 // Insert extra prefix node (or reuse last one) 1082 BasicBlock *Flow = needPrefix(false); 1083 1084 // Insert extra postfix node (or use exit instead) 1085 BasicBlock *Entry = Node->getEntry(); 1086 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed); 1087 1088 // let it point to entry and next block 1089 BranchInst *Br = BranchInst::Create(Entry, Next, BoolPoison, Flow); 1090 Br->setDebugLoc(TermDL[Flow]); 1091 Conditions.push_back(Br); 1092 addPhiValues(Flow, Entry); 1093 DT->changeImmediateDominator(Entry, Flow); 1094 1095 PrevNode = Node; 1096 while (!Order.empty() && !Visited.count(LoopEnd) && 1097 dominatesPredicates(Entry, Order.back())) { 1098 handleLoops(false, LoopEnd); 1099 } 1100 1101 changeExit(PrevNode, Next, false); 1102 setPrevNode(Next); 1103 } 1104 } 1105 1106 void StructurizeCFG::handleLoops(bool ExitUseAllowed, 1107 BasicBlock *LoopEnd) { 1108 RegionNode *Node = Order.back(); 1109 BasicBlock *LoopStart = Node->getEntry(); 1110 1111 if (!Loops.count(LoopStart)) { 1112 wireFlow(ExitUseAllowed, LoopEnd); 1113 return; 1114 } 1115 1116 if (!isPredictableTrue(Node)) 1117 LoopStart = needPrefix(true); 1118 1119 LoopEnd = Loops[Node->getEntry()]; 1120 wireFlow(false, LoopEnd); 1121 while (!Visited.count(LoopEnd)) { 1122 handleLoops(false, LoopEnd); 1123 } 1124 1125 assert(LoopStart != &LoopStart->getParent()->getEntryBlock()); 1126 1127 // Create an extra loop end node 1128 LoopEnd = needPrefix(false); 1129 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed); 1130 BranchInst *Br = BranchInst::Create(Next, LoopStart, BoolPoison, LoopEnd); 1131 Br->setDebugLoc(TermDL[LoopEnd]); 1132 LoopConds.push_back(Br); 1133 addPhiValues(LoopEnd, LoopStart); 1134 setPrevNode(Next); 1135 } 1136 1137 /// After this function control flow looks like it should be, but 1138 /// branches and PHI nodes only have undefined conditions. 1139 void StructurizeCFG::createFlow() { 1140 BasicBlock *Exit = ParentRegion->getExit(); 1141 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit); 1142 1143 AffectedPhis.clear(); 1144 DeletedPhis.clear(); 1145 AddedPhis.clear(); 1146 Conditions.clear(); 1147 LoopConds.clear(); 1148 1149 PrevNode = nullptr; 1150 Visited.clear(); 1151 1152 while (!Order.empty()) { 1153 handleLoops(EntryDominatesExit, nullptr); 1154 } 1155 1156 if (PrevNode) 1157 changeExit(PrevNode, Exit, EntryDominatesExit); 1158 else 1159 assert(EntryDominatesExit); 1160 } 1161 1162 /// Handle a rare case where the disintegrated nodes instructions 1163 /// no longer dominate all their uses. Not sure if this is really necessary 1164 void StructurizeCFG::rebuildSSA() { 1165 SSAUpdater Updater; 1166 for (BasicBlock *BB : ParentRegion->blocks()) 1167 for (Instruction &I : *BB) { 1168 bool Initialized = false; 1169 // We may modify the use list as we iterate over it, so we use 1170 // make_early_inc_range. 1171 for (Use &U : llvm::make_early_inc_range(I.uses())) { 1172 Instruction *User = cast<Instruction>(U.getUser()); 1173 if (User->getParent() == BB) { 1174 continue; 1175 } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 1176 if (UserPN->getIncomingBlock(U) == BB) 1177 continue; 1178 } 1179 1180 if (DT->dominates(&I, User)) 1181 continue; 1182 1183 if (!Initialized) { 1184 Value *Undef = UndefValue::get(I.getType()); 1185 Updater.Initialize(I.getType(), ""); 1186 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 1187 Updater.AddAvailableValue(BB, &I); 1188 Initialized = true; 1189 } 1190 Updater.RewriteUseAfterInsertions(U); 1191 } 1192 } 1193 } 1194 1195 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID, 1196 const UniformityInfo &UA) { 1197 // Bool for if all sub-regions are uniform. 1198 bool SubRegionsAreUniform = true; 1199 // Count of how many direct children are conditional. 1200 unsigned ConditionalDirectChildren = 0; 1201 1202 for (auto *E : R->elements()) { 1203 if (!E->isSubRegion()) { 1204 auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator()); 1205 if (!Br || !Br->isConditional()) 1206 continue; 1207 1208 if (!UA.isUniform(Br)) 1209 return false; 1210 1211 // One of our direct children is conditional. 1212 ConditionalDirectChildren++; 1213 1214 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName() 1215 << " has uniform terminator\n"); 1216 } else { 1217 // Explicitly refuse to treat regions as uniform if they have non-uniform 1218 // subregions. We cannot rely on UniformityAnalysis for branches in 1219 // subregions because those branches may have been removed and re-created, 1220 // so we look for our metadata instead. 1221 // 1222 // Warning: It would be nice to treat regions as uniform based only on 1223 // their direct child basic blocks' terminators, regardless of whether 1224 // subregions are uniform or not. However, this requires a very careful 1225 // look at SIAnnotateControlFlow to make sure nothing breaks there. 1226 for (auto *BB : E->getNodeAs<Region>()->blocks()) { 1227 auto Br = dyn_cast<BranchInst>(BB->getTerminator()); 1228 if (!Br || !Br->isConditional()) 1229 continue; 1230 1231 if (!Br->getMetadata(UniformMDKindID)) { 1232 // Early exit if we cannot have relaxed uniform regions. 1233 if (!RelaxedUniformRegions) 1234 return false; 1235 1236 SubRegionsAreUniform = false; 1237 break; 1238 } 1239 } 1240 } 1241 } 1242 1243 // Our region is uniform if: 1244 // 1. All conditional branches that are direct children are uniform (checked 1245 // above). 1246 // 2. And either: 1247 // a. All sub-regions are uniform. 1248 // b. There is one or less conditional branches among the direct children. 1249 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1); 1250 } 1251 1252 void StructurizeCFG::init(Region *R) { 1253 LLVMContext &Context = R->getEntry()->getContext(); 1254 1255 Boolean = Type::getInt1Ty(Context); 1256 BoolTrue = ConstantInt::getTrue(Context); 1257 BoolFalse = ConstantInt::getFalse(Context); 1258 BoolPoison = PoisonValue::get(Boolean); 1259 1260 this->UA = nullptr; 1261 } 1262 1263 bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) { 1264 if (R->isTopLevelRegion()) 1265 return false; 1266 1267 this->UA = &UA; 1268 1269 // TODO: We could probably be smarter here with how we handle sub-regions. 1270 // We currently rely on the fact that metadata is set by earlier invocations 1271 // of the pass on sub-regions, and that this metadata doesn't get lost -- 1272 // but we shouldn't rely on metadata for correctness! 1273 unsigned UniformMDKindID = 1274 R->getEntry()->getContext().getMDKindID("structurizecfg.uniform"); 1275 1276 if (hasOnlyUniformBranches(R, UniformMDKindID, UA)) { 1277 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R 1278 << '\n'); 1279 1280 // Mark all direct child block terminators as having been treated as 1281 // uniform. To account for a possible future in which non-uniform 1282 // sub-regions are treated more cleverly, indirect children are not 1283 // marked as uniform. 1284 MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {}); 1285 for (RegionNode *E : R->elements()) { 1286 if (E->isSubRegion()) 1287 continue; 1288 1289 if (Instruction *Term = E->getEntry()->getTerminator()) 1290 Term->setMetadata(UniformMDKindID, MD); 1291 } 1292 1293 return true; 1294 } 1295 return false; 1296 } 1297 1298 /// Run the transformation for each region found 1299 bool StructurizeCFG::run(Region *R, DominatorTree *DT) { 1300 if (R->isTopLevelRegion()) 1301 return false; 1302 1303 this->DT = DT; 1304 1305 Func = R->getEntry()->getParent(); 1306 assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator."); 1307 1308 ParentRegion = R; 1309 1310 orderNodes(); 1311 collectInfos(); 1312 createFlow(); 1313 insertConditions(false); 1314 insertConditions(true); 1315 setPhiValues(); 1316 simplifyConditions(); 1317 simplifyAffectedPhis(); 1318 rebuildSSA(); 1319 1320 // Cleanup 1321 Order.clear(); 1322 Visited.clear(); 1323 DeletedPhis.clear(); 1324 AddedPhis.clear(); 1325 Predicates.clear(); 1326 Conditions.clear(); 1327 Loops.clear(); 1328 LoopPreds.clear(); 1329 LoopConds.clear(); 1330 FlowSet.clear(); 1331 TermDL.clear(); 1332 1333 return true; 1334 } 1335 1336 Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) { 1337 return new StructurizeCFGLegacyPass(SkipUniformRegions); 1338 } 1339 1340 static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) { 1341 Regions.push_back(&R); 1342 for (const auto &E : R) 1343 addRegionIntoQueue(*E, Regions); 1344 } 1345 1346 StructurizeCFGPass::StructurizeCFGPass(bool SkipUniformRegions_) 1347 : SkipUniformRegions(SkipUniformRegions_) { 1348 if (ForceSkipUniformRegions.getNumOccurrences()) 1349 SkipUniformRegions = ForceSkipUniformRegions.getValue(); 1350 } 1351 1352 void StructurizeCFGPass::printPipeline( 1353 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { 1354 static_cast<PassInfoMixin<StructurizeCFGPass> *>(this)->printPipeline( 1355 OS, MapClassName2PassName); 1356 if (SkipUniformRegions) 1357 OS << "<skip-uniform-regions>"; 1358 } 1359 1360 PreservedAnalyses StructurizeCFGPass::run(Function &F, 1361 FunctionAnalysisManager &AM) { 1362 1363 bool Changed = false; 1364 DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); 1365 auto &RI = AM.getResult<RegionInfoAnalysis>(F); 1366 1367 UniformityInfo *UI = nullptr; 1368 if (SkipUniformRegions) 1369 UI = &AM.getResult<UniformityInfoAnalysis>(F); 1370 1371 std::vector<Region *> Regions; 1372 addRegionIntoQueue(*RI.getTopLevelRegion(), Regions); 1373 while (!Regions.empty()) { 1374 Region *R = Regions.back(); 1375 Regions.pop_back(); 1376 1377 StructurizeCFG SCFG; 1378 SCFG.init(R); 1379 1380 if (SkipUniformRegions && SCFG.makeUniformRegion(R, *UI)) { 1381 Changed = true; // May have added metadata. 1382 continue; 1383 } 1384 1385 Changed |= SCFG.run(R, DT); 1386 } 1387 if (!Changed) 1388 return PreservedAnalyses::all(); 1389 PreservedAnalyses PA; 1390 PA.preserve<DominatorTreeAnalysis>(); 1391 return PA; 1392 } 1393