1 //===- StructurizeCFG.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Transforms/Scalar/StructurizeCFG.h" 10 #include "llvm/ADT/DenseMap.h" 11 #include "llvm/ADT/EquivalenceClasses.h" 12 #include "llvm/ADT/MapVector.h" 13 #include "llvm/ADT/SCCIterator.h" 14 #include "llvm/ADT/STLExtras.h" 15 #include "llvm/ADT/SmallPtrSet.h" 16 #include "llvm/ADT/SmallSet.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/Analysis/InstructionSimplify.h" 19 #include "llvm/Analysis/RegionInfo.h" 20 #include "llvm/Analysis/RegionIterator.h" 21 #include "llvm/Analysis/RegionPass.h" 22 #include "llvm/Analysis/UniformityAnalysis.h" 23 #include "llvm/IR/BasicBlock.h" 24 #include "llvm/IR/CFG.h" 25 #include "llvm/IR/Constants.h" 26 #include "llvm/IR/Dominators.h" 27 #include "llvm/IR/Function.h" 28 #include "llvm/IR/InstrTypes.h" 29 #include "llvm/IR/Instruction.h" 30 #include "llvm/IR/Instructions.h" 31 #include "llvm/IR/Metadata.h" 32 #include "llvm/IR/PassManager.h" 33 #include "llvm/IR/PatternMatch.h" 34 #include "llvm/IR/ProfDataUtils.h" 35 #include "llvm/IR/Type.h" 36 #include "llvm/IR/Use.h" 37 #include "llvm/IR/Value.h" 38 #include "llvm/IR/ValueHandle.h" 39 #include "llvm/InitializePasses.h" 40 #include "llvm/Pass.h" 41 #include "llvm/Support/Casting.h" 42 #include "llvm/Support/CommandLine.h" 43 #include "llvm/Support/Debug.h" 44 #include "llvm/Support/raw_ostream.h" 45 #include "llvm/Transforms/Scalar.h" 46 #include "llvm/Transforms/Utils.h" 47 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 48 #include "llvm/Transforms/Utils/Local.h" 49 #include "llvm/Transforms/Utils/SSAUpdater.h" 50 #include <cassert> 51 #include <utility> 52 53 using namespace llvm; 54 using namespace llvm::PatternMatch; 55 56 #define DEBUG_TYPE "structurizecfg" 57 58 // The name for newly created blocks. 59 const char FlowBlockName[] = "Flow"; 60 61 namespace { 62 63 static cl::opt<bool> ForceSkipUniformRegions( 64 "structurizecfg-skip-uniform-regions", 65 cl::Hidden, 66 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"), 67 cl::init(false)); 68 69 static cl::opt<bool> 70 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden, 71 cl::desc("Allow relaxed uniform region checks"), 72 cl::init(true)); 73 74 // Definition of the complex types used in this pass. 75 76 using BBValuePair = std::pair<BasicBlock *, Value *>; 77 78 using RNVector = SmallVector<RegionNode *, 8>; 79 using BBVector = SmallVector<BasicBlock *, 8>; 80 using BranchVector = SmallVector<BranchInst *, 8>; 81 using BBValueVector = SmallVector<BBValuePair, 2>; 82 83 using BBSet = SmallPtrSet<BasicBlock *, 8>; 84 85 using PhiMap = MapVector<PHINode *, BBValueVector>; 86 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>; 87 88 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>; 89 90 using MaybeCondBranchWeights = std::optional<class CondBranchWeights>; 91 92 class CondBranchWeights { 93 uint32_t TrueWeight; 94 uint32_t FalseWeight; 95 96 CondBranchWeights(uint32_t T, uint32_t F) : TrueWeight(T), FalseWeight(F) {} 97 98 public: 99 static MaybeCondBranchWeights tryParse(const BranchInst &Br) { 100 assert(Br.isConditional()); 101 102 uint64_t T, F; 103 if (!extractBranchWeights(Br, T, F)) 104 return std::nullopt; 105 106 return CondBranchWeights(T, F); 107 } 108 109 static void setMetadata(BranchInst &Br, 110 const MaybeCondBranchWeights &Weights) { 111 assert(Br.isConditional()); 112 if (!Weights) 113 return; 114 uint32_t Arr[] = {Weights->TrueWeight, Weights->FalseWeight}; 115 setBranchWeights(Br, Arr, false); 116 } 117 118 CondBranchWeights invert() const { 119 return CondBranchWeights{FalseWeight, TrueWeight}; 120 } 121 }; 122 123 struct PredInfo { 124 Value *Pred; 125 MaybeCondBranchWeights Weights; 126 }; 127 128 using BBPredicates = DenseMap<BasicBlock *, PredInfo>; 129 using PredMap = DenseMap<BasicBlock *, BBPredicates>; 130 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>; 131 132 using BranchDebugLocMap = DenseMap<BasicBlock *, DebugLoc>; 133 134 // A traits type that is intended to be used in graph algorithms. The graph 135 // traits starts at an entry node, and traverses the RegionNodes that are in 136 // the Nodes set. 137 struct SubGraphTraits { 138 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>; 139 using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType; 140 141 // This wraps a set of Nodes into the iterator, so we know which edges to 142 // filter out. 143 class WrappedSuccIterator 144 : public iterator_adaptor_base< 145 WrappedSuccIterator, BaseSuccIterator, 146 typename std::iterator_traits<BaseSuccIterator>::iterator_category, 147 NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> { 148 SmallDenseSet<RegionNode *> *Nodes; 149 150 public: 151 WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes) 152 : iterator_adaptor_base(It), Nodes(Nodes) {} 153 154 NodeRef operator*() const { return {*I, Nodes}; } 155 }; 156 157 static bool filterAll(const NodeRef &N) { return true; } 158 static bool filterSet(const NodeRef &N) { return N.second->count(N.first); } 159 160 using ChildIteratorType = 161 filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>; 162 163 static NodeRef getEntryNode(Region *R) { 164 return {GraphTraits<Region *>::getEntryNode(R), nullptr}; 165 } 166 167 static NodeRef getEntryNode(NodeRef N) { return N; } 168 169 static iterator_range<ChildIteratorType> children(const NodeRef &N) { 170 auto *filter = N.second ? &filterSet : &filterAll; 171 return make_filter_range( 172 make_range<WrappedSuccIterator>( 173 {GraphTraits<RegionNode *>::child_begin(N.first), N.second}, 174 {GraphTraits<RegionNode *>::child_end(N.first), N.second}), 175 filter); 176 } 177 178 static ChildIteratorType child_begin(const NodeRef &N) { 179 return children(N).begin(); 180 } 181 182 static ChildIteratorType child_end(const NodeRef &N) { 183 return children(N).end(); 184 } 185 }; 186 187 /// Finds the nearest common dominator of a set of BasicBlocks. 188 /// 189 /// For every BB you add to the set, you can specify whether we "remember" the 190 /// block. When you get the common dominator, you can also ask whether it's one 191 /// of the blocks we remembered. 192 class NearestCommonDominator { 193 DominatorTree *DT; 194 BasicBlock *Result = nullptr; 195 bool ResultIsRemembered = false; 196 197 /// Add BB to the resulting dominator. 198 void addBlock(BasicBlock *BB, bool Remember) { 199 if (!Result) { 200 Result = BB; 201 ResultIsRemembered = Remember; 202 return; 203 } 204 205 BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB); 206 if (NewResult != Result) 207 ResultIsRemembered = false; 208 if (NewResult == BB) 209 ResultIsRemembered |= Remember; 210 Result = NewResult; 211 } 212 213 public: 214 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {} 215 216 void addBlock(BasicBlock *BB) { 217 addBlock(BB, /* Remember = */ false); 218 } 219 220 void addAndRememberBlock(BasicBlock *BB) { 221 addBlock(BB, /* Remember = */ true); 222 } 223 224 /// Get the nearest common dominator of all the BBs added via addBlock() and 225 /// addAndRememberBlock(). 226 BasicBlock *result() { return Result; } 227 228 /// Is the BB returned by getResult() one of the blocks we added to the set 229 /// with addAndRememberBlock()? 230 bool resultIsRememberedBlock() { return ResultIsRemembered; } 231 }; 232 233 /// Transforms the control flow graph on one single entry/exit region 234 /// at a time. 235 /// 236 /// After the transform all "If"/"Then"/"Else" style control flow looks like 237 /// this: 238 /// 239 /// \verbatim 240 /// 1 241 /// || 242 /// | | 243 /// 2 | 244 /// | / 245 /// |/ 246 /// 3 247 /// || Where: 248 /// | | 1 = "If" block, calculates the condition 249 /// 4 | 2 = "Then" subregion, runs if the condition is true 250 /// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow 251 /// |/ 4 = "Else" optional subregion, runs if the condition is false 252 /// 5 5 = "End" block, also rejoins the control flow 253 /// \endverbatim 254 /// 255 /// Control flow is expressed as a branch where the true exit goes into the 256 /// "Then"/"Else" region, while the false exit skips the region 257 /// The condition for the optional "Else" region is expressed as a PHI node. 258 /// The incoming values of the PHI node are true for the "If" edge and false 259 /// for the "Then" edge. 260 /// 261 /// Additionally to that even complicated loops look like this: 262 /// 263 /// \verbatim 264 /// 1 265 /// || 266 /// | | 267 /// 2 ^ Where: 268 /// | / 1 = "Entry" block 269 /// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block 270 /// 3 3 = "Flow" block, with back edge to entry block 271 /// | 272 /// \endverbatim 273 /// 274 /// The back edge of the "Flow" block is always on the false side of the branch 275 /// while the true side continues the general flow. So the loop condition 276 /// consist of a network of PHI nodes where the true incoming values expresses 277 /// breaks and the false values expresses continue states. 278 279 class StructurizeCFG { 280 Type *Boolean; 281 ConstantInt *BoolTrue; 282 ConstantInt *BoolFalse; 283 Value *BoolPoison; 284 285 Function *Func; 286 Region *ParentRegion; 287 288 UniformityInfo *UA = nullptr; 289 DominatorTree *DT; 290 291 SmallVector<RegionNode *, 8> Order; 292 BBSet Visited; 293 BBSet FlowSet; 294 295 SmallVector<WeakVH, 8> AffectedPhis; 296 BBPhiMap DeletedPhis; 297 BB2BBVecMap AddedPhis; 298 299 PredMap Predicates; 300 BranchVector Conditions; 301 302 BB2BBMap Loops; 303 PredMap LoopPreds; 304 BranchVector LoopConds; 305 306 BranchDebugLocMap TermDL; 307 308 RegionNode *PrevNode; 309 310 void orderNodes(); 311 312 void analyzeLoops(RegionNode *N); 313 314 PredInfo buildCondition(BranchInst *Term, unsigned Idx, bool Invert); 315 316 void gatherPredicates(RegionNode *N); 317 318 void collectInfos(); 319 320 void insertConditions(bool Loops); 321 322 void simplifyConditions(); 323 324 void delPhiValues(BasicBlock *From, BasicBlock *To); 325 326 void addPhiValues(BasicBlock *From, BasicBlock *To); 327 328 void findUndefBlocks(BasicBlock *PHIBlock, 329 const SmallSet<BasicBlock *, 8> &Incomings, 330 SmallVector<BasicBlock *> &UndefBlks) const; 331 332 void mergeIfCompatible(EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A, 333 PHINode *B); 334 335 void setPhiValues(); 336 337 void simplifyAffectedPhis(); 338 339 void killTerminator(BasicBlock *BB); 340 341 void changeExit(RegionNode *Node, BasicBlock *NewExit, 342 bool IncludeDominator); 343 344 BasicBlock *getNextFlow(BasicBlock *Dominator); 345 346 BasicBlock *needPrefix(bool NeedEmpty); 347 348 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed); 349 350 void setPrevNode(BasicBlock *BB); 351 352 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node); 353 354 bool isPredictableTrue(RegionNode *Node); 355 356 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd); 357 358 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd); 359 360 void createFlow(); 361 362 void rebuildSSA(); 363 364 public: 365 void init(Region *R); 366 bool run(Region *R, DominatorTree *DT); 367 bool makeUniformRegion(Region *R, UniformityInfo &UA); 368 }; 369 370 class StructurizeCFGLegacyPass : public RegionPass { 371 bool SkipUniformRegions; 372 373 public: 374 static char ID; 375 376 explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false) 377 : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) { 378 if (ForceSkipUniformRegions.getNumOccurrences()) 379 SkipUniformRegions = ForceSkipUniformRegions.getValue(); 380 initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry()); 381 } 382 383 bool runOnRegion(Region *R, RGPassManager &RGM) override { 384 StructurizeCFG SCFG; 385 SCFG.init(R); 386 if (SkipUniformRegions) { 387 UniformityInfo &UA = 388 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo(); 389 if (SCFG.makeUniformRegion(R, UA)) 390 return false; 391 } 392 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 393 return SCFG.run(R, DT); 394 } 395 396 StringRef getPassName() const override { return "Structurize control flow"; } 397 398 void getAnalysisUsage(AnalysisUsage &AU) const override { 399 if (SkipUniformRegions) 400 AU.addRequired<UniformityInfoWrapperPass>(); 401 AU.addRequired<DominatorTreeWrapperPass>(); 402 403 AU.addPreserved<DominatorTreeWrapperPass>(); 404 RegionPass::getAnalysisUsage(AU); 405 } 406 }; 407 408 } // end anonymous namespace 409 410 char StructurizeCFGLegacyPass::ID = 0; 411 412 INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg", 413 "Structurize the CFG", false, false) 414 INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) 415 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 416 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 417 INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg", 418 "Structurize the CFG", false, false) 419 420 /// Build up the general order of nodes, by performing a topological sort of the 421 /// parent region's nodes, while ensuring that there is no outer cycle node 422 /// between any two inner cycle nodes. 423 void StructurizeCFG::orderNodes() { 424 Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion), 425 GraphTraits<Region *>::nodes_end(ParentRegion))); 426 if (Order.empty()) 427 return; 428 429 SmallDenseSet<RegionNode *> Nodes; 430 auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion); 431 432 // A list of range indices of SCCs in Order, to be processed. 433 SmallVector<std::pair<unsigned, unsigned>, 8> WorkList; 434 unsigned I = 0, E = Order.size(); 435 while (true) { 436 // Run through all the SCCs in the subgraph starting with Entry. 437 for (auto SCCI = 438 scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin( 439 EntryNode); 440 !SCCI.isAtEnd(); ++SCCI) { 441 auto &SCC = *SCCI; 442 443 // An SCC up to the size of 2, can be reduced to an entry (the last node), 444 // and a possible additional node. Therefore, it is already in order, and 445 // there is no need to add it to the work-list. 446 unsigned Size = SCC.size(); 447 if (Size > 2) 448 WorkList.emplace_back(I, I + Size); 449 450 // Add the SCC nodes to the Order array. 451 for (const auto &N : SCC) { 452 assert(I < E && "SCC size mismatch!"); 453 Order[I++] = N.first; 454 } 455 } 456 assert(I == E && "SCC size mismatch!"); 457 458 // If there are no more SCCs to order, then we are done. 459 if (WorkList.empty()) 460 break; 461 462 std::tie(I, E) = WorkList.pop_back_val(); 463 464 // Collect the set of nodes in the SCC's subgraph. These are only the 465 // possible child nodes; we do not add the entry (last node) otherwise we 466 // will have the same exact SCC all over again. 467 Nodes.clear(); 468 Nodes.insert(Order.begin() + I, Order.begin() + E - 1); 469 470 // Update the entry node. 471 EntryNode.first = Order[E - 1]; 472 EntryNode.second = &Nodes; 473 } 474 } 475 476 /// Determine the end of the loops 477 void StructurizeCFG::analyzeLoops(RegionNode *N) { 478 if (N->isSubRegion()) { 479 // Test for exit as back edge 480 BasicBlock *Exit = N->getNodeAs<Region>()->getExit(); 481 if (Visited.count(Exit)) 482 Loops[Exit] = N->getEntry(); 483 484 } else { 485 // Test for successors as back edge 486 BasicBlock *BB = N->getNodeAs<BasicBlock>(); 487 BranchInst *Term = cast<BranchInst>(BB->getTerminator()); 488 489 for (BasicBlock *Succ : Term->successors()) 490 if (Visited.count(Succ)) 491 Loops[Succ] = BB; 492 } 493 } 494 495 /// Build the condition for one edge 496 PredInfo StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, 497 bool Invert) { 498 Value *Cond = Invert ? BoolFalse : BoolTrue; 499 MaybeCondBranchWeights Weights; 500 501 if (Term->isConditional()) { 502 Cond = Term->getCondition(); 503 Weights = CondBranchWeights::tryParse(*Term); 504 505 if (Idx != (unsigned)Invert) { 506 Cond = invertCondition(Cond); 507 if (Weights) 508 Weights = Weights->invert(); 509 } 510 } 511 return {Cond, Weights}; 512 } 513 514 /// Analyze the predecessors of each block and build up predicates 515 void StructurizeCFG::gatherPredicates(RegionNode *N) { 516 RegionInfo *RI = ParentRegion->getRegionInfo(); 517 BasicBlock *BB = N->getEntry(); 518 BBPredicates &Pred = Predicates[BB]; 519 BBPredicates &LPred = LoopPreds[BB]; 520 521 for (BasicBlock *P : predecessors(BB)) { 522 // Ignore it if it's a branch from outside into our region entry 523 if (!ParentRegion->contains(P)) 524 continue; 525 526 Region *R = RI->getRegionFor(P); 527 if (R == ParentRegion) { 528 // It's a top level block in our region 529 BranchInst *Term = cast<BranchInst>(P->getTerminator()); 530 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { 531 BasicBlock *Succ = Term->getSuccessor(i); 532 if (Succ != BB) 533 continue; 534 535 if (Visited.count(P)) { 536 // Normal forward edge 537 if (Term->isConditional()) { 538 // Try to treat it like an ELSE block 539 BasicBlock *Other = Term->getSuccessor(!i); 540 if (Visited.count(Other) && !Loops.count(Other) && 541 !Pred.count(Other) && !Pred.count(P)) { 542 543 Pred[Other] = {BoolFalse, std::nullopt}; 544 Pred[P] = {BoolTrue, std::nullopt}; 545 continue; 546 } 547 } 548 Pred[P] = buildCondition(Term, i, false); 549 } else { 550 // Back edge 551 LPred[P] = buildCondition(Term, i, true); 552 } 553 } 554 } else { 555 // It's an exit from a sub region 556 while (R->getParent() != ParentRegion) 557 R = R->getParent(); 558 559 // Edge from inside a subregion to its entry, ignore it 560 if (*R == *N) 561 continue; 562 563 BasicBlock *Entry = R->getEntry(); 564 if (Visited.count(Entry)) 565 Pred[Entry] = {BoolTrue, std::nullopt}; 566 else 567 LPred[Entry] = {BoolFalse, std::nullopt}; 568 } 569 } 570 } 571 572 /// Collect various loop and predicate infos 573 void StructurizeCFG::collectInfos() { 574 // Reset predicate 575 Predicates.clear(); 576 577 // and loop infos 578 Loops.clear(); 579 LoopPreds.clear(); 580 581 // Reset the visited nodes 582 Visited.clear(); 583 584 for (RegionNode *RN : reverse(Order)) { 585 LLVM_DEBUG(dbgs() << "Visiting: " 586 << (RN->isSubRegion() ? "SubRegion with entry: " : "") 587 << RN->getEntry()->getName() << "\n"); 588 589 // Analyze all the conditions leading to a node 590 gatherPredicates(RN); 591 592 // Remember that we've seen this node 593 Visited.insert(RN->getEntry()); 594 595 // Find the last back edges 596 analyzeLoops(RN); 597 } 598 599 // Reset the collected term debug locations 600 TermDL.clear(); 601 602 for (BasicBlock &BB : *Func) { 603 if (const DebugLoc &DL = BB.getTerminator()->getDebugLoc()) 604 TermDL[&BB] = DL; 605 } 606 } 607 608 /// Insert the missing branch conditions 609 void StructurizeCFG::insertConditions(bool Loops) { 610 BranchVector &Conds = Loops ? LoopConds : Conditions; 611 Value *Default = Loops ? BoolTrue : BoolFalse; 612 SSAUpdater PhiInserter; 613 614 for (BranchInst *Term : Conds) { 615 assert(Term->isConditional()); 616 617 BasicBlock *Parent = Term->getParent(); 618 BasicBlock *SuccTrue = Term->getSuccessor(0); 619 BasicBlock *SuccFalse = Term->getSuccessor(1); 620 621 PhiInserter.Initialize(Boolean, ""); 622 PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default); 623 PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default); 624 625 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue]; 626 627 NearestCommonDominator Dominator(DT); 628 Dominator.addBlock(Parent); 629 630 PredInfo ParentInfo{nullptr, std::nullopt}; 631 for (auto [BB, PI] : Preds) { 632 if (BB == Parent) { 633 ParentInfo = PI; 634 break; 635 } 636 PhiInserter.AddAvailableValue(BB, PI.Pred); 637 Dominator.addAndRememberBlock(BB); 638 } 639 640 if (ParentInfo.Pred) { 641 Term->setCondition(ParentInfo.Pred); 642 CondBranchWeights::setMetadata(*Term, ParentInfo.Weights); 643 } else { 644 if (!Dominator.resultIsRememberedBlock()) 645 PhiInserter.AddAvailableValue(Dominator.result(), Default); 646 647 Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent)); 648 } 649 } 650 } 651 652 /// Simplify any inverted conditions that were built by buildConditions. 653 void StructurizeCFG::simplifyConditions() { 654 SmallVector<Instruction *> InstToErase; 655 for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) { 656 auto &Preds = I.second; 657 for (auto [BB, PI] : Preds) { 658 Instruction *Inverted; 659 if (match(PI.Pred, m_Not(m_OneUse(m_Instruction(Inverted)))) && 660 !PI.Pred->use_empty()) { 661 if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) { 662 InvertedCmp->setPredicate(InvertedCmp->getInversePredicate()); 663 PI.Pred->replaceAllUsesWith(InvertedCmp); 664 InstToErase.push_back(cast<Instruction>(PI.Pred)); 665 } 666 } 667 } 668 } 669 for (auto *I : InstToErase) 670 I->eraseFromParent(); 671 } 672 673 /// Remove all PHI values coming from "From" into "To" and remember 674 /// them in DeletedPhis 675 void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { 676 PhiMap &Map = DeletedPhis[To]; 677 for (PHINode &Phi : To->phis()) { 678 bool Recorded = false; 679 while (Phi.getBasicBlockIndex(From) != -1) { 680 Value *Deleted = Phi.removeIncomingValue(From, false); 681 Map[&Phi].push_back(std::make_pair(From, Deleted)); 682 if (!Recorded) { 683 AffectedPhis.push_back(&Phi); 684 Recorded = true; 685 } 686 } 687 } 688 } 689 690 /// Add a dummy PHI value as soon as we knew the new predecessor 691 void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { 692 for (PHINode &Phi : To->phis()) { 693 Value *Undef = UndefValue::get(Phi.getType()); 694 Phi.addIncoming(Undef, From); 695 } 696 AddedPhis[To].push_back(From); 697 } 698 699 /// When we are reconstructing a PHI inside \p PHIBlock with incoming values 700 /// from predecessors \p Incomings, we have a chance to mark the available value 701 /// from some blocks as undefined. The function will find out all such blocks 702 /// and return in \p UndefBlks. 703 void StructurizeCFG::findUndefBlocks( 704 BasicBlock *PHIBlock, const SmallSet<BasicBlock *, 8> &Incomings, 705 SmallVector<BasicBlock *> &UndefBlks) const { 706 // We may get a post-structured CFG like below: 707 // 708 // | P1 709 // |/ 710 // F1 711 // |\ 712 // | N 713 // |/ 714 // F2 715 // |\ 716 // | P2 717 // |/ 718 // F3 719 // |\ 720 // B 721 // 722 // B is the block that has a PHI being reconstructed. P1/P2 are predecessors 723 // of B before structurization. F1/F2/F3 are flow blocks inserted during 724 // structurization process. Block N is not a predecessor of B before 725 // structurization, but are placed between the predecessors(P1/P2) of B after 726 // structurization. This usually means that threads went to N never take the 727 // path N->F2->F3->B. For example, the threads take the branch F1->N may 728 // always take the branch F2->P2. So, when we are reconstructing a PHI 729 // originally in B, we can safely say the incoming value from N is undefined. 730 SmallSet<BasicBlock *, 8> VisitedBlock; 731 SmallVector<BasicBlock *, 8> Stack; 732 if (PHIBlock == ParentRegion->getExit()) { 733 for (auto P : predecessors(PHIBlock)) { 734 if (ParentRegion->contains(P)) 735 Stack.push_back(P); 736 } 737 } else { 738 append_range(Stack, predecessors(PHIBlock)); 739 } 740 741 // Do a backward traversal over the CFG, and stop further searching if 742 // the block is not a Flow. If a block is neither flow block nor the 743 // incoming predecessor, then the incoming value from the block is 744 // undefined value for the PHI being reconstructed. 745 while (!Stack.empty()) { 746 BasicBlock *Current = Stack.pop_back_val(); 747 if (!VisitedBlock.insert(Current).second) 748 continue; 749 750 if (FlowSet.contains(Current)) { 751 for (auto P : predecessors(Current)) 752 Stack.push_back(P); 753 } else if (!Incomings.contains(Current)) { 754 UndefBlks.push_back(Current); 755 } 756 } 757 } 758 759 // If two phi nodes have compatible incoming values (for each 760 // incoming block, either they have the same incoming value or only one phi 761 // node has an incoming value), let them share the merged incoming values. The 762 // merge process is guided by the equivalence information from \p PhiClasses. 763 // The function will possibly update the incoming values of leader phi in 764 // DeletedPhis. 765 void StructurizeCFG::mergeIfCompatible( 766 EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A, PHINode *B) { 767 auto ItA = PhiClasses.findLeader(PhiClasses.insert(A)); 768 auto ItB = PhiClasses.findLeader(PhiClasses.insert(B)); 769 // They are already in the same class, no work needed. 770 if (ItA == ItB) 771 return; 772 773 PHINode *LeaderA = *ItA; 774 PHINode *LeaderB = *ItB; 775 BBValueVector &IncomingA = DeletedPhis[LeaderA->getParent()][LeaderA]; 776 BBValueVector &IncomingB = DeletedPhis[LeaderB->getParent()][LeaderB]; 777 778 DenseMap<BasicBlock *, Value *> Mergeable(IncomingA.begin(), IncomingA.end()); 779 for (auto [BB, V] : IncomingB) { 780 auto BBIt = Mergeable.find(BB); 781 if (BBIt != Mergeable.end() && BBIt->second != V) 782 return; 783 // Either IncomingA does not have this value or IncomingA has the same 784 // value. 785 Mergeable.insert({BB, V}); 786 } 787 788 // Update the incoming value of leaderA. 789 IncomingA.assign(Mergeable.begin(), Mergeable.end()); 790 PhiClasses.unionSets(ItA, ItB); 791 } 792 793 /// Add the real PHI value as soon as everything is set up 794 void StructurizeCFG::setPhiValues() { 795 SmallVector<PHINode *, 8> InsertedPhis; 796 SSAUpdater Updater(&InsertedPhis); 797 DenseMap<BasicBlock *, SmallVector<BasicBlock *>> UndefBlksMap; 798 799 // Find phi nodes that have compatible incoming values (either they have 800 // the same value for the same block or only one phi node has an incoming 801 // value, see example below). We only search again the phi's that are 802 // referenced by another phi, which is the case we care about. 803 // 804 // For example (-- means no incoming value): 805 // phi1 : BB1:phi2 BB2:v BB3:-- 806 // phi2: BB1:-- BB2:v BB3:w 807 // 808 // Then we can merge these incoming values and let phi1, phi2 use the 809 // same set of incoming values: 810 // 811 // phi1&phi2: BB1:phi2 BB2:v BB3:w 812 // 813 // By doing this, phi1 and phi2 would share more intermediate phi nodes. 814 // This would help reduce the number of phi nodes during SSA reconstruction 815 // and ultimately result in fewer COPY instructions. 816 // 817 // This should be correct, because if a phi node does not have incoming 818 // value from certain block, this means the block is not the predecessor 819 // of the parent block, so we actually don't care about its incoming value. 820 EquivalenceClasses<PHINode *> PhiClasses; 821 for (const auto &[To, From] : AddedPhis) { 822 auto OldPhiIt = DeletedPhis.find(To); 823 if (OldPhiIt == DeletedPhis.end()) 824 continue; 825 826 PhiMap &BlkPhis = OldPhiIt->second; 827 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To]; 828 SmallSet<BasicBlock *, 8> Incomings; 829 830 // Get the undefined blocks shared by all the phi nodes. 831 if (!BlkPhis.empty()) { 832 for (const auto &VI : BlkPhis.front().second) 833 Incomings.insert(VI.first); 834 findUndefBlocks(To, Incomings, UndefBlks); 835 } 836 837 for (const auto &[Phi, Incomings] : OldPhiIt->second) { 838 SmallVector<PHINode *> IncomingPHIs; 839 for (const auto &[BB, V] : Incomings) { 840 // First, for each phi, check whether it has incoming value which is 841 // another phi. 842 if (PHINode *P = dyn_cast<PHINode>(V)) 843 IncomingPHIs.push_back(P); 844 } 845 846 for (auto *OtherPhi : IncomingPHIs) { 847 // Skip phis that are unrelated to the phi reconstruction for now. 848 if (!DeletedPhis.contains(OtherPhi->getParent())) 849 continue; 850 mergeIfCompatible(PhiClasses, Phi, OtherPhi); 851 } 852 } 853 } 854 855 for (const auto &AddedPhi : AddedPhis) { 856 BasicBlock *To = AddedPhi.first; 857 const BBVector &From = AddedPhi.second; 858 859 if (!DeletedPhis.count(To)) 860 continue; 861 862 PhiMap &Map = DeletedPhis[To]; 863 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To]; 864 for (const auto &[Phi, Incoming] : Map) { 865 Value *Undef = UndefValue::get(Phi->getType()); 866 Updater.Initialize(Phi->getType(), ""); 867 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 868 Updater.AddAvailableValue(To, Undef); 869 870 // Use leader phi's incoming if there is. 871 auto LeaderIt = PhiClasses.findLeader(Phi); 872 bool UseIncomingOfLeader = 873 LeaderIt != PhiClasses.member_end() && *LeaderIt != Phi; 874 const auto &IncomingMap = 875 UseIncomingOfLeader ? DeletedPhis[(*LeaderIt)->getParent()][*LeaderIt] 876 : Incoming; 877 878 SmallVector<BasicBlock *> ConstantPreds; 879 for (const auto &[BB, V] : IncomingMap) { 880 Updater.AddAvailableValue(BB, V); 881 if (isa<Constant>(V)) 882 ConstantPreds.push_back(BB); 883 } 884 885 for (auto UB : UndefBlks) { 886 // If this undef block is dominated by any predecessor(before 887 // structurization) of reconstructed PHI with constant incoming value, 888 // don't mark the available value as undefined. Setting undef to such 889 // block will stop us from getting optimal phi insertion. 890 if (any_of(ConstantPreds, 891 [&](BasicBlock *CP) { return DT->dominates(CP, UB); })) 892 continue; 893 // Maybe already get a value through sharing with other phi nodes. 894 if (Updater.HasValueForBlock(UB)) 895 continue; 896 897 Updater.AddAvailableValue(UB, Undef); 898 } 899 900 for (BasicBlock *FI : From) 901 Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI)); 902 AffectedPhis.push_back(Phi); 903 } 904 } 905 906 AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end()); 907 } 908 909 void StructurizeCFG::simplifyAffectedPhis() { 910 bool Changed; 911 do { 912 Changed = false; 913 SimplifyQuery Q(Func->getDataLayout()); 914 Q.DT = DT; 915 // Setting CanUseUndef to true might extend value liveness, set it to false 916 // to achieve better register pressure. 917 Q.CanUseUndef = false; 918 for (WeakVH VH : AffectedPhis) { 919 if (auto Phi = dyn_cast_or_null<PHINode>(VH)) { 920 if (auto NewValue = simplifyInstruction(Phi, Q)) { 921 Phi->replaceAllUsesWith(NewValue); 922 Phi->eraseFromParent(); 923 Changed = true; 924 } 925 } 926 } 927 } while (Changed); 928 } 929 930 /// Remove phi values from all successors and then remove the terminator. 931 void StructurizeCFG::killTerminator(BasicBlock *BB) { 932 Instruction *Term = BB->getTerminator(); 933 if (!Term) 934 return; 935 936 for (BasicBlock *Succ : successors(BB)) 937 delPhiValues(BB, Succ); 938 939 Term->eraseFromParent(); 940 } 941 942 /// Let node exit(s) point to NewExit 943 void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, 944 bool IncludeDominator) { 945 if (Node->isSubRegion()) { 946 Region *SubRegion = Node->getNodeAs<Region>(); 947 BasicBlock *OldExit = SubRegion->getExit(); 948 BasicBlock *Dominator = nullptr; 949 950 // Find all the edges from the sub region to the exit. 951 // We use make_early_inc_range here because we modify BB's terminator. 952 for (BasicBlock *BB : llvm::make_early_inc_range(predecessors(OldExit))) { 953 if (!SubRegion->contains(BB)) 954 continue; 955 956 // Modify the edges to point to the new exit 957 delPhiValues(BB, OldExit); 958 BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit); 959 addPhiValues(BB, NewExit); 960 961 // Find the new dominator (if requested) 962 if (IncludeDominator) { 963 if (!Dominator) 964 Dominator = BB; 965 else 966 Dominator = DT->findNearestCommonDominator(Dominator, BB); 967 } 968 } 969 970 // Change the dominator (if requested) 971 if (Dominator) 972 DT->changeImmediateDominator(NewExit, Dominator); 973 974 // Update the region info 975 SubRegion->replaceExit(NewExit); 976 } else { 977 BasicBlock *BB = Node->getNodeAs<BasicBlock>(); 978 killTerminator(BB); 979 BranchInst *Br = BranchInst::Create(NewExit, BB); 980 Br->setDebugLoc(TermDL[BB]); 981 addPhiValues(BB, NewExit); 982 if (IncludeDominator) 983 DT->changeImmediateDominator(NewExit, BB); 984 } 985 } 986 987 /// Create a new flow node and update dominator tree and region info 988 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) { 989 LLVMContext &Context = Func->getContext(); 990 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() : 991 Order.back()->getEntry(); 992 BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, 993 Func, Insert); 994 FlowSet.insert(Flow); 995 996 // use a temporary variable to avoid a use-after-free if the map's storage is 997 // reallocated 998 DebugLoc DL = TermDL[Dominator]; 999 TermDL[Flow] = std::move(DL); 1000 1001 DT->addNewBlock(Flow, Dominator); 1002 ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); 1003 return Flow; 1004 } 1005 1006 /// Create a new or reuse the previous node as flow node 1007 BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) { 1008 BasicBlock *Entry = PrevNode->getEntry(); 1009 1010 if (!PrevNode->isSubRegion()) { 1011 killTerminator(Entry); 1012 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end()) 1013 return Entry; 1014 } 1015 1016 // create a new flow node 1017 BasicBlock *Flow = getNextFlow(Entry); 1018 1019 // and wire it up 1020 changeExit(PrevNode, Flow, true); 1021 PrevNode = ParentRegion->getBBNode(Flow); 1022 return Flow; 1023 } 1024 1025 /// Returns the region exit if possible, otherwise just a new flow node 1026 BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow, 1027 bool ExitUseAllowed) { 1028 if (!Order.empty() || !ExitUseAllowed) 1029 return getNextFlow(Flow); 1030 1031 BasicBlock *Exit = ParentRegion->getExit(); 1032 DT->changeImmediateDominator(Exit, Flow); 1033 addPhiValues(Flow, Exit); 1034 return Exit; 1035 } 1036 1037 /// Set the previous node 1038 void StructurizeCFG::setPrevNode(BasicBlock *BB) { 1039 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) 1040 : nullptr; 1041 } 1042 1043 /// Does BB dominate all the predicates of Node? 1044 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) { 1045 BBPredicates &Preds = Predicates[Node->getEntry()]; 1046 return llvm::all_of(Preds, [&](std::pair<BasicBlock *, PredInfo> Pred) { 1047 return DT->dominates(BB, Pred.first); 1048 }); 1049 } 1050 1051 /// Can we predict that this node will always be called? 1052 bool StructurizeCFG::isPredictableTrue(RegionNode *Node) { 1053 BBPredicates &Preds = Predicates[Node->getEntry()]; 1054 bool Dominated = false; 1055 1056 // Regionentry is always true 1057 if (!PrevNode) 1058 return true; 1059 1060 for (auto [BB, PI] : Preds) { 1061 if (PI.Pred != BoolTrue) 1062 return false; 1063 1064 if (!Dominated && DT->dominates(BB, PrevNode->getEntry())) 1065 Dominated = true; 1066 } 1067 1068 // TODO: The dominator check is too strict 1069 return Dominated; 1070 } 1071 1072 /// Take one node from the order vector and wire it up 1073 void StructurizeCFG::wireFlow(bool ExitUseAllowed, 1074 BasicBlock *LoopEnd) { 1075 RegionNode *Node = Order.pop_back_val(); 1076 Visited.insert(Node->getEntry()); 1077 1078 if (isPredictableTrue(Node)) { 1079 // Just a linear flow 1080 if (PrevNode) { 1081 changeExit(PrevNode, Node->getEntry(), true); 1082 } 1083 PrevNode = Node; 1084 } else { 1085 // Insert extra prefix node (or reuse last one) 1086 BasicBlock *Flow = needPrefix(false); 1087 1088 // Insert extra postfix node (or use exit instead) 1089 BasicBlock *Entry = Node->getEntry(); 1090 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed); 1091 1092 // let it point to entry and next block 1093 BranchInst *Br = BranchInst::Create(Entry, Next, BoolPoison, Flow); 1094 Br->setDebugLoc(TermDL[Flow]); 1095 Conditions.push_back(Br); 1096 addPhiValues(Flow, Entry); 1097 DT->changeImmediateDominator(Entry, Flow); 1098 1099 PrevNode = Node; 1100 while (!Order.empty() && !Visited.count(LoopEnd) && 1101 dominatesPredicates(Entry, Order.back())) { 1102 handleLoops(false, LoopEnd); 1103 } 1104 1105 changeExit(PrevNode, Next, false); 1106 setPrevNode(Next); 1107 } 1108 } 1109 1110 void StructurizeCFG::handleLoops(bool ExitUseAllowed, 1111 BasicBlock *LoopEnd) { 1112 RegionNode *Node = Order.back(); 1113 BasicBlock *LoopStart = Node->getEntry(); 1114 1115 if (!Loops.count(LoopStart)) { 1116 wireFlow(ExitUseAllowed, LoopEnd); 1117 return; 1118 } 1119 1120 if (!isPredictableTrue(Node)) 1121 LoopStart = needPrefix(true); 1122 1123 LoopEnd = Loops[Node->getEntry()]; 1124 wireFlow(false, LoopEnd); 1125 while (!Visited.count(LoopEnd)) { 1126 handleLoops(false, LoopEnd); 1127 } 1128 1129 assert(LoopStart != &LoopStart->getParent()->getEntryBlock()); 1130 1131 // Create an extra loop end node 1132 LoopEnd = needPrefix(false); 1133 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed); 1134 BranchInst *Br = BranchInst::Create(Next, LoopStart, BoolPoison, LoopEnd); 1135 Br->setDebugLoc(TermDL[LoopEnd]); 1136 LoopConds.push_back(Br); 1137 addPhiValues(LoopEnd, LoopStart); 1138 setPrevNode(Next); 1139 } 1140 1141 /// After this function control flow looks like it should be, but 1142 /// branches and PHI nodes only have undefined conditions. 1143 void StructurizeCFG::createFlow() { 1144 BasicBlock *Exit = ParentRegion->getExit(); 1145 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit); 1146 1147 AffectedPhis.clear(); 1148 DeletedPhis.clear(); 1149 AddedPhis.clear(); 1150 Conditions.clear(); 1151 LoopConds.clear(); 1152 1153 PrevNode = nullptr; 1154 Visited.clear(); 1155 1156 while (!Order.empty()) { 1157 handleLoops(EntryDominatesExit, nullptr); 1158 } 1159 1160 if (PrevNode) 1161 changeExit(PrevNode, Exit, EntryDominatesExit); 1162 else 1163 assert(EntryDominatesExit); 1164 } 1165 1166 /// Handle a rare case where the disintegrated nodes instructions 1167 /// no longer dominate all their uses. Not sure if this is really necessary 1168 void StructurizeCFG::rebuildSSA() { 1169 SSAUpdater Updater; 1170 for (BasicBlock *BB : ParentRegion->blocks()) 1171 for (Instruction &I : *BB) { 1172 bool Initialized = false; 1173 // We may modify the use list as we iterate over it, so we use 1174 // make_early_inc_range. 1175 for (Use &U : llvm::make_early_inc_range(I.uses())) { 1176 Instruction *User = cast<Instruction>(U.getUser()); 1177 if (User->getParent() == BB) { 1178 continue; 1179 } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 1180 if (UserPN->getIncomingBlock(U) == BB) 1181 continue; 1182 } 1183 1184 if (DT->dominates(&I, User)) 1185 continue; 1186 1187 if (!Initialized) { 1188 Value *Undef = UndefValue::get(I.getType()); 1189 Updater.Initialize(I.getType(), ""); 1190 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 1191 Updater.AddAvailableValue(BB, &I); 1192 Initialized = true; 1193 } 1194 Updater.RewriteUseAfterInsertions(U); 1195 } 1196 } 1197 } 1198 1199 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID, 1200 const UniformityInfo &UA) { 1201 // Bool for if all sub-regions are uniform. 1202 bool SubRegionsAreUniform = true; 1203 // Count of how many direct children are conditional. 1204 unsigned ConditionalDirectChildren = 0; 1205 1206 for (auto *E : R->elements()) { 1207 if (!E->isSubRegion()) { 1208 auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator()); 1209 if (!Br || !Br->isConditional()) 1210 continue; 1211 1212 if (!UA.isUniform(Br)) 1213 return false; 1214 1215 // One of our direct children is conditional. 1216 ConditionalDirectChildren++; 1217 1218 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName() 1219 << " has uniform terminator\n"); 1220 } else { 1221 // Explicitly refuse to treat regions as uniform if they have non-uniform 1222 // subregions. We cannot rely on UniformityAnalysis for branches in 1223 // subregions because those branches may have been removed and re-created, 1224 // so we look for our metadata instead. 1225 // 1226 // Warning: It would be nice to treat regions as uniform based only on 1227 // their direct child basic blocks' terminators, regardless of whether 1228 // subregions are uniform or not. However, this requires a very careful 1229 // look at SIAnnotateControlFlow to make sure nothing breaks there. 1230 for (auto *BB : E->getNodeAs<Region>()->blocks()) { 1231 auto Br = dyn_cast<BranchInst>(BB->getTerminator()); 1232 if (!Br || !Br->isConditional()) 1233 continue; 1234 1235 if (!Br->getMetadata(UniformMDKindID)) { 1236 // Early exit if we cannot have relaxed uniform regions. 1237 if (!RelaxedUniformRegions) 1238 return false; 1239 1240 SubRegionsAreUniform = false; 1241 break; 1242 } 1243 } 1244 } 1245 } 1246 1247 // Our region is uniform if: 1248 // 1. All conditional branches that are direct children are uniform (checked 1249 // above). 1250 // 2. And either: 1251 // a. All sub-regions are uniform. 1252 // b. There is one or less conditional branches among the direct children. 1253 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1); 1254 } 1255 1256 void StructurizeCFG::init(Region *R) { 1257 LLVMContext &Context = R->getEntry()->getContext(); 1258 1259 Boolean = Type::getInt1Ty(Context); 1260 BoolTrue = ConstantInt::getTrue(Context); 1261 BoolFalse = ConstantInt::getFalse(Context); 1262 BoolPoison = PoisonValue::get(Boolean); 1263 1264 this->UA = nullptr; 1265 } 1266 1267 bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) { 1268 if (R->isTopLevelRegion()) 1269 return false; 1270 1271 this->UA = &UA; 1272 1273 // TODO: We could probably be smarter here with how we handle sub-regions. 1274 // We currently rely on the fact that metadata is set by earlier invocations 1275 // of the pass on sub-regions, and that this metadata doesn't get lost -- 1276 // but we shouldn't rely on metadata for correctness! 1277 unsigned UniformMDKindID = 1278 R->getEntry()->getContext().getMDKindID("structurizecfg.uniform"); 1279 1280 if (hasOnlyUniformBranches(R, UniformMDKindID, UA)) { 1281 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R 1282 << '\n'); 1283 1284 // Mark all direct child block terminators as having been treated as 1285 // uniform. To account for a possible future in which non-uniform 1286 // sub-regions are treated more cleverly, indirect children are not 1287 // marked as uniform. 1288 MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {}); 1289 for (RegionNode *E : R->elements()) { 1290 if (E->isSubRegion()) 1291 continue; 1292 1293 if (Instruction *Term = E->getEntry()->getTerminator()) 1294 Term->setMetadata(UniformMDKindID, MD); 1295 } 1296 1297 return true; 1298 } 1299 return false; 1300 } 1301 1302 /// Run the transformation for each region found 1303 bool StructurizeCFG::run(Region *R, DominatorTree *DT) { 1304 if (R->isTopLevelRegion()) 1305 return false; 1306 1307 this->DT = DT; 1308 1309 Func = R->getEntry()->getParent(); 1310 assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator."); 1311 1312 ParentRegion = R; 1313 1314 orderNodes(); 1315 collectInfos(); 1316 createFlow(); 1317 insertConditions(false); 1318 insertConditions(true); 1319 setPhiValues(); 1320 simplifyConditions(); 1321 simplifyAffectedPhis(); 1322 rebuildSSA(); 1323 1324 // Cleanup 1325 Order.clear(); 1326 Visited.clear(); 1327 DeletedPhis.clear(); 1328 AddedPhis.clear(); 1329 Predicates.clear(); 1330 Conditions.clear(); 1331 Loops.clear(); 1332 LoopPreds.clear(); 1333 LoopConds.clear(); 1334 FlowSet.clear(); 1335 TermDL.clear(); 1336 1337 return true; 1338 } 1339 1340 Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) { 1341 return new StructurizeCFGLegacyPass(SkipUniformRegions); 1342 } 1343 1344 static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) { 1345 Regions.push_back(&R); 1346 for (const auto &E : R) 1347 addRegionIntoQueue(*E, Regions); 1348 } 1349 1350 StructurizeCFGPass::StructurizeCFGPass(bool SkipUniformRegions_) 1351 : SkipUniformRegions(SkipUniformRegions_) { 1352 if (ForceSkipUniformRegions.getNumOccurrences()) 1353 SkipUniformRegions = ForceSkipUniformRegions.getValue(); 1354 } 1355 1356 void StructurizeCFGPass::printPipeline( 1357 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { 1358 static_cast<PassInfoMixin<StructurizeCFGPass> *>(this)->printPipeline( 1359 OS, MapClassName2PassName); 1360 if (SkipUniformRegions) 1361 OS << "<skip-uniform-regions>"; 1362 } 1363 1364 PreservedAnalyses StructurizeCFGPass::run(Function &F, 1365 FunctionAnalysisManager &AM) { 1366 1367 bool Changed = false; 1368 DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); 1369 auto &RI = AM.getResult<RegionInfoAnalysis>(F); 1370 1371 UniformityInfo *UI = nullptr; 1372 if (SkipUniformRegions) 1373 UI = &AM.getResult<UniformityInfoAnalysis>(F); 1374 1375 std::vector<Region *> Regions; 1376 addRegionIntoQueue(*RI.getTopLevelRegion(), Regions); 1377 while (!Regions.empty()) { 1378 Region *R = Regions.back(); 1379 Regions.pop_back(); 1380 1381 StructurizeCFG SCFG; 1382 SCFG.init(R); 1383 1384 if (SkipUniformRegions && SCFG.makeUniformRegion(R, *UI)) { 1385 Changed = true; // May have added metadata. 1386 continue; 1387 } 1388 1389 Changed |= SCFG.run(R, DT); 1390 } 1391 if (!Changed) 1392 return PreservedAnalyses::all(); 1393 PreservedAnalyses PA; 1394 PA.preserve<DominatorTreeAnalysis>(); 1395 return PA; 1396 } 1397