1 //===- StructurizeCFG.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Transforms/Scalar/StructurizeCFG.h" 10 #include "llvm/ADT/DenseMap.h" 11 #include "llvm/ADT/EquivalenceClasses.h" 12 #include "llvm/ADT/MapVector.h" 13 #include "llvm/ADT/SCCIterator.h" 14 #include "llvm/ADT/STLExtras.h" 15 #include "llvm/ADT/SmallPtrSet.h" 16 #include "llvm/ADT/SmallSet.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/Analysis/InstructionSimplify.h" 19 #include "llvm/Analysis/RegionInfo.h" 20 #include "llvm/Analysis/RegionIterator.h" 21 #include "llvm/Analysis/RegionPass.h" 22 #include "llvm/Analysis/UniformityAnalysis.h" 23 #include "llvm/IR/BasicBlock.h" 24 #include "llvm/IR/CFG.h" 25 #include "llvm/IR/Constants.h" 26 #include "llvm/IR/Dominators.h" 27 #include "llvm/IR/Function.h" 28 #include "llvm/IR/InstrTypes.h" 29 #include "llvm/IR/Instruction.h" 30 #include "llvm/IR/Instructions.h" 31 #include "llvm/IR/Metadata.h" 32 #include "llvm/IR/PassManager.h" 33 #include "llvm/IR/PatternMatch.h" 34 #include "llvm/IR/ProfDataUtils.h" 35 #include "llvm/IR/Type.h" 36 #include "llvm/IR/Use.h" 37 #include "llvm/IR/Value.h" 38 #include "llvm/IR/ValueHandle.h" 39 #include "llvm/InitializePasses.h" 40 #include "llvm/Pass.h" 41 #include "llvm/Support/Casting.h" 42 #include "llvm/Support/CommandLine.h" 43 #include "llvm/Support/Debug.h" 44 #include "llvm/Support/raw_ostream.h" 45 #include "llvm/Transforms/Scalar.h" 46 #include "llvm/Transforms/Utils.h" 47 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 48 #include "llvm/Transforms/Utils/Local.h" 49 #include "llvm/Transforms/Utils/SSAUpdater.h" 50 #include <cassert> 51 #include <utility> 52 53 using namespace llvm; 54 using namespace llvm::PatternMatch; 55 56 #define DEBUG_TYPE "structurizecfg" 57 58 // The name for newly created blocks. 59 const char FlowBlockName[] = "Flow"; 60 61 namespace { 62 63 static cl::opt<bool> ForceSkipUniformRegions( 64 "structurizecfg-skip-uniform-regions", 65 cl::Hidden, 66 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"), 67 cl::init(false)); 68 69 static cl::opt<bool> 70 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden, 71 cl::desc("Allow relaxed uniform region checks"), 72 cl::init(true)); 73 74 // Definition of the complex types used in this pass. 75 76 using BBValuePair = std::pair<BasicBlock *, Value *>; 77 78 using RNVector = SmallVector<RegionNode *, 8>; 79 using BBVector = SmallVector<BasicBlock *, 8>; 80 using BranchVector = SmallVector<BranchInst *, 8>; 81 using BBValueVector = SmallVector<BBValuePair, 2>; 82 83 using BBSet = SmallPtrSet<BasicBlock *, 8>; 84 85 using PhiMap = MapVector<PHINode *, BBValueVector>; 86 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>; 87 88 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>; 89 90 using MaybeCondBranchWeights = std::optional<class CondBranchWeights>; 91 92 class CondBranchWeights { 93 uint32_t TrueWeight; 94 uint32_t FalseWeight; 95 96 CondBranchWeights(uint32_t T, uint32_t F) : TrueWeight(T), FalseWeight(F) {} 97 98 public: 99 static MaybeCondBranchWeights tryParse(const BranchInst &Br) { 100 assert(Br.isConditional()); 101 102 uint64_t T, F; 103 if (!extractBranchWeights(Br, T, F)) 104 return std::nullopt; 105 106 return CondBranchWeights(T, F); 107 } 108 109 static void setMetadata(BranchInst &Br, 110 const MaybeCondBranchWeights &Weights) { 111 assert(Br.isConditional()); 112 if (!Weights) 113 return; 114 uint32_t Arr[] = {Weights->TrueWeight, Weights->FalseWeight}; 115 setBranchWeights(Br, Arr, false); 116 } 117 118 CondBranchWeights invert() const { 119 return CondBranchWeights{FalseWeight, TrueWeight}; 120 } 121 }; 122 123 using ValueWeightPair = std::pair<Value *, MaybeCondBranchWeights>; 124 125 using BBPredicates = DenseMap<BasicBlock *, ValueWeightPair>; 126 using PredMap = DenseMap<BasicBlock *, BBPredicates>; 127 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>; 128 129 using BranchDebugLocMap = DenseMap<BasicBlock *, DebugLoc>; 130 131 // A traits type that is intended to be used in graph algorithms. The graph 132 // traits starts at an entry node, and traverses the RegionNodes that are in 133 // the Nodes set. 134 struct SubGraphTraits { 135 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>; 136 using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType; 137 138 // This wraps a set of Nodes into the iterator, so we know which edges to 139 // filter out. 140 class WrappedSuccIterator 141 : public iterator_adaptor_base< 142 WrappedSuccIterator, BaseSuccIterator, 143 typename std::iterator_traits<BaseSuccIterator>::iterator_category, 144 NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> { 145 SmallDenseSet<RegionNode *> *Nodes; 146 147 public: 148 WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes) 149 : iterator_adaptor_base(It), Nodes(Nodes) {} 150 151 NodeRef operator*() const { return {*I, Nodes}; } 152 }; 153 154 static bool filterAll(const NodeRef &N) { return true; } 155 static bool filterSet(const NodeRef &N) { return N.second->count(N.first); } 156 157 using ChildIteratorType = 158 filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>; 159 160 static NodeRef getEntryNode(Region *R) { 161 return {GraphTraits<Region *>::getEntryNode(R), nullptr}; 162 } 163 164 static NodeRef getEntryNode(NodeRef N) { return N; } 165 166 static iterator_range<ChildIteratorType> children(const NodeRef &N) { 167 auto *filter = N.second ? &filterSet : &filterAll; 168 return make_filter_range( 169 make_range<WrappedSuccIterator>( 170 {GraphTraits<RegionNode *>::child_begin(N.first), N.second}, 171 {GraphTraits<RegionNode *>::child_end(N.first), N.second}), 172 filter); 173 } 174 175 static ChildIteratorType child_begin(const NodeRef &N) { 176 return children(N).begin(); 177 } 178 179 static ChildIteratorType child_end(const NodeRef &N) { 180 return children(N).end(); 181 } 182 }; 183 184 /// Finds the nearest common dominator of a set of BasicBlocks. 185 /// 186 /// For every BB you add to the set, you can specify whether we "remember" the 187 /// block. When you get the common dominator, you can also ask whether it's one 188 /// of the blocks we remembered. 189 class NearestCommonDominator { 190 DominatorTree *DT; 191 BasicBlock *Result = nullptr; 192 bool ResultIsRemembered = false; 193 194 /// Add BB to the resulting dominator. 195 void addBlock(BasicBlock *BB, bool Remember) { 196 if (!Result) { 197 Result = BB; 198 ResultIsRemembered = Remember; 199 return; 200 } 201 202 BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB); 203 if (NewResult != Result) 204 ResultIsRemembered = false; 205 if (NewResult == BB) 206 ResultIsRemembered |= Remember; 207 Result = NewResult; 208 } 209 210 public: 211 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {} 212 213 void addBlock(BasicBlock *BB) { 214 addBlock(BB, /* Remember = */ false); 215 } 216 217 void addAndRememberBlock(BasicBlock *BB) { 218 addBlock(BB, /* Remember = */ true); 219 } 220 221 /// Get the nearest common dominator of all the BBs added via addBlock() and 222 /// addAndRememberBlock(). 223 BasicBlock *result() { return Result; } 224 225 /// Is the BB returned by getResult() one of the blocks we added to the set 226 /// with addAndRememberBlock()? 227 bool resultIsRememberedBlock() { return ResultIsRemembered; } 228 }; 229 230 /// Transforms the control flow graph on one single entry/exit region 231 /// at a time. 232 /// 233 /// After the transform all "If"/"Then"/"Else" style control flow looks like 234 /// this: 235 /// 236 /// \verbatim 237 /// 1 238 /// || 239 /// | | 240 /// 2 | 241 /// | / 242 /// |/ 243 /// 3 244 /// || Where: 245 /// | | 1 = "If" block, calculates the condition 246 /// 4 | 2 = "Then" subregion, runs if the condition is true 247 /// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow 248 /// |/ 4 = "Else" optional subregion, runs if the condition is false 249 /// 5 5 = "End" block, also rejoins the control flow 250 /// \endverbatim 251 /// 252 /// Control flow is expressed as a branch where the true exit goes into the 253 /// "Then"/"Else" region, while the false exit skips the region 254 /// The condition for the optional "Else" region is expressed as a PHI node. 255 /// The incoming values of the PHI node are true for the "If" edge and false 256 /// for the "Then" edge. 257 /// 258 /// Additionally to that even complicated loops look like this: 259 /// 260 /// \verbatim 261 /// 1 262 /// || 263 /// | | 264 /// 2 ^ Where: 265 /// | / 1 = "Entry" block 266 /// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block 267 /// 3 3 = "Flow" block, with back edge to entry block 268 /// | 269 /// \endverbatim 270 /// 271 /// The back edge of the "Flow" block is always on the false side of the branch 272 /// while the true side continues the general flow. So the loop condition 273 /// consist of a network of PHI nodes where the true incoming values expresses 274 /// breaks and the false values expresses continue states. 275 276 class StructurizeCFG { 277 Type *Boolean; 278 ConstantInt *BoolTrue; 279 ConstantInt *BoolFalse; 280 Value *BoolPoison; 281 282 Function *Func; 283 Region *ParentRegion; 284 285 UniformityInfo *UA = nullptr; 286 DominatorTree *DT; 287 288 SmallVector<RegionNode *, 8> Order; 289 BBSet Visited; 290 BBSet FlowSet; 291 292 SmallVector<WeakVH, 8> AffectedPhis; 293 BBPhiMap DeletedPhis; 294 BB2BBVecMap AddedPhis; 295 296 PredMap Predicates; 297 BranchVector Conditions; 298 299 BB2BBMap Loops; 300 PredMap LoopPreds; 301 BranchVector LoopConds; 302 303 BranchDebugLocMap TermDL; 304 305 RegionNode *PrevNode; 306 307 void orderNodes(); 308 309 void analyzeLoops(RegionNode *N); 310 311 ValueWeightPair buildCondition(BranchInst *Term, unsigned Idx, bool Invert); 312 313 void gatherPredicates(RegionNode *N); 314 315 void collectInfos(); 316 317 void insertConditions(bool Loops); 318 319 void simplifyConditions(); 320 321 void delPhiValues(BasicBlock *From, BasicBlock *To); 322 323 void addPhiValues(BasicBlock *From, BasicBlock *To); 324 325 void findUndefBlocks(BasicBlock *PHIBlock, 326 const SmallSet<BasicBlock *, 8> &Incomings, 327 SmallVector<BasicBlock *> &UndefBlks) const; 328 329 void mergeIfCompatible(EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A, 330 PHINode *B); 331 332 void setPhiValues(); 333 334 void simplifyAffectedPhis(); 335 336 void killTerminator(BasicBlock *BB); 337 338 void changeExit(RegionNode *Node, BasicBlock *NewExit, 339 bool IncludeDominator); 340 341 BasicBlock *getNextFlow(BasicBlock *Dominator); 342 343 BasicBlock *needPrefix(bool NeedEmpty); 344 345 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed); 346 347 void setPrevNode(BasicBlock *BB); 348 349 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node); 350 351 bool isPredictableTrue(RegionNode *Node); 352 353 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd); 354 355 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd); 356 357 void createFlow(); 358 359 void rebuildSSA(); 360 361 public: 362 void init(Region *R); 363 bool run(Region *R, DominatorTree *DT); 364 bool makeUniformRegion(Region *R, UniformityInfo &UA); 365 }; 366 367 class StructurizeCFGLegacyPass : public RegionPass { 368 bool SkipUniformRegions; 369 370 public: 371 static char ID; 372 373 explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false) 374 : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) { 375 if (ForceSkipUniformRegions.getNumOccurrences()) 376 SkipUniformRegions = ForceSkipUniformRegions.getValue(); 377 initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry()); 378 } 379 380 bool runOnRegion(Region *R, RGPassManager &RGM) override { 381 StructurizeCFG SCFG; 382 SCFG.init(R); 383 if (SkipUniformRegions) { 384 UniformityInfo &UA = 385 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo(); 386 if (SCFG.makeUniformRegion(R, UA)) 387 return false; 388 } 389 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 390 return SCFG.run(R, DT); 391 } 392 393 StringRef getPassName() const override { return "Structurize control flow"; } 394 395 void getAnalysisUsage(AnalysisUsage &AU) const override { 396 if (SkipUniformRegions) 397 AU.addRequired<UniformityInfoWrapperPass>(); 398 AU.addRequired<DominatorTreeWrapperPass>(); 399 400 AU.addPreserved<DominatorTreeWrapperPass>(); 401 RegionPass::getAnalysisUsage(AU); 402 } 403 }; 404 405 } // end anonymous namespace 406 407 char StructurizeCFGLegacyPass::ID = 0; 408 409 INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg", 410 "Structurize the CFG", false, false) 411 INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) 412 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 413 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 414 INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg", 415 "Structurize the CFG", false, false) 416 417 /// Build up the general order of nodes, by performing a topological sort of the 418 /// parent region's nodes, while ensuring that there is no outer cycle node 419 /// between any two inner cycle nodes. 420 void StructurizeCFG::orderNodes() { 421 Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion), 422 GraphTraits<Region *>::nodes_end(ParentRegion))); 423 if (Order.empty()) 424 return; 425 426 SmallDenseSet<RegionNode *> Nodes; 427 auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion); 428 429 // A list of range indices of SCCs in Order, to be processed. 430 SmallVector<std::pair<unsigned, unsigned>, 8> WorkList; 431 unsigned I = 0, E = Order.size(); 432 while (true) { 433 // Run through all the SCCs in the subgraph starting with Entry. 434 for (auto SCCI = 435 scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin( 436 EntryNode); 437 !SCCI.isAtEnd(); ++SCCI) { 438 auto &SCC = *SCCI; 439 440 // An SCC up to the size of 2, can be reduced to an entry (the last node), 441 // and a possible additional node. Therefore, it is already in order, and 442 // there is no need to add it to the work-list. 443 unsigned Size = SCC.size(); 444 if (Size > 2) 445 WorkList.emplace_back(I, I + Size); 446 447 // Add the SCC nodes to the Order array. 448 for (const auto &N : SCC) { 449 assert(I < E && "SCC size mismatch!"); 450 Order[I++] = N.first; 451 } 452 } 453 assert(I == E && "SCC size mismatch!"); 454 455 // If there are no more SCCs to order, then we are done. 456 if (WorkList.empty()) 457 break; 458 459 std::tie(I, E) = WorkList.pop_back_val(); 460 461 // Collect the set of nodes in the SCC's subgraph. These are only the 462 // possible child nodes; we do not add the entry (last node) otherwise we 463 // will have the same exact SCC all over again. 464 Nodes.clear(); 465 Nodes.insert(Order.begin() + I, Order.begin() + E - 1); 466 467 // Update the entry node. 468 EntryNode.first = Order[E - 1]; 469 EntryNode.second = &Nodes; 470 } 471 } 472 473 /// Determine the end of the loops 474 void StructurizeCFG::analyzeLoops(RegionNode *N) { 475 if (N->isSubRegion()) { 476 // Test for exit as back edge 477 BasicBlock *Exit = N->getNodeAs<Region>()->getExit(); 478 if (Visited.count(Exit)) 479 Loops[Exit] = N->getEntry(); 480 481 } else { 482 // Test for successors as back edge 483 BasicBlock *BB = N->getNodeAs<BasicBlock>(); 484 BranchInst *Term = cast<BranchInst>(BB->getTerminator()); 485 486 for (BasicBlock *Succ : Term->successors()) 487 if (Visited.count(Succ)) 488 Loops[Succ] = BB; 489 } 490 } 491 492 /// Build the condition for one edge 493 ValueWeightPair StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, 494 bool Invert) { 495 Value *Cond = Invert ? BoolFalse : BoolTrue; 496 MaybeCondBranchWeights Weights; 497 498 if (Term->isConditional()) { 499 Cond = Term->getCondition(); 500 Weights = CondBranchWeights::tryParse(*Term); 501 502 if (Idx != (unsigned)Invert) { 503 Cond = invertCondition(Cond); 504 if (Weights) 505 Weights = Weights->invert(); 506 } 507 } 508 return {Cond, Weights}; 509 } 510 511 /// Analyze the predecessors of each block and build up predicates 512 void StructurizeCFG::gatherPredicates(RegionNode *N) { 513 RegionInfo *RI = ParentRegion->getRegionInfo(); 514 BasicBlock *BB = N->getEntry(); 515 BBPredicates &Pred = Predicates[BB]; 516 BBPredicates &LPred = LoopPreds[BB]; 517 518 for (BasicBlock *P : predecessors(BB)) { 519 // Ignore it if it's a branch from outside into our region entry 520 if (!ParentRegion->contains(P)) 521 continue; 522 523 Region *R = RI->getRegionFor(P); 524 if (R == ParentRegion) { 525 // It's a top level block in our region 526 BranchInst *Term = cast<BranchInst>(P->getTerminator()); 527 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { 528 BasicBlock *Succ = Term->getSuccessor(i); 529 if (Succ != BB) 530 continue; 531 532 if (Visited.count(P)) { 533 // Normal forward edge 534 if (Term->isConditional()) { 535 // Try to treat it like an ELSE block 536 BasicBlock *Other = Term->getSuccessor(!i); 537 if (Visited.count(Other) && !Loops.count(Other) && 538 !Pred.count(Other) && !Pred.count(P)) { 539 540 Pred[Other] = {BoolFalse, std::nullopt}; 541 Pred[P] = {BoolTrue, std::nullopt}; 542 continue; 543 } 544 } 545 Pred[P] = buildCondition(Term, i, false); 546 } else { 547 // Back edge 548 LPred[P] = buildCondition(Term, i, true); 549 } 550 } 551 } else { 552 // It's an exit from a sub region 553 while (R->getParent() != ParentRegion) 554 R = R->getParent(); 555 556 // Edge from inside a subregion to its entry, ignore it 557 if (*R == *N) 558 continue; 559 560 BasicBlock *Entry = R->getEntry(); 561 if (Visited.count(Entry)) 562 Pred[Entry] = {BoolTrue, std::nullopt}; 563 else 564 LPred[Entry] = {BoolFalse, std::nullopt}; 565 } 566 } 567 } 568 569 /// Collect various loop and predicate infos 570 void StructurizeCFG::collectInfos() { 571 // Reset predicate 572 Predicates.clear(); 573 574 // and loop infos 575 Loops.clear(); 576 LoopPreds.clear(); 577 578 // Reset the visited nodes 579 Visited.clear(); 580 581 for (RegionNode *RN : reverse(Order)) { 582 LLVM_DEBUG(dbgs() << "Visiting: " 583 << (RN->isSubRegion() ? "SubRegion with entry: " : "") 584 << RN->getEntry()->getName() << "\n"); 585 586 // Analyze all the conditions leading to a node 587 gatherPredicates(RN); 588 589 // Remember that we've seen this node 590 Visited.insert(RN->getEntry()); 591 592 // Find the last back edges 593 analyzeLoops(RN); 594 } 595 596 // Reset the collected term debug locations 597 TermDL.clear(); 598 599 for (BasicBlock &BB : *Func) { 600 if (const DebugLoc &DL = BB.getTerminator()->getDebugLoc()) 601 TermDL[&BB] = DL; 602 } 603 } 604 605 /// Insert the missing branch conditions 606 void StructurizeCFG::insertConditions(bool Loops) { 607 BranchVector &Conds = Loops ? LoopConds : Conditions; 608 Value *Default = Loops ? BoolTrue : BoolFalse; 609 SSAUpdater PhiInserter; 610 611 for (BranchInst *Term : Conds) { 612 assert(Term->isConditional()); 613 614 BasicBlock *Parent = Term->getParent(); 615 BasicBlock *SuccTrue = Term->getSuccessor(0); 616 BasicBlock *SuccFalse = Term->getSuccessor(1); 617 618 PhiInserter.Initialize(Boolean, ""); 619 PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default); 620 PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default); 621 622 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue]; 623 624 NearestCommonDominator Dominator(DT); 625 Dominator.addBlock(Parent); 626 627 Value *ParentValue = nullptr; 628 MaybeCondBranchWeights ParentWeights = std::nullopt; 629 for (std::pair<BasicBlock *, ValueWeightPair> BBAndPred : Preds) { 630 BasicBlock *BB = BBAndPred.first; 631 auto [Pred, Weight] = BBAndPred.second; 632 633 if (BB == Parent) { 634 ParentValue = Pred; 635 ParentWeights = Weight; 636 break; 637 } 638 PhiInserter.AddAvailableValue(BB, Pred); 639 Dominator.addAndRememberBlock(BB); 640 } 641 642 if (ParentValue) { 643 Term->setCondition(ParentValue); 644 CondBranchWeights::setMetadata(*Term, ParentWeights); 645 } else { 646 if (!Dominator.resultIsRememberedBlock()) 647 PhiInserter.AddAvailableValue(Dominator.result(), Default); 648 649 Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent)); 650 } 651 } 652 } 653 654 /// Simplify any inverted conditions that were built by buildConditions. 655 void StructurizeCFG::simplifyConditions() { 656 SmallVector<Instruction *> InstToErase; 657 for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) { 658 auto &Preds = I.second; 659 for (auto &J : Preds) { 660 Value *Cond = J.second.first; 661 Instruction *Inverted; 662 if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) && 663 !Cond->use_empty()) { 664 if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) { 665 InvertedCmp->setPredicate(InvertedCmp->getInversePredicate()); 666 Cond->replaceAllUsesWith(InvertedCmp); 667 InstToErase.push_back(cast<Instruction>(Cond)); 668 } 669 } 670 } 671 } 672 for (auto *I : InstToErase) 673 I->eraseFromParent(); 674 } 675 676 /// Remove all PHI values coming from "From" into "To" and remember 677 /// them in DeletedPhis 678 void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { 679 PhiMap &Map = DeletedPhis[To]; 680 for (PHINode &Phi : To->phis()) { 681 bool Recorded = false; 682 while (Phi.getBasicBlockIndex(From) != -1) { 683 Value *Deleted = Phi.removeIncomingValue(From, false); 684 Map[&Phi].push_back(std::make_pair(From, Deleted)); 685 if (!Recorded) { 686 AffectedPhis.push_back(&Phi); 687 Recorded = true; 688 } 689 } 690 } 691 } 692 693 /// Add a dummy PHI value as soon as we knew the new predecessor 694 void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { 695 for (PHINode &Phi : To->phis()) { 696 Value *Undef = UndefValue::get(Phi.getType()); 697 Phi.addIncoming(Undef, From); 698 } 699 AddedPhis[To].push_back(From); 700 } 701 702 /// When we are reconstructing a PHI inside \p PHIBlock with incoming values 703 /// from predecessors \p Incomings, we have a chance to mark the available value 704 /// from some blocks as undefined. The function will find out all such blocks 705 /// and return in \p UndefBlks. 706 void StructurizeCFG::findUndefBlocks( 707 BasicBlock *PHIBlock, const SmallSet<BasicBlock *, 8> &Incomings, 708 SmallVector<BasicBlock *> &UndefBlks) const { 709 // We may get a post-structured CFG like below: 710 // 711 // | P1 712 // |/ 713 // F1 714 // |\ 715 // | N 716 // |/ 717 // F2 718 // |\ 719 // | P2 720 // |/ 721 // F3 722 // |\ 723 // B 724 // 725 // B is the block that has a PHI being reconstructed. P1/P2 are predecessors 726 // of B before structurization. F1/F2/F3 are flow blocks inserted during 727 // structurization process. Block N is not a predecessor of B before 728 // structurization, but are placed between the predecessors(P1/P2) of B after 729 // structurization. This usually means that threads went to N never take the 730 // path N->F2->F3->B. For example, the threads take the branch F1->N may 731 // always take the branch F2->P2. So, when we are reconstructing a PHI 732 // originally in B, we can safely say the incoming value from N is undefined. 733 SmallSet<BasicBlock *, 8> VisitedBlock; 734 SmallVector<BasicBlock *, 8> Stack; 735 if (PHIBlock == ParentRegion->getExit()) { 736 for (auto P : predecessors(PHIBlock)) { 737 if (ParentRegion->contains(P)) 738 Stack.push_back(P); 739 } 740 } else { 741 append_range(Stack, predecessors(PHIBlock)); 742 } 743 744 // Do a backward traversal over the CFG, and stop further searching if 745 // the block is not a Flow. If a block is neither flow block nor the 746 // incoming predecessor, then the incoming value from the block is 747 // undefined value for the PHI being reconstructed. 748 while (!Stack.empty()) { 749 BasicBlock *Current = Stack.pop_back_val(); 750 if (!VisitedBlock.insert(Current).second) 751 continue; 752 753 if (FlowSet.contains(Current)) { 754 for (auto P : predecessors(Current)) 755 Stack.push_back(P); 756 } else if (!Incomings.contains(Current)) { 757 UndefBlks.push_back(Current); 758 } 759 } 760 } 761 762 // If two phi nodes have compatible incoming values (for each 763 // incoming block, either they have the same incoming value or only one phi 764 // node has an incoming value), let them share the merged incoming values. The 765 // merge process is guided by the equivalence information from \p PhiClasses. 766 // The function will possibly update the incoming values of leader phi in 767 // DeletedPhis. 768 void StructurizeCFG::mergeIfCompatible( 769 EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A, PHINode *B) { 770 auto ItA = PhiClasses.findLeader(PhiClasses.insert(A)); 771 auto ItB = PhiClasses.findLeader(PhiClasses.insert(B)); 772 // They are already in the same class, no work needed. 773 if (ItA == ItB) 774 return; 775 776 PHINode *LeaderA = *ItA; 777 PHINode *LeaderB = *ItB; 778 BBValueVector &IncomingA = DeletedPhis[LeaderA->getParent()][LeaderA]; 779 BBValueVector &IncomingB = DeletedPhis[LeaderB->getParent()][LeaderB]; 780 781 DenseMap<BasicBlock *, Value *> Mergeable(IncomingA.begin(), IncomingA.end()); 782 for (auto [BB, V] : IncomingB) { 783 auto BBIt = Mergeable.find(BB); 784 if (BBIt != Mergeable.end() && BBIt->second != V) 785 return; 786 // Either IncomingA does not have this value or IncomingA has the same 787 // value. 788 Mergeable.insert({BB, V}); 789 } 790 791 // Update the incoming value of leaderA. 792 IncomingA.assign(Mergeable.begin(), Mergeable.end()); 793 PhiClasses.unionSets(ItA, ItB); 794 } 795 796 /// Add the real PHI value as soon as everything is set up 797 void StructurizeCFG::setPhiValues() { 798 SmallVector<PHINode *, 8> InsertedPhis; 799 SSAUpdater Updater(&InsertedPhis); 800 DenseMap<BasicBlock *, SmallVector<BasicBlock *>> UndefBlksMap; 801 802 // Find phi nodes that have compatible incoming values (either they have 803 // the same value for the same block or only one phi node has an incoming 804 // value, see example below). We only search again the phi's that are 805 // referenced by another phi, which is the case we care about. 806 // 807 // For example (-- means no incoming value): 808 // phi1 : BB1:phi2 BB2:v BB3:-- 809 // phi2: BB1:-- BB2:v BB3:w 810 // 811 // Then we can merge these incoming values and let phi1, phi2 use the 812 // same set of incoming values: 813 // 814 // phi1&phi2: BB1:phi2 BB2:v BB3:w 815 // 816 // By doing this, phi1 and phi2 would share more intermediate phi nodes. 817 // This would help reduce the number of phi nodes during SSA reconstruction 818 // and ultimately result in fewer COPY instructions. 819 // 820 // This should be correct, because if a phi node does not have incoming 821 // value from certain block, this means the block is not the predecessor 822 // of the parent block, so we actually don't care about its incoming value. 823 EquivalenceClasses<PHINode *> PhiClasses; 824 for (const auto &[To, From] : AddedPhis) { 825 auto OldPhiIt = DeletedPhis.find(To); 826 if (OldPhiIt == DeletedPhis.end()) 827 continue; 828 829 PhiMap &BlkPhis = OldPhiIt->second; 830 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To]; 831 SmallSet<BasicBlock *, 8> Incomings; 832 833 // Get the undefined blocks shared by all the phi nodes. 834 if (!BlkPhis.empty()) { 835 for (const auto &VI : BlkPhis.front().second) 836 Incomings.insert(VI.first); 837 findUndefBlocks(To, Incomings, UndefBlks); 838 } 839 840 for (const auto &[Phi, Incomings] : OldPhiIt->second) { 841 SmallVector<PHINode *> IncomingPHIs; 842 for (const auto &[BB, V] : Incomings) { 843 // First, for each phi, check whether it has incoming value which is 844 // another phi. 845 if (PHINode *P = dyn_cast<PHINode>(V)) 846 IncomingPHIs.push_back(P); 847 } 848 849 for (auto *OtherPhi : IncomingPHIs) { 850 // Skip phis that are unrelated to the phi reconstruction for now. 851 if (!DeletedPhis.contains(OtherPhi->getParent())) 852 continue; 853 mergeIfCompatible(PhiClasses, Phi, OtherPhi); 854 } 855 } 856 } 857 858 for (const auto &AddedPhi : AddedPhis) { 859 BasicBlock *To = AddedPhi.first; 860 const BBVector &From = AddedPhi.second; 861 862 if (!DeletedPhis.count(To)) 863 continue; 864 865 PhiMap &Map = DeletedPhis[To]; 866 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To]; 867 for (const auto &[Phi, Incoming] : Map) { 868 Value *Undef = UndefValue::get(Phi->getType()); 869 Updater.Initialize(Phi->getType(), ""); 870 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 871 Updater.AddAvailableValue(To, Undef); 872 873 // Use leader phi's incoming if there is. 874 auto LeaderIt = PhiClasses.findLeader(Phi); 875 bool UseIncomingOfLeader = 876 LeaderIt != PhiClasses.member_end() && *LeaderIt != Phi; 877 const auto &IncomingMap = 878 UseIncomingOfLeader ? DeletedPhis[(*LeaderIt)->getParent()][*LeaderIt] 879 : Incoming; 880 881 SmallVector<BasicBlock *> ConstantPreds; 882 for (const auto &[BB, V] : IncomingMap) { 883 Updater.AddAvailableValue(BB, V); 884 if (isa<Constant>(V)) 885 ConstantPreds.push_back(BB); 886 } 887 888 for (auto UB : UndefBlks) { 889 // If this undef block is dominated by any predecessor(before 890 // structurization) of reconstructed PHI with constant incoming value, 891 // don't mark the available value as undefined. Setting undef to such 892 // block will stop us from getting optimal phi insertion. 893 if (any_of(ConstantPreds, 894 [&](BasicBlock *CP) { return DT->dominates(CP, UB); })) 895 continue; 896 // Maybe already get a value through sharing with other phi nodes. 897 if (Updater.HasValueForBlock(UB)) 898 continue; 899 900 Updater.AddAvailableValue(UB, Undef); 901 } 902 903 for (BasicBlock *FI : From) 904 Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI)); 905 AffectedPhis.push_back(Phi); 906 } 907 } 908 909 AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end()); 910 } 911 912 void StructurizeCFG::simplifyAffectedPhis() { 913 bool Changed; 914 do { 915 Changed = false; 916 SimplifyQuery Q(Func->getDataLayout()); 917 Q.DT = DT; 918 // Setting CanUseUndef to true might extend value liveness, set it to false 919 // to achieve better register pressure. 920 Q.CanUseUndef = false; 921 for (WeakVH VH : AffectedPhis) { 922 if (auto Phi = dyn_cast_or_null<PHINode>(VH)) { 923 if (auto NewValue = simplifyInstruction(Phi, Q)) { 924 Phi->replaceAllUsesWith(NewValue); 925 Phi->eraseFromParent(); 926 Changed = true; 927 } 928 } 929 } 930 } while (Changed); 931 } 932 933 /// Remove phi values from all successors and then remove the terminator. 934 void StructurizeCFG::killTerminator(BasicBlock *BB) { 935 Instruction *Term = BB->getTerminator(); 936 if (!Term) 937 return; 938 939 for (BasicBlock *Succ : successors(BB)) 940 delPhiValues(BB, Succ); 941 942 Term->eraseFromParent(); 943 } 944 945 /// Let node exit(s) point to NewExit 946 void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, 947 bool IncludeDominator) { 948 if (Node->isSubRegion()) { 949 Region *SubRegion = Node->getNodeAs<Region>(); 950 BasicBlock *OldExit = SubRegion->getExit(); 951 BasicBlock *Dominator = nullptr; 952 953 // Find all the edges from the sub region to the exit. 954 // We use make_early_inc_range here because we modify BB's terminator. 955 for (BasicBlock *BB : llvm::make_early_inc_range(predecessors(OldExit))) { 956 if (!SubRegion->contains(BB)) 957 continue; 958 959 // Modify the edges to point to the new exit 960 delPhiValues(BB, OldExit); 961 BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit); 962 addPhiValues(BB, NewExit); 963 964 // Find the new dominator (if requested) 965 if (IncludeDominator) { 966 if (!Dominator) 967 Dominator = BB; 968 else 969 Dominator = DT->findNearestCommonDominator(Dominator, BB); 970 } 971 } 972 973 // Change the dominator (if requested) 974 if (Dominator) 975 DT->changeImmediateDominator(NewExit, Dominator); 976 977 // Update the region info 978 SubRegion->replaceExit(NewExit); 979 } else { 980 BasicBlock *BB = Node->getNodeAs<BasicBlock>(); 981 killTerminator(BB); 982 BranchInst *Br = BranchInst::Create(NewExit, BB); 983 Br->setDebugLoc(TermDL[BB]); 984 addPhiValues(BB, NewExit); 985 if (IncludeDominator) 986 DT->changeImmediateDominator(NewExit, BB); 987 } 988 } 989 990 /// Create a new flow node and update dominator tree and region info 991 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) { 992 LLVMContext &Context = Func->getContext(); 993 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() : 994 Order.back()->getEntry(); 995 BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, 996 Func, Insert); 997 FlowSet.insert(Flow); 998 999 // use a temporary variable to avoid a use-after-free if the map's storage is 1000 // reallocated 1001 DebugLoc DL = TermDL[Dominator]; 1002 TermDL[Flow] = std::move(DL); 1003 1004 DT->addNewBlock(Flow, Dominator); 1005 ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); 1006 return Flow; 1007 } 1008 1009 /// Create a new or reuse the previous node as flow node 1010 BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) { 1011 BasicBlock *Entry = PrevNode->getEntry(); 1012 1013 if (!PrevNode->isSubRegion()) { 1014 killTerminator(Entry); 1015 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end()) 1016 return Entry; 1017 } 1018 1019 // create a new flow node 1020 BasicBlock *Flow = getNextFlow(Entry); 1021 1022 // and wire it up 1023 changeExit(PrevNode, Flow, true); 1024 PrevNode = ParentRegion->getBBNode(Flow); 1025 return Flow; 1026 } 1027 1028 /// Returns the region exit if possible, otherwise just a new flow node 1029 BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow, 1030 bool ExitUseAllowed) { 1031 if (!Order.empty() || !ExitUseAllowed) 1032 return getNextFlow(Flow); 1033 1034 BasicBlock *Exit = ParentRegion->getExit(); 1035 DT->changeImmediateDominator(Exit, Flow); 1036 addPhiValues(Flow, Exit); 1037 return Exit; 1038 } 1039 1040 /// Set the previous node 1041 void StructurizeCFG::setPrevNode(BasicBlock *BB) { 1042 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) 1043 : nullptr; 1044 } 1045 1046 /// Does BB dominate all the predicates of Node? 1047 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) { 1048 BBPredicates &Preds = Predicates[Node->getEntry()]; 1049 return llvm::all_of(Preds, 1050 [&](std::pair<BasicBlock *, ValueWeightPair> Pred) { 1051 return DT->dominates(BB, Pred.first); 1052 }); 1053 } 1054 1055 /// Can we predict that this node will always be called? 1056 bool StructurizeCFG::isPredictableTrue(RegionNode *Node) { 1057 BBPredicates &Preds = Predicates[Node->getEntry()]; 1058 bool Dominated = false; 1059 1060 // Regionentry is always true 1061 if (!PrevNode) 1062 return true; 1063 1064 for (std::pair<BasicBlock *, ValueWeightPair> Pred : Preds) { 1065 BasicBlock *BB = Pred.first; 1066 Value *V = Pred.second.first; 1067 1068 if (V != BoolTrue) 1069 return false; 1070 1071 if (!Dominated && DT->dominates(BB, PrevNode->getEntry())) 1072 Dominated = true; 1073 } 1074 1075 // TODO: The dominator check is too strict 1076 return Dominated; 1077 } 1078 1079 /// Take one node from the order vector and wire it up 1080 void StructurizeCFG::wireFlow(bool ExitUseAllowed, 1081 BasicBlock *LoopEnd) { 1082 RegionNode *Node = Order.pop_back_val(); 1083 Visited.insert(Node->getEntry()); 1084 1085 if (isPredictableTrue(Node)) { 1086 // Just a linear flow 1087 if (PrevNode) { 1088 changeExit(PrevNode, Node->getEntry(), true); 1089 } 1090 PrevNode = Node; 1091 } else { 1092 // Insert extra prefix node (or reuse last one) 1093 BasicBlock *Flow = needPrefix(false); 1094 1095 // Insert extra postfix node (or use exit instead) 1096 BasicBlock *Entry = Node->getEntry(); 1097 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed); 1098 1099 // let it point to entry and next block 1100 BranchInst *Br = BranchInst::Create(Entry, Next, BoolPoison, Flow); 1101 Br->setDebugLoc(TermDL[Flow]); 1102 Conditions.push_back(Br); 1103 addPhiValues(Flow, Entry); 1104 DT->changeImmediateDominator(Entry, Flow); 1105 1106 PrevNode = Node; 1107 while (!Order.empty() && !Visited.count(LoopEnd) && 1108 dominatesPredicates(Entry, Order.back())) { 1109 handleLoops(false, LoopEnd); 1110 } 1111 1112 changeExit(PrevNode, Next, false); 1113 setPrevNode(Next); 1114 } 1115 } 1116 1117 void StructurizeCFG::handleLoops(bool ExitUseAllowed, 1118 BasicBlock *LoopEnd) { 1119 RegionNode *Node = Order.back(); 1120 BasicBlock *LoopStart = Node->getEntry(); 1121 1122 if (!Loops.count(LoopStart)) { 1123 wireFlow(ExitUseAllowed, LoopEnd); 1124 return; 1125 } 1126 1127 if (!isPredictableTrue(Node)) 1128 LoopStart = needPrefix(true); 1129 1130 LoopEnd = Loops[Node->getEntry()]; 1131 wireFlow(false, LoopEnd); 1132 while (!Visited.count(LoopEnd)) { 1133 handleLoops(false, LoopEnd); 1134 } 1135 1136 assert(LoopStart != &LoopStart->getParent()->getEntryBlock()); 1137 1138 // Create an extra loop end node 1139 LoopEnd = needPrefix(false); 1140 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed); 1141 BranchInst *Br = BranchInst::Create(Next, LoopStart, BoolPoison, LoopEnd); 1142 Br->setDebugLoc(TermDL[LoopEnd]); 1143 LoopConds.push_back(Br); 1144 addPhiValues(LoopEnd, LoopStart); 1145 setPrevNode(Next); 1146 } 1147 1148 /// After this function control flow looks like it should be, but 1149 /// branches and PHI nodes only have undefined conditions. 1150 void StructurizeCFG::createFlow() { 1151 BasicBlock *Exit = ParentRegion->getExit(); 1152 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit); 1153 1154 AffectedPhis.clear(); 1155 DeletedPhis.clear(); 1156 AddedPhis.clear(); 1157 Conditions.clear(); 1158 LoopConds.clear(); 1159 1160 PrevNode = nullptr; 1161 Visited.clear(); 1162 1163 while (!Order.empty()) { 1164 handleLoops(EntryDominatesExit, nullptr); 1165 } 1166 1167 if (PrevNode) 1168 changeExit(PrevNode, Exit, EntryDominatesExit); 1169 else 1170 assert(EntryDominatesExit); 1171 } 1172 1173 /// Handle a rare case where the disintegrated nodes instructions 1174 /// no longer dominate all their uses. Not sure if this is really necessary 1175 void StructurizeCFG::rebuildSSA() { 1176 SSAUpdater Updater; 1177 for (BasicBlock *BB : ParentRegion->blocks()) 1178 for (Instruction &I : *BB) { 1179 bool Initialized = false; 1180 // We may modify the use list as we iterate over it, so we use 1181 // make_early_inc_range. 1182 for (Use &U : llvm::make_early_inc_range(I.uses())) { 1183 Instruction *User = cast<Instruction>(U.getUser()); 1184 if (User->getParent() == BB) { 1185 continue; 1186 } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 1187 if (UserPN->getIncomingBlock(U) == BB) 1188 continue; 1189 } 1190 1191 if (DT->dominates(&I, User)) 1192 continue; 1193 1194 if (!Initialized) { 1195 Value *Undef = UndefValue::get(I.getType()); 1196 Updater.Initialize(I.getType(), ""); 1197 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 1198 Updater.AddAvailableValue(BB, &I); 1199 Initialized = true; 1200 } 1201 Updater.RewriteUseAfterInsertions(U); 1202 } 1203 } 1204 } 1205 1206 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID, 1207 const UniformityInfo &UA) { 1208 // Bool for if all sub-regions are uniform. 1209 bool SubRegionsAreUniform = true; 1210 // Count of how many direct children are conditional. 1211 unsigned ConditionalDirectChildren = 0; 1212 1213 for (auto *E : R->elements()) { 1214 if (!E->isSubRegion()) { 1215 auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator()); 1216 if (!Br || !Br->isConditional()) 1217 continue; 1218 1219 if (!UA.isUniform(Br)) 1220 return false; 1221 1222 // One of our direct children is conditional. 1223 ConditionalDirectChildren++; 1224 1225 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName() 1226 << " has uniform terminator\n"); 1227 } else { 1228 // Explicitly refuse to treat regions as uniform if they have non-uniform 1229 // subregions. We cannot rely on UniformityAnalysis for branches in 1230 // subregions because those branches may have been removed and re-created, 1231 // so we look for our metadata instead. 1232 // 1233 // Warning: It would be nice to treat regions as uniform based only on 1234 // their direct child basic blocks' terminators, regardless of whether 1235 // subregions are uniform or not. However, this requires a very careful 1236 // look at SIAnnotateControlFlow to make sure nothing breaks there. 1237 for (auto *BB : E->getNodeAs<Region>()->blocks()) { 1238 auto Br = dyn_cast<BranchInst>(BB->getTerminator()); 1239 if (!Br || !Br->isConditional()) 1240 continue; 1241 1242 if (!Br->getMetadata(UniformMDKindID)) { 1243 // Early exit if we cannot have relaxed uniform regions. 1244 if (!RelaxedUniformRegions) 1245 return false; 1246 1247 SubRegionsAreUniform = false; 1248 break; 1249 } 1250 } 1251 } 1252 } 1253 1254 // Our region is uniform if: 1255 // 1. All conditional branches that are direct children are uniform (checked 1256 // above). 1257 // 2. And either: 1258 // a. All sub-regions are uniform. 1259 // b. There is one or less conditional branches among the direct children. 1260 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1); 1261 } 1262 1263 void StructurizeCFG::init(Region *R) { 1264 LLVMContext &Context = R->getEntry()->getContext(); 1265 1266 Boolean = Type::getInt1Ty(Context); 1267 BoolTrue = ConstantInt::getTrue(Context); 1268 BoolFalse = ConstantInt::getFalse(Context); 1269 BoolPoison = PoisonValue::get(Boolean); 1270 1271 this->UA = nullptr; 1272 } 1273 1274 bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) { 1275 if (R->isTopLevelRegion()) 1276 return false; 1277 1278 this->UA = &UA; 1279 1280 // TODO: We could probably be smarter here with how we handle sub-regions. 1281 // We currently rely on the fact that metadata is set by earlier invocations 1282 // of the pass on sub-regions, and that this metadata doesn't get lost -- 1283 // but we shouldn't rely on metadata for correctness! 1284 unsigned UniformMDKindID = 1285 R->getEntry()->getContext().getMDKindID("structurizecfg.uniform"); 1286 1287 if (hasOnlyUniformBranches(R, UniformMDKindID, UA)) { 1288 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R 1289 << '\n'); 1290 1291 // Mark all direct child block terminators as having been treated as 1292 // uniform. To account for a possible future in which non-uniform 1293 // sub-regions are treated more cleverly, indirect children are not 1294 // marked as uniform. 1295 MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {}); 1296 for (RegionNode *E : R->elements()) { 1297 if (E->isSubRegion()) 1298 continue; 1299 1300 if (Instruction *Term = E->getEntry()->getTerminator()) 1301 Term->setMetadata(UniformMDKindID, MD); 1302 } 1303 1304 return true; 1305 } 1306 return false; 1307 } 1308 1309 /// Run the transformation for each region found 1310 bool StructurizeCFG::run(Region *R, DominatorTree *DT) { 1311 if (R->isTopLevelRegion()) 1312 return false; 1313 1314 this->DT = DT; 1315 1316 Func = R->getEntry()->getParent(); 1317 assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator."); 1318 1319 ParentRegion = R; 1320 1321 orderNodes(); 1322 collectInfos(); 1323 createFlow(); 1324 insertConditions(false); 1325 insertConditions(true); 1326 setPhiValues(); 1327 simplifyConditions(); 1328 simplifyAffectedPhis(); 1329 rebuildSSA(); 1330 1331 // Cleanup 1332 Order.clear(); 1333 Visited.clear(); 1334 DeletedPhis.clear(); 1335 AddedPhis.clear(); 1336 Predicates.clear(); 1337 Conditions.clear(); 1338 Loops.clear(); 1339 LoopPreds.clear(); 1340 LoopConds.clear(); 1341 FlowSet.clear(); 1342 TermDL.clear(); 1343 1344 return true; 1345 } 1346 1347 Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) { 1348 return new StructurizeCFGLegacyPass(SkipUniformRegions); 1349 } 1350 1351 static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) { 1352 Regions.push_back(&R); 1353 for (const auto &E : R) 1354 addRegionIntoQueue(*E, Regions); 1355 } 1356 1357 StructurizeCFGPass::StructurizeCFGPass(bool SkipUniformRegions_) 1358 : SkipUniformRegions(SkipUniformRegions_) { 1359 if (ForceSkipUniformRegions.getNumOccurrences()) 1360 SkipUniformRegions = ForceSkipUniformRegions.getValue(); 1361 } 1362 1363 void StructurizeCFGPass::printPipeline( 1364 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { 1365 static_cast<PassInfoMixin<StructurizeCFGPass> *>(this)->printPipeline( 1366 OS, MapClassName2PassName); 1367 if (SkipUniformRegions) 1368 OS << "<skip-uniform-regions>"; 1369 } 1370 1371 PreservedAnalyses StructurizeCFGPass::run(Function &F, 1372 FunctionAnalysisManager &AM) { 1373 1374 bool Changed = false; 1375 DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); 1376 auto &RI = AM.getResult<RegionInfoAnalysis>(F); 1377 1378 UniformityInfo *UI = nullptr; 1379 if (SkipUniformRegions) 1380 UI = &AM.getResult<UniformityInfoAnalysis>(F); 1381 1382 std::vector<Region *> Regions; 1383 addRegionIntoQueue(*RI.getTopLevelRegion(), Regions); 1384 while (!Regions.empty()) { 1385 Region *R = Regions.back(); 1386 Regions.pop_back(); 1387 1388 StructurizeCFG SCFG; 1389 SCFG.init(R); 1390 1391 if (SkipUniformRegions && SCFG.makeUniformRegion(R, *UI)) { 1392 Changed = true; // May have added metadata. 1393 continue; 1394 } 1395 1396 Changed |= SCFG.run(R, DT); 1397 } 1398 if (!Changed) 1399 return PreservedAnalyses::all(); 1400 PreservedAnalyses PA; 1401 PA.preserve<DominatorTreeAnalysis>(); 1402 return PA; 1403 } 1404