1 //===- StructurizeCFG.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/ADT/DenseMap.h" 10 #include "llvm/ADT/MapVector.h" 11 #include "llvm/ADT/PostOrderIterator.h" 12 #include "llvm/ADT/STLExtras.h" 13 #include "llvm/ADT/SmallPtrSet.h" 14 #include "llvm/ADT/SmallVector.h" 15 #include "llvm/Analysis/InstructionSimplify.h" 16 #include "llvm/Analysis/LegacyDivergenceAnalysis.h" 17 #include "llvm/Analysis/LoopInfo.h" 18 #include "llvm/Analysis/RegionInfo.h" 19 #include "llvm/Analysis/RegionIterator.h" 20 #include "llvm/Analysis/RegionPass.h" 21 #include "llvm/IR/Argument.h" 22 #include "llvm/IR/BasicBlock.h" 23 #include "llvm/IR/CFG.h" 24 #include "llvm/IR/Constant.h" 25 #include "llvm/IR/Constants.h" 26 #include "llvm/IR/Dominators.h" 27 #include "llvm/IR/Function.h" 28 #include "llvm/IR/InstrTypes.h" 29 #include "llvm/IR/Instruction.h" 30 #include "llvm/IR/Instructions.h" 31 #include "llvm/IR/Metadata.h" 32 #include "llvm/IR/PatternMatch.h" 33 #include "llvm/IR/Type.h" 34 #include "llvm/IR/Use.h" 35 #include "llvm/IR/User.h" 36 #include "llvm/IR/Value.h" 37 #include "llvm/IR/ValueHandle.h" 38 #include "llvm/InitializePasses.h" 39 #include "llvm/Pass.h" 40 #include "llvm/Support/Casting.h" 41 #include "llvm/Support/CommandLine.h" 42 #include "llvm/Support/Debug.h" 43 #include "llvm/Support/ErrorHandling.h" 44 #include "llvm/Support/raw_ostream.h" 45 #include "llvm/Transforms/Scalar.h" 46 #include "llvm/Transforms/Utils.h" 47 #include "llvm/Transforms/Utils/Local.h" 48 #include "llvm/Transforms/Utils/SSAUpdater.h" 49 #include <algorithm> 50 #include <cassert> 51 #include <utility> 52 53 using namespace llvm; 54 using namespace llvm::PatternMatch; 55 56 #define DEBUG_TYPE "structurizecfg" 57 58 // The name for newly created blocks. 59 static const char *const FlowBlockName = "Flow"; 60 61 namespace { 62 63 static cl::opt<bool> ForceSkipUniformRegions( 64 "structurizecfg-skip-uniform-regions", 65 cl::Hidden, 66 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"), 67 cl::init(false)); 68 69 static cl::opt<bool> 70 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden, 71 cl::desc("Allow relaxed uniform region checks"), 72 cl::init(true)); 73 74 // Definition of the complex types used in this pass. 75 76 using BBValuePair = std::pair<BasicBlock *, Value *>; 77 78 using RNVector = SmallVector<RegionNode *, 8>; 79 using BBVector = SmallVector<BasicBlock *, 8>; 80 using BranchVector = SmallVector<BranchInst *, 8>; 81 using BBValueVector = SmallVector<BBValuePair, 2>; 82 83 using BBSet = SmallPtrSet<BasicBlock *, 8>; 84 85 using PhiMap = MapVector<PHINode *, BBValueVector>; 86 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>; 87 88 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>; 89 using BBPredicates = DenseMap<BasicBlock *, Value *>; 90 using PredMap = DenseMap<BasicBlock *, BBPredicates>; 91 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>; 92 93 /// Finds the nearest common dominator of a set of BasicBlocks. 94 /// 95 /// For every BB you add to the set, you can specify whether we "remember" the 96 /// block. When you get the common dominator, you can also ask whether it's one 97 /// of the blocks we remembered. 98 class NearestCommonDominator { 99 DominatorTree *DT; 100 BasicBlock *Result = nullptr; 101 bool ResultIsRemembered = false; 102 103 /// Add BB to the resulting dominator. 104 void addBlock(BasicBlock *BB, bool Remember) { 105 if (!Result) { 106 Result = BB; 107 ResultIsRemembered = Remember; 108 return; 109 } 110 111 BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB); 112 if (NewResult != Result) 113 ResultIsRemembered = false; 114 if (NewResult == BB) 115 ResultIsRemembered |= Remember; 116 Result = NewResult; 117 } 118 119 public: 120 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {} 121 122 void addBlock(BasicBlock *BB) { 123 addBlock(BB, /* Remember = */ false); 124 } 125 126 void addAndRememberBlock(BasicBlock *BB) { 127 addBlock(BB, /* Remember = */ true); 128 } 129 130 /// Get the nearest common dominator of all the BBs added via addBlock() and 131 /// addAndRememberBlock(). 132 BasicBlock *result() { return Result; } 133 134 /// Is the BB returned by getResult() one of the blocks we added to the set 135 /// with addAndRememberBlock()? 136 bool resultIsRememberedBlock() { return ResultIsRemembered; } 137 }; 138 139 /// Transforms the control flow graph on one single entry/exit region 140 /// at a time. 141 /// 142 /// After the transform all "If"/"Then"/"Else" style control flow looks like 143 /// this: 144 /// 145 /// \verbatim 146 /// 1 147 /// || 148 /// | | 149 /// 2 | 150 /// | / 151 /// |/ 152 /// 3 153 /// || Where: 154 /// | | 1 = "If" block, calculates the condition 155 /// 4 | 2 = "Then" subregion, runs if the condition is true 156 /// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow 157 /// |/ 4 = "Else" optional subregion, runs if the condition is false 158 /// 5 5 = "End" block, also rejoins the control flow 159 /// \endverbatim 160 /// 161 /// Control flow is expressed as a branch where the true exit goes into the 162 /// "Then"/"Else" region, while the false exit skips the region 163 /// The condition for the optional "Else" region is expressed as a PHI node. 164 /// The incoming values of the PHI node are true for the "If" edge and false 165 /// for the "Then" edge. 166 /// 167 /// Additionally to that even complicated loops look like this: 168 /// 169 /// \verbatim 170 /// 1 171 /// || 172 /// | | 173 /// 2 ^ Where: 174 /// | / 1 = "Entry" block 175 /// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block 176 /// 3 3 = "Flow" block, with back edge to entry block 177 /// | 178 /// \endverbatim 179 /// 180 /// The back edge of the "Flow" block is always on the false side of the branch 181 /// while the true side continues the general flow. So the loop condition 182 /// consist of a network of PHI nodes where the true incoming values expresses 183 /// breaks and the false values expresses continue states. 184 class StructurizeCFG : public RegionPass { 185 bool SkipUniformRegions; 186 187 Type *Boolean; 188 ConstantInt *BoolTrue; 189 ConstantInt *BoolFalse; 190 UndefValue *BoolUndef; 191 192 Function *Func; 193 Region *ParentRegion; 194 195 LegacyDivergenceAnalysis *DA; 196 DominatorTree *DT; 197 LoopInfo *LI; 198 199 SmallVector<RegionNode *, 8> Order; 200 BBSet Visited; 201 202 SmallVector<WeakVH, 8> AffectedPhis; 203 BBPhiMap DeletedPhis; 204 BB2BBVecMap AddedPhis; 205 206 PredMap Predicates; 207 BranchVector Conditions; 208 209 BB2BBMap Loops; 210 PredMap LoopPreds; 211 BranchVector LoopConds; 212 213 RegionNode *PrevNode; 214 215 void orderNodes(); 216 217 Loop *getAdjustedLoop(RegionNode *RN); 218 219 void analyzeLoops(RegionNode *N); 220 221 Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert); 222 223 void gatherPredicates(RegionNode *N); 224 225 void collectInfos(); 226 227 void insertConditions(bool Loops); 228 229 void delPhiValues(BasicBlock *From, BasicBlock *To); 230 231 void addPhiValues(BasicBlock *From, BasicBlock *To); 232 233 void setPhiValues(); 234 235 void simplifyAffectedPhis(); 236 237 void killTerminator(BasicBlock *BB); 238 239 void changeExit(RegionNode *Node, BasicBlock *NewExit, 240 bool IncludeDominator); 241 242 BasicBlock *getNextFlow(BasicBlock *Dominator); 243 244 BasicBlock *needPrefix(bool NeedEmpty); 245 246 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed); 247 248 void setPrevNode(BasicBlock *BB); 249 250 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node); 251 252 bool isPredictableTrue(RegionNode *Node); 253 254 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd); 255 256 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd); 257 258 void createFlow(); 259 260 void rebuildSSA(); 261 262 public: 263 static char ID; 264 265 explicit StructurizeCFG(bool SkipUniformRegions_ = false) 266 : RegionPass(ID), 267 SkipUniformRegions(SkipUniformRegions_) { 268 if (ForceSkipUniformRegions.getNumOccurrences()) 269 SkipUniformRegions = ForceSkipUniformRegions.getValue(); 270 initializeStructurizeCFGPass(*PassRegistry::getPassRegistry()); 271 } 272 273 bool doInitialization(Region *R, RGPassManager &RGM) override; 274 275 bool runOnRegion(Region *R, RGPassManager &RGM) override; 276 277 StringRef getPassName() const override { return "Structurize control flow"; } 278 279 void getAnalysisUsage(AnalysisUsage &AU) const override { 280 if (SkipUniformRegions) 281 AU.addRequired<LegacyDivergenceAnalysis>(); 282 AU.addRequiredID(LowerSwitchID); 283 AU.addRequired<DominatorTreeWrapperPass>(); 284 AU.addRequired<LoopInfoWrapperPass>(); 285 286 AU.addPreserved<DominatorTreeWrapperPass>(); 287 RegionPass::getAnalysisUsage(AU); 288 } 289 }; 290 291 } // end anonymous namespace 292 293 char StructurizeCFG::ID = 0; 294 295 INITIALIZE_PASS_BEGIN(StructurizeCFG, "structurizecfg", "Structurize the CFG", 296 false, false) 297 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis) 298 INITIALIZE_PASS_DEPENDENCY(LowerSwitch) 299 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 300 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 301 INITIALIZE_PASS_END(StructurizeCFG, "structurizecfg", "Structurize the CFG", 302 false, false) 303 304 /// Initialize the types and constants used in the pass 305 bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) { 306 LLVMContext &Context = R->getEntry()->getContext(); 307 308 Boolean = Type::getInt1Ty(Context); 309 BoolTrue = ConstantInt::getTrue(Context); 310 BoolFalse = ConstantInt::getFalse(Context); 311 BoolUndef = UndefValue::get(Boolean); 312 313 return false; 314 } 315 316 /// Use the exit block to determine the loop if RN is a SubRegion. 317 Loop *StructurizeCFG::getAdjustedLoop(RegionNode *RN) { 318 if (RN->isSubRegion()) { 319 Region *SubRegion = RN->getNodeAs<Region>(); 320 return LI->getLoopFor(SubRegion->getExit()); 321 } 322 323 return LI->getLoopFor(RN->getEntry()); 324 } 325 326 /// Build up the general order of nodes, by performing a topology sort of the 327 /// parent region's nodes, while ensuring that there is no outer loop node 328 /// between any two inner loop nodes. 329 void StructurizeCFG::orderNodes() { 330 SmallVector<RegionNode *, 32> POT; 331 SmallDenseMap<Loop *, unsigned, 8> LoopSizes; 332 for (RegionNode *RN : post_order(ParentRegion)) { 333 POT.push_back(RN); 334 335 // Accumulate the number of nodes inside the region that belong to a loop. 336 Loop *Loop = getAdjustedLoop(RN); 337 ++LoopSizes[Loop]; 338 } 339 // A quick exit for the case where all nodes belong to the same loop (or no 340 // loop at all). 341 if (LoopSizes.size() <= 1U) { 342 Order.assign(POT.begin(), POT.end()); 343 return; 344 } 345 Order.resize(POT.size()); 346 347 // The post-order traversal of the list gives us an ordering close to what we 348 // want. The only problem with it is that sometimes backedges for outer loops 349 // will be visited before backedges for inner loops. So now we fix that by 350 // inserting the nodes in order, while making sure that encountered inner loop 351 // are complete before their parents (outer loops). 352 353 SmallVector<Loop *, 8> WorkList; 354 // Get the size of the outermost region (the nodes that don't belong to any 355 // loop inside ParentRegion). 356 unsigned ZeroCurrentLoopSize = 0U; 357 auto LSI = LoopSizes.find(nullptr); 358 unsigned *CurrentLoopSize = 359 LSI != LoopSizes.end() ? &LSI->second : &ZeroCurrentLoopSize; 360 Loop *CurrentLoop = nullptr; 361 362 // The "skipped" list is actually located at the (reversed) beginning of the 363 // POT. This saves us the use of an intermediate container. 364 // Note that there is always enough room, for the skipped nodes, before the 365 // current location, as we have just passed at least that amount of nodes. 366 367 auto Begin = POT.rbegin(); 368 auto I = Begin, SkippedEnd = Begin; 369 auto O = Order.rbegin(), OE = Order.rend(); 370 while (O != OE) { 371 // If we have any skipped nodes, then erase the gap between the end of the 372 // "skipped" list, and the current location. 373 if (SkippedEnd != Begin) { 374 POT.erase(I.base(), SkippedEnd.base()); 375 I = SkippedEnd = Begin = POT.rbegin(); 376 } 377 378 // Keep processing outer loops, in order (from inner most, to outer). 379 if (!WorkList.empty()) { 380 CurrentLoop = WorkList.pop_back_val(); 381 CurrentLoopSize = &LoopSizes.find(CurrentLoop)->second; 382 } 383 384 // Keep processing loops while only going deeper (into inner loops). 385 do { 386 assert(I != POT.rend()); 387 RegionNode *RN = *I++; 388 389 Loop *L = getAdjustedLoop(RN); 390 if (L != CurrentLoop) { 391 // If L is a loop inside CurrentLoop, then CurrentLoop must be the 392 // parent of L. 393 // To prove this, we will contradict the opposite: 394 // Let P be the parent of L. If CurrentLoop is the parent of P, then 395 // the header of P must have been processed already, as it must 396 // dominate the other blocks of P (otherwise P is an irreducible loop, 397 // and won't be recorded in the LoopInfo), especially L (inside). But 398 // then CurrentLoop must have been updated to P at the time of 399 // processing the header of P, which conflicts with the assumption 400 // that CurrentLoop is not P. 401 402 // If L is not a loop inside CurrentLoop, then skip RN. 403 if (!L || L->getParentLoop() != CurrentLoop) { 404 // Skip the node by pushing it to the end of the "skipped" list. 405 *SkippedEnd++ = RN; 406 continue; 407 } 408 409 // If we still haven't processed all the nodes that belong to 410 // CurrentLoop, then make sure we come back later, to finish the job, by 411 // pushing it to the WorkList. 412 if (*CurrentLoopSize) 413 WorkList.push_back(CurrentLoop); 414 415 CurrentLoop = L; 416 CurrentLoopSize = &LoopSizes.find(CurrentLoop)->second; 417 } 418 419 assert(O != OE); 420 *O++ = RN; 421 422 // If we have finished processing the current loop, then we are done here. 423 --*CurrentLoopSize; 424 } while (*CurrentLoopSize); 425 } 426 assert(WorkList.empty()); 427 assert(SkippedEnd == Begin); 428 } 429 430 /// Determine the end of the loops 431 void StructurizeCFG::analyzeLoops(RegionNode *N) { 432 if (N->isSubRegion()) { 433 // Test for exit as back edge 434 BasicBlock *Exit = N->getNodeAs<Region>()->getExit(); 435 if (Visited.count(Exit)) 436 Loops[Exit] = N->getEntry(); 437 438 } else { 439 // Test for successors as back edge 440 BasicBlock *BB = N->getNodeAs<BasicBlock>(); 441 BranchInst *Term = cast<BranchInst>(BB->getTerminator()); 442 443 for (BasicBlock *Succ : Term->successors()) 444 if (Visited.count(Succ)) 445 Loops[Succ] = BB; 446 } 447 } 448 449 /// Build the condition for one edge 450 Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, 451 bool Invert) { 452 Value *Cond = Invert ? BoolFalse : BoolTrue; 453 if (Term->isConditional()) { 454 Cond = Term->getCondition(); 455 456 if (Idx != (unsigned)Invert) 457 Cond = invertCondition(Cond); 458 } 459 return Cond; 460 } 461 462 /// Analyze the predecessors of each block and build up predicates 463 void StructurizeCFG::gatherPredicates(RegionNode *N) { 464 RegionInfo *RI = ParentRegion->getRegionInfo(); 465 BasicBlock *BB = N->getEntry(); 466 BBPredicates &Pred = Predicates[BB]; 467 BBPredicates &LPred = LoopPreds[BB]; 468 469 for (BasicBlock *P : predecessors(BB)) { 470 // Ignore it if it's a branch from outside into our region entry 471 if (!ParentRegion->contains(P)) 472 continue; 473 474 Region *R = RI->getRegionFor(P); 475 if (R == ParentRegion) { 476 // It's a top level block in our region 477 BranchInst *Term = cast<BranchInst>(P->getTerminator()); 478 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { 479 BasicBlock *Succ = Term->getSuccessor(i); 480 if (Succ != BB) 481 continue; 482 483 if (Visited.count(P)) { 484 // Normal forward edge 485 if (Term->isConditional()) { 486 // Try to treat it like an ELSE block 487 BasicBlock *Other = Term->getSuccessor(!i); 488 if (Visited.count(Other) && !Loops.count(Other) && 489 !Pred.count(Other) && !Pred.count(P)) { 490 491 Pred[Other] = BoolFalse; 492 Pred[P] = BoolTrue; 493 continue; 494 } 495 } 496 Pred[P] = buildCondition(Term, i, false); 497 } else { 498 // Back edge 499 LPred[P] = buildCondition(Term, i, true); 500 } 501 } 502 } else { 503 // It's an exit from a sub region 504 while (R->getParent() != ParentRegion) 505 R = R->getParent(); 506 507 // Edge from inside a subregion to its entry, ignore it 508 if (*R == *N) 509 continue; 510 511 BasicBlock *Entry = R->getEntry(); 512 if (Visited.count(Entry)) 513 Pred[Entry] = BoolTrue; 514 else 515 LPred[Entry] = BoolFalse; 516 } 517 } 518 } 519 520 /// Collect various loop and predicate infos 521 void StructurizeCFG::collectInfos() { 522 // Reset predicate 523 Predicates.clear(); 524 525 // and loop infos 526 Loops.clear(); 527 LoopPreds.clear(); 528 529 // Reset the visited nodes 530 Visited.clear(); 531 532 for (RegionNode *RN : reverse(Order)) { 533 LLVM_DEBUG(dbgs() << "Visiting: " 534 << (RN->isSubRegion() ? "SubRegion with entry: " : "") 535 << RN->getEntry()->getName() << " Loop Depth: " 536 << LI->getLoopDepth(RN->getEntry()) << "\n"); 537 538 // Analyze all the conditions leading to a node 539 gatherPredicates(RN); 540 541 // Remember that we've seen this node 542 Visited.insert(RN->getEntry()); 543 544 // Find the last back edges 545 analyzeLoops(RN); 546 } 547 } 548 549 /// Insert the missing branch conditions 550 void StructurizeCFG::insertConditions(bool Loops) { 551 BranchVector &Conds = Loops ? LoopConds : Conditions; 552 Value *Default = Loops ? BoolTrue : BoolFalse; 553 SSAUpdater PhiInserter; 554 555 for (BranchInst *Term : Conds) { 556 assert(Term->isConditional()); 557 558 BasicBlock *Parent = Term->getParent(); 559 BasicBlock *SuccTrue = Term->getSuccessor(0); 560 BasicBlock *SuccFalse = Term->getSuccessor(1); 561 562 PhiInserter.Initialize(Boolean, ""); 563 PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default); 564 PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default); 565 566 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue]; 567 568 NearestCommonDominator Dominator(DT); 569 Dominator.addBlock(Parent); 570 571 Value *ParentValue = nullptr; 572 for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) { 573 BasicBlock *BB = BBAndPred.first; 574 Value *Pred = BBAndPred.second; 575 576 if (BB == Parent) { 577 ParentValue = Pred; 578 break; 579 } 580 PhiInserter.AddAvailableValue(BB, Pred); 581 Dominator.addAndRememberBlock(BB); 582 } 583 584 if (ParentValue) { 585 Term->setCondition(ParentValue); 586 } else { 587 if (!Dominator.resultIsRememberedBlock()) 588 PhiInserter.AddAvailableValue(Dominator.result(), Default); 589 590 Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent)); 591 } 592 } 593 } 594 595 /// Remove all PHI values coming from "From" into "To" and remember 596 /// them in DeletedPhis 597 void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { 598 PhiMap &Map = DeletedPhis[To]; 599 for (PHINode &Phi : To->phis()) { 600 bool Recorded = false; 601 while (Phi.getBasicBlockIndex(From) != -1) { 602 Value *Deleted = Phi.removeIncomingValue(From, false); 603 Map[&Phi].push_back(std::make_pair(From, Deleted)); 604 if (!Recorded) { 605 AffectedPhis.push_back(&Phi); 606 Recorded = true; 607 } 608 } 609 } 610 } 611 612 /// Add a dummy PHI value as soon as we knew the new predecessor 613 void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { 614 for (PHINode &Phi : To->phis()) { 615 Value *Undef = UndefValue::get(Phi.getType()); 616 Phi.addIncoming(Undef, From); 617 } 618 AddedPhis[To].push_back(From); 619 } 620 621 /// Add the real PHI value as soon as everything is set up 622 void StructurizeCFG::setPhiValues() { 623 SmallVector<PHINode *, 8> InsertedPhis; 624 SSAUpdater Updater(&InsertedPhis); 625 for (const auto &AddedPhi : AddedPhis) { 626 BasicBlock *To = AddedPhi.first; 627 const BBVector &From = AddedPhi.second; 628 629 if (!DeletedPhis.count(To)) 630 continue; 631 632 PhiMap &Map = DeletedPhis[To]; 633 for (const auto &PI : Map) { 634 PHINode *Phi = PI.first; 635 Value *Undef = UndefValue::get(Phi->getType()); 636 Updater.Initialize(Phi->getType(), ""); 637 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 638 Updater.AddAvailableValue(To, Undef); 639 640 NearestCommonDominator Dominator(DT); 641 Dominator.addBlock(To); 642 for (const auto &VI : PI.second) { 643 Updater.AddAvailableValue(VI.first, VI.second); 644 Dominator.addAndRememberBlock(VI.first); 645 } 646 647 if (!Dominator.resultIsRememberedBlock()) 648 Updater.AddAvailableValue(Dominator.result(), Undef); 649 650 for (BasicBlock *FI : From) 651 Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI)); 652 AffectedPhis.push_back(Phi); 653 } 654 655 DeletedPhis.erase(To); 656 } 657 assert(DeletedPhis.empty()); 658 659 AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end()); 660 } 661 662 void StructurizeCFG::simplifyAffectedPhis() { 663 bool Changed; 664 do { 665 Changed = false; 666 SimplifyQuery Q(Func->getParent()->getDataLayout()); 667 Q.DT = DT; 668 for (WeakVH VH : AffectedPhis) { 669 if (auto Phi = dyn_cast_or_null<PHINode>(VH)) { 670 if (auto NewValue = SimplifyInstruction(Phi, Q)) { 671 Phi->replaceAllUsesWith(NewValue); 672 Phi->eraseFromParent(); 673 Changed = true; 674 } 675 } 676 } 677 } while (Changed); 678 } 679 680 /// Remove phi values from all successors and then remove the terminator. 681 void StructurizeCFG::killTerminator(BasicBlock *BB) { 682 Instruction *Term = BB->getTerminator(); 683 if (!Term) 684 return; 685 686 for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); 687 SI != SE; ++SI) 688 delPhiValues(BB, *SI); 689 690 if (DA) 691 DA->removeValue(Term); 692 Term->eraseFromParent(); 693 } 694 695 /// Let node exit(s) point to NewExit 696 void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, 697 bool IncludeDominator) { 698 if (Node->isSubRegion()) { 699 Region *SubRegion = Node->getNodeAs<Region>(); 700 BasicBlock *OldExit = SubRegion->getExit(); 701 BasicBlock *Dominator = nullptr; 702 703 // Find all the edges from the sub region to the exit 704 for (auto BBI = pred_begin(OldExit), E = pred_end(OldExit); BBI != E;) { 705 // Incrememt BBI before mucking with BB's terminator. 706 BasicBlock *BB = *BBI++; 707 708 if (!SubRegion->contains(BB)) 709 continue; 710 711 // Modify the edges to point to the new exit 712 delPhiValues(BB, OldExit); 713 BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit); 714 addPhiValues(BB, NewExit); 715 716 // Find the new dominator (if requested) 717 if (IncludeDominator) { 718 if (!Dominator) 719 Dominator = BB; 720 else 721 Dominator = DT->findNearestCommonDominator(Dominator, BB); 722 } 723 } 724 725 // Change the dominator (if requested) 726 if (Dominator) 727 DT->changeImmediateDominator(NewExit, Dominator); 728 729 // Update the region info 730 SubRegion->replaceExit(NewExit); 731 } else { 732 BasicBlock *BB = Node->getNodeAs<BasicBlock>(); 733 killTerminator(BB); 734 BranchInst::Create(NewExit, BB); 735 addPhiValues(BB, NewExit); 736 if (IncludeDominator) 737 DT->changeImmediateDominator(NewExit, BB); 738 } 739 } 740 741 /// Create a new flow node and update dominator tree and region info 742 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) { 743 LLVMContext &Context = Func->getContext(); 744 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() : 745 Order.back()->getEntry(); 746 BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, 747 Func, Insert); 748 DT->addNewBlock(Flow, Dominator); 749 ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); 750 return Flow; 751 } 752 753 /// Create a new or reuse the previous node as flow node 754 BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) { 755 BasicBlock *Entry = PrevNode->getEntry(); 756 757 if (!PrevNode->isSubRegion()) { 758 killTerminator(Entry); 759 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end()) 760 return Entry; 761 } 762 763 // create a new flow node 764 BasicBlock *Flow = getNextFlow(Entry); 765 766 // and wire it up 767 changeExit(PrevNode, Flow, true); 768 PrevNode = ParentRegion->getBBNode(Flow); 769 return Flow; 770 } 771 772 /// Returns the region exit if possible, otherwise just a new flow node 773 BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow, 774 bool ExitUseAllowed) { 775 if (!Order.empty() || !ExitUseAllowed) 776 return getNextFlow(Flow); 777 778 BasicBlock *Exit = ParentRegion->getExit(); 779 DT->changeImmediateDominator(Exit, Flow); 780 addPhiValues(Flow, Exit); 781 return Exit; 782 } 783 784 /// Set the previous node 785 void StructurizeCFG::setPrevNode(BasicBlock *BB) { 786 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) 787 : nullptr; 788 } 789 790 /// Does BB dominate all the predicates of Node? 791 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) { 792 BBPredicates &Preds = Predicates[Node->getEntry()]; 793 return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) { 794 return DT->dominates(BB, Pred.first); 795 }); 796 } 797 798 /// Can we predict that this node will always be called? 799 bool StructurizeCFG::isPredictableTrue(RegionNode *Node) { 800 BBPredicates &Preds = Predicates[Node->getEntry()]; 801 bool Dominated = false; 802 803 // Regionentry is always true 804 if (!PrevNode) 805 return true; 806 807 for (std::pair<BasicBlock*, Value*> Pred : Preds) { 808 BasicBlock *BB = Pred.first; 809 Value *V = Pred.second; 810 811 if (V != BoolTrue) 812 return false; 813 814 if (!Dominated && DT->dominates(BB, PrevNode->getEntry())) 815 Dominated = true; 816 } 817 818 // TODO: The dominator check is too strict 819 return Dominated; 820 } 821 822 /// Take one node from the order vector and wire it up 823 void StructurizeCFG::wireFlow(bool ExitUseAllowed, 824 BasicBlock *LoopEnd) { 825 RegionNode *Node = Order.pop_back_val(); 826 Visited.insert(Node->getEntry()); 827 828 if (isPredictableTrue(Node)) { 829 // Just a linear flow 830 if (PrevNode) { 831 changeExit(PrevNode, Node->getEntry(), true); 832 } 833 PrevNode = Node; 834 } else { 835 // Insert extra prefix node (or reuse last one) 836 BasicBlock *Flow = needPrefix(false); 837 838 // Insert extra postfix node (or use exit instead) 839 BasicBlock *Entry = Node->getEntry(); 840 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed); 841 842 // let it point to entry and next block 843 Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow)); 844 addPhiValues(Flow, Entry); 845 DT->changeImmediateDominator(Entry, Flow); 846 847 PrevNode = Node; 848 while (!Order.empty() && !Visited.count(LoopEnd) && 849 dominatesPredicates(Entry, Order.back())) { 850 handleLoops(false, LoopEnd); 851 } 852 853 changeExit(PrevNode, Next, false); 854 setPrevNode(Next); 855 } 856 } 857 858 void StructurizeCFG::handleLoops(bool ExitUseAllowed, 859 BasicBlock *LoopEnd) { 860 RegionNode *Node = Order.back(); 861 BasicBlock *LoopStart = Node->getEntry(); 862 863 if (!Loops.count(LoopStart)) { 864 wireFlow(ExitUseAllowed, LoopEnd); 865 return; 866 } 867 868 if (!isPredictableTrue(Node)) 869 LoopStart = needPrefix(true); 870 871 LoopEnd = Loops[Node->getEntry()]; 872 wireFlow(false, LoopEnd); 873 while (!Visited.count(LoopEnd)) { 874 handleLoops(false, LoopEnd); 875 } 876 877 // If the start of the loop is the entry block, we can't branch to it so 878 // insert a new dummy entry block. 879 Function *LoopFunc = LoopStart->getParent(); 880 if (LoopStart == &LoopFunc->getEntryBlock()) { 881 LoopStart->setName("entry.orig"); 882 883 BasicBlock *NewEntry = 884 BasicBlock::Create(LoopStart->getContext(), 885 "entry", 886 LoopFunc, 887 LoopStart); 888 BranchInst::Create(LoopStart, NewEntry); 889 DT->setNewRoot(NewEntry); 890 } 891 892 // Create an extra loop end node 893 LoopEnd = needPrefix(false); 894 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed); 895 LoopConds.push_back(BranchInst::Create(Next, LoopStart, 896 BoolUndef, LoopEnd)); 897 addPhiValues(LoopEnd, LoopStart); 898 setPrevNode(Next); 899 } 900 901 /// After this function control flow looks like it should be, but 902 /// branches and PHI nodes only have undefined conditions. 903 void StructurizeCFG::createFlow() { 904 BasicBlock *Exit = ParentRegion->getExit(); 905 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit); 906 907 AffectedPhis.clear(); 908 DeletedPhis.clear(); 909 AddedPhis.clear(); 910 Conditions.clear(); 911 LoopConds.clear(); 912 913 PrevNode = nullptr; 914 Visited.clear(); 915 916 while (!Order.empty()) { 917 handleLoops(EntryDominatesExit, nullptr); 918 } 919 920 if (PrevNode) 921 changeExit(PrevNode, Exit, EntryDominatesExit); 922 else 923 assert(EntryDominatesExit); 924 } 925 926 /// Handle a rare case where the disintegrated nodes instructions 927 /// no longer dominate all their uses. Not sure if this is really necessary 928 void StructurizeCFG::rebuildSSA() { 929 SSAUpdater Updater; 930 for (BasicBlock *BB : ParentRegion->blocks()) 931 for (Instruction &I : *BB) { 932 bool Initialized = false; 933 // We may modify the use list as we iterate over it, so be careful to 934 // compute the next element in the use list at the top of the loop. 935 for (auto UI = I.use_begin(), E = I.use_end(); UI != E;) { 936 Use &U = *UI++; 937 Instruction *User = cast<Instruction>(U.getUser()); 938 if (User->getParent() == BB) { 939 continue; 940 } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 941 if (UserPN->getIncomingBlock(U) == BB) 942 continue; 943 } 944 945 if (DT->dominates(&I, User)) 946 continue; 947 948 if (!Initialized) { 949 Value *Undef = UndefValue::get(I.getType()); 950 Updater.Initialize(I.getType(), ""); 951 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 952 Updater.AddAvailableValue(BB, &I); 953 Initialized = true; 954 } 955 Updater.RewriteUseAfterInsertions(U); 956 } 957 } 958 } 959 960 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID, 961 const LegacyDivergenceAnalysis &DA) { 962 // Bool for if all sub-regions are uniform. 963 bool SubRegionsAreUniform = true; 964 // Count of how many direct children are conditional. 965 unsigned ConditionalDirectChildren = 0; 966 967 for (auto E : R->elements()) { 968 if (!E->isSubRegion()) { 969 auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator()); 970 if (!Br || !Br->isConditional()) 971 continue; 972 973 if (!DA.isUniform(Br)) 974 return false; 975 976 // One of our direct children is conditional. 977 ConditionalDirectChildren++; 978 979 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName() 980 << " has uniform terminator\n"); 981 } else { 982 // Explicitly refuse to treat regions as uniform if they have non-uniform 983 // subregions. We cannot rely on DivergenceAnalysis for branches in 984 // subregions because those branches may have been removed and re-created, 985 // so we look for our metadata instead. 986 // 987 // Warning: It would be nice to treat regions as uniform based only on 988 // their direct child basic blocks' terminators, regardless of whether 989 // subregions are uniform or not. However, this requires a very careful 990 // look at SIAnnotateControlFlow to make sure nothing breaks there. 991 for (auto BB : E->getNodeAs<Region>()->blocks()) { 992 auto Br = dyn_cast<BranchInst>(BB->getTerminator()); 993 if (!Br || !Br->isConditional()) 994 continue; 995 996 if (!Br->getMetadata(UniformMDKindID)) { 997 // Early exit if we cannot have relaxed uniform regions. 998 if (!RelaxedUniformRegions) 999 return false; 1000 1001 SubRegionsAreUniform = false; 1002 break; 1003 } 1004 } 1005 } 1006 } 1007 1008 // Our region is uniform if: 1009 // 1. All conditional branches that are direct children are uniform (checked 1010 // above). 1011 // 2. And either: 1012 // a. All sub-regions are uniform. 1013 // b. There is one or less conditional branches among the direct children. 1014 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1); 1015 } 1016 1017 /// Run the transformation for each region found 1018 bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) { 1019 if (R->isTopLevelRegion()) 1020 return false; 1021 1022 DA = nullptr; 1023 1024 if (SkipUniformRegions) { 1025 // TODO: We could probably be smarter here with how we handle sub-regions. 1026 // We currently rely on the fact that metadata is set by earlier invocations 1027 // of the pass on sub-regions, and that this metadata doesn't get lost -- 1028 // but we shouldn't rely on metadata for correctness! 1029 unsigned UniformMDKindID = 1030 R->getEntry()->getContext().getMDKindID("structurizecfg.uniform"); 1031 DA = &getAnalysis<LegacyDivergenceAnalysis>(); 1032 1033 if (hasOnlyUniformBranches(R, UniformMDKindID, *DA)) { 1034 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R 1035 << '\n'); 1036 1037 // Mark all direct child block terminators as having been treated as 1038 // uniform. To account for a possible future in which non-uniform 1039 // sub-regions are treated more cleverly, indirect children are not 1040 // marked as uniform. 1041 MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {}); 1042 for (RegionNode *E : R->elements()) { 1043 if (E->isSubRegion()) 1044 continue; 1045 1046 if (Instruction *Term = E->getEntry()->getTerminator()) 1047 Term->setMetadata(UniformMDKindID, MD); 1048 } 1049 1050 return false; 1051 } 1052 } 1053 1054 Func = R->getEntry()->getParent(); 1055 ParentRegion = R; 1056 1057 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1058 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1059 1060 orderNodes(); 1061 collectInfos(); 1062 createFlow(); 1063 insertConditions(false); 1064 insertConditions(true); 1065 setPhiValues(); 1066 simplifyAffectedPhis(); 1067 rebuildSSA(); 1068 1069 // Cleanup 1070 Order.clear(); 1071 Visited.clear(); 1072 DeletedPhis.clear(); 1073 AddedPhis.clear(); 1074 Predicates.clear(); 1075 Conditions.clear(); 1076 Loops.clear(); 1077 LoopPreds.clear(); 1078 LoopConds.clear(); 1079 1080 return true; 1081 } 1082 1083 Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) { 1084 return new StructurizeCFG(SkipUniformRegions); 1085 } 1086