1 //===- R600MachineCFGStructurizer.cpp - CFG Structurizer ------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //==-----------------------------------------------------------------------===// 8 9 #include "MCTargetDesc/R600MCTargetDesc.h" 10 #include "R600.h" 11 #include "R600RegisterInfo.h" 12 #include "R600Subtarget.h" 13 #include "llvm/ADT/SCCIterator.h" 14 #include "llvm/ADT/Statistic.h" 15 #include "llvm/CodeGen/MachineFunction.h" 16 #include "llvm/CodeGen/MachineFunctionPass.h" 17 #include "llvm/CodeGen/MachineJumpTableInfo.h" 18 #include "llvm/CodeGen/MachineLoopInfo.h" 19 #include "llvm/CodeGen/MachinePostDominators.h" 20 #include "llvm/InitializePasses.h" 21 22 using namespace llvm; 23 24 #define DEBUG_TYPE "structcfg" 25 26 #define DEFAULT_VEC_SLOTS 8 27 28 // TODO: move-begin. 29 30 //===----------------------------------------------------------------------===// 31 // 32 // Statistics for CFGStructurizer. 33 // 34 //===----------------------------------------------------------------------===// 35 36 STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern " 37 "matched"); 38 STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern " 39 "matched"); 40 STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks"); 41 STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions"); 42 43 namespace llvm { 44 45 void initializeR600MachineCFGStructurizerPass(PassRegistry &); 46 47 } // end namespace llvm 48 49 namespace { 50 51 //===----------------------------------------------------------------------===// 52 // 53 // Miscellaneous utility for CFGStructurizer. 54 // 55 //===----------------------------------------------------------------------===// 56 57 #define SHOWNEWINSTR(i) LLVM_DEBUG(dbgs() << "New instr: " << *i << "\n"); 58 59 #define SHOWNEWBLK(b, msg) \ 60 LLVM_DEBUG(dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ 61 dbgs() << "\n";); 62 63 #define SHOWBLK_DETAIL(b, msg) \ 64 LLVM_DEBUG(if (b) { \ 65 dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ 66 b->print(dbgs()); \ 67 dbgs() << "\n"; \ 68 }); 69 70 #define INVALIDSCCNUM -1 71 72 //===----------------------------------------------------------------------===// 73 // 74 // supporting data structure for CFGStructurizer 75 // 76 //===----------------------------------------------------------------------===// 77 78 class BlockInformation { 79 public: 80 bool IsRetired = false; 81 int SccNum = INVALIDSCCNUM; 82 83 BlockInformation() = default; 84 }; 85 86 //===----------------------------------------------------------------------===// 87 // 88 // CFGStructurizer 89 // 90 //===----------------------------------------------------------------------===// 91 92 class R600MachineCFGStructurizer : public MachineFunctionPass { 93 public: 94 using MBBVector = SmallVector<MachineBasicBlock *, 32>; 95 using MBBInfoMap = std::map<MachineBasicBlock *, BlockInformation *>; 96 using LoopLandInfoMap = std::map<MachineLoop *, MachineBasicBlock *>; 97 98 enum PathToKind { 99 Not_SinglePath = 0, 100 SinglePath_InPath = 1, 101 SinglePath_NotInPath = 2 102 }; 103 104 static char ID; 105 106 R600MachineCFGStructurizer() : MachineFunctionPass(ID) { 107 initializeR600MachineCFGStructurizerPass(*PassRegistry::getPassRegistry()); 108 } 109 110 StringRef getPassName() const override { 111 return "AMDGPU Control Flow Graph structurizer Pass"; 112 } 113 114 void getAnalysisUsage(AnalysisUsage &AU) const override { 115 AU.addRequired<MachineDominatorTree>(); 116 AU.addRequired<MachinePostDominatorTree>(); 117 AU.addRequired<MachineLoopInfo>(); 118 MachineFunctionPass::getAnalysisUsage(AU); 119 } 120 121 /// Perform the CFG structurization 122 bool run(); 123 124 /// Perform the CFG preparation 125 /// This step will remove every unconditionnal/dead jump instructions and make 126 /// sure all loops have an exit block 127 bool prepare(); 128 129 bool runOnMachineFunction(MachineFunction &MF) override { 130 // FIXME: This pass causes verification failures. 131 MF.getProperties().set( 132 MachineFunctionProperties::Property::FailsVerification); 133 134 TII = MF.getSubtarget<R600Subtarget>().getInstrInfo(); 135 TRI = &TII->getRegisterInfo(); 136 LLVM_DEBUG(MF.dump();); 137 OrderedBlks.clear(); 138 Visited.clear(); 139 FuncRep = &MF; 140 MLI = &getAnalysis<MachineLoopInfo>(); 141 LLVM_DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI);); 142 MDT = &getAnalysis<MachineDominatorTree>(); 143 LLVM_DEBUG(MDT->print(dbgs(), (const Module *)nullptr);); 144 PDT = &getAnalysis<MachinePostDominatorTree>(); 145 LLVM_DEBUG(PDT->print(dbgs());); 146 prepare(); 147 run(); 148 LLVM_DEBUG(MF.dump();); 149 return true; 150 } 151 152 protected: 153 MachineDominatorTree *MDT; 154 MachinePostDominatorTree *PDT; 155 MachineLoopInfo *MLI; 156 const R600InstrInfo *TII = nullptr; 157 const R600RegisterInfo *TRI = nullptr; 158 159 // PRINT FUNCTIONS 160 /// Print the ordered Blocks. 161 void printOrderedBlocks() const { 162 size_t i = 0; 163 for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(), 164 iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) { 165 dbgs() << "BB" << (*iterBlk)->getNumber(); 166 dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")"; 167 if (i != 0 && i % 10 == 0) { 168 dbgs() << "\n"; 169 } else { 170 dbgs() << " "; 171 } 172 } 173 } 174 175 static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) { 176 for (const MachineLoop *L : LoopInfo) 177 L->print(dbgs()); 178 } 179 180 // UTILITY FUNCTIONS 181 int getSCCNum(MachineBasicBlock *MBB) const; 182 MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const; 183 bool hasBackEdge(MachineBasicBlock *MBB) const; 184 bool isRetiredBlock(MachineBasicBlock *MBB) const; 185 bool isActiveLoophead(MachineBasicBlock *MBB) const; 186 PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, 187 bool AllowSideEntry = true) const; 188 int countActiveBlock(MBBVector::const_iterator It, 189 MBBVector::const_iterator E) const; 190 bool needMigrateBlock(MachineBasicBlock *MBB) const; 191 192 // Utility Functions 193 void reversePredicateSetter(MachineBasicBlock::iterator I, 194 MachineBasicBlock &MBB); 195 /// Compute the reversed DFS post order of Blocks 196 void orderBlocks(MachineFunction *MF); 197 198 // Function originally from CFGStructTraits 199 void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode, 200 const DebugLoc &DL = DebugLoc()); 201 MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode, 202 const DebugLoc &DL = DebugLoc()); 203 MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode); 204 void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode, 205 const DebugLoc &DL); 206 void insertCondBranchBefore(MachineBasicBlock *MBB, 207 MachineBasicBlock::iterator I, int NewOpcode, 208 int RegNum, const DebugLoc &DL); 209 210 static int getBranchNzeroOpcode(int OldOpcode); 211 static int getBranchZeroOpcode(int OldOpcode); 212 static int getContinueNzeroOpcode(int OldOpcode); 213 static int getContinueZeroOpcode(int OldOpcode); 214 static MachineBasicBlock *getTrueBranch(MachineInstr *MI); 215 static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB); 216 static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB, 217 MachineInstr *MI); 218 static bool isCondBranch(MachineInstr *MI); 219 static bool isUncondBranch(MachineInstr *MI); 220 static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB); 221 static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB); 222 223 /// The correct naming for this is getPossibleLoopendBlockBranchInstr. 224 /// 225 /// BB with backward-edge could have move instructions after the branch 226 /// instruction. Such move instruction "belong to" the loop backward-edge. 227 MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB); 228 229 static MachineInstr *getReturnInstr(MachineBasicBlock *MBB); 230 static bool isReturnBlock(MachineBasicBlock *MBB); 231 static void cloneSuccessorList(MachineBasicBlock *DstMBB, 232 MachineBasicBlock *SrcMBB); 233 static MachineBasicBlock *clone(MachineBasicBlock *MBB); 234 235 /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose 236 /// because the AMDGPU instruction is not recognized as terminator fix this 237 /// and retire this routine 238 void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB, 239 MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk); 240 241 static void wrapup(MachineBasicBlock *MBB); 242 243 int patternMatch(MachineBasicBlock *MBB); 244 int patternMatchGroup(MachineBasicBlock *MBB); 245 int serialPatternMatch(MachineBasicBlock *MBB); 246 int ifPatternMatch(MachineBasicBlock *MBB); 247 int loopendPatternMatch(); 248 int mergeLoop(MachineLoop *LoopRep); 249 250 /// return true iff src1Blk->succ_empty() && src1Blk and src2Blk are in 251 /// the same loop with LoopLandInfo without explicitly keeping track of 252 /// loopContBlks and loopBreakBlks, this is a method to get the information. 253 bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB, 254 MachineBasicBlock *Src2MBB); 255 int handleJumpintoIf(MachineBasicBlock *HeadMBB, 256 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); 257 int handleJumpintoIfImp(MachineBasicBlock *HeadMBB, 258 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); 259 int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, 260 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, 261 MachineBasicBlock **LandMBBPtr); 262 void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, 263 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, 264 MachineBasicBlock *LandMBB, bool Detail = false); 265 int cloneOnSideEntryTo(MachineBasicBlock *PreMBB, 266 MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB); 267 void mergeSerialBlock(MachineBasicBlock *DstMBB, 268 MachineBasicBlock *SrcMBB); 269 270 void mergeIfthenelseBlock(MachineInstr *BranchMI, 271 MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, 272 MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB); 273 void mergeLooplandBlock(MachineBasicBlock *DstMBB, 274 MachineBasicBlock *LandMBB); 275 void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, 276 MachineBasicBlock *LandMBB); 277 void settleLoopcontBlock(MachineBasicBlock *ContingMBB, 278 MachineBasicBlock *ContMBB); 279 280 /// normalizeInfiniteLoopExit change 281 /// B1: 282 /// uncond_br LoopHeader 283 /// 284 /// to 285 /// B1: 286 /// cond_br 1 LoopHeader dummyExit 287 /// and return the newly added dummy exit block 288 MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep); 289 void removeUnconditionalBranch(MachineBasicBlock *MBB); 290 291 /// Remove duplicate branches instructions in a block. 292 /// For instance 293 /// B0: 294 /// cond_br X B1 B2 295 /// cond_br X B1 B2 296 /// is transformed to 297 /// B0: 298 /// cond_br X B1 B2 299 void removeRedundantConditionalBranch(MachineBasicBlock *MBB); 300 301 void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB); 302 void removeSuccessor(MachineBasicBlock *MBB); 303 MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB, 304 MachineBasicBlock *PredMBB); 305 void migrateInstruction(MachineBasicBlock *SrcMBB, 306 MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I); 307 void recordSccnum(MachineBasicBlock *MBB, int SCCNum); 308 void retireBlock(MachineBasicBlock *MBB); 309 310 private: 311 MBBInfoMap BlockInfoMap; 312 LoopLandInfoMap LLInfoMap; 313 std::map<MachineLoop *, bool> Visited; 314 MachineFunction *FuncRep; 315 SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks; 316 }; 317 318 } // end anonymous namespace 319 320 char R600MachineCFGStructurizer::ID = 0; 321 322 int R600MachineCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const { 323 MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); 324 if (It == BlockInfoMap.end()) 325 return INVALIDSCCNUM; 326 return (*It).second->SccNum; 327 } 328 329 MachineBasicBlock *R600MachineCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep) 330 const { 331 LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep); 332 if (It == LLInfoMap.end()) 333 return nullptr; 334 return (*It).second; 335 } 336 337 bool R600MachineCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const { 338 MachineLoop *LoopRep = MLI->getLoopFor(MBB); 339 if (!LoopRep) 340 return false; 341 MachineBasicBlock *LoopHeader = LoopRep->getHeader(); 342 return MBB->isSuccessor(LoopHeader); 343 } 344 345 bool R600MachineCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const { 346 MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); 347 if (It == BlockInfoMap.end()) 348 return false; 349 return (*It).second->IsRetired; 350 } 351 352 bool R600MachineCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const { 353 MachineLoop *LoopRep = MLI->getLoopFor(MBB); 354 while (LoopRep && LoopRep->getHeader() == MBB) { 355 MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep); 356 if(!LoopLand) 357 return true; 358 if (!isRetiredBlock(LoopLand)) 359 return true; 360 LoopRep = LoopRep->getParentLoop(); 361 } 362 return false; 363 } 364 365 R600MachineCFGStructurizer::PathToKind R600MachineCFGStructurizer::singlePathTo( 366 MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, 367 bool AllowSideEntry) const { 368 assert(DstMBB); 369 if (SrcMBB == DstMBB) 370 return SinglePath_InPath; 371 while (SrcMBB && SrcMBB->succ_size() == 1) { 372 SrcMBB = *SrcMBB->succ_begin(); 373 if (SrcMBB == DstMBB) 374 return SinglePath_InPath; 375 if (!AllowSideEntry && SrcMBB->pred_size() > 1) 376 return Not_SinglePath; 377 } 378 if (SrcMBB && SrcMBB->succ_size()==0) 379 return SinglePath_NotInPath; 380 return Not_SinglePath; 381 } 382 383 int R600MachineCFGStructurizer::countActiveBlock(MBBVector::const_iterator It, 384 MBBVector::const_iterator E) const { 385 int Count = 0; 386 while (It != E) { 387 if (!isRetiredBlock(*It)) 388 ++Count; 389 ++It; 390 } 391 return Count; 392 } 393 394 bool R600MachineCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const { 395 unsigned BlockSizeThreshold = 30; 396 unsigned CloneInstrThreshold = 100; 397 bool MultiplePreds = MBB && (MBB->pred_size() > 1); 398 399 if(!MultiplePreds) 400 return false; 401 unsigned BlkSize = MBB->size(); 402 return ((BlkSize > BlockSizeThreshold) && 403 (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold)); 404 } 405 406 void R600MachineCFGStructurizer::reversePredicateSetter( 407 MachineBasicBlock::iterator I, MachineBasicBlock &MBB) { 408 assert(I.isValid() && "Expected valid iterator"); 409 for (;; --I) { 410 if (I == MBB.end()) 411 continue; 412 if (I->getOpcode() == R600::PRED_X) { 413 switch (I->getOperand(2).getImm()) { 414 case R600::PRED_SETE_INT: 415 I->getOperand(2).setImm(R600::PRED_SETNE_INT); 416 return; 417 case R600::PRED_SETNE_INT: 418 I->getOperand(2).setImm(R600::PRED_SETE_INT); 419 return; 420 case R600::PRED_SETE: 421 I->getOperand(2).setImm(R600::PRED_SETNE); 422 return; 423 case R600::PRED_SETNE: 424 I->getOperand(2).setImm(R600::PRED_SETE); 425 return; 426 default: 427 llvm_unreachable("PRED_X Opcode invalid!"); 428 } 429 } 430 } 431 } 432 433 void R600MachineCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB, 434 int NewOpcode, const DebugLoc &DL) { 435 MachineInstr *MI = 436 MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL); 437 MBB->push_back(MI); 438 //assume the instruction doesn't take any reg operand ... 439 SHOWNEWINSTR(MI); 440 } 441 442 MachineInstr *R600MachineCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB, 443 int NewOpcode, 444 const DebugLoc &DL) { 445 MachineInstr *MI = 446 MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL); 447 if (!MBB->empty()) 448 MBB->insert(MBB->begin(), MI); 449 else 450 MBB->push_back(MI); 451 SHOWNEWINSTR(MI); 452 return MI; 453 } 454 455 MachineInstr *R600MachineCFGStructurizer::insertInstrBefore( 456 MachineBasicBlock::iterator I, int NewOpcode) { 457 MachineInstr *OldMI = &(*I); 458 MachineBasicBlock *MBB = OldMI->getParent(); 459 MachineInstr *NewMBB = 460 MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc()); 461 MBB->insert(I, NewMBB); 462 //assume the instruction doesn't take any reg operand ... 463 SHOWNEWINSTR(NewMBB); 464 return NewMBB; 465 } 466 467 void R600MachineCFGStructurizer::insertCondBranchBefore( 468 MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) { 469 MachineInstr *OldMI = &(*I); 470 MachineBasicBlock *MBB = OldMI->getParent(); 471 MachineFunction *MF = MBB->getParent(); 472 MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL); 473 MBB->insert(I, NewMI); 474 MachineInstrBuilder MIB(*MF, NewMI); 475 MIB.addReg(OldMI->getOperand(1).getReg(), false); 476 SHOWNEWINSTR(NewMI); 477 //erase later oldInstr->eraseFromParent(); 478 } 479 480 void R600MachineCFGStructurizer::insertCondBranchBefore( 481 MachineBasicBlock *blk, MachineBasicBlock::iterator I, int NewOpcode, 482 int RegNum, const DebugLoc &DL) { 483 MachineFunction *MF = blk->getParent(); 484 MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL); 485 //insert before 486 blk->insert(I, NewInstr); 487 MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false); 488 SHOWNEWINSTR(NewInstr); 489 } 490 491 int R600MachineCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) { 492 switch(OldOpcode) { 493 case R600::JUMP_COND: 494 case R600::JUMP: return R600::IF_PREDICATE_SET; 495 case R600::BRANCH_COND_i32: 496 case R600::BRANCH_COND_f32: return R600::IF_LOGICALNZ_f32; 497 default: llvm_unreachable("internal error"); 498 } 499 return -1; 500 } 501 502 int R600MachineCFGStructurizer::getBranchZeroOpcode(int OldOpcode) { 503 switch(OldOpcode) { 504 case R600::JUMP_COND: 505 case R600::JUMP: return R600::IF_PREDICATE_SET; 506 case R600::BRANCH_COND_i32: 507 case R600::BRANCH_COND_f32: return R600::IF_LOGICALZ_f32; 508 default: llvm_unreachable("internal error"); 509 } 510 return -1; 511 } 512 513 int R600MachineCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) { 514 switch(OldOpcode) { 515 case R600::JUMP_COND: 516 case R600::JUMP: return R600::CONTINUE_LOGICALNZ_i32; 517 default: llvm_unreachable("internal error"); 518 } 519 return -1; 520 } 521 522 int R600MachineCFGStructurizer::getContinueZeroOpcode(int OldOpcode) { 523 switch(OldOpcode) { 524 case R600::JUMP_COND: 525 case R600::JUMP: return R600::CONTINUE_LOGICALZ_i32; 526 default: llvm_unreachable("internal error"); 527 } 528 return -1; 529 } 530 531 MachineBasicBlock *R600MachineCFGStructurizer::getTrueBranch(MachineInstr *MI) { 532 return MI->getOperand(0).getMBB(); 533 } 534 535 void R600MachineCFGStructurizer::setTrueBranch(MachineInstr *MI, 536 MachineBasicBlock *MBB) { 537 MI->getOperand(0).setMBB(MBB); 538 } 539 540 MachineBasicBlock * 541 R600MachineCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB, 542 MachineInstr *MI) { 543 assert(MBB->succ_size() == 2); 544 MachineBasicBlock *TrueBranch = getTrueBranch(MI); 545 MachineBasicBlock::succ_iterator It = MBB->succ_begin(); 546 MachineBasicBlock::succ_iterator Next = It; 547 ++Next; 548 return (*It == TrueBranch) ? *Next : *It; 549 } 550 551 bool R600MachineCFGStructurizer::isCondBranch(MachineInstr *MI) { 552 switch (MI->getOpcode()) { 553 case R600::JUMP_COND: 554 case R600::BRANCH_COND_i32: 555 case R600::BRANCH_COND_f32: return true; 556 default: 557 return false; 558 } 559 return false; 560 } 561 562 bool R600MachineCFGStructurizer::isUncondBranch(MachineInstr *MI) { 563 switch (MI->getOpcode()) { 564 case R600::JUMP: 565 case R600::BRANCH: 566 return true; 567 default: 568 return false; 569 } 570 return false; 571 } 572 573 DebugLoc R600MachineCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) { 574 //get DebugLoc from the first MachineBasicBlock instruction with debug info 575 DebugLoc DL; 576 for (MachineInstr &MI : *MBB) 577 if (MI.getDebugLoc()) 578 DL = MI.getDebugLoc(); 579 return DL; 580 } 581 582 MachineInstr *R600MachineCFGStructurizer::getNormalBlockBranchInstr( 583 MachineBasicBlock *MBB) { 584 MachineBasicBlock::reverse_iterator It = MBB->rbegin(); 585 MachineInstr *MI = &*It; 586 if (MI && (isCondBranch(MI) || isUncondBranch(MI))) 587 return MI; 588 return nullptr; 589 } 590 591 MachineInstr *R600MachineCFGStructurizer::getLoopendBlockBranchInstr( 592 MachineBasicBlock *MBB) { 593 for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend(); 594 It != E; ++It) { 595 // FIXME: Simplify 596 MachineInstr *MI = &*It; 597 if (MI) { 598 if (isCondBranch(MI) || isUncondBranch(MI)) 599 return MI; 600 else if (!TII->isMov(MI->getOpcode())) 601 break; 602 } 603 } 604 return nullptr; 605 } 606 607 MachineInstr *R600MachineCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) { 608 MachineBasicBlock::reverse_iterator It = MBB->rbegin(); 609 if (It != MBB->rend()) { 610 MachineInstr *instr = &(*It); 611 if (instr->getOpcode() == R600::RETURN) 612 return instr; 613 } 614 return nullptr; 615 } 616 617 bool R600MachineCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) { 618 MachineInstr *MI = getReturnInstr(MBB); 619 bool IsReturn = MBB->succ_empty(); 620 if (MI) 621 assert(IsReturn); 622 else if (IsReturn) 623 LLVM_DEBUG(dbgs() << "BB" << MBB->getNumber() 624 << " is return block without RETURN instr\n";); 625 return IsReturn; 626 } 627 628 void R600MachineCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB, 629 MachineBasicBlock *SrcMBB) { 630 for (MachineBasicBlock *Succ : SrcMBB->successors()) 631 DstMBB->addSuccessor(Succ); // *iter's predecessor is also taken care of 632 } 633 634 MachineBasicBlock *R600MachineCFGStructurizer::clone(MachineBasicBlock *MBB) { 635 MachineFunction *Func = MBB->getParent(); 636 MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock(); 637 Func->push_back(NewMBB); //insert to function 638 for (const MachineInstr &It : *MBB) 639 NewMBB->push_back(Func->CloneMachineInstr(&It)); 640 return NewMBB; 641 } 642 643 void R600MachineCFGStructurizer::replaceInstrUseOfBlockWith( 644 MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB, 645 MachineBasicBlock *NewBlk) { 646 MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB); 647 if (BranchMI && isCondBranch(BranchMI) && 648 getTrueBranch(BranchMI) == OldMBB) 649 setTrueBranch(BranchMI, NewBlk); 650 } 651 652 void R600MachineCFGStructurizer::wrapup(MachineBasicBlock *MBB) { 653 assert((!MBB->getParent()->getJumpTableInfo() 654 || MBB->getParent()->getJumpTableInfo()->isEmpty()) 655 && "found a jump table"); 656 657 //collect continue right before endloop 658 SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr; 659 MachineBasicBlock::iterator Pre = MBB->begin(); 660 MachineBasicBlock::iterator E = MBB->end(); 661 MachineBasicBlock::iterator It = Pre; 662 while (It != E) { 663 if (Pre->getOpcode() == R600::CONTINUE 664 && It->getOpcode() == R600::ENDLOOP) 665 ContInstr.push_back(&*Pre); 666 Pre = It; 667 ++It; 668 } 669 670 //delete continue right before endloop 671 for (unsigned i = 0; i < ContInstr.size(); ++i) 672 ContInstr[i]->eraseFromParent(); 673 674 // TODO to fix up jump table so later phase won't be confused. if 675 // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but 676 // there isn't such an interface yet. alternatively, replace all the other 677 // blocks in the jump table with the entryBlk //} 678 } 679 680 bool R600MachineCFGStructurizer::prepare() { 681 bool Changed = false; 682 683 //FIXME: if not reducible flow graph, make it so ??? 684 685 LLVM_DEBUG(dbgs() << "R600MachineCFGStructurizer::prepare\n";); 686 687 orderBlocks(FuncRep); 688 689 SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks; 690 691 // Add an ExitBlk to loop that don't have one 692 for (MachineLoop *LoopRep : *MLI) { 693 MBBVector ExitingMBBs; 694 LoopRep->getExitingBlocks(ExitingMBBs); 695 696 if (ExitingMBBs.size() == 0) { 697 MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep); 698 if (DummyExitBlk) 699 RetBlks.push_back(DummyExitBlk); 700 } 701 } 702 703 // Remove unconditional branch instr. 704 // Add dummy exit block iff there are multiple returns. 705 for (MachineBasicBlock *MBB : OrderedBlks) { 706 removeUnconditionalBranch(MBB); 707 removeRedundantConditionalBranch(MBB); 708 if (isReturnBlock(MBB)) { 709 RetBlks.push_back(MBB); 710 } 711 assert(MBB->succ_size() <= 2); 712 } 713 714 if (RetBlks.size() >= 2) { 715 addDummyExitBlock(RetBlks); 716 Changed = true; 717 } 718 719 return Changed; 720 } 721 722 bool R600MachineCFGStructurizer::run() { 723 //Assume reducible CFG... 724 LLVM_DEBUG(dbgs() << "R600MachineCFGStructurizer::run\n"); 725 726 #ifdef STRESSTEST 727 //Use the worse block ordering to test the algorithm. 728 ReverseVector(orderedBlks); 729 #endif 730 731 LLVM_DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks();); 732 int NumIter = 0; 733 bool Finish = false; 734 MachineBasicBlock *MBB; 735 bool MakeProgress = false; 736 int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(), 737 OrderedBlks.end()); 738 739 do { 740 ++NumIter; 741 LLVM_DEBUG(dbgs() << "numIter = " << NumIter 742 << ", numRemaintedBlk = " << NumRemainedBlk << "\n";); 743 744 SmallVectorImpl<MachineBasicBlock *>::const_iterator It = 745 OrderedBlks.begin(); 746 SmallVectorImpl<MachineBasicBlock *>::const_iterator E = 747 OrderedBlks.end(); 748 749 SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter = 750 It; 751 MachineBasicBlock *SccBeginMBB = nullptr; 752 int SccNumBlk = 0; // The number of active blocks, init to a 753 // maximum possible number. 754 int SccNumIter; // Number of iteration in this SCC. 755 756 while (It != E) { 757 MBB = *It; 758 759 if (!SccBeginMBB) { 760 SccBeginIter = It; 761 SccBeginMBB = MBB; 762 SccNumIter = 0; 763 SccNumBlk = NumRemainedBlk; // Init to maximum possible number. 764 LLVM_DEBUG(dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB); 765 dbgs() << "\n";); 766 } 767 768 if (!isRetiredBlock(MBB)) 769 patternMatch(MBB); 770 771 ++It; 772 773 bool ContNextScc = true; 774 if (It == E 775 || getSCCNum(SccBeginMBB) != getSCCNum(*It)) { 776 // Just finish one scc. 777 ++SccNumIter; 778 int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It); 779 if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) { 780 LLVM_DEBUG(dbgs() << "Can't reduce SCC " << getSCCNum(MBB) 781 << ", sccNumIter = " << SccNumIter; 782 dbgs() << "doesn't make any progress\n";); 783 ContNextScc = true; 784 } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) { 785 SccNumBlk = sccRemainedNumBlk; 786 It = SccBeginIter; 787 ContNextScc = false; 788 LLVM_DEBUG(dbgs() << "repeat processing SCC" << getSCCNum(MBB) 789 << "sccNumIter = " << SccNumIter << '\n';); 790 } else { 791 // Finish the current scc. 792 ContNextScc = true; 793 } 794 } else { 795 // Continue on next component in the current scc. 796 ContNextScc = false; 797 } 798 799 if (ContNextScc) 800 SccBeginMBB = nullptr; 801 } //while, "one iteration" over the function. 802 803 MachineBasicBlock *EntryMBB = 804 *GraphTraits<MachineFunction *>::nodes_begin(FuncRep); 805 if (EntryMBB->succ_empty()) { 806 Finish = true; 807 LLVM_DEBUG(dbgs() << "Reduce to one block\n";); 808 } else { 809 int NewnumRemainedBlk 810 = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end()); 811 // consider cloned blocks ?? 812 if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) { 813 MakeProgress = true; 814 NumRemainedBlk = NewnumRemainedBlk; 815 } else { 816 MakeProgress = false; 817 LLVM_DEBUG(dbgs() << "No progress\n";); 818 } 819 } 820 } while (!Finish && MakeProgress); 821 822 // Misc wrap up to maintain the consistency of the Function representation. 823 wrapup(*GraphTraits<MachineFunction *>::nodes_begin(FuncRep)); 824 825 // Detach retired Block, release memory. 826 for (auto &It : BlockInfoMap) { 827 if (It.second && It.second->IsRetired) { 828 assert((It.first)->getNumber() != -1); 829 LLVM_DEBUG(dbgs() << "Erase BB" << (It.first)->getNumber() << "\n";); 830 It.first->eraseFromParent(); // Remove from the parent Function. 831 } 832 delete It.second; 833 } 834 BlockInfoMap.clear(); 835 LLInfoMap.clear(); 836 837 if (!Finish) { 838 LLVM_DEBUG(FuncRep->viewCFG()); 839 report_fatal_error("IRREDUCIBLE_CFG"); 840 } 841 842 return true; 843 } 844 845 void R600MachineCFGStructurizer::orderBlocks(MachineFunction *MF) { 846 int SccNum = 0; 847 for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd(); 848 ++It, ++SccNum) { 849 const std::vector<MachineBasicBlock *> &SccNext = *It; 850 for (MachineBasicBlock *MBB : SccNext) { 851 OrderedBlks.push_back(MBB); 852 recordSccnum(MBB, SccNum); 853 } 854 } 855 856 // walk through all the block in func to check for unreachable 857 for (auto *MBB : nodes(MF)) { 858 SccNum = getSCCNum(MBB); 859 if (SccNum == INVALIDSCCNUM) 860 dbgs() << "unreachable block BB" << MBB->getNumber() << "\n"; 861 } 862 } 863 864 int R600MachineCFGStructurizer::patternMatch(MachineBasicBlock *MBB) { 865 int NumMatch = 0; 866 int CurMatch; 867 868 LLVM_DEBUG(dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";); 869 870 while ((CurMatch = patternMatchGroup(MBB)) > 0) 871 NumMatch += CurMatch; 872 873 LLVM_DEBUG(dbgs() << "End patternMatch BB" << MBB->getNumber() 874 << ", numMatch = " << NumMatch << "\n";); 875 876 return NumMatch; 877 } 878 879 int R600MachineCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) { 880 int NumMatch = 0; 881 NumMatch += loopendPatternMatch(); 882 NumMatch += serialPatternMatch(MBB); 883 NumMatch += ifPatternMatch(MBB); 884 return NumMatch; 885 } 886 887 int R600MachineCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) { 888 if (MBB->succ_size() != 1) 889 return 0; 890 891 MachineBasicBlock *childBlk = *MBB->succ_begin(); 892 if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) 893 return 0; 894 895 mergeSerialBlock(MBB, childBlk); 896 ++numSerialPatternMatch; 897 return 1; 898 } 899 900 int R600MachineCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) { 901 //two edges 902 if (MBB->succ_size() != 2) 903 return 0; 904 if (hasBackEdge(MBB)) 905 return 0; 906 MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); 907 if (!BranchMI) 908 return 0; 909 910 assert(isCondBranch(BranchMI)); 911 int NumMatch = 0; 912 913 MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI); 914 NumMatch += serialPatternMatch(TrueMBB); 915 NumMatch += ifPatternMatch(TrueMBB); 916 MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI); 917 NumMatch += serialPatternMatch(FalseMBB); 918 NumMatch += ifPatternMatch(FalseMBB); 919 MachineBasicBlock *LandBlk; 920 int Cloned = 0; 921 922 assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty()); 923 // TODO: Simplify 924 if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1 925 && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) { 926 // Diamond pattern 927 LandBlk = *TrueMBB->succ_begin(); 928 } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) { 929 // Triangle pattern, false is empty 930 LandBlk = FalseMBB; 931 FalseMBB = nullptr; 932 } else if (FalseMBB->succ_size() == 1 933 && *FalseMBB->succ_begin() == TrueMBB) { 934 // Triangle pattern, true is empty 935 // We reverse the predicate to make a triangle, empty false pattern; 936 std::swap(TrueMBB, FalseMBB); 937 reversePredicateSetter(MBB->end(), *MBB); 938 LandBlk = FalseMBB; 939 FalseMBB = nullptr; 940 } else if (FalseMBB->succ_size() == 1 941 && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) { 942 LandBlk = *FalseMBB->succ_begin(); 943 } else if (TrueMBB->succ_size() == 1 944 && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) { 945 LandBlk = *TrueMBB->succ_begin(); 946 } else { 947 return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB); 948 } 949 950 // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the 951 // new BB created for landBlk==NULL may introduce new challenge to the 952 // reduction process. 953 if (LandBlk && 954 ((TrueMBB && TrueMBB->pred_size() > 1) 955 || (FalseMBB && FalseMBB->pred_size() > 1))) { 956 Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk); 957 } 958 959 if (TrueMBB && TrueMBB->pred_size() > 1) { 960 TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB); 961 ++Cloned; 962 } 963 964 if (FalseMBB && FalseMBB->pred_size() > 1) { 965 FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB); 966 ++Cloned; 967 } 968 969 mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk); 970 971 ++numIfPatternMatch; 972 973 numClonedBlock += Cloned; 974 975 return 1 + Cloned + NumMatch; 976 } 977 978 int R600MachineCFGStructurizer::loopendPatternMatch() { 979 std::deque<MachineLoop *> NestedLoops; 980 for (auto &It: *MLI) 981 for (MachineLoop *ML : depth_first(It)) 982 NestedLoops.push_front(ML); 983 984 if (NestedLoops.empty()) 985 return 0; 986 987 // Process nested loop outside->inside (we did push_front), 988 // so "continue" to a outside loop won't be mistaken as "break" 989 // of the current loop. 990 int Num = 0; 991 for (MachineLoop *ExaminedLoop : NestedLoops) { 992 if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop]) 993 continue; 994 LLVM_DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump();); 995 int NumBreak = mergeLoop(ExaminedLoop); 996 if (NumBreak == -1) 997 break; 998 Num += NumBreak; 999 } 1000 return Num; 1001 } 1002 1003 int R600MachineCFGStructurizer::mergeLoop(MachineLoop *LoopRep) { 1004 MachineBasicBlock *LoopHeader = LoopRep->getHeader(); 1005 MBBVector ExitingMBBs; 1006 LoopRep->getExitingBlocks(ExitingMBBs); 1007 assert(!ExitingMBBs.empty() && "Infinite Loop not supported"); 1008 LLVM_DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() 1009 << " exiting blocks\n";); 1010 // We assume a single ExitBlk 1011 MBBVector ExitBlks; 1012 LoopRep->getExitBlocks(ExitBlks); 1013 SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet; 1014 for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i) 1015 ExitBlkSet.insert(ExitBlks[i]); 1016 assert(ExitBlkSet.size() == 1); 1017 MachineBasicBlock *ExitBlk = *ExitBlks.begin(); 1018 assert(ExitBlk && "Loop has several exit block"); 1019 MBBVector LatchBlks; 1020 for (auto *LB : inverse_children<MachineBasicBlock*>(LoopHeader)) 1021 if (LoopRep->contains(LB)) 1022 LatchBlks.push_back(LB); 1023 1024 for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i) 1025 mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk); 1026 for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i) 1027 settleLoopcontBlock(LatchBlks[i], LoopHeader); 1028 int Match = 0; 1029 do { 1030 Match = 0; 1031 Match += serialPatternMatch(LoopHeader); 1032 Match += ifPatternMatch(LoopHeader); 1033 } while (Match > 0); 1034 mergeLooplandBlock(LoopHeader, ExitBlk); 1035 MachineLoop *ParentLoop = LoopRep->getParentLoop(); 1036 if (ParentLoop) 1037 MLI->changeLoopFor(LoopHeader, ParentLoop); 1038 else 1039 MLI->removeBlock(LoopHeader); 1040 Visited[LoopRep] = true; 1041 return 1; 1042 } 1043 1044 bool R600MachineCFGStructurizer::isSameloopDetachedContbreak( 1045 MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) { 1046 if (Src1MBB->succ_empty()) { 1047 MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB); 1048 if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) { 1049 MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep]; 1050 if (TheEntry) { 1051 LLVM_DEBUG(dbgs() << "isLoopContBreakBlock yes src1 = BB" 1052 << Src1MBB->getNumber() << " src2 = BB" 1053 << Src2MBB->getNumber() << "\n";); 1054 return true; 1055 } 1056 } 1057 } 1058 return false; 1059 } 1060 1061 int R600MachineCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB, 1062 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { 1063 int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB); 1064 if (Num == 0) { 1065 LLVM_DEBUG(dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" 1066 << "\n";); 1067 Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB); 1068 } 1069 return Num; 1070 } 1071 1072 int R600MachineCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB, 1073 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { 1074 int Num = 0; 1075 MachineBasicBlock *DownBlk; 1076 1077 //trueBlk could be the common post dominator 1078 DownBlk = TrueMBB; 1079 1080 LLVM_DEBUG(dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber() 1081 << " true = BB" << TrueMBB->getNumber() 1082 << ", numSucc=" << TrueMBB->succ_size() << " false = BB" 1083 << FalseMBB->getNumber() << "\n";); 1084 1085 while (DownBlk) { 1086 LLVM_DEBUG(dbgs() << "check down = BB" << DownBlk->getNumber();); 1087 1088 if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) { 1089 LLVM_DEBUG(dbgs() << " working\n";); 1090 1091 Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk); 1092 Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk); 1093 1094 numClonedBlock += Num; 1095 Num += serialPatternMatch(*HeadMBB->succ_begin()); 1096 Num += serialPatternMatch(*std::next(HeadMBB->succ_begin())); 1097 Num += ifPatternMatch(HeadMBB); 1098 assert(Num > 0); 1099 1100 break; 1101 } 1102 LLVM_DEBUG(dbgs() << " not working\n";); 1103 DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr; 1104 } // walk down the postDomTree 1105 1106 return Num; 1107 } 1108 1109 #ifndef NDEBUG 1110 void R600MachineCFGStructurizer::showImproveSimpleJumpintoIf( 1111 MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB, 1112 MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) { 1113 dbgs() << "head = BB" << HeadMBB->getNumber() 1114 << " size = " << HeadMBB->size(); 1115 if (Detail) { 1116 dbgs() << "\n"; 1117 HeadMBB->print(dbgs()); 1118 dbgs() << "\n"; 1119 } 1120 1121 if (TrueMBB) { 1122 dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = " 1123 << TrueMBB->size() << " numPred = " << TrueMBB->pred_size(); 1124 if (Detail) { 1125 dbgs() << "\n"; 1126 TrueMBB->print(dbgs()); 1127 dbgs() << "\n"; 1128 } 1129 } 1130 if (FalseMBB) { 1131 dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = " 1132 << FalseMBB->size() << " numPred = " << FalseMBB->pred_size(); 1133 if (Detail) { 1134 dbgs() << "\n"; 1135 FalseMBB->print(dbgs()); 1136 dbgs() << "\n"; 1137 } 1138 } 1139 if (LandMBB) { 1140 dbgs() << ", land = BB" << LandMBB->getNumber() << " size = " 1141 << LandMBB->size() << " numPred = " << LandMBB->pred_size(); 1142 if (Detail) { 1143 dbgs() << "\n"; 1144 LandMBB->print(dbgs()); 1145 dbgs() << "\n"; 1146 } 1147 } 1148 1149 dbgs() << "\n"; 1150 } 1151 #endif 1152 1153 int R600MachineCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, 1154 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, 1155 MachineBasicBlock **LandMBBPtr) { 1156 bool MigrateTrue = false; 1157 bool MigrateFalse = false; 1158 1159 MachineBasicBlock *LandBlk = *LandMBBPtr; 1160 1161 assert((!TrueMBB || TrueMBB->succ_size() <= 1) 1162 && (!FalseMBB || FalseMBB->succ_size() <= 1)); 1163 1164 if (TrueMBB == FalseMBB) 1165 return 0; 1166 1167 MigrateTrue = needMigrateBlock(TrueMBB); 1168 MigrateFalse = needMigrateBlock(FalseMBB); 1169 1170 if (!MigrateTrue && !MigrateFalse) 1171 return 0; 1172 1173 // If we need to migrate either trueBlk and falseBlk, migrate the rest that 1174 // have more than one predecessors. without doing this, its predecessor 1175 // rather than headBlk will have undefined value in initReg. 1176 if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1) 1177 MigrateTrue = true; 1178 if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1) 1179 MigrateFalse = true; 1180 1181 LLVM_DEBUG( 1182 dbgs() << "before improveSimpleJumpintoIf: "; 1183 showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);); 1184 1185 // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk 1186 // 1187 // new: headBlk => if () {initReg = 1; org trueBlk branch} else 1188 // {initReg = 0; org falseBlk branch } 1189 // => landBlk => if (initReg) {org trueBlk} else {org falseBlk} 1190 // => org landBlk 1191 // if landBlk->pred_size() > 2, put the about if-else inside 1192 // if (initReg !=2) {...} 1193 // 1194 // add initReg = initVal to headBlk 1195 1196 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 1197 if (!MigrateTrue || !MigrateFalse) { 1198 // XXX: We have an opportunity here to optimize the "branch into if" case 1199 // here. Branch into if looks like this: 1200 // entry 1201 // / | 1202 // diamond_head branch_from 1203 // / \ | 1204 // diamond_false diamond_true 1205 // \ / 1206 // done 1207 // 1208 // The diamond_head block begins the "if" and the diamond_true block 1209 // is the block being "branched into". 1210 // 1211 // If MigrateTrue is true, then TrueBB is the block being "branched into" 1212 // and if MigrateFalse is true, then FalseBB is the block being 1213 // "branched into" 1214 // 1215 // Here is the pseudo code for how I think the optimization should work: 1216 // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head. 1217 // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from. 1218 // 3. Move the branch instruction from diamond_head into its own basic 1219 // block (new_block). 1220 // 4. Add an unconditional branch from diamond_head to new_block 1221 // 5. Replace the branch instruction in branch_from with an unconditional 1222 // branch to new_block. If branch_from has multiple predecessors, then 1223 // we need to replace the True/False block in the branch 1224 // instruction instead of replacing it. 1225 // 6. Change the condition of the branch instruction in new_block from 1226 // COND to (COND || GPR0) 1227 // 1228 // In order insert these MOV instruction, we will need to use the 1229 // RegisterScavenger. Usually liveness stops being tracked during 1230 // the late machine optimization passes, however if we implement 1231 // bool TargetRegisterInfo::requiresRegisterScavenging( 1232 // const MachineFunction &MF) 1233 // and have it return true, liveness will be tracked correctly 1234 // by generic optimization passes. We will also need to make sure that 1235 // all of our target-specific passes that run after regalloc and before 1236 // the CFGStructurizer track liveness and we will need to modify this pass 1237 // to correctly track liveness. 1238 // 1239 // After the above changes, the new CFG should look like this: 1240 // entry 1241 // / | 1242 // diamond_head branch_from 1243 // \ / 1244 // new_block 1245 // / | 1246 // diamond_false diamond_true 1247 // \ / 1248 // done 1249 // 1250 // Without this optimization, we are forced to duplicate the diamond_true 1251 // block and we will end up with a CFG like this: 1252 // 1253 // entry 1254 // / | 1255 // diamond_head branch_from 1256 // / \ | 1257 // diamond_false diamond_true diamond_true (duplicate) 1258 // \ / | 1259 // done --------------------| 1260 // 1261 // Duplicating diamond_true can be very costly especially if it has a 1262 // lot of instructions. 1263 return 0; 1264 } 1265 1266 int NumNewBlk = 0; 1267 1268 bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2); 1269 1270 //insert R600::ENDIF to avoid special case "input landBlk == NULL" 1271 MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, R600::ENDIF); 1272 1273 if (LandBlkHasOtherPred) { 1274 report_fatal_error("Extra register needed to handle CFG"); 1275 Register CmpResReg = 1276 HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); 1277 report_fatal_error("Extra compare instruction needed to handle CFG"); 1278 insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, 1279 CmpResReg, DebugLoc()); 1280 } 1281 1282 // XXX: We are running this after RA, so creating virtual registers will 1283 // cause an assertion failure in the PostRA scheduling pass. 1284 Register InitReg = 1285 HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); 1286 insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, InitReg, 1287 DebugLoc()); 1288 1289 if (MigrateTrue) { 1290 migrateInstruction(TrueMBB, LandBlk, I); 1291 // need to uncondionally insert the assignment to ensure a path from its 1292 // predecessor rather than headBlk has valid value in initReg if 1293 // (initVal != 1). 1294 report_fatal_error("Extra register needed to handle CFG"); 1295 } 1296 insertInstrBefore(I, R600::ELSE); 1297 1298 if (MigrateFalse) { 1299 migrateInstruction(FalseMBB, LandBlk, I); 1300 // need to uncondionally insert the assignment to ensure a path from its 1301 // predecessor rather than headBlk has valid value in initReg if 1302 // (initVal != 0) 1303 report_fatal_error("Extra register needed to handle CFG"); 1304 } 1305 1306 if (LandBlkHasOtherPred) { 1307 // add endif 1308 insertInstrBefore(I, R600::ENDIF); 1309 1310 // put initReg = 2 to other predecessors of landBlk 1311 for (MachineBasicBlock *MBB : LandBlk->predecessors()) 1312 if (MBB != TrueMBB && MBB != FalseMBB) 1313 report_fatal_error("Extra register needed to handle CFG"); 1314 } 1315 LLVM_DEBUG( 1316 dbgs() << "result from improveSimpleJumpintoIf: "; 1317 showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);); 1318 1319 // update landBlk 1320 *LandMBBPtr = LandBlk; 1321 1322 return NumNewBlk; 1323 } 1324 1325 void R600MachineCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB, 1326 MachineBasicBlock *SrcMBB) { 1327 LLVM_DEBUG(dbgs() << "serialPattern BB" << DstMBB->getNumber() << " <= BB" 1328 << SrcMBB->getNumber() << "\n";); 1329 DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end()); 1330 1331 DstMBB->removeSuccessor(SrcMBB, true); 1332 cloneSuccessorList(DstMBB, SrcMBB); 1333 1334 removeSuccessor(SrcMBB); 1335 MLI->removeBlock(SrcMBB); 1336 retireBlock(SrcMBB); 1337 } 1338 1339 void R600MachineCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI, 1340 MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, 1341 MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) { 1342 assert (TrueMBB); 1343 LLVM_DEBUG(dbgs() << "ifPattern BB" << MBB->getNumber(); dbgs() << "{ "; 1344 if (TrueMBB) { dbgs() << "BB" << TrueMBB->getNumber(); } dbgs() 1345 << " } else "; 1346 dbgs() << "{ "; if (FalseMBB) { 1347 dbgs() << "BB" << FalseMBB->getNumber(); 1348 } dbgs() << " }\n "; 1349 dbgs() << "landBlock: "; if (!LandMBB) { dbgs() << "NULL"; } else { 1350 dbgs() << "BB" << LandMBB->getNumber(); 1351 } dbgs() << "\n";); 1352 1353 int OldOpcode = BranchMI->getOpcode(); 1354 DebugLoc BranchDL = BranchMI->getDebugLoc(); 1355 1356 // transform to 1357 // if cond 1358 // trueBlk 1359 // else 1360 // falseBlk 1361 // endif 1362 // landBlk 1363 1364 MachineBasicBlock::iterator I = BranchMI; 1365 insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode), 1366 BranchDL); 1367 1368 if (TrueMBB) { 1369 MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end()); 1370 MBB->removeSuccessor(TrueMBB, true); 1371 if (LandMBB && TrueMBB->succ_size()!=0) 1372 TrueMBB->removeSuccessor(LandMBB, true); 1373 retireBlock(TrueMBB); 1374 MLI->removeBlock(TrueMBB); 1375 } 1376 1377 if (FalseMBB) { 1378 insertInstrBefore(I, R600::ELSE); 1379 MBB->splice(I, FalseMBB, FalseMBB->begin(), 1380 FalseMBB->end()); 1381 MBB->removeSuccessor(FalseMBB, true); 1382 if (LandMBB && !FalseMBB->succ_empty()) 1383 FalseMBB->removeSuccessor(LandMBB, true); 1384 retireBlock(FalseMBB); 1385 MLI->removeBlock(FalseMBB); 1386 } 1387 insertInstrBefore(I, R600::ENDIF); 1388 1389 BranchMI->eraseFromParent(); 1390 1391 if (LandMBB && TrueMBB && FalseMBB) 1392 MBB->addSuccessor(LandMBB); 1393 } 1394 1395 void R600MachineCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk, 1396 MachineBasicBlock *LandMBB) { 1397 LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber() 1398 << " land = BB" << LandMBB->getNumber() << "\n";); 1399 1400 insertInstrBefore(DstBlk, R600::WHILELOOP, DebugLoc()); 1401 insertInstrEnd(DstBlk, R600::ENDLOOP, DebugLoc()); 1402 DstBlk->replaceSuccessor(DstBlk, LandMBB); 1403 } 1404 1405 void R600MachineCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, 1406 MachineBasicBlock *LandMBB) { 1407 LLVM_DEBUG(dbgs() << "loopbreakPattern exiting = BB" 1408 << ExitingMBB->getNumber() << " land = BB" 1409 << LandMBB->getNumber() << "\n";); 1410 MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB); 1411 assert(BranchMI && isCondBranch(BranchMI)); 1412 DebugLoc DL = BranchMI->getDebugLoc(); 1413 MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI); 1414 MachineBasicBlock::iterator I = BranchMI; 1415 if (TrueBranch != LandMBB) 1416 reversePredicateSetter(I, *I->getParent()); 1417 insertCondBranchBefore(ExitingMBB, I, R600::IF_PREDICATE_SET, R600::PREDICATE_BIT, DL); 1418 insertInstrBefore(I, R600::BREAK); 1419 insertInstrBefore(I, R600::ENDIF); 1420 //now branchInst can be erase safely 1421 BranchMI->eraseFromParent(); 1422 //now take care of successors, retire blocks 1423 ExitingMBB->removeSuccessor(LandMBB, true); 1424 } 1425 1426 void R600MachineCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB, 1427 MachineBasicBlock *ContMBB) { 1428 LLVM_DEBUG(dbgs() << "settleLoopcontBlock conting = BB" 1429 << ContingMBB->getNumber() << ", cont = BB" 1430 << ContMBB->getNumber() << "\n";); 1431 1432 MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB); 1433 if (MI) { 1434 assert(isCondBranch(MI)); 1435 MachineBasicBlock::iterator I = MI; 1436 MachineBasicBlock *TrueBranch = getTrueBranch(MI); 1437 int OldOpcode = MI->getOpcode(); 1438 DebugLoc DL = MI->getDebugLoc(); 1439 1440 bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI); 1441 1442 if (!UseContinueLogical) { 1443 int BranchOpcode = 1444 TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) : 1445 getBranchZeroOpcode(OldOpcode); 1446 insertCondBranchBefore(I, BranchOpcode, DL); 1447 // insertEnd to ensure phi-moves, if exist, go before the continue-instr. 1448 insertInstrEnd(ContingMBB, R600::CONTINUE, DL); 1449 insertInstrEnd(ContingMBB, R600::ENDIF, DL); 1450 } else { 1451 int BranchOpcode = 1452 TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) : 1453 getContinueZeroOpcode(OldOpcode); 1454 insertCondBranchBefore(I, BranchOpcode, DL); 1455 } 1456 1457 MI->eraseFromParent(); 1458 } else { 1459 // if we've arrived here then we've already erased the branch instruction 1460 // travel back up the basic block to see the last reference of our debug 1461 // location we've just inserted that reference here so it should be 1462 // representative insertEnd to ensure phi-moves, if exist, go before the 1463 // continue-instr. 1464 insertInstrEnd(ContingMBB, R600::CONTINUE, 1465 getLastDebugLocInBB(ContingMBB)); 1466 } 1467 } 1468 1469 int R600MachineCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB, 1470 MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) { 1471 int Cloned = 0; 1472 assert(PreMBB->isSuccessor(SrcMBB)); 1473 while (SrcMBB && SrcMBB != DstMBB) { 1474 assert(SrcMBB->succ_size() == 1); 1475 if (SrcMBB->pred_size() > 1) { 1476 SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB); 1477 ++Cloned; 1478 } 1479 1480 PreMBB = SrcMBB; 1481 SrcMBB = *SrcMBB->succ_begin(); 1482 } 1483 1484 return Cloned; 1485 } 1486 1487 MachineBasicBlock * 1488 R600MachineCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB, 1489 MachineBasicBlock *PredMBB) { 1490 assert(PredMBB->isSuccessor(MBB) && "succBlk is not a predecessor of curBlk"); 1491 1492 MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions 1493 replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB); 1494 //srcBlk, oldBlk, newBlk 1495 1496 PredMBB->replaceSuccessor(MBB, CloneMBB); 1497 1498 // add all successor to cloneBlk 1499 cloneSuccessorList(CloneMBB, MBB); 1500 1501 numClonedInstr += MBB->size(); 1502 1503 LLVM_DEBUG(dbgs() << "Cloned block: " 1504 << "BB" << MBB->getNumber() << "size " << MBB->size() 1505 << "\n";); 1506 1507 SHOWNEWBLK(CloneMBB, "result of Cloned block: "); 1508 1509 return CloneMBB; 1510 } 1511 1512 void R600MachineCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB, 1513 MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) { 1514 MachineBasicBlock::iterator SpliceEnd; 1515 //look for the input branchinstr, not the AMDGPU branchinstr 1516 MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB); 1517 if (!BranchMI) { 1518 LLVM_DEBUG(dbgs() << "migrateInstruction don't see branch instr\n";); 1519 SpliceEnd = SrcMBB->end(); 1520 } else { 1521 LLVM_DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI); 1522 SpliceEnd = BranchMI; 1523 } 1524 LLVM_DEBUG(dbgs() << "migrateInstruction before splice dstSize = " 1525 << DstMBB->size() << "srcSize = " << SrcMBB->size() 1526 << "\n";); 1527 1528 //splice insert before insertPos 1529 DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd); 1530 1531 LLVM_DEBUG(dbgs() << "migrateInstruction after splice dstSize = " 1532 << DstMBB->size() << "srcSize = " << SrcMBB->size() 1533 << '\n';); 1534 } 1535 1536 MachineBasicBlock * 1537 R600MachineCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) { 1538 MachineBasicBlock *LoopHeader = LoopRep->getHeader(); 1539 MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch(); 1540 1541 if (!LoopHeader || !LoopLatch) 1542 return nullptr; 1543 MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch); 1544 // Is LoopRep an infinite loop ? 1545 if (!BranchMI || !isUncondBranch(BranchMI)) 1546 return nullptr; 1547 1548 MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); 1549 FuncRep->push_back(DummyExitBlk); //insert to function 1550 SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); 1551 LLVM_DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";); 1552 LLVMContext &Ctx = LoopHeader->getParent()->getFunction().getContext(); 1553 Ctx.emitError("Extra register needed to handle CFG"); 1554 return nullptr; 1555 } 1556 1557 void R600MachineCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) { 1558 MachineInstr *BranchMI; 1559 1560 // I saw two unconditional branch in one basic block in example 1561 // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. 1562 while ((BranchMI = getLoopendBlockBranchInstr(MBB)) 1563 && isUncondBranch(BranchMI)) { 1564 LLVM_DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI); 1565 BranchMI->eraseFromParent(); 1566 } 1567 } 1568 1569 void R600MachineCFGStructurizer::removeRedundantConditionalBranch( 1570 MachineBasicBlock *MBB) { 1571 if (MBB->succ_size() != 2) 1572 return; 1573 MachineBasicBlock *MBB1 = *MBB->succ_begin(); 1574 MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin()); 1575 if (MBB1 != MBB2) 1576 return; 1577 1578 MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); 1579 assert(BranchMI && isCondBranch(BranchMI)); 1580 LLVM_DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI); 1581 BranchMI->eraseFromParent(); 1582 SHOWNEWBLK(MBB1, "Removing redundant successor"); 1583 MBB->removeSuccessor(MBB1, true); 1584 } 1585 1586 void R600MachineCFGStructurizer::addDummyExitBlock( 1587 SmallVectorImpl<MachineBasicBlock*> &RetMBB) { 1588 MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); 1589 FuncRep->push_back(DummyExitBlk); //insert to function 1590 insertInstrEnd(DummyExitBlk, R600::RETURN); 1591 1592 for (MachineBasicBlock *MBB : RetMBB) { 1593 if (MachineInstr *MI = getReturnInstr(MBB)) 1594 MI->eraseFromParent(); 1595 MBB->addSuccessor(DummyExitBlk); 1596 LLVM_DEBUG(dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber() 1597 << " successors\n";); 1598 } 1599 SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: "); 1600 } 1601 1602 void R600MachineCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) { 1603 while (MBB->succ_size()) 1604 MBB->removeSuccessor(*MBB->succ_begin()); 1605 } 1606 1607 void R600MachineCFGStructurizer::recordSccnum(MachineBasicBlock *MBB, 1608 int SccNum) { 1609 BlockInformation *&srcBlkInfo = BlockInfoMap[MBB]; 1610 if (!srcBlkInfo) 1611 srcBlkInfo = new BlockInformation(); 1612 srcBlkInfo->SccNum = SccNum; 1613 } 1614 1615 void R600MachineCFGStructurizer::retireBlock(MachineBasicBlock *MBB) { 1616 LLVM_DEBUG(dbgs() << "Retiring BB" << MBB->getNumber() << "\n";); 1617 1618 BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB]; 1619 1620 if (!SrcBlkInfo) 1621 SrcBlkInfo = new BlockInformation(); 1622 1623 SrcBlkInfo->IsRetired = true; 1624 assert(MBB->succ_empty() && MBB->pred_empty() && "can't retire block yet"); 1625 } 1626 1627 INITIALIZE_PASS_BEGIN(R600MachineCFGStructurizer, "amdgpustructurizer", 1628 "AMDGPU CFG Structurizer", false, false) 1629 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 1630 INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) 1631 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 1632 INITIALIZE_PASS_END(R600MachineCFGStructurizer, "amdgpustructurizer", 1633 "AMDGPU CFG Structurizer", false, false) 1634 1635 FunctionPass *llvm::createR600MachineCFGStructurizerPass() { 1636 return new R600MachineCFGStructurizer(); 1637 } 1638