1 //===--------------------- SIOptimizeVGPRLiveRange.cpp -------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This pass tries to remove unnecessary VGPR live ranges in divergent if-else 11 /// structures and waterfall loops. 12 /// 13 /// When we do structurization, we usually transform an if-else into two 14 /// successive if-then (with a flow block to do predicate inversion). Consider a 15 /// simple case after structurization: A divergent value %a was defined before 16 /// if-else and used in both THEN (use in THEN is optional) and ELSE part: 17 /// bb.if: 18 /// %a = ... 19 /// ... 20 /// bb.then: 21 /// ... = op %a 22 /// ... // %a can be dead here 23 /// bb.flow: 24 /// ... 25 /// bb.else: 26 /// ... = %a 27 /// ... 28 /// bb.endif 29 /// 30 /// As register allocator has no idea of the thread-control-flow, it will just 31 /// assume %a would be alive in the whole range of bb.then because of a later 32 /// use in bb.else. On AMDGPU architecture, the VGPR is accessed with respect 33 /// to exec mask. For this if-else case, the lanes active in bb.then will be 34 /// inactive in bb.else, and vice-versa. So we are safe to say that %a was dead 35 /// after the last use in bb.then until the end of the block. The reason is 36 /// the instructions in bb.then will only overwrite lanes that will never be 37 /// accessed in bb.else. 38 /// 39 /// This pass aims to tell register allocator that %a is in-fact dead, 40 /// through inserting a phi-node in bb.flow saying that %a is undef when coming 41 /// from bb.then, and then replace the uses in the bb.else with the result of 42 /// newly inserted phi. 43 /// 44 /// Two key conditions must be met to ensure correctness: 45 /// 1.) The def-point should be in the same loop-level as if-else-endif to make 46 /// sure the second loop iteration still get correct data. 47 /// 2.) There should be no further uses after the IF-ELSE region. 48 /// 49 /// 50 /// Waterfall loops get inserted around instructions that use divergent values 51 /// but can only be executed with a uniform value. For example an indirect call 52 /// to a divergent address: 53 /// bb.start: 54 /// %a = ... 55 /// %fun = ... 56 /// ... 57 /// bb.loop: 58 /// call %fun (%a) 59 /// ... // %a can be dead here 60 /// loop %bb.loop 61 /// 62 /// The loop block is executed multiple times, but it is run exactly once for 63 /// each active lane. Similar to the if-else case, the register allocator 64 /// assumes that %a is live throughout the loop as it is used again in the next 65 /// iteration. If %a is a VGPR that is unused after the loop, it does not need 66 /// to be live after its last use in the loop block. By inserting a phi-node at 67 /// the start of bb.loop that is undef when coming from bb.loop, the register 68 /// allocation knows that the value of %a does not need to be preserved through 69 /// iterations of the loop. 70 /// 71 // 72 //===----------------------------------------------------------------------===// 73 74 #include "SIOptimizeVGPRLiveRange.h" 75 #include "AMDGPU.h" 76 #include "GCNSubtarget.h" 77 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 78 #include "SIMachineFunctionInfo.h" 79 #include "llvm/CodeGen/LiveVariables.h" 80 #include "llvm/CodeGen/MachineDominators.h" 81 #include "llvm/CodeGen/MachineLoopInfo.h" 82 #include "llvm/CodeGen/TargetRegisterInfo.h" 83 84 using namespace llvm; 85 86 #define DEBUG_TYPE "si-opt-vgpr-liverange" 87 88 namespace { 89 90 class SIOptimizeVGPRLiveRange { 91 private: 92 const SIRegisterInfo *TRI = nullptr; 93 const SIInstrInfo *TII = nullptr; 94 LiveVariables *LV = nullptr; 95 MachineDominatorTree *MDT = nullptr; 96 const MachineLoopInfo *Loops = nullptr; 97 MachineRegisterInfo *MRI = nullptr; 98 99 public: 100 SIOptimizeVGPRLiveRange(LiveVariables *LV, MachineDominatorTree *MDT, 101 MachineLoopInfo *Loops) 102 : LV(LV), MDT(MDT), Loops(Loops) {} 103 bool run(MachineFunction &MF); 104 105 MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const; 106 107 void collectElseRegionBlocks(MachineBasicBlock *Flow, 108 MachineBasicBlock *Endif, 109 SmallSetVector<MachineBasicBlock *, 16> &) const; 110 111 void 112 collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow, 113 MachineBasicBlock *Endif, 114 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks, 115 SmallVectorImpl<Register> &CandidateRegs) const; 116 117 void collectWaterfallCandidateRegisters( 118 MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd, 119 SmallSetVector<Register, 16> &CandidateRegs, 120 SmallSetVector<MachineBasicBlock *, 2> &Blocks, 121 SmallVectorImpl<MachineInstr *> &Instructions) const; 122 123 void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB, 124 SmallVectorImpl<MachineInstr *> &Uses) const; 125 126 void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If, 127 MachineBasicBlock *Flow) const; 128 129 void updateLiveRangeInElseRegion( 130 Register Reg, Register NewReg, MachineBasicBlock *Flow, 131 MachineBasicBlock *Endif, 132 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const; 133 134 void 135 optimizeLiveRange(Register Reg, MachineBasicBlock *If, 136 MachineBasicBlock *Flow, MachineBasicBlock *Endif, 137 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const; 138 139 void optimizeWaterfallLiveRange( 140 Register Reg, MachineBasicBlock *LoopHeader, 141 SmallSetVector<MachineBasicBlock *, 2> &LoopBlocks, 142 SmallVectorImpl<MachineInstr *> &Instructions) const; 143 }; 144 145 class SIOptimizeVGPRLiveRangeLegacy : public MachineFunctionPass { 146 public: 147 static char ID; 148 149 SIOptimizeVGPRLiveRangeLegacy() : MachineFunctionPass(ID) {} 150 151 bool runOnMachineFunction(MachineFunction &MF) override; 152 153 StringRef getPassName() const override { 154 return "SI Optimize VGPR LiveRange"; 155 } 156 157 void getAnalysisUsage(AnalysisUsage &AU) const override { 158 AU.setPreservesCFG(); 159 AU.addRequired<LiveVariablesWrapperPass>(); 160 AU.addRequired<MachineDominatorTreeWrapperPass>(); 161 AU.addRequired<MachineLoopInfoWrapperPass>(); 162 AU.addPreserved<LiveVariablesWrapperPass>(); 163 AU.addPreserved<MachineDominatorTreeWrapperPass>(); 164 AU.addPreserved<MachineLoopInfoWrapperPass>(); 165 MachineFunctionPass::getAnalysisUsage(AU); 166 } 167 168 MachineFunctionProperties getRequiredProperties() const override { 169 return MachineFunctionProperties().set( 170 MachineFunctionProperties::Property::IsSSA); 171 } 172 173 MachineFunctionProperties getClearedProperties() const override { 174 return MachineFunctionProperties().set( 175 MachineFunctionProperties::Property::NoPHIs); 176 } 177 }; 178 179 } // end anonymous namespace 180 181 // Check whether the MBB is a else flow block and get the branching target which 182 // is the Endif block 183 MachineBasicBlock * 184 SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const { 185 for (auto &BR : MBB->terminators()) { 186 if (BR.getOpcode() == AMDGPU::SI_ELSE) 187 return BR.getOperand(2).getMBB(); 188 } 189 return nullptr; 190 } 191 192 void SIOptimizeVGPRLiveRange::collectElseRegionBlocks( 193 MachineBasicBlock *Flow, MachineBasicBlock *Endif, 194 SmallSetVector<MachineBasicBlock *, 16> &Blocks) const { 195 assert(Flow != Endif); 196 197 MachineBasicBlock *MBB = Endif; 198 unsigned Cur = 0; 199 while (MBB) { 200 for (auto *Pred : MBB->predecessors()) { 201 if (Pred != Flow) 202 Blocks.insert(Pred); 203 } 204 205 if (Cur < Blocks.size()) 206 MBB = Blocks[Cur++]; 207 else 208 MBB = nullptr; 209 } 210 211 LLVM_DEBUG({ 212 dbgs() << "Found Else blocks: "; 213 for (auto *MBB : Blocks) 214 dbgs() << printMBBReference(*MBB) << ' '; 215 dbgs() << '\n'; 216 }); 217 } 218 219 /// Find the instructions(excluding phi) in \p MBB that uses the \p Reg. 220 void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock( 221 Register Reg, MachineBasicBlock *MBB, 222 SmallVectorImpl<MachineInstr *> &Uses) const { 223 for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) { 224 if (UseMI.getParent() == MBB && !UseMI.isPHI()) 225 Uses.push_back(&UseMI); 226 } 227 } 228 229 /// Collect the killed registers in the ELSE region which are not alive through 230 /// the whole THEN region. 231 void SIOptimizeVGPRLiveRange::collectCandidateRegisters( 232 MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif, 233 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks, 234 SmallVectorImpl<Register> &CandidateRegs) const { 235 236 SmallSet<Register, 8> KillsInElse; 237 238 for (auto *Else : ElseBlocks) { 239 for (auto &MI : Else->instrs()) { 240 if (MI.isDebugInstr()) 241 continue; 242 243 for (auto &MO : MI.operands()) { 244 if (!MO.isReg() || !MO.getReg() || MO.isDef()) 245 continue; 246 247 Register MOReg = MO.getReg(); 248 // We can only optimize AGPR/VGPR virtual register 249 if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg)) 250 continue; 251 252 if (MO.readsReg()) { 253 LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg); 254 const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent(); 255 // Make sure two conditions are met: 256 // a.) the value is defined before/in the IF block 257 // b.) should be defined in the same loop-level. 258 if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) && 259 Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) { 260 // Check if the register is live into the endif block. If not, 261 // consider it killed in the else region. 262 LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg); 263 if (!VI.isLiveIn(*Endif, MOReg, *MRI)) { 264 KillsInElse.insert(MOReg); 265 } else { 266 LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI) 267 << " as Live in Endif\n"); 268 } 269 } 270 } 271 } 272 } 273 } 274 275 // Check the phis in the Endif, looking for value coming from the ELSE 276 // region. Make sure the phi-use is the last use. 277 for (auto &MI : Endif->phis()) { 278 for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) { 279 auto &MO = MI.getOperand(Idx); 280 auto *Pred = MI.getOperand(Idx + 1).getMBB(); 281 if (Pred == Flow) 282 continue; 283 assert(ElseBlocks.contains(Pred) && "Should be from Else region\n"); 284 285 if (!MO.isReg() || !MO.getReg() || MO.isUndef()) 286 continue; 287 288 Register Reg = MO.getReg(); 289 if (Reg.isPhysical() || !TRI->isVectorRegister(*MRI, Reg)) 290 continue; 291 292 LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); 293 294 if (VI.isLiveIn(*Endif, Reg, *MRI)) { 295 LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI) 296 << " as Live in Endif\n"); 297 continue; 298 } 299 // Make sure two conditions are met: 300 // a.) the value is defined before/in the IF block 301 // b.) should be defined in the same loop-level. 302 const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent(); 303 if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) && 304 Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) 305 KillsInElse.insert(Reg); 306 } 307 } 308 309 auto IsLiveThroughThen = [&](Register Reg) { 310 for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E; 311 ++I) { 312 if (!I->readsReg()) 313 continue; 314 auto *UseMI = I->getParent(); 315 auto *UseMBB = UseMI->getParent(); 316 if (UseMBB == Flow || UseMBB == Endif) { 317 if (!UseMI->isPHI()) 318 return true; 319 320 auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB(); 321 // The register is live through the path If->Flow or Flow->Endif. 322 // we should not optimize for such cases. 323 if ((UseMBB == Flow && IncomingMBB != If) || 324 (UseMBB == Endif && IncomingMBB == Flow)) 325 return true; 326 } 327 } 328 return false; 329 }; 330 331 for (auto Reg : KillsInElse) { 332 if (!IsLiveThroughThen(Reg)) 333 CandidateRegs.push_back(Reg); 334 } 335 } 336 337 /// Collect the registers used in the waterfall loop block that are defined 338 /// before. 339 void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters( 340 MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd, 341 SmallSetVector<Register, 16> &CandidateRegs, 342 SmallSetVector<MachineBasicBlock *, 2> &Blocks, 343 SmallVectorImpl<MachineInstr *> &Instructions) const { 344 345 // Collect loop instructions, potentially spanning multiple blocks 346 auto *MBB = LoopHeader; 347 for (;;) { 348 Blocks.insert(MBB); 349 for (auto &MI : *MBB) { 350 if (MI.isDebugInstr()) 351 continue; 352 Instructions.push_back(&MI); 353 } 354 if (MBB == LoopEnd) 355 break; 356 357 if ((MBB != LoopHeader && MBB->pred_size() != 1) || 358 (MBB == LoopHeader && MBB->pred_size() != 2) || MBB->succ_size() != 1) { 359 LLVM_DEBUG(dbgs() << "Unexpected edges in CFG, ignoring loop\n"); 360 return; 361 } 362 363 MBB = *MBB->succ_begin(); 364 } 365 366 for (auto *I : Instructions) { 367 auto &MI = *I; 368 369 for (auto &MO : MI.all_uses()) { 370 if (!MO.getReg()) 371 continue; 372 373 Register MOReg = MO.getReg(); 374 // We can only optimize AGPR/VGPR virtual register 375 if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg)) 376 continue; 377 378 if (MO.readsReg()) { 379 MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent(); 380 // Make sure the value is defined before the LOOP block 381 if (!Blocks.contains(DefMBB) && !CandidateRegs.contains(MOReg)) { 382 // If the variable is used after the loop, the register coalescer will 383 // merge the newly created register and remove the phi node again. 384 // Just do nothing in that case. 385 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg); 386 bool IsUsed = false; 387 for (auto *Succ : LoopEnd->successors()) { 388 if (!Blocks.contains(Succ) && 389 OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) { 390 IsUsed = true; 391 break; 392 } 393 } 394 if (!IsUsed) { 395 LLVM_DEBUG(dbgs() << "Found candidate reg: " 396 << printReg(MOReg, TRI, 0, MRI) << '\n'); 397 CandidateRegs.insert(MOReg); 398 } else { 399 LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: " 400 << printReg(MOReg, TRI, 0, MRI) << '\n'); 401 } 402 } 403 } 404 } 405 } 406 } 407 408 // Re-calculate the liveness of \p Reg in the THEN-region 409 void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion( 410 Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const { 411 SetVector<MachineBasicBlock *> Blocks; 412 SmallVector<MachineBasicBlock *> WorkList({If}); 413 414 // Collect all successors until we see the flow block, where we should 415 // reconverge. 416 while (!WorkList.empty()) { 417 auto *MBB = WorkList.pop_back_val(); 418 for (auto *Succ : MBB->successors()) { 419 if (Succ != Flow && Blocks.insert(Succ)) 420 WorkList.push_back(Succ); 421 } 422 } 423 424 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); 425 for (MachineBasicBlock *MBB : Blocks) { 426 // Clear Live bit, as we will recalculate afterwards 427 LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB) 428 << '\n'); 429 OldVarInfo.AliveBlocks.reset(MBB->getNumber()); 430 } 431 432 SmallPtrSet<MachineBasicBlock *, 4> PHIIncoming; 433 434 // Get the blocks the Reg should be alive through 435 for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E; 436 ++I) { 437 auto *UseMI = I->getParent(); 438 if (UseMI->isPHI() && I->readsReg()) { 439 if (Blocks.contains(UseMI->getParent())) 440 PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB()); 441 } 442 } 443 444 for (MachineBasicBlock *MBB : Blocks) { 445 SmallVector<MachineInstr *> Uses; 446 // PHI instructions has been processed before. 447 findNonPHIUsesInBlock(Reg, MBB, Uses); 448 449 if (Uses.size() == 1) { 450 LLVM_DEBUG(dbgs() << "Found one Non-PHI use in " 451 << printMBBReference(*MBB) << '\n'); 452 LV->HandleVirtRegUse(Reg, MBB, *(*Uses.begin())); 453 } else if (Uses.size() > 1) { 454 // Process the instructions in-order 455 LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in " 456 << printMBBReference(*MBB) << '\n'); 457 for (MachineInstr &MI : *MBB) { 458 if (llvm::is_contained(Uses, &MI)) 459 LV->HandleVirtRegUse(Reg, MBB, MI); 460 } 461 } 462 463 // Mark Reg alive through the block if this is a PHI incoming block 464 if (PHIIncoming.contains(MBB)) 465 LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(), 466 MBB); 467 } 468 469 // Set the isKilled flag if we get new Kills in the THEN region. 470 for (auto *MI : OldVarInfo.Kills) { 471 if (Blocks.contains(MI->getParent())) 472 MI->addRegisterKilled(Reg, TRI); 473 } 474 } 475 476 void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion( 477 Register Reg, Register NewReg, MachineBasicBlock *Flow, 478 MachineBasicBlock *Endif, 479 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const { 480 LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg); 481 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); 482 483 // Transfer aliveBlocks from Reg to NewReg 484 for (auto *MBB : ElseBlocks) { 485 unsigned BBNum = MBB->getNumber(); 486 if (OldVarInfo.AliveBlocks.test(BBNum)) { 487 NewVarInfo.AliveBlocks.set(BBNum); 488 LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*MBB) 489 << '\n'); 490 OldVarInfo.AliveBlocks.reset(BBNum); 491 } 492 } 493 494 // Transfer the possible Kills in ElseBlocks from Reg to NewReg 495 auto I = OldVarInfo.Kills.begin(); 496 while (I != OldVarInfo.Kills.end()) { 497 if (ElseBlocks.contains((*I)->getParent())) { 498 NewVarInfo.Kills.push_back(*I); 499 I = OldVarInfo.Kills.erase(I); 500 } else { 501 ++I; 502 } 503 } 504 } 505 506 void SIOptimizeVGPRLiveRange::optimizeLiveRange( 507 Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow, 508 MachineBasicBlock *Endif, 509 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const { 510 // Insert a new PHI, marking the value from the THEN region being 511 // undef. 512 LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n'); 513 const auto *RC = MRI->getRegClass(Reg); 514 Register NewReg = MRI->createVirtualRegister(RC); 515 Register UndefReg = MRI->createVirtualRegister(RC); 516 MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(), 517 TII->get(TargetOpcode::PHI), NewReg); 518 for (auto *Pred : Flow->predecessors()) { 519 if (Pred == If) 520 PHI.addReg(Reg).addMBB(Pred); 521 else 522 PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred); 523 } 524 525 // Replace all uses in the ELSE region or the PHIs in ENDIF block 526 // Use early increment range because setReg() will update the linked list. 527 for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) { 528 auto *UseMI = O.getParent(); 529 auto *UseBlock = UseMI->getParent(); 530 // Replace uses in Endif block 531 if (UseBlock == Endif) { 532 if (UseMI->isPHI()) 533 O.setReg(NewReg); 534 else if (UseMI->isDebugInstr()) 535 continue; 536 else { 537 // DetectDeadLanes may mark register uses as undef without removing 538 // them, in which case a non-phi instruction using the original register 539 // may exist in the Endif block even though the register is not live 540 // into it. 541 assert(!O.readsReg()); 542 } 543 continue; 544 } 545 546 // Replace uses in Else region 547 if (ElseBlocks.contains(UseBlock)) 548 O.setReg(NewReg); 549 } 550 551 // The optimized Reg is not alive through Flow blocks anymore. 552 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); 553 OldVarInfo.AliveBlocks.reset(Flow->getNumber()); 554 555 updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks); 556 updateLiveRangeInThenRegion(Reg, If, Flow); 557 } 558 559 void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange( 560 Register Reg, MachineBasicBlock *LoopHeader, 561 SmallSetVector<MachineBasicBlock *, 2> &Blocks, 562 SmallVectorImpl<MachineInstr *> &Instructions) const { 563 // Insert a new PHI, marking the value from the last loop iteration undef. 564 LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n'); 565 const auto *RC = MRI->getRegClass(Reg); 566 Register NewReg = MRI->createVirtualRegister(RC); 567 Register UndefReg = MRI->createVirtualRegister(RC); 568 569 // Replace all uses in the LOOP region 570 // Use early increment range because setReg() will update the linked list. 571 for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) { 572 auto *UseMI = O.getParent(); 573 auto *UseBlock = UseMI->getParent(); 574 // Replace uses in Loop blocks 575 if (Blocks.contains(UseBlock)) 576 O.setReg(NewReg); 577 } 578 579 MachineInstrBuilder PHI = 580 BuildMI(*LoopHeader, LoopHeader->getFirstNonPHI(), DebugLoc(), 581 TII->get(TargetOpcode::PHI), NewReg); 582 for (auto *Pred : LoopHeader->predecessors()) { 583 if (Blocks.contains(Pred)) 584 PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred); 585 else 586 PHI.addReg(Reg).addMBB(Pred); 587 } 588 589 LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg); 590 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); 591 592 // Find last use and mark as kill 593 MachineInstr *Kill = nullptr; 594 for (auto *MI : reverse(Instructions)) { 595 if (MI->readsRegister(NewReg, TRI)) { 596 MI->addRegisterKilled(NewReg, TRI); 597 NewVarInfo.Kills.push_back(MI); 598 Kill = MI; 599 break; 600 } 601 } 602 assert(Kill && "Failed to find last usage of register in loop"); 603 604 MachineBasicBlock *KillBlock = Kill->getParent(); 605 bool PostKillBlock = false; 606 for (auto *Block : Blocks) { 607 auto BBNum = Block->getNumber(); 608 609 // collectWaterfallCandidateRegisters only collects registers that are dead 610 // after the loop. So we know that the old reg is no longer live throughout 611 // the waterfall loop. 612 OldVarInfo.AliveBlocks.reset(BBNum); 613 614 // The new register is live up to (and including) the block that kills it. 615 PostKillBlock |= (Block == KillBlock); 616 if (PostKillBlock) { 617 NewVarInfo.AliveBlocks.reset(BBNum); 618 } else if (Block != LoopHeader) { 619 NewVarInfo.AliveBlocks.set(BBNum); 620 } 621 } 622 } 623 624 char SIOptimizeVGPRLiveRangeLegacy::ID = 0; 625 626 INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRangeLegacy, DEBUG_TYPE, 627 "SI Optimize VGPR LiveRange", false, false) 628 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) 629 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) 630 INITIALIZE_PASS_DEPENDENCY(LiveVariablesWrapperPass) 631 INITIALIZE_PASS_END(SIOptimizeVGPRLiveRangeLegacy, DEBUG_TYPE, 632 "SI Optimize VGPR LiveRange", false, false) 633 634 char &llvm::SIOptimizeVGPRLiveRangeLegacyID = SIOptimizeVGPRLiveRangeLegacy::ID; 635 636 FunctionPass *llvm::createSIOptimizeVGPRLiveRangeLegacyPass() { 637 return new SIOptimizeVGPRLiveRangeLegacy(); 638 } 639 640 bool SIOptimizeVGPRLiveRangeLegacy::runOnMachineFunction(MachineFunction &MF) { 641 if (skipFunction(MF.getFunction())) 642 return false; 643 644 LiveVariables *LV = &getAnalysis<LiveVariablesWrapperPass>().getLV(); 645 MachineDominatorTree *MDT = 646 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); 647 MachineLoopInfo *Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); 648 return SIOptimizeVGPRLiveRange(LV, MDT, Loops).run(MF); 649 } 650 651 PreservedAnalyses 652 SIOptimizeVGPRLiveRangePass::run(MachineFunction &MF, 653 MachineFunctionAnalysisManager &MFAM) { 654 MFPropsModifier _(*this, MF); 655 LiveVariables *LV = &MFAM.getResult<LiveVariablesAnalysis>(MF); 656 MachineDominatorTree *MDT = &MFAM.getResult<MachineDominatorTreeAnalysis>(MF); 657 MachineLoopInfo *Loops = &MFAM.getResult<MachineLoopAnalysis>(MF); 658 659 bool Changed = SIOptimizeVGPRLiveRange(LV, MDT, Loops).run(MF); 660 if (!Changed) 661 return PreservedAnalyses::all(); 662 663 auto PA = getMachineFunctionPassPreservedAnalyses(); 664 PA.preserve<LiveVariablesAnalysis>(); 665 PA.preserve<DominatorTreeAnalysis>(); 666 PA.preserve<MachineLoopAnalysis>(); 667 PA.preserveSet<CFGAnalyses>(); 668 return PA; 669 } 670 671 bool SIOptimizeVGPRLiveRange::run(MachineFunction &MF) { 672 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 673 TII = ST.getInstrInfo(); 674 TRI = &TII->getRegisterInfo(); 675 MRI = &MF.getRegInfo(); 676 677 bool MadeChange = false; 678 679 // TODO: we need to think about the order of visiting the blocks to get 680 // optimal result for nesting if-else cases. 681 for (MachineBasicBlock &MBB : MF) { 682 for (auto &MI : MBB.terminators()) { 683 // Detect the if-else blocks 684 if (MI.getOpcode() == AMDGPU::SI_IF) { 685 MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB(); 686 auto *Endif = getElseTarget(IfTarget); 687 if (!Endif) 688 continue; 689 690 // Skip unexpected control flow. 691 if (!MDT->dominates(&MBB, IfTarget) || !MDT->dominates(IfTarget, Endif)) 692 continue; 693 694 SmallSetVector<MachineBasicBlock *, 16> ElseBlocks; 695 SmallVector<Register> CandidateRegs; 696 697 LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: " 698 << printMBBReference(MBB) << ' ' 699 << printMBBReference(*IfTarget) << ' ' 700 << printMBBReference(*Endif) << '\n'); 701 702 // Collect all the blocks in the ELSE region 703 collectElseRegionBlocks(IfTarget, Endif, ElseBlocks); 704 705 // Collect the registers can be optimized 706 collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks, 707 CandidateRegs); 708 MadeChange |= !CandidateRegs.empty(); 709 // Now we are safe to optimize. 710 for (auto Reg : CandidateRegs) 711 optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks); 712 } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) { 713 auto *LoopHeader = MI.getOperand(0).getMBB(); 714 auto *LoopEnd = &MBB; 715 716 LLVM_DEBUG(dbgs() << "Checking Waterfall loop: " 717 << printMBBReference(*LoopHeader) << '\n'); 718 719 SmallSetVector<Register, 16> CandidateRegs; 720 SmallVector<MachineInstr *, 16> Instructions; 721 SmallSetVector<MachineBasicBlock *, 2> Blocks; 722 723 collectWaterfallCandidateRegisters(LoopHeader, LoopEnd, CandidateRegs, 724 Blocks, Instructions); 725 MadeChange |= !CandidateRegs.empty(); 726 // Now we are safe to optimize. 727 for (auto Reg : CandidateRegs) 728 optimizeWaterfallLiveRange(Reg, LoopHeader, Blocks, Instructions); 729 } 730 } 731 } 732 733 return MadeChange; 734 } 735