xref: /llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp (revision c16c0e26236d0468a2411cb3bce219d3c70650e9)
1 //===--------------------- SIOptimizeVGPRLiveRange.cpp  -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This pass tries to remove unnecessary VGPR live ranges in divergent if-else
11 /// structures and waterfall loops.
12 ///
13 /// When we do structurization, we usually transform an if-else into two
14 /// successive if-then (with a flow block to do predicate inversion). Consider a
15 /// simple case after structurization: A divergent value %a was defined before
16 /// if-else and used in both THEN (use in THEN is optional) and ELSE part:
17 ///    bb.if:
18 ///      %a = ...
19 ///      ...
20 ///    bb.then:
21 ///      ... = op %a
22 ///      ... // %a can be dead here
23 ///    bb.flow:
24 ///      ...
25 ///    bb.else:
26 ///      ... = %a
27 ///      ...
28 ///    bb.endif
29 ///
30 ///  As register allocator has no idea of the thread-control-flow, it will just
31 ///  assume %a would be alive in the whole range of bb.then because of a later
32 ///  use in bb.else. On AMDGPU architecture, the VGPR is accessed with respect
33 ///  to exec mask. For this if-else case, the lanes active in bb.then will be
34 ///  inactive in bb.else, and vice-versa. So we are safe to say that %a was dead
35 ///  after the last use in bb.then until the end of the block. The reason is
36 ///  the instructions in bb.then will only overwrite lanes that will never be
37 ///  accessed in bb.else.
38 ///
39 ///  This pass aims to tell register allocator that %a is in-fact dead,
40 ///  through inserting a phi-node in bb.flow saying that %a is undef when coming
41 ///  from bb.then, and then replace the uses in the bb.else with the result of
42 ///  newly inserted phi.
43 ///
44 ///  Two key conditions must be met to ensure correctness:
45 ///  1.) The def-point should be in the same loop-level as if-else-endif to make
46 ///      sure the second loop iteration still get correct data.
47 ///  2.) There should be no further uses after the IF-ELSE region.
48 ///
49 ///
50 /// Waterfall loops get inserted around instructions that use divergent values
51 /// but can only be executed with a uniform value. For example an indirect call
52 /// to a divergent address:
53 ///    bb.start:
54 ///      %a = ...
55 ///      %fun = ...
56 ///      ...
57 ///    bb.loop:
58 ///      call %fun (%a)
59 ///      ... // %a can be dead here
60 ///      loop %bb.loop
61 ///
62 ///  The loop block is executed multiple times, but it is run exactly once for
63 ///  each active lane. Similar to the if-else case, the register allocator
64 ///  assumes that %a is live throughout the loop as it is used again in the next
65 ///  iteration. If %a is a VGPR that is unused after the loop, it does not need
66 ///  to be live after its last use in the loop block. By inserting a phi-node at
67 ///  the start of bb.loop that is undef when coming from bb.loop, the register
68 ///  allocation knows that the value of %a does not need to be preserved through
69 ///  iterations of the loop.
70 ///
71 //
72 //===----------------------------------------------------------------------===//
73 
74 #include "SIOptimizeVGPRLiveRange.h"
75 #include "AMDGPU.h"
76 #include "GCNSubtarget.h"
77 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
78 #include "SIMachineFunctionInfo.h"
79 #include "llvm/CodeGen/LiveVariables.h"
80 #include "llvm/CodeGen/MachineDominators.h"
81 #include "llvm/CodeGen/MachineLoopInfo.h"
82 #include "llvm/CodeGen/TargetRegisterInfo.h"
83 
84 using namespace llvm;
85 
86 #define DEBUG_TYPE "si-opt-vgpr-liverange"
87 
88 namespace {
89 
90 class SIOptimizeVGPRLiveRange {
91 private:
92   const SIRegisterInfo *TRI = nullptr;
93   const SIInstrInfo *TII = nullptr;
94   LiveVariables *LV = nullptr;
95   MachineDominatorTree *MDT = nullptr;
96   const MachineLoopInfo *Loops = nullptr;
97   MachineRegisterInfo *MRI = nullptr;
98 
99 public:
100   SIOptimizeVGPRLiveRange(LiveVariables *LV, MachineDominatorTree *MDT,
101                           MachineLoopInfo *Loops)
102       : LV(LV), MDT(MDT), Loops(Loops) {}
103   bool run(MachineFunction &MF);
104 
105   MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const;
106 
107   void collectElseRegionBlocks(MachineBasicBlock *Flow,
108                                MachineBasicBlock *Endif,
109                                SmallSetVector<MachineBasicBlock *, 16> &) const;
110 
111   void
112   collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow,
113                             MachineBasicBlock *Endif,
114                             SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
115                             SmallVectorImpl<Register> &CandidateRegs) const;
116 
117   void collectWaterfallCandidateRegisters(
118       MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
119       SmallSetVector<Register, 16> &CandidateRegs,
120       SmallSetVector<MachineBasicBlock *, 2> &Blocks,
121       SmallVectorImpl<MachineInstr *> &Instructions) const;
122 
123   void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB,
124                              SmallVectorImpl<MachineInstr *> &Uses) const;
125 
126   void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If,
127                                    MachineBasicBlock *Flow) const;
128 
129   void updateLiveRangeInElseRegion(
130       Register Reg, Register NewReg, MachineBasicBlock *Flow,
131       MachineBasicBlock *Endif,
132       SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
133 
134   void
135   optimizeLiveRange(Register Reg, MachineBasicBlock *If,
136                     MachineBasicBlock *Flow, MachineBasicBlock *Endif,
137                     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
138 
139   void optimizeWaterfallLiveRange(
140       Register Reg, MachineBasicBlock *LoopHeader,
141       SmallSetVector<MachineBasicBlock *, 2> &LoopBlocks,
142       SmallVectorImpl<MachineInstr *> &Instructions) const;
143 };
144 
145 class SIOptimizeVGPRLiveRangeLegacy : public MachineFunctionPass {
146 public:
147   static char ID;
148 
149   SIOptimizeVGPRLiveRangeLegacy() : MachineFunctionPass(ID) {}
150 
151   bool runOnMachineFunction(MachineFunction &MF) override;
152 
153   StringRef getPassName() const override {
154     return "SI Optimize VGPR LiveRange";
155   }
156 
157   void getAnalysisUsage(AnalysisUsage &AU) const override {
158     AU.setPreservesCFG();
159     AU.addRequired<LiveVariablesWrapperPass>();
160     AU.addRequired<MachineDominatorTreeWrapperPass>();
161     AU.addRequired<MachineLoopInfoWrapperPass>();
162     AU.addPreserved<LiveVariablesWrapperPass>();
163     AU.addPreserved<MachineDominatorTreeWrapperPass>();
164     AU.addPreserved<MachineLoopInfoWrapperPass>();
165     MachineFunctionPass::getAnalysisUsage(AU);
166   }
167 
168   MachineFunctionProperties getRequiredProperties() const override {
169     return MachineFunctionProperties().set(
170         MachineFunctionProperties::Property::IsSSA);
171   }
172 
173   MachineFunctionProperties getClearedProperties() const override {
174     return MachineFunctionProperties().set(
175         MachineFunctionProperties::Property::NoPHIs);
176   }
177 };
178 
179 } // end anonymous namespace
180 
181 // Check whether the MBB is a else flow block and get the branching target which
182 // is the Endif block
183 MachineBasicBlock *
184 SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const {
185   for (auto &BR : MBB->terminators()) {
186     if (BR.getOpcode() == AMDGPU::SI_ELSE)
187       return BR.getOperand(2).getMBB();
188   }
189   return nullptr;
190 }
191 
192 void SIOptimizeVGPRLiveRange::collectElseRegionBlocks(
193     MachineBasicBlock *Flow, MachineBasicBlock *Endif,
194     SmallSetVector<MachineBasicBlock *, 16> &Blocks) const {
195   assert(Flow != Endif);
196 
197   MachineBasicBlock *MBB = Endif;
198   unsigned Cur = 0;
199   while (MBB) {
200     for (auto *Pred : MBB->predecessors()) {
201       if (Pred != Flow)
202         Blocks.insert(Pred);
203     }
204 
205     if (Cur < Blocks.size())
206       MBB = Blocks[Cur++];
207     else
208       MBB = nullptr;
209   }
210 
211   LLVM_DEBUG({
212     dbgs() << "Found Else blocks: ";
213     for (auto *MBB : Blocks)
214       dbgs() << printMBBReference(*MBB) << ' ';
215     dbgs() << '\n';
216   });
217 }
218 
219 /// Find the instructions(excluding phi) in \p MBB that uses the \p Reg.
220 void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock(
221     Register Reg, MachineBasicBlock *MBB,
222     SmallVectorImpl<MachineInstr *> &Uses) const {
223   for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) {
224     if (UseMI.getParent() == MBB && !UseMI.isPHI())
225       Uses.push_back(&UseMI);
226   }
227 }
228 
229 /// Collect the killed registers in the ELSE region which are not alive through
230 /// the whole THEN region.
231 void SIOptimizeVGPRLiveRange::collectCandidateRegisters(
232     MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif,
233     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
234     SmallVectorImpl<Register> &CandidateRegs) const {
235 
236   SmallSet<Register, 8> KillsInElse;
237 
238   for (auto *Else : ElseBlocks) {
239     for (auto &MI : Else->instrs()) {
240       if (MI.isDebugInstr())
241         continue;
242 
243       for (auto &MO : MI.operands()) {
244         if (!MO.isReg() || !MO.getReg() || MO.isDef())
245           continue;
246 
247         Register MOReg = MO.getReg();
248         // We can only optimize AGPR/VGPR virtual register
249         if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
250           continue;
251 
252         if (MO.readsReg()) {
253           LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
254           const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
255           // Make sure two conditions are met:
256           // a.) the value is defined before/in the IF block
257           // b.) should be defined in the same loop-level.
258           if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
259               Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) {
260             // Check if the register is live into the endif block. If not,
261             // consider it killed in the else region.
262             LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
263             if (!VI.isLiveIn(*Endif, MOReg, *MRI)) {
264               KillsInElse.insert(MOReg);
265             } else {
266               LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI)
267                                 << " as Live in Endif\n");
268             }
269           }
270         }
271       }
272     }
273   }
274 
275   // Check the phis in the Endif, looking for value coming from the ELSE
276   // region. Make sure the phi-use is the last use.
277   for (auto &MI : Endif->phis()) {
278     for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
279       auto &MO = MI.getOperand(Idx);
280       auto *Pred = MI.getOperand(Idx + 1).getMBB();
281       if (Pred == Flow)
282         continue;
283       assert(ElseBlocks.contains(Pred) && "Should be from Else region\n");
284 
285       if (!MO.isReg() || !MO.getReg() || MO.isUndef())
286         continue;
287 
288       Register Reg = MO.getReg();
289       if (Reg.isPhysical() || !TRI->isVectorRegister(*MRI, Reg))
290         continue;
291 
292       LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
293 
294       if (VI.isLiveIn(*Endif, Reg, *MRI)) {
295         LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI)
296                           << " as Live in Endif\n");
297         continue;
298       }
299       // Make sure two conditions are met:
300       // a.) the value is defined before/in the IF block
301       // b.) should be defined in the same loop-level.
302       const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent();
303       if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
304           Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If))
305         KillsInElse.insert(Reg);
306     }
307   }
308 
309   auto IsLiveThroughThen = [&](Register Reg) {
310     for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
311          ++I) {
312       if (!I->readsReg())
313         continue;
314       auto *UseMI = I->getParent();
315       auto *UseMBB = UseMI->getParent();
316       if (UseMBB == Flow || UseMBB == Endif) {
317         if (!UseMI->isPHI())
318           return true;
319 
320         auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB();
321         // The register is live through the path If->Flow or Flow->Endif.
322         // we should not optimize for such cases.
323         if ((UseMBB == Flow && IncomingMBB != If) ||
324             (UseMBB == Endif && IncomingMBB == Flow))
325           return true;
326       }
327     }
328     return false;
329   };
330 
331   for (auto Reg : KillsInElse) {
332     if (!IsLiveThroughThen(Reg))
333       CandidateRegs.push_back(Reg);
334   }
335 }
336 
337 /// Collect the registers used in the waterfall loop block that are defined
338 /// before.
339 void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters(
340     MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
341     SmallSetVector<Register, 16> &CandidateRegs,
342     SmallSetVector<MachineBasicBlock *, 2> &Blocks,
343     SmallVectorImpl<MachineInstr *> &Instructions) const {
344 
345   // Collect loop instructions, potentially spanning multiple blocks
346   auto *MBB = LoopHeader;
347   for (;;) {
348     Blocks.insert(MBB);
349     for (auto &MI : *MBB) {
350       if (MI.isDebugInstr())
351         continue;
352       Instructions.push_back(&MI);
353     }
354     if (MBB == LoopEnd)
355       break;
356 
357     if ((MBB != LoopHeader && MBB->pred_size() != 1) ||
358         (MBB == LoopHeader && MBB->pred_size() != 2) || MBB->succ_size() != 1) {
359       LLVM_DEBUG(dbgs() << "Unexpected edges in CFG, ignoring loop\n");
360       return;
361     }
362 
363     MBB = *MBB->succ_begin();
364   }
365 
366   for (auto *I : Instructions) {
367     auto &MI = *I;
368 
369     for (auto &MO : MI.all_uses()) {
370       if (!MO.getReg())
371         continue;
372 
373       Register MOReg = MO.getReg();
374       // We can only optimize AGPR/VGPR virtual register
375       if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
376         continue;
377 
378       if (MO.readsReg()) {
379         MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
380         // Make sure the value is defined before the LOOP block
381         if (!Blocks.contains(DefMBB) && !CandidateRegs.contains(MOReg)) {
382           // If the variable is used after the loop, the register coalescer will
383           // merge the newly created register and remove the phi node again.
384           // Just do nothing in that case.
385           LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg);
386           bool IsUsed = false;
387           for (auto *Succ : LoopEnd->successors()) {
388             if (!Blocks.contains(Succ) &&
389                 OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) {
390               IsUsed = true;
391               break;
392             }
393           }
394           if (!IsUsed) {
395             LLVM_DEBUG(dbgs() << "Found candidate reg: "
396                               << printReg(MOReg, TRI, 0, MRI) << '\n');
397             CandidateRegs.insert(MOReg);
398           } else {
399             LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: "
400                               << printReg(MOReg, TRI, 0, MRI) << '\n');
401           }
402         }
403       }
404     }
405   }
406 }
407 
408 // Re-calculate the liveness of \p Reg in the THEN-region
409 void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion(
410     Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const {
411   SetVector<MachineBasicBlock *> Blocks;
412   SmallVector<MachineBasicBlock *> WorkList({If});
413 
414   // Collect all successors until we see the flow block, where we should
415   // reconverge.
416   while (!WorkList.empty()) {
417     auto *MBB = WorkList.pop_back_val();
418     for (auto *Succ : MBB->successors()) {
419       if (Succ != Flow && Blocks.insert(Succ))
420         WorkList.push_back(Succ);
421     }
422   }
423 
424   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
425   for (MachineBasicBlock *MBB : Blocks) {
426     // Clear Live bit, as we will recalculate afterwards
427     LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB)
428                       << '\n');
429     OldVarInfo.AliveBlocks.reset(MBB->getNumber());
430   }
431 
432   SmallPtrSet<MachineBasicBlock *, 4> PHIIncoming;
433 
434   // Get the blocks the Reg should be alive through
435   for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
436        ++I) {
437     auto *UseMI = I->getParent();
438     if (UseMI->isPHI() && I->readsReg()) {
439       if (Blocks.contains(UseMI->getParent()))
440         PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB());
441     }
442   }
443 
444   for (MachineBasicBlock *MBB : Blocks) {
445     SmallVector<MachineInstr *> Uses;
446     // PHI instructions has been processed before.
447     findNonPHIUsesInBlock(Reg, MBB, Uses);
448 
449     if (Uses.size() == 1) {
450       LLVM_DEBUG(dbgs() << "Found one Non-PHI use in "
451                         << printMBBReference(*MBB) << '\n');
452       LV->HandleVirtRegUse(Reg, MBB, *(*Uses.begin()));
453     } else if (Uses.size() > 1) {
454       // Process the instructions in-order
455       LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in "
456                         << printMBBReference(*MBB) << '\n');
457       for (MachineInstr &MI : *MBB) {
458         if (llvm::is_contained(Uses, &MI))
459           LV->HandleVirtRegUse(Reg, MBB, MI);
460       }
461     }
462 
463     // Mark Reg alive through the block if this is a PHI incoming block
464     if (PHIIncoming.contains(MBB))
465       LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(),
466                                   MBB);
467   }
468 
469   // Set the isKilled flag if we get new Kills in the THEN region.
470   for (auto *MI : OldVarInfo.Kills) {
471     if (Blocks.contains(MI->getParent()))
472       MI->addRegisterKilled(Reg, TRI);
473   }
474 }
475 
476 void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion(
477     Register Reg, Register NewReg, MachineBasicBlock *Flow,
478     MachineBasicBlock *Endif,
479     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
480   LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
481   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
482 
483   // Transfer aliveBlocks from Reg to NewReg
484   for (auto *MBB : ElseBlocks) {
485     unsigned BBNum = MBB->getNumber();
486     if (OldVarInfo.AliveBlocks.test(BBNum)) {
487       NewVarInfo.AliveBlocks.set(BBNum);
488       LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*MBB)
489                         << '\n');
490       OldVarInfo.AliveBlocks.reset(BBNum);
491     }
492   }
493 
494   // Transfer the possible Kills in ElseBlocks from Reg to NewReg
495   auto I = OldVarInfo.Kills.begin();
496   while (I != OldVarInfo.Kills.end()) {
497     if (ElseBlocks.contains((*I)->getParent())) {
498       NewVarInfo.Kills.push_back(*I);
499       I = OldVarInfo.Kills.erase(I);
500     } else {
501       ++I;
502     }
503   }
504 }
505 
506 void SIOptimizeVGPRLiveRange::optimizeLiveRange(
507     Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow,
508     MachineBasicBlock *Endif,
509     SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
510   // Insert a new PHI, marking the value from the THEN region being
511   // undef.
512   LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
513   const auto *RC = MRI->getRegClass(Reg);
514   Register NewReg = MRI->createVirtualRegister(RC);
515   Register UndefReg = MRI->createVirtualRegister(RC);
516   MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(),
517                                     TII->get(TargetOpcode::PHI), NewReg);
518   for (auto *Pred : Flow->predecessors()) {
519     if (Pred == If)
520       PHI.addReg(Reg).addMBB(Pred);
521     else
522       PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
523   }
524 
525   // Replace all uses in the ELSE region or the PHIs in ENDIF block
526   // Use early increment range because setReg() will update the linked list.
527   for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
528     auto *UseMI = O.getParent();
529     auto *UseBlock = UseMI->getParent();
530     // Replace uses in Endif block
531     if (UseBlock == Endif) {
532       if (UseMI->isPHI())
533         O.setReg(NewReg);
534       else if (UseMI->isDebugInstr())
535         continue;
536       else {
537         // DetectDeadLanes may mark register uses as undef without removing
538         // them, in which case a non-phi instruction using the original register
539         // may exist in the Endif block even though the register is not live
540         // into it.
541         assert(!O.readsReg());
542       }
543       continue;
544     }
545 
546     // Replace uses in Else region
547     if (ElseBlocks.contains(UseBlock))
548       O.setReg(NewReg);
549   }
550 
551   // The optimized Reg is not alive through Flow blocks anymore.
552   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
553   OldVarInfo.AliveBlocks.reset(Flow->getNumber());
554 
555   updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks);
556   updateLiveRangeInThenRegion(Reg, If, Flow);
557 }
558 
559 void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange(
560     Register Reg, MachineBasicBlock *LoopHeader,
561     SmallSetVector<MachineBasicBlock *, 2> &Blocks,
562     SmallVectorImpl<MachineInstr *> &Instructions) const {
563   // Insert a new PHI, marking the value from the last loop iteration undef.
564   LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
565   const auto *RC = MRI->getRegClass(Reg);
566   Register NewReg = MRI->createVirtualRegister(RC);
567   Register UndefReg = MRI->createVirtualRegister(RC);
568 
569   // Replace all uses in the LOOP region
570   // Use early increment range because setReg() will update the linked list.
571   for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
572     auto *UseMI = O.getParent();
573     auto *UseBlock = UseMI->getParent();
574     // Replace uses in Loop blocks
575     if (Blocks.contains(UseBlock))
576       O.setReg(NewReg);
577   }
578 
579   MachineInstrBuilder PHI =
580       BuildMI(*LoopHeader, LoopHeader->getFirstNonPHI(), DebugLoc(),
581               TII->get(TargetOpcode::PHI), NewReg);
582   for (auto *Pred : LoopHeader->predecessors()) {
583     if (Blocks.contains(Pred))
584       PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
585     else
586       PHI.addReg(Reg).addMBB(Pred);
587   }
588 
589   LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
590   LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
591 
592   // Find last use and mark as kill
593   MachineInstr *Kill = nullptr;
594   for (auto *MI : reverse(Instructions)) {
595     if (MI->readsRegister(NewReg, TRI)) {
596       MI->addRegisterKilled(NewReg, TRI);
597       NewVarInfo.Kills.push_back(MI);
598       Kill = MI;
599       break;
600     }
601   }
602   assert(Kill && "Failed to find last usage of register in loop");
603 
604   MachineBasicBlock *KillBlock = Kill->getParent();
605   bool PostKillBlock = false;
606   for (auto *Block : Blocks) {
607     auto BBNum = Block->getNumber();
608 
609     // collectWaterfallCandidateRegisters only collects registers that are dead
610     // after the loop. So we know that the old reg is no longer live throughout
611     // the waterfall loop.
612     OldVarInfo.AliveBlocks.reset(BBNum);
613 
614     // The new register is live up to (and including) the block that kills it.
615     PostKillBlock |= (Block == KillBlock);
616     if (PostKillBlock) {
617       NewVarInfo.AliveBlocks.reset(BBNum);
618     } else if (Block != LoopHeader) {
619       NewVarInfo.AliveBlocks.set(BBNum);
620     }
621   }
622 }
623 
624 char SIOptimizeVGPRLiveRangeLegacy::ID = 0;
625 
626 INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRangeLegacy, DEBUG_TYPE,
627                       "SI Optimize VGPR LiveRange", false, false)
628 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
629 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
630 INITIALIZE_PASS_DEPENDENCY(LiveVariablesWrapperPass)
631 INITIALIZE_PASS_END(SIOptimizeVGPRLiveRangeLegacy, DEBUG_TYPE,
632                     "SI Optimize VGPR LiveRange", false, false)
633 
634 char &llvm::SIOptimizeVGPRLiveRangeLegacyID = SIOptimizeVGPRLiveRangeLegacy::ID;
635 
636 FunctionPass *llvm::createSIOptimizeVGPRLiveRangeLegacyPass() {
637   return new SIOptimizeVGPRLiveRangeLegacy();
638 }
639 
640 bool SIOptimizeVGPRLiveRangeLegacy::runOnMachineFunction(MachineFunction &MF) {
641   if (skipFunction(MF.getFunction()))
642     return false;
643 
644   LiveVariables *LV = &getAnalysis<LiveVariablesWrapperPass>().getLV();
645   MachineDominatorTree *MDT =
646       &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
647   MachineLoopInfo *Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
648   return SIOptimizeVGPRLiveRange(LV, MDT, Loops).run(MF);
649 }
650 
651 PreservedAnalyses
652 SIOptimizeVGPRLiveRangePass::run(MachineFunction &MF,
653                                  MachineFunctionAnalysisManager &MFAM) {
654   MFPropsModifier _(*this, MF);
655   LiveVariables *LV = &MFAM.getResult<LiveVariablesAnalysis>(MF);
656   MachineDominatorTree *MDT = &MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
657   MachineLoopInfo *Loops = &MFAM.getResult<MachineLoopAnalysis>(MF);
658 
659   bool Changed = SIOptimizeVGPRLiveRange(LV, MDT, Loops).run(MF);
660   if (!Changed)
661     return PreservedAnalyses::all();
662 
663   auto PA = getMachineFunctionPassPreservedAnalyses();
664   PA.preserve<LiveVariablesAnalysis>();
665   PA.preserve<DominatorTreeAnalysis>();
666   PA.preserve<MachineLoopAnalysis>();
667   PA.preserveSet<CFGAnalyses>();
668   return PA;
669 }
670 
671 bool SIOptimizeVGPRLiveRange::run(MachineFunction &MF) {
672   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
673   TII = ST.getInstrInfo();
674   TRI = &TII->getRegisterInfo();
675   MRI = &MF.getRegInfo();
676 
677   bool MadeChange = false;
678 
679   // TODO: we need to think about the order of visiting the blocks to get
680   // optimal result for nesting if-else cases.
681   for (MachineBasicBlock &MBB : MF) {
682     for (auto &MI : MBB.terminators()) {
683       // Detect the if-else blocks
684       if (MI.getOpcode() == AMDGPU::SI_IF) {
685         MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB();
686         auto *Endif = getElseTarget(IfTarget);
687         if (!Endif)
688           continue;
689 
690         // Skip unexpected control flow.
691         if (!MDT->dominates(&MBB, IfTarget) || !MDT->dominates(IfTarget, Endif))
692           continue;
693 
694         SmallSetVector<MachineBasicBlock *, 16> ElseBlocks;
695         SmallVector<Register> CandidateRegs;
696 
697         LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: "
698                           << printMBBReference(MBB) << ' '
699                           << printMBBReference(*IfTarget) << ' '
700                           << printMBBReference(*Endif) << '\n');
701 
702         // Collect all the blocks in the ELSE region
703         collectElseRegionBlocks(IfTarget, Endif, ElseBlocks);
704 
705         // Collect the registers can be optimized
706         collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks,
707                                   CandidateRegs);
708         MadeChange |= !CandidateRegs.empty();
709         // Now we are safe to optimize.
710         for (auto Reg : CandidateRegs)
711           optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks);
712       } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) {
713         auto *LoopHeader = MI.getOperand(0).getMBB();
714         auto *LoopEnd = &MBB;
715 
716         LLVM_DEBUG(dbgs() << "Checking Waterfall loop: "
717                           << printMBBReference(*LoopHeader) << '\n');
718 
719         SmallSetVector<Register, 16> CandidateRegs;
720         SmallVector<MachineInstr *, 16> Instructions;
721         SmallSetVector<MachineBasicBlock *, 2> Blocks;
722 
723         collectWaterfallCandidateRegisters(LoopHeader, LoopEnd, CandidateRegs,
724                                            Blocks, Instructions);
725         MadeChange |= !CandidateRegs.empty();
726         // Now we are safe to optimize.
727         for (auto Reg : CandidateRegs)
728           optimizeWaterfallLiveRange(Reg, LoopHeader, Blocks, Instructions);
729       }
730     }
731   }
732 
733   return MadeChange;
734 }
735