xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp (revision 349cc55c9796c4596a5b9904cd3281af295f878f)
1fe6060f1SDimitry Andric //===-- GCNPreRAOptimizations.cpp -----------------------------------------===//
2fe6060f1SDimitry Andric //
3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6fe6060f1SDimitry Andric //
7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
8fe6060f1SDimitry Andric //
9fe6060f1SDimitry Andric /// \file
10*349cc55cSDimitry Andric /// This pass combines split register tuple initialization into a single pseudo:
11fe6060f1SDimitry Andric ///
12fe6060f1SDimitry Andric ///   undef %0.sub1:sreg_64 = S_MOV_B32 1
13fe6060f1SDimitry Andric ///   %0.sub0:sreg_64 = S_MOV_B32 2
14fe6060f1SDimitry Andric /// =>
15fe6060f1SDimitry Andric ///   %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x200000001
16fe6060f1SDimitry Andric ///
17fe6060f1SDimitry Andric /// This is to allow rematerialization of a value instead of spilling. It is
18fe6060f1SDimitry Andric /// supposed to be done after register coalescer to allow it to do its job and
19fe6060f1SDimitry Andric /// before actual register allocation to allow rematerialization.
20fe6060f1SDimitry Andric ///
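/// For this combining the pass only handles 64 bit SGPRs with immediate
/// initializers, although the same should be possible with other register
/// classes and instructions if necessary.
///
/// In addition, on subtargets without GFX90A instructions, the pass rewrites
/// COPYs into AGPR registers whose AGPR source was defined by a
/// V_ACCVGPR_WRITE_B32 of a virtual register, so that the COPY reads that
/// virtual register directly.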
24fe6060f1SDimitry Andric ///
25fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
26fe6060f1SDimitry Andric 
27fe6060f1SDimitry Andric #include "AMDGPU.h"
28fe6060f1SDimitry Andric #include "GCNSubtarget.h"
29fe6060f1SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
30fe6060f1SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
31fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
32fe6060f1SDimitry Andric #include "llvm/InitializePasses.h"
33fe6060f1SDimitry Andric 
34fe6060f1SDimitry Andric using namespace llvm;
35fe6060f1SDimitry Andric 
36fe6060f1SDimitry Andric #define DEBUG_TYPE "amdgpu-pre-ra-optimizations"
37fe6060f1SDimitry Andric 
38fe6060f1SDimitry Andric namespace {
39fe6060f1SDimitry Andric 
40fe6060f1SDimitry Andric class GCNPreRAOptimizations : public MachineFunctionPass {
41fe6060f1SDimitry Andric private:
42fe6060f1SDimitry Andric   const SIInstrInfo *TII;
43*349cc55cSDimitry Andric   const SIRegisterInfo *TRI;
44fe6060f1SDimitry Andric   MachineRegisterInfo *MRI;
45fe6060f1SDimitry Andric   LiveIntervals *LIS;
46fe6060f1SDimitry Andric 
47fe6060f1SDimitry Andric   bool processReg(Register Reg);
48fe6060f1SDimitry Andric 
49fe6060f1SDimitry Andric public:
50fe6060f1SDimitry Andric   static char ID;
51fe6060f1SDimitry Andric 
52fe6060f1SDimitry Andric   GCNPreRAOptimizations() : MachineFunctionPass(ID) {
53fe6060f1SDimitry Andric     initializeGCNPreRAOptimizationsPass(*PassRegistry::getPassRegistry());
54fe6060f1SDimitry Andric   }
55fe6060f1SDimitry Andric 
56fe6060f1SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
57fe6060f1SDimitry Andric 
58fe6060f1SDimitry Andric   StringRef getPassName() const override {
59fe6060f1SDimitry Andric     return "AMDGPU Pre-RA optimizations";
60fe6060f1SDimitry Andric   }
61fe6060f1SDimitry Andric 
62fe6060f1SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
63fe6060f1SDimitry Andric     AU.addRequired<LiveIntervals>();
64fe6060f1SDimitry Andric     AU.setPreservesAll();
65fe6060f1SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
66fe6060f1SDimitry Andric   }
67fe6060f1SDimitry Andric };
68fe6060f1SDimitry Andric 
69fe6060f1SDimitry Andric } // End anonymous namespace.
70fe6060f1SDimitry Andric 
71fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE,
72fe6060f1SDimitry Andric                       "AMDGPU Pre-RA optimizations", false, false)
73fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
74fe6060f1SDimitry Andric INITIALIZE_PASS_END(GCNPreRAOptimizations, DEBUG_TYPE, "Pre-RA optimizations",
75fe6060f1SDimitry Andric                     false, false)
76fe6060f1SDimitry Andric 
77fe6060f1SDimitry Andric char GCNPreRAOptimizations::ID = 0;
78fe6060f1SDimitry Andric 
79fe6060f1SDimitry Andric char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizations::ID;
80fe6060f1SDimitry Andric 
81fe6060f1SDimitry Andric FunctionPass *llvm::createGCNPreRAOptimizationsPass() {
82fe6060f1SDimitry Andric   return new GCNPreRAOptimizations();
83fe6060f1SDimitry Andric }
84fe6060f1SDimitry Andric 
85fe6060f1SDimitry Andric bool GCNPreRAOptimizations::processReg(Register Reg) {
86fe6060f1SDimitry Andric   MachineInstr *Def0 = nullptr;
87fe6060f1SDimitry Andric   MachineInstr *Def1 = nullptr;
88fe6060f1SDimitry Andric   uint64_t Init = 0;
89*349cc55cSDimitry Andric   bool Changed = false;
90*349cc55cSDimitry Andric   SmallSet<Register, 32> ModifiedRegs;
91*349cc55cSDimitry Andric   bool IsAGPRDst = TRI->isAGPRClass(MRI->getRegClass(Reg));
92fe6060f1SDimitry Andric 
93fe6060f1SDimitry Andric   for (MachineInstr &I : MRI->def_instructions(Reg)) {
94*349cc55cSDimitry Andric     switch (I.getOpcode()) {
95*349cc55cSDimitry Andric     default:
96*349cc55cSDimitry Andric       return false;
97*349cc55cSDimitry Andric     case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
98*349cc55cSDimitry Andric       break;
99*349cc55cSDimitry Andric     case AMDGPU::COPY: {
100*349cc55cSDimitry Andric       // Some subtargets cannot do an AGPR to AGPR copy directly, and need an
101*349cc55cSDimitry Andric       // intermdiate temporary VGPR register. Try to find the defining
102*349cc55cSDimitry Andric       // accvgpr_write to avoid temporary registers.
103*349cc55cSDimitry Andric 
104*349cc55cSDimitry Andric       if (!IsAGPRDst)
105*349cc55cSDimitry Andric         return false;
106*349cc55cSDimitry Andric 
107*349cc55cSDimitry Andric       Register SrcReg = I.getOperand(1).getReg();
108*349cc55cSDimitry Andric 
109*349cc55cSDimitry Andric       if (!SrcReg.isVirtual())
110*349cc55cSDimitry Andric         break;
111*349cc55cSDimitry Andric 
112*349cc55cSDimitry Andric       // Check if source of copy is from another AGPR.
113*349cc55cSDimitry Andric       bool IsAGPRSrc = TRI->isAGPRClass(MRI->getRegClass(SrcReg));
114*349cc55cSDimitry Andric       if (!IsAGPRSrc)
115*349cc55cSDimitry Andric         break;
116*349cc55cSDimitry Andric 
117*349cc55cSDimitry Andric       // def_instructions() does not look at subregs so it may give us a
118*349cc55cSDimitry Andric       // different instruction that defines the same vreg but different subreg
119*349cc55cSDimitry Andric       // so we have to manually check subreg.
120*349cc55cSDimitry Andric       Register SrcSubReg = I.getOperand(1).getSubReg();
121*349cc55cSDimitry Andric       for (auto &Def : MRI->def_instructions(SrcReg)) {
122*349cc55cSDimitry Andric         if (SrcSubReg != Def.getOperand(0).getSubReg())
123*349cc55cSDimitry Andric           continue;
124*349cc55cSDimitry Andric 
125*349cc55cSDimitry Andric         if (Def.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
126*349cc55cSDimitry Andric           MachineOperand DefSrcMO = Def.getOperand(1);
127*349cc55cSDimitry Andric 
128*349cc55cSDimitry Andric           // Immediates are not an issue and can be propagated in
129*349cc55cSDimitry Andric           // postrapseudos pass. Only handle cases where defining
130*349cc55cSDimitry Andric           // accvgpr_write source is a vreg.
131*349cc55cSDimitry Andric           if (DefSrcMO.isReg() && DefSrcMO.getReg().isVirtual()) {
132*349cc55cSDimitry Andric             // Propagate source reg of accvgpr write to this copy instruction
133*349cc55cSDimitry Andric             I.getOperand(1).setReg(DefSrcMO.getReg());
134*349cc55cSDimitry Andric             I.getOperand(1).setSubReg(DefSrcMO.getSubReg());
135*349cc55cSDimitry Andric 
136*349cc55cSDimitry Andric             // Reg uses were changed, collect unique set of registers to update
137*349cc55cSDimitry Andric             // live intervals at the end.
138*349cc55cSDimitry Andric             ModifiedRegs.insert(DefSrcMO.getReg());
139*349cc55cSDimitry Andric             ModifiedRegs.insert(SrcReg);
140*349cc55cSDimitry Andric 
141*349cc55cSDimitry Andric             Changed = true;
142*349cc55cSDimitry Andric           }
143*349cc55cSDimitry Andric 
144*349cc55cSDimitry Andric           // Found the defining accvgpr_write, stop looking any further.
145*349cc55cSDimitry Andric           break;
146*349cc55cSDimitry Andric         }
147*349cc55cSDimitry Andric       }
148*349cc55cSDimitry Andric       break;
149*349cc55cSDimitry Andric     }
150*349cc55cSDimitry Andric     case AMDGPU::S_MOV_B32:
151*349cc55cSDimitry Andric       if (I.getOperand(0).getReg() != Reg || !I.getOperand(1).isImm() ||
152*349cc55cSDimitry Andric           I.getNumOperands() != 2)
153fe6060f1SDimitry Andric         return false;
154fe6060f1SDimitry Andric 
155fe6060f1SDimitry Andric       switch (I.getOperand(0).getSubReg()) {
156fe6060f1SDimitry Andric       default:
157fe6060f1SDimitry Andric         return false;
158fe6060f1SDimitry Andric       case AMDGPU::sub0:
159fe6060f1SDimitry Andric         if (Def0)
160fe6060f1SDimitry Andric           return false;
161fe6060f1SDimitry Andric         Def0 = &I;
162fe6060f1SDimitry Andric         Init |= I.getOperand(1).getImm() & 0xffffffff;
163fe6060f1SDimitry Andric         break;
164fe6060f1SDimitry Andric       case AMDGPU::sub1:
165fe6060f1SDimitry Andric         if (Def1)
166fe6060f1SDimitry Andric           return false;
167fe6060f1SDimitry Andric         Def1 = &I;
168fe6060f1SDimitry Andric         Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32;
169fe6060f1SDimitry Andric         break;
170fe6060f1SDimitry Andric       }
171*349cc55cSDimitry Andric       break;
172*349cc55cSDimitry Andric     }
173fe6060f1SDimitry Andric   }
174fe6060f1SDimitry Andric 
175*349cc55cSDimitry Andric   // For AGPR reg, check if live intervals need to be updated.
176*349cc55cSDimitry Andric   if (IsAGPRDst) {
177*349cc55cSDimitry Andric     if (Changed) {
178*349cc55cSDimitry Andric       for (Register RegToUpdate : ModifiedRegs) {
179*349cc55cSDimitry Andric         LIS->removeInterval(RegToUpdate);
180*349cc55cSDimitry Andric         LIS->createAndComputeVirtRegInterval(RegToUpdate);
181*349cc55cSDimitry Andric       }
182*349cc55cSDimitry Andric     }
183*349cc55cSDimitry Andric 
184*349cc55cSDimitry Andric     return Changed;
185*349cc55cSDimitry Andric   }
186*349cc55cSDimitry Andric 
187*349cc55cSDimitry Andric   // For SGPR reg, check if we can combine instructions.
188fe6060f1SDimitry Andric   if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent())
189*349cc55cSDimitry Andric     return Changed;
190fe6060f1SDimitry Andric 
191fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "Combining:\n  " << *Def0 << "  " << *Def1
192fe6060f1SDimitry Andric                     << "    =>\n");
193fe6060f1SDimitry Andric 
194fe6060f1SDimitry Andric   if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*Def1),
195fe6060f1SDimitry Andric                                 LIS->getInstructionIndex(*Def0)))
196fe6060f1SDimitry Andric     std::swap(Def0, Def1);
197fe6060f1SDimitry Andric 
198fe6060f1SDimitry Andric   LIS->RemoveMachineInstrFromMaps(*Def0);
199fe6060f1SDimitry Andric   LIS->RemoveMachineInstrFromMaps(*Def1);
200fe6060f1SDimitry Andric   auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(),
201fe6060f1SDimitry Andric                       TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg)
202fe6060f1SDimitry Andric                   .addImm(Init);
203fe6060f1SDimitry Andric 
204fe6060f1SDimitry Andric   Def0->eraseFromParent();
205fe6060f1SDimitry Andric   Def1->eraseFromParent();
206fe6060f1SDimitry Andric   LIS->InsertMachineInstrInMaps(*NewI);
207fe6060f1SDimitry Andric   LIS->removeInterval(Reg);
208fe6060f1SDimitry Andric   LIS->createAndComputeVirtRegInterval(Reg);
209fe6060f1SDimitry Andric 
210fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "  " << *NewI);
211fe6060f1SDimitry Andric 
212fe6060f1SDimitry Andric   return true;
213fe6060f1SDimitry Andric }
214fe6060f1SDimitry Andric 
215fe6060f1SDimitry Andric bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) {
216fe6060f1SDimitry Andric   if (skipFunction(MF.getFunction()))
217fe6060f1SDimitry Andric     return false;
218fe6060f1SDimitry Andric 
219fe6060f1SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
220fe6060f1SDimitry Andric   TII = ST.getInstrInfo();
221fe6060f1SDimitry Andric   MRI = &MF.getRegInfo();
222fe6060f1SDimitry Andric   LIS = &getAnalysis<LiveIntervals>();
223*349cc55cSDimitry Andric   TRI = ST.getRegisterInfo();
224fe6060f1SDimitry Andric 
225fe6060f1SDimitry Andric   bool Changed = false;
226fe6060f1SDimitry Andric 
227fe6060f1SDimitry Andric   for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
228fe6060f1SDimitry Andric     Register Reg = Register::index2VirtReg(I);
229fe6060f1SDimitry Andric     if (!LIS->hasInterval(Reg))
230fe6060f1SDimitry Andric       continue;
231fe6060f1SDimitry Andric     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
232*349cc55cSDimitry Andric     if ((RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC)) &&
233*349cc55cSDimitry Andric         (ST.hasGFX90AInsts() || !TRI->isAGPRClass(RC)))
234fe6060f1SDimitry Andric       continue;
235*349cc55cSDimitry Andric 
236fe6060f1SDimitry Andric     Changed |= processReg(Reg);
237fe6060f1SDimitry Andric   }
238fe6060f1SDimitry Andric 
239fe6060f1SDimitry Andric   return Changed;
240fe6060f1SDimitry Andric }
241