//===-- GCNPreRAOptimizations.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass combines split register tuple initialization into a single pseudo:
///
///   undef %0.sub1:sreg_64 = S_MOV_B32 1
///         %0.sub0:sreg_64 = S_MOV_B32 2
/// =>
///   %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x100000002
///
/// (sub0 supplies the low 32 bits of the immediate and sub1 the high 32 bits.)
///
/// This is to allow rematerialization of a value instead of spilling. It is
/// supposed to be done after register coalescer to allow it to do its job and
/// before actual register allocation to allow rematerialization.
///
/// Right now the tuple combining only handles 64 bit SGPRs with immediate
/// initializers, although the same shall be possible with other register
/// classes and instructions if necessary.
///
/// In addition, on subtargets without GFX90A instructions, a COPY from an AGPR
/// into an AGPR is rewritten to read the virtual register that feeds the
/// V_ACCVGPR_WRITE_B32 defining the copy source.
24fe6060f1SDimitry Andric /// 25fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 26fe6060f1SDimitry Andric 27fe6060f1SDimitry Andric #include "AMDGPU.h" 28fe6060f1SDimitry Andric #include "GCNSubtarget.h" 29fe6060f1SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 30fe6060f1SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h" 31fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 32fe6060f1SDimitry Andric #include "llvm/InitializePasses.h" 33fe6060f1SDimitry Andric 34fe6060f1SDimitry Andric using namespace llvm; 35fe6060f1SDimitry Andric 36fe6060f1SDimitry Andric #define DEBUG_TYPE "amdgpu-pre-ra-optimizations" 37fe6060f1SDimitry Andric 38fe6060f1SDimitry Andric namespace { 39fe6060f1SDimitry Andric 40fe6060f1SDimitry Andric class GCNPreRAOptimizations : public MachineFunctionPass { 41fe6060f1SDimitry Andric private: 42fe6060f1SDimitry Andric const SIInstrInfo *TII; 43349cc55cSDimitry Andric const SIRegisterInfo *TRI; 44fe6060f1SDimitry Andric MachineRegisterInfo *MRI; 45fe6060f1SDimitry Andric LiveIntervals *LIS; 46fe6060f1SDimitry Andric 47fe6060f1SDimitry Andric bool processReg(Register Reg); 48fe6060f1SDimitry Andric 49fe6060f1SDimitry Andric public: 50fe6060f1SDimitry Andric static char ID; 51fe6060f1SDimitry Andric 52fe6060f1SDimitry Andric GCNPreRAOptimizations() : MachineFunctionPass(ID) { 53fe6060f1SDimitry Andric initializeGCNPreRAOptimizationsPass(*PassRegistry::getPassRegistry()); 54fe6060f1SDimitry Andric } 55fe6060f1SDimitry Andric 56fe6060f1SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 57fe6060f1SDimitry Andric 58fe6060f1SDimitry Andric StringRef getPassName() const override { 59fe6060f1SDimitry Andric return "AMDGPU Pre-RA optimizations"; 60fe6060f1SDimitry Andric } 61fe6060f1SDimitry Andric 62fe6060f1SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 63*0fca6ea1SDimitry Andric 
AU.addRequired<LiveIntervalsWrapperPass>(); 64fe6060f1SDimitry Andric AU.setPreservesAll(); 65fe6060f1SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 66fe6060f1SDimitry Andric } 67fe6060f1SDimitry Andric }; 68fe6060f1SDimitry Andric 69fe6060f1SDimitry Andric } // End anonymous namespace. 70fe6060f1SDimitry Andric 71fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE, 72fe6060f1SDimitry Andric "AMDGPU Pre-RA optimizations", false, false) 73*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) 74fe6060f1SDimitry Andric INITIALIZE_PASS_END(GCNPreRAOptimizations, DEBUG_TYPE, "Pre-RA optimizations", 75fe6060f1SDimitry Andric false, false) 76fe6060f1SDimitry Andric 77fe6060f1SDimitry Andric char GCNPreRAOptimizations::ID = 0; 78fe6060f1SDimitry Andric 79fe6060f1SDimitry Andric char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizations::ID; 80fe6060f1SDimitry Andric 81fe6060f1SDimitry Andric FunctionPass *llvm::createGCNPreRAOptimizationsPass() { 82fe6060f1SDimitry Andric return new GCNPreRAOptimizations(); 83fe6060f1SDimitry Andric } 84fe6060f1SDimitry Andric 85fe6060f1SDimitry Andric bool GCNPreRAOptimizations::processReg(Register Reg) { 86fe6060f1SDimitry Andric MachineInstr *Def0 = nullptr; 87fe6060f1SDimitry Andric MachineInstr *Def1 = nullptr; 88fe6060f1SDimitry Andric uint64_t Init = 0; 89349cc55cSDimitry Andric bool Changed = false; 90349cc55cSDimitry Andric SmallSet<Register, 32> ModifiedRegs; 91349cc55cSDimitry Andric bool IsAGPRDst = TRI->isAGPRClass(MRI->getRegClass(Reg)); 92fe6060f1SDimitry Andric 93fe6060f1SDimitry Andric for (MachineInstr &I : MRI->def_instructions(Reg)) { 94349cc55cSDimitry Andric switch (I.getOpcode()) { 95349cc55cSDimitry Andric default: 96349cc55cSDimitry Andric return false; 97349cc55cSDimitry Andric case AMDGPU::V_ACCVGPR_WRITE_B32_e64: 98349cc55cSDimitry Andric break; 99349cc55cSDimitry Andric case AMDGPU::COPY: { 100349cc55cSDimitry Andric // Some subtargets cannot do 
an AGPR to AGPR copy directly, and need an 101349cc55cSDimitry Andric // intermdiate temporary VGPR register. Try to find the defining 102349cc55cSDimitry Andric // accvgpr_write to avoid temporary registers. 103349cc55cSDimitry Andric 104349cc55cSDimitry Andric if (!IsAGPRDst) 105349cc55cSDimitry Andric return false; 106349cc55cSDimitry Andric 107349cc55cSDimitry Andric Register SrcReg = I.getOperand(1).getReg(); 108349cc55cSDimitry Andric 109349cc55cSDimitry Andric if (!SrcReg.isVirtual()) 110349cc55cSDimitry Andric break; 111349cc55cSDimitry Andric 112349cc55cSDimitry Andric // Check if source of copy is from another AGPR. 113349cc55cSDimitry Andric bool IsAGPRSrc = TRI->isAGPRClass(MRI->getRegClass(SrcReg)); 114349cc55cSDimitry Andric if (!IsAGPRSrc) 115349cc55cSDimitry Andric break; 116349cc55cSDimitry Andric 117349cc55cSDimitry Andric // def_instructions() does not look at subregs so it may give us a 118349cc55cSDimitry Andric // different instruction that defines the same vreg but different subreg 119349cc55cSDimitry Andric // so we have to manually check subreg. 120349cc55cSDimitry Andric Register SrcSubReg = I.getOperand(1).getSubReg(); 121349cc55cSDimitry Andric for (auto &Def : MRI->def_instructions(SrcReg)) { 122349cc55cSDimitry Andric if (SrcSubReg != Def.getOperand(0).getSubReg()) 123349cc55cSDimitry Andric continue; 124349cc55cSDimitry Andric 125349cc55cSDimitry Andric if (Def.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64) { 126349cc55cSDimitry Andric MachineOperand DefSrcMO = Def.getOperand(1); 127349cc55cSDimitry Andric 128349cc55cSDimitry Andric // Immediates are not an issue and can be propagated in 129349cc55cSDimitry Andric // postrapseudos pass. Only handle cases where defining 130349cc55cSDimitry Andric // accvgpr_write source is a vreg. 
131349cc55cSDimitry Andric if (DefSrcMO.isReg() && DefSrcMO.getReg().isVirtual()) { 132349cc55cSDimitry Andric // Propagate source reg of accvgpr write to this copy instruction 133349cc55cSDimitry Andric I.getOperand(1).setReg(DefSrcMO.getReg()); 134349cc55cSDimitry Andric I.getOperand(1).setSubReg(DefSrcMO.getSubReg()); 135349cc55cSDimitry Andric 136349cc55cSDimitry Andric // Reg uses were changed, collect unique set of registers to update 137349cc55cSDimitry Andric // live intervals at the end. 138349cc55cSDimitry Andric ModifiedRegs.insert(DefSrcMO.getReg()); 139349cc55cSDimitry Andric ModifiedRegs.insert(SrcReg); 140349cc55cSDimitry Andric 141349cc55cSDimitry Andric Changed = true; 142349cc55cSDimitry Andric } 143349cc55cSDimitry Andric 144349cc55cSDimitry Andric // Found the defining accvgpr_write, stop looking any further. 145349cc55cSDimitry Andric break; 146349cc55cSDimitry Andric } 147349cc55cSDimitry Andric } 148349cc55cSDimitry Andric break; 149349cc55cSDimitry Andric } 150349cc55cSDimitry Andric case AMDGPU::S_MOV_B32: 151349cc55cSDimitry Andric if (I.getOperand(0).getReg() != Reg || !I.getOperand(1).isImm() || 152349cc55cSDimitry Andric I.getNumOperands() != 2) 153fe6060f1SDimitry Andric return false; 154fe6060f1SDimitry Andric 155fe6060f1SDimitry Andric switch (I.getOperand(0).getSubReg()) { 156fe6060f1SDimitry Andric default: 157fe6060f1SDimitry Andric return false; 158fe6060f1SDimitry Andric case AMDGPU::sub0: 159fe6060f1SDimitry Andric if (Def0) 160fe6060f1SDimitry Andric return false; 161fe6060f1SDimitry Andric Def0 = &I; 162fe6060f1SDimitry Andric Init |= I.getOperand(1).getImm() & 0xffffffff; 163fe6060f1SDimitry Andric break; 164fe6060f1SDimitry Andric case AMDGPU::sub1: 165fe6060f1SDimitry Andric if (Def1) 166fe6060f1SDimitry Andric return false; 167fe6060f1SDimitry Andric Def1 = &I; 168fe6060f1SDimitry Andric Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32; 169fe6060f1SDimitry Andric break; 170fe6060f1SDimitry Andric } 
171349cc55cSDimitry Andric break; 172349cc55cSDimitry Andric } 173fe6060f1SDimitry Andric } 174fe6060f1SDimitry Andric 175349cc55cSDimitry Andric // For AGPR reg, check if live intervals need to be updated. 176349cc55cSDimitry Andric if (IsAGPRDst) { 177349cc55cSDimitry Andric if (Changed) { 178349cc55cSDimitry Andric for (Register RegToUpdate : ModifiedRegs) { 179349cc55cSDimitry Andric LIS->removeInterval(RegToUpdate); 180349cc55cSDimitry Andric LIS->createAndComputeVirtRegInterval(RegToUpdate); 181349cc55cSDimitry Andric } 182349cc55cSDimitry Andric } 183349cc55cSDimitry Andric 184349cc55cSDimitry Andric return Changed; 185349cc55cSDimitry Andric } 186349cc55cSDimitry Andric 187349cc55cSDimitry Andric // For SGPR reg, check if we can combine instructions. 188fe6060f1SDimitry Andric if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent()) 189349cc55cSDimitry Andric return Changed; 190fe6060f1SDimitry Andric 191fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Combining:\n " << *Def0 << " " << *Def1 192fe6060f1SDimitry Andric << " =>\n"); 193fe6060f1SDimitry Andric 194fe6060f1SDimitry Andric if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*Def1), 195fe6060f1SDimitry Andric LIS->getInstructionIndex(*Def0))) 196fe6060f1SDimitry Andric std::swap(Def0, Def1); 197fe6060f1SDimitry Andric 198fe6060f1SDimitry Andric LIS->RemoveMachineInstrFromMaps(*Def0); 199fe6060f1SDimitry Andric LIS->RemoveMachineInstrFromMaps(*Def1); 200fe6060f1SDimitry Andric auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(), 201fe6060f1SDimitry Andric TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg) 202fe6060f1SDimitry Andric .addImm(Init); 203fe6060f1SDimitry Andric 204fe6060f1SDimitry Andric Def0->eraseFromParent(); 205fe6060f1SDimitry Andric Def1->eraseFromParent(); 206fe6060f1SDimitry Andric LIS->InsertMachineInstrInMaps(*NewI); 207fe6060f1SDimitry Andric LIS->removeInterval(Reg); 208fe6060f1SDimitry Andric LIS->createAndComputeVirtRegInterval(Reg); 209fe6060f1SDimitry 
Andric 210fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " " << *NewI); 211fe6060f1SDimitry Andric 212fe6060f1SDimitry Andric return true; 213fe6060f1SDimitry Andric } 214fe6060f1SDimitry Andric 215fe6060f1SDimitry Andric bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) { 216fe6060f1SDimitry Andric if (skipFunction(MF.getFunction())) 217fe6060f1SDimitry Andric return false; 218fe6060f1SDimitry Andric 219fe6060f1SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 220fe6060f1SDimitry Andric TII = ST.getInstrInfo(); 221fe6060f1SDimitry Andric MRI = &MF.getRegInfo(); 222*0fca6ea1SDimitry Andric LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); 223349cc55cSDimitry Andric TRI = ST.getRegisterInfo(); 224fe6060f1SDimitry Andric 225fe6060f1SDimitry Andric bool Changed = false; 226fe6060f1SDimitry Andric 227fe6060f1SDimitry Andric for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) { 228fe6060f1SDimitry Andric Register Reg = Register::index2VirtReg(I); 229fe6060f1SDimitry Andric if (!LIS->hasInterval(Reg)) 230fe6060f1SDimitry Andric continue; 231fe6060f1SDimitry Andric const TargetRegisterClass *RC = MRI->getRegClass(Reg); 232349cc55cSDimitry Andric if ((RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC)) && 233349cc55cSDimitry Andric (ST.hasGFX90AInsts() || !TRI->isAGPRClass(RC))) 234fe6060f1SDimitry Andric continue; 235349cc55cSDimitry Andric 236fe6060f1SDimitry Andric Changed |= processReg(Reg); 237fe6060f1SDimitry Andric } 238fe6060f1SDimitry Andric 239fe6060f1SDimitry Andric return Changed; 240fe6060f1SDimitry Andric } 241