//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// SI implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPURegisterBankInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"

using namespace llvm;

#define GET_REGINFO_TARGET_DESC
#include "AMDGPUGenRegisterInfo.inc"

static cl::opt<bool> EnableSpillSGPRToVGPR(
    "amdgpu-spill-sgpr-to-vgpr",
    cl::desc("Enable spilling SGPRs to VGPRs"),
    cl::ReallyHidden,
    cl::init(true));

std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;

// Map numbers of DWORDs to indexes in SubRegFromChannelTable.
// Valid indexes are shifted 1, such that a 0 mapping means unsupported.
// e.g. for 8 DWORDs (256-bit), SubRegFromChannelTableWidthMap[8] = 8,
// meaning index 7 in SubRegFromChannelTable.
static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};

namespace llvm {

// A temporary struct to spill SGPRs.
// This is mostly to spill SGPRs to memory. Spilling SGPRs into VGPR lanes emits
// just v_writelane and v_readlane.
//
// When spilling to memory, the SGPRs are written into VGPR lanes and the VGPR
// is saved to scratch (or the other way around for loads).
// For this, a VGPR is required where the needed lanes can be clobbered. The
// RegScavenger can provide a VGPR where currently active lanes can be
// clobbered, but we still need to save inactive lanes.
// The high-level steps are:
// - Try to scavenge SGPR(s) to save exec
// - Try to scavenge VGPR
// - Save needed, all or inactive lanes of a TmpVGPR
// - Spill/Restore SGPRs using TmpVGPR
// - Restore TmpVGPR
//
// To save all lanes of TmpVGPR, exec needs to be saved and modified. If we
// cannot scavenge temporary SGPRs to save exec, we use the following code:
// buffer_store_dword TmpVGPR ; only if active lanes need to be saved
// s_not exec, exec
// buffer_store_dword TmpVGPR ; save inactive lanes
// s_not exec, exec
struct SGPRSpillBuilder {
  struct PerVGPRData {
    unsigned PerVGPR;
    unsigned NumVGPRs;
    int64_t VGPRLanes;
  };

  // The SGPR to save
  Register SuperReg;
  MachineBasicBlock::iterator MI;
  ArrayRef<int16_t> SplitParts;
  unsigned NumSubRegs;
  bool IsKill;
  const DebugLoc &DL;

  /* When spilling to stack */
  // The SGPRs are written into this VGPR, which is then written to scratch
  // (or vice versa for loads).
  Register TmpVGPR = AMDGPU::NoRegister;
  // Temporary spill slot to save TmpVGPR to.
  int TmpVGPRIndex = 0;
  // If TmpVGPR is live before the spill or if it is scavenged.
  bool TmpVGPRLive = false;
  // Scavenged SGPR to save EXEC.
  Register SavedExecReg = AMDGPU::NoRegister;
  // Stack index to write the SGPRs to.
  int Index;
  unsigned EltSize = 4;

  RegScavenger *RS;
  MachineBasicBlock *MBB;
  MachineFunction &MF;
  SIMachineFunctionInfo &MFI;
  const SIInstrInfo &TII;
  const SIRegisterInfo &TRI;
  bool IsWave32;
  Register ExecReg;
  unsigned MovOpc;
  unsigned NotOpc;

  SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
                   bool IsWave32, MachineBasicBlock::iterator MI, int Index,
                   RegScavenger *RS)
      : SGPRSpillBuilder(TRI, TII, IsWave32, MI, MI->getOperand(0).getReg(),
                         MI->getOperand(0).isKill(), Index, RS) {}

  SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
                   bool IsWave32, MachineBasicBlock::iterator MI, Register Reg,
                   bool IsKill, int Index, RegScavenger *RS)
      : SuperReg(Reg), MI(MI), IsKill(IsKill), DL(MI->getDebugLoc()),
        Index(Index), RS(RS), MBB(MI->getParent()), MF(*MBB->getParent()),
        MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
        IsWave32(IsWave32) {
    const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
    SplitParts = TRI.getRegSplitParts(RC, EltSize);
    NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

    if (IsWave32) {
      ExecReg = AMDGPU::EXEC_LO;
      MovOpc = AMDGPU::S_MOV_B32;
      NotOpc = AMDGPU::S_NOT_B32;
    } else {
      ExecReg = AMDGPU::EXEC;
      MovOpc = AMDGPU::S_MOV_B64;
      NotOpc = AMDGPU::S_NOT_B64;
    }

    assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
    assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
           SuperReg != AMDGPU::EXEC && "exec should never spill");
  }

  PerVGPRData getPerVGPRData() {
    PerVGPRData Data;
    Data.PerVGPR = IsWave32 ? 32 : 64;
    Data.NumVGPRs = (NumSubRegs + (Data.PerVGPR - 1)) / Data.PerVGPR;
    Data.VGPRLanes = (1LL << std::min(Data.PerVGPR, NumSubRegs)) - 1LL;
    return Data;
  }

  // Tries to scavenge SGPRs to save EXEC and a VGPR. Uses v0 if no VGPR is
  // free.
  // Writes these instructions if an SGPR can be scavenged:
  // s_mov_b64 s[6:7], exec ; Save exec
  // s_mov_b64 exec, 3      ; Wanted lanemask
  // buffer_store_dword v1  ; Write scavenged VGPR to emergency slot
  //
  // Writes these instructions if no SGPR can be scavenged:
  // buffer_store_dword v0  ; Only if no free VGPR was found
  // s_not_b64 exec, exec
  // buffer_store_dword v0  ; Save inactive lanes
  //                        ; exec stays inverted, it is flipped back in
  //                        ; restore.
  void prepare() {
    // Scavenged temporary VGPR to use. It must be scavenged once for any number
    // of spilled subregs.
    // FIXME: The liveness analysis is limited and does not tell if a register
    // is in use in lanes that are currently inactive. We can never be sure if
    // a register is actually in use in another lane, so we need to save all
    // used lanes of the chosen VGPR.
    assert(RS && "Cannot spill SGPR to memory without RegScavenger");
    TmpVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false,
                                            0, false);

    // Reserve temporary stack slot
    TmpVGPRIndex = MFI.getScavengeFI(MF.getFrameInfo(), TRI);
    if (TmpVGPR) {
      // Found a register that is dead in the currently active lanes, we only
      // need to spill inactive lanes.
      TmpVGPRLive = false;
    } else {
      // Pick v0 because it doesn't make a difference.
      TmpVGPR = AMDGPU::VGPR0;
      TmpVGPRLive = true;
    }

    if (TmpVGPRLive) {
      // We need to inform the scavenger that this index is already in use until
      // we're done with the custom emergency spill.
      RS->assignRegToScavengingIndex(TmpVGPRIndex, TmpVGPR);
    }

    // We may end up recursively calling the scavenger, and don't want to re-use
    // the same register.
    RS->setRegUsed(TmpVGPR);

    // Try to scavenge SGPRs to save exec
    assert(!SavedExecReg && "Exec is already saved, refuse to save again");
    const TargetRegisterClass &RC =
        IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
    RS->setRegUsed(SuperReg);
    SavedExecReg = RS->scavengeRegisterBackwards(RC, MI, false, 0, false);

    int64_t VGPRLanes = getPerVGPRData().VGPRLanes;

    if (SavedExecReg) {
      RS->setRegUsed(SavedExecReg);
      // Set exec to needed lanes
      BuildMI(*MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg);
      auto I =
          BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
      if (!TmpVGPRLive)
        I.addReg(TmpVGPR, RegState::ImplicitDefine);
      // Spill needed lanes
      TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
    } else {
      // The modify and restore of exec clobber SCC, which we would have to save
      // and restore. FIXME: We probably would need to reserve a register for
      // this.
      if (RS->isRegUsed(AMDGPU::SCC))
        MI->emitError("unhandled SGPR spill to memory");

      // Spill active lanes
      if (TmpVGPRLive)
        TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false,
                                    /*IsKill*/ false);
      // Spill inactive lanes
      auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
      if (!TmpVGPRLive)
        I.addReg(TmpVGPR, RegState::ImplicitDefine);
      I->getOperand(2).setIsDead(); // Mark SCC as dead.
      TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
    }
  }

  // Writes these instructions if an SGPR can be scavenged:
  // buffer_load_dword v1   ; Reload scavenged VGPR from emergency slot
  // s_waitcnt vmcnt(0)     ; If a free VGPR was found
  // s_mov_b64 exec, s[6:7] ; Restore exec
  //
  // Writes these instructions if no SGPR can be scavenged:
  // buffer_load_dword v0   ; Restore inactive lanes
  // s_waitcnt vmcnt(0)     ; If a free VGPR was found
  // s_not_b64 exec, exec
  // buffer_load_dword v0   ; Only if no free VGPR was found
  void restore() {
    if (SavedExecReg) {
      // Restore used lanes
      TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
                                  /*IsKill*/ false);
      // Restore exec
      auto I = BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg)
                   .addReg(SavedExecReg, RegState::Kill);
      // Add an implicit use of the load so it is not dead.
      // FIXME: This inserts an unnecessary waitcnt
      if (!TmpVGPRLive) {
        I.addReg(TmpVGPR, RegState::ImplicitKill);
      }
    } else {
      // Restore inactive lanes
      TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
                                  /*IsKill*/ false);
      auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
      if (!TmpVGPRLive)
        I.addReg(TmpVGPR, RegState::ImplicitKill);
      I->getOperand(2).setIsDead(); // Mark SCC as dead.

      // Restore active lanes
      if (TmpVGPRLive)
        TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true);
    }

    // Inform the scavenger where we're releasing our custom scavenged register.
    if (TmpVGPRLive) {
      MachineBasicBlock::iterator RestorePt = std::prev(MI);
      RS->assignRegToScavengingIndex(TmpVGPRIndex, TmpVGPR, &*RestorePt);
    }
  }

  // Write TmpVGPR to memory or read TmpVGPR from memory.
  // Either using a single buffer_load/store if exec is set to the needed mask
  // or using
  // buffer_load
  // s_not exec, exec
  // buffer_load
  // s_not exec, exec
  void readWriteTmpVGPR(unsigned Offset, bool IsLoad) {
    if (SavedExecReg) {
      // Spill needed lanes
      TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
    } else {
      // The modify and restore of exec clobber SCC, which we would have to save
      // and restore. FIXME: We probably would need to reserve a register for
      // this.
      if (RS->isRegUsed(AMDGPU::SCC))
        MI->emitError("unhandled SGPR spill to memory");

      // Spill active lanes
      TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
                                  /*IsKill*/ false);
      // Spill inactive lanes
      auto Not0 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
      Not0->getOperand(2).setIsDead(); // Mark SCC as dead.
      TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
      auto Not1 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
      Not1->getOperand(2).setIsDead(); // Mark SCC as dead.
    }
  }

  void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI) {
    assert(MBB->getParent() == &MF);
    MI = NewMI;
    MBB = NewMBB;
  }
};

} // namespace llvm

SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
    : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour(),
                            ST.getAMDGPUDwarfFlavour()),
      ST(ST), SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) {

  assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
         getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
         (getSubRegIndexLaneMask(AMDGPU::lo16) |
          getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
             getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
         "getNumCoveredRegs() will not work with generated subreg masks!");

  RegPressureIgnoredUnits.resize(getNumRegUnits());
  RegPressureIgnoredUnits.set(*regunits(MCRegister::from(AMDGPU::M0)).begin());
  for (auto Reg : AMDGPU::VGPR_16RegClass) {
    if (AMDGPU::isHi(Reg, *this))
      RegPressureIgnoredUnits.set(*regunits(Reg).begin());
  }

  // HACK: Until this is fully tablegen'd.
  static llvm::once_flag InitializeRegSplitPartsFlag;

  static auto InitializeRegSplitPartsOnce = [this]() {
    for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
      unsigned Size = getSubRegIdxSize(Idx);
      if (Size & 31)
        continue;
      std::vector<int16_t> &Vec = RegSplitParts[Size / 32 - 1];
      unsigned Pos = getSubRegIdxOffset(Idx);
      if (Pos % Size)
        continue;
      Pos /= Size;
      if (Vec.empty()) {
        unsigned MaxNumParts = 1024 / Size; // Maximum register is 1024 bits.
        Vec.resize(MaxNumParts);
      }
      Vec[Pos] = Idx;
    }
  };

  static llvm::once_flag InitializeSubRegFromChannelTableFlag;

  static auto InitializeSubRegFromChannelTableOnce = [this]() {
    for (auto &Row : SubRegFromChannelTable)
      Row.fill(AMDGPU::NoSubRegister);
    for (unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
      unsigned Width = getSubRegIdxSize(Idx) / 32;
      unsigned Offset = getSubRegIdxOffset(Idx) / 32;
      assert(Width < SubRegFromChannelTableWidthMap.size());
      Width = SubRegFromChannelTableWidthMap[Width];
      if (Width == 0)
        continue;
      unsigned TableIdx = Width - 1;
      assert(TableIdx < SubRegFromChannelTable.size());
      assert(Offset < SubRegFromChannelTable[TableIdx].size());
      SubRegFromChannelTable[TableIdx][Offset] = Idx;
    }
  };

  llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
  llvm::call_once(InitializeSubRegFromChannelTableFlag,
                  InitializeSubRegFromChannelTableOnce);
}

void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
                                           MCRegister Reg) const {
  for (MCRegAliasIterator R(Reg, this, true); R.isValid(); ++R)
    Reserved.set(*R);
}

// Forced to be here by one .inc
const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
    const MachineFunction *MF) const {
  CallingConv::ID CC = MF->getFunction().getCallingConv();
  switch (CC) {
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::Cold:
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_SaveList
                               : CSR_AMDGPU_SaveList;
  case CallingConv::AMDGPU_Gfx:
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList
                               : CSR_AMDGPU_SI_Gfx_SaveList;
  case CallingConv::AMDGPU_CS_ChainPreserve:
    return CSR_AMDGPU_CS_ChainPreserve_SaveList;
  default: {
    // Dummy to not crash RegisterClassInfo.
    static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
    return &NoCalleeSavedReg;
  }
  }
}

const MCPhysReg *
SIRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
  return nullptr;
}

const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                                     CallingConv::ID CC) const {
  switch (CC) {
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::Cold:
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_RegMask
                               : CSR_AMDGPU_RegMask;
  case CallingConv::AMDGPU_Gfx:
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask
                               : CSR_AMDGPU_SI_Gfx_RegMask;
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
    // Calls to these functions never return, so we can pretend everything is
    // preserved.
    return AMDGPU_AllVGPRs_RegMask;
  default:
    return nullptr;
  }
}

const uint32_t *SIRegisterInfo::getNoPreservedMask() const {
  return CSR_AMDGPU_NoRegs_RegMask;
}

bool SIRegisterInfo::isChainScratchRegister(Register VGPR) {
  return VGPR >= AMDGPU::VGPR0 && VGPR < AMDGPU::VGPR8;
}

const TargetRegisterClass *
SIRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                          const MachineFunction &MF) const {
  // FIXME: Should have a helper function like getEquivalentVGPRClass to get
  // the equivalent AV class. If one were used here, the verifier would crash
  // after RegBankSelect in the GISel flow. The aligned regclasses are not
  // fully given until Instruction selection.
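  //
  // Illustrative note (added; not part of the upstream comment): on subtargets
  // with MAI instructions, each same-width pair of VGPR and AGPR classes is
  // widened to the combined AV class below, e.g. both VReg_64 and AReg_64 map
  // to AV_64, so the allocator may satisfy the constraint from either register
  // file.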
  if (ST.hasMAIInsts() && (isVGPRClass(RC) || isAGPRClass(RC))) {
    if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
      return &AMDGPU::AV_32RegClass;
    if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
      return &AMDGPU::AV_64RegClass;
    if (RC == &AMDGPU::VReg_64_Align2RegClass ||
        RC == &AMDGPU::AReg_64_Align2RegClass)
      return &AMDGPU::AV_64_Align2RegClass;
    if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)
      return &AMDGPU::AV_96RegClass;
    if (RC == &AMDGPU::VReg_96_Align2RegClass ||
        RC == &AMDGPU::AReg_96_Align2RegClass)
      return &AMDGPU::AV_96_Align2RegClass;
    if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)
      return &AMDGPU::AV_128RegClass;
    if (RC == &AMDGPU::VReg_128_Align2RegClass ||
        RC == &AMDGPU::AReg_128_Align2RegClass)
      return &AMDGPU::AV_128_Align2RegClass;
    if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)
      return &AMDGPU::AV_160RegClass;
    if (RC == &AMDGPU::VReg_160_Align2RegClass ||
        RC == &AMDGPU::AReg_160_Align2RegClass)
      return &AMDGPU::AV_160_Align2RegClass;
    if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)
      return &AMDGPU::AV_192RegClass;
    if (RC == &AMDGPU::VReg_192_Align2RegClass ||
        RC == &AMDGPU::AReg_192_Align2RegClass)
      return &AMDGPU::AV_192_Align2RegClass;
    if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)
      return &AMDGPU::AV_256RegClass;
    if (RC == &AMDGPU::VReg_256_Align2RegClass ||
        RC == &AMDGPU::AReg_256_Align2RegClass)
      return &AMDGPU::AV_256_Align2RegClass;
    if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)
      return &AMDGPU::AV_512RegClass;
    if (RC == &AMDGPU::VReg_512_Align2RegClass ||
        RC == &AMDGPU::AReg_512_Align2RegClass)
      return &AMDGPU::AV_512_Align2RegClass;
    if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)
      return &AMDGPU::AV_1024RegClass;
    if (RC == &AMDGPU::VReg_1024_Align2RegClass ||
        RC == &AMDGPU::AReg_1024_Align2RegClass)
      return &AMDGPU::AV_1024_Align2RegClass;
  }

  return TargetRegisterInfo::getLargestLegalSuperClass(RC, MF);
}

Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const SIFrameLowering *TFI = ST.getFrameLowering();
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  // During ISel lowering we always reserve the stack pointer in entry and chain
  // functions, but never actually want to reference it when accessing our own
  // frame. If we need a frame pointer we use it, but otherwise we can just use
  // an immediate "0" which we represent by returning NoRegister.
  if (FuncInfo->isBottomOfStack()) {
    return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg() : Register();
  }
  return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg()
                        : FuncInfo->getStackPtrOffsetReg();
}

bool SIRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
  // When we need stack realignment, we can't reference off of the
  // stack pointer, so we reserve a base pointer.
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return MFI.getNumFixedObjects() && shouldRealignStack(MF);
}

Register SIRegisterInfo::getBaseRegister() const { return AMDGPU::SGPR34; }

const uint32_t *SIRegisterInfo::getAllVGPRRegMask() const {
  return AMDGPU_AllVGPRs_RegMask;
}

const uint32_t *SIRegisterInfo::getAllAGPRRegMask() const {
  return AMDGPU_AllAGPRs_RegMask;
}

const uint32_t *SIRegisterInfo::getAllVectorRegMask() const {
  return AMDGPU_AllVectorRegs_RegMask;
}

const uint32_t *SIRegisterInfo::getAllAllocatableSRegMask() const {
  return AMDGPU_AllAllocatableSRegs_RegMask;
}

unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
                                              unsigned NumRegs) {
  assert(NumRegs < SubRegFromChannelTableWidthMap.size());
  unsigned NumRegIndex = SubRegFromChannelTableWidthMap[NumRegs];
  assert(NumRegIndex && "Not implemented");
  assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
  return SubRegFromChannelTable[NumRegIndex - 1][Channel];
}

MCRegister
SIRegisterInfo::getAlignedHighSGPRForRC(const MachineFunction &MF,
                                        const unsigned Align,
                                        const TargetRegisterClass *RC) const {
  unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), Align) - Align;
  MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, RC);
}

MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(
    const MachineFunction &MF) const {
  return getAlignedHighSGPRForRC(MF, /*Align=*/4, &AMDGPU::SGPR_128RegClass);
}

BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  Reserved.set(AMDGPU::MODE);

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // Reserve special purpose registers.
  //
  // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
  // this seems likely to result in bugs, so I'm marking them as reserved.
  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);

  // M0 has to be reserved so that llvm accepts it as a live-in into a block.
  reserveRegisterTuples(Reserved, AMDGPU::M0);

  // Reserve src_vccz, src_execz, src_scc.
  reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);

  // Reserve the memory aperture registers.
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);

  // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);

  // Reserve xnack_mask registers - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);

  // Reserve lds_direct register - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);

  // Reserve Trap Handler registers - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::TBA);
  reserveRegisterTuples(Reserved, AMDGPU::TMA);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);

  // Reserve null register - it shall never be allocated.
  reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);

  // Reserve SGPRs.
  //
  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
  for (const TargetRegisterClass *RC : regclasses()) {
    if (RC->isBaseClass() && isSGPRClass(RC)) {
      unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
      for (MCPhysReg Reg : *RC) {
        unsigned Index = getHWRegIndex(Reg);
        if (Index + NumRegs > MaxNumSGPRs && Index < TotalNumSGPRs)
          Reserved.set(Reg);
      }
    }
  }

  Register ScratchRSrcReg = MFI->getScratchRSrcReg();
  if (ScratchRSrcReg != AMDGPU::NoRegister) {
    // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we
    // need to spill.
    // TODO: May need to reserve a VGPR if doing LDS spilling.
    reserveRegisterTuples(Reserved, ScratchRSrcReg);
  }

  Register LongBranchReservedReg = MFI->getLongBranchReservedReg();
  if (LongBranchReservedReg)
    reserveRegisterTuples(Reserved, LongBranchReservedReg);

  // We have to assume the SP is needed in case there are calls in the function,
  // which is detected after the function is lowered. If we aren't really going
  // to need SP, don't bother reserving it.
  MCRegister StackPtrReg = MFI->getStackPtrOffsetReg();
  if (StackPtrReg) {
    reserveRegisterTuples(Reserved, StackPtrReg);
    assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
  }

  MCRegister FrameReg = MFI->getFrameOffsetReg();
  if (FrameReg) {
    reserveRegisterTuples(Reserved, FrameReg);
    assert(!isSubRegister(ScratchRSrcReg, FrameReg));
  }

  if (hasBasePointer(MF)) {
    MCRegister BasePtrReg = getBaseRegister();
    reserveRegisterTuples(Reserved, BasePtrReg);
    assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
  }

  // FIXME: Use same reserved register introduced in D149775
  // SGPR used to preserve EXEC MASK around WWM spill/copy instructions.
  Register ExecCopyReg = MFI->getSGPRForEXECCopy();
  if (ExecCopyReg)
    reserveRegisterTuples(Reserved, ExecCopyReg);

  // Reserve VGPRs/AGPRs.
  //
  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
  unsigned MaxNumAGPRs = MaxNumVGPRs;
  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();

  // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
  // a wave may have up to 512 total vector registers combining both VGPRs and
  // AGPRs. Hence, in an entry function without calls and without AGPRs used
  // within it, it is possible to use the whole vector register budget for
  // VGPRs.
  //
  // TODO: it should be possible to estimate the maximum AGPR/VGPR pressure and
  // split the register file accordingly.
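  //
  // Worked example (added for illustration; the numbers are assumed, not from
  // the source): suppose ST.getMaxNumVGPRs(MF) returns a combined budget of
  // 384 registers on a GFX90A target, where TotalNumVGPRs is 256.
  //   - If the function uses AGPRs:  MaxNumVGPRs = 192 and MaxNumAGPRs = 192.
  //   - If it does not use AGPRs:    MaxNumVGPRs = 256 (the full VGPR file)
  //     and the remaining 128 registers stay available as AGPRs.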
  if (ST.hasGFX90AInsts()) {
    if (MFI->usesAGPRs(MF)) {
      MaxNumVGPRs /= 2;
      MaxNumAGPRs = MaxNumVGPRs;
    } else {
      if (MaxNumVGPRs > TotalNumVGPRs) {
        MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
        MaxNumVGPRs = TotalNumVGPRs;
      } else
        MaxNumAGPRs = 0;
    }
  }

  for (const TargetRegisterClass *RC : regclasses()) {
    if (RC->isBaseClass() && isVGPRClass(RC)) {
      unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
      for (MCPhysReg Reg : *RC) {
        unsigned Index = getHWRegIndex(Reg);
        if (Index + NumRegs > MaxNumVGPRs)
          Reserved.set(Reg);
      }
    }
  }

  // Reserve all the AGPRs if there are no instructions to use them.
  if (!ST.hasMAIInsts())
    MaxNumAGPRs = 0;
  for (const TargetRegisterClass *RC : regclasses()) {
    if (RC->isBaseClass() && isAGPRClass(RC)) {
      unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
      for (MCPhysReg Reg : *RC) {
        unsigned Index = getHWRegIndex(Reg);
        if (Index + NumRegs > MaxNumAGPRs)
          Reserved.set(Reg);
      }
    }
  }

  // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
  // VGPR available at all times.
  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    reserveRegisterTuples(Reserved, MFI->getVGPRForAGPRCopy());
  }

  for (Register Reg : MFI->getWWMReservedRegs())
    reserveRegisterTuples(Reserved, Reg);

  // FIXME: Stop using reserved registers for this.
  for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
    reserveRegisterTuples(Reserved, Reg);

  for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
    reserveRegisterTuples(Reserved, Reg);

  return Reserved;
}

bool SIRegisterInfo::isAsmClobberable(const MachineFunction &MF,
                                      MCRegister PhysReg) const {
  return !MF.getRegInfo().isReserved(PhysReg);
}

bool SIRegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  // On entry or in chain functions, the base address is 0, so it can't possibly
  // need any more alignment.

  // FIXME: Should be able to specify the entry frame alignment per calling
  // convention instead.
  if (Info->isBottomOfStack())
    return false;

  return TargetRegisterInfo::shouldRealignStack(MF);
}

bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
  const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
  if (Info->isEntryFunction()) {
    const MachineFrameInfo &MFI = Fn.getFrameInfo();
    return MFI.hasStackObjects() || MFI.hasCalls();
  }

  // May need scavenger for dealing with callee saved registers.
  return true;
}

bool SIRegisterInfo::requiresFrameIndexScavenging(
    const MachineFunction &MF) const {
  // Do not use frame virtual registers. They used to be used for SGPRs, but
  // once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the
  // scavenger fails, we can increment/decrement the necessary SGPRs to avoid a
  // spill.
  return false;
}

bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
    const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return MFI.hasStackObjects();
}

bool SIRegisterInfo::requiresVirtualBaseRegisters(
    const MachineFunction &) const {
  // There are no special dedicated stack or frame pointers.
  return true;
}

int64_t SIRegisterInfo::getScratchInstrOffset(const MachineInstr *MI) const {
  assert(SIInstrInfo::isMUBUF(*MI) || SIInstrInfo::isFLATScratch(*MI));

  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                          AMDGPU::OpName::offset);
  return MI->getOperand(OffIdx).getImm();
}

int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
                                                 int Idx) const {
  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
    return 0;

  assert((Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                            AMDGPU::OpName::vaddr) ||
          (Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                             AMDGPU::OpName::saddr))) &&
         "Should never see frame index on non-address operand");

  return getScratchInstrOffset(MI);
}

bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
    return false;

  int64_t FullOffset = Offset + getScratchInstrOffset(MI);

  const SIInstrInfo *TII = ST.getInstrInfo();
  if (SIInstrInfo::isMUBUF(*MI))
    return !TII->isLegalMUBUFImmOffset(FullOffset);

  return !TII->isLegalFLATOffset(FullOffset, AMDGPUAS::PRIVATE_ADDRESS,
                                 SIInstrFlags::FlatScratch);
}

Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
                                                      int FrameIdx,
                                                      int64_t Offset) const {
  MachineBasicBlock::iterator Ins = MBB->begin();
  DebugLoc DL; // Defaults to "unknown"

  if (Ins != MBB->end())
    DL = Ins->getDebugLoc();

  MachineFunction *MF = MBB->getParent();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned MovOpc = ST.enableFlatScratch() ? AMDGPU::S_MOV_B32
                                           : AMDGPU::V_MOV_B32_e32;

  Register BaseReg = MRI.createVirtualRegister(
      ST.enableFlatScratch() ? &AMDGPU::SReg_32_XEXEC_HIRegClass
                             : &AMDGPU::VGPR_32RegClass);

  if (Offset == 0) {
    BuildMI(*MBB, Ins, DL, TII->get(MovOpc), BaseReg)
        .addFrameIndex(FrameIdx);
    return BaseReg;
  }

  Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

  Register FIReg = MRI.createVirtualRegister(
      ST.enableFlatScratch() ? &AMDGPU::SReg_32_XM0RegClass
                             : &AMDGPU::VGPR_32RegClass);

  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
      .addImm(Offset);
  BuildMI(*MBB, Ins, DL, TII->get(MovOpc), FIReg)
      .addFrameIndex(FrameIdx);

  if (ST.enableFlatScratch()) {
    BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_I32), BaseReg)
        .addReg(OffsetReg, RegState::Kill)
        .addReg(FIReg);
    return BaseReg;
  }

  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
      .addReg(OffsetReg, RegState::Kill)
      .addReg(FIReg)
      .addImm(0); // clamp bit

  return BaseReg;
}

void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
                                       int64_t Offset) const {
  const SIInstrInfo *TII = ST.getInstrInfo();
  bool IsFlat = TII->isFLATScratch(MI);

#ifndef NDEBUG
  // FIXME: Is it possible to be storing a frame index to itself?
  bool SeenFI = false;
  for (const MachineOperand &MO : MI.operands()) {
    if (MO.isFI()) {
      if (SeenFI)
        llvm_unreachable("should not see multiple frame indices");

      SeenFI = true;
    }
  }
#endif

  MachineOperand *FIOp =
      TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
                                      : AMDGPU::OpName::vaddr);

  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
  int64_t NewOffset = OffsetOp->getImm() + Offset;

  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
  assert(TII->isMUBUF(MI) || TII->isFLATScratch(MI));

  if (IsFlat) {
    assert(TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
                                  SIInstrFlags::FlatScratch) &&
           "offset should be legal");
    FIOp->ChangeToRegister(BaseReg, false);
    OffsetOp->setImm(NewOffset);
    return;
  }

#ifndef NDEBUG
  MachineOperand *SOffset = TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
  assert(SOffset->isImm() && SOffset->getImm() == 0);
#endif

  assert(TII->isLegalMUBUFImmOffset(NewOffset) && "offset should be legal");

  FIOp->ChangeToRegister(BaseReg, false);
  OffsetOp->setImm(NewOffset);
}

bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
                                        Register BaseReg,
                                        int64_t Offset) const {
  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
    return false;

  int64_t NewOffset = Offset + getScratchInstrOffset(MI);

  const SIInstrInfo *TII = ST.getInstrInfo();
  if (SIInstrInfo::isMUBUF(*MI))
    return TII->isLegalMUBUFImmOffset(NewOffset);

  return TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
                                SIInstrFlags::FlatScratch);
}

const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
    const MachineFunction &MF, unsigned Kind) const {
  // This is inaccurate. It depends on the instruction and address space. The
  // only place where we should hit this is for dealing with frame indexes /
  // private accesses, so this is correct in that case.
9460b57cec5SDimitry Andric return &AMDGPU::VGPR_32RegClass; 9470b57cec5SDimitry Andric } 9480b57cec5SDimitry Andric 949349cc55cSDimitry Andric const TargetRegisterClass * 950349cc55cSDimitry Andric SIRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { 951349cc55cSDimitry Andric if (isAGPRClass(RC) && !ST.hasGFX90AInsts()) 952349cc55cSDimitry Andric return getEquivalentVGPRClass(RC); 953bdd1243dSDimitry Andric if (RC == &AMDGPU::SCC_CLASSRegClass) 954bdd1243dSDimitry Andric return getWaveMaskRegClass(); 955349cc55cSDimitry Andric 956349cc55cSDimitry Andric return RC; 957349cc55cSDimitry Andric } 958349cc55cSDimitry Andric 9590b57cec5SDimitry Andric static unsigned getNumSubRegsForSpillOp(unsigned Op) { 9600b57cec5SDimitry Andric 9610b57cec5SDimitry Andric switch (Op) { 9620b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S1024_SAVE: 9630b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S1024_RESTORE: 9640b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V1024_SAVE: 9650b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V1024_RESTORE: 9660b57cec5SDimitry Andric case AMDGPU::SI_SPILL_A1024_SAVE: 9670b57cec5SDimitry Andric case AMDGPU::SI_SPILL_A1024_RESTORE: 9680eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV1024_SAVE: 9690eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV1024_RESTORE: 9700b57cec5SDimitry Andric return 32; 9710b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S512_SAVE: 9720b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S512_RESTORE: 9730b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V512_SAVE: 9740b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V512_RESTORE: 9750b57cec5SDimitry Andric case AMDGPU::SI_SPILL_A512_SAVE: 9760b57cec5SDimitry Andric case AMDGPU::SI_SPILL_A512_RESTORE: 9770eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV512_SAVE: 9780eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV512_RESTORE: 9790b57cec5SDimitry Andric return 16; 980bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S384_SAVE: 981bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S384_RESTORE: 982bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_V384_SAVE: 983bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_V384_RESTORE: 984bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_A384_SAVE: 985bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_A384_RESTORE: 986bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_AV384_SAVE: 987bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_AV384_RESTORE: 988bdd1243dSDimitry Andric return 12; 989bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S352_SAVE: 990bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S352_RESTORE: 991bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_V352_SAVE: 992bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_V352_RESTORE: 993bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_A352_SAVE: 994bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_A352_RESTORE: 995bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_AV352_SAVE: 996bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_AV352_RESTORE: 997bdd1243dSDimitry Andric return 11; 998bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S320_SAVE: 999bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S320_RESTORE: 1000bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_V320_SAVE: 1001bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_V320_RESTORE: 1002bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_A320_SAVE: 1003bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_A320_RESTORE: 1004bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_AV320_SAVE: 1005bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_AV320_RESTORE: 1006bdd1243dSDimitry Andric return 10; 1007bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S288_SAVE: 
1008bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S288_RESTORE: 1009bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_V288_SAVE: 1010bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_V288_RESTORE: 1011bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_A288_SAVE: 1012bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_A288_RESTORE: 1013bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_AV288_SAVE: 1014bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_AV288_RESTORE: 1015bdd1243dSDimitry Andric return 9; 10160b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S256_SAVE: 10170b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S256_RESTORE: 10180b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V256_SAVE: 10190b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V256_RESTORE: 1020e8d8bef9SDimitry Andric case AMDGPU::SI_SPILL_A256_SAVE: 1021e8d8bef9SDimitry Andric case AMDGPU::SI_SPILL_A256_RESTORE: 10220eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV256_SAVE: 10230eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV256_RESTORE: 10240b57cec5SDimitry Andric return 8; 1025fe6060f1SDimitry Andric case AMDGPU::SI_SPILL_S224_SAVE: 1026fe6060f1SDimitry Andric case AMDGPU::SI_SPILL_S224_RESTORE: 1027fe6060f1SDimitry Andric case AMDGPU::SI_SPILL_V224_SAVE: 1028fe6060f1SDimitry Andric case AMDGPU::SI_SPILL_V224_RESTORE: 1029fe6060f1SDimitry Andric case AMDGPU::SI_SPILL_A224_SAVE: 1030fe6060f1SDimitry Andric case AMDGPU::SI_SPILL_A224_RESTORE: 10310eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV224_SAVE: 10320eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV224_RESTORE: 1033fe6060f1SDimitry Andric return 7; 10345ffd83dbSDimitry Andric case AMDGPU::SI_SPILL_S192_SAVE: 10355ffd83dbSDimitry Andric case AMDGPU::SI_SPILL_S192_RESTORE: 10365ffd83dbSDimitry Andric case AMDGPU::SI_SPILL_V192_SAVE: 10375ffd83dbSDimitry Andric case AMDGPU::SI_SPILL_V192_RESTORE: 1038e8d8bef9SDimitry Andric case AMDGPU::SI_SPILL_A192_SAVE: 1039e8d8bef9SDimitry Andric case AMDGPU::SI_SPILL_A192_RESTORE: 10400eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV192_SAVE: 10410eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV192_RESTORE: 10425ffd83dbSDimitry Andric return 6; 10430b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S160_SAVE: 10440b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S160_RESTORE: 10450b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V160_SAVE: 10460b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V160_RESTORE: 1047e8d8bef9SDimitry Andric case AMDGPU::SI_SPILL_A160_SAVE: 1048e8d8bef9SDimitry Andric case AMDGPU::SI_SPILL_A160_RESTORE: 10490eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV160_SAVE: 10500eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV160_RESTORE: 10510b57cec5SDimitry Andric return 5; 10520b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S128_SAVE: 10530b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S128_RESTORE: 10540b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V128_SAVE: 10550b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V128_RESTORE: 10560b57cec5SDimitry Andric case AMDGPU::SI_SPILL_A128_SAVE: 10570b57cec5SDimitry Andric case AMDGPU::SI_SPILL_A128_RESTORE: 10580eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV128_SAVE: 10590eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV128_RESTORE: 10600b57cec5SDimitry Andric return 4; 10610b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S96_SAVE: 10620b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S96_RESTORE: 10630b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V96_SAVE: 10640b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V96_RESTORE: 1065e8d8bef9SDimitry Andric case AMDGPU::SI_SPILL_A96_SAVE: 1066e8d8bef9SDimitry Andric case AMDGPU::SI_SPILL_A96_RESTORE: 
10670eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV96_SAVE: 10680eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV96_RESTORE: 10690b57cec5SDimitry Andric return 3; 10700b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S64_SAVE: 10710b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S64_RESTORE: 10720b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V64_SAVE: 10730b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V64_RESTORE: 10740b57cec5SDimitry Andric case AMDGPU::SI_SPILL_A64_SAVE: 10750b57cec5SDimitry Andric case AMDGPU::SI_SPILL_A64_RESTORE: 10760eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV64_SAVE: 10770eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV64_RESTORE: 10780b57cec5SDimitry Andric return 2; 10790b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S32_SAVE: 10800b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S32_RESTORE: 10810b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V32_SAVE: 10820b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V32_RESTORE: 10830b57cec5SDimitry Andric case AMDGPU::SI_SPILL_A32_SAVE: 10840b57cec5SDimitry Andric case AMDGPU::SI_SPILL_A32_RESTORE: 10850eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV32_SAVE: 10860eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV32_RESTORE: 108706c3fb27SDimitry Andric case AMDGPU::SI_SPILL_WWM_V32_SAVE: 108806c3fb27SDimitry Andric case AMDGPU::SI_SPILL_WWM_V32_RESTORE: 10895f757f3fSDimitry Andric case AMDGPU::SI_SPILL_WWM_AV32_SAVE: 10905f757f3fSDimitry Andric case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: 10910b57cec5SDimitry Andric return 1; 10920b57cec5SDimitry Andric default: llvm_unreachable("Invalid spill opcode"); 10930b57cec5SDimitry Andric } 10940b57cec5SDimitry Andric } 10950b57cec5SDimitry Andric 10960b57cec5SDimitry Andric static int getOffsetMUBUFStore(unsigned Opc) { 10970b57cec5SDimitry Andric switch (Opc) { 10980b57cec5SDimitry Andric case AMDGPU::BUFFER_STORE_DWORD_OFFEN: 10990b57cec5SDimitry Andric return AMDGPU::BUFFER_STORE_DWORD_OFFSET; 11000b57cec5SDimitry Andric case AMDGPU::BUFFER_STORE_BYTE_OFFEN: 11010b57cec5SDimitry Andric return AMDGPU::BUFFER_STORE_BYTE_OFFSET; 11020b57cec5SDimitry Andric case AMDGPU::BUFFER_STORE_SHORT_OFFEN: 11030b57cec5SDimitry Andric return AMDGPU::BUFFER_STORE_SHORT_OFFSET; 11040b57cec5SDimitry Andric case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN: 11050b57cec5SDimitry Andric return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET; 110681ad6265SDimitry Andric case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN: 110781ad6265SDimitry Andric return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET; 11080b57cec5SDimitry Andric case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN: 11090b57cec5SDimitry Andric return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET; 11100b57cec5SDimitry Andric case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN: 11110b57cec5SDimitry Andric return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET; 11120b57cec5SDimitry Andric case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN: 11130b57cec5SDimitry Andric return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET; 11140b57cec5SDimitry Andric default: 11150b57cec5SDimitry Andric return -1; 11160b57cec5SDimitry Andric } 11170b57cec5SDimitry Andric } 11180b57cec5SDimitry Andric 11190b57cec5SDimitry Andric static int getOffsetMUBUFLoad(unsigned Opc) { 11200b57cec5SDimitry Andric switch (Opc) { 11210b57cec5SDimitry Andric case AMDGPU::BUFFER_LOAD_DWORD_OFFEN: 11220b57cec5SDimitry Andric return AMDGPU::BUFFER_LOAD_DWORD_OFFSET; 11230b57cec5SDimitry Andric case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN: 11240b57cec5SDimitry Andric return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET; 11250b57cec5SDimitry Andric case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN: 11260b57cec5SDimitry Andric 
return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET; 11270b57cec5SDimitry Andric case AMDGPU::BUFFER_LOAD_USHORT_OFFEN: 11280b57cec5SDimitry Andric return AMDGPU::BUFFER_LOAD_USHORT_OFFSET; 11290b57cec5SDimitry Andric case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN: 11300b57cec5SDimitry Andric return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET; 11310b57cec5SDimitry Andric case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN: 11320b57cec5SDimitry Andric return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET; 113381ad6265SDimitry Andric case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN: 113481ad6265SDimitry Andric return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET; 11350b57cec5SDimitry Andric case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN: 11360b57cec5SDimitry Andric return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET; 11370b57cec5SDimitry Andric case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN: 11380b57cec5SDimitry Andric return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET; 11390b57cec5SDimitry Andric case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN: 11400b57cec5SDimitry Andric return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET; 11410b57cec5SDimitry Andric case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN: 11420b57cec5SDimitry Andric return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET; 11430b57cec5SDimitry Andric case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN: 11440b57cec5SDimitry Andric return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET; 11450b57cec5SDimitry Andric case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN: 11460b57cec5SDimitry Andric return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET; 11470b57cec5SDimitry Andric case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN: 11480b57cec5SDimitry Andric return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET; 11490b57cec5SDimitry Andric default: 11500b57cec5SDimitry Andric return -1; 11510b57cec5SDimitry Andric } 11520b57cec5SDimitry Andric } 11530b57cec5SDimitry Andric 115481ad6265SDimitry Andric static int getOffenMUBUFStore(unsigned Opc) { 115581ad6265SDimitry Andric switch (Opc) { 115681ad6265SDimitry Andric case AMDGPU::BUFFER_STORE_DWORD_OFFSET: 115781ad6265SDimitry Andric return AMDGPU::BUFFER_STORE_DWORD_OFFEN; 115881ad6265SDimitry Andric case AMDGPU::BUFFER_STORE_BYTE_OFFSET: 115981ad6265SDimitry Andric return AMDGPU::BUFFER_STORE_BYTE_OFFEN; 116081ad6265SDimitry Andric case AMDGPU::BUFFER_STORE_SHORT_OFFSET: 116181ad6265SDimitry Andric return AMDGPU::BUFFER_STORE_SHORT_OFFEN; 116281ad6265SDimitry Andric case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET: 116381ad6265SDimitry Andric return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN; 116481ad6265SDimitry Andric case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET: 116581ad6265SDimitry Andric return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN; 116681ad6265SDimitry Andric case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET: 116781ad6265SDimitry Andric return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN; 116881ad6265SDimitry Andric case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET: 116981ad6265SDimitry Andric return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN; 117081ad6265SDimitry Andric case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET: 117181ad6265SDimitry Andric return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN; 117281ad6265SDimitry Andric default: 117381ad6265SDimitry Andric return -1; 117481ad6265SDimitry Andric } 117581ad6265SDimitry Andric } 117681ad6265SDimitry Andric 117781ad6265SDimitry Andric static int getOffenMUBUFLoad(unsigned Opc) { 117881ad6265SDimitry Andric switch (Opc) { 117981ad6265SDimitry Andric case AMDGPU::BUFFER_LOAD_DWORD_OFFSET: 118081ad6265SDimitry Andric return AMDGPU::BUFFER_LOAD_DWORD_OFFEN; 118181ad6265SDimitry Andric case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET: 118281ad6265SDimitry Andric return 
AMDGPU::BUFFER_LOAD_UBYTE_OFFEN; 118381ad6265SDimitry Andric case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET: 118481ad6265SDimitry Andric return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN; 118581ad6265SDimitry Andric case AMDGPU::BUFFER_LOAD_USHORT_OFFSET: 118681ad6265SDimitry Andric return AMDGPU::BUFFER_LOAD_USHORT_OFFEN; 118781ad6265SDimitry Andric case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET: 118881ad6265SDimitry Andric return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN; 118981ad6265SDimitry Andric case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET: 119081ad6265SDimitry Andric return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN; 119181ad6265SDimitry Andric case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET: 119281ad6265SDimitry Andric return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN; 119381ad6265SDimitry Andric case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET: 119481ad6265SDimitry Andric return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN; 119581ad6265SDimitry Andric case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET: 119681ad6265SDimitry Andric return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN; 119781ad6265SDimitry Andric case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET: 119881ad6265SDimitry Andric return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN; 119981ad6265SDimitry Andric case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET: 120081ad6265SDimitry Andric return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN; 120181ad6265SDimitry Andric case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET: 120281ad6265SDimitry Andric return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN; 120381ad6265SDimitry Andric case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET: 120481ad6265SDimitry Andric return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN; 120581ad6265SDimitry Andric case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET: 120681ad6265SDimitry Andric return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN; 120781ad6265SDimitry Andric default: 120881ad6265SDimitry Andric return -1; 120981ad6265SDimitry Andric } 121081ad6265SDimitry Andric } 121181ad6265SDimitry Andric 12128bcb0991SDimitry Andric static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, 1213fe6060f1SDimitry Andric MachineBasicBlock &MBB, 12148bcb0991SDimitry Andric MachineBasicBlock::iterator MI, 1215fe6060f1SDimitry Andric int Index, unsigned Lane, 1216fe6060f1SDimitry Andric unsigned ValueReg, bool IsKill) { 1217fe6060f1SDimitry Andric MachineFunction *MF = MBB.getParent(); 12180b57cec5SDimitry Andric SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 12190b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 12200b57cec5SDimitry Andric 12210b57cec5SDimitry Andric MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane); 12220b57cec5SDimitry Andric 12230b57cec5SDimitry Andric if (Reg == AMDGPU::NoRegister) 12240b57cec5SDimitry Andric return MachineInstrBuilder(); 12250b57cec5SDimitry Andric 12260b57cec5SDimitry Andric bool IsStore = MI->mayStore(); 12270b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF->getRegInfo(); 12280b57cec5SDimitry Andric auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo()); 12290b57cec5SDimitry Andric 12300b57cec5SDimitry Andric unsigned Dst = IsStore ? Reg : ValueReg; 12310b57cec5SDimitry Andric unsigned Src = IsStore ? ValueReg : Reg; 12324824e7fdSDimitry Andric bool IsVGPR = TRI->isVGPR(MRI, Reg); 12334824e7fdSDimitry Andric DebugLoc DL = MI->getDebugLoc(); 12344824e7fdSDimitry Andric if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) { 12354824e7fdSDimitry Andric // Spiller during regalloc may restore a spilled register to its superclass. 
12364824e7fdSDimitry Andric // It could result in AGPR spills restored to VGPRs or the other way around,
12374824e7fdSDimitry Andric // leaving the src and dst with identical regclasses at this point. A plain
12384824e7fdSDimitry Andric // copy suffices in such cases.
12394824e7fdSDimitry Andric auto CopyMIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), Dst)
12404824e7fdSDimitry Andric .addReg(Src, getKillRegState(IsKill));
12414824e7fdSDimitry Andric CopyMIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
12424824e7fdSDimitry Andric return CopyMIB;
12434824e7fdSDimitry Andric }
12444824e7fdSDimitry Andric unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
1245e8d8bef9SDimitry Andric : AMDGPU::V_ACCVGPR_READ_B32_e64;
12460b57cec5SDimitry Andric
12474824e7fdSDimitry Andric auto MIB = BuildMI(MBB, MI, DL, TII->get(Opc), Dst)
12480b57cec5SDimitry Andric .addReg(Src, getKillRegState(IsKill));
1249e8d8bef9SDimitry Andric MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
1250e8d8bef9SDimitry Andric return MIB;
12510b57cec5SDimitry Andric }
12520b57cec5SDimitry Andric
12530b57cec5SDimitry Andric // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
12540b57cec5SDimitry Andric // need to handle the case where an SGPR may need to be spilled while spilling.
12558bcb0991SDimitry Andric static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
12560b57cec5SDimitry Andric MachineFrameInfo &MFI,
12570b57cec5SDimitry Andric MachineBasicBlock::iterator MI,
12580b57cec5SDimitry Andric int Index,
12590b57cec5SDimitry Andric int64_t Offset) {
12608bcb0991SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo();
12610b57cec5SDimitry Andric MachineBasicBlock *MBB = MI->getParent();
12620b57cec5SDimitry Andric const DebugLoc &DL = MI->getDebugLoc();
12630b57cec5SDimitry Andric bool IsStore = MI->mayStore();
12640b57cec5SDimitry Andric
12650b57cec5SDimitry Andric unsigned Opc = MI->getOpcode();
12660b57cec5SDimitry Andric int LoadStoreOp = IsStore ?
12670b57cec5SDimitry Andric getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc); 12680b57cec5SDimitry Andric if (LoadStoreOp == -1) 12690b57cec5SDimitry Andric return false; 12700b57cec5SDimitry Andric 12710b57cec5SDimitry Andric const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata); 1272fe6060f1SDimitry Andric if (spillVGPRtoAGPR(ST, *MBB, MI, Index, 0, Reg->getReg(), false).getInstr()) 12730b57cec5SDimitry Andric return true; 12740b57cec5SDimitry Andric 12750b57cec5SDimitry Andric MachineInstrBuilder NewMI = 12760b57cec5SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp)) 12770b57cec5SDimitry Andric .add(*Reg) 12780b57cec5SDimitry Andric .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)) 12790b57cec5SDimitry Andric .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)) 12800b57cec5SDimitry Andric .addImm(Offset) 1281fe6060f1SDimitry Andric .addImm(0) // cpol 12828bcb0991SDimitry Andric .addImm(0) // swz 12830b57cec5SDimitry Andric .cloneMemRefs(*MI); 12840b57cec5SDimitry Andric 12850b57cec5SDimitry Andric const MachineOperand *VDataIn = TII->getNamedOperand(*MI, 12860b57cec5SDimitry Andric AMDGPU::OpName::vdata_in); 12870b57cec5SDimitry Andric if (VDataIn) 12880b57cec5SDimitry Andric NewMI.add(*VDataIn); 12890b57cec5SDimitry Andric return true; 12900b57cec5SDimitry Andric } 12910b57cec5SDimitry Andric 1292e8d8bef9SDimitry Andric static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII, 1293e8d8bef9SDimitry Andric unsigned LoadStoreOp, 1294e8d8bef9SDimitry Andric unsigned EltSize) { 1295e8d8bef9SDimitry Andric bool IsStore = TII->get(LoadStoreOp).mayStore(); 1296bdd1243dSDimitry Andric bool HasVAddr = AMDGPU::hasNamedOperand(LoadStoreOp, AMDGPU::OpName::vaddr); 1297e8d8bef9SDimitry Andric bool UseST = 1298bdd1243dSDimitry Andric !HasVAddr && !AMDGPU::hasNamedOperand(LoadStoreOp, AMDGPU::OpName::saddr); 1299e8d8bef9SDimitry Andric 1300e8d8bef9SDimitry Andric switch (EltSize) { 1301e8d8bef9SDimitry Andric case 4: 1302e8d8bef9SDimitry Andric LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR 1303e8d8bef9SDimitry Andric : AMDGPU::SCRATCH_LOAD_DWORD_SADDR; 1304e8d8bef9SDimitry Andric break; 1305e8d8bef9SDimitry Andric case 8: 1306e8d8bef9SDimitry Andric LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR 1307e8d8bef9SDimitry Andric : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR; 1308e8d8bef9SDimitry Andric break; 1309e8d8bef9SDimitry Andric case 12: 1310e8d8bef9SDimitry Andric LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR 1311e8d8bef9SDimitry Andric : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR; 1312e8d8bef9SDimitry Andric break; 1313e8d8bef9SDimitry Andric case 16: 1314e8d8bef9SDimitry Andric LoadStoreOp = IsStore ? 
AMDGPU::SCRATCH_STORE_DWORDX4_SADDR 1315e8d8bef9SDimitry Andric : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR; 1316e8d8bef9SDimitry Andric break; 1317e8d8bef9SDimitry Andric default: 1318e8d8bef9SDimitry Andric llvm_unreachable("Unexpected spill load/store size!"); 1319e8d8bef9SDimitry Andric } 1320e8d8bef9SDimitry Andric 132181ad6265SDimitry Andric if (HasVAddr) 132281ad6265SDimitry Andric LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp); 132381ad6265SDimitry Andric else if (UseST) 1324e8d8bef9SDimitry Andric LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp); 1325e8d8bef9SDimitry Andric 1326e8d8bef9SDimitry Andric return LoadStoreOp; 1327e8d8bef9SDimitry Andric } 1328e8d8bef9SDimitry Andric 1329fe6060f1SDimitry Andric void SIRegisterInfo::buildSpillLoadStore( 1330349cc55cSDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, 1331fe6060f1SDimitry Andric unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill, 1332fe6060f1SDimitry Andric MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO, 13335f757f3fSDimitry Andric RegScavenger *RS, LiveRegUnits *LiveUnits) const { 13345f757f3fSDimitry Andric assert((!RS || !LiveUnits) && "Only RS or LiveUnits can be set but not both"); 1335fe6060f1SDimitry Andric 1336fe6060f1SDimitry Andric MachineFunction *MF = MBB.getParent(); 13370b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 13380b57cec5SDimitry Andric const MachineFrameInfo &MFI = MF->getFrameInfo(); 13395ffd83dbSDimitry Andric const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>(); 13400b57cec5SDimitry Andric 1341e8d8bef9SDimitry Andric const MCInstrDesc *Desc = &TII->get(LoadStoreOp); 1342e8d8bef9SDimitry Andric bool IsStore = Desc->mayStore(); 1343e8d8bef9SDimitry Andric bool IsFlat = TII->isFLATScratch(LoadStoreOp); 13440b57cec5SDimitry Andric 134581ad6265SDimitry Andric bool CanClobberSCC = false; 13460b57cec5SDimitry Andric bool Scavenged = false; 13475ffd83dbSDimitry Andric MCRegister SOffset = ScratchOffsetReg; 13480b57cec5SDimitry Andric 13490b57cec5SDimitry Andric const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg); 1350fe6060f1SDimitry Andric // On gfx90a+ AGPR is a regular VGPR acceptable for loads and stores. 13514824e7fdSDimitry Andric const bool IsAGPR = !ST.hasGFX90AInsts() && isAGPRClass(RC); 135206c3fb27SDimitry Andric const unsigned RegWidth = AMDGPU::getRegBitWidth(*RC) / 8; 1353e8d8bef9SDimitry Andric 1354e8d8bef9SDimitry Andric // Always use 4 byte operations for AGPRs because we need to scavenge 1355e8d8bef9SDimitry Andric // a temporary VGPR. 1356e8d8bef9SDimitry Andric unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u; 1357e8d8bef9SDimitry Andric unsigned NumSubRegs = RegWidth / EltSize; 13580b57cec5SDimitry Andric unsigned Size = NumSubRegs * EltSize; 1359e8d8bef9SDimitry Andric unsigned RemSize = RegWidth - Size; 1360e8d8bef9SDimitry Andric unsigned NumRemSubRegs = RemSize ? 
1 : 0; 13610b57cec5SDimitry Andric int64_t Offset = InstOffset + MFI.getObjectOffset(Index); 136281ad6265SDimitry Andric int64_t MaterializedOffset = Offset; 136381ad6265SDimitry Andric 1364e8d8bef9SDimitry Andric int64_t MaxOffset = Offset + Size + RemSize - EltSize; 13650b57cec5SDimitry Andric int64_t ScratchOffsetRegDelta = 0; 13660b57cec5SDimitry Andric 1367e8d8bef9SDimitry Andric if (IsFlat && EltSize > 4) { 1368e8d8bef9SDimitry Andric LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize); 1369e8d8bef9SDimitry Andric Desc = &TII->get(LoadStoreOp); 1370e8d8bef9SDimitry Andric } 1371e8d8bef9SDimitry Andric 13725ffd83dbSDimitry Andric Align Alignment = MFI.getObjectAlign(Index); 13730b57cec5SDimitry Andric const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo(); 13740b57cec5SDimitry Andric 1375e8d8bef9SDimitry Andric assert((IsFlat || ((Offset % EltSize) == 0)) && 1376e8d8bef9SDimitry Andric "unexpected VGPR spill offset"); 13770b57cec5SDimitry Andric 137881ad6265SDimitry Andric // Track a VGPR to use for a constant offset we need to materialize. 137981ad6265SDimitry Andric Register TmpOffsetVGPR; 138081ad6265SDimitry Andric 138181ad6265SDimitry Andric // Track a VGPR to use as an intermediate value. 138281ad6265SDimitry Andric Register TmpIntermediateVGPR; 138381ad6265SDimitry Andric bool UseVGPROffset = false; 138481ad6265SDimitry Andric 138581ad6265SDimitry Andric // Materialize a VGPR offset required for the given SGPR/VGPR/Immediate 138681ad6265SDimitry Andric // combination. 138781ad6265SDimitry Andric auto MaterializeVOffset = [&](Register SGPRBase, Register TmpVGPR, 138881ad6265SDimitry Andric int64_t VOffset) { 138981ad6265SDimitry Andric // We are using a VGPR offset 139081ad6265SDimitry Andric if (IsFlat && SGPRBase) { 139181ad6265SDimitry Andric // We only have 1 VGPR offset, or 1 SGPR offset. We don't have a free 139281ad6265SDimitry Andric // SGPR, so perform the add as vector. 139381ad6265SDimitry Andric // We don't need a base SGPR in the kernel. 139481ad6265SDimitry Andric 139581ad6265SDimitry Andric if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) >= 2) { 139681ad6265SDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e64), TmpVGPR) 139781ad6265SDimitry Andric .addReg(SGPRBase) 139881ad6265SDimitry Andric .addImm(VOffset) 139981ad6265SDimitry Andric .addImm(0); // clamp 140081ad6265SDimitry Andric } else { 140181ad6265SDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) 140281ad6265SDimitry Andric .addReg(SGPRBase); 140381ad6265SDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e32), TmpVGPR) 140481ad6265SDimitry Andric .addImm(VOffset) 140581ad6265SDimitry Andric .addReg(TmpOffsetVGPR); 140681ad6265SDimitry Andric } 140781ad6265SDimitry Andric } else { 140881ad6265SDimitry Andric assert(TmpOffsetVGPR); 140981ad6265SDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) 141081ad6265SDimitry Andric .addImm(VOffset); 141181ad6265SDimitry Andric } 141281ad6265SDimitry Andric }; 141381ad6265SDimitry Andric 1414fe6060f1SDimitry Andric bool IsOffsetLegal = 1415fe6060f1SDimitry Andric IsFlat ? 
TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
1416fe6060f1SDimitry Andric SIInstrFlags::FlatScratch)
14175f757f3fSDimitry Andric : TII->isLegalMUBUFImmOffset(MaxOffset);
1418e8d8bef9SDimitry Andric if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {
14195ffd83dbSDimitry Andric SOffset = MCRegister();
14200b57cec5SDimitry Andric
14210b57cec5SDimitry Andric // We don't have access to the register scavenger if this function is called
14225f757f3fSDimitry Andric // during PEI::scavengeFrameVirtualRegs(), so use LiveUnits in this case.
142381ad6265SDimitry Andric // TODO: Clobbering SCC is not necessary for scratch instructions in the
142481ad6265SDimitry Andric // entry.
1425fe6060f1SDimitry Andric if (RS) {
1426bdd1243dSDimitry Andric SOffset = RS->scavengeRegisterBackwards(AMDGPU::SGPR_32RegClass, MI, false, 0, false);
142781ad6265SDimitry Andric
142881ad6265SDimitry Andric // Piggy back on the liveness scan we just did to see if SCC is dead.
142981ad6265SDimitry Andric CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
14305f757f3fSDimitry Andric } else if (LiveUnits) {
14315f757f3fSDimitry Andric CanClobberSCC = LiveUnits->available(AMDGPU::SCC);
1432fe6060f1SDimitry Andric for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
14335f757f3fSDimitry Andric if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
1434fe6060f1SDimitry Andric SOffset = Reg;
1435fe6060f1SDimitry Andric break;
1436fe6060f1SDimitry Andric }
1437fe6060f1SDimitry Andric }
1438fe6060f1SDimitry Andric }
14390b57cec5SDimitry Andric
144081ad6265SDimitry Andric if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC)
144181ad6265SDimitry Andric SOffset = Register();
144281ad6265SDimitry Andric
14435ffd83dbSDimitry Andric if (!SOffset) {
144481ad6265SDimitry Andric UseVGPROffset = true;
144581ad6265SDimitry Andric
144681ad6265SDimitry Andric if (RS) {
1447bdd1243dSDimitry Andric TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
144881ad6265SDimitry Andric } else {
14495f757f3fSDimitry Andric assert(LiveUnits);
145081ad6265SDimitry Andric for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {
14515f757f3fSDimitry Andric if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
145281ad6265SDimitry Andric TmpOffsetVGPR = Reg;
145381ad6265SDimitry Andric break;
145481ad6265SDimitry Andric }
145581ad6265SDimitry Andric }
145681ad6265SDimitry Andric }
145781ad6265SDimitry Andric
145881ad6265SDimitry Andric assert(TmpOffsetVGPR);
145981ad6265SDimitry Andric } else if (!SOffset && CanClobberSCC) {
14600b57cec5SDimitry Andric // There are no free SGPRs, and we are in the process of spilling
14610b57cec5SDimitry Andric // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
14620b57cec5SDimitry Andric // on SI/CI and on VI it is true until we implement spilling using scalar
14630b57cec5SDimitry Andric // stores), we have no way to free up an SGPR. Our solution here is to
14645ffd83dbSDimitry Andric // add the offset directly to the ScratchOffset or StackPtrOffset
14655ffd83dbSDimitry Andric // register, and then subtract the offset after the spill to return the
14665ffd83dbSDimitry Andric // register to its original value.
146781ad6265SDimitry Andric
146881ad6265SDimitry Andric // TODO: If we don't have to do an emergency stack slot spill, converting
146981ad6265SDimitry Andric // to use the VGPR offset is fewer instructions.
14705ffd83dbSDimitry Andric if (!ScratchOffsetReg) 14715ffd83dbSDimitry Andric ScratchOffsetReg = FuncInfo->getStackPtrOffsetReg(); 14720b57cec5SDimitry Andric SOffset = ScratchOffsetReg; 14730b57cec5SDimitry Andric ScratchOffsetRegDelta = Offset; 14740b57cec5SDimitry Andric } else { 14750b57cec5SDimitry Andric Scavenged = true; 14760b57cec5SDimitry Andric } 14770b57cec5SDimitry Andric 147881ad6265SDimitry Andric // We currently only support spilling VGPRs to EltSize boundaries, meaning 147981ad6265SDimitry Andric // we can simplify the adjustment of Offset here to just scale with 148081ad6265SDimitry Andric // WavefrontSize. 148181ad6265SDimitry Andric if (!IsFlat && !UseVGPROffset) 148281ad6265SDimitry Andric Offset *= ST.getWavefrontSize(); 148381ad6265SDimitry Andric 148481ad6265SDimitry Andric if (!UseVGPROffset && !SOffset) 14855ffd83dbSDimitry Andric report_fatal_error("could not scavenge SGPR to spill in entry function"); 14865ffd83dbSDimitry Andric 148781ad6265SDimitry Andric if (UseVGPROffset) { 148881ad6265SDimitry Andric // We are using a VGPR offset 148981ad6265SDimitry Andric MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, Offset); 149081ad6265SDimitry Andric } else if (ScratchOffsetReg == AMDGPU::NoRegister) { 1491fe6060f1SDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset); 14925ffd83dbSDimitry Andric } else { 149381ad6265SDimitry Andric assert(Offset != 0); 14940eae32dcSDimitry Andric auto Add = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset) 14950b57cec5SDimitry Andric .addReg(ScratchOffsetReg) 14960b57cec5SDimitry Andric .addImm(Offset); 14970eae32dcSDimitry Andric Add->getOperand(3).setIsDead(); // Mark SCC as dead. 14985ffd83dbSDimitry Andric } 14990b57cec5SDimitry Andric 15000b57cec5SDimitry Andric Offset = 0; 15010b57cec5SDimitry Andric } 15020b57cec5SDimitry Andric 1503e8d8bef9SDimitry Andric if (IsFlat && SOffset == AMDGPU::NoRegister) { 1504e8d8bef9SDimitry Andric assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0 1505e8d8bef9SDimitry Andric && "Unexpected vaddr for flat scratch with a FI operand"); 1506e8d8bef9SDimitry Andric 150781ad6265SDimitry Andric if (UseVGPROffset) { 150881ad6265SDimitry Andric LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp); 150981ad6265SDimitry Andric } else { 1510e8d8bef9SDimitry Andric assert(ST.hasFlatScratchSTMode()); 1511e8d8bef9SDimitry Andric LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp); 1512e8d8bef9SDimitry Andric } 1513e8d8bef9SDimitry Andric 151481ad6265SDimitry Andric Desc = &TII->get(LoadStoreOp); 151581ad6265SDimitry Andric } 1516e8d8bef9SDimitry Andric 1517e8d8bef9SDimitry Andric for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e; 1518e8d8bef9SDimitry Andric ++i, RegOffset += EltSize) { 1519e8d8bef9SDimitry Andric if (i == NumSubRegs) { 1520e8d8bef9SDimitry Andric EltSize = RemSize; 1521e8d8bef9SDimitry Andric LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize); 1522e8d8bef9SDimitry Andric } 1523e8d8bef9SDimitry Andric Desc = &TII->get(LoadStoreOp); 1524e8d8bef9SDimitry Andric 152581ad6265SDimitry Andric if (!IsFlat && UseVGPROffset) { 152681ad6265SDimitry Andric int NewLoadStoreOp = IsStore ? 
getOffenMUBUFStore(LoadStoreOp) 152781ad6265SDimitry Andric : getOffenMUBUFLoad(LoadStoreOp); 152881ad6265SDimitry Andric Desc = &TII->get(NewLoadStoreOp); 152981ad6265SDimitry Andric } 153081ad6265SDimitry Andric 153181ad6265SDimitry Andric if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) { 153281ad6265SDimitry Andric // If we are spilling an AGPR beyond the range of the memory instruction 153381ad6265SDimitry Andric // offset and need to use a VGPR offset, we ideally have at least 2 153481ad6265SDimitry Andric // scratch VGPRs. If we don't have a second free VGPR without spilling, 153581ad6265SDimitry Andric // recycle the VGPR used for the offset which requires resetting after 153681ad6265SDimitry Andric // each subregister. 153781ad6265SDimitry Andric 153881ad6265SDimitry Andric MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset); 153981ad6265SDimitry Andric } 154081ad6265SDimitry Andric 1541e8d8bef9SDimitry Andric unsigned NumRegs = EltSize / 4; 1542e8d8bef9SDimitry Andric Register SubReg = e == 1 1543e8d8bef9SDimitry Andric ? ValueReg 1544e8d8bef9SDimitry Andric : Register(getSubReg(ValueReg, 1545e8d8bef9SDimitry Andric getSubRegFromChannel(RegOffset / 4, NumRegs))); 15460b57cec5SDimitry Andric 15470b57cec5SDimitry Andric unsigned SOffsetRegState = 0; 15480b57cec5SDimitry Andric unsigned SrcDstRegState = getDefRegState(!IsStore); 154981ad6265SDimitry Andric const bool IsLastSubReg = i + 1 == e; 1550bdd1243dSDimitry Andric const bool IsFirstSubReg = i == 0; 155181ad6265SDimitry Andric if (IsLastSubReg) { 15520b57cec5SDimitry Andric SOffsetRegState |= getKillRegState(Scavenged); 15530b57cec5SDimitry Andric // The last implicit use carries the "Kill" flag. 15540b57cec5SDimitry Andric SrcDstRegState |= getKillRegState(IsKill); 15550b57cec5SDimitry Andric } 15560b57cec5SDimitry Andric 1557e8d8bef9SDimitry Andric // Make sure the whole register is defined if there are undef components by 1558e8d8bef9SDimitry Andric // adding an implicit def of the super-reg on the first instruction. 1559bdd1243dSDimitry Andric bool NeedSuperRegDef = e > 1 && IsStore && IsFirstSubReg; 1560e8d8bef9SDimitry Andric bool NeedSuperRegImpOperand = e > 1; 15610b57cec5SDimitry Andric 1562349cc55cSDimitry Andric // Remaining element size to spill into memory after some parts of it 1563349cc55cSDimitry Andric // spilled into either AGPRs or VGPRs. 1564349cc55cSDimitry Andric unsigned RemEltSize = EltSize; 1565349cc55cSDimitry Andric 1566349cc55cSDimitry Andric // AGPRs to spill VGPRs and vice versa are allocated in a reverse order, 1567349cc55cSDimitry Andric // starting from the last lane. In case if a register cannot be completely 1568349cc55cSDimitry Andric // spilled into another register that will ensure its alignment does not 1569349cc55cSDimitry Andric // change. For targets with VGPR alignment requirement this is important 1570349cc55cSDimitry Andric // in case of flat scratch usage as we might get a scratch_load or 1571349cc55cSDimitry Andric // scratch_store of an unaligned register otherwise. 1572349cc55cSDimitry Andric for (int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS, 1573349cc55cSDimitry Andric LaneE = RegOffset / 4; 1574349cc55cSDimitry Andric Lane >= LaneE; --Lane) { 1575e8d8bef9SDimitry Andric bool IsSubReg = e > 1 || EltSize > 4; 1576e8d8bef9SDimitry Andric Register Sub = IsSubReg 1577e8d8bef9SDimitry Andric ? 
Register(getSubReg(ValueReg, getSubRegFromChannel(Lane))) 1578e8d8bef9SDimitry Andric : ValueReg; 1579fe6060f1SDimitry Andric auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill); 1580e8d8bef9SDimitry Andric if (!MIB.getInstr()) 1581e8d8bef9SDimitry Andric break; 1582bdd1243dSDimitry Andric if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) { 1583e8d8bef9SDimitry Andric MIB.addReg(ValueReg, RegState::ImplicitDefine); 1584e8d8bef9SDimitry Andric NeedSuperRegDef = false; 1585e8d8bef9SDimitry Andric } 1586bdd1243dSDimitry Andric if ((IsSubReg || NeedSuperRegImpOperand) && (IsFirstSubReg || IsLastSubReg)) { 1587e8d8bef9SDimitry Andric NeedSuperRegImpOperand = true; 1588e8d8bef9SDimitry Andric unsigned State = SrcDstRegState; 1589bdd1243dSDimitry Andric if (!IsLastSubReg || (Lane != LaneE)) 1590e8d8bef9SDimitry Andric State &= ~RegState::Kill; 1591bdd1243dSDimitry Andric if (!IsFirstSubReg || (Lane != LaneS)) 1592bdd1243dSDimitry Andric State &= ~RegState::Define; 1593e8d8bef9SDimitry Andric MIB.addReg(ValueReg, RegState::Implicit | State); 1594e8d8bef9SDimitry Andric } 1595349cc55cSDimitry Andric RemEltSize -= 4; 1596e8d8bef9SDimitry Andric } 1597e8d8bef9SDimitry Andric 1598349cc55cSDimitry Andric if (!RemEltSize) // Fully spilled into AGPRs. 1599e8d8bef9SDimitry Andric continue; 1600e8d8bef9SDimitry Andric 1601e8d8bef9SDimitry Andric if (RemEltSize != EltSize) { // Partially spilled to AGPRs 1602e8d8bef9SDimitry Andric assert(IsFlat && EltSize > 4); 1603e8d8bef9SDimitry Andric 1604e8d8bef9SDimitry Andric unsigned NumRegs = RemEltSize / 4; 1605e8d8bef9SDimitry Andric SubReg = Register(getSubReg(ValueReg, 1606349cc55cSDimitry Andric getSubRegFromChannel(RegOffset / 4, NumRegs))); 1607e8d8bef9SDimitry Andric unsigned Opc = getFlatScratchSpillOpcode(TII, LoadStoreOp, RemEltSize); 1608e8d8bef9SDimitry Andric Desc = &TII->get(Opc); 1609e8d8bef9SDimitry Andric } 1610e8d8bef9SDimitry Andric 16110b57cec5SDimitry Andric unsigned FinalReg = SubReg; 1612e8d8bef9SDimitry Andric 1613e8d8bef9SDimitry Andric if (IsAGPR) { 1614e8d8bef9SDimitry Andric assert(EltSize == 4); 1615e8d8bef9SDimitry Andric 161681ad6265SDimitry Andric if (!TmpIntermediateVGPR) { 161781ad6265SDimitry Andric TmpIntermediateVGPR = FuncInfo->getVGPRForAGPRCopy(); 161881ad6265SDimitry Andric assert(MF->getRegInfo().isReserved(TmpIntermediateVGPR)); 1619e8d8bef9SDimitry Andric } 1620e8d8bef9SDimitry Andric if (IsStore) { 1621fe6060f1SDimitry Andric auto AccRead = BuildMI(MBB, MI, DL, 162281ad6265SDimitry Andric TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), 162381ad6265SDimitry Andric TmpIntermediateVGPR) 16240b57cec5SDimitry Andric .addReg(SubReg, getKillRegState(IsKill)); 1625e8d8bef9SDimitry Andric if (NeedSuperRegDef) 1626e8d8bef9SDimitry Andric AccRead.addReg(ValueReg, RegState::ImplicitDefine); 1627e8d8bef9SDimitry Andric AccRead->setAsmPrinterFlag(MachineInstr::ReloadReuse); 1628e8d8bef9SDimitry Andric } 162981ad6265SDimitry Andric SubReg = TmpIntermediateVGPR; 163081ad6265SDimitry Andric } else if (UseVGPROffset) { 163181ad6265SDimitry Andric if (!TmpOffsetVGPR) { 163206c3fb27SDimitry Andric TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, 163306c3fb27SDimitry Andric MI, false, 0); 163481ad6265SDimitry Andric RS->setRegUsed(TmpOffsetVGPR); 163581ad6265SDimitry Andric } 16360b57cec5SDimitry Andric } 16370b57cec5SDimitry Andric 1638349cc55cSDimitry Andric MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RegOffset); 16395ffd83dbSDimitry Andric MachineMemOperand *NewMMO = 
1640e8d8bef9SDimitry Andric MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize, 1641349cc55cSDimitry Andric commonAlignment(Alignment, RegOffset)); 16420b57cec5SDimitry Andric 1643fe6060f1SDimitry Andric auto MIB = 1644fe6060f1SDimitry Andric BuildMI(MBB, MI, DL, *Desc) 1645fe6060f1SDimitry Andric .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill)); 164681ad6265SDimitry Andric 164781ad6265SDimitry Andric if (UseVGPROffset) { 164881ad6265SDimitry Andric // For an AGPR spill, we reuse the same temp VGPR for the offset and the 164981ad6265SDimitry Andric // intermediate accvgpr_write. 165081ad6265SDimitry Andric MIB.addReg(TmpOffsetVGPR, getKillRegState(IsLastSubReg && !IsAGPR)); 165181ad6265SDimitry Andric } 165281ad6265SDimitry Andric 1653e8d8bef9SDimitry Andric if (!IsFlat) 1654e8d8bef9SDimitry Andric MIB.addReg(FuncInfo->getScratchRSrcReg()); 1655e8d8bef9SDimitry Andric 16565ffd83dbSDimitry Andric if (SOffset == AMDGPU::NoRegister) { 165781ad6265SDimitry Andric if (!IsFlat) { 165881ad6265SDimitry Andric if (UseVGPROffset && ScratchOffsetReg) { 165981ad6265SDimitry Andric MIB.addReg(ScratchOffsetReg); 166081ad6265SDimitry Andric } else { 16615f757f3fSDimitry Andric assert(FuncInfo->isBottomOfStack()); 16625ffd83dbSDimitry Andric MIB.addImm(0); 166381ad6265SDimitry Andric } 166481ad6265SDimitry Andric } 16655ffd83dbSDimitry Andric } else { 16665ffd83dbSDimitry Andric MIB.addReg(SOffset, SOffsetRegState); 16675ffd83dbSDimitry Andric } 16687a6dacacSDimitry Andric 16697a6dacacSDimitry Andric MIB.addImm(Offset + RegOffset); 16707a6dacacSDimitry Andric 16717a6dacacSDimitry Andric bool LastUse = MMO->getFlags() & MOLastUse; 16727a6dacacSDimitry Andric MIB.addImm(LastUse ? AMDGPU::CPol::TH_LU : 0); // cpol 16737a6dacacSDimitry Andric 1674e8d8bef9SDimitry Andric if (!IsFlat) 1675bdd1243dSDimitry Andric MIB.addImm(0); // swz 1676e8d8bef9SDimitry Andric MIB.addMemOperand(NewMMO); 16770b57cec5SDimitry Andric 1678e8d8bef9SDimitry Andric if (!IsAGPR && NeedSuperRegDef) 1679e8d8bef9SDimitry Andric MIB.addReg(ValueReg, RegState::ImplicitDefine); 1680e8d8bef9SDimitry Andric 168181ad6265SDimitry Andric if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) { 1682fe6060f1SDimitry Andric MIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), 16830b57cec5SDimitry Andric FinalReg) 168481ad6265SDimitry Andric .addReg(TmpIntermediateVGPR, RegState::Kill); 1685e8d8bef9SDimitry Andric MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse); 16860b57cec5SDimitry Andric } 16870b57cec5SDimitry Andric 1688bdd1243dSDimitry Andric if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg)) 16890b57cec5SDimitry Andric MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState); 169006c3fb27SDimitry Andric 169106c3fb27SDimitry Andric // The epilog restore of a wwm-scratch register can cause undesired 169206c3fb27SDimitry Andric // optimization during machine-cp post PrologEpilogInserter if the same 169306c3fb27SDimitry Andric // register was assigned for return value ABI lowering with a COPY 169406c3fb27SDimitry Andric // instruction. As given below, with the epilog reload, the earlier COPY 169506c3fb27SDimitry Andric // appeared to be dead during machine-cp. 169606c3fb27SDimitry Andric // ... 169706c3fb27SDimitry Andric // v0 in WWM operation, needs the WWM spill at prolog/epilog. 169806c3fb27SDimitry Andric // $vgpr0 = V_WRITELANE_B32 $sgpr20, 0, $vgpr0 169906c3fb27SDimitry Andric // ... 
170006c3fb27SDimitry Andric // Epilog block: 170106c3fb27SDimitry Andric // $vgpr0 = COPY $vgpr1 // outgoing value moved to v0 170206c3fb27SDimitry Andric // ... 170306c3fb27SDimitry Andric // WWM spill restore to preserve the inactive lanes of v0. 170406c3fb27SDimitry Andric // $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1 170506c3fb27SDimitry Andric // $vgpr0 = BUFFER_LOAD $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0 170606c3fb27SDimitry Andric // $exec = S_MOV_B64 killed $sgpr4_sgpr5 170706c3fb27SDimitry Andric // ... 170806c3fb27SDimitry Andric // SI_RETURN implicit $vgpr0 170906c3fb27SDimitry Andric // ... 171006c3fb27SDimitry Andric // To fix it, mark the same reg as a tied op for such restore instructions 171106c3fb27SDimitry Andric // so that it marks a usage for the preceding COPY. 171206c3fb27SDimitry Andric if (!IsStore && MI != MBB.end() && MI->isReturn() && 171306c3fb27SDimitry Andric MI->readsRegister(SubReg, this)) { 171406c3fb27SDimitry Andric MIB.addReg(SubReg, RegState::Implicit); 171506c3fb27SDimitry Andric MIB->tieOperands(0, MIB->getNumOperands() - 1); 171606c3fb27SDimitry Andric } 17170b57cec5SDimitry Andric } 17180b57cec5SDimitry Andric 17190b57cec5SDimitry Andric if (ScratchOffsetRegDelta != 0) { 17200b57cec5SDimitry Andric // Subtract the offset we added to the ScratchOffset register. 1721fe6060f1SDimitry Andric BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset) 17225ffd83dbSDimitry Andric .addReg(SOffset) 1723fe6060f1SDimitry Andric .addImm(-ScratchOffsetRegDelta); 17240b57cec5SDimitry Andric } 17250b57cec5SDimitry Andric } 17260b57cec5SDimitry Andric 1727fe6060f1SDimitry Andric void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, 1728fe6060f1SDimitry Andric int Offset, bool IsLoad, 1729fe6060f1SDimitry Andric bool IsKill) const { 17305ffd83dbSDimitry Andric // Load/store VGPR 1731fe6060f1SDimitry Andric MachineFrameInfo &FrameInfo = SB.MF.getFrameInfo(); 17325ffd83dbSDimitry Andric assert(FrameInfo.getStackID(Index) != TargetStackID::SGPRSpill); 17335ffd83dbSDimitry Andric 1734fe6060f1SDimitry Andric Register FrameReg = 1735fe6060f1SDimitry Andric FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(SB.MF) 17365ffd83dbSDimitry Andric ? getBaseRegister() 1737fe6060f1SDimitry Andric : getFrameRegister(SB.MF); 17385ffd83dbSDimitry Andric 17395ffd83dbSDimitry Andric Align Alignment = FrameInfo.getObjectAlign(Index); 1740fe6060f1SDimitry Andric MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SB.MF, Index); 1741fe6060f1SDimitry Andric MachineMemOperand *MMO = SB.MF.getMachineMemOperand( 17425ffd83dbSDimitry Andric PtrInfo, IsLoad ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore, 1743fe6060f1SDimitry Andric SB.EltSize, Alignment); 17445ffd83dbSDimitry Andric 17455ffd83dbSDimitry Andric if (IsLoad) { 1746e8d8bef9SDimitry Andric unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR 1747e8d8bef9SDimitry Andric : AMDGPU::BUFFER_LOAD_DWORD_OFFSET; 1748349cc55cSDimitry Andric buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, false, 1749*0fca6ea1SDimitry Andric FrameReg, (int64_t)Offset * SB.EltSize, MMO, SB.RS); 17505ffd83dbSDimitry Andric } else { 1751e8d8bef9SDimitry Andric unsigned Opc = ST.enableFlatScratch() ? 
AMDGPU::SCRATCH_STORE_DWORD_SADDR 1752e8d8bef9SDimitry Andric : AMDGPU::BUFFER_STORE_DWORD_OFFSET; 1753349cc55cSDimitry Andric buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, IsKill, 1754*0fca6ea1SDimitry Andric FrameReg, (int64_t)Offset * SB.EltSize, MMO, SB.RS); 17555ffd83dbSDimitry Andric // This only ever adds one VGPR spill 1756fe6060f1SDimitry Andric SB.MFI.addToSpilledVGPRs(1); 17575ffd83dbSDimitry Andric } 17585ffd83dbSDimitry Andric } 17595ffd83dbSDimitry Andric 1760bdd1243dSDimitry Andric bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index, 1761bdd1243dSDimitry Andric RegScavenger *RS, SlotIndexes *Indexes, 17625f757f3fSDimitry Andric LiveIntervals *LIS, bool OnlyToVGPR, 17635f757f3fSDimitry Andric bool SpillToPhysVGPRLane) const { 1764fe6060f1SDimitry Andric SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS); 17650b57cec5SDimitry Andric 17665f757f3fSDimitry Andric ArrayRef<SpilledReg> VGPRSpills = 17675f757f3fSDimitry Andric SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index) 17685f757f3fSDimitry Andric : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index); 17690b57cec5SDimitry Andric bool SpillToVGPR = !VGPRSpills.empty(); 17700b57cec5SDimitry Andric if (OnlyToVGPR && !SpillToVGPR) 17710b57cec5SDimitry Andric return false; 17720b57cec5SDimitry Andric 1773fe6060f1SDimitry Andric assert(SpillToVGPR || (SB.SuperReg != SB.MFI.getStackPtrOffsetReg() && 1774fe6060f1SDimitry Andric SB.SuperReg != SB.MFI.getFrameOffsetReg())); 17750b57cec5SDimitry Andric 17765ffd83dbSDimitry Andric if (SpillToVGPR) { 1777349cc55cSDimitry Andric 1778349cc55cSDimitry Andric assert(SB.NumSubRegs == VGPRSpills.size() && 1779349cc55cSDimitry Andric "Num of VGPR lanes should be equal to num of SGPRs spilled"); 1780349cc55cSDimitry Andric 1781fe6060f1SDimitry Andric for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) { 1782fe6060f1SDimitry Andric Register SubReg = 1783fe6060f1SDimitry Andric SB.NumSubRegs == 1 1784fe6060f1SDimitry Andric ? SB.SuperReg 1785fe6060f1SDimitry Andric : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); 178681ad6265SDimitry Andric SpilledReg Spill = VGPRSpills[i]; 17870b57cec5SDimitry Andric 1788bdd1243dSDimitry Andric bool IsFirstSubreg = i == 0; 1789bdd1243dSDimitry Andric bool IsLastSubreg = i == SB.NumSubRegs - 1; 1790bdd1243dSDimitry Andric bool UseKill = SB.IsKill && IsLastSubreg; 1791bdd1243dSDimitry Andric 17920b57cec5SDimitry Andric 17930b57cec5SDimitry Andric // Mark the "old value of vgpr" input undef only if this is the first sgpr 17940b57cec5SDimitry Andric // spill to this specific vgpr in the first basic block. 1795349cc55cSDimitry Andric auto MIB = BuildMI(*SB.MBB, MI, SB.DL, 17965f757f3fSDimitry Andric SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), Spill.VGPR) 1797e8d8bef9SDimitry Andric .addReg(SubReg, getKillRegState(UseKill)) 17980b57cec5SDimitry Andric .addImm(Spill.Lane) 1799e8d8bef9SDimitry Andric .addReg(Spill.VGPR); 1800bdd1243dSDimitry Andric if (Indexes) { 1801bdd1243dSDimitry Andric if (IsFirstSubreg) 1802bdd1243dSDimitry Andric Indexes->replaceMachineInstrInMaps(*MI, *MIB); 1803fe6060f1SDimitry Andric else 1804bdd1243dSDimitry Andric Indexes->insertMachineInstrInMaps(*MIB); 1805e8d8bef9SDimitry Andric } 1806e8d8bef9SDimitry Andric 1807bdd1243dSDimitry Andric if (IsFirstSubreg && SB.NumSubRegs > 1) { 1808fe6060f1SDimitry Andric // We may be spilling a super-register which is only partially defined, 1809fe6060f1SDimitry Andric // and need to ensure later spills think the value is defined. 
1810fe6060f1SDimitry Andric MIB.addReg(SB.SuperReg, RegState::ImplicitDefine); 1811fe6060f1SDimitry Andric } 1812fe6060f1SDimitry Andric 1813bdd1243dSDimitry Andric if (SB.NumSubRegs > 1 && (IsFirstSubreg || IsLastSubreg)) 1814fe6060f1SDimitry Andric MIB.addReg(SB.SuperReg, getKillRegState(UseKill) | RegState::Implicit); 18150b57cec5SDimitry Andric 18160b57cec5SDimitry Andric // FIXME: Since this spills to another register instead of an actual 18170b57cec5SDimitry Andric // frame index, we should delete the frame index when all references to 18180b57cec5SDimitry Andric // it are fixed. 18195ffd83dbSDimitry Andric } 18200b57cec5SDimitry Andric } else { 1821fe6060f1SDimitry Andric SB.prepare(); 18220b57cec5SDimitry Andric 1823fe6060f1SDimitry Andric // SubReg carries the "Kill" flag when SubReg == SB.SuperReg. 1824fe6060f1SDimitry Andric unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill); 18250b57cec5SDimitry Andric 1826fe6060f1SDimitry Andric // Per VGPR helper data 1827fe6060f1SDimitry Andric auto PVD = SB.getPerVGPRData(); 18285ffd83dbSDimitry Andric 1829fe6060f1SDimitry Andric for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) { 18305ffd83dbSDimitry Andric unsigned TmpVGPRFlags = RegState::Undef; 18315ffd83dbSDimitry Andric 18325ffd83dbSDimitry Andric // Write sub registers into the VGPR 1833fe6060f1SDimitry Andric for (unsigned i = Offset * PVD.PerVGPR, 1834fe6060f1SDimitry Andric e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs); 18355ffd83dbSDimitry Andric i < e; ++i) { 1836fe6060f1SDimitry Andric Register SubReg = 1837fe6060f1SDimitry Andric SB.NumSubRegs == 1 1838fe6060f1SDimitry Andric ? SB.SuperReg 1839fe6060f1SDimitry Andric : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); 18405ffd83dbSDimitry Andric 18415ffd83dbSDimitry Andric MachineInstrBuilder WriteLane = 18425f757f3fSDimitry Andric BuildMI(*SB.MBB, MI, SB.DL, 18435f757f3fSDimitry Andric SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), SB.TmpVGPR) 18445ffd83dbSDimitry Andric .addReg(SubReg, SubKillState) 1845fe6060f1SDimitry Andric .addImm(i % PVD.PerVGPR) 1846fe6060f1SDimitry Andric .addReg(SB.TmpVGPR, TmpVGPRFlags); 18475ffd83dbSDimitry Andric TmpVGPRFlags = 0; 18480b57cec5SDimitry Andric 1849bdd1243dSDimitry Andric if (Indexes) { 1850fe6060f1SDimitry Andric if (i == 0) 1851bdd1243dSDimitry Andric Indexes->replaceMachineInstrInMaps(*MI, *WriteLane); 1852fe6060f1SDimitry Andric else 1853bdd1243dSDimitry Andric Indexes->insertMachineInstrInMaps(*WriteLane); 1854fe6060f1SDimitry Andric } 1855fe6060f1SDimitry Andric 18560b57cec5SDimitry Andric // There could be undef components of a spilled super register. 18570b57cec5SDimitry Andric // TODO: Can we detect this and skip the spill? 1858fe6060f1SDimitry Andric if (SB.NumSubRegs > 1) { 1859fe6060f1SDimitry Andric // The last implicit use of the SB.SuperReg carries the "Kill" flag. 
18600b57cec5SDimitry Andric unsigned SuperKillState = 0; 1861fe6060f1SDimitry Andric if (i + 1 == SB.NumSubRegs) 1862fe6060f1SDimitry Andric SuperKillState |= getKillRegState(SB.IsKill); 1863fe6060f1SDimitry Andric WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState); 18645ffd83dbSDimitry Andric } 18650b57cec5SDimitry Andric } 18660b57cec5SDimitry Andric 18675ffd83dbSDimitry Andric // Write out VGPR 1868fe6060f1SDimitry Andric SB.readWriteTmpVGPR(Offset, /*IsLoad*/ false); 18690b57cec5SDimitry Andric } 1870fe6060f1SDimitry Andric 1871fe6060f1SDimitry Andric SB.restore(); 18720b57cec5SDimitry Andric } 18730b57cec5SDimitry Andric 18740b57cec5SDimitry Andric MI->eraseFromParent(); 1875fe6060f1SDimitry Andric SB.MFI.addToSpilledSGPRs(SB.NumSubRegs); 1876fe6060f1SDimitry Andric 1877fe6060f1SDimitry Andric if (LIS) 1878fe6060f1SDimitry Andric LIS->removeAllRegUnitsForPhysReg(SB.SuperReg); 1879fe6060f1SDimitry Andric 18800b57cec5SDimitry Andric return true; 18810b57cec5SDimitry Andric } 18820b57cec5SDimitry Andric 1883bdd1243dSDimitry Andric bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index, 1884bdd1243dSDimitry Andric RegScavenger *RS, SlotIndexes *Indexes, 18855f757f3fSDimitry Andric LiveIntervals *LIS, bool OnlyToVGPR, 18865f757f3fSDimitry Andric bool SpillToPhysVGPRLane) const { 1887fe6060f1SDimitry Andric SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS); 18880b57cec5SDimitry Andric 18895f757f3fSDimitry Andric ArrayRef<SpilledReg> VGPRSpills = 18905f757f3fSDimitry Andric SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index) 18915f757f3fSDimitry Andric : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index); 18920b57cec5SDimitry Andric bool SpillToVGPR = !VGPRSpills.empty(); 18930b57cec5SDimitry Andric if (OnlyToVGPR && !SpillToVGPR) 18940b57cec5SDimitry Andric return false; 18950b57cec5SDimitry Andric 18965ffd83dbSDimitry Andric if (SpillToVGPR) { 1897fe6060f1SDimitry Andric for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) { 1898fe6060f1SDimitry Andric Register SubReg = 1899fe6060f1SDimitry Andric SB.NumSubRegs == 1 1900fe6060f1SDimitry Andric ? 
SB.SuperReg 1901fe6060f1SDimitry Andric : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); 19020b57cec5SDimitry Andric 190381ad6265SDimitry Andric SpilledReg Spill = VGPRSpills[i]; 19045f757f3fSDimitry Andric auto MIB = BuildMI(*SB.MBB, MI, SB.DL, 19055f757f3fSDimitry Andric SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg) 19060b57cec5SDimitry Andric .addReg(Spill.VGPR) 19070b57cec5SDimitry Andric .addImm(Spill.Lane); 1908fe6060f1SDimitry Andric if (SB.NumSubRegs > 1 && i == 0) 1909fe6060f1SDimitry Andric MIB.addReg(SB.SuperReg, RegState::ImplicitDefine); 1910bdd1243dSDimitry Andric if (Indexes) { 1911fe6060f1SDimitry Andric if (i == e - 1) 1912bdd1243dSDimitry Andric Indexes->replaceMachineInstrInMaps(*MI, *MIB); 1913fe6060f1SDimitry Andric else 1914bdd1243dSDimitry Andric Indexes->insertMachineInstrInMaps(*MIB); 1915fe6060f1SDimitry Andric } 19165ffd83dbSDimitry Andric } 19170b57cec5SDimitry Andric } else { 1918fe6060f1SDimitry Andric SB.prepare(); 19190b57cec5SDimitry Andric 1920fe6060f1SDimitry Andric // Per VGPR helper data 1921fe6060f1SDimitry Andric auto PVD = SB.getPerVGPRData(); 19220b57cec5SDimitry Andric 1923fe6060f1SDimitry Andric for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) { 19245ffd83dbSDimitry Andric // Load in VGPR data 1925fe6060f1SDimitry Andric SB.readWriteTmpVGPR(Offset, /*IsLoad*/ true); 19260b57cec5SDimitry Andric 19275ffd83dbSDimitry Andric // Unpack lanes 1928fe6060f1SDimitry Andric for (unsigned i = Offset * PVD.PerVGPR, 1929fe6060f1SDimitry Andric e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs); 19305ffd83dbSDimitry Andric i < e; ++i) { 1931fe6060f1SDimitry Andric Register SubReg = 1932fe6060f1SDimitry Andric SB.NumSubRegs == 1 1933fe6060f1SDimitry Andric ? SB.SuperReg 1934fe6060f1SDimitry Andric : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); 19350b57cec5SDimitry Andric 19365ffd83dbSDimitry Andric bool LastSubReg = (i + 1 == e); 1937349cc55cSDimitry Andric auto MIB = BuildMI(*SB.MBB, MI, SB.DL, 19385f757f3fSDimitry Andric SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg) 1939fe6060f1SDimitry Andric .addReg(SB.TmpVGPR, getKillRegState(LastSubReg)) 19405ffd83dbSDimitry Andric .addImm(i); 1941fe6060f1SDimitry Andric if (SB.NumSubRegs > 1 && i == 0) 1942fe6060f1SDimitry Andric MIB.addReg(SB.SuperReg, RegState::ImplicitDefine); 1943bdd1243dSDimitry Andric if (Indexes) { 1944fe6060f1SDimitry Andric if (i == e - 1) 1945bdd1243dSDimitry Andric Indexes->replaceMachineInstrInMaps(*MI, *MIB); 1946fe6060f1SDimitry Andric else 1947bdd1243dSDimitry Andric Indexes->insertMachineInstrInMaps(*MIB); 19485ffd83dbSDimitry Andric } 19490b57cec5SDimitry Andric } 19500b57cec5SDimitry Andric } 19510b57cec5SDimitry Andric 1952fe6060f1SDimitry Andric SB.restore(); 1953fe6060f1SDimitry Andric } 1954fe6060f1SDimitry Andric 19550b57cec5SDimitry Andric MI->eraseFromParent(); 1956fe6060f1SDimitry Andric 1957fe6060f1SDimitry Andric if (LIS) 1958fe6060f1SDimitry Andric LIS->removeAllRegUnitsForPhysReg(SB.SuperReg); 1959fe6060f1SDimitry Andric 19600b57cec5SDimitry Andric return true; 19610b57cec5SDimitry Andric } 19620b57cec5SDimitry Andric 1963349cc55cSDimitry Andric bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI, 1964349cc55cSDimitry Andric MachineBasicBlock &RestoreMBB, 1965349cc55cSDimitry Andric Register SGPR, RegScavenger *RS) const { 1966349cc55cSDimitry Andric SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, SGPR, false, 0, 1967349cc55cSDimitry Andric RS); 1968349cc55cSDimitry Andric SB.prepare(); 
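  // What follows is generated entirely with lane moves; unlike the memory
  // spill path above, TmpVGPR is never stored out to scratch. As an
  // illustrative sketch (register names and lane numbers are made up, the
  // real lane is i % PerVGPR), spilling a 64-bit SGPR pair expands to
  // roughly:
  //   v_writelane_b32 vTmp, sLo, 0      ; emitted at MI
  //   v_writelane_b32 vTmp, sHi, 1
  // and the restore block gets:
  //   v_readlane_b32  sLo, vTmp, 0      ; emitted at the end of RestoreMBB
  //   v_readlane_b32  sHi, vTmp, 1
  // SB.prepare() / SB.restore() bracket this with the code that saves and
  // restores the clobbered lanes of TmpVGPR (and EXEC, if that is needed).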
1969349cc55cSDimitry Andric // Generate the spill of SGPR to SB.TmpVGPR. 1970349cc55cSDimitry Andric unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill); 1971349cc55cSDimitry Andric auto PVD = SB.getPerVGPRData(); 1972349cc55cSDimitry Andric for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) { 1973349cc55cSDimitry Andric unsigned TmpVGPRFlags = RegState::Undef; 1974349cc55cSDimitry Andric // Write sub registers into the VGPR 1975349cc55cSDimitry Andric for (unsigned i = Offset * PVD.PerVGPR, 1976349cc55cSDimitry Andric e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs); 1977349cc55cSDimitry Andric i < e; ++i) { 1978349cc55cSDimitry Andric Register SubReg = 1979349cc55cSDimitry Andric SB.NumSubRegs == 1 1980349cc55cSDimitry Andric ? SB.SuperReg 1981349cc55cSDimitry Andric : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); 1982349cc55cSDimitry Andric 1983349cc55cSDimitry Andric MachineInstrBuilder WriteLane = 1984349cc55cSDimitry Andric BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32), 1985349cc55cSDimitry Andric SB.TmpVGPR) 1986349cc55cSDimitry Andric .addReg(SubReg, SubKillState) 1987349cc55cSDimitry Andric .addImm(i % PVD.PerVGPR) 1988349cc55cSDimitry Andric .addReg(SB.TmpVGPR, TmpVGPRFlags); 1989349cc55cSDimitry Andric TmpVGPRFlags = 0; 1990349cc55cSDimitry Andric // There could be undef components of a spilled super register. 1991349cc55cSDimitry Andric // TODO: Can we detect this and skip the spill? 1992349cc55cSDimitry Andric if (SB.NumSubRegs > 1) { 1993349cc55cSDimitry Andric // The last implicit use of the SB.SuperReg carries the "Kill" flag. 1994349cc55cSDimitry Andric unsigned SuperKillState = 0; 1995349cc55cSDimitry Andric if (i + 1 == SB.NumSubRegs) 1996349cc55cSDimitry Andric SuperKillState |= getKillRegState(SB.IsKill); 1997349cc55cSDimitry Andric WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState); 1998349cc55cSDimitry Andric } 1999349cc55cSDimitry Andric } 2000349cc55cSDimitry Andric // Don't need to write VGPR out. 2001349cc55cSDimitry Andric } 2002349cc55cSDimitry Andric 2003349cc55cSDimitry Andric // Restore clobbered registers in the specified restore block. 2004349cc55cSDimitry Andric MI = RestoreMBB.end(); 2005349cc55cSDimitry Andric SB.setMI(&RestoreMBB, MI); 2006349cc55cSDimitry Andric // Generate the restore of SGPR from SB.TmpVGPR. 2007349cc55cSDimitry Andric for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) { 2008349cc55cSDimitry Andric // Don't need to load VGPR in. 2009349cc55cSDimitry Andric // Unpack lanes 2010349cc55cSDimitry Andric for (unsigned i = Offset * PVD.PerVGPR, 2011349cc55cSDimitry Andric e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs); 2012349cc55cSDimitry Andric i < e; ++i) { 2013349cc55cSDimitry Andric Register SubReg = 2014349cc55cSDimitry Andric SB.NumSubRegs == 1 2015349cc55cSDimitry Andric ? 
SB.SuperReg 2016349cc55cSDimitry Andric : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); 2017349cc55cSDimitry Andric bool LastSubReg = (i + 1 == e); 2018349cc55cSDimitry Andric auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32), 2019349cc55cSDimitry Andric SubReg) 2020349cc55cSDimitry Andric .addReg(SB.TmpVGPR, getKillRegState(LastSubReg)) 2021349cc55cSDimitry Andric .addImm(i); 2022349cc55cSDimitry Andric if (SB.NumSubRegs > 1 && i == 0) 2023349cc55cSDimitry Andric MIB.addReg(SB.SuperReg, RegState::ImplicitDefine); 2024349cc55cSDimitry Andric } 2025349cc55cSDimitry Andric } 2026349cc55cSDimitry Andric SB.restore(); 2027349cc55cSDimitry Andric 2028349cc55cSDimitry Andric SB.MFI.addToSpilledSGPRs(SB.NumSubRegs); 2029349cc55cSDimitry Andric return false; 2030349cc55cSDimitry Andric } 2031349cc55cSDimitry Andric 20320b57cec5SDimitry Andric /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to 20330b57cec5SDimitry Andric /// a VGPR and the stack slot can be safely eliminated when all other users are 20340b57cec5SDimitry Andric /// handled. 20350b57cec5SDimitry Andric bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex( 2036bdd1243dSDimitry Andric MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, 20375f757f3fSDimitry Andric SlotIndexes *Indexes, LiveIntervals *LIS, bool SpillToPhysVGPRLane) const { 20380b57cec5SDimitry Andric switch (MI->getOpcode()) { 20390b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S1024_SAVE: 20400b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S512_SAVE: 2041bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S384_SAVE: 2042bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S352_SAVE: 2043bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S320_SAVE: 2044bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S288_SAVE: 20450b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S256_SAVE: 2046fe6060f1SDimitry Andric case AMDGPU::SI_SPILL_S224_SAVE: 20475ffd83dbSDimitry Andric case AMDGPU::SI_SPILL_S192_SAVE: 20480b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S160_SAVE: 20490b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S128_SAVE: 20500b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S96_SAVE: 20510b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S64_SAVE: 20520b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S32_SAVE: 20535f757f3fSDimitry Andric return spillSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane); 20540b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S1024_RESTORE: 20550b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S512_RESTORE: 2056bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S384_RESTORE: 2057bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S352_RESTORE: 2058bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S320_RESTORE: 2059bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S288_RESTORE: 20600b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S256_RESTORE: 2061fe6060f1SDimitry Andric case AMDGPU::SI_SPILL_S224_RESTORE: 20625ffd83dbSDimitry Andric case AMDGPU::SI_SPILL_S192_RESTORE: 20630b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S160_RESTORE: 20640b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S128_RESTORE: 20650b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S96_RESTORE: 20660b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S64_RESTORE: 20670b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S32_RESTORE: 20685f757f3fSDimitry Andric return restoreSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane); 20690b57cec5SDimitry Andric default: 20700b57cec5SDimitry Andric llvm_unreachable("not an SGPR spill instruction"); 20710b57cec5SDimitry Andric } 20720b57cec5SDimitry 
Andric } 20730b57cec5SDimitry Andric 2074bdd1243dSDimitry Andric bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, 20750b57cec5SDimitry Andric int SPAdj, unsigned FIOperandNum, 20760b57cec5SDimitry Andric RegScavenger *RS) const { 20770b57cec5SDimitry Andric MachineFunction *MF = MI->getParent()->getParent(); 20780b57cec5SDimitry Andric MachineBasicBlock *MBB = MI->getParent(); 20790b57cec5SDimitry Andric SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 20800b57cec5SDimitry Andric MachineFrameInfo &FrameInfo = MF->getFrameInfo(); 20810b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo(); 20820b57cec5SDimitry Andric DebugLoc DL = MI->getDebugLoc(); 20830b57cec5SDimitry Andric 20840b57cec5SDimitry Andric assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?"); 20850b57cec5SDimitry Andric 2086*0fca6ea1SDimitry Andric assert(MF->getRegInfo().isReserved(MFI->getScratchRSrcReg()) && 2087*0fca6ea1SDimitry Andric "unreserved scratch RSRC register"); 2088*0fca6ea1SDimitry Andric 20890b57cec5SDimitry Andric MachineOperand &FIOp = MI->getOperand(FIOperandNum); 20900b57cec5SDimitry Andric int Index = MI->getOperand(FIOperandNum).getIndex(); 20910b57cec5SDimitry Andric 20925ffd83dbSDimitry Andric Register FrameReg = FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(*MF) 20935ffd83dbSDimitry Andric ? getBaseRegister() 20945ffd83dbSDimitry Andric : getFrameRegister(*MF); 20950b57cec5SDimitry Andric 20960b57cec5SDimitry Andric switch (MI->getOpcode()) { 20970b57cec5SDimitry Andric // SGPR register spill 20980b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S1024_SAVE: 20990b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S512_SAVE: 2100bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S384_SAVE: 2101bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S352_SAVE: 2102bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S320_SAVE: 2103bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S288_SAVE: 21040b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S256_SAVE: 2105fe6060f1SDimitry Andric case AMDGPU::SI_SPILL_S224_SAVE: 21065ffd83dbSDimitry Andric case AMDGPU::SI_SPILL_S192_SAVE: 21070b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S160_SAVE: 21080b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S128_SAVE: 21090b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S96_SAVE: 21100b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S64_SAVE: 21110b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S32_SAVE: { 2112bdd1243dSDimitry Andric return spillSGPR(MI, Index, RS); 21130b57cec5SDimitry Andric } 21140b57cec5SDimitry Andric 21150b57cec5SDimitry Andric // SGPR register restore 21160b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S1024_RESTORE: 21170b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S512_RESTORE: 2118bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S384_RESTORE: 2119bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S352_RESTORE: 2120bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S320_RESTORE: 2121bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_S288_RESTORE: 21220b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S256_RESTORE: 2123fe6060f1SDimitry Andric case AMDGPU::SI_SPILL_S224_RESTORE: 21245ffd83dbSDimitry Andric case AMDGPU::SI_SPILL_S192_RESTORE: 21250b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S160_RESTORE: 21260b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S128_RESTORE: 21270b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S96_RESTORE: 21280b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S64_RESTORE: 21290b57cec5SDimitry Andric case AMDGPU::SI_SPILL_S32_RESTORE: { 2130bdd1243dSDimitry Andric return restoreSGPR(MI, 
Index, RS); 21310b57cec5SDimitry Andric } 21320b57cec5SDimitry Andric 21330b57cec5SDimitry Andric // VGPR register spill 21340b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V1024_SAVE: 21350b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V512_SAVE: 2136bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_V384_SAVE: 2137bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_V352_SAVE: 2138bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_V320_SAVE: 2139bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_V288_SAVE: 21400b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V256_SAVE: 2141fe6060f1SDimitry Andric case AMDGPU::SI_SPILL_V224_SAVE: 2142e8d8bef9SDimitry Andric case AMDGPU::SI_SPILL_V192_SAVE: 21430b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V160_SAVE: 21440b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V128_SAVE: 21450b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V96_SAVE: 21460b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V64_SAVE: 21470b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V32_SAVE: 21480b57cec5SDimitry Andric case AMDGPU::SI_SPILL_A1024_SAVE: 21490b57cec5SDimitry Andric case AMDGPU::SI_SPILL_A512_SAVE: 2150bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_A384_SAVE: 2151bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_A352_SAVE: 2152bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_A320_SAVE: 2153bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_A288_SAVE: 2154e8d8bef9SDimitry Andric case AMDGPU::SI_SPILL_A256_SAVE: 2155fe6060f1SDimitry Andric case AMDGPU::SI_SPILL_A224_SAVE: 2156e8d8bef9SDimitry Andric case AMDGPU::SI_SPILL_A192_SAVE: 2157e8d8bef9SDimitry Andric case AMDGPU::SI_SPILL_A160_SAVE: 21580b57cec5SDimitry Andric case AMDGPU::SI_SPILL_A128_SAVE: 2159e8d8bef9SDimitry Andric case AMDGPU::SI_SPILL_A96_SAVE: 21600b57cec5SDimitry Andric case AMDGPU::SI_SPILL_A64_SAVE: 21610eae32dcSDimitry Andric case AMDGPU::SI_SPILL_A32_SAVE: 21620eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV1024_SAVE: 21630eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV512_SAVE: 2164bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_AV384_SAVE: 2165bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_AV352_SAVE: 2166bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_AV320_SAVE: 2167bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_AV288_SAVE: 21680eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV256_SAVE: 21690eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV224_SAVE: 21700eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV192_SAVE: 21710eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV160_SAVE: 21720eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV128_SAVE: 21730eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV96_SAVE: 21740eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV64_SAVE: 217506c3fb27SDimitry Andric case AMDGPU::SI_SPILL_AV32_SAVE: 21765f757f3fSDimitry Andric case AMDGPU::SI_SPILL_WWM_V32_SAVE: 21775f757f3fSDimitry Andric case AMDGPU::SI_SPILL_WWM_AV32_SAVE: { 21780b57cec5SDimitry Andric const MachineOperand *VData = TII->getNamedOperand(*MI, 21790b57cec5SDimitry Andric AMDGPU::OpName::vdata); 21800b57cec5SDimitry Andric assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() == 21810b57cec5SDimitry Andric MFI->getStackPtrOffsetReg()); 21820b57cec5SDimitry Andric 2183e8d8bef9SDimitry Andric unsigned Opc = ST.enableFlatScratch() ? 
AMDGPU::SCRATCH_STORE_DWORD_SADDR 2184e8d8bef9SDimitry Andric : AMDGPU::BUFFER_STORE_DWORD_OFFSET; 2185fe6060f1SDimitry Andric auto *MBB = MI->getParent(); 218606c3fb27SDimitry Andric bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode()); 218706c3fb27SDimitry Andric if (IsWWMRegSpill) { 218806c3fb27SDimitry Andric TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(), 218906c3fb27SDimitry Andric RS->isRegUsed(AMDGPU::SCC)); 219006c3fb27SDimitry Andric } 2191fe6060f1SDimitry Andric buildSpillLoadStore( 2192349cc55cSDimitry Andric *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg, 21930b57cec5SDimitry Andric TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), 2194fe6060f1SDimitry Andric *MI->memoperands_begin(), RS); 21950b57cec5SDimitry Andric MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode())); 219606c3fb27SDimitry Andric if (IsWWMRegSpill) 219706c3fb27SDimitry Andric TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy()); 219806c3fb27SDimitry Andric 21990b57cec5SDimitry Andric MI->eraseFromParent(); 2200bdd1243dSDimitry Andric return true; 22010b57cec5SDimitry Andric } 22020b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V32_RESTORE: 22030b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V64_RESTORE: 22040b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V96_RESTORE: 22050b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V128_RESTORE: 22060b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V160_RESTORE: 2207e8d8bef9SDimitry Andric case AMDGPU::SI_SPILL_V192_RESTORE: 2208fe6060f1SDimitry Andric case AMDGPU::SI_SPILL_V224_RESTORE: 22090b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V256_RESTORE: 2210bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_V288_RESTORE: 2211bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_V320_RESTORE: 2212bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_V352_RESTORE: 2213bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_V384_RESTORE: 22140b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V512_RESTORE: 22150b57cec5SDimitry Andric case AMDGPU::SI_SPILL_V1024_RESTORE: 22160b57cec5SDimitry Andric case AMDGPU::SI_SPILL_A32_RESTORE: 22170b57cec5SDimitry Andric case AMDGPU::SI_SPILL_A64_RESTORE: 2218e8d8bef9SDimitry Andric case AMDGPU::SI_SPILL_A96_RESTORE: 22190b57cec5SDimitry Andric case AMDGPU::SI_SPILL_A128_RESTORE: 2220e8d8bef9SDimitry Andric case AMDGPU::SI_SPILL_A160_RESTORE: 2221e8d8bef9SDimitry Andric case AMDGPU::SI_SPILL_A192_RESTORE: 2222fe6060f1SDimitry Andric case AMDGPU::SI_SPILL_A224_RESTORE: 2223e8d8bef9SDimitry Andric case AMDGPU::SI_SPILL_A256_RESTORE: 2224bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_A288_RESTORE: 2225bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_A320_RESTORE: 2226bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_A352_RESTORE: 2227bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_A384_RESTORE: 22280b57cec5SDimitry Andric case AMDGPU::SI_SPILL_A512_RESTORE: 22290eae32dcSDimitry Andric case AMDGPU::SI_SPILL_A1024_RESTORE: 22300eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV32_RESTORE: 22310eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV64_RESTORE: 22320eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV96_RESTORE: 22330eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV128_RESTORE: 22340eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV160_RESTORE: 22350eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV192_RESTORE: 22360eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV224_RESTORE: 22370eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV256_RESTORE: 2238bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_AV288_RESTORE: 2239bdd1243dSDimitry 
Andric case AMDGPU::SI_SPILL_AV320_RESTORE: 2240bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_AV352_RESTORE: 2241bdd1243dSDimitry Andric case AMDGPU::SI_SPILL_AV384_RESTORE: 22420eae32dcSDimitry Andric case AMDGPU::SI_SPILL_AV512_RESTORE: 224306c3fb27SDimitry Andric case AMDGPU::SI_SPILL_AV1024_RESTORE: 22445f757f3fSDimitry Andric case AMDGPU::SI_SPILL_WWM_V32_RESTORE: 22455f757f3fSDimitry Andric case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: { 22460b57cec5SDimitry Andric const MachineOperand *VData = TII->getNamedOperand(*MI, 22470b57cec5SDimitry Andric AMDGPU::OpName::vdata); 22480b57cec5SDimitry Andric assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() == 22490b57cec5SDimitry Andric MFI->getStackPtrOffsetReg()); 22500b57cec5SDimitry Andric 2251e8d8bef9SDimitry Andric unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR 2252e8d8bef9SDimitry Andric : AMDGPU::BUFFER_LOAD_DWORD_OFFSET; 2253fe6060f1SDimitry Andric auto *MBB = MI->getParent(); 225406c3fb27SDimitry Andric bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode()); 225506c3fb27SDimitry Andric if (IsWWMRegSpill) { 225606c3fb27SDimitry Andric TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(), 225706c3fb27SDimitry Andric RS->isRegUsed(AMDGPU::SCC)); 225806c3fb27SDimitry Andric } 22597a6dacacSDimitry Andric 2260fe6060f1SDimitry Andric buildSpillLoadStore( 2261349cc55cSDimitry Andric *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg, 22620b57cec5SDimitry Andric TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), 2263fe6060f1SDimitry Andric *MI->memoperands_begin(), RS); 226406c3fb27SDimitry Andric 226506c3fb27SDimitry Andric if (IsWWMRegSpill) 226606c3fb27SDimitry Andric TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy()); 226706c3fb27SDimitry Andric 22680b57cec5SDimitry Andric MI->eraseFromParent(); 2269bdd1243dSDimitry Andric return true; 22700b57cec5SDimitry Andric } 22710b57cec5SDimitry Andric 22720b57cec5SDimitry Andric default: { 2273fe6060f1SDimitry Andric // Other access to frame index 22740b57cec5SDimitry Andric const DebugLoc &DL = MI->getDebugLoc(); 2275e8d8bef9SDimitry Andric 2276e8d8bef9SDimitry Andric int64_t Offset = FrameInfo.getObjectOffset(Index); 2277e8d8bef9SDimitry Andric if (ST.enableFlatScratch()) { 2278e8d8bef9SDimitry Andric if (TII->isFLATScratch(*MI)) { 2279e8d8bef9SDimitry Andric assert((int16_t)FIOperandNum == 2280e8d8bef9SDimitry Andric AMDGPU::getNamedOperandIdx(MI->getOpcode(), 2281e8d8bef9SDimitry Andric AMDGPU::OpName::saddr)); 2282e8d8bef9SDimitry Andric 2283e8d8bef9SDimitry Andric // The offset is always swizzled, just replace it 2284e8d8bef9SDimitry Andric if (FrameReg) 2285e8d8bef9SDimitry Andric FIOp.ChangeToRegister(FrameReg, false); 2286e8d8bef9SDimitry Andric 2287e8d8bef9SDimitry Andric MachineOperand *OffsetOp = 2288e8d8bef9SDimitry Andric TII->getNamedOperand(*MI, AMDGPU::OpName::offset); 2289e8d8bef9SDimitry Andric int64_t NewOffset = Offset + OffsetOp->getImm(); 2290e8d8bef9SDimitry Andric if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS, 2291fe6060f1SDimitry Andric SIInstrFlags::FlatScratch)) { 2292e8d8bef9SDimitry Andric OffsetOp->setImm(NewOffset); 2293e8d8bef9SDimitry Andric if (FrameReg) 2294bdd1243dSDimitry Andric return false; 2295e8d8bef9SDimitry Andric Offset = 0; 2296e8d8bef9SDimitry Andric } 2297e8d8bef9SDimitry Andric 229881ad6265SDimitry Andric if (!Offset) { 229981ad6265SDimitry Andric unsigned Opc = MI->getOpcode(); 230081ad6265SDimitry Andric int NewOpc = -1; 
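        // At this point the offset is fully covered by the immediate, so the
        // saddr operand is no longer needed. Switch to the SV form if the
        // instruction still has a vaddr, or to the ST form (no address
        // registers at all) when the subtarget supports it.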
2301bdd1243dSDimitry Andric if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr)) { 230281ad6265SDimitry Andric NewOpc = AMDGPU::getFlatScratchInstSVfromSVS(Opc); 230381ad6265SDimitry Andric } else if (ST.hasFlatScratchSTMode()) { 2304e8d8bef9SDimitry Andric // On GFX10 we have ST mode to use no registers for an address. 2305e8d8bef9SDimitry Andric // Otherwise we need to materialize 0 into an SGPR. 230681ad6265SDimitry Andric NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc); 230781ad6265SDimitry Andric } 230881ad6265SDimitry Andric 230981ad6265SDimitry Andric if (NewOpc != -1) { 2310bdd1243dSDimitry Andric // removeOperand doesn't fixup tied operand indexes as it goes, so 2311bdd1243dSDimitry Andric // it asserts. Untie vdst_in for now and retie them afterwards. 2312bdd1243dSDimitry Andric int VDstIn = AMDGPU::getNamedOperandIdx(Opc, 2313bdd1243dSDimitry Andric AMDGPU::OpName::vdst_in); 2314bdd1243dSDimitry Andric bool TiedVDst = VDstIn != -1 && 2315bdd1243dSDimitry Andric MI->getOperand(VDstIn).isReg() && 2316bdd1243dSDimitry Andric MI->getOperand(VDstIn).isTied(); 2317bdd1243dSDimitry Andric if (TiedVDst) 2318bdd1243dSDimitry Andric MI->untieRegOperand(VDstIn); 2319bdd1243dSDimitry Andric 232081ad6265SDimitry Andric MI->removeOperand( 2321e8d8bef9SDimitry Andric AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr)); 2322bdd1243dSDimitry Andric 2323bdd1243dSDimitry Andric if (TiedVDst) { 2324bdd1243dSDimitry Andric int NewVDst = 2325bdd1243dSDimitry Andric AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst); 2326bdd1243dSDimitry Andric int NewVDstIn = 2327bdd1243dSDimitry Andric AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in); 2328bdd1243dSDimitry Andric assert (NewVDst != -1 && NewVDstIn != -1 && "Must be tied!"); 2329bdd1243dSDimitry Andric MI->tieOperands(NewVDst, NewVDstIn); 2330bdd1243dSDimitry Andric } 2331e8d8bef9SDimitry Andric MI->setDesc(TII->get(NewOpc)); 2332bdd1243dSDimitry Andric return false; 2333e8d8bef9SDimitry Andric } 2334e8d8bef9SDimitry Andric } 233581ad6265SDimitry Andric } 2336e8d8bef9SDimitry Andric 2337e8d8bef9SDimitry Andric if (!FrameReg) { 2338e8d8bef9SDimitry Andric FIOp.ChangeToImmediate(Offset); 2339e8d8bef9SDimitry Andric if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) 2340bdd1243dSDimitry Andric return false; 2341e8d8bef9SDimitry Andric } 2342e8d8bef9SDimitry Andric 2343e8d8bef9SDimitry Andric // We need to use register here. Check if we can use an SGPR or need 2344e8d8bef9SDimitry Andric // a VGPR. 2345e8d8bef9SDimitry Andric FIOp.ChangeToRegister(AMDGPU::M0, false); 2346e8d8bef9SDimitry Andric bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, &FIOp); 2347e8d8bef9SDimitry Andric 2348e8d8bef9SDimitry Andric if (!Offset && FrameReg && UseSGPR) { 2349e8d8bef9SDimitry Andric FIOp.setReg(FrameReg); 2350bdd1243dSDimitry Andric return false; 2351e8d8bef9SDimitry Andric } 2352e8d8bef9SDimitry Andric 2353e8d8bef9SDimitry Andric const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass 2354e8d8bef9SDimitry Andric : &AMDGPU::VGPR_32RegClass; 2355e8d8bef9SDimitry Andric 235606c3fb27SDimitry Andric Register TmpReg = 235706c3fb27SDimitry Andric RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR); 2358e8d8bef9SDimitry Andric FIOp.setReg(TmpReg); 2359bdd1243dSDimitry Andric FIOp.setIsKill(); 2360e8d8bef9SDimitry Andric 2361e8d8bef9SDimitry Andric if ((!FrameReg || !Offset) && TmpReg) { 2362e8d8bef9SDimitry Andric unsigned Opc = UseSGPR ? 
AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; 2363e8d8bef9SDimitry Andric auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg); 2364e8d8bef9SDimitry Andric if (FrameReg) 2365e8d8bef9SDimitry Andric MIB.addReg(FrameReg); 2366e8d8bef9SDimitry Andric else 2367e8d8bef9SDimitry Andric MIB.addImm(Offset); 2368e8d8bef9SDimitry Andric 2369bdd1243dSDimitry Andric return false; 2370e8d8bef9SDimitry Andric } 2371e8d8bef9SDimitry Andric 2372*0fca6ea1SDimitry Andric bool NeedSaveSCC = RS->isRegUsed(AMDGPU::SCC) && 2373*0fca6ea1SDimitry Andric !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr); 2374bdd1243dSDimitry Andric 2375e8d8bef9SDimitry Andric Register TmpSReg = 2376e8d8bef9SDimitry Andric UseSGPR ? TmpReg 237706c3fb27SDimitry Andric : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass, 237806c3fb27SDimitry Andric MI, false, 0, !UseSGPR); 2379e8d8bef9SDimitry Andric 2380e8d8bef9SDimitry Andric // TODO: for flat scratch another attempt can be made with a VGPR index 2381e8d8bef9SDimitry Andric // if no SGPRs can be scavenged. 2382e8d8bef9SDimitry Andric if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR)) 2383e8d8bef9SDimitry Andric report_fatal_error("Cannot scavenge register in FI elimination!"); 2384e8d8bef9SDimitry Andric 2385e8d8bef9SDimitry Andric if (!TmpSReg) { 2386e8d8bef9SDimitry Andric // Use frame register and restore it after. 2387e8d8bef9SDimitry Andric TmpSReg = FrameReg; 2388e8d8bef9SDimitry Andric FIOp.setReg(FrameReg); 2389e8d8bef9SDimitry Andric FIOp.setIsKill(false); 2390e8d8bef9SDimitry Andric } 2391e8d8bef9SDimitry Andric 2392bdd1243dSDimitry Andric if (NeedSaveSCC) { 2393bdd1243dSDimitry Andric assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!"); 2394bdd1243dSDimitry Andric BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADDC_U32), TmpSReg) 2395bdd1243dSDimitry Andric .addReg(FrameReg) 2396bdd1243dSDimitry Andric .addImm(Offset); 2397bdd1243dSDimitry Andric BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITCMP1_B32)) 2398bdd1243dSDimitry Andric .addReg(TmpSReg) 2399bdd1243dSDimitry Andric .addImm(0); 2400bdd1243dSDimitry Andric BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITSET0_B32), TmpSReg) 2401bdd1243dSDimitry Andric .addImm(0) 2402bdd1243dSDimitry Andric .addReg(TmpSReg); 2403bdd1243dSDimitry Andric } else { 2404fe6060f1SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg) 2405e8d8bef9SDimitry Andric .addReg(FrameReg) 2406e8d8bef9SDimitry Andric .addImm(Offset); 2407bdd1243dSDimitry Andric } 2408e8d8bef9SDimitry Andric 2409e8d8bef9SDimitry Andric if (!UseSGPR) 2410e8d8bef9SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) 2411e8d8bef9SDimitry Andric .addReg(TmpSReg, RegState::Kill); 2412e8d8bef9SDimitry Andric 2413e8d8bef9SDimitry Andric if (TmpSReg == FrameReg) { 2414e8d8bef9SDimitry Andric // Undo frame register modification. 
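      // If SCC is live here as well, S_ADD_I32 cannot be used for the undo
      // since it clobbers SCC. The same even-offset trick as above is used
      // instead: s_addc_u32 folds the live SCC value into bit 0 of the
      // (otherwise even) result, s_bitcmp1_b32 copies that bit back into SCC,
      // and s_bitset0_b32 clears it, leaving both the frame register and SCC
      // with their original values.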
2415*0fca6ea1SDimitry Andric if (NeedSaveSCC && 2416*0fca6ea1SDimitry Andric !MI->registerDefIsDead(AMDGPU::SCC, /*TRI=*/nullptr)) { 2417bdd1243dSDimitry Andric MachineBasicBlock::iterator I = 2418bdd1243dSDimitry Andric BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADDC_U32), 2419bdd1243dSDimitry Andric TmpSReg) 2420bdd1243dSDimitry Andric .addReg(FrameReg) 2421bdd1243dSDimitry Andric .addImm(-Offset); 2422bdd1243dSDimitry Andric I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32)) 2423bdd1243dSDimitry Andric .addReg(TmpSReg) 2424bdd1243dSDimitry Andric .addImm(0); 2425bdd1243dSDimitry Andric BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32), 2426bdd1243dSDimitry Andric TmpSReg) 2427bdd1243dSDimitry Andric .addImm(0) 2428bdd1243dSDimitry Andric .addReg(TmpSReg); 2429bdd1243dSDimitry Andric } else { 2430fe6060f1SDimitry Andric BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32), 2431e8d8bef9SDimitry Andric FrameReg) 2432e8d8bef9SDimitry Andric .addReg(FrameReg) 2433fe6060f1SDimitry Andric .addImm(-Offset); 2434e8d8bef9SDimitry Andric } 2435bdd1243dSDimitry Andric } 2436e8d8bef9SDimitry Andric 2437bdd1243dSDimitry Andric return false; 2438e8d8bef9SDimitry Andric } 2439e8d8bef9SDimitry Andric 24400b57cec5SDimitry Andric bool IsMUBUF = TII->isMUBUF(*MI); 24410b57cec5SDimitry Andric 24425f757f3fSDimitry Andric if (!IsMUBUF && !MFI->isBottomOfStack()) { 24435ffd83dbSDimitry Andric // Convert to a swizzled stack address by scaling by the wave size. 24445ffd83dbSDimitry Andric // In an entry function/kernel the offset is already swizzled. 244581ad6265SDimitry Andric bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum)); 2446*0fca6ea1SDimitry Andric bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) && 2447*0fca6ea1SDimitry Andric !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr); 244881ad6265SDimitry Andric const TargetRegisterClass *RC = IsSALU && !LiveSCC 244981ad6265SDimitry Andric ? &AMDGPU::SReg_32RegClass 245081ad6265SDimitry Andric : &AMDGPU::VGPR_32RegClass; 245181ad6265SDimitry Andric bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 || 245281ad6265SDimitry Andric MI->getOpcode() == AMDGPU::V_MOV_B32_e64; 245306c3fb27SDimitry Andric Register ResultReg = 245406c3fb27SDimitry Andric IsCopy ? MI->getOperand(0).getReg() 245506c3fb27SDimitry Andric : RS->scavengeRegisterBackwards(*RC, MI, false, 0); 24560b57cec5SDimitry Andric 24570b57cec5SDimitry Andric int64_t Offset = FrameInfo.getObjectOffset(Index); 24580b57cec5SDimitry Andric if (Offset == 0) { 245981ad6265SDimitry Andric unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32 246081ad6265SDimitry Andric : AMDGPU::V_LSHRREV_B32_e64; 24615f757f3fSDimitry Andric auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), ResultReg); 24625f757f3fSDimitry Andric if (OpCode == AMDGPU::V_LSHRREV_B32_e64) 24635f757f3fSDimitry Andric // For V_LSHRREV, the operands are reversed (the shift count goes 24645f757f3fSDimitry Andric // first). 24655f757f3fSDimitry Andric Shift.addImm(ST.getWavefrontSizeLog2()).addReg(FrameReg); 24665f757f3fSDimitry Andric else 24675f757f3fSDimitry Andric Shift.addReg(FrameReg).addImm(ST.getWavefrontSizeLog2()); 246881ad6265SDimitry Andric if (IsSALU && !LiveSCC) 2469bdd1243dSDimitry Andric Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead. 
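        // An SGPR result is wanted but SCC is live, so the shift above went
        // through V_LSHRREV into a VGPR rather than S_LSHR_B32 (which would
        // clobber SCC); move the value back into a scavenged SGPR with
        // v_readfirstlane.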
247081ad6265SDimitry Andric if (IsSALU && LiveSCC) { 247106c3fb27SDimitry Andric Register NewDest = RS->scavengeRegisterBackwards( 247206c3fb27SDimitry Andric AMDGPU::SReg_32RegClass, Shift, false, 0); 247381ad6265SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), 247481ad6265SDimitry Andric NewDest) 247581ad6265SDimitry Andric .addReg(ResultReg); 247681ad6265SDimitry Andric ResultReg = NewDest; 247781ad6265SDimitry Andric } 24780b57cec5SDimitry Andric } else { 247981ad6265SDimitry Andric MachineInstrBuilder MIB; 248081ad6265SDimitry Andric if (!IsSALU) { 248181ad6265SDimitry Andric if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) != 248281ad6265SDimitry Andric nullptr) { 24835ffd83dbSDimitry Andric // Reuse ResultReg in intermediate step. 24845ffd83dbSDimitry Andric Register ScaledReg = ResultReg; 24850b57cec5SDimitry Andric 24868bcb0991SDimitry Andric BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), 24878bcb0991SDimitry Andric ScaledReg) 24888bcb0991SDimitry Andric .addImm(ST.getWavefrontSizeLog2()) 24895ffd83dbSDimitry Andric .addReg(FrameReg); 24900b57cec5SDimitry Andric 24918bcb0991SDimitry Andric const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32; 24920b57cec5SDimitry Andric 24938bcb0991SDimitry Andric // TODO: Fold if use instruction is another add of a constant. 24948bcb0991SDimitry Andric if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) { 24958bcb0991SDimitry Andric // FIXME: This can fail 24968bcb0991SDimitry Andric MIB.addImm(Offset); 24978bcb0991SDimitry Andric MIB.addReg(ScaledReg, RegState::Kill); 24988bcb0991SDimitry Andric if (!IsVOP2) 24998bcb0991SDimitry Andric MIB.addImm(0); // clamp bit 25008bcb0991SDimitry Andric } else { 2501e8d8bef9SDimitry Andric assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 && 2502480093f4SDimitry Andric "Need to reuse carry out register"); 25038bcb0991SDimitry Andric 2504480093f4SDimitry Andric // Use scavenged unused carry out as offset register. 2505480093f4SDimitry Andric Register ConstOffsetReg; 2506480093f4SDimitry Andric if (!isWave32) 2507480093f4SDimitry Andric ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0); 2508480093f4SDimitry Andric else 2509480093f4SDimitry Andric ConstOffsetReg = MIB.getReg(1); 25108bcb0991SDimitry Andric 25118bcb0991SDimitry Andric BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg) 25120b57cec5SDimitry Andric .addImm(Offset); 25138bcb0991SDimitry Andric MIB.addReg(ConstOffsetReg, RegState::Kill); 25148bcb0991SDimitry Andric MIB.addReg(ScaledReg, RegState::Kill); 25158bcb0991SDimitry Andric MIB.addImm(0); // clamp bit 25160b57cec5SDimitry Andric } 251781ad6265SDimitry Andric } 251881ad6265SDimitry Andric } 251981ad6265SDimitry Andric if (!MIB || IsSALU) { 2520480093f4SDimitry Andric // We have to produce a carry out, and there isn't a free SGPR pair 2521480093f4SDimitry Andric // for it. We can keep the whole computation on the SALU to avoid 2522480093f4SDimitry Andric // clobbering an additional register at the cost of an extra mov. 25238bcb0991SDimitry Andric 25248bcb0991SDimitry Andric // We may have 1 free scratch SGPR even though a carry out is 25258bcb0991SDimitry Andric // unavailable. Only one additional mov is needed. 252606c3fb27SDimitry Andric Register TmpScaledReg = RS->scavengeRegisterBackwards( 252706c3fb27SDimitry Andric AMDGPU::SReg_32_XM0RegClass, MI, false, 0, false); 25285ffd83dbSDimitry Andric Register ScaledReg = TmpScaledReg.isValid() ? 
TmpScaledReg : FrameReg; 25298bcb0991SDimitry Andric 25308bcb0991SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg) 25315ffd83dbSDimitry Andric .addReg(FrameReg) 25328bcb0991SDimitry Andric .addImm(ST.getWavefrontSizeLog2()); 2533fe6060f1SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg) 25348bcb0991SDimitry Andric .addReg(ScaledReg, RegState::Kill) 25358bcb0991SDimitry Andric .addImm(Offset); 253681ad6265SDimitry Andric if (!IsSALU) 25378bcb0991SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg) 25388bcb0991SDimitry Andric .addReg(ScaledReg, RegState::Kill); 253981ad6265SDimitry Andric else 254081ad6265SDimitry Andric ResultReg = ScaledReg; 25418bcb0991SDimitry Andric 25428bcb0991SDimitry Andric // If there were truly no free SGPRs, we need to undo everything. 25438bcb0991SDimitry Andric if (!TmpScaledReg.isValid()) { 2544fe6060f1SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg) 25458bcb0991SDimitry Andric .addReg(ScaledReg, RegState::Kill) 2546fe6060f1SDimitry Andric .addImm(-Offset); 25478bcb0991SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg) 25485ffd83dbSDimitry Andric .addReg(FrameReg) 25498bcb0991SDimitry Andric .addImm(ST.getWavefrontSizeLog2()); 25508bcb0991SDimitry Andric } 25518bcb0991SDimitry Andric } 25528bcb0991SDimitry Andric } 25538bcb0991SDimitry Andric 25540b57cec5SDimitry Andric // Don't introduce an extra copy if we're just materializing in a mov. 2555bdd1243dSDimitry Andric if (IsCopy) { 25560b57cec5SDimitry Andric MI->eraseFromParent(); 2557bdd1243dSDimitry Andric return true; 2558bdd1243dSDimitry Andric } 25590b57cec5SDimitry Andric FIOp.ChangeToRegister(ResultReg, false, false, true); 2560bdd1243dSDimitry Andric return false; 25610b57cec5SDimitry Andric } 25620b57cec5SDimitry Andric 25630b57cec5SDimitry Andric if (IsMUBUF) { 25640b57cec5SDimitry Andric // Disable offen so we don't need a 0 vgpr base. 25650b57cec5SDimitry Andric assert(static_cast<int>(FIOperandNum) == 25660b57cec5SDimitry Andric AMDGPU::getNamedOperandIdx(MI->getOpcode(), 25670b57cec5SDimitry Andric AMDGPU::OpName::vaddr)); 25680b57cec5SDimitry Andric 25695ffd83dbSDimitry Andric auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset); 2570e8d8bef9SDimitry Andric assert((SOffset.isImm() && SOffset.getImm() == 0)); 2571e8d8bef9SDimitry Andric 2572e8d8bef9SDimitry Andric if (FrameReg != AMDGPU::NoRegister) 2573e8d8bef9SDimitry Andric SOffset.ChangeToRegister(FrameReg, false); 25740b57cec5SDimitry Andric 25750b57cec5SDimitry Andric int64_t Offset = FrameInfo.getObjectOffset(Index); 25760b57cec5SDimitry Andric int64_t OldImm 25770b57cec5SDimitry Andric = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(); 25780b57cec5SDimitry Andric int64_t NewOffset = OldImm + Offset; 25790b57cec5SDimitry Andric 25805f757f3fSDimitry Andric if (TII->isLegalMUBUFImmOffset(NewOffset) && 25818bcb0991SDimitry Andric buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) { 25820b57cec5SDimitry Andric MI->eraseFromParent(); 2583bdd1243dSDimitry Andric return true; 25840b57cec5SDimitry Andric } 25850b57cec5SDimitry Andric } 25860b57cec5SDimitry Andric 25870b57cec5SDimitry Andric // If the offset is simply too big, don't convert to a scratch wave offset 25880b57cec5SDimitry Andric // relative index. 
25890b57cec5SDimitry Andric 25900b57cec5SDimitry Andric FIOp.ChangeToImmediate(Offset); 25910b57cec5SDimitry Andric if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) { 259206c3fb27SDimitry Andric Register TmpReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, 259306c3fb27SDimitry Andric MI, false, 0); 25940b57cec5SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) 25950b57cec5SDimitry Andric .addImm(Offset); 25960b57cec5SDimitry Andric FIOp.ChangeToRegister(TmpReg, false, false, true); 25970b57cec5SDimitry Andric } 25980b57cec5SDimitry Andric } 25990b57cec5SDimitry Andric } 2600bdd1243dSDimitry Andric return false; 26010b57cec5SDimitry Andric } 26020b57cec5SDimitry Andric 26035ffd83dbSDimitry Andric StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const { 26048bcb0991SDimitry Andric return AMDGPUInstPrinter::getRegisterName(Reg); 26050b57cec5SDimitry Andric } 26060b57cec5SDimitry Andric 26075f757f3fSDimitry Andric unsigned AMDGPU::getRegBitWidth(const TargetRegisterClass &RC) { 26085f757f3fSDimitry Andric return getRegBitWidth(RC.getID()); 26095f757f3fSDimitry Andric } 26105f757f3fSDimitry Andric 2611fe6060f1SDimitry Andric static const TargetRegisterClass * 2612fe6060f1SDimitry Andric getAnyVGPRClassForBitWidth(unsigned BitWidth) { 261306c3fb27SDimitry Andric if (BitWidth == 64) 26145ffd83dbSDimitry Andric return &AMDGPU::VReg_64RegClass; 261506c3fb27SDimitry Andric if (BitWidth == 96) 26165ffd83dbSDimitry Andric return &AMDGPU::VReg_96RegClass; 261706c3fb27SDimitry Andric if (BitWidth == 128) 26185ffd83dbSDimitry Andric return &AMDGPU::VReg_128RegClass; 261906c3fb27SDimitry Andric if (BitWidth == 160) 26205ffd83dbSDimitry Andric return &AMDGPU::VReg_160RegClass; 262106c3fb27SDimitry Andric if (BitWidth == 192) 26225ffd83dbSDimitry Andric return &AMDGPU::VReg_192RegClass; 262306c3fb27SDimitry Andric if (BitWidth == 224) 2624fe6060f1SDimitry Andric return &AMDGPU::VReg_224RegClass; 262506c3fb27SDimitry Andric if (BitWidth == 256) 26265ffd83dbSDimitry Andric return &AMDGPU::VReg_256RegClass; 262706c3fb27SDimitry Andric if (BitWidth == 288) 2628bdd1243dSDimitry Andric return &AMDGPU::VReg_288RegClass; 262906c3fb27SDimitry Andric if (BitWidth == 320) 2630bdd1243dSDimitry Andric return &AMDGPU::VReg_320RegClass; 263106c3fb27SDimitry Andric if (BitWidth == 352) 2632bdd1243dSDimitry Andric return &AMDGPU::VReg_352RegClass; 263306c3fb27SDimitry Andric if (BitWidth == 384) 2634bdd1243dSDimitry Andric return &AMDGPU::VReg_384RegClass; 263506c3fb27SDimitry Andric if (BitWidth == 512) 26365ffd83dbSDimitry Andric return &AMDGPU::VReg_512RegClass; 263706c3fb27SDimitry Andric if (BitWidth == 1024) 26385ffd83dbSDimitry Andric return &AMDGPU::VReg_1024RegClass; 26395ffd83dbSDimitry Andric 26405ffd83dbSDimitry Andric return nullptr; 26415ffd83dbSDimitry Andric } 26425ffd83dbSDimitry Andric 2643fe6060f1SDimitry Andric static const TargetRegisterClass * 2644fe6060f1SDimitry Andric getAlignedVGPRClassForBitWidth(unsigned BitWidth) { 264506c3fb27SDimitry Andric if (BitWidth == 64) 2646fe6060f1SDimitry Andric return &AMDGPU::VReg_64_Align2RegClass; 264706c3fb27SDimitry Andric if (BitWidth == 96) 2648fe6060f1SDimitry Andric return &AMDGPU::VReg_96_Align2RegClass; 264906c3fb27SDimitry Andric if (BitWidth == 128) 2650fe6060f1SDimitry Andric return &AMDGPU::VReg_128_Align2RegClass; 265106c3fb27SDimitry Andric if (BitWidth == 160) 2652fe6060f1SDimitry Andric return &AMDGPU::VReg_160_Align2RegClass; 265306c3fb27SDimitry Andric if (BitWidth == 192) 2654fe6060f1SDimitry 
Andric return &AMDGPU::VReg_192_Align2RegClass; 265506c3fb27SDimitry Andric if (BitWidth == 224) 2656fe6060f1SDimitry Andric return &AMDGPU::VReg_224_Align2RegClass; 265706c3fb27SDimitry Andric if (BitWidth == 256) 2658fe6060f1SDimitry Andric return &AMDGPU::VReg_256_Align2RegClass; 265906c3fb27SDimitry Andric if (BitWidth == 288) 2660bdd1243dSDimitry Andric return &AMDGPU::VReg_288_Align2RegClass; 266106c3fb27SDimitry Andric if (BitWidth == 320) 2662bdd1243dSDimitry Andric return &AMDGPU::VReg_320_Align2RegClass; 266306c3fb27SDimitry Andric if (BitWidth == 352) 2664bdd1243dSDimitry Andric return &AMDGPU::VReg_352_Align2RegClass; 266506c3fb27SDimitry Andric if (BitWidth == 384) 2666bdd1243dSDimitry Andric return &AMDGPU::VReg_384_Align2RegClass; 266706c3fb27SDimitry Andric if (BitWidth == 512) 2668fe6060f1SDimitry Andric return &AMDGPU::VReg_512_Align2RegClass; 266906c3fb27SDimitry Andric if (BitWidth == 1024) 2670fe6060f1SDimitry Andric return &AMDGPU::VReg_1024_Align2RegClass; 2671fe6060f1SDimitry Andric 2672fe6060f1SDimitry Andric return nullptr; 2673fe6060f1SDimitry Andric } 2674fe6060f1SDimitry Andric 26755ffd83dbSDimitry Andric const TargetRegisterClass * 2676fe6060f1SDimitry Andric SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const { 2677fe6060f1SDimitry Andric if (BitWidth == 1) 2678fe6060f1SDimitry Andric return &AMDGPU::VReg_1RegClass; 267906c3fb27SDimitry Andric if (BitWidth == 16) 2680647cbc5dSDimitry Andric return &AMDGPU::VGPR_16RegClass; 268106c3fb27SDimitry Andric if (BitWidth == 32) 2682fe6060f1SDimitry Andric return &AMDGPU::VGPR_32RegClass; 2683fe6060f1SDimitry Andric return ST.needsAlignedVGPRs() ? getAlignedVGPRClassForBitWidth(BitWidth) 2684fe6060f1SDimitry Andric : getAnyVGPRClassForBitWidth(BitWidth); 2685fe6060f1SDimitry Andric } 2686fe6060f1SDimitry Andric 2687fe6060f1SDimitry Andric static const TargetRegisterClass * 2688fe6060f1SDimitry Andric getAnyAGPRClassForBitWidth(unsigned BitWidth) { 268906c3fb27SDimitry Andric if (BitWidth == 64) 26905ffd83dbSDimitry Andric return &AMDGPU::AReg_64RegClass; 269106c3fb27SDimitry Andric if (BitWidth == 96) 26925ffd83dbSDimitry Andric return &AMDGPU::AReg_96RegClass; 269306c3fb27SDimitry Andric if (BitWidth == 128) 26945ffd83dbSDimitry Andric return &AMDGPU::AReg_128RegClass; 269506c3fb27SDimitry Andric if (BitWidth == 160) 26965ffd83dbSDimitry Andric return &AMDGPU::AReg_160RegClass; 269706c3fb27SDimitry Andric if (BitWidth == 192) 26985ffd83dbSDimitry Andric return &AMDGPU::AReg_192RegClass; 269906c3fb27SDimitry Andric if (BitWidth == 224) 2700fe6060f1SDimitry Andric return &AMDGPU::AReg_224RegClass; 270106c3fb27SDimitry Andric if (BitWidth == 256) 27025ffd83dbSDimitry Andric return &AMDGPU::AReg_256RegClass; 270306c3fb27SDimitry Andric if (BitWidth == 288) 2704bdd1243dSDimitry Andric return &AMDGPU::AReg_288RegClass; 270506c3fb27SDimitry Andric if (BitWidth == 320) 2706bdd1243dSDimitry Andric return &AMDGPU::AReg_320RegClass; 270706c3fb27SDimitry Andric if (BitWidth == 352) 2708bdd1243dSDimitry Andric return &AMDGPU::AReg_352RegClass; 270906c3fb27SDimitry Andric if (BitWidth == 384) 2710bdd1243dSDimitry Andric return &AMDGPU::AReg_384RegClass; 271106c3fb27SDimitry Andric if (BitWidth == 512) 27125ffd83dbSDimitry Andric return &AMDGPU::AReg_512RegClass; 271306c3fb27SDimitry Andric if (BitWidth == 1024) 27145ffd83dbSDimitry Andric return &AMDGPU::AReg_1024RegClass; 27155ffd83dbSDimitry Andric 27165ffd83dbSDimitry Andric return nullptr; 27175ffd83dbSDimitry Andric } 27185ffd83dbSDimitry Andric 
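// The *_Align2 classes below are the even-aligned counterparts of the classes
// above: they contain only tuples whose first register is even. They are
// selected through ST.needsAlignedVGPRs() on subtargets (such as gfx90a) that
// require 64-bit and wider VGPR/AGPR tuples to be even-aligned.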
2719fe6060f1SDimitry Andric static const TargetRegisterClass * 2720fe6060f1SDimitry Andric getAlignedAGPRClassForBitWidth(unsigned BitWidth) { 272106c3fb27SDimitry Andric if (BitWidth == 64) 2722fe6060f1SDimitry Andric return &AMDGPU::AReg_64_Align2RegClass; 272306c3fb27SDimitry Andric if (BitWidth == 96) 2724fe6060f1SDimitry Andric return &AMDGPU::AReg_96_Align2RegClass; 272506c3fb27SDimitry Andric if (BitWidth == 128) 2726fe6060f1SDimitry Andric return &AMDGPU::AReg_128_Align2RegClass; 272706c3fb27SDimitry Andric if (BitWidth == 160) 2728fe6060f1SDimitry Andric return &AMDGPU::AReg_160_Align2RegClass; 272906c3fb27SDimitry Andric if (BitWidth == 192) 2730fe6060f1SDimitry Andric return &AMDGPU::AReg_192_Align2RegClass; 273106c3fb27SDimitry Andric if (BitWidth == 224) 2732fe6060f1SDimitry Andric return &AMDGPU::AReg_224_Align2RegClass; 273306c3fb27SDimitry Andric if (BitWidth == 256) 2734fe6060f1SDimitry Andric return &AMDGPU::AReg_256_Align2RegClass; 273506c3fb27SDimitry Andric if (BitWidth == 288) 2736bdd1243dSDimitry Andric return &AMDGPU::AReg_288_Align2RegClass; 273706c3fb27SDimitry Andric if (BitWidth == 320) 2738bdd1243dSDimitry Andric return &AMDGPU::AReg_320_Align2RegClass; 273906c3fb27SDimitry Andric if (BitWidth == 352) 2740bdd1243dSDimitry Andric return &AMDGPU::AReg_352_Align2RegClass; 274106c3fb27SDimitry Andric if (BitWidth == 384) 2742bdd1243dSDimitry Andric return &AMDGPU::AReg_384_Align2RegClass; 274306c3fb27SDimitry Andric if (BitWidth == 512) 2744fe6060f1SDimitry Andric return &AMDGPU::AReg_512_Align2RegClass; 274506c3fb27SDimitry Andric if (BitWidth == 1024) 2746fe6060f1SDimitry Andric return &AMDGPU::AReg_1024_Align2RegClass; 2747fe6060f1SDimitry Andric 2748fe6060f1SDimitry Andric return nullptr; 2749fe6060f1SDimitry Andric } 2750fe6060f1SDimitry Andric 2751fe6060f1SDimitry Andric const TargetRegisterClass * 2752fe6060f1SDimitry Andric SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const { 275306c3fb27SDimitry Andric if (BitWidth == 16) 2754fe6060f1SDimitry Andric return &AMDGPU::AGPR_LO16RegClass; 275506c3fb27SDimitry Andric if (BitWidth == 32) 2756fe6060f1SDimitry Andric return &AMDGPU::AGPR_32RegClass; 2757fe6060f1SDimitry Andric return ST.needsAlignedVGPRs() ? 
getAlignedAGPRClassForBitWidth(BitWidth) 2758fe6060f1SDimitry Andric : getAnyAGPRClassForBitWidth(BitWidth); 2759fe6060f1SDimitry Andric } 2760fe6060f1SDimitry Andric 27614824e7fdSDimitry Andric static const TargetRegisterClass * 27624824e7fdSDimitry Andric getAnyVectorSuperClassForBitWidth(unsigned BitWidth) { 276306c3fb27SDimitry Andric if (BitWidth == 64) 27644824e7fdSDimitry Andric return &AMDGPU::AV_64RegClass; 276506c3fb27SDimitry Andric if (BitWidth == 96) 27664824e7fdSDimitry Andric return &AMDGPU::AV_96RegClass; 276706c3fb27SDimitry Andric if (BitWidth == 128) 27684824e7fdSDimitry Andric return &AMDGPU::AV_128RegClass; 276906c3fb27SDimitry Andric if (BitWidth == 160) 27704824e7fdSDimitry Andric return &AMDGPU::AV_160RegClass; 277106c3fb27SDimitry Andric if (BitWidth == 192) 27724824e7fdSDimitry Andric return &AMDGPU::AV_192RegClass; 277306c3fb27SDimitry Andric if (BitWidth == 224) 27744824e7fdSDimitry Andric return &AMDGPU::AV_224RegClass; 277506c3fb27SDimitry Andric if (BitWidth == 256) 27764824e7fdSDimitry Andric return &AMDGPU::AV_256RegClass; 277706c3fb27SDimitry Andric if (BitWidth == 288) 2778bdd1243dSDimitry Andric return &AMDGPU::AV_288RegClass; 277906c3fb27SDimitry Andric if (BitWidth == 320) 2780bdd1243dSDimitry Andric return &AMDGPU::AV_320RegClass; 278106c3fb27SDimitry Andric if (BitWidth == 352) 2782bdd1243dSDimitry Andric return &AMDGPU::AV_352RegClass; 278306c3fb27SDimitry Andric if (BitWidth == 384) 2784bdd1243dSDimitry Andric return &AMDGPU::AV_384RegClass; 278506c3fb27SDimitry Andric if (BitWidth == 512) 27864824e7fdSDimitry Andric return &AMDGPU::AV_512RegClass; 278706c3fb27SDimitry Andric if (BitWidth == 1024) 27884824e7fdSDimitry Andric return &AMDGPU::AV_1024RegClass; 27894824e7fdSDimitry Andric 27904824e7fdSDimitry Andric return nullptr; 27914824e7fdSDimitry Andric } 27924824e7fdSDimitry Andric 27934824e7fdSDimitry Andric static const TargetRegisterClass * 27944824e7fdSDimitry Andric getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) { 279506c3fb27SDimitry Andric if (BitWidth == 64) 27964824e7fdSDimitry Andric return &AMDGPU::AV_64_Align2RegClass; 279706c3fb27SDimitry Andric if (BitWidth == 96) 27984824e7fdSDimitry Andric return &AMDGPU::AV_96_Align2RegClass; 279906c3fb27SDimitry Andric if (BitWidth == 128) 28004824e7fdSDimitry Andric return &AMDGPU::AV_128_Align2RegClass; 280106c3fb27SDimitry Andric if (BitWidth == 160) 28024824e7fdSDimitry Andric return &AMDGPU::AV_160_Align2RegClass; 280306c3fb27SDimitry Andric if (BitWidth == 192) 28044824e7fdSDimitry Andric return &AMDGPU::AV_192_Align2RegClass; 280506c3fb27SDimitry Andric if (BitWidth == 224) 28064824e7fdSDimitry Andric return &AMDGPU::AV_224_Align2RegClass; 280706c3fb27SDimitry Andric if (BitWidth == 256) 28084824e7fdSDimitry Andric return &AMDGPU::AV_256_Align2RegClass; 280906c3fb27SDimitry Andric if (BitWidth == 288) 2810bdd1243dSDimitry Andric return &AMDGPU::AV_288_Align2RegClass; 281106c3fb27SDimitry Andric if (BitWidth == 320) 2812bdd1243dSDimitry Andric return &AMDGPU::AV_320_Align2RegClass; 281306c3fb27SDimitry Andric if (BitWidth == 352) 2814bdd1243dSDimitry Andric return &AMDGPU::AV_352_Align2RegClass; 281506c3fb27SDimitry Andric if (BitWidth == 384) 2816bdd1243dSDimitry Andric return &AMDGPU::AV_384_Align2RegClass; 281706c3fb27SDimitry Andric if (BitWidth == 512) 28184824e7fdSDimitry Andric return &AMDGPU::AV_512_Align2RegClass; 281906c3fb27SDimitry Andric if (BitWidth == 1024) 28204824e7fdSDimitry Andric return &AMDGPU::AV_1024_Align2RegClass; 28214824e7fdSDimitry Andric 
28224824e7fdSDimitry Andric return nullptr; 28234824e7fdSDimitry Andric } 28244824e7fdSDimitry Andric 28254824e7fdSDimitry Andric const TargetRegisterClass * 28264824e7fdSDimitry Andric SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const { 282706c3fb27SDimitry Andric if (BitWidth == 32) 28284824e7fdSDimitry Andric return &AMDGPU::AV_32RegClass; 28294824e7fdSDimitry Andric return ST.needsAlignedVGPRs() 28304824e7fdSDimitry Andric ? getAlignedVectorSuperClassForBitWidth(BitWidth) 28314824e7fdSDimitry Andric : getAnyVectorSuperClassForBitWidth(BitWidth); 28324824e7fdSDimitry Andric } 28334824e7fdSDimitry Andric 28345ffd83dbSDimitry Andric const TargetRegisterClass * 28355ffd83dbSDimitry Andric SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) { 283606c3fb27SDimitry Andric if (BitWidth == 16) 28375ffd83dbSDimitry Andric return &AMDGPU::SGPR_LO16RegClass; 283806c3fb27SDimitry Andric if (BitWidth == 32) 28395ffd83dbSDimitry Andric return &AMDGPU::SReg_32RegClass; 284006c3fb27SDimitry Andric if (BitWidth == 64) 28415ffd83dbSDimitry Andric return &AMDGPU::SReg_64RegClass; 284206c3fb27SDimitry Andric if (BitWidth == 96) 28435ffd83dbSDimitry Andric return &AMDGPU::SGPR_96RegClass; 284406c3fb27SDimitry Andric if (BitWidth == 128) 28455ffd83dbSDimitry Andric return &AMDGPU::SGPR_128RegClass; 284606c3fb27SDimitry Andric if (BitWidth == 160) 28475ffd83dbSDimitry Andric return &AMDGPU::SGPR_160RegClass; 284806c3fb27SDimitry Andric if (BitWidth == 192) 28495ffd83dbSDimitry Andric return &AMDGPU::SGPR_192RegClass; 285006c3fb27SDimitry Andric if (BitWidth == 224) 2851fe6060f1SDimitry Andric return &AMDGPU::SGPR_224RegClass; 285206c3fb27SDimitry Andric if (BitWidth == 256) 28535ffd83dbSDimitry Andric return &AMDGPU::SGPR_256RegClass; 285406c3fb27SDimitry Andric if (BitWidth == 288) 2855bdd1243dSDimitry Andric return &AMDGPU::SGPR_288RegClass; 285606c3fb27SDimitry Andric if (BitWidth == 320) 2857bdd1243dSDimitry Andric return &AMDGPU::SGPR_320RegClass; 285806c3fb27SDimitry Andric if (BitWidth == 352) 2859bdd1243dSDimitry Andric return &AMDGPU::SGPR_352RegClass; 286006c3fb27SDimitry Andric if (BitWidth == 384) 2861bdd1243dSDimitry Andric return &AMDGPU::SGPR_384RegClass; 286206c3fb27SDimitry Andric if (BitWidth == 512) 28635ffd83dbSDimitry Andric return &AMDGPU::SGPR_512RegClass; 286406c3fb27SDimitry Andric if (BitWidth == 1024) 28655ffd83dbSDimitry Andric return &AMDGPU::SGPR_1024RegClass; 28665ffd83dbSDimitry Andric 28675ffd83dbSDimitry Andric return nullptr; 28685ffd83dbSDimitry Andric } 28695ffd83dbSDimitry Andric 2870e8d8bef9SDimitry Andric bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI, 2871e8d8bef9SDimitry Andric Register Reg) const { 2872e8d8bef9SDimitry Andric const TargetRegisterClass *RC; 2873e8d8bef9SDimitry Andric if (Reg.isVirtual()) 2874e8d8bef9SDimitry Andric RC = MRI.getRegClass(Reg); 2875e8d8bef9SDimitry Andric else 2876bdd1243dSDimitry Andric RC = getPhysRegBaseClass(Reg); 2877bdd1243dSDimitry Andric return RC ? 
isSGPRClass(RC) : false; 2878e8d8bef9SDimitry Andric } 2879e8d8bef9SDimitry Andric 28805ffd83dbSDimitry Andric const TargetRegisterClass * 28815ffd83dbSDimitry Andric SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const { 28825ffd83dbSDimitry Andric unsigned Size = getRegSizeInBits(*SRC); 28835ffd83dbSDimitry Andric const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size); 28845ffd83dbSDimitry Andric assert(VRC && "Invalid register class size"); 28855ffd83dbSDimitry Andric return VRC; 28860b57cec5SDimitry Andric } 28870b57cec5SDimitry Andric 28885ffd83dbSDimitry Andric const TargetRegisterClass * 28895ffd83dbSDimitry Andric SIRegisterInfo::getEquivalentAGPRClass(const TargetRegisterClass *SRC) const { 28905ffd83dbSDimitry Andric unsigned Size = getRegSizeInBits(*SRC); 28915ffd83dbSDimitry Andric const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size); 28925ffd83dbSDimitry Andric assert(ARC && "Invalid register class size"); 28935ffd83dbSDimitry Andric return ARC; 28940b57cec5SDimitry Andric } 28950b57cec5SDimitry Andric 28965ffd83dbSDimitry Andric const TargetRegisterClass * 28975ffd83dbSDimitry Andric SIRegisterInfo::getEquivalentSGPRClass(const TargetRegisterClass *VRC) const { 28985ffd83dbSDimitry Andric unsigned Size = getRegSizeInBits(*VRC); 28995ffd83dbSDimitry Andric if (Size == 32) 29000b57cec5SDimitry Andric return &AMDGPU::SGPR_32RegClass; 29015ffd83dbSDimitry Andric const TargetRegisterClass *SRC = getSGPRClassForBitWidth(Size); 29025ffd83dbSDimitry Andric assert(SRC && "Invalid register class size"); 29035ffd83dbSDimitry Andric return SRC; 29040b57cec5SDimitry Andric } 29050b57cec5SDimitry Andric 2906fe6060f1SDimitry Andric const TargetRegisterClass * 2907fe6060f1SDimitry Andric SIRegisterInfo::getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, 2908fe6060f1SDimitry Andric const TargetRegisterClass *SubRC, 2909fe6060f1SDimitry Andric unsigned SubIdx) const { 2910fe6060f1SDimitry Andric // Ensure this subregister index is aligned in the super register. 2911fe6060f1SDimitry Andric const TargetRegisterClass *MatchRC = 2912fe6060f1SDimitry Andric getMatchingSuperRegClass(SuperRC, SubRC, SubIdx); 2913fe6060f1SDimitry Andric return MatchRC && MatchRC->hasSubClassEq(SuperRC) ? MatchRC : nullptr; 2914fe6060f1SDimitry Andric } 2915fe6060f1SDimitry Andric 29168bcb0991SDimitry Andric bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const { 29178bcb0991SDimitry Andric if (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST && 29188bcb0991SDimitry Andric OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST) 29198bcb0991SDimitry Andric return !ST.hasMFMAInlineLiteralBug(); 29208bcb0991SDimitry Andric 29218bcb0991SDimitry Andric return OpType >= AMDGPU::OPERAND_SRC_FIRST && 29228bcb0991SDimitry Andric OpType <= AMDGPU::OPERAND_SRC_LAST; 29238bcb0991SDimitry Andric } 29248bcb0991SDimitry Andric 29250b57cec5SDimitry Andric bool SIRegisterInfo::shouldRewriteCopySrc( 29260b57cec5SDimitry Andric const TargetRegisterClass *DefRC, 29270b57cec5SDimitry Andric unsigned DefSubReg, 29280b57cec5SDimitry Andric const TargetRegisterClass *SrcRC, 29290b57cec5SDimitry Andric unsigned SrcSubReg) const { 29300b57cec5SDimitry Andric // We want to prefer the smallest register class possible, so we don't want to 29310b57cec5SDimitry Andric // stop and rewrite on anything that looks like a subregister 29320b57cec5SDimitry Andric // extract. 
Operations mostly don't care about the super register class, so we
29330b57cec5SDimitry Andric // only want to stop on the most basic of copies between the same register
29340b57cec5SDimitry Andric // class.
29350b57cec5SDimitry Andric //
29360b57cec5SDimitry Andric // e.g. if we have something like
29370b57cec5SDimitry Andric // %0 = ...
29380b57cec5SDimitry Andric // %1 = ...
29390b57cec5SDimitry Andric // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
29400b57cec5SDimitry Andric // %3 = COPY %2, sub0
29410b57cec5SDimitry Andric //
29420b57cec5SDimitry Andric // We want to look through the COPY to find:
29430b57cec5SDimitry Andric // => %3 = COPY %0
29440b57cec5SDimitry Andric
29450b57cec5SDimitry Andric // Plain copy.
29460b57cec5SDimitry Andric return getCommonSubClass(DefRC, SrcRC) != nullptr;
29470b57cec5SDimitry Andric }
29480b57cec5SDimitry Andric
2949e8d8bef9SDimitry Andric bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
2950e8d8bef9SDimitry Andric // TODO: 64-bit operands have extending behavior from 32-bit literal.
2951e8d8bef9SDimitry Andric return OpType >= AMDGPU::OPERAND_REG_IMM_FIRST &&
2952e8d8bef9SDimitry Andric OpType <= AMDGPU::OPERAND_REG_IMM_LAST;
2953e8d8bef9SDimitry Andric }
2954e8d8bef9SDimitry Andric
29555ffd83dbSDimitry Andric /// Returns the lowest register that is not used at any point in the function.
29560b57cec5SDimitry Andric /// If all registers are used, then this function will return
295706c3fb27SDimitry Andric /// AMDGPU::NoRegister. If \p ReserveHighestRegister = true, then return
29585ffd83dbSDimitry Andric /// the highest unused register.
295906c3fb27SDimitry Andric MCRegister SIRegisterInfo::findUnusedRegister(
296006c3fb27SDimitry Andric const MachineRegisterInfo &MRI, const TargetRegisterClass *RC,
296106c3fb27SDimitry Andric const MachineFunction &MF, bool ReserveHighestRegister) const {
296206c3fb27SDimitry Andric if (ReserveHighestRegister) {
29635ffd83dbSDimitry Andric for (MCRegister Reg : reverse(*RC))
29640b57cec5SDimitry Andric if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
29650b57cec5SDimitry Andric return Reg;
29665ffd83dbSDimitry Andric } else {
29675ffd83dbSDimitry Andric for (MCRegister Reg : *RC)
29685ffd83dbSDimitry Andric if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
29695ffd83dbSDimitry Andric return Reg;
29705ffd83dbSDimitry Andric }
29715ffd83dbSDimitry Andric return MCRegister();
29720b57cec5SDimitry Andric }
29730b57cec5SDimitry Andric
297406c3fb27SDimitry Andric bool SIRegisterInfo::isUniformReg(const MachineRegisterInfo &MRI,
297506c3fb27SDimitry Andric const RegisterBankInfo &RBI,
297606c3fb27SDimitry Andric Register Reg) const {
297706c3fb27SDimitry Andric auto *RB = RBI.getRegBank(Reg, MRI, *MRI.getTargetRegisterInfo());
297806c3fb27SDimitry Andric if (!RB)
297906c3fb27SDimitry Andric return false;
298006c3fb27SDimitry Andric
298106c3fb27SDimitry Andric return !RBI.isDivergentRegBank(RB);
298206c3fb27SDimitry Andric }
298306c3fb27SDimitry Andric
29840b57cec5SDimitry Andric ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
29850b57cec5SDimitry Andric unsigned EltSize) const {
298606c3fb27SDimitry Andric const unsigned RegBitWidth = AMDGPU::getRegBitWidth(*RC);
29875ffd83dbSDimitry Andric assert(RegBitWidth >= 32 && RegBitWidth <= 1024);
29880b57cec5SDimitry Andric
29895ffd83dbSDimitry Andric const unsigned RegDWORDs = RegBitWidth / 32;
29905ffd83dbSDimitry Andric const unsigned EltDWORDs = EltSize / 4;
29915ffd83dbSDimitry Andric assert(RegSplitParts.size()
+ 1 >= EltDWORDs);
29920b57cec5SDimitry Andric
29935ffd83dbSDimitry Andric const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1];
29945ffd83dbSDimitry Andric const unsigned NumParts = RegDWORDs / EltDWORDs;
29950b57cec5SDimitry Andric
2996bdd1243dSDimitry Andric return ArrayRef(Parts.data(), NumParts);
29970b57cec5SDimitry Andric }
29980b57cec5SDimitry Andric
29990b57cec5SDimitry Andric const TargetRegisterClass*
30000b57cec5SDimitry Andric SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
30015ffd83dbSDimitry Andric Register Reg) const {
3002bdd1243dSDimitry Andric return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegBaseClass(Reg);
3003bdd1243dSDimitry Andric }
3004bdd1243dSDimitry Andric
3005bdd1243dSDimitry Andric const TargetRegisterClass *
3006bdd1243dSDimitry Andric SIRegisterInfo::getRegClassForOperandReg(const MachineRegisterInfo &MRI,
3007bdd1243dSDimitry Andric const MachineOperand &MO) const {
3008bdd1243dSDimitry Andric const TargetRegisterClass *SrcRC = getRegClassForReg(MRI, MO.getReg());
3009bdd1243dSDimitry Andric return getSubRegisterClass(SrcRC, MO.getSubReg());
30100b57cec5SDimitry Andric }
30110b57cec5SDimitry Andric
30120b57cec5SDimitry Andric bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
30135ffd83dbSDimitry Andric Register Reg) const {
30140b57cec5SDimitry Andric const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
30155ffd83dbSDimitry Andric // Registers without classes are unaddressable, SGPR-like registers.
3016349cc55cSDimitry Andric return RC && isVGPRClass(RC);
30170b57cec5SDimitry Andric }
30180b57cec5SDimitry Andric
30190b57cec5SDimitry Andric bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
30205ffd83dbSDimitry Andric Register Reg) const {
30210b57cec5SDimitry Andric const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
30225ffd83dbSDimitry Andric
30235ffd83dbSDimitry Andric // Registers without classes are unaddressable, SGPR-like registers.
3024349cc55cSDimitry Andric return RC && isAGPRClass(RC);
30250b57cec5SDimitry Andric }
30260b57cec5SDimitry Andric
30270b57cec5SDimitry Andric bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
30280b57cec5SDimitry Andric const TargetRegisterClass *SrcRC,
30290b57cec5SDimitry Andric unsigned SubReg,
30300b57cec5SDimitry Andric const TargetRegisterClass *DstRC,
30310b57cec5SDimitry Andric unsigned DstSubReg,
30320b57cec5SDimitry Andric const TargetRegisterClass *NewRC,
30330b57cec5SDimitry Andric LiveIntervals &LIS) const {
30340b57cec5SDimitry Andric unsigned SrcSize = getRegSizeInBits(*SrcRC);
30350b57cec5SDimitry Andric unsigned DstSize = getRegSizeInBits(*DstRC);
30360b57cec5SDimitry Andric unsigned NewSize = getRegSizeInBits(*NewRC);
30370b57cec5SDimitry Andric
30380b57cec5SDimitry Andric // Do not increase the size of registers beyond a dword; we would need to
30390b57cec5SDimitry Andric // allocate adjacent registers and constrain regalloc more than needed.
30400b57cec5SDimitry Andric
30410b57cec5SDimitry Andric // Always allow dword coalescing.
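// A copy where either side fits in a single dword never widens a tuple, so it
// is always coalesced; wider copies are accepted only when the combined class
// is no larger than one of the original operands.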
30420b57cec5SDimitry Andric if (SrcSize <= 32 || DstSize <= 32) 30430b57cec5SDimitry Andric return true; 30440b57cec5SDimitry Andric 30450b57cec5SDimitry Andric return NewSize <= DstSize || NewSize <= SrcSize; 30460b57cec5SDimitry Andric } 30470b57cec5SDimitry Andric 30480b57cec5SDimitry Andric unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, 30490b57cec5SDimitry Andric MachineFunction &MF) const { 30500b57cec5SDimitry Andric const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 30510b57cec5SDimitry Andric 30520b57cec5SDimitry Andric unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(), 30530b57cec5SDimitry Andric MF.getFunction()); 30540b57cec5SDimitry Andric switch (RC->getID()) { 30550b57cec5SDimitry Andric default: 30565ffd83dbSDimitry Andric return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF); 30570b57cec5SDimitry Andric case AMDGPU::VGPR_32RegClassID: 30580b57cec5SDimitry Andric return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF)); 30590b57cec5SDimitry Andric case AMDGPU::SGPR_32RegClassID: 30605ffd83dbSDimitry Andric case AMDGPU::SGPR_LO16RegClassID: 30610b57cec5SDimitry Andric return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF)); 30620b57cec5SDimitry Andric } 30630b57cec5SDimitry Andric } 30640b57cec5SDimitry Andric 30650b57cec5SDimitry Andric unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF, 30660b57cec5SDimitry Andric unsigned Idx) const { 30675ffd83dbSDimitry Andric if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 || 30685ffd83dbSDimitry Andric Idx == AMDGPU::RegisterPressureSets::AGPR_32) 30690b57cec5SDimitry Andric return getRegPressureLimit(&AMDGPU::VGPR_32RegClass, 30700b57cec5SDimitry Andric const_cast<MachineFunction &>(MF)); 30710b57cec5SDimitry Andric 30725ffd83dbSDimitry Andric if (Idx == AMDGPU::RegisterPressureSets::SReg_32) 30730b57cec5SDimitry Andric return getRegPressureLimit(&AMDGPU::SGPR_32RegClass, 30740b57cec5SDimitry Andric const_cast<MachineFunction &>(MF)); 30750b57cec5SDimitry Andric 30765ffd83dbSDimitry Andric llvm_unreachable("Unexpected register pressure set!"); 30770b57cec5SDimitry Andric } 30780b57cec5SDimitry Andric 30790b57cec5SDimitry Andric const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const { 30800b57cec5SDimitry Andric static const int Empty[] = { -1 }; 30810b57cec5SDimitry Andric 30825ffd83dbSDimitry Andric if (RegPressureIgnoredUnits[RegUnit]) 30830b57cec5SDimitry Andric return Empty; 30845ffd83dbSDimitry Andric 30855ffd83dbSDimitry Andric return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit); 30860b57cec5SDimitry Andric } 30870b57cec5SDimitry Andric 30885ffd83dbSDimitry Andric MCRegister SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const { 30890b57cec5SDimitry Andric // Not a callee saved register. 30900b57cec5SDimitry Andric return AMDGPU::SGPR30_SGPR31; 30910b57cec5SDimitry Andric } 30920b57cec5SDimitry Andric 30930b57cec5SDimitry Andric const TargetRegisterClass * 30940b57cec5SDimitry Andric SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size, 309581ad6265SDimitry Andric const RegisterBank &RB) const { 30960b57cec5SDimitry Andric switch (RB.getID()) { 30970b57cec5SDimitry Andric case AMDGPU::VGPRRegBankID: 30985f757f3fSDimitry Andric return getVGPRClassForBitWidth( 30995f757f3fSDimitry Andric std::max(ST.useRealTrue16Insts() ? 
16u : 32u, Size)); 31000b57cec5SDimitry Andric case AMDGPU::VCCRegBankID: 31015ffd83dbSDimitry Andric assert(Size == 1); 31025ffd83dbSDimitry Andric return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass 31035ffd83dbSDimitry Andric : &AMDGPU::SReg_64_XEXECRegClass; 31040b57cec5SDimitry Andric case AMDGPU::SGPRRegBankID: 31055ffd83dbSDimitry Andric return getSGPRClassForBitWidth(std::max(32u, Size)); 31065ffd83dbSDimitry Andric case AMDGPU::AGPRRegBankID: 31075ffd83dbSDimitry Andric return getAGPRClassForBitWidth(std::max(32u, Size)); 31080b57cec5SDimitry Andric default: 31090b57cec5SDimitry Andric llvm_unreachable("unknown register bank"); 31100b57cec5SDimitry Andric } 31110b57cec5SDimitry Andric } 31120b57cec5SDimitry Andric 31130b57cec5SDimitry Andric const TargetRegisterClass * 31140b57cec5SDimitry Andric SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO, 31150b57cec5SDimitry Andric const MachineRegisterInfo &MRI) const { 31168bcb0991SDimitry Andric const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg()); 31178bcb0991SDimitry Andric if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank*>()) 311881ad6265SDimitry Andric return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB); 31198bcb0991SDimitry Andric 3120349cc55cSDimitry Andric if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>()) 31218bcb0991SDimitry Andric return getAllocatableClass(RC); 3122349cc55cSDimitry Andric 3123349cc55cSDimitry Andric return nullptr; 31240b57cec5SDimitry Andric } 31250b57cec5SDimitry Andric 31265ffd83dbSDimitry Andric MCRegister SIRegisterInfo::getVCC() const { 31270b57cec5SDimitry Andric return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC; 31280b57cec5SDimitry Andric } 31290b57cec5SDimitry Andric 3130753f127fSDimitry Andric MCRegister SIRegisterInfo::getExec() const { 3131753f127fSDimitry Andric return isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC; 3132753f127fSDimitry Andric } 3133753f127fSDimitry Andric 3134fe6060f1SDimitry Andric const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const { 3135fe6060f1SDimitry Andric // VGPR tuples have an alignment requirement on gfx90a variants. 3136fe6060f1SDimitry Andric return ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass 3137fe6060f1SDimitry Andric : &AMDGPU::VReg_64RegClass; 3138fe6060f1SDimitry Andric } 3139fe6060f1SDimitry Andric 31400b57cec5SDimitry Andric const TargetRegisterClass * 31410b57cec5SDimitry Andric SIRegisterInfo::getRegClass(unsigned RCID) const { 31420b57cec5SDimitry Andric switch ((int)RCID) { 31430b57cec5SDimitry Andric case AMDGPU::SReg_1RegClassID: 31440b57cec5SDimitry Andric return getBoolRC(); 31450b57cec5SDimitry Andric case AMDGPU::SReg_1_XEXECRegClassID: 31460b57cec5SDimitry Andric return isWave32 ? 
&AMDGPU::SReg_32_XM0_XEXECRegClass 31470b57cec5SDimitry Andric : &AMDGPU::SReg_64_XEXECRegClass; 31480b57cec5SDimitry Andric case -1: 31490b57cec5SDimitry Andric return nullptr; 31500b57cec5SDimitry Andric default: 31515ffd83dbSDimitry Andric return AMDGPUGenRegisterInfo::getRegClass(RCID); 31520b57cec5SDimitry Andric } 31530b57cec5SDimitry Andric } 31540b57cec5SDimitry Andric 31550b57cec5SDimitry Andric // Find reaching register definition 31565ffd83dbSDimitry Andric MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg, 31570b57cec5SDimitry Andric MachineInstr &Use, 31580b57cec5SDimitry Andric MachineRegisterInfo &MRI, 31590b57cec5SDimitry Andric LiveIntervals *LIS) const { 3160*0fca6ea1SDimitry Andric auto &MDT = LIS->getDomTree(); 31610b57cec5SDimitry Andric SlotIndex UseIdx = LIS->getInstructionIndex(Use); 31620b57cec5SDimitry Andric SlotIndex DefIdx; 31630b57cec5SDimitry Andric 31645ffd83dbSDimitry Andric if (Reg.isVirtual()) { 31650b57cec5SDimitry Andric if (!LIS->hasInterval(Reg)) 31660b57cec5SDimitry Andric return nullptr; 31670b57cec5SDimitry Andric LiveInterval &LI = LIS->getInterval(Reg); 31680b57cec5SDimitry Andric LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg) 31690b57cec5SDimitry Andric : MRI.getMaxLaneMaskForVReg(Reg); 31700b57cec5SDimitry Andric VNInfo *V = nullptr; 31710b57cec5SDimitry Andric if (LI.hasSubRanges()) { 31720b57cec5SDimitry Andric for (auto &S : LI.subranges()) { 31730b57cec5SDimitry Andric if ((S.LaneMask & SubLanes) == SubLanes) { 31740b57cec5SDimitry Andric V = S.getVNInfoAt(UseIdx); 31750b57cec5SDimitry Andric break; 31760b57cec5SDimitry Andric } 31770b57cec5SDimitry Andric } 31780b57cec5SDimitry Andric } else { 31790b57cec5SDimitry Andric V = LI.getVNInfoAt(UseIdx); 31800b57cec5SDimitry Andric } 31810b57cec5SDimitry Andric if (!V) 31820b57cec5SDimitry Andric return nullptr; 31830b57cec5SDimitry Andric DefIdx = V->def; 31840b57cec5SDimitry Andric } else { 31850b57cec5SDimitry Andric // Find last def. 
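// For a physical register, every register unit must have a live value at the
// use; among the defs of those values, keep the latest one, i.e. the def that
// is dominated by the defs seen so far. If any unit has no value at the use,
// there is no single reaching def.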
318606c3fb27SDimitry Andric for (MCRegUnit Unit : regunits(Reg.asMCReg())) { 318706c3fb27SDimitry Andric LiveRange &LR = LIS->getRegUnit(Unit); 31880b57cec5SDimitry Andric if (VNInfo *V = LR.getVNInfoAt(UseIdx)) { 31890b57cec5SDimitry Andric if (!DefIdx.isValid() || 31900b57cec5SDimitry Andric MDT.dominates(LIS->getInstructionFromIndex(DefIdx), 31910b57cec5SDimitry Andric LIS->getInstructionFromIndex(V->def))) 31920b57cec5SDimitry Andric DefIdx = V->def; 31930b57cec5SDimitry Andric } else { 31940b57cec5SDimitry Andric return nullptr; 31950b57cec5SDimitry Andric } 31960b57cec5SDimitry Andric } 31970b57cec5SDimitry Andric } 31980b57cec5SDimitry Andric 31990b57cec5SDimitry Andric MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx); 32000b57cec5SDimitry Andric 32010b57cec5SDimitry Andric if (!Def || !MDT.dominates(Def, &Use)) 32020b57cec5SDimitry Andric return nullptr; 32030b57cec5SDimitry Andric 32040b57cec5SDimitry Andric assert(Def->modifiesRegister(Reg, this)); 32050b57cec5SDimitry Andric 32060b57cec5SDimitry Andric return Def; 32070b57cec5SDimitry Andric } 32085ffd83dbSDimitry Andric 32095ffd83dbSDimitry Andric MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const { 3210bdd1243dSDimitry Andric assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32); 32115ffd83dbSDimitry Andric 32125ffd83dbSDimitry Andric for (const TargetRegisterClass &RC : { AMDGPU::VGPR_32RegClass, 32135ffd83dbSDimitry Andric AMDGPU::SReg_32RegClass, 32145ffd83dbSDimitry Andric AMDGPU::AGPR_32RegClass } ) { 32155ffd83dbSDimitry Andric if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC)) 32165ffd83dbSDimitry Andric return Super; 32175ffd83dbSDimitry Andric } 32185ffd83dbSDimitry Andric if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16, 32195ffd83dbSDimitry Andric &AMDGPU::VGPR_32RegClass)) { 32205ffd83dbSDimitry Andric return Super; 32215ffd83dbSDimitry Andric } 32225ffd83dbSDimitry Andric 32235ffd83dbSDimitry Andric return AMDGPU::NoRegister; 32245ffd83dbSDimitry Andric } 32255ffd83dbSDimitry Andric 3226fe6060f1SDimitry Andric bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const { 3227fe6060f1SDimitry Andric if (!ST.needsAlignedVGPRs()) 3228fe6060f1SDimitry Andric return true; 3229fe6060f1SDimitry Andric 32304824e7fdSDimitry Andric if (isVGPRClass(&RC)) 3231fe6060f1SDimitry Andric return RC.hasSuperClassEq(getVGPRClassForBitWidth(getRegSizeInBits(RC))); 32324824e7fdSDimitry Andric if (isAGPRClass(&RC)) 3233fe6060f1SDimitry Andric return RC.hasSuperClassEq(getAGPRClassForBitWidth(getRegSizeInBits(RC))); 32344824e7fdSDimitry Andric if (isVectorSuperClass(&RC)) 32354824e7fdSDimitry Andric return RC.hasSuperClassEq( 32364824e7fdSDimitry Andric getVectorSuperClassForBitWidth(getRegSizeInBits(RC))); 3237fe6060f1SDimitry Andric 3238fe6060f1SDimitry Andric return true; 3239fe6060f1SDimitry Andric } 3240fe6060f1SDimitry Andric 324181ad6265SDimitry Andric const TargetRegisterClass * 324281ad6265SDimitry Andric SIRegisterInfo::getProperlyAlignedRC(const TargetRegisterClass *RC) const { 324381ad6265SDimitry Andric if (!RC || !ST.needsAlignedVGPRs()) 324481ad6265SDimitry Andric return RC; 324581ad6265SDimitry Andric 324681ad6265SDimitry Andric unsigned Size = getRegSizeInBits(*RC); 324781ad6265SDimitry Andric if (Size <= 32) 324881ad6265SDimitry Andric return RC; 324981ad6265SDimitry Andric 325081ad6265SDimitry Andric if (isVGPRClass(RC)) 325181ad6265SDimitry Andric return getAlignedVGPRClassForBitWidth(Size); 325281ad6265SDimitry Andric if (isAGPRClass(RC)) 
325381ad6265SDimitry Andric return getAlignedAGPRClassForBitWidth(Size); 325481ad6265SDimitry Andric if (isVectorSuperClass(RC)) 325581ad6265SDimitry Andric return getAlignedVectorSuperClassForBitWidth(Size); 325681ad6265SDimitry Andric 325781ad6265SDimitry Andric return RC; 325881ad6265SDimitry Andric } 325981ad6265SDimitry Andric 32605ffd83dbSDimitry Andric ArrayRef<MCPhysReg> 32615ffd83dbSDimitry Andric SIRegisterInfo::getAllSGPR128(const MachineFunction &MF) const { 3262bdd1243dSDimitry Andric return ArrayRef(AMDGPU::SGPR_128RegClass.begin(), ST.getMaxNumSGPRs(MF) / 4); 32635ffd83dbSDimitry Andric } 32645ffd83dbSDimitry Andric 32655ffd83dbSDimitry Andric ArrayRef<MCPhysReg> 3266e8d8bef9SDimitry Andric SIRegisterInfo::getAllSGPR64(const MachineFunction &MF) const { 3267bdd1243dSDimitry Andric return ArrayRef(AMDGPU::SGPR_64RegClass.begin(), ST.getMaxNumSGPRs(MF) / 2); 32685ffd83dbSDimitry Andric } 32695ffd83dbSDimitry Andric 32705ffd83dbSDimitry Andric ArrayRef<MCPhysReg> 3271e8d8bef9SDimitry Andric SIRegisterInfo::getAllSGPR32(const MachineFunction &MF) const { 3272bdd1243dSDimitry Andric return ArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF)); 32735ffd83dbSDimitry Andric } 327406c3fb27SDimitry Andric 327506c3fb27SDimitry Andric unsigned 327606c3fb27SDimitry Andric SIRegisterInfo::getSubRegAlignmentNumBits(const TargetRegisterClass *RC, 327706c3fb27SDimitry Andric unsigned SubReg) const { 327806c3fb27SDimitry Andric switch (RC->TSFlags & SIRCFlags::RegKindMask) { 327906c3fb27SDimitry Andric case SIRCFlags::HasSGPR: 328006c3fb27SDimitry Andric return std::min(128u, getSubRegIdxSize(SubReg)); 328106c3fb27SDimitry Andric case SIRCFlags::HasAGPR: 328206c3fb27SDimitry Andric case SIRCFlags::HasVGPR: 328306c3fb27SDimitry Andric case SIRCFlags::HasVGPR | SIRCFlags::HasAGPR: 328406c3fb27SDimitry Andric return std::min(32u, getSubRegIdxSize(SubReg)); 328506c3fb27SDimitry Andric default: 328606c3fb27SDimitry Andric break; 328706c3fb27SDimitry Andric } 328806c3fb27SDimitry Andric return 0; 328906c3fb27SDimitry Andric } 3290
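// As a worked illustration of getSubRegAlignmentNumBits (assuming, for the
// sake of the example, that AMDGPU::sub0_sub1 spans 64 bits): an SGPR tuple
// class would return min(128, 64) = 64 for that index, while the same query on
// a VGPR or AGPR class is capped at min(32, 64) = 32 by the switch above.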