1*5f757f3fSDimitry Andric //===- AMDGPUInsertSingleUseVDST.cpp - Insert s_singleuse_vdst instructions ==// 2*5f757f3fSDimitry Andric // 3*5f757f3fSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*5f757f3fSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*5f757f3fSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*5f757f3fSDimitry Andric // 7*5f757f3fSDimitry Andric //===----------------------------------------------------------------------===// 8*5f757f3fSDimitry Andric // 9*5f757f3fSDimitry Andric /// \file 10*5f757f3fSDimitry Andric /// Insert s_singleuse_vdst instructions on GFX11.5+ to mark regions of VALU 11*5f757f3fSDimitry Andric /// instructions that produce single-use VGPR values. If the value is forwarded 12*5f757f3fSDimitry Andric /// to the consumer instruction prior to VGPR writeback, the hardware can 13*5f757f3fSDimitry Andric /// then skip (kill) the VGPR write. 14*5f757f3fSDimitry Andric // 15*5f757f3fSDimitry Andric //===----------------------------------------------------------------------===// 16*5f757f3fSDimitry Andric 17*5f757f3fSDimitry Andric #include "AMDGPU.h" 18*5f757f3fSDimitry Andric #include "GCNSubtarget.h" 19*5f757f3fSDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 20*5f757f3fSDimitry Andric #include "SIInstrInfo.h" 21*5f757f3fSDimitry Andric #include "llvm/ADT/DenseMap.h" 22*5f757f3fSDimitry Andric #include "llvm/ADT/STLExtras.h" 23*5f757f3fSDimitry Andric #include "llvm/ADT/StringRef.h" 24*5f757f3fSDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h" 25*5f757f3fSDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 26*5f757f3fSDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 27*5f757f3fSDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 28*5f757f3fSDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h" 29*5f757f3fSDimitry Andric #include "llvm/CodeGen/MachineOperand.h" 30*5f757f3fSDimitry Andric #include "llvm/CodeGen/Register.h" 31*5f757f3fSDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h" 32*5f757f3fSDimitry Andric #include "llvm/IR/DebugLoc.h" 33*5f757f3fSDimitry Andric #include "llvm/MC/MCRegister.h" 34*5f757f3fSDimitry Andric #include "llvm/Pass.h" 35*5f757f3fSDimitry Andric 36*5f757f3fSDimitry Andric using namespace llvm; 37*5f757f3fSDimitry Andric 38*5f757f3fSDimitry Andric #define DEBUG_TYPE "amdgpu-insert-single-use-vdst" 39*5f757f3fSDimitry Andric 40*5f757f3fSDimitry Andric namespace { 41*5f757f3fSDimitry Andric class AMDGPUInsertSingleUseVDST : public MachineFunctionPass { 42*5f757f3fSDimitry Andric private: 43*5f757f3fSDimitry Andric const SIInstrInfo *SII; 44*5f757f3fSDimitry Andric 45*5f757f3fSDimitry Andric public: 46*5f757f3fSDimitry Andric static char ID; 47*5f757f3fSDimitry Andric 48*5f757f3fSDimitry Andric AMDGPUInsertSingleUseVDST() : MachineFunctionPass(ID) {} 49*5f757f3fSDimitry Andric 50*5f757f3fSDimitry Andric void emitSingleUseVDST(MachineInstr &MI) const { 51*5f757f3fSDimitry Andric // Mark the following instruction as a single-use producer: 52*5f757f3fSDimitry Andric // s_singleuse_vdst { supr0: 1 } 53*5f757f3fSDimitry Andric BuildMI(*MI.getParent(), MI, DebugLoc(), SII->get(AMDGPU::S_SINGLEUSE_VDST)) 54*5f757f3fSDimitry Andric .addImm(0x1); 55*5f757f3fSDimitry Andric } 56*5f757f3fSDimitry Andric 57*5f757f3fSDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override { 58*5f757f3fSDimitry Andric const auto &ST = MF.getSubtarget<GCNSubtarget>(); 59*5f757f3fSDimitry Andric if (!ST.hasVGPRSingleUseHintInsts()) 60*5f757f3fSDimitry Andric return false; 61*5f757f3fSDimitry Andric 62*5f757f3fSDimitry Andric SII = ST.getInstrInfo(); 63*5f757f3fSDimitry Andric const auto *TRI = &SII->getRegisterInfo(); 64*5f757f3fSDimitry Andric bool InstructionEmitted = false; 65*5f757f3fSDimitry Andric 66*5f757f3fSDimitry Andric for (MachineBasicBlock &MBB : MF) { 67*5f757f3fSDimitry Andric DenseMap<MCPhysReg, unsigned> RegisterUseCount; // TODO: MCRegUnits 68*5f757f3fSDimitry Andric 69*5f757f3fSDimitry Andric // Handle boundaries at the end of basic block separately to avoid 70*5f757f3fSDimitry Andric // false positives. If they are live at the end of a basic block then 71*5f757f3fSDimitry Andric // assume it has more uses later on. 72*5f757f3fSDimitry Andric for (const auto &Liveouts : MBB.liveouts()) 73*5f757f3fSDimitry Andric RegisterUseCount[Liveouts.PhysReg] = 2; 74*5f757f3fSDimitry Andric 75*5f757f3fSDimitry Andric for (MachineInstr &MI : reverse(MBB.instrs())) { 76*5f757f3fSDimitry Andric // All registers in all operands need to be single use for an 77*5f757f3fSDimitry Andric // instruction to be marked as a single use producer. 78*5f757f3fSDimitry Andric bool AllProducerOperandsAreSingleUse = true; 79*5f757f3fSDimitry Andric 80*5f757f3fSDimitry Andric for (const auto &Operand : MI.operands()) { 81*5f757f3fSDimitry Andric if (!Operand.isReg()) 82*5f757f3fSDimitry Andric continue; 83*5f757f3fSDimitry Andric const auto Reg = Operand.getReg(); 84*5f757f3fSDimitry Andric 85*5f757f3fSDimitry Andric // Count the number of times each register is read. 86*5f757f3fSDimitry Andric if (Operand.readsReg()) 87*5f757f3fSDimitry Andric RegisterUseCount[Reg]++; 88*5f757f3fSDimitry Andric 89*5f757f3fSDimitry Andric // Do not attempt to optimise across exec mask changes. 90*5f757f3fSDimitry Andric if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) { 91*5f757f3fSDimitry Andric for (auto &UsedReg : RegisterUseCount) 92*5f757f3fSDimitry Andric UsedReg.second = 2; 93*5f757f3fSDimitry Andric } 94*5f757f3fSDimitry Andric 95*5f757f3fSDimitry Andric // If we are at the point where the register first became live, 96*5f757f3fSDimitry Andric // check if the operands are single use. 97*5f757f3fSDimitry Andric if (!MI.modifiesRegister(Reg, TRI)) 98*5f757f3fSDimitry Andric continue; 99*5f757f3fSDimitry Andric if (RegisterUseCount[Reg] > 1) 100*5f757f3fSDimitry Andric AllProducerOperandsAreSingleUse = false; 101*5f757f3fSDimitry Andric // Reset uses count when a register is no longer live. 102*5f757f3fSDimitry Andric RegisterUseCount.erase(Reg); 103*5f757f3fSDimitry Andric } 104*5f757f3fSDimitry Andric if (AllProducerOperandsAreSingleUse && SIInstrInfo::isVALU(MI)) { 105*5f757f3fSDimitry Andric // TODO: Replace with candidate logging for instruction grouping 106*5f757f3fSDimitry Andric // later. 107*5f757f3fSDimitry Andric emitSingleUseVDST(MI); 108*5f757f3fSDimitry Andric InstructionEmitted = true; 109*5f757f3fSDimitry Andric } 110*5f757f3fSDimitry Andric } 111*5f757f3fSDimitry Andric } 112*5f757f3fSDimitry Andric return InstructionEmitted; 113*5f757f3fSDimitry Andric } 114*5f757f3fSDimitry Andric }; 115*5f757f3fSDimitry Andric } // namespace 116*5f757f3fSDimitry Andric 117*5f757f3fSDimitry Andric char AMDGPUInsertSingleUseVDST::ID = 0; 118*5f757f3fSDimitry Andric 119*5f757f3fSDimitry Andric char &llvm::AMDGPUInsertSingleUseVDSTID = AMDGPUInsertSingleUseVDST::ID; 120*5f757f3fSDimitry Andric 121*5f757f3fSDimitry Andric INITIALIZE_PASS(AMDGPUInsertSingleUseVDST, DEBUG_TYPE, 122*5f757f3fSDimitry Andric "AMDGPU Insert SingleUseVDST", false, false) 123