xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
1*5f757f3fSDimitry Andric //===- AMDGPUInsertSingleUseVDST.cpp - Insert s_singleuse_vdst instructions ==//
2*5f757f3fSDimitry Andric //
3*5f757f3fSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*5f757f3fSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*5f757f3fSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*5f757f3fSDimitry Andric //
7*5f757f3fSDimitry Andric //===----------------------------------------------------------------------===//
8*5f757f3fSDimitry Andric //
9*5f757f3fSDimitry Andric /// \file
10*5f757f3fSDimitry Andric /// Insert s_singleuse_vdst instructions on GFX11.5+ to mark regions of VALU
11*5f757f3fSDimitry Andric /// instructions that produce single-use VGPR values. If the value is forwarded
12*5f757f3fSDimitry Andric /// to the consumer instruction prior to VGPR writeback, the hardware can
13*5f757f3fSDimitry Andric /// then skip (kill) the VGPR write.
14*5f757f3fSDimitry Andric //
15*5f757f3fSDimitry Andric //===----------------------------------------------------------------------===//
16*5f757f3fSDimitry Andric 
17*5f757f3fSDimitry Andric #include "AMDGPU.h"
18*5f757f3fSDimitry Andric #include "GCNSubtarget.h"
19*5f757f3fSDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20*5f757f3fSDimitry Andric #include "SIInstrInfo.h"
21*5f757f3fSDimitry Andric #include "llvm/ADT/DenseMap.h"
22*5f757f3fSDimitry Andric #include "llvm/ADT/STLExtras.h"
23*5f757f3fSDimitry Andric #include "llvm/ADT/StringRef.h"
24*5f757f3fSDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
25*5f757f3fSDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
26*5f757f3fSDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
27*5f757f3fSDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
28*5f757f3fSDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
29*5f757f3fSDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
30*5f757f3fSDimitry Andric #include "llvm/CodeGen/Register.h"
31*5f757f3fSDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h"
32*5f757f3fSDimitry Andric #include "llvm/IR/DebugLoc.h"
33*5f757f3fSDimitry Andric #include "llvm/MC/MCRegister.h"
34*5f757f3fSDimitry Andric #include "llvm/Pass.h"
35*5f757f3fSDimitry Andric 
36*5f757f3fSDimitry Andric using namespace llvm;
37*5f757f3fSDimitry Andric 
38*5f757f3fSDimitry Andric #define DEBUG_TYPE "amdgpu-insert-single-use-vdst"
39*5f757f3fSDimitry Andric 
40*5f757f3fSDimitry Andric namespace {
41*5f757f3fSDimitry Andric class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
42*5f757f3fSDimitry Andric private:
43*5f757f3fSDimitry Andric   const SIInstrInfo *SII;
44*5f757f3fSDimitry Andric 
45*5f757f3fSDimitry Andric public:
46*5f757f3fSDimitry Andric   static char ID;
47*5f757f3fSDimitry Andric 
48*5f757f3fSDimitry Andric   AMDGPUInsertSingleUseVDST() : MachineFunctionPass(ID) {}
49*5f757f3fSDimitry Andric 
50*5f757f3fSDimitry Andric   void emitSingleUseVDST(MachineInstr &MI) const {
51*5f757f3fSDimitry Andric     // Mark the following instruction as a single-use producer:
52*5f757f3fSDimitry Andric     //   s_singleuse_vdst { supr0: 1 }
53*5f757f3fSDimitry Andric     BuildMI(*MI.getParent(), MI, DebugLoc(), SII->get(AMDGPU::S_SINGLEUSE_VDST))
54*5f757f3fSDimitry Andric         .addImm(0x1);
55*5f757f3fSDimitry Andric   }
56*5f757f3fSDimitry Andric 
57*5f757f3fSDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override {
58*5f757f3fSDimitry Andric     const auto &ST = MF.getSubtarget<GCNSubtarget>();
59*5f757f3fSDimitry Andric     if (!ST.hasVGPRSingleUseHintInsts())
60*5f757f3fSDimitry Andric       return false;
61*5f757f3fSDimitry Andric 
62*5f757f3fSDimitry Andric     SII = ST.getInstrInfo();
63*5f757f3fSDimitry Andric     const auto *TRI = &SII->getRegisterInfo();
64*5f757f3fSDimitry Andric     bool InstructionEmitted = false;
65*5f757f3fSDimitry Andric 
66*5f757f3fSDimitry Andric     for (MachineBasicBlock &MBB : MF) {
67*5f757f3fSDimitry Andric       DenseMap<MCPhysReg, unsigned> RegisterUseCount; // TODO: MCRegUnits
68*5f757f3fSDimitry Andric 
69*5f757f3fSDimitry Andric       // Handle boundaries at the end of basic block separately to avoid
70*5f757f3fSDimitry Andric       // false positives. If they are live at the end of a basic block then
71*5f757f3fSDimitry Andric       // assume it has more uses later on.
72*5f757f3fSDimitry Andric       for (const auto &Liveouts : MBB.liveouts())
73*5f757f3fSDimitry Andric         RegisterUseCount[Liveouts.PhysReg] = 2;
74*5f757f3fSDimitry Andric 
75*5f757f3fSDimitry Andric       for (MachineInstr &MI : reverse(MBB.instrs())) {
76*5f757f3fSDimitry Andric         // All registers in all operands need to be single use for an
77*5f757f3fSDimitry Andric         // instruction to be marked as a single use producer.
78*5f757f3fSDimitry Andric         bool AllProducerOperandsAreSingleUse = true;
79*5f757f3fSDimitry Andric 
80*5f757f3fSDimitry Andric         for (const auto &Operand : MI.operands()) {
81*5f757f3fSDimitry Andric           if (!Operand.isReg())
82*5f757f3fSDimitry Andric             continue;
83*5f757f3fSDimitry Andric           const auto Reg = Operand.getReg();
84*5f757f3fSDimitry Andric 
85*5f757f3fSDimitry Andric           // Count the number of times each register is read.
86*5f757f3fSDimitry Andric           if (Operand.readsReg())
87*5f757f3fSDimitry Andric             RegisterUseCount[Reg]++;
88*5f757f3fSDimitry Andric 
89*5f757f3fSDimitry Andric           // Do not attempt to optimise across exec mask changes.
90*5f757f3fSDimitry Andric           if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) {
91*5f757f3fSDimitry Andric             for (auto &UsedReg : RegisterUseCount)
92*5f757f3fSDimitry Andric               UsedReg.second = 2;
93*5f757f3fSDimitry Andric           }
94*5f757f3fSDimitry Andric 
95*5f757f3fSDimitry Andric           // If we are at the point where the register first became live,
96*5f757f3fSDimitry Andric           // check if the operands are single use.
97*5f757f3fSDimitry Andric           if (!MI.modifiesRegister(Reg, TRI))
98*5f757f3fSDimitry Andric             continue;
99*5f757f3fSDimitry Andric           if (RegisterUseCount[Reg] > 1)
100*5f757f3fSDimitry Andric             AllProducerOperandsAreSingleUse = false;
101*5f757f3fSDimitry Andric           // Reset uses count when a register is no longer live.
102*5f757f3fSDimitry Andric           RegisterUseCount.erase(Reg);
103*5f757f3fSDimitry Andric         }
104*5f757f3fSDimitry Andric         if (AllProducerOperandsAreSingleUse && SIInstrInfo::isVALU(MI)) {
105*5f757f3fSDimitry Andric           // TODO: Replace with candidate logging for instruction grouping
106*5f757f3fSDimitry Andric           // later.
107*5f757f3fSDimitry Andric           emitSingleUseVDST(MI);
108*5f757f3fSDimitry Andric           InstructionEmitted = true;
109*5f757f3fSDimitry Andric         }
110*5f757f3fSDimitry Andric       }
111*5f757f3fSDimitry Andric     }
112*5f757f3fSDimitry Andric     return InstructionEmitted;
113*5f757f3fSDimitry Andric   }
114*5f757f3fSDimitry Andric };
115*5f757f3fSDimitry Andric } // namespace
116*5f757f3fSDimitry Andric 
117*5f757f3fSDimitry Andric char AMDGPUInsertSingleUseVDST::ID = 0;
118*5f757f3fSDimitry Andric 
119*5f757f3fSDimitry Andric char &llvm::AMDGPUInsertSingleUseVDSTID = AMDGPUInsertSingleUseVDST::ID;
120*5f757f3fSDimitry Andric 
121*5f757f3fSDimitry Andric INITIALIZE_PASS(AMDGPUInsertSingleUseVDST, DEBUG_TYPE,
122*5f757f3fSDimitry Andric                 "AMDGPU Insert SingleUseVDST", false, false)
123