xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp (revision 753f127f3ace09432b2baeffd71a308760641a62)
1*753f127fSDimitry Andric //===- GCNVOPDUtils.cpp - GCN VOPD Utils  ------------------------===//
2*753f127fSDimitry Andric //
3*753f127fSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*753f127fSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*753f127fSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*753f127fSDimitry Andric //
7*753f127fSDimitry Andric //===----------------------------------------------------------------------===//
8*753f127fSDimitry Andric //
/// \file This file contains the AMDGPU DAG scheduling
/// mutation to pair VOPD instructions back to back. It also contains
/// subroutines useful in the creation of VOPD instructions.
12*753f127fSDimitry Andric //
13*753f127fSDimitry Andric //===----------------------------------------------------------------------===//
14*753f127fSDimitry Andric 
15*753f127fSDimitry Andric #include "GCNVOPDUtils.h"
16*753f127fSDimitry Andric #include "AMDGPUSubtarget.h"
17*753f127fSDimitry Andric #include "GCNSubtarget.h"
18*753f127fSDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19*753f127fSDimitry Andric #include "SIInstrInfo.h"
20*753f127fSDimitry Andric #include "llvm/ADT/STLExtras.h"
21*753f127fSDimitry Andric #include "llvm/ADT/SmallVector.h"
22*753f127fSDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
23*753f127fSDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
24*753f127fSDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
25*753f127fSDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
26*753f127fSDimitry Andric #include "llvm/CodeGen/MacroFusion.h"
27*753f127fSDimitry Andric #include "llvm/CodeGen/ScheduleDAG.h"
28*753f127fSDimitry Andric #include "llvm/CodeGen/ScheduleDAGMutation.h"
29*753f127fSDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
30*753f127fSDimitry Andric #include "llvm/MC/MCInst.h"
31*753f127fSDimitry Andric 
32*753f127fSDimitry Andric using namespace llvm;
33*753f127fSDimitry Andric 
34*753f127fSDimitry Andric #define DEBUG_TYPE "gcn-vopd-utils"
35*753f127fSDimitry Andric 
36*753f127fSDimitry Andric bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
37*753f127fSDimitry Andric                                    const MachineInstr &FirstMI,
38*753f127fSDimitry Andric                                    const MachineInstr &SecondMI) {
39*753f127fSDimitry Andric   const MachineFunction *MF = FirstMI.getMF();
40*753f127fSDimitry Andric   const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
41*753f127fSDimitry Andric   const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());
42*753f127fSDimitry Andric   const MachineRegisterInfo &MRI = MF->getRegInfo();
43*753f127fSDimitry Andric   const unsigned NumVGPRBanks = 4;
44*753f127fSDimitry Andric   // Literals also count against scalar bus limit
45*753f127fSDimitry Andric   SmallVector<const MachineOperand *> UniqueLiterals;
46*753f127fSDimitry Andric   auto addLiteral = [&](const MachineOperand &Op) {
47*753f127fSDimitry Andric     for (auto &Literal : UniqueLiterals) {
48*753f127fSDimitry Andric       if (Literal->isIdenticalTo(Op))
49*753f127fSDimitry Andric         return;
50*753f127fSDimitry Andric     }
51*753f127fSDimitry Andric     UniqueLiterals.push_back(&Op);
52*753f127fSDimitry Andric   };
53*753f127fSDimitry Andric   SmallVector<Register> UniqueScalarRegs;
54*753f127fSDimitry Andric   assert([&]() -> bool {
55*753f127fSDimitry Andric     for (auto MII = MachineBasicBlock::const_iterator(&FirstMI);
56*753f127fSDimitry Andric          MII != FirstMI.getParent()->instr_end(); ++MII) {
57*753f127fSDimitry Andric       if (&*MII == &SecondMI)
58*753f127fSDimitry Andric         return true;
59*753f127fSDimitry Andric     }
60*753f127fSDimitry Andric     return false;
61*753f127fSDimitry Andric   }() && "Expected FirstMI to precede SecondMI");
62*753f127fSDimitry Andric   // Cannot pair dependent instructions
63*753f127fSDimitry Andric   for (const auto &Use : SecondMI.uses())
64*753f127fSDimitry Andric     if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg()))
65*753f127fSDimitry Andric       return false;
66*753f127fSDimitry Andric 
67*753f127fSDimitry Andric   struct ComponentInfo {
68*753f127fSDimitry Andric     ComponentInfo(const MachineInstr &MI) : MI(MI) {}
69*753f127fSDimitry Andric     Register Dst, Reg0, Reg1, Reg2;
70*753f127fSDimitry Andric     const MachineInstr &MI;
71*753f127fSDimitry Andric   };
72*753f127fSDimitry Andric   ComponentInfo CInfo[] = {ComponentInfo(FirstMI), ComponentInfo(SecondMI)};
73*753f127fSDimitry Andric 
74*753f127fSDimitry Andric   for (ComponentInfo &Comp : CInfo) {
75*753f127fSDimitry Andric     switch (Comp.MI.getOpcode()) {
76*753f127fSDimitry Andric     case AMDGPU::V_FMAMK_F32:
77*753f127fSDimitry Andric       // cannot inline the fixed literal in fmamk
78*753f127fSDimitry Andric       addLiteral(Comp.MI.getOperand(2));
79*753f127fSDimitry Andric       Comp.Reg2 = Comp.MI.getOperand(3).getReg();
80*753f127fSDimitry Andric       break;
81*753f127fSDimitry Andric     case AMDGPU::V_FMAAK_F32:
82*753f127fSDimitry Andric       // cannot inline the fixed literal in fmaak
83*753f127fSDimitry Andric       addLiteral(Comp.MI.getOperand(3));
84*753f127fSDimitry Andric       Comp.Reg1 = Comp.MI.getOperand(2).getReg();
85*753f127fSDimitry Andric       break;
86*753f127fSDimitry Andric     case AMDGPU::V_FMAC_F32_e32:
87*753f127fSDimitry Andric     case AMDGPU::V_DOT2_F32_F16:
88*753f127fSDimitry Andric     case AMDGPU::V_DOT2_F32_BF16:
89*753f127fSDimitry Andric       Comp.Reg1 = Comp.MI.getOperand(2).getReg();
90*753f127fSDimitry Andric       Comp.Reg2 = Comp.MI.getOperand(0).getReg();
91*753f127fSDimitry Andric       break;
92*753f127fSDimitry Andric     case AMDGPU::V_CNDMASK_B32_e32:
93*753f127fSDimitry Andric       UniqueScalarRegs.push_back(AMDGPU::VCC_LO);
94*753f127fSDimitry Andric       Comp.Reg1 = Comp.MI.getOperand(2).getReg();
95*753f127fSDimitry Andric       break;
96*753f127fSDimitry Andric     case AMDGPU::V_MOV_B32_e32:
97*753f127fSDimitry Andric       break;
98*753f127fSDimitry Andric     default:
99*753f127fSDimitry Andric       Comp.Reg1 = Comp.MI.getOperand(2).getReg();
100*753f127fSDimitry Andric       break;
101*753f127fSDimitry Andric     }
102*753f127fSDimitry Andric 
103*753f127fSDimitry Andric     Comp.Dst = Comp.MI.getOperand(0).getReg();
104*753f127fSDimitry Andric 
105*753f127fSDimitry Andric     const MachineOperand &Op0 = Comp.MI.getOperand(1);
106*753f127fSDimitry Andric     if (Op0.isReg()) {
107*753f127fSDimitry Andric       if (!TRI->isVectorRegister(MRI, Op0.getReg())) {
108*753f127fSDimitry Andric         if (!is_contained(UniqueScalarRegs, Op0.getReg()))
109*753f127fSDimitry Andric           UniqueScalarRegs.push_back(Op0.getReg());
110*753f127fSDimitry Andric       } else
111*753f127fSDimitry Andric         Comp.Reg0 = Op0.getReg();
112*753f127fSDimitry Andric     } else {
113*753f127fSDimitry Andric       if (!TII.isInlineConstant(Comp.MI, 1))
114*753f127fSDimitry Andric         addLiteral(Op0);
115*753f127fSDimitry Andric     }
116*753f127fSDimitry Andric   }
117*753f127fSDimitry Andric 
118*753f127fSDimitry Andric   if (UniqueLiterals.size() > 1)
119*753f127fSDimitry Andric     return false;
120*753f127fSDimitry Andric   if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
121*753f127fSDimitry Andric     return false;
122*753f127fSDimitry Andric 
123*753f127fSDimitry Andric   // check port 0
124*753f127fSDimitry Andric   if (CInfo[0].Reg0 && CInfo[1].Reg0 &&
125*753f127fSDimitry Andric       CInfo[0].Reg0 % NumVGPRBanks == CInfo[1].Reg0 % NumVGPRBanks)
126*753f127fSDimitry Andric     return false;
127*753f127fSDimitry Andric   // check port 1
128*753f127fSDimitry Andric   if (CInfo[0].Reg1 && CInfo[1].Reg1 &&
129*753f127fSDimitry Andric       CInfo[0].Reg1 % NumVGPRBanks == CInfo[1].Reg1 % NumVGPRBanks)
130*753f127fSDimitry Andric     return false;
131*753f127fSDimitry Andric   // check port 2
132*753f127fSDimitry Andric   if (CInfo[0].Reg2 && CInfo[1].Reg2 &&
133*753f127fSDimitry Andric       !((CInfo[0].Reg2 ^ CInfo[1].Reg2) & 0x1))
134*753f127fSDimitry Andric     return false;
135*753f127fSDimitry Andric   if (!((CInfo[0].Dst ^ CInfo[1].Dst) & 0x1))
136*753f127fSDimitry Andric     return false;
137*753f127fSDimitry Andric 
138*753f127fSDimitry Andric   LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI
139*753f127fSDimitry Andric                     << "\n\tY: " << SecondMI << "\n");
140*753f127fSDimitry Andric   return true;
141*753f127fSDimitry Andric }
142*753f127fSDimitry Andric 
143*753f127fSDimitry Andric /// Check if the instr pair, FirstMI and SecondMI, should be scheduled
144*753f127fSDimitry Andric /// together. Given SecondMI, when FirstMI is unspecified, then check if
145*753f127fSDimitry Andric /// SecondMI may be part of a fused pair at all.
146*753f127fSDimitry Andric static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII,
147*753f127fSDimitry Andric                                        const TargetSubtargetInfo &TSI,
148*753f127fSDimitry Andric                                        const MachineInstr *FirstMI,
149*753f127fSDimitry Andric                                        const MachineInstr &SecondMI) {
150*753f127fSDimitry Andric   const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
151*753f127fSDimitry Andric   unsigned Opc2 = SecondMI.getOpcode();
152*753f127fSDimitry Andric   auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
153*753f127fSDimitry Andric 
154*753f127fSDimitry Andric   // One instruction case
155*753f127fSDimitry Andric   if (!FirstMI)
156*753f127fSDimitry Andric     return SecondCanBeVOPD.Y;
157*753f127fSDimitry Andric 
158*753f127fSDimitry Andric   unsigned Opc = FirstMI->getOpcode();
159*753f127fSDimitry Andric   auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
160*753f127fSDimitry Andric 
161*753f127fSDimitry Andric   if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
162*753f127fSDimitry Andric         (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
163*753f127fSDimitry Andric     return false;
164*753f127fSDimitry Andric 
165*753f127fSDimitry Andric   return checkVOPDRegConstraints(STII, *FirstMI, SecondMI);
166*753f127fSDimitry Andric }
167*753f127fSDimitry Andric 
168*753f127fSDimitry Andric /// Adapts design from MacroFusion
169*753f127fSDimitry Andric /// Puts valid candidate instructions back-to-back so they can easily
170*753f127fSDimitry Andric /// be turned into VOPD instructions
171*753f127fSDimitry Andric /// Greedily pairs instruction candidates. O(n^2) algorithm.
172*753f127fSDimitry Andric struct VOPDPairingMutation : ScheduleDAGMutation {
173*753f127fSDimitry Andric   ShouldSchedulePredTy shouldScheduleAdjacent; // NOLINT: function pointer
174*753f127fSDimitry Andric 
175*753f127fSDimitry Andric   VOPDPairingMutation(
176*753f127fSDimitry Andric       ShouldSchedulePredTy shouldScheduleAdjacent) // NOLINT: function pointer
177*753f127fSDimitry Andric       : shouldScheduleAdjacent(shouldScheduleAdjacent) {}
178*753f127fSDimitry Andric 
179*753f127fSDimitry Andric   void apply(ScheduleDAGInstrs *DAG) override {
180*753f127fSDimitry Andric     const TargetInstrInfo &TII = *DAG->TII;
181*753f127fSDimitry Andric     const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
182*753f127fSDimitry Andric     if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
183*753f127fSDimitry Andric       LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
184*753f127fSDimitry Andric       return;
185*753f127fSDimitry Andric     }
186*753f127fSDimitry Andric 
187*753f127fSDimitry Andric     std::vector<SUnit>::iterator ISUI, JSUI;
188*753f127fSDimitry Andric     for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
189*753f127fSDimitry Andric       const MachineInstr *IMI = ISUI->getInstr();
190*753f127fSDimitry Andric       if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
191*753f127fSDimitry Andric         continue;
192*753f127fSDimitry Andric       if (!hasLessThanNumFused(*ISUI, 2))
193*753f127fSDimitry Andric         continue;
194*753f127fSDimitry Andric 
195*753f127fSDimitry Andric       for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {
196*753f127fSDimitry Andric         if (JSUI->isBoundaryNode())
197*753f127fSDimitry Andric           continue;
198*753f127fSDimitry Andric         const MachineInstr *JMI = JSUI->getInstr();
199*753f127fSDimitry Andric         if (!hasLessThanNumFused(*JSUI, 2) ||
200*753f127fSDimitry Andric             !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
201*753f127fSDimitry Andric           continue;
202*753f127fSDimitry Andric         if (fuseInstructionPair(*DAG, *ISUI, *JSUI))
203*753f127fSDimitry Andric           break;
204*753f127fSDimitry Andric       }
205*753f127fSDimitry Andric     }
206*753f127fSDimitry Andric     LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
207*753f127fSDimitry Andric   }
208*753f127fSDimitry Andric };
209*753f127fSDimitry Andric 
210*753f127fSDimitry Andric std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
211*753f127fSDimitry Andric   return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);
212*753f127fSDimitry Andric }
213