xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
1753f127fSDimitry Andric //===- GCNVOPDUtils.cpp - GCN VOPD Utils  ------------------------===//
2753f127fSDimitry Andric //
3753f127fSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4753f127fSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5753f127fSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6753f127fSDimitry Andric //
7753f127fSDimitry Andric //===----------------------------------------------------------------------===//
8753f127fSDimitry Andric //
9753f127fSDimitry Andric /// \file This file contains the AMDGPU DAG scheduling
10753f127fSDimitry Andric /// mutation to pair VOPD instructions back to back. It also contains
11753f127fSDimitry Andric //  subroutines useful in the creation of VOPD instructions
12753f127fSDimitry Andric //
13753f127fSDimitry Andric //===----------------------------------------------------------------------===//
14753f127fSDimitry Andric 
15753f127fSDimitry Andric #include "GCNVOPDUtils.h"
16753f127fSDimitry Andric #include "AMDGPUSubtarget.h"
17753f127fSDimitry Andric #include "GCNSubtarget.h"
18753f127fSDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19753f127fSDimitry Andric #include "SIInstrInfo.h"
20bdd1243dSDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
21753f127fSDimitry Andric #include "llvm/ADT/STLExtras.h"
22753f127fSDimitry Andric #include "llvm/ADT/SmallVector.h"
23753f127fSDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
24753f127fSDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
25753f127fSDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
26753f127fSDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
27753f127fSDimitry Andric #include "llvm/CodeGen/MacroFusion.h"
28753f127fSDimitry Andric #include "llvm/CodeGen/ScheduleDAG.h"
29753f127fSDimitry Andric #include "llvm/CodeGen/ScheduleDAGMutation.h"
30753f127fSDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
31753f127fSDimitry Andric #include "llvm/MC/MCInst.h"
32753f127fSDimitry Andric 
33753f127fSDimitry Andric using namespace llvm;
34753f127fSDimitry Andric 
35753f127fSDimitry Andric #define DEBUG_TYPE "gcn-vopd-utils"
36753f127fSDimitry Andric 
checkVOPDRegConstraints(const SIInstrInfo & TII,const MachineInstr & FirstMI,const MachineInstr & SecondMI)37753f127fSDimitry Andric bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
38753f127fSDimitry Andric                                    const MachineInstr &FirstMI,
39753f127fSDimitry Andric                                    const MachineInstr &SecondMI) {
40bdd1243dSDimitry Andric   namespace VOPD = AMDGPU::VOPD;
41bdd1243dSDimitry Andric 
42753f127fSDimitry Andric   const MachineFunction *MF = FirstMI.getMF();
43753f127fSDimitry Andric   const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
44753f127fSDimitry Andric   const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());
45753f127fSDimitry Andric   const MachineRegisterInfo &MRI = MF->getRegInfo();
46753f127fSDimitry Andric   // Literals also count against scalar bus limit
47753f127fSDimitry Andric   SmallVector<const MachineOperand *> UniqueLiterals;
48753f127fSDimitry Andric   auto addLiteral = [&](const MachineOperand &Op) {
49753f127fSDimitry Andric     for (auto &Literal : UniqueLiterals) {
50753f127fSDimitry Andric       if (Literal->isIdenticalTo(Op))
51753f127fSDimitry Andric         return;
52753f127fSDimitry Andric     }
53753f127fSDimitry Andric     UniqueLiterals.push_back(&Op);
54753f127fSDimitry Andric   };
55753f127fSDimitry Andric   SmallVector<Register> UniqueScalarRegs;
56753f127fSDimitry Andric   assert([&]() -> bool {
57753f127fSDimitry Andric     for (auto MII = MachineBasicBlock::const_iterator(&FirstMI);
58753f127fSDimitry Andric          MII != FirstMI.getParent()->instr_end(); ++MII) {
59753f127fSDimitry Andric       if (&*MII == &SecondMI)
60753f127fSDimitry Andric         return true;
61753f127fSDimitry Andric     }
62753f127fSDimitry Andric     return false;
63753f127fSDimitry Andric   }() && "Expected FirstMI to precede SecondMI");
64753f127fSDimitry Andric   // Cannot pair dependent instructions
65753f127fSDimitry Andric   for (const auto &Use : SecondMI.uses())
6606c3fb27SDimitry Andric     if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg(), TRI))
67753f127fSDimitry Andric       return false;
68753f127fSDimitry Andric 
69bdd1243dSDimitry Andric   auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
70bdd1243dSDimitry Andric     const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? FirstMI : SecondMI;
71bdd1243dSDimitry Andric     const MachineOperand &Operand = MI.getOperand(OperandIdx);
72bdd1243dSDimitry Andric     if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg()))
73bdd1243dSDimitry Andric       return Operand.getReg();
74bdd1243dSDimitry Andric     return Register();
75753f127fSDimitry Andric   };
76753f127fSDimitry Andric 
77bdd1243dSDimitry Andric   auto InstInfo =
78bdd1243dSDimitry Andric       AMDGPU::getVOPDInstInfo(FirstMI.getDesc(), SecondMI.getDesc());
79bdd1243dSDimitry Andric 
80bdd1243dSDimitry Andric   for (auto CompIdx : VOPD::COMPONENTS) {
81bdd1243dSDimitry Andric     const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI;
82bdd1243dSDimitry Andric 
83bdd1243dSDimitry Andric     const MachineOperand &Src0 = MI.getOperand(VOPD::Component::SRC0);
84bdd1243dSDimitry Andric     if (Src0.isReg()) {
85bdd1243dSDimitry Andric       if (!TRI->isVectorRegister(MRI, Src0.getReg())) {
86bdd1243dSDimitry Andric         if (!is_contained(UniqueScalarRegs, Src0.getReg()))
87bdd1243dSDimitry Andric           UniqueScalarRegs.push_back(Src0.getReg());
88753f127fSDimitry Andric       }
89753f127fSDimitry Andric     } else {
90bdd1243dSDimitry Andric       if (!TII.isInlineConstant(MI, VOPD::Component::SRC0))
91bdd1243dSDimitry Andric         addLiteral(Src0);
92753f127fSDimitry Andric     }
93bdd1243dSDimitry Andric 
94bdd1243dSDimitry Andric     if (InstInfo[CompIdx].hasMandatoryLiteral()) {
95bdd1243dSDimitry Andric       auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
96bdd1243dSDimitry Andric       addLiteral(MI.getOperand(CompOprIdx));
97bdd1243dSDimitry Andric     }
98bdd1243dSDimitry Andric     if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))
99bdd1243dSDimitry Andric       UniqueScalarRegs.push_back(AMDGPU::VCC_LO);
100753f127fSDimitry Andric   }
101753f127fSDimitry Andric 
102753f127fSDimitry Andric   if (UniqueLiterals.size() > 1)
103753f127fSDimitry Andric     return false;
104753f127fSDimitry Andric   if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
105753f127fSDimitry Andric     return false;
106*5f757f3fSDimitry Andric 
107*5f757f3fSDimitry Andric   // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
108*5f757f3fSDimitry Andric   bool SkipSrc = ST.getGeneration() >= AMDGPUSubtarget::GFX12 &&
109*5f757f3fSDimitry Andric                  FirstMI.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
110*5f757f3fSDimitry Andric                  SecondMI.getOpcode() == AMDGPU::V_MOV_B32_e32;
111*5f757f3fSDimitry Andric 
112*5f757f3fSDimitry Andric   if (InstInfo.hasInvalidOperand(getVRegIdx, SkipSrc))
113753f127fSDimitry Andric     return false;
114753f127fSDimitry Andric 
115753f127fSDimitry Andric   LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI
116753f127fSDimitry Andric                     << "\n\tY: " << SecondMI << "\n");
117753f127fSDimitry Andric   return true;
118753f127fSDimitry Andric }
119753f127fSDimitry Andric 
120753f127fSDimitry Andric /// Check if the instr pair, FirstMI and SecondMI, should be scheduled
121753f127fSDimitry Andric /// together. Given SecondMI, when FirstMI is unspecified, then check if
122753f127fSDimitry Andric /// SecondMI may be part of a fused pair at all.
shouldScheduleVOPDAdjacent(const TargetInstrInfo & TII,const TargetSubtargetInfo & TSI,const MachineInstr * FirstMI,const MachineInstr & SecondMI)123753f127fSDimitry Andric static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII,
124753f127fSDimitry Andric                                        const TargetSubtargetInfo &TSI,
125753f127fSDimitry Andric                                        const MachineInstr *FirstMI,
126753f127fSDimitry Andric                                        const MachineInstr &SecondMI) {
127753f127fSDimitry Andric   const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
128753f127fSDimitry Andric   unsigned Opc2 = SecondMI.getOpcode();
129753f127fSDimitry Andric   auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
130753f127fSDimitry Andric 
131753f127fSDimitry Andric   // One instruction case
132753f127fSDimitry Andric   if (!FirstMI)
133753f127fSDimitry Andric     return SecondCanBeVOPD.Y;
134753f127fSDimitry Andric 
135753f127fSDimitry Andric   unsigned Opc = FirstMI->getOpcode();
136753f127fSDimitry Andric   auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
137753f127fSDimitry Andric 
138753f127fSDimitry Andric   if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
139753f127fSDimitry Andric         (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
140753f127fSDimitry Andric     return false;
141753f127fSDimitry Andric 
142753f127fSDimitry Andric   return checkVOPDRegConstraints(STII, *FirstMI, SecondMI);
143753f127fSDimitry Andric }
144753f127fSDimitry Andric 
145bdd1243dSDimitry Andric namespace {
146753f127fSDimitry Andric /// Adapts design from MacroFusion
147753f127fSDimitry Andric /// Puts valid candidate instructions back-to-back so they can easily
148753f127fSDimitry Andric /// be turned into VOPD instructions
149753f127fSDimitry Andric /// Greedily pairs instruction candidates. O(n^2) algorithm.
150753f127fSDimitry Andric struct VOPDPairingMutation : ScheduleDAGMutation {
151*5f757f3fSDimitry Andric   MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer
152753f127fSDimitry Andric 
VOPDPairingMutation__anonbc456b870411::VOPDPairingMutation153753f127fSDimitry Andric   VOPDPairingMutation(
154*5f757f3fSDimitry Andric       MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer
155753f127fSDimitry Andric       : shouldScheduleAdjacent(shouldScheduleAdjacent) {}
156753f127fSDimitry Andric 
apply__anonbc456b870411::VOPDPairingMutation157753f127fSDimitry Andric   void apply(ScheduleDAGInstrs *DAG) override {
158753f127fSDimitry Andric     const TargetInstrInfo &TII = *DAG->TII;
159753f127fSDimitry Andric     const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
160753f127fSDimitry Andric     if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
161753f127fSDimitry Andric       LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
162753f127fSDimitry Andric       return;
163753f127fSDimitry Andric     }
164753f127fSDimitry Andric 
165753f127fSDimitry Andric     std::vector<SUnit>::iterator ISUI, JSUI;
166753f127fSDimitry Andric     for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
167753f127fSDimitry Andric       const MachineInstr *IMI = ISUI->getInstr();
168753f127fSDimitry Andric       if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
169753f127fSDimitry Andric         continue;
170753f127fSDimitry Andric       if (!hasLessThanNumFused(*ISUI, 2))
171753f127fSDimitry Andric         continue;
172753f127fSDimitry Andric 
173753f127fSDimitry Andric       for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {
174753f127fSDimitry Andric         if (JSUI->isBoundaryNode())
175753f127fSDimitry Andric           continue;
176753f127fSDimitry Andric         const MachineInstr *JMI = JSUI->getInstr();
177753f127fSDimitry Andric         if (!hasLessThanNumFused(*JSUI, 2) ||
178753f127fSDimitry Andric             !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
179753f127fSDimitry Andric           continue;
180753f127fSDimitry Andric         if (fuseInstructionPair(*DAG, *ISUI, *JSUI))
181753f127fSDimitry Andric           break;
182753f127fSDimitry Andric       }
183753f127fSDimitry Andric     }
184753f127fSDimitry Andric     LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
185753f127fSDimitry Andric   }
186753f127fSDimitry Andric };
187bdd1243dSDimitry Andric } // namespace
188753f127fSDimitry Andric 
createVOPDPairingMutation()189753f127fSDimitry Andric std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
190753f127fSDimitry Andric   return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);
191753f127fSDimitry Andric }
192