1*753f127fSDimitry Andric //===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===// 2*753f127fSDimitry Andric // 3*753f127fSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*753f127fSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*753f127fSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*753f127fSDimitry Andric // 7*753f127fSDimitry Andric //===----------------------------------------------------------------------===// 8*753f127fSDimitry Andric // 9*753f127fSDimitry Andric /// \file This file contains the AMDGPU DAG scheduling 10*753f127fSDimitry Andric /// mutation to pair VOPD instructions back to back. It also contains 11*753f127fSDimitry Andric // subroutines useful in the creation of VOPD instructions 12*753f127fSDimitry Andric // 13*753f127fSDimitry Andric //===----------------------------------------------------------------------===// 14*753f127fSDimitry Andric 15*753f127fSDimitry Andric #include "GCNVOPDUtils.h" 16*753f127fSDimitry Andric #include "AMDGPUSubtarget.h" 17*753f127fSDimitry Andric #include "GCNSubtarget.h" 18*753f127fSDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 19*753f127fSDimitry Andric #include "SIInstrInfo.h" 20*753f127fSDimitry Andric #include "llvm/ADT/STLExtras.h" 21*753f127fSDimitry Andric #include "llvm/ADT/SmallVector.h" 22*753f127fSDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h" 23*753f127fSDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 24*753f127fSDimitry Andric #include "llvm/CodeGen/MachineOperand.h" 25*753f127fSDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 26*753f127fSDimitry Andric #include "llvm/CodeGen/MacroFusion.h" 27*753f127fSDimitry Andric #include "llvm/CodeGen/ScheduleDAG.h" 28*753f127fSDimitry Andric #include "llvm/CodeGen/ScheduleDAGMutation.h" 29*753f127fSDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h" 30*753f127fSDimitry Andric #include "llvm/MC/MCInst.h" 31*753f127fSDimitry Andric 32*753f127fSDimitry Andric using namespace llvm; 33*753f127fSDimitry Andric 34*753f127fSDimitry Andric #define DEBUG_TYPE "gcn-vopd-utils" 35*753f127fSDimitry Andric 36*753f127fSDimitry Andric bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII, 37*753f127fSDimitry Andric const MachineInstr &FirstMI, 38*753f127fSDimitry Andric const MachineInstr &SecondMI) { 39*753f127fSDimitry Andric const MachineFunction *MF = FirstMI.getMF(); 40*753f127fSDimitry Andric const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); 41*753f127fSDimitry Andric const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo()); 42*753f127fSDimitry Andric const MachineRegisterInfo &MRI = MF->getRegInfo(); 43*753f127fSDimitry Andric const unsigned NumVGPRBanks = 4; 44*753f127fSDimitry Andric // Literals also count against scalar bus limit 45*753f127fSDimitry Andric SmallVector<const MachineOperand *> UniqueLiterals; 46*753f127fSDimitry Andric auto addLiteral = [&](const MachineOperand &Op) { 47*753f127fSDimitry Andric for (auto &Literal : UniqueLiterals) { 48*753f127fSDimitry Andric if (Literal->isIdenticalTo(Op)) 49*753f127fSDimitry Andric return; 50*753f127fSDimitry Andric } 51*753f127fSDimitry Andric UniqueLiterals.push_back(&Op); 52*753f127fSDimitry Andric }; 53*753f127fSDimitry Andric SmallVector<Register> UniqueScalarRegs; 54*753f127fSDimitry Andric assert([&]() -> bool { 55*753f127fSDimitry Andric for (auto MII = MachineBasicBlock::const_iterator(&FirstMI); 56*753f127fSDimitry Andric MII != FirstMI.getParent()->instr_end(); ++MII) { 57*753f127fSDimitry Andric if (&*MII == &SecondMI) 58*753f127fSDimitry Andric return true; 59*753f127fSDimitry Andric } 60*753f127fSDimitry Andric return false; 61*753f127fSDimitry Andric }() && "Expected FirstMI to precede SecondMI"); 62*753f127fSDimitry Andric // Cannot pair dependent instructions 63*753f127fSDimitry Andric for (const auto &Use : SecondMI.uses()) 64*753f127fSDimitry Andric if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg())) 65*753f127fSDimitry Andric return false; 66*753f127fSDimitry Andric 67*753f127fSDimitry Andric struct ComponentInfo { 68*753f127fSDimitry Andric ComponentInfo(const MachineInstr &MI) : MI(MI) {} 69*753f127fSDimitry Andric Register Dst, Reg0, Reg1, Reg2; 70*753f127fSDimitry Andric const MachineInstr &MI; 71*753f127fSDimitry Andric }; 72*753f127fSDimitry Andric ComponentInfo CInfo[] = {ComponentInfo(FirstMI), ComponentInfo(SecondMI)}; 73*753f127fSDimitry Andric 74*753f127fSDimitry Andric for (ComponentInfo &Comp : CInfo) { 75*753f127fSDimitry Andric switch (Comp.MI.getOpcode()) { 76*753f127fSDimitry Andric case AMDGPU::V_FMAMK_F32: 77*753f127fSDimitry Andric // cannot inline the fixed literal in fmamk 78*753f127fSDimitry Andric addLiteral(Comp.MI.getOperand(2)); 79*753f127fSDimitry Andric Comp.Reg2 = Comp.MI.getOperand(3).getReg(); 80*753f127fSDimitry Andric break; 81*753f127fSDimitry Andric case AMDGPU::V_FMAAK_F32: 82*753f127fSDimitry Andric // cannot inline the fixed literal in fmaak 83*753f127fSDimitry Andric addLiteral(Comp.MI.getOperand(3)); 84*753f127fSDimitry Andric Comp.Reg1 = Comp.MI.getOperand(2).getReg(); 85*753f127fSDimitry Andric break; 86*753f127fSDimitry Andric case AMDGPU::V_FMAC_F32_e32: 87*753f127fSDimitry Andric case AMDGPU::V_DOT2_F32_F16: 88*753f127fSDimitry Andric case AMDGPU::V_DOT2_F32_BF16: 89*753f127fSDimitry Andric Comp.Reg1 = Comp.MI.getOperand(2).getReg(); 90*753f127fSDimitry Andric Comp.Reg2 = Comp.MI.getOperand(0).getReg(); 91*753f127fSDimitry Andric break; 92*753f127fSDimitry Andric case AMDGPU::V_CNDMASK_B32_e32: 93*753f127fSDimitry Andric UniqueScalarRegs.push_back(AMDGPU::VCC_LO); 94*753f127fSDimitry Andric Comp.Reg1 = Comp.MI.getOperand(2).getReg(); 95*753f127fSDimitry Andric break; 96*753f127fSDimitry Andric case AMDGPU::V_MOV_B32_e32: 97*753f127fSDimitry Andric break; 98*753f127fSDimitry Andric default: 99*753f127fSDimitry Andric Comp.Reg1 = Comp.MI.getOperand(2).getReg(); 100*753f127fSDimitry Andric break; 101*753f127fSDimitry Andric } 102*753f127fSDimitry Andric 103*753f127fSDimitry Andric Comp.Dst = Comp.MI.getOperand(0).getReg(); 104*753f127fSDimitry Andric 105*753f127fSDimitry Andric const MachineOperand &Op0 = Comp.MI.getOperand(1); 106*753f127fSDimitry Andric if (Op0.isReg()) { 107*753f127fSDimitry Andric if (!TRI->isVectorRegister(MRI, Op0.getReg())) { 108*753f127fSDimitry Andric if (!is_contained(UniqueScalarRegs, Op0.getReg())) 109*753f127fSDimitry Andric UniqueScalarRegs.push_back(Op0.getReg()); 110*753f127fSDimitry Andric } else 111*753f127fSDimitry Andric Comp.Reg0 = Op0.getReg(); 112*753f127fSDimitry Andric } else { 113*753f127fSDimitry Andric if (!TII.isInlineConstant(Comp.MI, 1)) 114*753f127fSDimitry Andric addLiteral(Op0); 115*753f127fSDimitry Andric } 116*753f127fSDimitry Andric } 117*753f127fSDimitry Andric 118*753f127fSDimitry Andric if (UniqueLiterals.size() > 1) 119*753f127fSDimitry Andric return false; 120*753f127fSDimitry Andric if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2) 121*753f127fSDimitry Andric return false; 122*753f127fSDimitry Andric 123*753f127fSDimitry Andric // check port 0 124*753f127fSDimitry Andric if (CInfo[0].Reg0 && CInfo[1].Reg0 && 125*753f127fSDimitry Andric CInfo[0].Reg0 % NumVGPRBanks == CInfo[1].Reg0 % NumVGPRBanks) 126*753f127fSDimitry Andric return false; 127*753f127fSDimitry Andric // check port 1 128*753f127fSDimitry Andric if (CInfo[0].Reg1 && CInfo[1].Reg1 && 129*753f127fSDimitry Andric CInfo[0].Reg1 % NumVGPRBanks == CInfo[1].Reg1 % NumVGPRBanks) 130*753f127fSDimitry Andric return false; 131*753f127fSDimitry Andric // check port 2 132*753f127fSDimitry Andric if (CInfo[0].Reg2 && CInfo[1].Reg2 && 133*753f127fSDimitry Andric !((CInfo[0].Reg2 ^ CInfo[1].Reg2) & 0x1)) 134*753f127fSDimitry Andric return false; 135*753f127fSDimitry Andric if (!((CInfo[0].Dst ^ CInfo[1].Dst) & 0x1)) 136*753f127fSDimitry Andric return false; 137*753f127fSDimitry Andric 138*753f127fSDimitry Andric LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI 139*753f127fSDimitry Andric << "\n\tY: " << SecondMI << "\n"); 140*753f127fSDimitry Andric return true; 141*753f127fSDimitry Andric } 142*753f127fSDimitry Andric 143*753f127fSDimitry Andric /// Check if the instr pair, FirstMI and SecondMI, should be scheduled 144*753f127fSDimitry Andric /// together. Given SecondMI, when FirstMI is unspecified, then check if 145*753f127fSDimitry Andric /// SecondMI may be part of a fused pair at all. 146*753f127fSDimitry Andric static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII, 147*753f127fSDimitry Andric const TargetSubtargetInfo &TSI, 148*753f127fSDimitry Andric const MachineInstr *FirstMI, 149*753f127fSDimitry Andric const MachineInstr &SecondMI) { 150*753f127fSDimitry Andric const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII); 151*753f127fSDimitry Andric unsigned Opc2 = SecondMI.getOpcode(); 152*753f127fSDimitry Andric auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2); 153*753f127fSDimitry Andric 154*753f127fSDimitry Andric // One instruction case 155*753f127fSDimitry Andric if (!FirstMI) 156*753f127fSDimitry Andric return SecondCanBeVOPD.Y; 157*753f127fSDimitry Andric 158*753f127fSDimitry Andric unsigned Opc = FirstMI->getOpcode(); 159*753f127fSDimitry Andric auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc); 160*753f127fSDimitry Andric 161*753f127fSDimitry Andric if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) || 162*753f127fSDimitry Andric (FirstCanBeVOPD.Y && SecondCanBeVOPD.X))) 163*753f127fSDimitry Andric return false; 164*753f127fSDimitry Andric 165*753f127fSDimitry Andric return checkVOPDRegConstraints(STII, *FirstMI, SecondMI); 166*753f127fSDimitry Andric } 167*753f127fSDimitry Andric 168*753f127fSDimitry Andric /// Adapts design from MacroFusion 169*753f127fSDimitry Andric /// Puts valid candidate instructions back-to-back so they can easily 170*753f127fSDimitry Andric /// be turned into VOPD instructions 171*753f127fSDimitry Andric /// Greedily pairs instruction candidates. O(n^2) algorithm. 172*753f127fSDimitry Andric struct VOPDPairingMutation : ScheduleDAGMutation { 173*753f127fSDimitry Andric ShouldSchedulePredTy shouldScheduleAdjacent; // NOLINT: function pointer 174*753f127fSDimitry Andric 175*753f127fSDimitry Andric VOPDPairingMutation( 176*753f127fSDimitry Andric ShouldSchedulePredTy shouldScheduleAdjacent) // NOLINT: function pointer 177*753f127fSDimitry Andric : shouldScheduleAdjacent(shouldScheduleAdjacent) {} 178*753f127fSDimitry Andric 179*753f127fSDimitry Andric void apply(ScheduleDAGInstrs *DAG) override { 180*753f127fSDimitry Andric const TargetInstrInfo &TII = *DAG->TII; 181*753f127fSDimitry Andric const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>(); 182*753f127fSDimitry Andric if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) { 183*753f127fSDimitry Andric LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n"); 184*753f127fSDimitry Andric return; 185*753f127fSDimitry Andric } 186*753f127fSDimitry Andric 187*753f127fSDimitry Andric std::vector<SUnit>::iterator ISUI, JSUI; 188*753f127fSDimitry Andric for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) { 189*753f127fSDimitry Andric const MachineInstr *IMI = ISUI->getInstr(); 190*753f127fSDimitry Andric if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI)) 191*753f127fSDimitry Andric continue; 192*753f127fSDimitry Andric if (!hasLessThanNumFused(*ISUI, 2)) 193*753f127fSDimitry Andric continue; 194*753f127fSDimitry Andric 195*753f127fSDimitry Andric for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) { 196*753f127fSDimitry Andric if (JSUI->isBoundaryNode()) 197*753f127fSDimitry Andric continue; 198*753f127fSDimitry Andric const MachineInstr *JMI = JSUI->getInstr(); 199*753f127fSDimitry Andric if (!hasLessThanNumFused(*JSUI, 2) || 200*753f127fSDimitry Andric !shouldScheduleAdjacent(TII, ST, IMI, *JMI)) 201*753f127fSDimitry Andric continue; 202*753f127fSDimitry Andric if (fuseInstructionPair(*DAG, *ISUI, *JSUI)) 203*753f127fSDimitry Andric break; 204*753f127fSDimitry Andric } 205*753f127fSDimitry Andric } 206*753f127fSDimitry Andric LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n"); 207*753f127fSDimitry Andric } 208*753f127fSDimitry Andric }; 209*753f127fSDimitry Andric 210*753f127fSDimitry Andric std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() { 211*753f127fSDimitry Andric return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent); 212*753f127fSDimitry Andric } 213