//===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains the AMDGPU DAG scheduling
/// mutation to pair VOPD instructions back to back. It also contains
/// subroutines useful in the creation of VOPD instructions.
//
//===----------------------------------------------------------------------===//

#include "GCNVOPDUtils.h"
#include "AMDGPUSubtarget.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MacroFusion.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
31753f127fSDimitry Andric #include "llvm/MC/MCInst.h" 32753f127fSDimitry Andric 33753f127fSDimitry Andric using namespace llvm; 34753f127fSDimitry Andric 35753f127fSDimitry Andric #define DEBUG_TYPE "gcn-vopd-utils" 36753f127fSDimitry Andric 37753f127fSDimitry Andric bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII, 38753f127fSDimitry Andric const MachineInstr &FirstMI, 39753f127fSDimitry Andric const MachineInstr &SecondMI) { 40*bdd1243dSDimitry Andric namespace VOPD = AMDGPU::VOPD; 41*bdd1243dSDimitry Andric 42753f127fSDimitry Andric const MachineFunction *MF = FirstMI.getMF(); 43753f127fSDimitry Andric const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); 44753f127fSDimitry Andric const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo()); 45753f127fSDimitry Andric const MachineRegisterInfo &MRI = MF->getRegInfo(); 46753f127fSDimitry Andric // Literals also count against scalar bus limit 47753f127fSDimitry Andric SmallVector<const MachineOperand *> UniqueLiterals; 48753f127fSDimitry Andric auto addLiteral = [&](const MachineOperand &Op) { 49753f127fSDimitry Andric for (auto &Literal : UniqueLiterals) { 50753f127fSDimitry Andric if (Literal->isIdenticalTo(Op)) 51753f127fSDimitry Andric return; 52753f127fSDimitry Andric } 53753f127fSDimitry Andric UniqueLiterals.push_back(&Op); 54753f127fSDimitry Andric }; 55753f127fSDimitry Andric SmallVector<Register> UniqueScalarRegs; 56753f127fSDimitry Andric assert([&]() -> bool { 57753f127fSDimitry Andric for (auto MII = MachineBasicBlock::const_iterator(&FirstMI); 58753f127fSDimitry Andric MII != FirstMI.getParent()->instr_end(); ++MII) { 59753f127fSDimitry Andric if (&*MII == &SecondMI) 60753f127fSDimitry Andric return true; 61753f127fSDimitry Andric } 62753f127fSDimitry Andric return false; 63753f127fSDimitry Andric }() && "Expected FirstMI to precede SecondMI"); 64753f127fSDimitry Andric // Cannot pair dependent instructions 65753f127fSDimitry Andric for (const auto &Use : SecondMI.uses()) 
66753f127fSDimitry Andric if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg())) 67753f127fSDimitry Andric return false; 68753f127fSDimitry Andric 69*bdd1243dSDimitry Andric auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) { 70*bdd1243dSDimitry Andric const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? FirstMI : SecondMI; 71*bdd1243dSDimitry Andric const MachineOperand &Operand = MI.getOperand(OperandIdx); 72*bdd1243dSDimitry Andric if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg())) 73*bdd1243dSDimitry Andric return Operand.getReg(); 74*bdd1243dSDimitry Andric return Register(); 75753f127fSDimitry Andric }; 76753f127fSDimitry Andric 77*bdd1243dSDimitry Andric auto InstInfo = 78*bdd1243dSDimitry Andric AMDGPU::getVOPDInstInfo(FirstMI.getDesc(), SecondMI.getDesc()); 79*bdd1243dSDimitry Andric 80*bdd1243dSDimitry Andric for (auto CompIdx : VOPD::COMPONENTS) { 81*bdd1243dSDimitry Andric const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI; 82*bdd1243dSDimitry Andric 83*bdd1243dSDimitry Andric const MachineOperand &Src0 = MI.getOperand(VOPD::Component::SRC0); 84*bdd1243dSDimitry Andric if (Src0.isReg()) { 85*bdd1243dSDimitry Andric if (!TRI->isVectorRegister(MRI, Src0.getReg())) { 86*bdd1243dSDimitry Andric if (!is_contained(UniqueScalarRegs, Src0.getReg())) 87*bdd1243dSDimitry Andric UniqueScalarRegs.push_back(Src0.getReg()); 88753f127fSDimitry Andric } 89753f127fSDimitry Andric } else { 90*bdd1243dSDimitry Andric if (!TII.isInlineConstant(MI, VOPD::Component::SRC0)) 91*bdd1243dSDimitry Andric addLiteral(Src0); 92753f127fSDimitry Andric } 93*bdd1243dSDimitry Andric 94*bdd1243dSDimitry Andric if (InstInfo[CompIdx].hasMandatoryLiteral()) { 95*bdd1243dSDimitry Andric auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex(); 96*bdd1243dSDimitry Andric addLiteral(MI.getOperand(CompOprIdx)); 97*bdd1243dSDimitry Andric } 98*bdd1243dSDimitry Andric if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC)) 
99*bdd1243dSDimitry Andric UniqueScalarRegs.push_back(AMDGPU::VCC_LO); 100753f127fSDimitry Andric } 101753f127fSDimitry Andric 102753f127fSDimitry Andric if (UniqueLiterals.size() > 1) 103753f127fSDimitry Andric return false; 104753f127fSDimitry Andric if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2) 105753f127fSDimitry Andric return false; 106*bdd1243dSDimitry Andric if (InstInfo.hasInvalidOperand(getVRegIdx)) 107753f127fSDimitry Andric return false; 108753f127fSDimitry Andric 109753f127fSDimitry Andric LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI 110753f127fSDimitry Andric << "\n\tY: " << SecondMI << "\n"); 111753f127fSDimitry Andric return true; 112753f127fSDimitry Andric } 113753f127fSDimitry Andric 114753f127fSDimitry Andric /// Check if the instr pair, FirstMI and SecondMI, should be scheduled 115753f127fSDimitry Andric /// together. Given SecondMI, when FirstMI is unspecified, then check if 116753f127fSDimitry Andric /// SecondMI may be part of a fused pair at all. 
117753f127fSDimitry Andric static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII, 118753f127fSDimitry Andric const TargetSubtargetInfo &TSI, 119753f127fSDimitry Andric const MachineInstr *FirstMI, 120753f127fSDimitry Andric const MachineInstr &SecondMI) { 121753f127fSDimitry Andric const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII); 122753f127fSDimitry Andric unsigned Opc2 = SecondMI.getOpcode(); 123753f127fSDimitry Andric auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2); 124753f127fSDimitry Andric 125753f127fSDimitry Andric // One instruction case 126753f127fSDimitry Andric if (!FirstMI) 127753f127fSDimitry Andric return SecondCanBeVOPD.Y; 128753f127fSDimitry Andric 129753f127fSDimitry Andric unsigned Opc = FirstMI->getOpcode(); 130753f127fSDimitry Andric auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc); 131753f127fSDimitry Andric 132753f127fSDimitry Andric if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) || 133753f127fSDimitry Andric (FirstCanBeVOPD.Y && SecondCanBeVOPD.X))) 134753f127fSDimitry Andric return false; 135753f127fSDimitry Andric 136753f127fSDimitry Andric return checkVOPDRegConstraints(STII, *FirstMI, SecondMI); 137753f127fSDimitry Andric } 138753f127fSDimitry Andric 139*bdd1243dSDimitry Andric namespace { 140753f127fSDimitry Andric /// Adapts design from MacroFusion 141753f127fSDimitry Andric /// Puts valid candidate instructions back-to-back so they can easily 142753f127fSDimitry Andric /// be turned into VOPD instructions 143753f127fSDimitry Andric /// Greedily pairs instruction candidates. O(n^2) algorithm. 
144753f127fSDimitry Andric struct VOPDPairingMutation : ScheduleDAGMutation { 145753f127fSDimitry Andric ShouldSchedulePredTy shouldScheduleAdjacent; // NOLINT: function pointer 146753f127fSDimitry Andric 147753f127fSDimitry Andric VOPDPairingMutation( 148753f127fSDimitry Andric ShouldSchedulePredTy shouldScheduleAdjacent) // NOLINT: function pointer 149753f127fSDimitry Andric : shouldScheduleAdjacent(shouldScheduleAdjacent) {} 150753f127fSDimitry Andric 151753f127fSDimitry Andric void apply(ScheduleDAGInstrs *DAG) override { 152753f127fSDimitry Andric const TargetInstrInfo &TII = *DAG->TII; 153753f127fSDimitry Andric const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>(); 154753f127fSDimitry Andric if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) { 155753f127fSDimitry Andric LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n"); 156753f127fSDimitry Andric return; 157753f127fSDimitry Andric } 158753f127fSDimitry Andric 159753f127fSDimitry Andric std::vector<SUnit>::iterator ISUI, JSUI; 160753f127fSDimitry Andric for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) { 161753f127fSDimitry Andric const MachineInstr *IMI = ISUI->getInstr(); 162753f127fSDimitry Andric if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI)) 163753f127fSDimitry Andric continue; 164753f127fSDimitry Andric if (!hasLessThanNumFused(*ISUI, 2)) 165753f127fSDimitry Andric continue; 166753f127fSDimitry Andric 167753f127fSDimitry Andric for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) { 168753f127fSDimitry Andric if (JSUI->isBoundaryNode()) 169753f127fSDimitry Andric continue; 170753f127fSDimitry Andric const MachineInstr *JMI = JSUI->getInstr(); 171753f127fSDimitry Andric if (!hasLessThanNumFused(*JSUI, 2) || 172753f127fSDimitry Andric !shouldScheduleAdjacent(TII, ST, IMI, *JMI)) 173753f127fSDimitry Andric continue; 174753f127fSDimitry Andric if (fuseInstructionPair(*DAG, *ISUI, *JSUI)) 175753f127fSDimitry Andric break; 176753f127fSDimitry Andric } 
177753f127fSDimitry Andric } 178753f127fSDimitry Andric LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n"); 179753f127fSDimitry Andric } 180753f127fSDimitry Andric }; 181*bdd1243dSDimitry Andric } // namespace 182753f127fSDimitry Andric 183753f127fSDimitry Andric std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() { 184753f127fSDimitry Andric return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent); 185753f127fSDimitry Andric } 186