//===------------------ AMDGPUCustomBehaviour.cpp ---------------*-C++ -* -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements methods from the AMDGPUCustomBehaviour class.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCustomBehaviour.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/WithColor.h"

namespace llvm::mca {

/// Dispatch hook run on every lowered instruction. Only the s_waitcnt family
/// of opcodes needs extra processing (their immediate operands are preserved
/// via processWaitCnt); all other opcodes fall through untouched.
void AMDGPUInstrPostProcess::postProcessInstruction(
    std::unique_ptr<Instruction> &Inst, const MCInst &MCI) {
  switch (MCI.getOpcode()) {
  case AMDGPU::S_WAITCNT:
  case AMDGPU::S_WAITCNT_soft:
  case AMDGPU::S_WAITCNT_EXPCNT:
  case AMDGPU::S_WAITCNT_LGKMCNT:
  case AMDGPU::S_WAITCNT_VMCNT:
  case AMDGPU::S_WAITCNT_VSCNT:
  case AMDGPU::S_WAITCNT_VSCNT_soft:
  case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
  case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VSCNT_gfx10:
  case AMDGPU::S_WAITCNT_gfx10:
  case AMDGPU::S_WAITCNT_gfx6_gfx7:
  case AMDGPU::S_WAITCNT_vi:
    return processWaitCnt(Inst, MCI);
  }
}

// s_waitcnt instructions encode important information as immediate operands
// which are lost during the MCInst -> mca::Instruction lowering.
// Re-attach every MCInst operand (register or immediate) to the
// mca::Instruction so computeWaitCnt() can read them back later.
void AMDGPUInstrPostProcess::processWaitCnt(std::unique_ptr<Instruction> &Inst,
                                            const MCInst &MCI) {
  for (int Idx = 0, N = MCI.size(); Idx < N; Idx++) {
    MCAOperand Op;
    const MCOperand &MCOp = MCI.getOperand(Idx);
    if (MCOp.isReg()) {
      Op = MCAOperand::createReg(MCOp.getReg());
    } else if (MCOp.isImm()) {
      Op = MCAOperand::createImm(MCOp.getImm());
    }
    // Note: an operand that is neither a register nor an immediate is still
    // appended (default-constructed) so operand indices stay aligned.
    Op.setIndex(Idx);
    Inst->addOperand(Op);
  }
}

AMDGPUCustomBehaviour::AMDGPUCustomBehaviour(const MCSubtargetInfo &STI,
                                             const mca::SourceMgr &SrcMgr,
                                             const MCInstrInfo &MCII)
    : CustomBehaviour(STI, SrcMgr, MCII) {
  // Pre-compute, once per source region, which counters each instruction
  // interacts with; handleWaitCnt() consults this table on every hazard check.
  generateWaitCntInfo();
}

/// Returns the number of cycles that IR must stall for, or 0 when it may
/// issue. Only the s_waitcnt family can create a custom hazard here.
unsigned AMDGPUCustomBehaviour::checkCustomHazard(ArrayRef<InstRef> IssuedInst,
                                                  const InstRef &IR) {
  const Instruction &Inst = *IR.getInstruction();
  unsigned Opcode = Inst.getOpcode();

  // llvm-mca is generally run on fully compiled assembly so we wouldn't see any
  // pseudo instructions here. However, there are plans for the future to make
  // it possible to use mca within backend passes. As such, I have left the
  // pseudo version of s_waitcnt within this switch statement.
  switch (Opcode) {
  default:
    return 0;
  case AMDGPU::S_WAITCNT: // This instruction
  case AMDGPU::S_WAITCNT_soft:
  case AMDGPU::S_WAITCNT_EXPCNT:
  case AMDGPU::S_WAITCNT_LGKMCNT:
  case AMDGPU::S_WAITCNT_VMCNT:
  case AMDGPU::S_WAITCNT_VSCNT:
  case AMDGPU::S_WAITCNT_VSCNT_soft: // to this instruction are all pseudo.
  case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
  case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VSCNT_gfx10:
  case AMDGPU::S_WAITCNT_gfx10:
  case AMDGPU::S_WAITCNT_gfx6_gfx7:
  case AMDGPU::S_WAITCNT_vi:
    // s_endpgm also behaves as if there is an implicit
    // s_waitcnt 0, but I'm not sure if it would be appropriate
    // to model this in llvm-mca based on how the iterations work
    // while simulating the pipeline over and over.
    return handleWaitCnt(IssuedInst, IR);
  }

  return 0;
}

/// Model an s_waitcnt: count how many currently in-flight instructions touch
/// each counter (vmcnt/expcnt/lgkmcnt/vscnt), compare against the thresholds
/// encoded in IR's operands, and return how many cycles to stall (0 = none).
unsigned AMDGPUCustomBehaviour::handleWaitCnt(ArrayRef<InstRef> IssuedInst,
                                              const InstRef &IR) {
  // Currently, all s_waitcnt instructions are handled except s_waitcnt_depctr.
  // I do not know how that instruction works so I did not attempt to model it.
  // set the max values to begin
  unsigned Vmcnt = 63;
  unsigned Expcnt = 7;
  unsigned Lgkmcnt = 31;
  unsigned Vscnt = 63;
  unsigned CurrVmcnt = 0;
  unsigned CurrExpcnt = 0;
  unsigned CurrLgkmcnt = 0;
  unsigned CurrVscnt = 0;
  unsigned CyclesToWaitVm = ~0U;
  unsigned CyclesToWaitExp = ~0U;
  unsigned CyclesToWaitLgkm = ~0U;
  unsigned CyclesToWaitVs = ~0U;

  // Overwrite the defaults above with the thresholds this s_waitcnt encodes.
  computeWaitCnt(IR, Vmcnt, Expcnt, Lgkmcnt, Vscnt);

  // We will now look at each of the currently executing instructions
  // to find out if this wait instruction still needs to wait.
  for (const InstRef &PrevIR : IssuedInst) {
    const Instruction &PrevInst = *PrevIR.getInstruction();
    // Source indices wrap per simulated iteration, hence the modulo.
    const unsigned PrevInstIndex = PrevIR.getSourceIndex() % SrcMgr.size();
    const WaitCntInfo &PrevInstWaitInfo = InstrWaitCntInfo[PrevInstIndex];
    const int CyclesLeft = PrevInst.getCyclesLeft();
    assert(CyclesLeft != UNKNOWN_CYCLES &&
           "We should know how many cycles are left for this instruction");
    // For each counter the in-flight instruction touches, bump the live count
    // and track the soonest completion among those instructions.
    if (PrevInstWaitInfo.VmCnt) {
      CurrVmcnt++;
      if ((unsigned)CyclesLeft < CyclesToWaitVm)
        CyclesToWaitVm = CyclesLeft;
    }
    if (PrevInstWaitInfo.ExpCnt) {
      CurrExpcnt++;
      if ((unsigned)CyclesLeft < CyclesToWaitExp)
        CyclesToWaitExp = CyclesLeft;
    }
    if (PrevInstWaitInfo.LgkmCnt) {
      CurrLgkmcnt++;
      if ((unsigned)CyclesLeft < CyclesToWaitLgkm)
        CyclesToWaitLgkm = CyclesLeft;
    }
    if (PrevInstWaitInfo.VsCnt) {
      CurrVscnt++;
      if ((unsigned)CyclesLeft < CyclesToWaitVs)
        CyclesToWaitVs = CyclesLeft;
    }
  }

  // A counter only forces a stall when more instructions are outstanding than
  // the s_waitcnt threshold allows. Take the smallest wait over all such
  // counters.
  unsigned CyclesToWait = ~0U;
  if (CurrVmcnt > Vmcnt && CyclesToWaitVm < CyclesToWait)
    CyclesToWait = CyclesToWaitVm;
  if (CurrExpcnt > Expcnt && CyclesToWaitExp < CyclesToWait)
    CyclesToWait = CyclesToWaitExp;
  if (CurrLgkmcnt > Lgkmcnt && CyclesToWaitLgkm < CyclesToWait)
    CyclesToWait = CyclesToWaitLgkm;
  if (CurrVscnt > Vscnt && CyclesToWaitVs < CyclesToWait)
    CyclesToWait = CyclesToWaitVs;

  // We may underestimate how many cycles we need to wait, but this
  // isn't a big deal. Our return value is just how many cycles until
  // this function gets run again. So as long as we don't overestimate
  // the wait time, we'll still end up stalling at this instruction
  // for the correct number of cycles.

  if (CyclesToWait == ~0U)
    return 0;
  return CyclesToWait;
}

/// Decode the counter thresholds carried by the s_waitcnt instruction IR.
/// The single-counter gfx10 forms set exactly one of the out-params; the
/// combined forms are decoded via AMDGPU::decodeWaitcnt. Out-params that the
/// instruction does not mention keep their caller-supplied (max) values.
void AMDGPUCustomBehaviour::computeWaitCnt(const InstRef &IR, unsigned &Vmcnt,
                                           unsigned &Expcnt, unsigned &Lgkmcnt,
                                           unsigned &Vscnt) {
  AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(STI.getCPU());
  const Instruction &Inst = *IR.getInstruction();
  unsigned Opcode = Inst.getOpcode();

  switch (Opcode) {
  case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
  case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VSCNT_gfx10: {
    // Should probably be checking for nullptr
    // here, but I'm not sure how I should handle the case
    // where we see a nullptr.
    // (Operands were re-attached by AMDGPUInstrPostProcess::processWaitCnt.)
    const MCAOperand *OpReg = Inst.getOperand(0);
    const MCAOperand *OpImm = Inst.getOperand(1);
    assert(OpReg && OpReg->isReg() && "First operand should be a register.");
    assert(OpImm && OpImm->isImm() && "Second operand should be an immediate.");
    if (OpReg->getReg() != AMDGPU::SGPR_NULL) {
      // Instruction is using a real register.
      // Since we can't know what value this register will have,
      // we can't compute what the value of this wait should be.
      WithColor::warning() << "The register component of "
                           << MCII.getName(Opcode) << " will be completely "
                           << "ignored. So the wait may not be accurate.\n";
    }
    switch (Opcode) {
    // Redundant switch so I don't have to repeat the code above
    // for each case. There are more clever ways to avoid this
    // extra switch and anyone can feel free to implement one of them.
    case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
      Expcnt = OpImm->getImm();
      break;
    case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
      Lgkmcnt = OpImm->getImm();
      break;
    case AMDGPU::S_WAITCNT_VMCNT_gfx10:
      Vmcnt = OpImm->getImm();
      break;
    case AMDGPU::S_WAITCNT_VSCNT_gfx10:
      Vscnt = OpImm->getImm();
      break;
    }
    return;
  }
  case AMDGPU::S_WAITCNT_gfx10:
  case AMDGPU::S_WAITCNT_gfx6_gfx7:
  case AMDGPU::S_WAITCNT_vi:
    // Combined encoding: one immediate packs vmcnt/expcnt/lgkmcnt; decode it
    // with the ISA-version-aware helper. Vscnt is not part of this encoding.
    unsigned WaitCnt = Inst.getOperand(0)->getImm();
    AMDGPU::decodeWaitcnt(IV, WaitCnt, Vmcnt, Expcnt, Lgkmcnt);
    return;
  }
}

/// Classify every instruction in the source region by which wait counters it
/// interacts with, filling InstrWaitCntInfo (indexed by source position).
void AMDGPUCustomBehaviour::generateWaitCntInfo() {
  // The core logic from this function is taken from
  // SIInsertWaitcnts::updateEventWaitcntAfter() In that pass, the instructions
  // that are being looked at are in the MachineInstr format, whereas we have
  // access to the MCInst format. The side effects of this are that we can't use
  // the mayAccessVMEMThroughFlat(Inst) or mayAccessLDSThroughFlat(Inst)
  // functions. Therefore, we conservatively assume that these functions will
  // return true. This may cause a few instructions to be incorrectly tagged
  // with an extra CNT. However, these are instructions that do interact with at
  // least one CNT so giving them an extra CNT shouldn't cause issues in most
  // scenarios.
  AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(STI.getCPU());
  InstrWaitCntInfo.resize(SrcMgr.size());

  for (const auto &EN : llvm::enumerate(SrcMgr.getInstructions())) {
    const std::unique_ptr<Instruction> &Inst = EN.value();
    unsigned Index = EN.index();
    unsigned Opcode = Inst->getOpcode();
    const MCInstrDesc &MCID = MCII.get(Opcode);
    if ((MCID.TSFlags & SIInstrFlags::DS) &&
        (MCID.TSFlags & SIInstrFlags::LGKM_CNT)) {
      InstrWaitCntInfo[Index].LgkmCnt = true;
      if (isAlwaysGDS(Opcode) || hasModifiersSet(Inst, AMDGPU::OpName::gds))
        InstrWaitCntInfo[Index].ExpCnt = true;
    } else if (MCID.TSFlags & SIInstrFlags::FLAT) {
      // We conservatively assume that mayAccessVMEMThroughFlat(Inst)
      // and mayAccessLDSThroughFlat(Inst) would both return true for this
      // instruction. We have to do this because those functions use
      // information about the memory operands that we don't have access to.
      InstrWaitCntInfo[Index].LgkmCnt = true;
      if (!STI.hasFeature(AMDGPU::FeatureVscnt))
        InstrWaitCntInfo[Index].VmCnt = true;
      else if (MCID.mayLoad() && !(MCID.TSFlags & SIInstrFlags::IsAtomicNoRet))
        InstrWaitCntInfo[Index].VmCnt = true;
      else
        InstrWaitCntInfo[Index].VsCnt = true;
    } else if (isVMEM(MCID) && !AMDGPU::getMUBUFIsBufferInv(Opcode)) {
      if (!STI.hasFeature(AMDGPU::FeatureVscnt))
        InstrWaitCntInfo[Index].VmCnt = true;
      else if ((MCID.mayLoad() &&
                !(MCID.TSFlags & SIInstrFlags::IsAtomicNoRet)) ||
               ((MCID.TSFlags & SIInstrFlags::MIMG) && !MCID.mayLoad() &&
                !MCID.mayStore()))
        InstrWaitCntInfo[Index].VmCnt = true;
      else if (MCID.mayStore())
        InstrWaitCntInfo[Index].VsCnt = true;

      // (IV.Major < 7) is meant to represent
      // GCNTarget.vmemWriteNeedsExpWaitcnt()
      // which is defined as
      // { return getGeneration() < SEA_ISLANDS; }
      if (IV.Major < 7 &&
          (MCID.mayStore() || (MCID.TSFlags & SIInstrFlags::IsAtomicRet)))
        InstrWaitCntInfo[Index].ExpCnt = true;
    } else if (MCID.TSFlags & SIInstrFlags::SMRD) {
      InstrWaitCntInfo[Index].LgkmCnt = true;
    } else if (MCID.TSFlags & SIInstrFlags::EXP) {
      InstrWaitCntInfo[Index].ExpCnt = true;
    } else {
      // A handful of scalar opcodes also complete through lgkmcnt.
      switch (Opcode) {
      case AMDGPU::S_SENDMSG:
      case AMDGPU::S_SENDMSGHALT:
      case AMDGPU::S_MEMTIME:
      case AMDGPU::S_MEMREALTIME:
        InstrWaitCntInfo[Index].LgkmCnt = true;
        break;
      }
    }
  }
}

// taken from SIInstrInfo::isVMEM()
bool AMDGPUCustomBehaviour::isVMEM(const MCInstrDesc &MCID) {
  return MCID.TSFlags & SIInstrFlags::MUBUF ||
         MCID.TSFlags & SIInstrFlags::MTBUF ||
         MCID.TSFlags & SIInstrFlags::MIMG;
}

// taken from SIInstrInfo::hasModifiersSet()
// True when the named operand exists, is an immediate, and is non-zero.
bool AMDGPUCustomBehaviour::hasModifiersSet(
    const std::unique_ptr<Instruction> &Inst, unsigned OpName) const {
  int Idx = AMDGPU::getNamedOperandIdx(Inst->getOpcode(), OpName);
  if (Idx == -1)
    return false;

  const MCAOperand *Op = Inst->getOperand(Idx);
  if (Op == nullptr || !Op->isImm() || !Op->getImm())
    return false;

  return true;
}

// taken from SIInstrInfo::isGWS()
bool AMDGPUCustomBehaviour::isGWS(uint16_t Opcode) const {
  const MCInstrDesc &MCID = MCII.get(Opcode);
  return MCID.TSFlags & SIInstrFlags::GWS;
}

// taken from SIInstrInfo::isAlwaysGDS()
bool AMDGPUCustomBehaviour::isAlwaysGDS(uint16_t Opcode) const {
  return Opcode == AMDGPU::DS_ORDERED_COUNT || isGWS(Opcode);
}

} // namespace llvm::mca

using namespace llvm;
using namespace mca;

// Factory hooks handed to the TargetRegistry below.
static CustomBehaviour *
createAMDGPUCustomBehaviour(const MCSubtargetInfo &STI,
                            const mca::SourceMgr &SrcMgr,
                            const MCInstrInfo &MCII) {
  return new AMDGPUCustomBehaviour(STI, SrcMgr, MCII);
}

static InstrPostProcess *
createAMDGPUInstrPostProcess(const MCSubtargetInfo &STI,
                             const MCInstrInfo &MCII) {
  return new AMDGPUInstrPostProcess(STI, MCII);
}

/// Extern function to initialize the targets for the AMDGPU backend

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTargetMCA() {
  TargetRegistry::RegisterCustomBehaviour(getTheR600Target(),
                                          createAMDGPUCustomBehaviour);
  TargetRegistry::RegisterInstrPostProcess(getTheR600Target(),
                                           createAMDGPUInstrPostProcess);

  TargetRegistry::RegisterCustomBehaviour(getTheGCNTarget(),
                                          createAMDGPUCustomBehaviour);
  TargetRegistry::RegisterInstrPostProcess(getTheGCNTarget(),
                                           createAMDGPUInstrPostProcess);
}