1dbed061bSPatrick Holland //===------------------ AMDGPUCustomBehaviour.cpp ---------------*-C++ -* -===// 2dbed061bSPatrick Holland // 3dbed061bSPatrick Holland // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4dbed061bSPatrick Holland // See https://llvm.org/LICENSE.txt for license information. 5dbed061bSPatrick Holland // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6dbed061bSPatrick Holland // 7dbed061bSPatrick Holland //===----------------------------------------------------------------------===// 8dbed061bSPatrick Holland /// \file 9dbed061bSPatrick Holland /// 10dbed061bSPatrick Holland /// This file implements methods from the AMDGPUCustomBehaviour class. 11dbed061bSPatrick Holland /// 12dbed061bSPatrick Holland //===----------------------------------------------------------------------===// 13dbed061bSPatrick Holland 14dbed061bSPatrick Holland #include "AMDGPUCustomBehaviour.h" 15dbed061bSPatrick Holland #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 16dbed061bSPatrick Holland #include "TargetInfo/AMDGPUTargetInfo.h" 17c1fa62baSJay Foad #include "Utils/AMDGPUBaseInfo.h" 1889b57061SReid Kleckner #include "llvm/MC/TargetRegistry.h" 19dbed061bSPatrick Holland #include "llvm/Support/WithColor.h" 20dbed061bSPatrick Holland 21*ff81bbedSJay Foad namespace llvm::mca { 22dbed061bSPatrick Holland 23e4ebfb57SPatrick Holland void AMDGPUInstrPostProcess::postProcessInstruction( 24e4ebfb57SPatrick Holland std::unique_ptr<Instruction> &Inst, const MCInst &MCI) { 25e4ebfb57SPatrick Holland switch (MCI.getOpcode()) { 26e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT: 27ef067f52SPierre van Houtryve case AMDGPU::S_WAITCNT_soft: 28e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_EXPCNT: 29e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_LGKMCNT: 30e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_VMCNT: 31e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_VSCNT: 32ef067f52SPierre van Houtryve case AMDGPU::S_WAITCNT_VSCNT_soft: 33e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_EXPCNT_gfx10: 34e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_LGKMCNT_gfx10: 35e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_VMCNT_gfx10: 36e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_VSCNT_gfx10: 37e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_gfx10: 38e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_gfx6_gfx7: 39e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_vi: 40e4ebfb57SPatrick Holland return processWaitCnt(Inst, MCI); 41e4ebfb57SPatrick Holland } 42e4ebfb57SPatrick Holland } 43e4ebfb57SPatrick Holland 44e4ebfb57SPatrick Holland // s_waitcnt instructions encode important information as immediate operands 45e4ebfb57SPatrick Holland // which are lost during the MCInst -> mca::Instruction lowering. 46e4ebfb57SPatrick Holland void AMDGPUInstrPostProcess::processWaitCnt(std::unique_ptr<Instruction> &Inst, 47e4ebfb57SPatrick Holland const MCInst &MCI) { 48e4ebfb57SPatrick Holland for (int Idx = 0, N = MCI.size(); Idx < N; Idx++) { 49e4ebfb57SPatrick Holland MCAOperand Op; 50e4ebfb57SPatrick Holland const MCOperand &MCOp = MCI.getOperand(Idx); 51e4ebfb57SPatrick Holland if (MCOp.isReg()) { 52e4ebfb57SPatrick Holland Op = MCAOperand::createReg(MCOp.getReg()); 53e4ebfb57SPatrick Holland } else if (MCOp.isImm()) { 54e4ebfb57SPatrick Holland Op = MCAOperand::createImm(MCOp.getImm()); 55e4ebfb57SPatrick Holland } 56e4ebfb57SPatrick Holland Op.setIndex(Idx); 57e4ebfb57SPatrick Holland Inst->addOperand(Op); 58e4ebfb57SPatrick Holland } 59e4ebfb57SPatrick Holland } 60e4ebfb57SPatrick Holland 61dbed061bSPatrick Holland AMDGPUCustomBehaviour::AMDGPUCustomBehaviour(const MCSubtargetInfo &STI, 62dbed061bSPatrick Holland const mca::SourceMgr &SrcMgr, 63dbed061bSPatrick Holland const MCInstrInfo &MCII) 64e4ebfb57SPatrick Holland : CustomBehaviour(STI, SrcMgr, MCII) { 65e4ebfb57SPatrick Holland generateWaitCntInfo(); 66e4ebfb57SPatrick Holland } 67dbed061bSPatrick Holland 68e4ebfb57SPatrick Holland unsigned AMDGPUCustomBehaviour::checkCustomHazard(ArrayRef<InstRef> IssuedInst, 69e4ebfb57SPatrick Holland const InstRef &IR) { 70e4ebfb57SPatrick Holland const Instruction &Inst = *IR.getInstruction(); 71e4ebfb57SPatrick Holland unsigned Opcode = Inst.getOpcode(); 72e4ebfb57SPatrick Holland 73e4ebfb57SPatrick Holland // llvm-mca is generally run on fully compiled assembly so we wouldn't see any 74e4ebfb57SPatrick Holland // pseudo instructions here. However, there are plans for the future to make 75e4ebfb57SPatrick Holland // it possible to use mca within backend passes. As such, I have left the 76e4ebfb57SPatrick Holland // pseudo version of s_waitcnt within this switch statement. 77e4ebfb57SPatrick Holland switch (Opcode) { 78e4ebfb57SPatrick Holland default: 79dbed061bSPatrick Holland return 0; 80e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT: // This instruction 81ef067f52SPierre van Houtryve case AMDGPU::S_WAITCNT_soft: 82e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_EXPCNT: 83e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_LGKMCNT: 84e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_VMCNT: 85ef067f52SPierre van Houtryve case AMDGPU::S_WAITCNT_VSCNT: 86ef067f52SPierre van Houtryve case AMDGPU::S_WAITCNT_VSCNT_soft: // to this instruction are all pseudo. 87e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_EXPCNT_gfx10: 88e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_LGKMCNT_gfx10: 89e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_VMCNT_gfx10: 90e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_VSCNT_gfx10: 91e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_gfx10: 92e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_gfx6_gfx7: 93e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_vi: 94e4ebfb57SPatrick Holland // s_endpgm also behaves as if there is an implicit 95e4ebfb57SPatrick Holland // s_waitcnt 0, but I'm not sure if it would be appropriate 96e4ebfb57SPatrick Holland // to model this in llvm-mca based on how the iterations work 97e4ebfb57SPatrick Holland // while simulating the pipeline over and over. 98e4ebfb57SPatrick Holland return handleWaitCnt(IssuedInst, IR); 99e4ebfb57SPatrick Holland } 100e4ebfb57SPatrick Holland 101e4ebfb57SPatrick Holland return 0; 102e4ebfb57SPatrick Holland } 103e4ebfb57SPatrick Holland 104e4ebfb57SPatrick Holland unsigned AMDGPUCustomBehaviour::handleWaitCnt(ArrayRef<InstRef> IssuedInst, 105e4ebfb57SPatrick Holland const InstRef &IR) { 106e4ebfb57SPatrick Holland // Currently, all s_waitcnt instructions are handled except s_waitcnt_depctr. 107e4ebfb57SPatrick Holland // I do not know how that instruction works so I did not attempt to model it. 108e4ebfb57SPatrick Holland // set the max values to begin 109e4ebfb57SPatrick Holland unsigned Vmcnt = 63; 110e4ebfb57SPatrick Holland unsigned Expcnt = 7; 111e4ebfb57SPatrick Holland unsigned Lgkmcnt = 31; 112e4ebfb57SPatrick Holland unsigned Vscnt = 63; 113e4ebfb57SPatrick Holland unsigned CurrVmcnt = 0; 114e4ebfb57SPatrick Holland unsigned CurrExpcnt = 0; 115e4ebfb57SPatrick Holland unsigned CurrLgkmcnt = 0; 116e4ebfb57SPatrick Holland unsigned CurrVscnt = 0; 117e4ebfb57SPatrick Holland unsigned CyclesToWaitVm = ~0U; 118e4ebfb57SPatrick Holland unsigned CyclesToWaitExp = ~0U; 119e4ebfb57SPatrick Holland unsigned CyclesToWaitLgkm = ~0U; 120e4ebfb57SPatrick Holland unsigned CyclesToWaitVs = ~0U; 121e4ebfb57SPatrick Holland 122e4ebfb57SPatrick Holland computeWaitCnt(IR, Vmcnt, Expcnt, Lgkmcnt, Vscnt); 123e4ebfb57SPatrick Holland 124e4ebfb57SPatrick Holland // We will now look at each of the currently executing instructions 125e4ebfb57SPatrick Holland // to find out if this wait instruction still needs to wait. 126d395befaSKazu Hirata for (const InstRef &PrevIR : IssuedInst) { 127e4ebfb57SPatrick Holland const Instruction &PrevInst = *PrevIR.getInstruction(); 128e4ebfb57SPatrick Holland const unsigned PrevInstIndex = PrevIR.getSourceIndex() % SrcMgr.size(); 129e4ebfb57SPatrick Holland const WaitCntInfo &PrevInstWaitInfo = InstrWaitCntInfo[PrevInstIndex]; 130e4ebfb57SPatrick Holland const int CyclesLeft = PrevInst.getCyclesLeft(); 131e4ebfb57SPatrick Holland assert(CyclesLeft != UNKNOWN_CYCLES && 132e4ebfb57SPatrick Holland "We should know how many cycles are left for this instruction"); 133e4ebfb57SPatrick Holland if (PrevInstWaitInfo.VmCnt) { 134e4ebfb57SPatrick Holland CurrVmcnt++; 135e4ebfb57SPatrick Holland if ((unsigned)CyclesLeft < CyclesToWaitVm) 136e4ebfb57SPatrick Holland CyclesToWaitVm = CyclesLeft; 137e4ebfb57SPatrick Holland } 138e4ebfb57SPatrick Holland if (PrevInstWaitInfo.ExpCnt) { 139e4ebfb57SPatrick Holland CurrExpcnt++; 140e4ebfb57SPatrick Holland if ((unsigned)CyclesLeft < CyclesToWaitExp) 141e4ebfb57SPatrick Holland CyclesToWaitExp = CyclesLeft; 142e4ebfb57SPatrick Holland } 143e4ebfb57SPatrick Holland if (PrevInstWaitInfo.LgkmCnt) { 144e4ebfb57SPatrick Holland CurrLgkmcnt++; 145e4ebfb57SPatrick Holland if ((unsigned)CyclesLeft < CyclesToWaitLgkm) 146e4ebfb57SPatrick Holland CyclesToWaitLgkm = CyclesLeft; 147e4ebfb57SPatrick Holland } 148e4ebfb57SPatrick Holland if (PrevInstWaitInfo.VsCnt) { 149e4ebfb57SPatrick Holland CurrVscnt++; 150e4ebfb57SPatrick Holland if ((unsigned)CyclesLeft < CyclesToWaitVs) 151e4ebfb57SPatrick Holland CyclesToWaitVs = CyclesLeft; 152e4ebfb57SPatrick Holland } 153e4ebfb57SPatrick Holland } 154e4ebfb57SPatrick Holland 155e4ebfb57SPatrick Holland unsigned CyclesToWait = ~0U; 156e4ebfb57SPatrick Holland if (CurrVmcnt > Vmcnt && CyclesToWaitVm < CyclesToWait) 157e4ebfb57SPatrick Holland CyclesToWait = CyclesToWaitVm; 158e4ebfb57SPatrick Holland if (CurrExpcnt > Expcnt && CyclesToWaitExp < CyclesToWait) 159e4ebfb57SPatrick Holland CyclesToWait = CyclesToWaitExp; 160e4ebfb57SPatrick Holland if (CurrLgkmcnt > Lgkmcnt && CyclesToWaitLgkm < CyclesToWait) 161e4ebfb57SPatrick Holland CyclesToWait = CyclesToWaitLgkm; 162e4ebfb57SPatrick Holland if (CurrVscnt > Vscnt && CyclesToWaitVs < CyclesToWait) 163e4ebfb57SPatrick Holland CyclesToWait = CyclesToWaitVs; 164e4ebfb57SPatrick Holland 165e4ebfb57SPatrick Holland // We may underestimate how many cycles we need to wait, but this 166e4ebfb57SPatrick Holland // isn't a big deal. Our return value is just how many cycles until 167e4ebfb57SPatrick Holland // this function gets run again. So as long as we don't overestimate 168e4ebfb57SPatrick Holland // the wait time, we'll still end up stalling at this instruction 169e4ebfb57SPatrick Holland // for the correct number of cycles. 170e4ebfb57SPatrick Holland 171e4ebfb57SPatrick Holland if (CyclesToWait == ~0U) 172e4ebfb57SPatrick Holland return 0; 173e4ebfb57SPatrick Holland return CyclesToWait; 174e4ebfb57SPatrick Holland } 175e4ebfb57SPatrick Holland 176e4ebfb57SPatrick Holland void AMDGPUCustomBehaviour::computeWaitCnt(const InstRef &IR, unsigned &Vmcnt, 177e4ebfb57SPatrick Holland unsigned &Expcnt, unsigned &Lgkmcnt, 178e4ebfb57SPatrick Holland unsigned &Vscnt) { 179e4ebfb57SPatrick Holland AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(STI.getCPU()); 180e4ebfb57SPatrick Holland const Instruction &Inst = *IR.getInstruction(); 181e4ebfb57SPatrick Holland unsigned Opcode = Inst.getOpcode(); 182e4ebfb57SPatrick Holland 183e4ebfb57SPatrick Holland switch (Opcode) { 184e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_EXPCNT_gfx10: 185e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_LGKMCNT_gfx10: 186e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_VMCNT_gfx10: 187e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_VSCNT_gfx10: { 188e4ebfb57SPatrick Holland // Should probably be checking for nullptr 189e4ebfb57SPatrick Holland // here, but I'm not sure how I should handle the case 190e4ebfb57SPatrick Holland // where we see a nullptr. 191e4ebfb57SPatrick Holland const MCAOperand *OpReg = Inst.getOperand(0); 192e4ebfb57SPatrick Holland const MCAOperand *OpImm = Inst.getOperand(1); 193e4ebfb57SPatrick Holland assert(OpReg && OpReg->isReg() && "First operand should be a register."); 194e4ebfb57SPatrick Holland assert(OpImm && OpImm->isImm() && "Second operand should be an immediate."); 195e4ebfb57SPatrick Holland if (OpReg->getReg() != AMDGPU::SGPR_NULL) { 196e4ebfb57SPatrick Holland // Instruction is using a real register. 197e4ebfb57SPatrick Holland // Since we can't know what value this register will have, 198e4ebfb57SPatrick Holland // we can't compute what the value of this wait should be. 199e4ebfb57SPatrick Holland WithColor::warning() << "The register component of " 200e4ebfb57SPatrick Holland << MCII.getName(Opcode) << " will be completely " 201e4ebfb57SPatrick Holland << "ignored. So the wait may not be accurate.\n"; 202e4ebfb57SPatrick Holland } 203e4ebfb57SPatrick Holland switch (Opcode) { 204e4ebfb57SPatrick Holland // Redundant switch so I don't have to repeat the code above 205e4ebfb57SPatrick Holland // for each case. There are more clever ways to avoid this 206e4ebfb57SPatrick Holland // extra switch and anyone can feel free to implement one of them. 207e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_EXPCNT_gfx10: 208e4ebfb57SPatrick Holland Expcnt = OpImm->getImm(); 209e4ebfb57SPatrick Holland break; 210e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_LGKMCNT_gfx10: 211e4ebfb57SPatrick Holland Lgkmcnt = OpImm->getImm(); 212e4ebfb57SPatrick Holland break; 213e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_VMCNT_gfx10: 214e4ebfb57SPatrick Holland Vmcnt = OpImm->getImm(); 215e4ebfb57SPatrick Holland break; 216e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_VSCNT_gfx10: 217e4ebfb57SPatrick Holland Vscnt = OpImm->getImm(); 218e4ebfb57SPatrick Holland break; 219e4ebfb57SPatrick Holland } 220e4ebfb57SPatrick Holland return; 221e4ebfb57SPatrick Holland } 222e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_gfx10: 223e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_gfx6_gfx7: 224e4ebfb57SPatrick Holland case AMDGPU::S_WAITCNT_vi: 225e4ebfb57SPatrick Holland unsigned WaitCnt = Inst.getOperand(0)->getImm(); 226e4ebfb57SPatrick Holland AMDGPU::decodeWaitcnt(IV, WaitCnt, Vmcnt, Expcnt, Lgkmcnt); 227e4ebfb57SPatrick Holland return; 228e4ebfb57SPatrick Holland } 229e4ebfb57SPatrick Holland } 230e4ebfb57SPatrick Holland 231e4ebfb57SPatrick Holland void AMDGPUCustomBehaviour::generateWaitCntInfo() { 232e4ebfb57SPatrick Holland // The core logic from this function is taken from 233e4ebfb57SPatrick Holland // SIInsertWaitcnts::updateEventWaitcntAfter() In that pass, the instructions 234e4ebfb57SPatrick Holland // that are being looked at are in the MachineInstr format, whereas we have 235e4ebfb57SPatrick Holland // access to the MCInst format. The side effects of this are that we can't use 236e4ebfb57SPatrick Holland // the mayAccessVMEMThroughFlat(Inst) or mayAccessLDSThroughFlat(Inst) 237e4ebfb57SPatrick Holland // functions. Therefore, we conservatively assume that these functions will 238e4ebfb57SPatrick Holland // return true. This may cause a few instructions to be incorrectly tagged 239e4ebfb57SPatrick Holland // with an extra CNT. However, these are instructions that do interact with at 240e4ebfb57SPatrick Holland // least one CNT so giving them an extra CNT shouldn't cause issues in most 241e4ebfb57SPatrick Holland // scenarios. 242e4ebfb57SPatrick Holland AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(STI.getCPU()); 243e4ebfb57SPatrick Holland InstrWaitCntInfo.resize(SrcMgr.size()); 244e4ebfb57SPatrick Holland 24597579dccSMin-Yih Hsu for (const auto &EN : llvm::enumerate(SrcMgr.getInstructions())) { 24697579dccSMin-Yih Hsu const std::unique_ptr<Instruction> &Inst = EN.value(); 24797579dccSMin-Yih Hsu unsigned Index = EN.index(); 248e4ebfb57SPatrick Holland unsigned Opcode = Inst->getOpcode(); 249e4ebfb57SPatrick Holland const MCInstrDesc &MCID = MCII.get(Opcode); 250e4ebfb57SPatrick Holland if ((MCID.TSFlags & SIInstrFlags::DS) && 251e4ebfb57SPatrick Holland (MCID.TSFlags & SIInstrFlags::LGKM_CNT)) { 252e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].LgkmCnt = true; 253e4ebfb57SPatrick Holland if (isAlwaysGDS(Opcode) || hasModifiersSet(Inst, AMDGPU::OpName::gds)) 254e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].ExpCnt = true; 255e4ebfb57SPatrick Holland } else if (MCID.TSFlags & SIInstrFlags::FLAT) { 256e4ebfb57SPatrick Holland // We conservatively assume that mayAccessVMEMThroughFlat(Inst) 257e4ebfb57SPatrick Holland // and mayAccessLDSThroughFlat(Inst) would both return true for this 258e4ebfb57SPatrick Holland // instruction. We have to do this because those functions use 259e4ebfb57SPatrick Holland // information about the memory operands that we don't have access to. 260e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].LgkmCnt = true; 261e4ebfb57SPatrick Holland if (!STI.hasFeature(AMDGPU::FeatureVscnt)) 262e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].VmCnt = true; 263e4ebfb57SPatrick Holland else if (MCID.mayLoad() && !(MCID.TSFlags & SIInstrFlags::IsAtomicNoRet)) 264e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].VmCnt = true; 265e4ebfb57SPatrick Holland else 266e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].VsCnt = true; 267e4ebfb57SPatrick Holland } else if (isVMEM(MCID) && !AMDGPU::getMUBUFIsBufferInv(Opcode)) { 268e4ebfb57SPatrick Holland if (!STI.hasFeature(AMDGPU::FeatureVscnt)) 269e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].VmCnt = true; 270e4ebfb57SPatrick Holland else if ((MCID.mayLoad() && 271e4ebfb57SPatrick Holland !(MCID.TSFlags & SIInstrFlags::IsAtomicNoRet)) || 272e4ebfb57SPatrick Holland ((MCID.TSFlags & SIInstrFlags::MIMG) && !MCID.mayLoad() && 273e4ebfb57SPatrick Holland !MCID.mayStore())) 274e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].VmCnt = true; 275e4ebfb57SPatrick Holland else if (MCID.mayStore()) 276e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].VsCnt = true; 277e4ebfb57SPatrick Holland 278e4ebfb57SPatrick Holland // (IV.Major < 7) is meant to represent 279e4ebfb57SPatrick Holland // GCNTarget.vmemWriteNeedsExpWaitcnt() 280e4ebfb57SPatrick Holland // which is defined as 281e4ebfb57SPatrick Holland // { return getGeneration() < SEA_ISLANDS; } 282e4ebfb57SPatrick Holland if (IV.Major < 7 && 283e4ebfb57SPatrick Holland (MCID.mayStore() || (MCID.TSFlags & SIInstrFlags::IsAtomicRet))) 284e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].ExpCnt = true; 285e4ebfb57SPatrick Holland } else if (MCID.TSFlags & SIInstrFlags::SMRD) { 286e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].LgkmCnt = true; 287e4ebfb57SPatrick Holland } else if (MCID.TSFlags & SIInstrFlags::EXP) { 288e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].ExpCnt = true; 289e4ebfb57SPatrick Holland } else { 290e4ebfb57SPatrick Holland switch (Opcode) { 291e4ebfb57SPatrick Holland case AMDGPU::S_SENDMSG: 292e4ebfb57SPatrick Holland case AMDGPU::S_SENDMSGHALT: 293e4ebfb57SPatrick Holland case AMDGPU::S_MEMTIME: 294e4ebfb57SPatrick Holland case AMDGPU::S_MEMREALTIME: 295e4ebfb57SPatrick Holland InstrWaitCntInfo[Index].LgkmCnt = true; 296e4ebfb57SPatrick Holland break; 297e4ebfb57SPatrick Holland } 298e4ebfb57SPatrick Holland } 299e4ebfb57SPatrick Holland } 300e4ebfb57SPatrick Holland } 301e4ebfb57SPatrick Holland 302e4ebfb57SPatrick Holland // taken from SIInstrInfo::isVMEM() 303e4ebfb57SPatrick Holland bool AMDGPUCustomBehaviour::isVMEM(const MCInstrDesc &MCID) { 304e4ebfb57SPatrick Holland return MCID.TSFlags & SIInstrFlags::MUBUF || 305e4ebfb57SPatrick Holland MCID.TSFlags & SIInstrFlags::MTBUF || 306e4ebfb57SPatrick Holland MCID.TSFlags & SIInstrFlags::MIMG; 307e4ebfb57SPatrick Holland } 308e4ebfb57SPatrick Holland 309e4ebfb57SPatrick Holland // taken from SIInstrInfo::hasModifiersSet() 310e4ebfb57SPatrick Holland bool AMDGPUCustomBehaviour::hasModifiersSet( 311e4ebfb57SPatrick Holland const std::unique_ptr<Instruction> &Inst, unsigned OpName) const { 312e4ebfb57SPatrick Holland int Idx = AMDGPU::getNamedOperandIdx(Inst->getOpcode(), OpName); 313e4ebfb57SPatrick Holland if (Idx == -1) 314e4ebfb57SPatrick Holland return false; 315e4ebfb57SPatrick Holland 316e4ebfb57SPatrick Holland const MCAOperand *Op = Inst->getOperand(Idx); 317e4ebfb57SPatrick Holland if (Op == nullptr || !Op->isImm() || !Op->getImm()) 318e4ebfb57SPatrick Holland return false; 319e4ebfb57SPatrick Holland 320e4ebfb57SPatrick Holland return true; 321e4ebfb57SPatrick Holland } 322e4ebfb57SPatrick Holland 323e61ca232SJay Foad // taken from SIInstrInfo::isGWS() 324e61ca232SJay Foad bool AMDGPUCustomBehaviour::isGWS(uint16_t Opcode) const { 325e61ca232SJay Foad const MCInstrDesc &MCID = MCII.get(Opcode); 326e61ca232SJay Foad return MCID.TSFlags & SIInstrFlags::GWS; 327e61ca232SJay Foad } 328e61ca232SJay Foad 329e4ebfb57SPatrick Holland // taken from SIInstrInfo::isAlwaysGDS() 330e4ebfb57SPatrick Holland bool AMDGPUCustomBehaviour::isAlwaysGDS(uint16_t Opcode) const { 331e61ca232SJay Foad return Opcode == AMDGPU::DS_ORDERED_COUNT || isGWS(Opcode); 332dbed061bSPatrick Holland } 333dbed061bSPatrick Holland 334*ff81bbedSJay Foad } // namespace llvm::mca 335dbed061bSPatrick Holland 336dbed061bSPatrick Holland using namespace llvm; 337dbed061bSPatrick Holland using namespace mca; 338dbed061bSPatrick Holland 339dbed061bSPatrick Holland static CustomBehaviour * 340dbed061bSPatrick Holland createAMDGPUCustomBehaviour(const MCSubtargetInfo &STI, 341dbed061bSPatrick Holland const mca::SourceMgr &SrcMgr, 342dbed061bSPatrick Holland const MCInstrInfo &MCII) { 343dbed061bSPatrick Holland return new AMDGPUCustomBehaviour(STI, SrcMgr, MCII); 344dbed061bSPatrick Holland } 345dbed061bSPatrick Holland 346dbed061bSPatrick Holland static InstrPostProcess * 347dbed061bSPatrick Holland createAMDGPUInstrPostProcess(const MCSubtargetInfo &STI, 348dbed061bSPatrick Holland const MCInstrInfo &MCII) { 349dbed061bSPatrick Holland return new AMDGPUInstrPostProcess(STI, MCII); 350dbed061bSPatrick Holland } 351dbed061bSPatrick Holland 352dbed061bSPatrick Holland /// Extern function to initialize the targets for the AMDGPU backend 353dbed061bSPatrick Holland 354dbed061bSPatrick Holland extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTargetMCA() { 355ee165cdbSIvan Kosarev TargetRegistry::RegisterCustomBehaviour(getTheR600Target(), 356dbed061bSPatrick Holland createAMDGPUCustomBehaviour); 357ee165cdbSIvan Kosarev TargetRegistry::RegisterInstrPostProcess(getTheR600Target(), 358dbed061bSPatrick Holland createAMDGPUInstrPostProcess); 359dbed061bSPatrick Holland 360dbed061bSPatrick Holland TargetRegistry::RegisterCustomBehaviour(getTheGCNTarget(), 361dbed061bSPatrick Holland createAMDGPUCustomBehaviour); 362dbed061bSPatrick Holland TargetRegistry::RegisterInstrPostProcess(getTheGCNTarget(), 363dbed061bSPatrick Holland createAMDGPUInstrPostProcess); 364dbed061bSPatrick Holland } 365