//===------------------ AMDGPUCustomBehaviour.cpp -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements methods from the AMDGPUCustomBehaviour class.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCustomBehaviour.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/WithColor.h"

namespace llvm::mca {

void AMDGPUInstrPostProcess::postProcessInstruction(
    std::unique_ptr<Instruction> &Inst, const MCInst &MCI) {
  switch (MCI.getOpcode()) {
  case AMDGPU::S_WAITCNT:
  case AMDGPU::S_WAITCNT_soft:
  case AMDGPU::S_WAITCNT_EXPCNT:
  case AMDGPU::S_WAITCNT_LGKMCNT:
  case AMDGPU::S_WAITCNT_VMCNT:
  case AMDGPU::S_WAITCNT_VSCNT:
  case AMDGPU::S_WAITCNT_VSCNT_soft:
  case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
  case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VSCNT_gfx10:
  case AMDGPU::S_WAITCNT_gfx10:
  case AMDGPU::S_WAITCNT_gfx6_gfx7:
  case AMDGPU::S_WAITCNT_vi:
    return processWaitCnt(Inst, MCI);
  }
}

// s_waitcnt instructions encode important information as immediate operands
// which are lost during the MCInst -> mca::Instruction lowering.
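// For example, "s_waitcnt vmcnt(0) lgkmcnt(0)" carries its counter values in a
// single immediate operand; re-attaching the MCInst operands here lets
// computeWaitCnt() decode them later. (Illustrative example, not tied to any
// particular subtarget encoding.)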
void AMDGPUInstrPostProcess::processWaitCnt(std::unique_ptr<Instruction> &Inst,
                                            const MCInst &MCI) {
  for (int Idx = 0, N = MCI.size(); Idx < N; Idx++) {
    MCAOperand Op;
    const MCOperand &MCOp = MCI.getOperand(Idx);
    if (MCOp.isReg()) {
      Op = MCAOperand::createReg(MCOp.getReg());
    } else if (MCOp.isImm()) {
      Op = MCAOperand::createImm(MCOp.getImm());
    }
    Op.setIndex(Idx);
    Inst->addOperand(Op);
  }
}

AMDGPUCustomBehaviour::AMDGPUCustomBehaviour(const MCSubtargetInfo &STI,
                                             const mca::SourceMgr &SrcMgr,
                                             const MCInstrInfo &MCII)
    : CustomBehaviour(STI, SrcMgr, MCII) {
  generateWaitCntInfo();
}

unsigned AMDGPUCustomBehaviour::checkCustomHazard(ArrayRef<InstRef> IssuedInst,
                                                  const InstRef &IR) {
  const Instruction &Inst = *IR.getInstruction();
  unsigned Opcode = Inst.getOpcode();

  // llvm-mca is generally run on fully compiled assembly, so we would not
  // normally see any pseudo instructions here. However, since there are plans
  // to make mca usable within backend passes in the future, the pseudo
  // versions of s_waitcnt are kept in this switch statement.
  switch (Opcode) {
  default:
    return 0;
  case AMDGPU::S_WAITCNT: // This instruction
  case AMDGPU::S_WAITCNT_soft:
  case AMDGPU::S_WAITCNT_EXPCNT:
  case AMDGPU::S_WAITCNT_LGKMCNT:
  case AMDGPU::S_WAITCNT_VMCNT:
  case AMDGPU::S_WAITCNT_VSCNT:
  case AMDGPU::S_WAITCNT_VSCNT_soft: // to this instruction are all pseudo.
  case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
  case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VSCNT_gfx10:
  case AMDGPU::S_WAITCNT_gfx10:
  case AMDGPU::S_WAITCNT_gfx6_gfx7:
  case AMDGPU::S_WAITCNT_vi:
    // s_endpgm also behaves as if there is an implicit s_waitcnt 0, but it is
    // unclear whether modelling that in llvm-mca would be appropriate, given
    // how iterations repeatedly re-simulate the pipeline.
    return handleWaitCnt(IssuedInst, IR);
  }

  return 0;
}

unsigned AMDGPUCustomBehaviour::handleWaitCnt(ArrayRef<InstRef> IssuedInst,
                                              const InstRef &IR) {
  // Currently, all s_waitcnt instructions are handled except s_waitcnt_depctr,
  // which is not modelled here.
  // Start the counters at their maximum values (i.e. no wait required).
  unsigned Vmcnt = 63;
  unsigned Expcnt = 7;
  unsigned Lgkmcnt = 31;
  unsigned Vscnt = 63;
  unsigned CurrVmcnt = 0;
  unsigned CurrExpcnt = 0;
  unsigned CurrLgkmcnt = 0;
  unsigned CurrVscnt = 0;
  unsigned CyclesToWaitVm = ~0U;
  unsigned CyclesToWaitExp = ~0U;
  unsigned CyclesToWaitLgkm = ~0U;
  unsigned CyclesToWaitVs = ~0U;

  computeWaitCnt(IR, Vmcnt, Expcnt, Lgkmcnt, Vscnt);
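  // For example (illustrative only): "s_waitcnt vmcnt(0)" typically decodes to
  // Vmcnt = 0 while leaving the other counters at their maxima, so only
  // outstanding VMEM loads can cause a stall below.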

  // We will now look at each of the currently executing instructions
  // to find out if this wait instruction still needs to wait.
  for (const InstRef &PrevIR : IssuedInst) {
    const Instruction &PrevInst = *PrevIR.getInstruction();
    const unsigned PrevInstIndex = PrevIR.getSourceIndex() % SrcMgr.size();
    const WaitCntInfo &PrevInstWaitInfo = InstrWaitCntInfo[PrevInstIndex];
    const int CyclesLeft = PrevInst.getCyclesLeft();
    assert(CyclesLeft != UNKNOWN_CYCLES &&
           "We should know how many cycles are left for this instruction");
    if (PrevInstWaitInfo.VmCnt) {
      CurrVmcnt++;
      if ((unsigned)CyclesLeft < CyclesToWaitVm)
        CyclesToWaitVm = CyclesLeft;
    }
    if (PrevInstWaitInfo.ExpCnt) {
      CurrExpcnt++;
      if ((unsigned)CyclesLeft < CyclesToWaitExp)
        CyclesToWaitExp = CyclesLeft;
    }
    if (PrevInstWaitInfo.LgkmCnt) {
      CurrLgkmcnt++;
      if ((unsigned)CyclesLeft < CyclesToWaitLgkm)
        CyclesToWaitLgkm = CyclesLeft;
    }
    if (PrevInstWaitInfo.VsCnt) {
      CurrVscnt++;
      if ((unsigned)CyclesLeft < CyclesToWaitVs)
        CyclesToWaitVs = CyclesLeft;
    }
  }

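  // A counter forces a stall only when the number of outstanding events
  // (CurrX) exceeds the count this s_waitcnt is willing to tolerate (X).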
  unsigned CyclesToWait = ~0U;
  if (CurrVmcnt > Vmcnt && CyclesToWaitVm < CyclesToWait)
    CyclesToWait = CyclesToWaitVm;
  if (CurrExpcnt > Expcnt && CyclesToWaitExp < CyclesToWait)
    CyclesToWait = CyclesToWaitExp;
  if (CurrLgkmcnt > Lgkmcnt && CyclesToWaitLgkm < CyclesToWait)
    CyclesToWait = CyclesToWaitLgkm;
  if (CurrVscnt > Vscnt && CyclesToWaitVs < CyclesToWait)
    CyclesToWait = CyclesToWaitVs;

  // We may underestimate how many cycles we need to wait, but this
  // isn't a big deal. Our return value is just how many cycles until
  // this function gets run again. So as long as we don't overestimate
  // the wait time, we'll still end up stalling at this instruction
  // for the correct number of cycles.

  if (CyclesToWait == ~0U)
    return 0;
  return CyclesToWait;
}

void AMDGPUCustomBehaviour::computeWaitCnt(const InstRef &IR, unsigned &Vmcnt,
                                           unsigned &Expcnt, unsigned &Lgkmcnt,
                                           unsigned &Vscnt) {
  AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(STI.getCPU());
  const Instruction &Inst = *IR.getInstruction();
  unsigned Opcode = Inst.getOpcode();

  switch (Opcode) {
  case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
  case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VMCNT_gfx10:
  case AMDGPU::S_WAITCNT_VSCNT_gfx10: {
    // Checking these operands for nullptr would be more robust, but it is not
    // clear how a null operand should be handled here, so the asserts below
    // are relied upon instead.
    const MCAOperand *OpReg = Inst.getOperand(0);
    const MCAOperand *OpImm = Inst.getOperand(1);
    assert(OpReg && OpReg->isReg() && "First operand should be a register.");
    assert(OpImm && OpImm->isImm() && "Second operand should be an immediate.");
    if (OpReg->getReg() != AMDGPU::SGPR_NULL) {
      // Instruction is using a real register.
      // Since we can't know what value this register will have,
      // we can't compute what the value of this wait should be.
      WithColor::warning() << "The register component of "
                           << MCII.getName(Opcode) << " will be completely "
                           << "ignored, so the wait may not be accurate.\n";
    }
    switch (Opcode) {
    // Nested switch so the operand handling above is not repeated for each
    // case. There are cleaner ways to avoid this extra switch; feel free to
    // implement one of them.
    case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
      Expcnt = OpImm->getImm();
      break;
    case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
      Lgkmcnt = OpImm->getImm();
      break;
    case AMDGPU::S_WAITCNT_VMCNT_gfx10:
      Vmcnt = OpImm->getImm();
      break;
    case AMDGPU::S_WAITCNT_VSCNT_gfx10:
      Vscnt = OpImm->getImm();
      break;
    }
    return;
  }
  case AMDGPU::S_WAITCNT_gfx10:
  case AMDGPU::S_WAITCNT_gfx6_gfx7:
  case AMDGPU::S_WAITCNT_vi:
    unsigned WaitCnt = Inst.getOperand(0)->getImm();
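    // decodeWaitcnt() splits the combined immediate into its per-counter bit
    // fields; the field widths and positions depend on the ISA version (IV).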
    AMDGPU::decodeWaitcnt(IV, WaitCnt, Vmcnt, Expcnt, Lgkmcnt);
    return;
  }
}

void AMDGPUCustomBehaviour::generateWaitCntInfo() {
  // The core logic of this function is taken from
  // SIInsertWaitcnts::updateEventWaitcntAfter(). In that pass, the instructions
  // being looked at are in MachineInstr format, whereas here we only have
  // access to the MCInst format. As a result, we cannot use the
  // mayAccessVMEMThroughFlat(Inst) or mayAccessLDSThroughFlat(Inst) functions,
  // so we conservatively assume that they would return true. This may cause a
  // few instructions to be tagged with an extra CNT. However, these are
  // instructions that already interact with at least one CNT, so the extra CNT
  // should not cause issues in most scenarios.
  AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(STI.getCPU());
  InstrWaitCntInfo.resize(SrcMgr.size());
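  // One WaitCntInfo entry is kept per instruction in the source region;
  // handleWaitCnt() indexes into it with the source index modulo
  // SrcMgr.size(), so repeated iterations of the input reuse the same entries.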

  for (const auto &EN : llvm::enumerate(SrcMgr.getInstructions())) {
    const std::unique_ptr<Instruction> &Inst = EN.value();
    unsigned Index = EN.index();
    unsigned Opcode = Inst->getOpcode();
    const MCInstrDesc &MCID = MCII.get(Opcode);
    if ((MCID.TSFlags & SIInstrFlags::DS) &&
        (MCID.TSFlags & SIInstrFlags::LGKM_CNT)) {
      InstrWaitCntInfo[Index].LgkmCnt = true;
      if (isAlwaysGDS(Opcode) || hasModifiersSet(Inst, AMDGPU::OpName::gds))
        InstrWaitCntInfo[Index].ExpCnt = true;
    } else if (MCID.TSFlags & SIInstrFlags::FLAT) {
      // We conservatively assume that mayAccessVMEMThroughFlat(Inst)
      // and mayAccessLDSThroughFlat(Inst) would both return true for this
      // instruction. We have to do this because those functions use
      // information about the memory operands that we don't have access to.
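      // On targets with a separate store counter (FeatureVscnt, gfx10+),
      // stores and no-return atomics count against vscnt instead of vmcnt.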
      InstrWaitCntInfo[Index].LgkmCnt = true;
      if (!STI.hasFeature(AMDGPU::FeatureVscnt))
        InstrWaitCntInfo[Index].VmCnt = true;
      else if (MCID.mayLoad() && !(MCID.TSFlags & SIInstrFlags::IsAtomicNoRet))
        InstrWaitCntInfo[Index].VmCnt = true;
      else
        InstrWaitCntInfo[Index].VsCnt = true;
    } else if (isVMEM(MCID) && !AMDGPU::getMUBUFIsBufferInv(Opcode)) {
      if (!STI.hasFeature(AMDGPU::FeatureVscnt))
        InstrWaitCntInfo[Index].VmCnt = true;
      else if ((MCID.mayLoad() &&
                !(MCID.TSFlags & SIInstrFlags::IsAtomicNoRet)) ||
               ((MCID.TSFlags & SIInstrFlags::MIMG) && !MCID.mayLoad() &&
                !MCID.mayStore()))
        InstrWaitCntInfo[Index].VmCnt = true;
      else if (MCID.mayStore())
        InstrWaitCntInfo[Index].VsCnt = true;

      // (IV.Major < 7) is meant to represent
      // GCNTarget.vmemWriteNeedsExpWaitcnt()
      // which is defined as
      // { return getGeneration() < SEA_ISLANDS; }
      if (IV.Major < 7 &&
          (MCID.mayStore() || (MCID.TSFlags & SIInstrFlags::IsAtomicRet)))
        InstrWaitCntInfo[Index].ExpCnt = true;
    } else if (MCID.TSFlags & SIInstrFlags::SMRD) {
      InstrWaitCntInfo[Index].LgkmCnt = true;
    } else if (MCID.TSFlags & SIInstrFlags::EXP) {
      InstrWaitCntInfo[Index].ExpCnt = true;
    } else {
      switch (Opcode) {
      case AMDGPU::S_SENDMSG:
      case AMDGPU::S_SENDMSGHALT:
      case AMDGPU::S_MEMTIME:
      case AMDGPU::S_MEMREALTIME:
        InstrWaitCntInfo[Index].LgkmCnt = true;
        break;
      }
    }
  }
}

// taken from SIInstrInfo::isVMEM()
bool AMDGPUCustomBehaviour::isVMEM(const MCInstrDesc &MCID) {
  return MCID.TSFlags & SIInstrFlags::MUBUF ||
         MCID.TSFlags & SIInstrFlags::MTBUF ||
         MCID.TSFlags & SIInstrFlags::MIMG;
}

// taken from SIInstrInfo::hasModifiersSet()
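// Returns true when the named modifier operand is present and non-zero; used
// above to detect the 'gds' bit on DS instructions.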
bool AMDGPUCustomBehaviour::hasModifiersSet(
    const std::unique_ptr<Instruction> &Inst, unsigned OpName) const {
  int Idx = AMDGPU::getNamedOperandIdx(Inst->getOpcode(), OpName);
  if (Idx == -1)
    return false;

  const MCAOperand *Op = Inst->getOperand(Idx);
  if (Op == nullptr || !Op->isImm() || !Op->getImm())
    return false;

  return true;
}

// taken from SIInstrInfo::isGWS()
bool AMDGPUCustomBehaviour::isGWS(uint16_t Opcode) const {
  const MCInstrDesc &MCID = MCII.get(Opcode);
  return MCID.TSFlags & SIInstrFlags::GWS;
}

// taken from SIInstrInfo::isAlwaysGDS()
bool AMDGPUCustomBehaviour::isAlwaysGDS(uint16_t Opcode) const {
  return Opcode == AMDGPU::DS_ORDERED_COUNT || isGWS(Opcode);
}

} // namespace llvm::mca

using namespace llvm;
using namespace mca;

static CustomBehaviour *
createAMDGPUCustomBehaviour(const MCSubtargetInfo &STI,
                            const mca::SourceMgr &SrcMgr,
                            const MCInstrInfo &MCII) {
  return new AMDGPUCustomBehaviour(STI, SrcMgr, MCII);
}

static InstrPostProcess *
createAMDGPUInstrPostProcess(const MCSubtargetInfo &STI,
                             const MCInstrInfo &MCII) {
  return new AMDGPUInstrPostProcess(STI, MCII);
}

/// Extern function to initialize the targets for the AMDGPU backend.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTargetMCA() {
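  // Register both hooks for the R600 and GCN targets. llvm-mca resolves them
  // through the TargetRegistry for whichever AMDGPU target it is run against.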
  TargetRegistry::RegisterCustomBehaviour(getTheR600Target(),
                                          createAMDGPUCustomBehaviour);
  TargetRegistry::RegisterInstrPostProcess(getTheR600Target(),
                                           createAMDGPUInstrPostProcess);

  TargetRegistry::RegisterCustomBehaviour(getTheGCNTarget(),
                                          createAMDGPUCustomBehaviour);
  TargetRegistry::RegisterInstrPostProcess(getTheGCNTarget(),
                                           createAMDGPUInstrPostProcess);
}