xref: /llvm-project/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp (revision ff81bbede4f3a28d285106a5a3f5d0980608dd47)
1dbed061bSPatrick Holland //===------------------ AMDGPUCustomBehaviour.cpp ---------------*-C++ -* -===//
2dbed061bSPatrick Holland //
3dbed061bSPatrick Holland // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4dbed061bSPatrick Holland // See https://llvm.org/LICENSE.txt for license information.
5dbed061bSPatrick Holland // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6dbed061bSPatrick Holland //
7dbed061bSPatrick Holland //===----------------------------------------------------------------------===//
8dbed061bSPatrick Holland /// \file
9dbed061bSPatrick Holland ///
10dbed061bSPatrick Holland /// This file implements methods from the AMDGPUCustomBehaviour class.
11dbed061bSPatrick Holland ///
12dbed061bSPatrick Holland //===----------------------------------------------------------------------===//
13dbed061bSPatrick Holland 
14dbed061bSPatrick Holland #include "AMDGPUCustomBehaviour.h"
15dbed061bSPatrick Holland #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
16dbed061bSPatrick Holland #include "TargetInfo/AMDGPUTargetInfo.h"
17c1fa62baSJay Foad #include "Utils/AMDGPUBaseInfo.h"
1889b57061SReid Kleckner #include "llvm/MC/TargetRegistry.h"
19dbed061bSPatrick Holland #include "llvm/Support/WithColor.h"
20dbed061bSPatrick Holland 
21*ff81bbedSJay Foad namespace llvm::mca {
22dbed061bSPatrick Holland 
23e4ebfb57SPatrick Holland void AMDGPUInstrPostProcess::postProcessInstruction(
24e4ebfb57SPatrick Holland     std::unique_ptr<Instruction> &Inst, const MCInst &MCI) {
25e4ebfb57SPatrick Holland   switch (MCI.getOpcode()) {
26e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT:
27ef067f52SPierre van Houtryve   case AMDGPU::S_WAITCNT_soft:
28e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_EXPCNT:
29e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_LGKMCNT:
30e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_VMCNT:
31e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_VSCNT:
32ef067f52SPierre van Houtryve   case AMDGPU::S_WAITCNT_VSCNT_soft:
33e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
34e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
35e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_VMCNT_gfx10:
36e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_VSCNT_gfx10:
37e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_gfx10:
38e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_gfx6_gfx7:
39e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_vi:
40e4ebfb57SPatrick Holland     return processWaitCnt(Inst, MCI);
41e4ebfb57SPatrick Holland   }
42e4ebfb57SPatrick Holland }
43e4ebfb57SPatrick Holland 
44e4ebfb57SPatrick Holland // s_waitcnt instructions encode important information as immediate operands
45e4ebfb57SPatrick Holland // which are lost during the MCInst -> mca::Instruction lowering.
46e4ebfb57SPatrick Holland void AMDGPUInstrPostProcess::processWaitCnt(std::unique_ptr<Instruction> &Inst,
47e4ebfb57SPatrick Holland                                             const MCInst &MCI) {
48e4ebfb57SPatrick Holland   for (int Idx = 0, N = MCI.size(); Idx < N; Idx++) {
49e4ebfb57SPatrick Holland     MCAOperand Op;
50e4ebfb57SPatrick Holland     const MCOperand &MCOp = MCI.getOperand(Idx);
51e4ebfb57SPatrick Holland     if (MCOp.isReg()) {
52e4ebfb57SPatrick Holland       Op = MCAOperand::createReg(MCOp.getReg());
53e4ebfb57SPatrick Holland     } else if (MCOp.isImm()) {
54e4ebfb57SPatrick Holland       Op = MCAOperand::createImm(MCOp.getImm());
55e4ebfb57SPatrick Holland     }
56e4ebfb57SPatrick Holland     Op.setIndex(Idx);
57e4ebfb57SPatrick Holland     Inst->addOperand(Op);
58e4ebfb57SPatrick Holland   }
59e4ebfb57SPatrick Holland }
60e4ebfb57SPatrick Holland 
61dbed061bSPatrick Holland AMDGPUCustomBehaviour::AMDGPUCustomBehaviour(const MCSubtargetInfo &STI,
62dbed061bSPatrick Holland                                              const mca::SourceMgr &SrcMgr,
63dbed061bSPatrick Holland                                              const MCInstrInfo &MCII)
64e4ebfb57SPatrick Holland     : CustomBehaviour(STI, SrcMgr, MCII) {
65e4ebfb57SPatrick Holland   generateWaitCntInfo();
66e4ebfb57SPatrick Holland }
67dbed061bSPatrick Holland 
68e4ebfb57SPatrick Holland unsigned AMDGPUCustomBehaviour::checkCustomHazard(ArrayRef<InstRef> IssuedInst,
69e4ebfb57SPatrick Holland                                                   const InstRef &IR) {
70e4ebfb57SPatrick Holland   const Instruction &Inst = *IR.getInstruction();
71e4ebfb57SPatrick Holland   unsigned Opcode = Inst.getOpcode();
72e4ebfb57SPatrick Holland 
73e4ebfb57SPatrick Holland   // llvm-mca is generally run on fully compiled assembly so we wouldn't see any
74e4ebfb57SPatrick Holland   // pseudo instructions here. However, there are plans for the future to make
75e4ebfb57SPatrick Holland   // it possible to use mca within backend passes. As such, I have left the
76e4ebfb57SPatrick Holland   // pseudo version of s_waitcnt within this switch statement.
77e4ebfb57SPatrick Holland   switch (Opcode) {
78e4ebfb57SPatrick Holland   default:
79dbed061bSPatrick Holland     return 0;
80e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT: // This instruction
81ef067f52SPierre van Houtryve   case AMDGPU::S_WAITCNT_soft:
82e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_EXPCNT:
83e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_LGKMCNT:
84e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_VMCNT:
85ef067f52SPierre van Houtryve   case AMDGPU::S_WAITCNT_VSCNT:
86ef067f52SPierre van Houtryve   case AMDGPU::S_WAITCNT_VSCNT_soft: // to this instruction are all pseudo.
87e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
88e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
89e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_VMCNT_gfx10:
90e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_VSCNT_gfx10:
91e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_gfx10:
92e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_gfx6_gfx7:
93e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_vi:
94e4ebfb57SPatrick Holland     // s_endpgm also behaves as if there is an implicit
95e4ebfb57SPatrick Holland     // s_waitcnt 0, but I'm not sure if it would be appropriate
96e4ebfb57SPatrick Holland     // to model this in llvm-mca based on how the iterations work
97e4ebfb57SPatrick Holland     // while simulating the pipeline over and over.
98e4ebfb57SPatrick Holland     return handleWaitCnt(IssuedInst, IR);
99e4ebfb57SPatrick Holland   }
100e4ebfb57SPatrick Holland 
101e4ebfb57SPatrick Holland   return 0;
102e4ebfb57SPatrick Holland }
103e4ebfb57SPatrick Holland 
104e4ebfb57SPatrick Holland unsigned AMDGPUCustomBehaviour::handleWaitCnt(ArrayRef<InstRef> IssuedInst,
105e4ebfb57SPatrick Holland                                               const InstRef &IR) {
106e4ebfb57SPatrick Holland   // Currently, all s_waitcnt instructions are handled except s_waitcnt_depctr.
107e4ebfb57SPatrick Holland   // I do not know how that instruction works so I did not attempt to model it.
108e4ebfb57SPatrick Holland   // set the max values to begin
109e4ebfb57SPatrick Holland   unsigned Vmcnt = 63;
110e4ebfb57SPatrick Holland   unsigned Expcnt = 7;
111e4ebfb57SPatrick Holland   unsigned Lgkmcnt = 31;
112e4ebfb57SPatrick Holland   unsigned Vscnt = 63;
113e4ebfb57SPatrick Holland   unsigned CurrVmcnt = 0;
114e4ebfb57SPatrick Holland   unsigned CurrExpcnt = 0;
115e4ebfb57SPatrick Holland   unsigned CurrLgkmcnt = 0;
116e4ebfb57SPatrick Holland   unsigned CurrVscnt = 0;
117e4ebfb57SPatrick Holland   unsigned CyclesToWaitVm = ~0U;
118e4ebfb57SPatrick Holland   unsigned CyclesToWaitExp = ~0U;
119e4ebfb57SPatrick Holland   unsigned CyclesToWaitLgkm = ~0U;
120e4ebfb57SPatrick Holland   unsigned CyclesToWaitVs = ~0U;
121e4ebfb57SPatrick Holland 
122e4ebfb57SPatrick Holland   computeWaitCnt(IR, Vmcnt, Expcnt, Lgkmcnt, Vscnt);
123e4ebfb57SPatrick Holland 
124e4ebfb57SPatrick Holland   // We will now look at each of the currently executing instructions
125e4ebfb57SPatrick Holland   // to find out if this wait instruction still needs to wait.
126d395befaSKazu Hirata   for (const InstRef &PrevIR : IssuedInst) {
127e4ebfb57SPatrick Holland     const Instruction &PrevInst = *PrevIR.getInstruction();
128e4ebfb57SPatrick Holland     const unsigned PrevInstIndex = PrevIR.getSourceIndex() % SrcMgr.size();
129e4ebfb57SPatrick Holland     const WaitCntInfo &PrevInstWaitInfo = InstrWaitCntInfo[PrevInstIndex];
130e4ebfb57SPatrick Holland     const int CyclesLeft = PrevInst.getCyclesLeft();
131e4ebfb57SPatrick Holland     assert(CyclesLeft != UNKNOWN_CYCLES &&
132e4ebfb57SPatrick Holland            "We should know how many cycles are left for this instruction");
133e4ebfb57SPatrick Holland     if (PrevInstWaitInfo.VmCnt) {
134e4ebfb57SPatrick Holland       CurrVmcnt++;
135e4ebfb57SPatrick Holland       if ((unsigned)CyclesLeft < CyclesToWaitVm)
136e4ebfb57SPatrick Holland         CyclesToWaitVm = CyclesLeft;
137e4ebfb57SPatrick Holland     }
138e4ebfb57SPatrick Holland     if (PrevInstWaitInfo.ExpCnt) {
139e4ebfb57SPatrick Holland       CurrExpcnt++;
140e4ebfb57SPatrick Holland       if ((unsigned)CyclesLeft < CyclesToWaitExp)
141e4ebfb57SPatrick Holland         CyclesToWaitExp = CyclesLeft;
142e4ebfb57SPatrick Holland     }
143e4ebfb57SPatrick Holland     if (PrevInstWaitInfo.LgkmCnt) {
144e4ebfb57SPatrick Holland       CurrLgkmcnt++;
145e4ebfb57SPatrick Holland       if ((unsigned)CyclesLeft < CyclesToWaitLgkm)
146e4ebfb57SPatrick Holland         CyclesToWaitLgkm = CyclesLeft;
147e4ebfb57SPatrick Holland     }
148e4ebfb57SPatrick Holland     if (PrevInstWaitInfo.VsCnt) {
149e4ebfb57SPatrick Holland       CurrVscnt++;
150e4ebfb57SPatrick Holland       if ((unsigned)CyclesLeft < CyclesToWaitVs)
151e4ebfb57SPatrick Holland         CyclesToWaitVs = CyclesLeft;
152e4ebfb57SPatrick Holland     }
153e4ebfb57SPatrick Holland   }
154e4ebfb57SPatrick Holland 
155e4ebfb57SPatrick Holland   unsigned CyclesToWait = ~0U;
156e4ebfb57SPatrick Holland   if (CurrVmcnt > Vmcnt && CyclesToWaitVm < CyclesToWait)
157e4ebfb57SPatrick Holland     CyclesToWait = CyclesToWaitVm;
158e4ebfb57SPatrick Holland   if (CurrExpcnt > Expcnt && CyclesToWaitExp < CyclesToWait)
159e4ebfb57SPatrick Holland     CyclesToWait = CyclesToWaitExp;
160e4ebfb57SPatrick Holland   if (CurrLgkmcnt > Lgkmcnt && CyclesToWaitLgkm < CyclesToWait)
161e4ebfb57SPatrick Holland     CyclesToWait = CyclesToWaitLgkm;
162e4ebfb57SPatrick Holland   if (CurrVscnt > Vscnt && CyclesToWaitVs < CyclesToWait)
163e4ebfb57SPatrick Holland     CyclesToWait = CyclesToWaitVs;
164e4ebfb57SPatrick Holland 
165e4ebfb57SPatrick Holland   // We may underestimate how many cycles we need to wait, but this
166e4ebfb57SPatrick Holland   // isn't a big deal. Our return value is just how many cycles until
167e4ebfb57SPatrick Holland   // this function gets run again. So as long as we don't overestimate
168e4ebfb57SPatrick Holland   // the wait time, we'll still end up stalling at this instruction
169e4ebfb57SPatrick Holland   // for the correct number of cycles.
170e4ebfb57SPatrick Holland 
171e4ebfb57SPatrick Holland   if (CyclesToWait == ~0U)
172e4ebfb57SPatrick Holland     return 0;
173e4ebfb57SPatrick Holland   return CyclesToWait;
174e4ebfb57SPatrick Holland }
175e4ebfb57SPatrick Holland 
176e4ebfb57SPatrick Holland void AMDGPUCustomBehaviour::computeWaitCnt(const InstRef &IR, unsigned &Vmcnt,
177e4ebfb57SPatrick Holland                                            unsigned &Expcnt, unsigned &Lgkmcnt,
178e4ebfb57SPatrick Holland                                            unsigned &Vscnt) {
179e4ebfb57SPatrick Holland   AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(STI.getCPU());
180e4ebfb57SPatrick Holland   const Instruction &Inst = *IR.getInstruction();
181e4ebfb57SPatrick Holland   unsigned Opcode = Inst.getOpcode();
182e4ebfb57SPatrick Holland 
183e4ebfb57SPatrick Holland   switch (Opcode) {
184e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
185e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
186e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_VMCNT_gfx10:
187e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_VSCNT_gfx10: {
188e4ebfb57SPatrick Holland     // Should probably be checking for nullptr
189e4ebfb57SPatrick Holland     // here, but I'm not sure how I should handle the case
190e4ebfb57SPatrick Holland     // where we see a nullptr.
191e4ebfb57SPatrick Holland     const MCAOperand *OpReg = Inst.getOperand(0);
192e4ebfb57SPatrick Holland     const MCAOperand *OpImm = Inst.getOperand(1);
193e4ebfb57SPatrick Holland     assert(OpReg && OpReg->isReg() && "First operand should be a register.");
194e4ebfb57SPatrick Holland     assert(OpImm && OpImm->isImm() && "Second operand should be an immediate.");
195e4ebfb57SPatrick Holland     if (OpReg->getReg() != AMDGPU::SGPR_NULL) {
196e4ebfb57SPatrick Holland       // Instruction is using a real register.
197e4ebfb57SPatrick Holland       // Since we can't know what value this register will have,
198e4ebfb57SPatrick Holland       // we can't compute what the value of this wait should be.
199e4ebfb57SPatrick Holland       WithColor::warning() << "The register component of "
200e4ebfb57SPatrick Holland                            << MCII.getName(Opcode) << " will be completely "
201e4ebfb57SPatrick Holland                            << "ignored. So the wait may not be accurate.\n";
202e4ebfb57SPatrick Holland     }
203e4ebfb57SPatrick Holland     switch (Opcode) {
204e4ebfb57SPatrick Holland     // Redundant switch so I don't have to repeat the code above
205e4ebfb57SPatrick Holland     // for each case. There are more clever ways to avoid this
206e4ebfb57SPatrick Holland     // extra switch and anyone can feel free to implement one of them.
207e4ebfb57SPatrick Holland     case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
208e4ebfb57SPatrick Holland       Expcnt = OpImm->getImm();
209e4ebfb57SPatrick Holland       break;
210e4ebfb57SPatrick Holland     case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
211e4ebfb57SPatrick Holland       Lgkmcnt = OpImm->getImm();
212e4ebfb57SPatrick Holland       break;
213e4ebfb57SPatrick Holland     case AMDGPU::S_WAITCNT_VMCNT_gfx10:
214e4ebfb57SPatrick Holland       Vmcnt = OpImm->getImm();
215e4ebfb57SPatrick Holland       break;
216e4ebfb57SPatrick Holland     case AMDGPU::S_WAITCNT_VSCNT_gfx10:
217e4ebfb57SPatrick Holland       Vscnt = OpImm->getImm();
218e4ebfb57SPatrick Holland       break;
219e4ebfb57SPatrick Holland     }
220e4ebfb57SPatrick Holland     return;
221e4ebfb57SPatrick Holland   }
222e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_gfx10:
223e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_gfx6_gfx7:
224e4ebfb57SPatrick Holland   case AMDGPU::S_WAITCNT_vi:
225e4ebfb57SPatrick Holland     unsigned WaitCnt = Inst.getOperand(0)->getImm();
226e4ebfb57SPatrick Holland     AMDGPU::decodeWaitcnt(IV, WaitCnt, Vmcnt, Expcnt, Lgkmcnt);
227e4ebfb57SPatrick Holland     return;
228e4ebfb57SPatrick Holland   }
229e4ebfb57SPatrick Holland }
230e4ebfb57SPatrick Holland 
231e4ebfb57SPatrick Holland void AMDGPUCustomBehaviour::generateWaitCntInfo() {
232e4ebfb57SPatrick Holland   // The core logic from this function is taken from
233e4ebfb57SPatrick Holland   // SIInsertWaitcnts::updateEventWaitcntAfter() In that pass, the instructions
234e4ebfb57SPatrick Holland   // that are being looked at are in the MachineInstr format, whereas we have
235e4ebfb57SPatrick Holland   // access to the MCInst format. The side effects of this are that we can't use
236e4ebfb57SPatrick Holland   // the mayAccessVMEMThroughFlat(Inst) or mayAccessLDSThroughFlat(Inst)
237e4ebfb57SPatrick Holland   // functions. Therefore, we conservatively assume that these functions will
238e4ebfb57SPatrick Holland   // return true. This may cause a few instructions to be incorrectly tagged
239e4ebfb57SPatrick Holland   // with an extra CNT. However, these are instructions that do interact with at
240e4ebfb57SPatrick Holland   // least one CNT so giving them an extra CNT shouldn't cause issues in most
241e4ebfb57SPatrick Holland   // scenarios.
242e4ebfb57SPatrick Holland   AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(STI.getCPU());
243e4ebfb57SPatrick Holland   InstrWaitCntInfo.resize(SrcMgr.size());
244e4ebfb57SPatrick Holland 
24597579dccSMin-Yih Hsu   for (const auto &EN : llvm::enumerate(SrcMgr.getInstructions())) {
24697579dccSMin-Yih Hsu     const std::unique_ptr<Instruction> &Inst = EN.value();
24797579dccSMin-Yih Hsu     unsigned Index = EN.index();
248e4ebfb57SPatrick Holland     unsigned Opcode = Inst->getOpcode();
249e4ebfb57SPatrick Holland     const MCInstrDesc &MCID = MCII.get(Opcode);
250e4ebfb57SPatrick Holland     if ((MCID.TSFlags & SIInstrFlags::DS) &&
251e4ebfb57SPatrick Holland         (MCID.TSFlags & SIInstrFlags::LGKM_CNT)) {
252e4ebfb57SPatrick Holland       InstrWaitCntInfo[Index].LgkmCnt = true;
253e4ebfb57SPatrick Holland       if (isAlwaysGDS(Opcode) || hasModifiersSet(Inst, AMDGPU::OpName::gds))
254e4ebfb57SPatrick Holland         InstrWaitCntInfo[Index].ExpCnt = true;
255e4ebfb57SPatrick Holland     } else if (MCID.TSFlags & SIInstrFlags::FLAT) {
256e4ebfb57SPatrick Holland       // We conservatively assume that mayAccessVMEMThroughFlat(Inst)
257e4ebfb57SPatrick Holland       // and mayAccessLDSThroughFlat(Inst) would both return true for this
258e4ebfb57SPatrick Holland       // instruction. We have to do this because those functions use
259e4ebfb57SPatrick Holland       // information about the memory operands that we don't have access to.
260e4ebfb57SPatrick Holland       InstrWaitCntInfo[Index].LgkmCnt = true;
261e4ebfb57SPatrick Holland       if (!STI.hasFeature(AMDGPU::FeatureVscnt))
262e4ebfb57SPatrick Holland         InstrWaitCntInfo[Index].VmCnt = true;
263e4ebfb57SPatrick Holland       else if (MCID.mayLoad() && !(MCID.TSFlags & SIInstrFlags::IsAtomicNoRet))
264e4ebfb57SPatrick Holland         InstrWaitCntInfo[Index].VmCnt = true;
265e4ebfb57SPatrick Holland       else
266e4ebfb57SPatrick Holland         InstrWaitCntInfo[Index].VsCnt = true;
267e4ebfb57SPatrick Holland     } else if (isVMEM(MCID) && !AMDGPU::getMUBUFIsBufferInv(Opcode)) {
268e4ebfb57SPatrick Holland       if (!STI.hasFeature(AMDGPU::FeatureVscnt))
269e4ebfb57SPatrick Holland         InstrWaitCntInfo[Index].VmCnt = true;
270e4ebfb57SPatrick Holland       else if ((MCID.mayLoad() &&
271e4ebfb57SPatrick Holland                 !(MCID.TSFlags & SIInstrFlags::IsAtomicNoRet)) ||
272e4ebfb57SPatrick Holland                ((MCID.TSFlags & SIInstrFlags::MIMG) && !MCID.mayLoad() &&
273e4ebfb57SPatrick Holland                 !MCID.mayStore()))
274e4ebfb57SPatrick Holland         InstrWaitCntInfo[Index].VmCnt = true;
275e4ebfb57SPatrick Holland       else if (MCID.mayStore())
276e4ebfb57SPatrick Holland         InstrWaitCntInfo[Index].VsCnt = true;
277e4ebfb57SPatrick Holland 
278e4ebfb57SPatrick Holland       // (IV.Major < 7) is meant to represent
279e4ebfb57SPatrick Holland       // GCNTarget.vmemWriteNeedsExpWaitcnt()
280e4ebfb57SPatrick Holland       // which is defined as
281e4ebfb57SPatrick Holland       // { return getGeneration() < SEA_ISLANDS; }
282e4ebfb57SPatrick Holland       if (IV.Major < 7 &&
283e4ebfb57SPatrick Holland           (MCID.mayStore() || (MCID.TSFlags & SIInstrFlags::IsAtomicRet)))
284e4ebfb57SPatrick Holland         InstrWaitCntInfo[Index].ExpCnt = true;
285e4ebfb57SPatrick Holland     } else if (MCID.TSFlags & SIInstrFlags::SMRD) {
286e4ebfb57SPatrick Holland       InstrWaitCntInfo[Index].LgkmCnt = true;
287e4ebfb57SPatrick Holland     } else if (MCID.TSFlags & SIInstrFlags::EXP) {
288e4ebfb57SPatrick Holland       InstrWaitCntInfo[Index].ExpCnt = true;
289e4ebfb57SPatrick Holland     } else {
290e4ebfb57SPatrick Holland       switch (Opcode) {
291e4ebfb57SPatrick Holland       case AMDGPU::S_SENDMSG:
292e4ebfb57SPatrick Holland       case AMDGPU::S_SENDMSGHALT:
293e4ebfb57SPatrick Holland       case AMDGPU::S_MEMTIME:
294e4ebfb57SPatrick Holland       case AMDGPU::S_MEMREALTIME:
295e4ebfb57SPatrick Holland         InstrWaitCntInfo[Index].LgkmCnt = true;
296e4ebfb57SPatrick Holland         break;
297e4ebfb57SPatrick Holland       }
298e4ebfb57SPatrick Holland     }
299e4ebfb57SPatrick Holland   }
300e4ebfb57SPatrick Holland }
301e4ebfb57SPatrick Holland 
302e4ebfb57SPatrick Holland // taken from SIInstrInfo::isVMEM()
303e4ebfb57SPatrick Holland bool AMDGPUCustomBehaviour::isVMEM(const MCInstrDesc &MCID) {
304e4ebfb57SPatrick Holland   return MCID.TSFlags & SIInstrFlags::MUBUF ||
305e4ebfb57SPatrick Holland          MCID.TSFlags & SIInstrFlags::MTBUF ||
306e4ebfb57SPatrick Holland          MCID.TSFlags & SIInstrFlags::MIMG;
307e4ebfb57SPatrick Holland }
308e4ebfb57SPatrick Holland 
309e4ebfb57SPatrick Holland // taken from SIInstrInfo::hasModifiersSet()
310e4ebfb57SPatrick Holland bool AMDGPUCustomBehaviour::hasModifiersSet(
311e4ebfb57SPatrick Holland     const std::unique_ptr<Instruction> &Inst, unsigned OpName) const {
312e4ebfb57SPatrick Holland   int Idx = AMDGPU::getNamedOperandIdx(Inst->getOpcode(), OpName);
313e4ebfb57SPatrick Holland   if (Idx == -1)
314e4ebfb57SPatrick Holland     return false;
315e4ebfb57SPatrick Holland 
316e4ebfb57SPatrick Holland   const MCAOperand *Op = Inst->getOperand(Idx);
317e4ebfb57SPatrick Holland   if (Op == nullptr || !Op->isImm() || !Op->getImm())
318e4ebfb57SPatrick Holland     return false;
319e4ebfb57SPatrick Holland 
320e4ebfb57SPatrick Holland   return true;
321e4ebfb57SPatrick Holland }
322e4ebfb57SPatrick Holland 
323e61ca232SJay Foad // taken from SIInstrInfo::isGWS()
324e61ca232SJay Foad bool AMDGPUCustomBehaviour::isGWS(uint16_t Opcode) const {
325e61ca232SJay Foad   const MCInstrDesc &MCID = MCII.get(Opcode);
326e61ca232SJay Foad   return MCID.TSFlags & SIInstrFlags::GWS;
327e61ca232SJay Foad }
328e61ca232SJay Foad 
329e4ebfb57SPatrick Holland // taken from SIInstrInfo::isAlwaysGDS()
330e4ebfb57SPatrick Holland bool AMDGPUCustomBehaviour::isAlwaysGDS(uint16_t Opcode) const {
331e61ca232SJay Foad   return Opcode == AMDGPU::DS_ORDERED_COUNT || isGWS(Opcode);
332dbed061bSPatrick Holland }
333dbed061bSPatrick Holland 
334*ff81bbedSJay Foad } // namespace llvm::mca
335dbed061bSPatrick Holland 
336dbed061bSPatrick Holland using namespace llvm;
337dbed061bSPatrick Holland using namespace mca;
338dbed061bSPatrick Holland 
339dbed061bSPatrick Holland static CustomBehaviour *
340dbed061bSPatrick Holland createAMDGPUCustomBehaviour(const MCSubtargetInfo &STI,
341dbed061bSPatrick Holland                             const mca::SourceMgr &SrcMgr,
342dbed061bSPatrick Holland                             const MCInstrInfo &MCII) {
343dbed061bSPatrick Holland   return new AMDGPUCustomBehaviour(STI, SrcMgr, MCII);
344dbed061bSPatrick Holland }
345dbed061bSPatrick Holland 
346dbed061bSPatrick Holland static InstrPostProcess *
347dbed061bSPatrick Holland createAMDGPUInstrPostProcess(const MCSubtargetInfo &STI,
348dbed061bSPatrick Holland                              const MCInstrInfo &MCII) {
349dbed061bSPatrick Holland   return new AMDGPUInstrPostProcess(STI, MCII);
350dbed061bSPatrick Holland }
351dbed061bSPatrick Holland 
352dbed061bSPatrick Holland /// Extern function to initialize the targets for the AMDGPU backend
353dbed061bSPatrick Holland 
354dbed061bSPatrick Holland extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTargetMCA() {
355ee165cdbSIvan Kosarev   TargetRegistry::RegisterCustomBehaviour(getTheR600Target(),
356dbed061bSPatrick Holland                                           createAMDGPUCustomBehaviour);
357ee165cdbSIvan Kosarev   TargetRegistry::RegisterInstrPostProcess(getTheR600Target(),
358dbed061bSPatrick Holland                                            createAMDGPUInstrPostProcess);
359dbed061bSPatrick Holland 
360dbed061bSPatrick Holland   TargetRegistry::RegisterCustomBehaviour(getTheGCNTarget(),
361dbed061bSPatrick Holland                                           createAMDGPUCustomBehaviour);
362dbed061bSPatrick Holland   TargetRegistry::RegisterInstrPostProcess(getTheGCNTarget(),
363dbed061bSPatrick Holland                                            createAMDGPUInstrPostProcess);
364dbed061bSPatrick Holland }
365