xref: /minix3/external/bsd/llvm/dist/llvm/lib/Target/R600/SIInsertWaits.cpp (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
//===-- SIInsertWaits.cpp - Insert wait instructions for memory ops -------===//
2f4a2713aSLionel Sambuc //
3f4a2713aSLionel Sambuc //                     The LLVM Compiler Infrastructure
4f4a2713aSLionel Sambuc //
5f4a2713aSLionel Sambuc // This file is distributed under the University of Illinois Open Source
6f4a2713aSLionel Sambuc // License. See LICENSE.TXT for details.
7f4a2713aSLionel Sambuc //
8f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===//
9f4a2713aSLionel Sambuc //
10f4a2713aSLionel Sambuc /// \file
11f4a2713aSLionel Sambuc /// \brief Insert wait instructions for memory reads and writes.
12f4a2713aSLionel Sambuc ///
13f4a2713aSLionel Sambuc /// Memory reads and writes are issued asynchronously, so we need to insert
14f4a2713aSLionel Sambuc /// S_WAITCNT instructions when we want to access any of their results or
15f4a2713aSLionel Sambuc /// overwrite any register that's used asynchronously.
16f4a2713aSLionel Sambuc //
17f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===//
18f4a2713aSLionel Sambuc 
19f4a2713aSLionel Sambuc #include "AMDGPU.h"
20*0a6a1f1dSLionel Sambuc #include "AMDGPUSubtarget.h"
21*0a6a1f1dSLionel Sambuc #include "SIDefines.h"
22f4a2713aSLionel Sambuc #include "SIInstrInfo.h"
23f4a2713aSLionel Sambuc #include "SIMachineFunctionInfo.h"
24f4a2713aSLionel Sambuc #include "llvm/CodeGen/MachineFunction.h"
25f4a2713aSLionel Sambuc #include "llvm/CodeGen/MachineFunctionPass.h"
26f4a2713aSLionel Sambuc #include "llvm/CodeGen/MachineInstrBuilder.h"
27f4a2713aSLionel Sambuc #include "llvm/CodeGen/MachineRegisterInfo.h"
28f4a2713aSLionel Sambuc 
29f4a2713aSLionel Sambuc using namespace llvm;
30f4a2713aSLionel Sambuc 
31f4a2713aSLionel Sambuc namespace {
32f4a2713aSLionel Sambuc 
/// \brief One variable for each of the hardware counters.
///
/// The three values can be addressed either by name (Named.VM, Named.EXP,
/// Named.LGKM) or positionally through Array, which lets the pass loop over
/// all counters uniformly.
union Counters {
  struct {
    unsigned VM;
    unsigned EXP;
    unsigned LGKM;
  } Named;
  unsigned Array[3];
};
43f4a2713aSLionel Sambuc 
/// \brief Coarse classification of an instruction, recorded for the most
/// recently handled one (see LastOpcodeType).
enum InstType {
  OTHER,
  SMEM,
  VMEM
};
49*0a6a1f1dSLionel Sambuc 
50f4a2713aSLionel Sambuc typedef Counters RegCounters[512];
51f4a2713aSLionel Sambuc typedef std::pair<unsigned, unsigned> RegInterval;
52f4a2713aSLionel Sambuc 
53f4a2713aSLionel Sambuc class SIInsertWaits : public MachineFunctionPass {
54f4a2713aSLionel Sambuc 
55f4a2713aSLionel Sambuc private:
56f4a2713aSLionel Sambuc   static char ID;
57f4a2713aSLionel Sambuc   const SIInstrInfo *TII;
58f4a2713aSLionel Sambuc   const SIRegisterInfo *TRI;
59f4a2713aSLionel Sambuc   const MachineRegisterInfo *MRI;
60f4a2713aSLionel Sambuc 
61f4a2713aSLionel Sambuc   /// \brief Constant hardware limits
62f4a2713aSLionel Sambuc   static const Counters WaitCounts;
63f4a2713aSLionel Sambuc 
64f4a2713aSLionel Sambuc   /// \brief Constant zero value
65f4a2713aSLionel Sambuc   static const Counters ZeroCounts;
66f4a2713aSLionel Sambuc 
67f4a2713aSLionel Sambuc   /// \brief Counter values we have already waited on.
68f4a2713aSLionel Sambuc   Counters WaitedOn;
69f4a2713aSLionel Sambuc 
70f4a2713aSLionel Sambuc   /// \brief Counter values for last instruction issued.
71f4a2713aSLionel Sambuc   Counters LastIssued;
72f4a2713aSLionel Sambuc 
73f4a2713aSLionel Sambuc   /// \brief Registers used by async instructions.
74f4a2713aSLionel Sambuc   RegCounters UsedRegs;
75f4a2713aSLionel Sambuc 
76f4a2713aSLionel Sambuc   /// \brief Registers defined by async instructions.
77f4a2713aSLionel Sambuc   RegCounters DefinedRegs;
78f4a2713aSLionel Sambuc 
79f4a2713aSLionel Sambuc   /// \brief Different export instruction types seen since last wait.
80f4a2713aSLionel Sambuc   unsigned ExpInstrTypesSeen;
81f4a2713aSLionel Sambuc 
82*0a6a1f1dSLionel Sambuc   /// \brief Type of the last opcode.
83*0a6a1f1dSLionel Sambuc   InstType LastOpcodeType;
84*0a6a1f1dSLionel Sambuc 
85*0a6a1f1dSLionel Sambuc   bool LastInstWritesM0;
86*0a6a1f1dSLionel Sambuc 
87f4a2713aSLionel Sambuc   /// \brief Get increment/decrement amount for this instruction.
88f4a2713aSLionel Sambuc   Counters getHwCounts(MachineInstr &MI);
89f4a2713aSLionel Sambuc 
90f4a2713aSLionel Sambuc   /// \brief Is operand relevant for async execution?
91f4a2713aSLionel Sambuc   bool isOpRelevant(MachineOperand &Op);
92f4a2713aSLionel Sambuc 
93f4a2713aSLionel Sambuc   /// \brief Get register interval an operand affects.
94f4a2713aSLionel Sambuc   RegInterval getRegInterval(MachineOperand &Op);
95f4a2713aSLionel Sambuc 
96f4a2713aSLionel Sambuc   /// \brief Handle instructions async components
97*0a6a1f1dSLionel Sambuc   void pushInstruction(MachineBasicBlock &MBB,
98*0a6a1f1dSLionel Sambuc                        MachineBasicBlock::iterator I);
99f4a2713aSLionel Sambuc 
100f4a2713aSLionel Sambuc   /// \brief Insert the actual wait instruction
101f4a2713aSLionel Sambuc   bool insertWait(MachineBasicBlock &MBB,
102f4a2713aSLionel Sambuc                   MachineBasicBlock::iterator I,
103f4a2713aSLionel Sambuc                   const Counters &Counts);
104f4a2713aSLionel Sambuc 
105f4a2713aSLionel Sambuc   /// \brief Do we need def2def checks?
106f4a2713aSLionel Sambuc   bool unorderedDefines(MachineInstr &MI);
107f4a2713aSLionel Sambuc 
108f4a2713aSLionel Sambuc   /// \brief Resolve all operand dependencies to counter requirements
109f4a2713aSLionel Sambuc   Counters handleOperands(MachineInstr &MI);
110f4a2713aSLionel Sambuc 
111*0a6a1f1dSLionel Sambuc   /// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
112*0a6a1f1dSLionel Sambuc   void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
113*0a6a1f1dSLionel Sambuc 
114f4a2713aSLionel Sambuc public:
SIInsertWaits(TargetMachine & tm)115f4a2713aSLionel Sambuc   SIInsertWaits(TargetMachine &tm) :
116f4a2713aSLionel Sambuc     MachineFunctionPass(ID),
117*0a6a1f1dSLionel Sambuc     TII(nullptr),
118*0a6a1f1dSLionel Sambuc     TRI(nullptr),
119f4a2713aSLionel Sambuc     ExpInstrTypesSeen(0) { }
120f4a2713aSLionel Sambuc 
121*0a6a1f1dSLionel Sambuc   bool runOnMachineFunction(MachineFunction &MF) override;
122f4a2713aSLionel Sambuc 
getPassName() const123*0a6a1f1dSLionel Sambuc   const char *getPassName() const override {
124f4a2713aSLionel Sambuc     return "SI insert wait  instructions";
125f4a2713aSLionel Sambuc   }
126f4a2713aSLionel Sambuc 
127f4a2713aSLionel Sambuc };
128f4a2713aSLionel Sambuc 
129f4a2713aSLionel Sambuc } // End anonymous namespace
130f4a2713aSLionel Sambuc 
131f4a2713aSLionel Sambuc char SIInsertWaits::ID = 0;
132f4a2713aSLionel Sambuc 
133f4a2713aSLionel Sambuc const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } };
134f4a2713aSLionel Sambuc const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
135f4a2713aSLionel Sambuc 
createSIInsertWaits(TargetMachine & tm)136f4a2713aSLionel Sambuc FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) {
137f4a2713aSLionel Sambuc   return new SIInsertWaits(tm);
138f4a2713aSLionel Sambuc }
139f4a2713aSLionel Sambuc 
getHwCounts(MachineInstr & MI)140f4a2713aSLionel Sambuc Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
141f4a2713aSLionel Sambuc 
142f4a2713aSLionel Sambuc   uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
143f4a2713aSLionel Sambuc   Counters Result;
144f4a2713aSLionel Sambuc 
145f4a2713aSLionel Sambuc   Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
146f4a2713aSLionel Sambuc 
147f4a2713aSLionel Sambuc   // Only consider stores or EXP for EXP_CNT
148f4a2713aSLionel Sambuc   Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
149f4a2713aSLionel Sambuc       (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));
150f4a2713aSLionel Sambuc 
151f4a2713aSLionel Sambuc   // LGKM may uses larger values
152f4a2713aSLionel Sambuc   if (TSFlags & SIInstrFlags::LGKM_CNT) {
153f4a2713aSLionel Sambuc 
154f4a2713aSLionel Sambuc     if (TII->isSMRD(MI.getOpcode())) {
155f4a2713aSLionel Sambuc 
156f4a2713aSLionel Sambuc       MachineOperand &Op = MI.getOperand(0);
157f4a2713aSLionel Sambuc       assert(Op.isReg() && "First LGKM operand must be a register!");
158f4a2713aSLionel Sambuc 
159f4a2713aSLionel Sambuc       unsigned Reg = Op.getReg();
160f4a2713aSLionel Sambuc       unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
161f4a2713aSLionel Sambuc       Result.Named.LGKM = Size > 4 ? 2 : 1;
162f4a2713aSLionel Sambuc 
163f4a2713aSLionel Sambuc     } else {
164f4a2713aSLionel Sambuc       // DS
165f4a2713aSLionel Sambuc       Result.Named.LGKM = 1;
166f4a2713aSLionel Sambuc     }
167f4a2713aSLionel Sambuc 
168f4a2713aSLionel Sambuc   } else {
169f4a2713aSLionel Sambuc     Result.Named.LGKM = 0;
170f4a2713aSLionel Sambuc   }
171f4a2713aSLionel Sambuc 
172f4a2713aSLionel Sambuc   return Result;
173f4a2713aSLionel Sambuc }
174f4a2713aSLionel Sambuc 
isOpRelevant(MachineOperand & Op)175f4a2713aSLionel Sambuc bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
176f4a2713aSLionel Sambuc 
177f4a2713aSLionel Sambuc   // Constants are always irrelevant
178f4a2713aSLionel Sambuc   if (!Op.isReg())
179f4a2713aSLionel Sambuc     return false;
180f4a2713aSLionel Sambuc 
181f4a2713aSLionel Sambuc   // Defines are always relevant
182f4a2713aSLionel Sambuc   if (Op.isDef())
183f4a2713aSLionel Sambuc     return true;
184f4a2713aSLionel Sambuc 
185f4a2713aSLionel Sambuc   // For exports all registers are relevant
186f4a2713aSLionel Sambuc   MachineInstr &MI = *Op.getParent();
187f4a2713aSLionel Sambuc   if (MI.getOpcode() == AMDGPU::EXP)
188f4a2713aSLionel Sambuc     return true;
189f4a2713aSLionel Sambuc 
190f4a2713aSLionel Sambuc   // For stores the stored value is also relevant
191f4a2713aSLionel Sambuc   if (!MI.getDesc().mayStore())
192f4a2713aSLionel Sambuc     return false;
193f4a2713aSLionel Sambuc 
194*0a6a1f1dSLionel Sambuc   // Check if this operand is the value being stored.
195*0a6a1f1dSLionel Sambuc   // Special case for DS instructions, since the address
196*0a6a1f1dSLionel Sambuc   // operand comes before the value operand and it may have
197*0a6a1f1dSLionel Sambuc   // multiple data operands.
198*0a6a1f1dSLionel Sambuc 
199*0a6a1f1dSLionel Sambuc   if (TII->isDS(MI.getOpcode())) {
200*0a6a1f1dSLionel Sambuc     MachineOperand *Data = TII->getNamedOperand(MI, AMDGPU::OpName::data);
201*0a6a1f1dSLionel Sambuc     if (Data && Op.isIdenticalTo(*Data))
202*0a6a1f1dSLionel Sambuc       return true;
203*0a6a1f1dSLionel Sambuc 
204*0a6a1f1dSLionel Sambuc     MachineOperand *Data0 = TII->getNamedOperand(MI, AMDGPU::OpName::data0);
205*0a6a1f1dSLionel Sambuc     if (Data0 && Op.isIdenticalTo(*Data0))
206*0a6a1f1dSLionel Sambuc       return true;
207*0a6a1f1dSLionel Sambuc 
208*0a6a1f1dSLionel Sambuc     MachineOperand *Data1 = TII->getNamedOperand(MI, AMDGPU::OpName::data1);
209*0a6a1f1dSLionel Sambuc     if (Data1 && Op.isIdenticalTo(*Data1))
210*0a6a1f1dSLionel Sambuc       return true;
211*0a6a1f1dSLionel Sambuc 
212*0a6a1f1dSLionel Sambuc     return false;
213*0a6a1f1dSLionel Sambuc   }
214*0a6a1f1dSLionel Sambuc 
215*0a6a1f1dSLionel Sambuc   // NOTE: This assumes that the value operand is before the
216*0a6a1f1dSLionel Sambuc   // address operand, and that there is only one value operand.
217f4a2713aSLionel Sambuc   for (MachineInstr::mop_iterator I = MI.operands_begin(),
218f4a2713aSLionel Sambuc        E = MI.operands_end(); I != E; ++I) {
219f4a2713aSLionel Sambuc 
220f4a2713aSLionel Sambuc     if (I->isReg() && I->isUse())
221f4a2713aSLionel Sambuc       return Op.isIdenticalTo(*I);
222f4a2713aSLionel Sambuc   }
223f4a2713aSLionel Sambuc 
224f4a2713aSLionel Sambuc   return false;
225f4a2713aSLionel Sambuc }
226f4a2713aSLionel Sambuc 
getRegInterval(MachineOperand & Op)227f4a2713aSLionel Sambuc RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
228f4a2713aSLionel Sambuc 
229f4a2713aSLionel Sambuc   if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
230f4a2713aSLionel Sambuc     return std::make_pair(0, 0);
231f4a2713aSLionel Sambuc 
232f4a2713aSLionel Sambuc   unsigned Reg = Op.getReg();
233f4a2713aSLionel Sambuc   unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
234f4a2713aSLionel Sambuc 
235f4a2713aSLionel Sambuc   assert(Size >= 4);
236f4a2713aSLionel Sambuc 
237f4a2713aSLionel Sambuc   RegInterval Result;
238f4a2713aSLionel Sambuc   Result.first = TRI->getEncodingValue(Reg);
239f4a2713aSLionel Sambuc   Result.second = Result.first + Size / 4;
240f4a2713aSLionel Sambuc 
241f4a2713aSLionel Sambuc   return Result;
242f4a2713aSLionel Sambuc }
243f4a2713aSLionel Sambuc 
pushInstruction(MachineBasicBlock & MBB,MachineBasicBlock::iterator I)244*0a6a1f1dSLionel Sambuc void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
245*0a6a1f1dSLionel Sambuc                                     MachineBasicBlock::iterator I) {
246f4a2713aSLionel Sambuc 
247f4a2713aSLionel Sambuc   // Get the hardware counter increments and sum them up
248*0a6a1f1dSLionel Sambuc   Counters Increment = getHwCounts(*I);
249f4a2713aSLionel Sambuc   unsigned Sum = 0;
250f4a2713aSLionel Sambuc 
251f4a2713aSLionel Sambuc   for (unsigned i = 0; i < 3; ++i) {
252f4a2713aSLionel Sambuc     LastIssued.Array[i] += Increment.Array[i];
253f4a2713aSLionel Sambuc     Sum += Increment.Array[i];
254f4a2713aSLionel Sambuc   }
255f4a2713aSLionel Sambuc 
256f4a2713aSLionel Sambuc   // If we don't increase anything then that's it
257*0a6a1f1dSLionel Sambuc   if (Sum == 0) {
258*0a6a1f1dSLionel Sambuc     LastOpcodeType = OTHER;
259f4a2713aSLionel Sambuc     return;
260*0a6a1f1dSLionel Sambuc   }
261*0a6a1f1dSLionel Sambuc 
262*0a6a1f1dSLionel Sambuc   if (TRI->ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
263*0a6a1f1dSLionel Sambuc     // Any occurence of consecutive VMEM or SMEM instructions forms a VMEM
264*0a6a1f1dSLionel Sambuc     // or SMEM clause, respectively.
265*0a6a1f1dSLionel Sambuc     //
266*0a6a1f1dSLionel Sambuc     // The temporary workaround is to break the clauses with S_NOP.
267*0a6a1f1dSLionel Sambuc     //
268*0a6a1f1dSLionel Sambuc     // The proper solution would be to allocate registers such that all source
269*0a6a1f1dSLionel Sambuc     // and destination registers don't overlap, e.g. this is illegal:
270*0a6a1f1dSLionel Sambuc     //   r0 = load r2
271*0a6a1f1dSLionel Sambuc     //   r2 = load r0
272*0a6a1f1dSLionel Sambuc     if ((LastOpcodeType == SMEM && TII->isSMRD(I->getOpcode())) ||
273*0a6a1f1dSLionel Sambuc         (LastOpcodeType == VMEM && Increment.Named.VM)) {
274*0a6a1f1dSLionel Sambuc       // Insert a NOP to break the clause.
275*0a6a1f1dSLionel Sambuc       BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
276*0a6a1f1dSLionel Sambuc           .addImm(0);
277*0a6a1f1dSLionel Sambuc       LastInstWritesM0 = false;
278*0a6a1f1dSLionel Sambuc     }
279*0a6a1f1dSLionel Sambuc 
280*0a6a1f1dSLionel Sambuc     if (TII->isSMRD(I->getOpcode()))
281*0a6a1f1dSLionel Sambuc       LastOpcodeType = SMEM;
282*0a6a1f1dSLionel Sambuc     else if (Increment.Named.VM)
283*0a6a1f1dSLionel Sambuc       LastOpcodeType = VMEM;
284*0a6a1f1dSLionel Sambuc   }
285f4a2713aSLionel Sambuc 
286f4a2713aSLionel Sambuc   // Remember which export instructions we have seen
287f4a2713aSLionel Sambuc   if (Increment.Named.EXP) {
288*0a6a1f1dSLionel Sambuc     ExpInstrTypesSeen |= I->getOpcode() == AMDGPU::EXP ? 1 : 2;
289f4a2713aSLionel Sambuc   }
290f4a2713aSLionel Sambuc 
291*0a6a1f1dSLionel Sambuc   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
292f4a2713aSLionel Sambuc 
293*0a6a1f1dSLionel Sambuc     MachineOperand &Op = I->getOperand(i);
294f4a2713aSLionel Sambuc     if (!isOpRelevant(Op))
295f4a2713aSLionel Sambuc       continue;
296f4a2713aSLionel Sambuc 
297f4a2713aSLionel Sambuc     RegInterval Interval = getRegInterval(Op);
298f4a2713aSLionel Sambuc     for (unsigned j = Interval.first; j < Interval.second; ++j) {
299f4a2713aSLionel Sambuc 
300f4a2713aSLionel Sambuc       // Remember which registers we define
301f4a2713aSLionel Sambuc       if (Op.isDef())
302f4a2713aSLionel Sambuc         DefinedRegs[j] = LastIssued;
303f4a2713aSLionel Sambuc 
304f4a2713aSLionel Sambuc       // and which one we are using
305f4a2713aSLionel Sambuc       if (Op.isUse())
306f4a2713aSLionel Sambuc         UsedRegs[j] = LastIssued;
307f4a2713aSLionel Sambuc     }
308f4a2713aSLionel Sambuc   }
309f4a2713aSLionel Sambuc }
310f4a2713aSLionel Sambuc 
insertWait(MachineBasicBlock & MBB,MachineBasicBlock::iterator I,const Counters & Required)311f4a2713aSLionel Sambuc bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
312f4a2713aSLionel Sambuc                                MachineBasicBlock::iterator I,
313f4a2713aSLionel Sambuc                                const Counters &Required) {
314f4a2713aSLionel Sambuc 
315f4a2713aSLionel Sambuc   // End of program? No need to wait on anything
316f4a2713aSLionel Sambuc   if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
317f4a2713aSLionel Sambuc     return false;
318f4a2713aSLionel Sambuc 
319f4a2713aSLionel Sambuc   // Figure out if the async instructions execute in order
320f4a2713aSLionel Sambuc   bool Ordered[3];
321f4a2713aSLionel Sambuc 
322f4a2713aSLionel Sambuc   // VM_CNT is always ordered
323f4a2713aSLionel Sambuc   Ordered[0] = true;
324f4a2713aSLionel Sambuc 
325f4a2713aSLionel Sambuc   // EXP_CNT is unordered if we have both EXP & VM-writes
326f4a2713aSLionel Sambuc   Ordered[1] = ExpInstrTypesSeen == 3;
327f4a2713aSLionel Sambuc 
328f4a2713aSLionel Sambuc   // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
329f4a2713aSLionel Sambuc   Ordered[2] = false;
330f4a2713aSLionel Sambuc 
331f4a2713aSLionel Sambuc   // The values we are going to put into the S_WAITCNT instruction
332f4a2713aSLionel Sambuc   Counters Counts = WaitCounts;
333f4a2713aSLionel Sambuc 
334f4a2713aSLionel Sambuc   // Do we really need to wait?
335f4a2713aSLionel Sambuc   bool NeedWait = false;
336f4a2713aSLionel Sambuc 
337f4a2713aSLionel Sambuc   for (unsigned i = 0; i < 3; ++i) {
338f4a2713aSLionel Sambuc 
339f4a2713aSLionel Sambuc     if (Required.Array[i] <= WaitedOn.Array[i])
340f4a2713aSLionel Sambuc       continue;
341f4a2713aSLionel Sambuc 
342f4a2713aSLionel Sambuc     NeedWait = true;
343f4a2713aSLionel Sambuc 
344f4a2713aSLionel Sambuc     if (Ordered[i]) {
345f4a2713aSLionel Sambuc       unsigned Value = LastIssued.Array[i] - Required.Array[i];
346f4a2713aSLionel Sambuc 
347*0a6a1f1dSLionel Sambuc       // Adjust the value to the real hardware possibilities.
348f4a2713aSLionel Sambuc       Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
349f4a2713aSLionel Sambuc 
350f4a2713aSLionel Sambuc     } else
351f4a2713aSLionel Sambuc       Counts.Array[i] = 0;
352f4a2713aSLionel Sambuc 
353*0a6a1f1dSLionel Sambuc     // Remember on what we have waited on.
354f4a2713aSLionel Sambuc     WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
355f4a2713aSLionel Sambuc   }
356f4a2713aSLionel Sambuc 
357f4a2713aSLionel Sambuc   if (!NeedWait)
358f4a2713aSLionel Sambuc     return false;
359f4a2713aSLionel Sambuc 
360f4a2713aSLionel Sambuc   // Reset EXP_CNT instruction types
361f4a2713aSLionel Sambuc   if (Counts.Named.EXP == 0)
362f4a2713aSLionel Sambuc     ExpInstrTypesSeen = 0;
363f4a2713aSLionel Sambuc 
364f4a2713aSLionel Sambuc   // Build the wait instruction
365f4a2713aSLionel Sambuc   BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
366f4a2713aSLionel Sambuc           .addImm((Counts.Named.VM & 0xF) |
367f4a2713aSLionel Sambuc                   ((Counts.Named.EXP & 0x7) << 4) |
368f4a2713aSLionel Sambuc                   ((Counts.Named.LGKM & 0x7) << 8));
369f4a2713aSLionel Sambuc 
370*0a6a1f1dSLionel Sambuc   LastOpcodeType = OTHER;
371*0a6a1f1dSLionel Sambuc   LastInstWritesM0 = false;
372f4a2713aSLionel Sambuc   return true;
373f4a2713aSLionel Sambuc }
374f4a2713aSLionel Sambuc 
375f4a2713aSLionel Sambuc /// \brief helper function for handleOperands
increaseCounters(Counters & Dst,const Counters & Src)376f4a2713aSLionel Sambuc static void increaseCounters(Counters &Dst, const Counters &Src) {
377f4a2713aSLionel Sambuc 
378f4a2713aSLionel Sambuc   for (unsigned i = 0; i < 3; ++i)
379f4a2713aSLionel Sambuc     Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
380f4a2713aSLionel Sambuc }
381f4a2713aSLionel Sambuc 
handleOperands(MachineInstr & MI)382f4a2713aSLionel Sambuc Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
383f4a2713aSLionel Sambuc 
384f4a2713aSLionel Sambuc   Counters Result = ZeroCounts;
385f4a2713aSLionel Sambuc 
386*0a6a1f1dSLionel Sambuc   // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
387*0a6a1f1dSLionel Sambuc   // but we also want to wait for any other outstanding transfers before
388*0a6a1f1dSLionel Sambuc   // signalling other hardware blocks
389*0a6a1f1dSLionel Sambuc   if (MI.getOpcode() == AMDGPU::S_SENDMSG)
390*0a6a1f1dSLionel Sambuc     return LastIssued;
391*0a6a1f1dSLionel Sambuc 
392f4a2713aSLionel Sambuc   // For each register affected by this
393f4a2713aSLionel Sambuc   // instruction increase the result sequence
394f4a2713aSLionel Sambuc   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
395f4a2713aSLionel Sambuc 
396f4a2713aSLionel Sambuc     MachineOperand &Op = MI.getOperand(i);
397f4a2713aSLionel Sambuc     RegInterval Interval = getRegInterval(Op);
398f4a2713aSLionel Sambuc     for (unsigned j = Interval.first; j < Interval.second; ++j) {
399f4a2713aSLionel Sambuc 
400f4a2713aSLionel Sambuc       if (Op.isDef()) {
401f4a2713aSLionel Sambuc         increaseCounters(Result, UsedRegs[j]);
402f4a2713aSLionel Sambuc         increaseCounters(Result, DefinedRegs[j]);
403f4a2713aSLionel Sambuc       }
404f4a2713aSLionel Sambuc 
405f4a2713aSLionel Sambuc       if (Op.isUse())
406f4a2713aSLionel Sambuc         increaseCounters(Result, DefinedRegs[j]);
407f4a2713aSLionel Sambuc     }
408f4a2713aSLionel Sambuc   }
409f4a2713aSLionel Sambuc 
410f4a2713aSLionel Sambuc   return Result;
411f4a2713aSLionel Sambuc }
412f4a2713aSLionel Sambuc 
handleSendMsg(MachineBasicBlock & MBB,MachineBasicBlock::iterator I)413*0a6a1f1dSLionel Sambuc void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
414*0a6a1f1dSLionel Sambuc                                   MachineBasicBlock::iterator I) {
415*0a6a1f1dSLionel Sambuc   if (TRI->ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
416*0a6a1f1dSLionel Sambuc     return;
417*0a6a1f1dSLionel Sambuc 
418*0a6a1f1dSLionel Sambuc   // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
419*0a6a1f1dSLionel Sambuc   if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) {
420*0a6a1f1dSLionel Sambuc     BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
421*0a6a1f1dSLionel Sambuc     LastInstWritesM0 = false;
422*0a6a1f1dSLionel Sambuc     return;
423*0a6a1f1dSLionel Sambuc   }
424*0a6a1f1dSLionel Sambuc 
425*0a6a1f1dSLionel Sambuc   // Set whether this instruction sets M0
426*0a6a1f1dSLionel Sambuc   LastInstWritesM0 = false;
427*0a6a1f1dSLionel Sambuc 
428*0a6a1f1dSLionel Sambuc   unsigned NumOperands = I->getNumOperands();
429*0a6a1f1dSLionel Sambuc   for (unsigned i = 0; i < NumOperands; i++) {
430*0a6a1f1dSLionel Sambuc     const MachineOperand &Op = I->getOperand(i);
431*0a6a1f1dSLionel Sambuc 
432*0a6a1f1dSLionel Sambuc     if (Op.isReg() && Op.isDef() && Op.getReg() == AMDGPU::M0)
433*0a6a1f1dSLionel Sambuc       LastInstWritesM0 = true;
434*0a6a1f1dSLionel Sambuc   }
435*0a6a1f1dSLionel Sambuc }
436*0a6a1f1dSLionel Sambuc 
437*0a6a1f1dSLionel Sambuc // FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
438*0a6a1f1dSLionel Sambuc // around other non-memory instructions.
runOnMachineFunction(MachineFunction & MF)439f4a2713aSLionel Sambuc bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
440f4a2713aSLionel Sambuc   bool Changes = false;
441f4a2713aSLionel Sambuc 
442*0a6a1f1dSLionel Sambuc   TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
443*0a6a1f1dSLionel Sambuc   TRI =
444*0a6a1f1dSLionel Sambuc       static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
445f4a2713aSLionel Sambuc 
446f4a2713aSLionel Sambuc   MRI = &MF.getRegInfo();
447f4a2713aSLionel Sambuc 
448f4a2713aSLionel Sambuc   WaitedOn = ZeroCounts;
449f4a2713aSLionel Sambuc   LastIssued = ZeroCounts;
450*0a6a1f1dSLionel Sambuc   LastOpcodeType = OTHER;
451*0a6a1f1dSLionel Sambuc   LastInstWritesM0 = false;
452f4a2713aSLionel Sambuc 
453f4a2713aSLionel Sambuc   memset(&UsedRegs, 0, sizeof(UsedRegs));
454f4a2713aSLionel Sambuc   memset(&DefinedRegs, 0, sizeof(DefinedRegs));
455f4a2713aSLionel Sambuc 
456f4a2713aSLionel Sambuc   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
457f4a2713aSLionel Sambuc        BI != BE; ++BI) {
458f4a2713aSLionel Sambuc 
459f4a2713aSLionel Sambuc     MachineBasicBlock &MBB = *BI;
460f4a2713aSLionel Sambuc     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
461f4a2713aSLionel Sambuc          I != E; ++I) {
462f4a2713aSLionel Sambuc 
463*0a6a1f1dSLionel Sambuc       // Wait for everything before a barrier.
464*0a6a1f1dSLionel Sambuc       if (I->getOpcode() == AMDGPU::S_BARRIER)
465*0a6a1f1dSLionel Sambuc         Changes |= insertWait(MBB, I, LastIssued);
466*0a6a1f1dSLionel Sambuc       else
467f4a2713aSLionel Sambuc         Changes |= insertWait(MBB, I, handleOperands(*I));
468*0a6a1f1dSLionel Sambuc 
469*0a6a1f1dSLionel Sambuc       pushInstruction(MBB, I);
470*0a6a1f1dSLionel Sambuc       handleSendMsg(MBB, I);
471f4a2713aSLionel Sambuc     }
472f4a2713aSLionel Sambuc 
473f4a2713aSLionel Sambuc     // Wait for everything at the end of the MBB
474f4a2713aSLionel Sambuc     Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
475f4a2713aSLionel Sambuc   }
476f4a2713aSLionel Sambuc 
477f4a2713aSLionel Sambuc   return Changes;
478f4a2713aSLionel Sambuc }
479