//===-- SIInsertWaits.cpp - Insert wait instructions ----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Insert wait instructions for memory reads and writes.
///
/// Memory reads and writes are issued asynchronously, so we need to insert
/// S_WAITCNT instructions when we want to access any of their results or
/// overwrite any register that's used asynchronously.
//
//===----------------------------------------------------------------------===//
18f4a2713aSLionel Sambuc
19f4a2713aSLionel Sambuc #include "AMDGPU.h"
20*0a6a1f1dSLionel Sambuc #include "AMDGPUSubtarget.h"
21*0a6a1f1dSLionel Sambuc #include "SIDefines.h"
22f4a2713aSLionel Sambuc #include "SIInstrInfo.h"
23f4a2713aSLionel Sambuc #include "SIMachineFunctionInfo.h"
24f4a2713aSLionel Sambuc #include "llvm/CodeGen/MachineFunction.h"
25f4a2713aSLionel Sambuc #include "llvm/CodeGen/MachineFunctionPass.h"
26f4a2713aSLionel Sambuc #include "llvm/CodeGen/MachineInstrBuilder.h"
27f4a2713aSLionel Sambuc #include "llvm/CodeGen/MachineRegisterInfo.h"
28f4a2713aSLionel Sambuc
29f4a2713aSLionel Sambuc using namespace llvm;
30f4a2713aSLionel Sambuc
31f4a2713aSLionel Sambuc namespace {
32f4a2713aSLionel Sambuc
33f4a2713aSLionel Sambuc /// \brief One variable for each of the hardware counters
/// \brief One variable for each of the hardware counters.
///
/// The three values can be addressed by name (Named.VM, Named.EXP,
/// Named.LGKM) or by position (Array[0..2]), so code can either pick one
/// specific counter or loop over all of them.
union Counters {
  struct {
    unsigned VM;
    unsigned EXP;
    unsigned LGKM;
  } Named;
  unsigned Array[3];
};
43f4a2713aSLionel Sambuc
/// \brief Coarse classification of the most recently processed instruction.
///
/// SMEM is recorded for SMRD instructions, VMEM for instructions that
/// increment the VM counter, and OTHER for everything else.
enum InstType {
  OTHER,
  SMEM,
  VMEM
};
49*0a6a1f1dSLionel Sambuc
50f4a2713aSLionel Sambuc typedef Counters RegCounters[512];
51f4a2713aSLionel Sambuc typedef std::pair<unsigned, unsigned> RegInterval;
52f4a2713aSLionel Sambuc
53f4a2713aSLionel Sambuc class SIInsertWaits : public MachineFunctionPass {
54f4a2713aSLionel Sambuc
55f4a2713aSLionel Sambuc private:
56f4a2713aSLionel Sambuc static char ID;
57f4a2713aSLionel Sambuc const SIInstrInfo *TII;
58f4a2713aSLionel Sambuc const SIRegisterInfo *TRI;
59f4a2713aSLionel Sambuc const MachineRegisterInfo *MRI;
60f4a2713aSLionel Sambuc
61f4a2713aSLionel Sambuc /// \brief Constant hardware limits
62f4a2713aSLionel Sambuc static const Counters WaitCounts;
63f4a2713aSLionel Sambuc
64f4a2713aSLionel Sambuc /// \brief Constant zero value
65f4a2713aSLionel Sambuc static const Counters ZeroCounts;
66f4a2713aSLionel Sambuc
67f4a2713aSLionel Sambuc /// \brief Counter values we have already waited on.
68f4a2713aSLionel Sambuc Counters WaitedOn;
69f4a2713aSLionel Sambuc
70f4a2713aSLionel Sambuc /// \brief Counter values for last instruction issued.
71f4a2713aSLionel Sambuc Counters LastIssued;
72f4a2713aSLionel Sambuc
73f4a2713aSLionel Sambuc /// \brief Registers used by async instructions.
74f4a2713aSLionel Sambuc RegCounters UsedRegs;
75f4a2713aSLionel Sambuc
76f4a2713aSLionel Sambuc /// \brief Registers defined by async instructions.
77f4a2713aSLionel Sambuc RegCounters DefinedRegs;
78f4a2713aSLionel Sambuc
79f4a2713aSLionel Sambuc /// \brief Different export instruction types seen since last wait.
80f4a2713aSLionel Sambuc unsigned ExpInstrTypesSeen;
81f4a2713aSLionel Sambuc
82*0a6a1f1dSLionel Sambuc /// \brief Type of the last opcode.
83*0a6a1f1dSLionel Sambuc InstType LastOpcodeType;
84*0a6a1f1dSLionel Sambuc
85*0a6a1f1dSLionel Sambuc bool LastInstWritesM0;
86*0a6a1f1dSLionel Sambuc
87f4a2713aSLionel Sambuc /// \brief Get increment/decrement amount for this instruction.
88f4a2713aSLionel Sambuc Counters getHwCounts(MachineInstr &MI);
89f4a2713aSLionel Sambuc
90f4a2713aSLionel Sambuc /// \brief Is operand relevant for async execution?
91f4a2713aSLionel Sambuc bool isOpRelevant(MachineOperand &Op);
92f4a2713aSLionel Sambuc
93f4a2713aSLionel Sambuc /// \brief Get register interval an operand affects.
94f4a2713aSLionel Sambuc RegInterval getRegInterval(MachineOperand &Op);
95f4a2713aSLionel Sambuc
96f4a2713aSLionel Sambuc /// \brief Handle instructions async components
97*0a6a1f1dSLionel Sambuc void pushInstruction(MachineBasicBlock &MBB,
98*0a6a1f1dSLionel Sambuc MachineBasicBlock::iterator I);
99f4a2713aSLionel Sambuc
100f4a2713aSLionel Sambuc /// \brief Insert the actual wait instruction
101f4a2713aSLionel Sambuc bool insertWait(MachineBasicBlock &MBB,
102f4a2713aSLionel Sambuc MachineBasicBlock::iterator I,
103f4a2713aSLionel Sambuc const Counters &Counts);
104f4a2713aSLionel Sambuc
105f4a2713aSLionel Sambuc /// \brief Do we need def2def checks?
106f4a2713aSLionel Sambuc bool unorderedDefines(MachineInstr &MI);
107f4a2713aSLionel Sambuc
108f4a2713aSLionel Sambuc /// \brief Resolve all operand dependencies to counter requirements
109f4a2713aSLionel Sambuc Counters handleOperands(MachineInstr &MI);
110f4a2713aSLionel Sambuc
111*0a6a1f1dSLionel Sambuc /// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
112*0a6a1f1dSLionel Sambuc void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
113*0a6a1f1dSLionel Sambuc
114f4a2713aSLionel Sambuc public:
SIInsertWaits(TargetMachine & tm)115f4a2713aSLionel Sambuc SIInsertWaits(TargetMachine &tm) :
116f4a2713aSLionel Sambuc MachineFunctionPass(ID),
117*0a6a1f1dSLionel Sambuc TII(nullptr),
118*0a6a1f1dSLionel Sambuc TRI(nullptr),
119f4a2713aSLionel Sambuc ExpInstrTypesSeen(0) { }
120f4a2713aSLionel Sambuc
121*0a6a1f1dSLionel Sambuc bool runOnMachineFunction(MachineFunction &MF) override;
122f4a2713aSLionel Sambuc
getPassName() const123*0a6a1f1dSLionel Sambuc const char *getPassName() const override {
124f4a2713aSLionel Sambuc return "SI insert wait instructions";
125f4a2713aSLionel Sambuc }
126f4a2713aSLionel Sambuc
127f4a2713aSLionel Sambuc };
128f4a2713aSLionel Sambuc
129f4a2713aSLionel Sambuc } // End anonymous namespace
130f4a2713aSLionel Sambuc
131f4a2713aSLionel Sambuc char SIInsertWaits::ID = 0;
132f4a2713aSLionel Sambuc
133f4a2713aSLionel Sambuc const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } };
134f4a2713aSLionel Sambuc const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
135f4a2713aSLionel Sambuc
createSIInsertWaits(TargetMachine & tm)136f4a2713aSLionel Sambuc FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) {
137f4a2713aSLionel Sambuc return new SIInsertWaits(tm);
138f4a2713aSLionel Sambuc }
139f4a2713aSLionel Sambuc
getHwCounts(MachineInstr & MI)140f4a2713aSLionel Sambuc Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
141f4a2713aSLionel Sambuc
142f4a2713aSLionel Sambuc uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
143f4a2713aSLionel Sambuc Counters Result;
144f4a2713aSLionel Sambuc
145f4a2713aSLionel Sambuc Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
146f4a2713aSLionel Sambuc
147f4a2713aSLionel Sambuc // Only consider stores or EXP for EXP_CNT
148f4a2713aSLionel Sambuc Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
149f4a2713aSLionel Sambuc (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));
150f4a2713aSLionel Sambuc
151f4a2713aSLionel Sambuc // LGKM may uses larger values
152f4a2713aSLionel Sambuc if (TSFlags & SIInstrFlags::LGKM_CNT) {
153f4a2713aSLionel Sambuc
154f4a2713aSLionel Sambuc if (TII->isSMRD(MI.getOpcode())) {
155f4a2713aSLionel Sambuc
156f4a2713aSLionel Sambuc MachineOperand &Op = MI.getOperand(0);
157f4a2713aSLionel Sambuc assert(Op.isReg() && "First LGKM operand must be a register!");
158f4a2713aSLionel Sambuc
159f4a2713aSLionel Sambuc unsigned Reg = Op.getReg();
160f4a2713aSLionel Sambuc unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
161f4a2713aSLionel Sambuc Result.Named.LGKM = Size > 4 ? 2 : 1;
162f4a2713aSLionel Sambuc
163f4a2713aSLionel Sambuc } else {
164f4a2713aSLionel Sambuc // DS
165f4a2713aSLionel Sambuc Result.Named.LGKM = 1;
166f4a2713aSLionel Sambuc }
167f4a2713aSLionel Sambuc
168f4a2713aSLionel Sambuc } else {
169f4a2713aSLionel Sambuc Result.Named.LGKM = 0;
170f4a2713aSLionel Sambuc }
171f4a2713aSLionel Sambuc
172f4a2713aSLionel Sambuc return Result;
173f4a2713aSLionel Sambuc }
174f4a2713aSLionel Sambuc
isOpRelevant(MachineOperand & Op)175f4a2713aSLionel Sambuc bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
176f4a2713aSLionel Sambuc
177f4a2713aSLionel Sambuc // Constants are always irrelevant
178f4a2713aSLionel Sambuc if (!Op.isReg())
179f4a2713aSLionel Sambuc return false;
180f4a2713aSLionel Sambuc
181f4a2713aSLionel Sambuc // Defines are always relevant
182f4a2713aSLionel Sambuc if (Op.isDef())
183f4a2713aSLionel Sambuc return true;
184f4a2713aSLionel Sambuc
185f4a2713aSLionel Sambuc // For exports all registers are relevant
186f4a2713aSLionel Sambuc MachineInstr &MI = *Op.getParent();
187f4a2713aSLionel Sambuc if (MI.getOpcode() == AMDGPU::EXP)
188f4a2713aSLionel Sambuc return true;
189f4a2713aSLionel Sambuc
190f4a2713aSLionel Sambuc // For stores the stored value is also relevant
191f4a2713aSLionel Sambuc if (!MI.getDesc().mayStore())
192f4a2713aSLionel Sambuc return false;
193f4a2713aSLionel Sambuc
194*0a6a1f1dSLionel Sambuc // Check if this operand is the value being stored.
195*0a6a1f1dSLionel Sambuc // Special case for DS instructions, since the address
196*0a6a1f1dSLionel Sambuc // operand comes before the value operand and it may have
197*0a6a1f1dSLionel Sambuc // multiple data operands.
198*0a6a1f1dSLionel Sambuc
199*0a6a1f1dSLionel Sambuc if (TII->isDS(MI.getOpcode())) {
200*0a6a1f1dSLionel Sambuc MachineOperand *Data = TII->getNamedOperand(MI, AMDGPU::OpName::data);
201*0a6a1f1dSLionel Sambuc if (Data && Op.isIdenticalTo(*Data))
202*0a6a1f1dSLionel Sambuc return true;
203*0a6a1f1dSLionel Sambuc
204*0a6a1f1dSLionel Sambuc MachineOperand *Data0 = TII->getNamedOperand(MI, AMDGPU::OpName::data0);
205*0a6a1f1dSLionel Sambuc if (Data0 && Op.isIdenticalTo(*Data0))
206*0a6a1f1dSLionel Sambuc return true;
207*0a6a1f1dSLionel Sambuc
208*0a6a1f1dSLionel Sambuc MachineOperand *Data1 = TII->getNamedOperand(MI, AMDGPU::OpName::data1);
209*0a6a1f1dSLionel Sambuc if (Data1 && Op.isIdenticalTo(*Data1))
210*0a6a1f1dSLionel Sambuc return true;
211*0a6a1f1dSLionel Sambuc
212*0a6a1f1dSLionel Sambuc return false;
213*0a6a1f1dSLionel Sambuc }
214*0a6a1f1dSLionel Sambuc
215*0a6a1f1dSLionel Sambuc // NOTE: This assumes that the value operand is before the
216*0a6a1f1dSLionel Sambuc // address operand, and that there is only one value operand.
217f4a2713aSLionel Sambuc for (MachineInstr::mop_iterator I = MI.operands_begin(),
218f4a2713aSLionel Sambuc E = MI.operands_end(); I != E; ++I) {
219f4a2713aSLionel Sambuc
220f4a2713aSLionel Sambuc if (I->isReg() && I->isUse())
221f4a2713aSLionel Sambuc return Op.isIdenticalTo(*I);
222f4a2713aSLionel Sambuc }
223f4a2713aSLionel Sambuc
224f4a2713aSLionel Sambuc return false;
225f4a2713aSLionel Sambuc }
226f4a2713aSLionel Sambuc
getRegInterval(MachineOperand & Op)227f4a2713aSLionel Sambuc RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
228f4a2713aSLionel Sambuc
229f4a2713aSLionel Sambuc if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
230f4a2713aSLionel Sambuc return std::make_pair(0, 0);
231f4a2713aSLionel Sambuc
232f4a2713aSLionel Sambuc unsigned Reg = Op.getReg();
233f4a2713aSLionel Sambuc unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
234f4a2713aSLionel Sambuc
235f4a2713aSLionel Sambuc assert(Size >= 4);
236f4a2713aSLionel Sambuc
237f4a2713aSLionel Sambuc RegInterval Result;
238f4a2713aSLionel Sambuc Result.first = TRI->getEncodingValue(Reg);
239f4a2713aSLionel Sambuc Result.second = Result.first + Size / 4;
240f4a2713aSLionel Sambuc
241f4a2713aSLionel Sambuc return Result;
242f4a2713aSLionel Sambuc }
243f4a2713aSLionel Sambuc
pushInstruction(MachineBasicBlock & MBB,MachineBasicBlock::iterator I)244*0a6a1f1dSLionel Sambuc void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
245*0a6a1f1dSLionel Sambuc MachineBasicBlock::iterator I) {
246f4a2713aSLionel Sambuc
247f4a2713aSLionel Sambuc // Get the hardware counter increments and sum them up
248*0a6a1f1dSLionel Sambuc Counters Increment = getHwCounts(*I);
249f4a2713aSLionel Sambuc unsigned Sum = 0;
250f4a2713aSLionel Sambuc
251f4a2713aSLionel Sambuc for (unsigned i = 0; i < 3; ++i) {
252f4a2713aSLionel Sambuc LastIssued.Array[i] += Increment.Array[i];
253f4a2713aSLionel Sambuc Sum += Increment.Array[i];
254f4a2713aSLionel Sambuc }
255f4a2713aSLionel Sambuc
256f4a2713aSLionel Sambuc // If we don't increase anything then that's it
257*0a6a1f1dSLionel Sambuc if (Sum == 0) {
258*0a6a1f1dSLionel Sambuc LastOpcodeType = OTHER;
259f4a2713aSLionel Sambuc return;
260*0a6a1f1dSLionel Sambuc }
261*0a6a1f1dSLionel Sambuc
262*0a6a1f1dSLionel Sambuc if (TRI->ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
263*0a6a1f1dSLionel Sambuc // Any occurence of consecutive VMEM or SMEM instructions forms a VMEM
264*0a6a1f1dSLionel Sambuc // or SMEM clause, respectively.
265*0a6a1f1dSLionel Sambuc //
266*0a6a1f1dSLionel Sambuc // The temporary workaround is to break the clauses with S_NOP.
267*0a6a1f1dSLionel Sambuc //
268*0a6a1f1dSLionel Sambuc // The proper solution would be to allocate registers such that all source
269*0a6a1f1dSLionel Sambuc // and destination registers don't overlap, e.g. this is illegal:
270*0a6a1f1dSLionel Sambuc // r0 = load r2
271*0a6a1f1dSLionel Sambuc // r2 = load r0
272*0a6a1f1dSLionel Sambuc if ((LastOpcodeType == SMEM && TII->isSMRD(I->getOpcode())) ||
273*0a6a1f1dSLionel Sambuc (LastOpcodeType == VMEM && Increment.Named.VM)) {
274*0a6a1f1dSLionel Sambuc // Insert a NOP to break the clause.
275*0a6a1f1dSLionel Sambuc BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
276*0a6a1f1dSLionel Sambuc .addImm(0);
277*0a6a1f1dSLionel Sambuc LastInstWritesM0 = false;
278*0a6a1f1dSLionel Sambuc }
279*0a6a1f1dSLionel Sambuc
280*0a6a1f1dSLionel Sambuc if (TII->isSMRD(I->getOpcode()))
281*0a6a1f1dSLionel Sambuc LastOpcodeType = SMEM;
282*0a6a1f1dSLionel Sambuc else if (Increment.Named.VM)
283*0a6a1f1dSLionel Sambuc LastOpcodeType = VMEM;
284*0a6a1f1dSLionel Sambuc }
285f4a2713aSLionel Sambuc
286f4a2713aSLionel Sambuc // Remember which export instructions we have seen
287f4a2713aSLionel Sambuc if (Increment.Named.EXP) {
288*0a6a1f1dSLionel Sambuc ExpInstrTypesSeen |= I->getOpcode() == AMDGPU::EXP ? 1 : 2;
289f4a2713aSLionel Sambuc }
290f4a2713aSLionel Sambuc
291*0a6a1f1dSLionel Sambuc for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
292f4a2713aSLionel Sambuc
293*0a6a1f1dSLionel Sambuc MachineOperand &Op = I->getOperand(i);
294f4a2713aSLionel Sambuc if (!isOpRelevant(Op))
295f4a2713aSLionel Sambuc continue;
296f4a2713aSLionel Sambuc
297f4a2713aSLionel Sambuc RegInterval Interval = getRegInterval(Op);
298f4a2713aSLionel Sambuc for (unsigned j = Interval.first; j < Interval.second; ++j) {
299f4a2713aSLionel Sambuc
300f4a2713aSLionel Sambuc // Remember which registers we define
301f4a2713aSLionel Sambuc if (Op.isDef())
302f4a2713aSLionel Sambuc DefinedRegs[j] = LastIssued;
303f4a2713aSLionel Sambuc
304f4a2713aSLionel Sambuc // and which one we are using
305f4a2713aSLionel Sambuc if (Op.isUse())
306f4a2713aSLionel Sambuc UsedRegs[j] = LastIssued;
307f4a2713aSLionel Sambuc }
308f4a2713aSLionel Sambuc }
309f4a2713aSLionel Sambuc }
310f4a2713aSLionel Sambuc
insertWait(MachineBasicBlock & MBB,MachineBasicBlock::iterator I,const Counters & Required)311f4a2713aSLionel Sambuc bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
312f4a2713aSLionel Sambuc MachineBasicBlock::iterator I,
313f4a2713aSLionel Sambuc const Counters &Required) {
314f4a2713aSLionel Sambuc
315f4a2713aSLionel Sambuc // End of program? No need to wait on anything
316f4a2713aSLionel Sambuc if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
317f4a2713aSLionel Sambuc return false;
318f4a2713aSLionel Sambuc
319f4a2713aSLionel Sambuc // Figure out if the async instructions execute in order
320f4a2713aSLionel Sambuc bool Ordered[3];
321f4a2713aSLionel Sambuc
322f4a2713aSLionel Sambuc // VM_CNT is always ordered
323f4a2713aSLionel Sambuc Ordered[0] = true;
324f4a2713aSLionel Sambuc
325f4a2713aSLionel Sambuc // EXP_CNT is unordered if we have both EXP & VM-writes
326f4a2713aSLionel Sambuc Ordered[1] = ExpInstrTypesSeen == 3;
327f4a2713aSLionel Sambuc
328f4a2713aSLionel Sambuc // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
329f4a2713aSLionel Sambuc Ordered[2] = false;
330f4a2713aSLionel Sambuc
331f4a2713aSLionel Sambuc // The values we are going to put into the S_WAITCNT instruction
332f4a2713aSLionel Sambuc Counters Counts = WaitCounts;
333f4a2713aSLionel Sambuc
334f4a2713aSLionel Sambuc // Do we really need to wait?
335f4a2713aSLionel Sambuc bool NeedWait = false;
336f4a2713aSLionel Sambuc
337f4a2713aSLionel Sambuc for (unsigned i = 0; i < 3; ++i) {
338f4a2713aSLionel Sambuc
339f4a2713aSLionel Sambuc if (Required.Array[i] <= WaitedOn.Array[i])
340f4a2713aSLionel Sambuc continue;
341f4a2713aSLionel Sambuc
342f4a2713aSLionel Sambuc NeedWait = true;
343f4a2713aSLionel Sambuc
344f4a2713aSLionel Sambuc if (Ordered[i]) {
345f4a2713aSLionel Sambuc unsigned Value = LastIssued.Array[i] - Required.Array[i];
346f4a2713aSLionel Sambuc
347*0a6a1f1dSLionel Sambuc // Adjust the value to the real hardware possibilities.
348f4a2713aSLionel Sambuc Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
349f4a2713aSLionel Sambuc
350f4a2713aSLionel Sambuc } else
351f4a2713aSLionel Sambuc Counts.Array[i] = 0;
352f4a2713aSLionel Sambuc
353*0a6a1f1dSLionel Sambuc // Remember on what we have waited on.
354f4a2713aSLionel Sambuc WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
355f4a2713aSLionel Sambuc }
356f4a2713aSLionel Sambuc
357f4a2713aSLionel Sambuc if (!NeedWait)
358f4a2713aSLionel Sambuc return false;
359f4a2713aSLionel Sambuc
360f4a2713aSLionel Sambuc // Reset EXP_CNT instruction types
361f4a2713aSLionel Sambuc if (Counts.Named.EXP == 0)
362f4a2713aSLionel Sambuc ExpInstrTypesSeen = 0;
363f4a2713aSLionel Sambuc
364f4a2713aSLionel Sambuc // Build the wait instruction
365f4a2713aSLionel Sambuc BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
366f4a2713aSLionel Sambuc .addImm((Counts.Named.VM & 0xF) |
367f4a2713aSLionel Sambuc ((Counts.Named.EXP & 0x7) << 4) |
368f4a2713aSLionel Sambuc ((Counts.Named.LGKM & 0x7) << 8));
369f4a2713aSLionel Sambuc
370*0a6a1f1dSLionel Sambuc LastOpcodeType = OTHER;
371*0a6a1f1dSLionel Sambuc LastInstWritesM0 = false;
372f4a2713aSLionel Sambuc return true;
373f4a2713aSLionel Sambuc }
374f4a2713aSLionel Sambuc
375f4a2713aSLionel Sambuc /// \brief helper function for handleOperands
increaseCounters(Counters & Dst,const Counters & Src)376f4a2713aSLionel Sambuc static void increaseCounters(Counters &Dst, const Counters &Src) {
377f4a2713aSLionel Sambuc
378f4a2713aSLionel Sambuc for (unsigned i = 0; i < 3; ++i)
379f4a2713aSLionel Sambuc Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
380f4a2713aSLionel Sambuc }
381f4a2713aSLionel Sambuc
handleOperands(MachineInstr & MI)382f4a2713aSLionel Sambuc Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
383f4a2713aSLionel Sambuc
384f4a2713aSLionel Sambuc Counters Result = ZeroCounts;
385f4a2713aSLionel Sambuc
386*0a6a1f1dSLionel Sambuc // S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
387*0a6a1f1dSLionel Sambuc // but we also want to wait for any other outstanding transfers before
388*0a6a1f1dSLionel Sambuc // signalling other hardware blocks
389*0a6a1f1dSLionel Sambuc if (MI.getOpcode() == AMDGPU::S_SENDMSG)
390*0a6a1f1dSLionel Sambuc return LastIssued;
391*0a6a1f1dSLionel Sambuc
392f4a2713aSLionel Sambuc // For each register affected by this
393f4a2713aSLionel Sambuc // instruction increase the result sequence
394f4a2713aSLionel Sambuc for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
395f4a2713aSLionel Sambuc
396f4a2713aSLionel Sambuc MachineOperand &Op = MI.getOperand(i);
397f4a2713aSLionel Sambuc RegInterval Interval = getRegInterval(Op);
398f4a2713aSLionel Sambuc for (unsigned j = Interval.first; j < Interval.second; ++j) {
399f4a2713aSLionel Sambuc
400f4a2713aSLionel Sambuc if (Op.isDef()) {
401f4a2713aSLionel Sambuc increaseCounters(Result, UsedRegs[j]);
402f4a2713aSLionel Sambuc increaseCounters(Result, DefinedRegs[j]);
403f4a2713aSLionel Sambuc }
404f4a2713aSLionel Sambuc
405f4a2713aSLionel Sambuc if (Op.isUse())
406f4a2713aSLionel Sambuc increaseCounters(Result, DefinedRegs[j]);
407f4a2713aSLionel Sambuc }
408f4a2713aSLionel Sambuc }
409f4a2713aSLionel Sambuc
410f4a2713aSLionel Sambuc return Result;
411f4a2713aSLionel Sambuc }
412f4a2713aSLionel Sambuc
handleSendMsg(MachineBasicBlock & MBB,MachineBasicBlock::iterator I)413*0a6a1f1dSLionel Sambuc void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
414*0a6a1f1dSLionel Sambuc MachineBasicBlock::iterator I) {
415*0a6a1f1dSLionel Sambuc if (TRI->ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
416*0a6a1f1dSLionel Sambuc return;
417*0a6a1f1dSLionel Sambuc
418*0a6a1f1dSLionel Sambuc // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
419*0a6a1f1dSLionel Sambuc if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) {
420*0a6a1f1dSLionel Sambuc BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
421*0a6a1f1dSLionel Sambuc LastInstWritesM0 = false;
422*0a6a1f1dSLionel Sambuc return;
423*0a6a1f1dSLionel Sambuc }
424*0a6a1f1dSLionel Sambuc
425*0a6a1f1dSLionel Sambuc // Set whether this instruction sets M0
426*0a6a1f1dSLionel Sambuc LastInstWritesM0 = false;
427*0a6a1f1dSLionel Sambuc
428*0a6a1f1dSLionel Sambuc unsigned NumOperands = I->getNumOperands();
429*0a6a1f1dSLionel Sambuc for (unsigned i = 0; i < NumOperands; i++) {
430*0a6a1f1dSLionel Sambuc const MachineOperand &Op = I->getOperand(i);
431*0a6a1f1dSLionel Sambuc
432*0a6a1f1dSLionel Sambuc if (Op.isReg() && Op.isDef() && Op.getReg() == AMDGPU::M0)
433*0a6a1f1dSLionel Sambuc LastInstWritesM0 = true;
434*0a6a1f1dSLionel Sambuc }
435*0a6a1f1dSLionel Sambuc }
436*0a6a1f1dSLionel Sambuc
437*0a6a1f1dSLionel Sambuc // FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
438*0a6a1f1dSLionel Sambuc // around other non-memory instructions.
runOnMachineFunction(MachineFunction & MF)439f4a2713aSLionel Sambuc bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
440f4a2713aSLionel Sambuc bool Changes = false;
441f4a2713aSLionel Sambuc
442*0a6a1f1dSLionel Sambuc TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
443*0a6a1f1dSLionel Sambuc TRI =
444*0a6a1f1dSLionel Sambuc static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
445f4a2713aSLionel Sambuc
446f4a2713aSLionel Sambuc MRI = &MF.getRegInfo();
447f4a2713aSLionel Sambuc
448f4a2713aSLionel Sambuc WaitedOn = ZeroCounts;
449f4a2713aSLionel Sambuc LastIssued = ZeroCounts;
450*0a6a1f1dSLionel Sambuc LastOpcodeType = OTHER;
451*0a6a1f1dSLionel Sambuc LastInstWritesM0 = false;
452f4a2713aSLionel Sambuc
453f4a2713aSLionel Sambuc memset(&UsedRegs, 0, sizeof(UsedRegs));
454f4a2713aSLionel Sambuc memset(&DefinedRegs, 0, sizeof(DefinedRegs));
455f4a2713aSLionel Sambuc
456f4a2713aSLionel Sambuc for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
457f4a2713aSLionel Sambuc BI != BE; ++BI) {
458f4a2713aSLionel Sambuc
459f4a2713aSLionel Sambuc MachineBasicBlock &MBB = *BI;
460f4a2713aSLionel Sambuc for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
461f4a2713aSLionel Sambuc I != E; ++I) {
462f4a2713aSLionel Sambuc
463*0a6a1f1dSLionel Sambuc // Wait for everything before a barrier.
464*0a6a1f1dSLionel Sambuc if (I->getOpcode() == AMDGPU::S_BARRIER)
465*0a6a1f1dSLionel Sambuc Changes |= insertWait(MBB, I, LastIssued);
466*0a6a1f1dSLionel Sambuc else
467f4a2713aSLionel Sambuc Changes |= insertWait(MBB, I, handleOperands(*I));
468*0a6a1f1dSLionel Sambuc
469*0a6a1f1dSLionel Sambuc pushInstruction(MBB, I);
470*0a6a1f1dSLionel Sambuc handleSendMsg(MBB, I);
471f4a2713aSLionel Sambuc }
472f4a2713aSLionel Sambuc
473f4a2713aSLionel Sambuc // Wait for everything at the end of the MBB
474f4a2713aSLionel Sambuc Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
475f4a2713aSLionel Sambuc }
476f4a2713aSLionel Sambuc
477f4a2713aSLionel Sambuc return Changes;
478f4a2713aSLionel Sambuc }
479