xref: /llvm-project/llvm/lib/Target/AMDGPU/R600Packetizer.cpp (revision 6a87e9b08bf093ba3ccba8650b89f4d337c497f4)
1 //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// This pass implements instruction packetization for R600. It clears the
/// isLast bit of instructions inside a bundle and substitutes source registers
/// with PreviousVector (PV/PS) registers when applicable.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "llvm/CodeGen/DFAPacketizer.h"
19 #include "llvm/CodeGen/MachineDominators.h"
20 #include "llvm/CodeGen/MachineLoopInfo.h"
21 #include "llvm/CodeGen/ScheduleDAG.h"
22 
23 using namespace llvm;
24 
25 #define DEBUG_TYPE "packets"
26 
27 namespace {
28 
29 class R600Packetizer : public MachineFunctionPass {
30 
31 public:
32   static char ID;
33   R600Packetizer() : MachineFunctionPass(ID) {}
34 
35   void getAnalysisUsage(AnalysisUsage &AU) const override {
36     AU.setPreservesCFG();
37     AU.addRequired<MachineDominatorTree>();
38     AU.addPreserved<MachineDominatorTree>();
39     AU.addRequired<MachineLoopInfo>();
40     AU.addPreserved<MachineLoopInfo>();
41     MachineFunctionPass::getAnalysisUsage(AU);
42   }
43 
44   StringRef getPassName() const override { return "R600 Packetizer"; }
45 
46   bool runOnMachineFunction(MachineFunction &Fn) override;
47 };
48 
49 class R600PacketizerList : public VLIWPacketizerList {
50 private:
51   const R600InstrInfo *TII;
52   const R600RegisterInfo &TRI;
53   bool VLIW5;
54   bool ConsideredInstUsesAlreadyWrittenVectorElement;
55 
56   unsigned getSlot(const MachineInstr &MI) const {
57     return TRI.getHWRegChan(MI.getOperand(0).getReg());
58   }
59 
60   /// \returns register to PV chan mapping for bundle/single instructions that
61   /// immediately precedes I.
62   DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I)
63       const {
64     DenseMap<unsigned, unsigned> Result;
65     I--;
66     if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle())
67       return Result;
68     MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
69     if (I->isBundle())
70       BI++;
71     int LastDstChan = -1;
72     do {
73       bool isTrans = false;
74       int BISlot = getSlot(*BI);
75       if (LastDstChan >= BISlot)
76         isTrans = true;
77       LastDstChan = BISlot;
78       if (TII->isPredicated(*BI))
79         continue;
80       int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::write);
81       if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
82         continue;
83       int DstIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::dst);
84       if (DstIdx == -1) {
85         continue;
86       }
87       Register Dst = BI->getOperand(DstIdx).getReg();
88       if (isTrans || TII->isTransOnly(*BI)) {
89         Result[Dst] = R600::PS;
90         continue;
91       }
92       if (BI->getOpcode() == R600::DOT4_r600 ||
93           BI->getOpcode() == R600::DOT4_eg) {
94         Result[Dst] = R600::PV_X;
95         continue;
96       }
97       if (Dst == R600::OQAP) {
98         continue;
99       }
100       unsigned PVReg = 0;
101       switch (TRI.getHWRegChan(Dst)) {
102       case 0:
103         PVReg = R600::PV_X;
104         break;
105       case 1:
106         PVReg = R600::PV_Y;
107         break;
108       case 2:
109         PVReg = R600::PV_Z;
110         break;
111       case 3:
112         PVReg = R600::PV_W;
113         break;
114       default:
115         llvm_unreachable("Invalid Chan");
116       }
117       Result[Dst] = PVReg;
118     } while ((++BI)->isBundledWithPred());
119     return Result;
120   }
121 
122   void substitutePV(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PVs)
123       const {
124     unsigned Ops[] = {
125       R600::OpName::src0,
126       R600::OpName::src1,
127       R600::OpName::src2
128     };
129     for (unsigned i = 0; i < 3; i++) {
130       int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Ops[i]);
131       if (OperandIdx < 0)
132         continue;
133       Register Src = MI.getOperand(OperandIdx).getReg();
134       const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src);
135       if (It != PVs.end())
136         MI.getOperand(OperandIdx).setReg(It->second);
137     }
138   }
139 public:
140   // Ctor.
141   R600PacketizerList(MachineFunction &MF, const R600Subtarget &ST,
142                      MachineLoopInfo &MLI)
143       : VLIWPacketizerList(MF, MLI, nullptr),
144         TII(ST.getInstrInfo()),
145         TRI(TII->getRegisterInfo()) {
146     VLIW5 = !ST.hasCaymanISA();
147   }
148 
149   // initPacketizerState - initialize some internal flags.
150   void initPacketizerState() override {
151     ConsideredInstUsesAlreadyWrittenVectorElement = false;
152   }
153 
154   // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
155   bool ignorePseudoInstruction(const MachineInstr &MI,
156                                const MachineBasicBlock *MBB) override {
157     return false;
158   }
159 
160   // isSoloInstruction - return true if instruction MI can not be packetized
161   // with any other instruction, which means that MI itself is a packet.
162   bool isSoloInstruction(const MachineInstr &MI) override {
163     if (TII->isVector(MI))
164       return true;
165     if (!TII->isALUInstr(MI.getOpcode()))
166       return true;
167     if (MI.getOpcode() == R600::GROUP_BARRIER)
168       return true;
169     // XXX: This can be removed once the packetizer properly handles all the
170     // LDS instruction group restrictions.
171     return TII->isLDSInstr(MI.getOpcode());
172   }
173 
174   // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
175   // together.
176   bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override {
177     MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
178     if (getSlot(*MII) == getSlot(*MIJ))
179       ConsideredInstUsesAlreadyWrittenVectorElement = true;
180     // Does MII and MIJ share the same pred_sel ?
181     int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel),
182         OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel);
183     Register PredI = (OpI > -1)?MII->getOperand(OpI).getReg() : Register(),
184       PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg() : Register();
185     if (PredI != PredJ)
186       return false;
187     if (SUJ->isSucc(SUI)) {
188       for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) {
189         const SDep &Dep = SUJ->Succs[i];
190         if (Dep.getSUnit() != SUI)
191           continue;
192         if (Dep.getKind() == SDep::Anti)
193           continue;
194         if (Dep.getKind() == SDep::Output)
195           if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg())
196             continue;
197         return false;
198       }
199     }
200 
201     bool ARDef =
202         TII->definesAddressRegister(*MII) || TII->definesAddressRegister(*MIJ);
203     bool ARUse =
204         TII->usesAddressRegister(*MII) || TII->usesAddressRegister(*MIJ);
205 
206     return !ARDef || !ARUse;
207   }
208 
209   // isLegalToPruneDependencies - Is it legal to prune dependece between SUI
210   // and SUJ.
211   bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override {
212     return false;
213   }
214 
215   void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
216     unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600::OpName::last);
217     MI->getOperand(LastOp).setImm(Bit);
218   }
219 
220   bool isBundlableWithCurrentPMI(MachineInstr &MI,
221                                  const DenseMap<unsigned, unsigned> &PV,
222                                  std::vector<R600InstrInfo::BankSwizzle> &BS,
223                                  bool &isTransSlot) {
224     isTransSlot = TII->isTransOnly(MI);
225     assert (!isTransSlot || VLIW5);
226 
227     // Is the dst reg sequence legal ?
228     if (!isTransSlot && !CurrentPacketMIs.empty()) {
229       if (getSlot(MI) <= getSlot(*CurrentPacketMIs.back())) {
230         if (ConsideredInstUsesAlreadyWrittenVectorElement &&
231             !TII->isVectorOnly(MI) && VLIW5) {
232           isTransSlot = true;
233           LLVM_DEBUG({
234             dbgs() << "Considering as Trans Inst :";
235             MI.dump();
236           });
237         }
238         else
239           return false;
240       }
241     }
242 
243     // Are the Constants limitations met ?
244     CurrentPacketMIs.push_back(&MI);
245     if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {
246       LLVM_DEBUG({
247         dbgs() << "Couldn't pack :\n";
248         MI.dump();
249         dbgs() << "with the following packets :\n";
250         for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
251           CurrentPacketMIs[i]->dump();
252           dbgs() << "\n";
253         }
254         dbgs() << "because of Consts read limitations\n";
255       });
256       CurrentPacketMIs.pop_back();
257       return false;
258     }
259 
260     // Is there a BankSwizzle set that meet Read Port limitations ?
261     if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
262             PV, BS, isTransSlot)) {
263       LLVM_DEBUG({
264         dbgs() << "Couldn't pack :\n";
265         MI.dump();
266         dbgs() << "with the following packets :\n";
267         for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
268           CurrentPacketMIs[i]->dump();
269           dbgs() << "\n";
270         }
271         dbgs() << "because of Read port limitations\n";
272       });
273       CurrentPacketMIs.pop_back();
274       return false;
275     }
276 
277     // We cannot read LDS source registers from the Trans slot.
278     if (isTransSlot && TII->readsLDSSrcReg(MI))
279       return false;
280 
281     CurrentPacketMIs.pop_back();
282     return true;
283   }
284 
285   MachineBasicBlock::iterator addToPacket(MachineInstr &MI) override {
286     MachineBasicBlock::iterator FirstInBundle =
287         CurrentPacketMIs.empty() ? &MI : CurrentPacketMIs.front();
288     const DenseMap<unsigned, unsigned> &PV =
289         getPreviousVector(FirstInBundle);
290     std::vector<R600InstrInfo::BankSwizzle> BS;
291     bool isTransSlot;
292 
293     if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
294       for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
295         MachineInstr *MI = CurrentPacketMIs[i];
296         unsigned Op = TII->getOperandIdx(MI->getOpcode(),
297             R600::OpName::bank_swizzle);
298         MI->getOperand(Op).setImm(BS[i]);
299       }
300       unsigned Op =
301           TII->getOperandIdx(MI.getOpcode(), R600::OpName::bank_swizzle);
302       MI.getOperand(Op).setImm(BS.back());
303       if (!CurrentPacketMIs.empty())
304         setIsLastBit(CurrentPacketMIs.back(), 0);
305       substitutePV(MI, PV);
306       MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI);
307       if (isTransSlot) {
308         endPacket(std::next(It)->getParent(), std::next(It));
309       }
310       return It;
311     }
312     endPacket(MI.getParent(), MI);
313     if (TII->isTransOnly(MI))
314       return MI;
315     return VLIWPacketizerList::addToPacket(MI);
316   }
317 };
318 
319 bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
320   const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>();
321   const R600InstrInfo *TII = ST.getInstrInfo();
322 
323   MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
324 
325   // Instantiate the packetizer.
326   R600PacketizerList Packetizer(Fn, ST, MLI);
327 
328   // DFA state table should not be empty.
329   assert(Packetizer.getResourceTracker() && "Empty DFA table!");
330   assert(Packetizer.getResourceTracker()->getInstrItins());
331 
332   if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty())
333     return false;
334 
335   //
336   // Loop over all basic blocks and remove KILL pseudo-instructions
337   // These instructions confuse the dependence analysis. Consider:
338   // D0 = ...   (Insn 0)
339   // R0 = KILL R0, D0 (Insn 1)
340   // R0 = ... (Insn 2)
341   // Here, Insn 1 will result in the dependence graph not emitting an output
342   // dependence between Insn 0 and Insn 2. This can lead to incorrect
343   // packetization
344   //
345   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
346        MBB != MBBe; ++MBB) {
347     MachineBasicBlock::iterator End = MBB->end();
348     MachineBasicBlock::iterator MI = MBB->begin();
349     while (MI != End) {
350       if (MI->isKill() || MI->getOpcode() == R600::IMPLICIT_DEF ||
351           (MI->getOpcode() == R600::CF_ALU && !MI->getOperand(8).getImm())) {
352         MachineBasicBlock::iterator DeleteMI = MI;
353         ++MI;
354         MBB->erase(DeleteMI);
355         End = MBB->end();
356         continue;
357       }
358       ++MI;
359     }
360   }
361 
362   // Loop over all of the basic blocks.
363   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
364        MBB != MBBe; ++MBB) {
365     // Find scheduling regions and schedule / packetize each region.
366     unsigned RemainingCount = MBB->size();
367     for(MachineBasicBlock::iterator RegionEnd = MBB->end();
368         RegionEnd != MBB->begin();) {
369       // The next region starts above the previous region. Look backward in the
370       // instruction stream until we find the nearest boundary.
371       MachineBasicBlock::iterator I = RegionEnd;
372       for(;I != MBB->begin(); --I, --RemainingCount) {
373         if (TII->isSchedulingBoundary(*std::prev(I), &*MBB, Fn))
374           break;
375       }
376       I = MBB->begin();
377 
378       // Skip empty scheduling regions.
379       if (I == RegionEnd) {
380         RegionEnd = std::prev(RegionEnd);
381         --RemainingCount;
382         continue;
383       }
384       // Skip regions with one instruction.
385       if (I == std::prev(RegionEnd)) {
386         RegionEnd = std::prev(RegionEnd);
387         continue;
388       }
389 
390       Packetizer.PacketizeMIs(&*MBB, &*I, RegionEnd);
391       RegionEnd = I;
392     }
393   }
394 
395   return true;
396 
397 }
398 
399 } // end anonymous namespace
400 
// LLVM pass initialization boilerplate for R600Packetizer. The pass argument
// is DEBUG_TYPE ("packets") and its name is "R600 Packetizer"; nothing is
// listed between the BEGIN and END macros.
INITIALIZE_PASS_BEGIN(R600Packetizer, DEBUG_TYPE,
                     "R600 Packetizer", false, false)
INITIALIZE_PASS_END(R600Packetizer, DEBUG_TYPE,
                    "R600 Packetizer", false, false)

// Definition of the static pass identifier declared in R600Packetizer.
char R600Packetizer::ID = 0;

// Reference to the pass identifier, defined in the llvm namespace so the pass
// can be referred to without access to the class in this file's anonymous
// namespace.
char &llvm::R600PacketizerID = R600Packetizer::ID;
409 
/// Creates a new heap-allocated R600Packetizer pass and returns it as a
/// FunctionPass pointer.
llvm::FunctionPass *llvm::createR600Packetizer() {
  return new R600Packetizer();
}
413