xref: /llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp (revision db2fa0672210705ff1ca6d38f487970e4536809f)
1 //=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This class implements a deterministic finite automaton (DFA) based
9 // packetizing mechanism for VLIW architectures. It provides APIs to
10 // determine whether there exists a legal mapping of instructions to
11 // functional unit assignments in a packet. The DFA is auto-generated from
12 // the target's Schedule.td file.
13 //
14 // A DFA consists of 3 major elements: states, inputs, and transitions. For
15 // the packetizing mechanism, the input is the set of instruction classes for
16 // a target. The state models all possible combinations of functional unit
17 // consumption for a given set of instructions in a packet. A transition
18 // models the addition of an instruction to a packet. In the DFA constructed
19 // by this class, if an instruction can be added to a packet, then a valid
20 // transition exists from the corresponding state. Invalid transitions
21 // indicate that the instruction cannot be added to the current packet.
22 //
23 //===----------------------------------------------------------------------===//
24 
25 #include "llvm/CodeGen/DFAPacketizer.h"
26 #include "llvm/ADT/StringExtras.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineInstr.h"
29 #include "llvm/CodeGen/MachineInstrBundle.h"
30 #include "llvm/CodeGen/ScheduleDAG.h"
31 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
32 #include "llvm/CodeGen/TargetInstrInfo.h"
33 #include "llvm/CodeGen/TargetSubtargetInfo.h"
34 #include "llvm/MC/MCInstrDesc.h"
35 #include "llvm/MC/MCInstrItineraries.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <algorithm>
40 #include <cassert>
41 #include <iterator>
42 #include <memory>
43 #include <vector>
44 
45 using namespace llvm;
46 
47 #define DEBUG_TYPE "packets"
48 
49 static cl::opt<unsigned> InstrLimit("dfa-instr-limit", cl::Hidden,
50   cl::init(0), cl::desc("If present, stops packetizing after N instructions"));
51 
52 static unsigned InstrCount = 0;
53 
54 // --------------------------------------------------------------------
55 // Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
56 
57 static DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
58   return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
59 }
60 
61 /// Return the DFAInput for an instruction class input vector.
62 /// This function is used in both DFAPacketizer.cpp and in
63 /// DFAPacketizerEmitter.cpp.
64 static DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
65   DFAInput InsnInput = 0;
66   assert((InsnClass.size() <= DFA_MAX_RESTERMS) &&
67          "Exceeded maximum number of DFA terms");
68   for (auto U : InsnClass)
69     InsnInput = addDFAFuncUnits(InsnInput, U);
70   return InsnInput;
71 }
72 
73 // --------------------------------------------------------------------
74 
75 DFAPacketizer::DFAPacketizer(const InstrItineraryData *I,
76                              const DFAStateInput (*SIT)[2], const unsigned *SET,
77                              const std::pair<unsigned, unsigned> *RTT,
78                              const unsigned *RTET)
79     : InstrItins(I), DFAStateInputTable(SIT), DFAStateEntryTable(SET),
80       DFAResourceTransitionTable(RTT), DFAResourceTransitionEntryTable(RTET) {
81   // Make sure DFA types are large enough for the number of terms & resources.
82   static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <=
83                     (8 * sizeof(DFAInput)),
84                 "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput");
85   static_assert(
86       (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)),
87       "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput");
88   clearResources();
89 }
90 
91 // Read the DFA transition table and update CachedTable.
92 //
93 // Format of the transition tables:
94 // DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid
95 //                           transitions
96 // DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable
97 //                         for the ith state
98 //
99 void DFAPacketizer::ReadTable(unsigned int state) {
100   unsigned ThisStateIdx = DFAStateEntryTable[state];
101   unsigned NextStateIdxInTable = DFAStateEntryTable[state + 1];
102   // Early exit in case CachedTable has already contains this
103   // state's transitions.
104   if (CachedTable.count(UnsignPair(state, DFAStateInputTable[ThisStateIdx][0])))
105     return;
106 
107   for (unsigned TransitionIdx = ThisStateIdx;
108        TransitionIdx < NextStateIdxInTable; TransitionIdx++) {
109     auto TransitionPair =
110         UnsignPair(state, DFAStateInputTable[TransitionIdx][0]);
111     CachedTable[TransitionPair] = DFAStateInputTable[TransitionIdx][1];
112 
113     if (TrackResources) {
114       unsigned I = DFAResourceTransitionEntryTable[TransitionIdx];
115       unsigned E = DFAResourceTransitionEntryTable[TransitionIdx + 1];
116       CachedResourceTransitions[TransitionPair] = makeArrayRef(
117           &DFAResourceTransitionTable[I], &DFAResourceTransitionTable[E]);
118     }
119   }
120 }
121 
122 // Return the DFAInput for an instruction class.
123 DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) {
124   // Note: this logic must match that in DFAPacketizerDefs.h for input vectors.
125   DFAInput InsnInput = 0;
126   unsigned i = 0;
127   (void)i;
128   for (const InstrStage *IS = InstrItins->beginStage(InsnClass),
129        *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS) {
130     InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits());
131     assert((i++ < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs");
132   }
133   return InsnInput;
134 }
135 
136 // Return the DFAInput for an instruction class input vector.
137 DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) {
138   return getDFAInsnInput(InsnClass);
139 }
140 
141 // Check if the resources occupied by a MCInstrDesc are available in the
142 // current state.
143 bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) {
144   unsigned InsnClass = MID->getSchedClass();
145   DFAInput InsnInput = getInsnInput(InsnClass);
146   UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
147   ReadTable(CurrentState);
148   return CachedTable.count(StateTrans) != 0;
149 }
150 
151 // Reserve the resources occupied by a MCInstrDesc and change the current
152 // state to reflect that change.
153 void DFAPacketizer::reserveResources(const MCInstrDesc *MID) {
154   unsigned InsnClass = MID->getSchedClass();
155   DFAInput InsnInput = getInsnInput(InsnClass);
156   UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
157   ReadTable(CurrentState);
158 
159   if (TrackResources) {
160     DenseMap<unsigned, SmallVector<unsigned, 8>> NewResourceStates;
161     for (const auto &KV : CachedResourceTransitions[StateTrans]) {
162       assert(ResourceStates.count(KV.first));
163       NewResourceStates[KV.second] = ResourceStates[KV.first];
164       NewResourceStates[KV.second].push_back(KV.second);
165     }
166     ResourceStates = NewResourceStates;
167   }
168   assert(CachedTable.count(StateTrans) != 0);
169   CurrentState = CachedTable[StateTrans];
170 }
171 
172 // Check if the resources occupied by a machine instruction are available
173 // in the current state.
174 bool DFAPacketizer::canReserveResources(MachineInstr &MI) {
175   const MCInstrDesc &MID = MI.getDesc();
176   return canReserveResources(&MID);
177 }
178 
179 // Reserve the resources occupied by a machine instruction and change the
180 // current state to reflect that change.
181 void DFAPacketizer::reserveResources(MachineInstr &MI) {
182   const MCInstrDesc &MID = MI.getDesc();
183   reserveResources(&MID);
184 }
185 
186 unsigned DFAPacketizer::getUsedResources(unsigned InstIdx) {
187   assert(TrackResources && "getUsedResources requires resource tracking!");
188   // Assert that there is at least one example of a valid bundle format.
189   assert(!ResourceStates.empty() && "Invalid bundle!");
190   SmallVectorImpl<unsigned> &RS = ResourceStates.begin()->second;
191 
192   // RS stores the cumulative resources used up to and including the I'th
193   // instruction. The 0th instruction is the base case.
194   if (InstIdx == 0)
195     return RS[0];
196   // Return the difference between the cumulative resources used by InstIdx and
197   // its predecessor.
198   return RS[InstIdx] ^ RS[InstIdx - 1];
199 }
200 
201 namespace llvm {
202 
203 // This class extends ScheduleDAGInstrs and overrides the schedule method
204 // to build the dependence graph.
205 class DefaultVLIWScheduler : public ScheduleDAGInstrs {
206 private:
207   AliasAnalysis *AA;
208   /// Ordered list of DAG postprocessing steps.
209   std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
210 
211 public:
212   DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
213                        AliasAnalysis *AA);
214 
215   // Actual scheduling work.
216   void schedule() override;
217 
218   /// DefaultVLIWScheduler takes ownership of the Mutation object.
219   void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
220     Mutations.push_back(std::move(Mutation));
221   }
222 
223 protected:
224   void postprocessDAG();
225 };
226 
227 } // end namespace llvm
228 
229 DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
230                                            MachineLoopInfo &MLI,
231                                            AliasAnalysis *AA)
232     : ScheduleDAGInstrs(MF, &MLI), AA(AA) {
233   CanHandleTerminators = true;
234 }
235 
236 /// Apply each ScheduleDAGMutation step in order.
237 void DefaultVLIWScheduler::postprocessDAG() {
238   for (auto &M : Mutations)
239     M->apply(this);
240 }
241 
242 void DefaultVLIWScheduler::schedule() {
243   // Build the scheduling graph.
244   buildSchedGraph(AA);
245   postprocessDAG();
246 }
247 
248 VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf,
249                                        MachineLoopInfo &mli, AliasAnalysis *aa)
250     : MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) {
251   ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget());
252   ResourceTracker->setTrackResources(true);
253   VLIWScheduler = new DefaultVLIWScheduler(MF, mli, AA);
254 }
255 
256 VLIWPacketizerList::~VLIWPacketizerList() {
257   delete VLIWScheduler;
258   delete ResourceTracker;
259 }
260 
261 // End the current packet, bundle packet instructions and reset DFA state.
262 void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
263                                    MachineBasicBlock::iterator MI) {
264   LLVM_DEBUG({
265     if (!CurrentPacketMIs.empty()) {
266       dbgs() << "Finalizing packet:\n";
267       unsigned Idx = 0;
268       for (MachineInstr *MI : CurrentPacketMIs) {
269         unsigned R = ResourceTracker->getUsedResources(Idx++);
270         dbgs() << " * [res:0x" << utohexstr(R) << "] " << *MI;
271       }
272     }
273   });
274   if (CurrentPacketMIs.size() > 1) {
275     MachineInstr &MIFirst = *CurrentPacketMIs.front();
276     finalizeBundle(*MBB, MIFirst.getIterator(), MI.getInstrIterator());
277   }
278   CurrentPacketMIs.clear();
279   ResourceTracker->clearResources();
280   LLVM_DEBUG(dbgs() << "End packet\n");
281 }
282 
283 // Bundle machine instructions into packets.
284 void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
285                                       MachineBasicBlock::iterator BeginItr,
286                                       MachineBasicBlock::iterator EndItr) {
287   assert(VLIWScheduler && "VLIW Scheduler is not initialized!");
288   VLIWScheduler->startBlock(MBB);
289   VLIWScheduler->enterRegion(MBB, BeginItr, EndItr,
290                              std::distance(BeginItr, EndItr));
291   VLIWScheduler->schedule();
292 
293   LLVM_DEBUG({
294     dbgs() << "Scheduling DAG of the packetize region\n";
295     VLIWScheduler->dump();
296   });
297 
298   // Generate MI -> SU map.
299   MIToSUnit.clear();
300   for (SUnit &SU : VLIWScheduler->SUnits)
301     MIToSUnit[SU.getInstr()] = &SU;
302 
303   bool LimitPresent = InstrLimit.getPosition();
304 
305   // The main packetizer loop.
306   for (; BeginItr != EndItr; ++BeginItr) {
307     if (LimitPresent) {
308       if (InstrCount >= InstrLimit) {
309         EndItr = BeginItr;
310         break;
311       }
312       InstrCount++;
313     }
314     MachineInstr &MI = *BeginItr;
315     initPacketizerState();
316 
317     // End the current packet if needed.
318     if (isSoloInstruction(MI)) {
319       endPacket(MBB, MI);
320       continue;
321     }
322 
323     // Ignore pseudo instructions.
324     if (ignorePseudoInstruction(MI, MBB))
325       continue;
326 
327     SUnit *SUI = MIToSUnit[&MI];
328     assert(SUI && "Missing SUnit Info!");
329 
330     // Ask DFA if machine resource is available for MI.
331     LLVM_DEBUG(dbgs() << "Checking resources for adding MI to packet " << MI);
332 
333     bool ResourceAvail = ResourceTracker->canReserveResources(MI);
334     LLVM_DEBUG({
335       if (ResourceAvail)
336         dbgs() << "  Resources are available for adding MI to packet\n";
337       else
338         dbgs() << "  Resources NOT available\n";
339     });
340     if (ResourceAvail && shouldAddToPacket(MI)) {
341       // Dependency check for MI with instructions in CurrentPacketMIs.
342       for (auto MJ : CurrentPacketMIs) {
343         SUnit *SUJ = MIToSUnit[MJ];
344         assert(SUJ && "Missing SUnit Info!");
345 
346         LLVM_DEBUG(dbgs() << "  Checking against MJ " << *MJ);
347         // Is it legal to packetize SUI and SUJ together.
348         if (!isLegalToPacketizeTogether(SUI, SUJ)) {
349           LLVM_DEBUG(dbgs() << "  Not legal to add MI, try to prune\n");
350           // Allow packetization if dependency can be pruned.
351           if (!isLegalToPruneDependencies(SUI, SUJ)) {
352             // End the packet if dependency cannot be pruned.
353             LLVM_DEBUG(dbgs()
354                        << "  Could not prune dependencies for adding MI\n");
355             endPacket(MBB, MI);
356             break;
357           }
358           LLVM_DEBUG(dbgs() << "  Pruned dependence for adding MI\n");
359         }
360       }
361     } else {
362       LLVM_DEBUG(if (ResourceAvail) dbgs()
363                  << "Resources are available, but instruction should not be "
364                     "added to packet\n  "
365                  << MI);
366       // End the packet if resource is not available, or if the instruction
367       // shoud not be added to the current packet.
368       endPacket(MBB, MI);
369     }
370 
371     // Add MI to the current packet.
372     LLVM_DEBUG(dbgs() << "* Adding MI to packet " << MI << '\n');
373     BeginItr = addToPacket(MI);
374   } // For all instructions in the packetization range.
375 
376   // End any packet left behind.
377   endPacket(MBB, EndItr);
378   VLIWScheduler->exitRegion();
379   VLIWScheduler->finishBlock();
380 }
381 
382 bool VLIWPacketizerList::alias(const MachineMemOperand &Op1,
383                                const MachineMemOperand &Op2,
384                                bool UseTBAA) const {
385   if (!Op1.getValue() || !Op2.getValue())
386     return true;
387 
388   int64_t MinOffset = std::min(Op1.getOffset(), Op2.getOffset());
389   int64_t Overlapa = Op1.getSize() + Op1.getOffset() - MinOffset;
390   int64_t Overlapb = Op2.getSize() + Op2.getOffset() - MinOffset;
391 
392   AliasResult AAResult =
393       AA->alias(MemoryLocation(Op1.getValue(), Overlapa,
394                                UseTBAA ? Op1.getAAInfo() : AAMDNodes()),
395                 MemoryLocation(Op2.getValue(), Overlapb,
396                                UseTBAA ? Op2.getAAInfo() : AAMDNodes()));
397 
398   return AAResult != NoAlias;
399 }
400 
401 bool VLIWPacketizerList::alias(const MachineInstr &MI1,
402                                const MachineInstr &MI2,
403                                bool UseTBAA) const {
404   if (MI1.memoperands_empty() || MI2.memoperands_empty())
405     return true;
406 
407   for (const MachineMemOperand *Op1 : MI1.memoperands())
408     for (const MachineMemOperand *Op2 : MI2.memoperands())
409       if (alias(*Op1, *Op2, UseTBAA))
410         return true;
411   return false;
412 }
413 
414 // Add a DAG mutation object to the ordered list.
415 void VLIWPacketizerList::addMutation(
416       std::unique_ptr<ScheduleDAGMutation> Mutation) {
417   VLIWScheduler->addMutation(std::move(Mutation));
418 }
419