xref: /llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp (revision da71203e6fc6b8e08c9979204506d385e9cb07b8)
1 //===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // MachineScheduler schedules machine instructions after phi elimination. It
10 // preserves LiveIntervals so it can be invoked before register allocation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/CodeGen/VLIWMachineScheduler.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/CodeGen/DFAPacketizer.h"
17 #include "llvm/CodeGen/MachineBasicBlock.h"
18 #include "llvm/CodeGen/MachineFunction.h"
19 #include "llvm/CodeGen/MachineInstr.h"
20 #include "llvm/CodeGen/MachineLoopInfo.h"
21 #include "llvm/CodeGen/RegisterClassInfo.h"
22 #include "llvm/CodeGen/RegisterPressure.h"
23 #include "llvm/CodeGen/ScheduleDAG.h"
24 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
25 #include "llvm/CodeGen/TargetInstrInfo.h"
26 #include "llvm/CodeGen/TargetOpcodes.h"
27 #include "llvm/CodeGen/TargetRegisterInfo.h"
28 #include "llvm/CodeGen/TargetSchedule.h"
29 #include "llvm/CodeGen/TargetSubtargetInfo.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/raw_ostream.h"
33 #include <algorithm>
34 #include <cassert>
35 #include <iomanip>
36 #include <limits>
37 #include <memory>
38 #include <sstream>
39 
40 using namespace llvm;
41 
42 #define DEBUG_TYPE "machine-scheduler"
43 
44 static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden,
45                                          cl::init(false));
46 
47 static cl::opt<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden,
48                                        cl::init(true));
49 
50 static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
51                                                 cl::Hidden, cl::init(1));
52 
53 // Check if the scheduler should penalize instructions that are available to
54 // early due to a zero-latency dependence.
55 static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
56                                      cl::init(true));
57 
58 // This value is used to determine if a register class is a high pressure set.
59 // We compute the maximum number of registers needed and divided by the total
60 // available. Then, we compare the result to this value.
61 static cl::opt<float> RPThreshold("vliw-misched-reg-pressure", cl::Hidden,
62                                   cl::init(0.75f),
63                                   cl::desc("High register pressure threhold."));
64 
65 VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI,
66                                      const TargetSchedModel *SM)
67     : TII(STI.getInstrInfo()), SchedModel(SM) {
68   ResourcesModel = createPacketizer(STI);
69 
70   // This hard requirement could be relaxed,
71   // but for now do not let it proceed.
72   assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
73 
74   Packet.reserve(SchedModel->getIssueWidth());
75   Packet.clear();
76   ResourcesModel->clearResources();
77 }
78 
79 void VLIWResourceModel::reset() {
80   Packet.clear();
81   ResourcesModel->clearResources();
82 }
83 
84 VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel; }
85 
86 /// Return true if there is a dependence between SUd and SUu.
87 bool VLIWResourceModel::hasDependence(const SUnit *SUd, const SUnit *SUu) {
88   if (SUd->Succs.size() == 0)
89     return false;
90 
91   for (const auto &S : SUd->Succs) {
92     // Since we do not add pseudos to packets, might as well
93     // ignore order dependencies.
94     if (S.isCtrl())
95       continue;
96 
97     if (S.getSUnit() == SUu && S.getLatency() > 0)
98       return true;
99   }
100   return false;
101 }
102 
103 /// Check if scheduling of this SU is possible
104 /// in the current packet.
105 /// It is _not_ precise (statefull), it is more like
106 /// another heuristic. Many corner cases are figured
107 /// empirically.
108 bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
109   if (!SU || !SU->getInstr())
110     return false;
111 
112   // First see if the pipeline could receive this instruction
113   // in the current cycle.
114   switch (SU->getInstr()->getOpcode()) {
115   default:
116     if (!ResourcesModel->canReserveResources(*SU->getInstr()))
117       return false;
118     break;
119   case TargetOpcode::EXTRACT_SUBREG:
120   case TargetOpcode::INSERT_SUBREG:
121   case TargetOpcode::SUBREG_TO_REG:
122   case TargetOpcode::REG_SEQUENCE:
123   case TargetOpcode::IMPLICIT_DEF:
124   case TargetOpcode::COPY:
125   case TargetOpcode::INLINEASM:
126   case TargetOpcode::INLINEASM_BR:
127     break;
128   }
129 
130   // Now see if there are no other dependencies to instructions already
131   // in the packet.
132   if (IsTop) {
133     for (const SUnit *U : Packet)
134       if (hasDependence(U, SU))
135         return false;
136   } else {
137     for (const SUnit *U : Packet)
138       if (hasDependence(SU, U))
139         return false;
140   }
141   return true;
142 }
143 
/// Keep track of available resources.
///
/// \param SU    Unit being issued; nullptr forces a packet/cycle reset.
/// \param IsTop True when scheduling top-down (affects the fit check).
/// \returns True if issuing SU required starting a new packet (cycle).
bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {
  bool startNewCycle = false;
  // Artificially reset state.
  if (!SU) {
    reset();
    TotalPackets++;
    return false;
  }
  // If this SU does not fit in the packet or the packet is now full
  // start a new one.
  if (!isResourceAvailable(SU, IsTop) ||
      Packet.size() >= SchedModel->getIssueWidth()) {
    reset();
    TotalPackets++;
    startNewCycle = true;
  }

  // Reserve DFA resources for real instructions only; the listed pseudo
  // opcodes consume no machine resources.
  switch (SU->getInstr()->getOpcode()) {
  default:
    ResourcesModel->reserveResources(*SU->getInstr());
    break;
  case TargetOpcode::EXTRACT_SUBREG:
  case TargetOpcode::INSERT_SUBREG:
  case TargetOpcode::SUBREG_TO_REG:
  case TargetOpcode::REG_SEQUENCE:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::CFI_INSTRUCTION:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::COPY:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
    break;
  }
  // SU always joins the (possibly freshly reset) current packet.
  Packet.push_back(SU);

#ifndef NDEBUG
  // Dump the contents of the current packet (debug output only; the dump
  // calls themselves are guarded by LLVM_DEBUG).
  LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
  for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
    LLVM_DEBUG(dbgs() << "\t[" << i << "] SU(");
    LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
    LLVM_DEBUG(Packet[i]->getInstr()->dump());
  }
#endif

  return startNewCycle;
}
192 
193 DFAPacketizer *
194 VLIWResourceModel::createPacketizer(const TargetSubtargetInfo &STI) const {
195   return STI.getInstrInfo()->CreateTargetScheduleState(STI);
196 }
197 
/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
void VLIWMachineScheduler::schedule() {
  LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
                    << printMBBReference(*BB) << " " << BB->getName()
                    << " in_func " << BB->getParent()->getName()
                    << " at loop depth " << MLI->getLoopDepth(BB) << " \n");

  // Build the DAG for this region with register-pressure tracking enabled.
  buildDAGWithRegPressure();

  Topo.InitDAGTopologicalSorting();

  // Postprocess the DAG to add platform-specific artificial dependencies.
  postProcessDAG();

  SmallVector<SUnit *, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  // Initialize the strategy before modifying the DAG.
  SchedImpl->initialize(this);

  // Debug-only: report the maximum height and depth over all DAG nodes.
  LLVM_DEBUG({
    unsigned maxH = 0;
    for (const SUnit &SU : SUnits)
      if (SU.getHeight() > maxH)
        maxH = SU.getHeight();
    dbgs() << "Max Height " << maxH << "\n";
  });
  LLVM_DEBUG({
    unsigned maxD = 0;
    for (const SUnit &SU : SUnits)
      if (SU.getDepth() > maxD)
        maxD = SU.getDepth();
    dbgs() << "Max Depth " << maxD << "\n";
  });
  LLVM_DEBUG(dump());
  if (ViewMISchedDAGs)
    viewGraph();

  initQueues(TopRoots, BotRoots);

  // Main loop: the strategy picks a node and a direction; the base scheduler
  // moves the instruction and both sides update their bookkeeping.
  bool IsTopNode = false;
  while (true) {
    LLVM_DEBUG(
        dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
    if (!SU)
      break;

    // Respect the scheduler's instruction limit (a debugging aid), if any.
    if (!checkSchedLimit())
      break;

    scheduleMI(SU, IsTopNode);

    // Notify the scheduling strategy after updating the DAG.
    SchedImpl->schedNode(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  // Reinsert debug values around their original positions.
  placeDebugValues();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}
269 
270 void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
271   DAG = static_cast<VLIWMachineScheduler *>(dag);
272   SchedModel = DAG->getSchedModel();
273 
274   Top.init(DAG, SchedModel);
275   Bot.init(DAG, SchedModel);
276 
277   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
278   // are disabled, then these HazardRecs will be disabled.
279   const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
280   const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
281   const TargetInstrInfo *TII = STI.getInstrInfo();
282   delete Top.HazardRec;
283   delete Bot.HazardRec;
284   Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
285   Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
286 
287   delete Top.ResourceModel;
288   delete Bot.ResourceModel;
289   Top.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());
290   Bot.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());
291 
292   const std::vector<unsigned> &MaxPressure =
293       DAG->getRegPressure().MaxSetPressure;
294   HighPressureSets.assign(MaxPressure.size(), false);
295   for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) {
296     unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i);
297     HighPressureSets[i] =
298         ((float)MaxPressure[i] > ((float)Limit * RPThreshold));
299   }
300 }
301 
302 VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel(
303     const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const {
304   return new VLIWResourceModel(STI, SchedModel);
305 }
306 
307 void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
308   for (const SDep &PI : SU->Preds) {
309     unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
310     unsigned MinLatency = PI.getLatency();
311 #ifndef NDEBUG
312     Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
313 #endif
314     if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
315       SU->TopReadyCycle = PredReadyCycle + MinLatency;
316   }
317 
318   if (!SU->isScheduled)
319     Top.releaseNode(SU, SU->TopReadyCycle);
320 }
321 
322 void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
323   assert(SU->getInstr() && "Scheduled SUnit must have instr");
324 
325   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E;
326        ++I) {
327     unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
328     unsigned MinLatency = I->getLatency();
329 #ifndef NDEBUG
330     Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
331 #endif
332     if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
333       SU->BotReadyCycle = SuccReadyCycle + MinLatency;
334   }
335 
336   if (!SU->isScheduled)
337     Bot.releaseNode(SU, SU->BotReadyCycle);
338 }
339 
340 ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() {
341   delete ResourceModel;
342   delete HazardRec;
343 }
344 
345 /// Does this SU have a hazard within the current instruction group.
346 ///
347 /// The scheduler supports two modes of hazard recognition. The first is the
348 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
349 /// supports highly complicated in-order reservation tables
350 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
351 ///
352 /// The second is a streamlined mechanism that checks for hazards based on
353 /// simple counters that the scheduler itself maintains. It explicitly checks
354 /// for instruction dispatch limitations, including the number of micro-ops that
355 /// can dispatch per cycle.
356 ///
357 /// TODO: Also check whether the SU must start a new group.
358 bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
359   if (HazardRec->isEnabled())
360     return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
361 
362   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
363   if (IssueCount + uops > SchedModel->getIssueWidth())
364     return true;
365 
366   return false;
367 }
368 
369 void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(
370     SUnit *SU, unsigned ReadyCycle) {
371   if (ReadyCycle < MinReadyCycle)
372     MinReadyCycle = ReadyCycle;
373 
374   // Check for interlocks first. For the purpose of other heuristics, an
375   // instruction that cannot issue appears as if it's not in the ReadyQueue.
376   if (ReadyCycle > CurrCycle || checkHazard(SU))
377 
378     Pending.push(SU);
379   else
380     Available.push(SU);
381 }
382 
383 /// Move the boundary of scheduled code by one cycle.
384 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
385   unsigned Width = SchedModel->getIssueWidth();
386   IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
387 
388   assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
389          "MinReadyCycle uninitialized");
390   unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
391 
392   if (!HazardRec->isEnabled()) {
393     // Bypass HazardRec virtual calls.
394     CurrCycle = NextCycle;
395   } else {
396     // Bypass getHazardType calls in case of long latency.
397     for (; CurrCycle != NextCycle; ++CurrCycle) {
398       if (isTop())
399         HazardRec->AdvanceCycle();
400       else
401         HazardRec->RecedeCycle();
402     }
403   }
404   CheckPending = true;
405 
406   LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
407                     << CurrCycle << '\n');
408 }
409 
410 /// Move the boundary of scheduled code by one SUnit.
411 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
412   bool startNewCycle = false;
413 
414   // Update the reservation table.
415   if (HazardRec->isEnabled()) {
416     if (!isTop() && SU->isCall) {
417       // Calls are scheduled with their preceding instructions. For bottom-up
418       // scheduling, clear the pipeline state before emitting.
419       HazardRec->Reset();
420     }
421     HazardRec->EmitInstruction(SU);
422   }
423 
424   // Update DFA model.
425   startNewCycle = ResourceModel->reserveResources(SU, isTop());
426 
427   // Check the instruction group dispatch limit.
428   // TODO: Check if this SU must end a dispatch group.
429   IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
430   if (startNewCycle) {
431     LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
432     bumpCycle();
433   } else
434     LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle "
435                       << CurrCycle << '\n');
436 }
437 
/// Release pending ready nodes in to the available queue. This makes them
/// visible to heuristics.
void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
  // If the available queue is empty, it is safe to reset MinReadyCycle.
  if (Available.empty())
    MinReadyCycle = std::numeric_limits<unsigned>::max();

  // Check to see if any of the pending instructions are ready to issue.  If
  // so, add them to the available queue.
  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    SUnit *SU = *(Pending.begin() + i);
    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;

    // Track the earliest cycle at which any pending node becomes ready.
    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;

    // Not ready in the current cycle; leave it pending.
    if (ReadyCycle > CurrCycle)
      continue;

    // Ready by cycle, but issuing it now would cause a hazard.
    if (checkHazard(SU))
      continue;

    Available.push(SU);
    // Remove from Pending; compensate index and size for the erasure so the
    // element shifted into slot i is not skipped.
    Pending.remove(Pending.begin() + i);
    --i;
    --e;
  }
  CheckPending = false;
}
467 
468 /// Remove SU from the ready set for this boundary.
469 void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
470   if (Available.isInQueue(SU))
471     Available.remove(Available.find(SU));
472   else {
473     assert(Pending.isInQueue(SU) && "bad ready count");
474     Pending.remove(Pending.find(SU));
475   }
476 }
477 
/// If this queue only has one ready candidate, return it. As a side effect,
/// advance the cycle until at least one node is ready. If multiple instructions
/// are ready, return NULL.
SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
  if (CheckPending)
    releasePending();

  // Decide whether the cycle must advance: either nothing is available, or
  // the single available node cannot be packetized yet (or is still blocked
  // by weak edges) while other nodes remain pending.
  auto AdvanceCycle = [this]() {
    if (Available.empty())
      return true;
    if (Available.size() == 1 && Pending.size() > 0)
      return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) ||
             getWeakLeft(*Available.begin(), isTop()) != 0;
    return false;
  };
  for (unsigned i = 0; AdvanceCycle(); ++i) {
    // Termination: a bounded number of empty cycles must drain any hazard or
    // outstanding latency; otherwise the hazard is permanent.
    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
           "permanent hazard");
    (void)i;
    // Emit an empty packet, move to the next cycle, then re-scan Pending.
    ResourceModel->reserveResources(nullptr, isTop());
    bumpCycle();
    releasePending();
  }
  if (Available.size() == 1)
    return *Available.begin();
  return nullptr;
}
505 
506 #ifndef NDEBUG
507 void ConvergingVLIWScheduler::traceCandidate(const char *Label,
508                                              const ReadyQueue &Q, SUnit *SU,
509                                              int Cost, PressureChange P) {
510   dbgs() << Label << " " << Q.getName() << " ";
511   if (P.isValid())
512     dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
513            << P.getUnitInc() << " ";
514   else
515     dbgs() << "     ";
516   dbgs() << "cost(" << Cost << ")\t";
517   DAG->dumpNode(*SU);
518 }
519 
520 // Very detailed queue dump, to be used with higher verbosity levels.
521 void ConvergingVLIWScheduler::readyQueueVerboseDump(
522     const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
523     ReadyQueue &Q) {
524   RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
525 
526   dbgs() << ">>> " << Q.getName() << "\n";
527   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
528     RegPressureDelta RPDelta;
529     TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
530                                     DAG->getRegionCriticalPSets(),
531                                     DAG->getRegPressure().MaxSetPressure);
532     std::stringstream dbgstr;
533     dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
534     dbgs() << dbgstr.str();
535     SchedulingCost(Q, *I, Candidate, RPDelta, true);
536     dbgs() << "\t";
537     (*I)->getInstr()->dump();
538   }
539   dbgs() << "\n";
540 }
541 #endif
542 
543 /// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
544 /// of SU, return true (we may have duplicates)
545 static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {
546   if (SU->NumPredsLeft == 0)
547     return false;
548 
549   for (auto &Pred : SU->Preds) {
550     // We found an available, but not scheduled, predecessor.
551     if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))
552       return false;
553   }
554 
555   return true;
556 }
557 
558 /// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
559 /// of SU, return true (we may have duplicates)
560 static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
561   if (SU->NumSuccsLeft == 0)
562     return false;
563 
564   for (auto &Succ : SU->Succs) {
565     // We found an available, but not scheduled, successor.
566     if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))
567       return false;
568   }
569   return true;
570 }
571 
572 /// Check if the instruction changes the register pressure of a register in the
573 /// high pressure set. The function returns a negative value if the pressure
574 /// decreases and a positive value is the pressure increases. If the instruction
575 /// doesn't use a high pressure register or doesn't change the register
576 /// pressure, then return 0.
577 int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) {
578   PressureDiff &PD = DAG->getPressureDiff(SU);
579   for (const auto &P : PD) {
580     if (!P.isValid())
581       continue;
582     // The pressure differences are computed bottom-up, so the comparison for
583     // an increase is positive in the bottom direction, but negative in the
584     //  top-down direction.
585     if (HighPressureSets[P.getPSet()])
586       return (isBotUp ? P.getUnitInc() : -P.getUnitInc());
587   }
588   return 0;
589 }
590 
/// Single point to compute overall scheduling cost.
/// TODO: More heuristics will be used soon.
///
/// \param Q         Ready queue the candidate came from (top or bottom).
/// \param SU        Candidate node; ignored if null or already scheduled.
/// \param Candidate Current best candidate (used only by callers; not read
///                  here except implicitly via the verbose trace).
/// \param Delta     Register pressure change if SU were scheduled now.
/// \param verbose   Emit the per-heuristic debug breakdown.
/// \returns The combined heuristic priority; higher is better.
int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
                                            SchedCandidate &Candidate,
                                            RegPressureDelta &Delta,
                                            bool verbose) {
  // Initial trivial priority.
  int ResCount = 1;

  // Do not waste time on a node that is already scheduled.
  if (!SU || SU->isScheduled)
    return ResCount;

  LLVM_DEBUG(if (verbose) dbgs()
             << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
  // Forced priority is high.
  if (SU->isScheduleHigh) {
    ResCount += PriorityOne;
    LLVM_DEBUG(dbgs() << "H|");
  }

  unsigned IsAvailableAmt = 0;
  // Critical path first: reward height (top) or depth (bottom) when the
  // zone is latency bound.
  if (Q.getID() == TopQID) {
    if (Top.isLatencyBound(SU)) {
      LLVM_DEBUG(if (verbose) dbgs() << "LB|");
      ResCount += (SU->getHeight() * ScaleTwo);
    }

    LLVM_DEBUG(if (verbose) {
      std::stringstream dbgstr;
      dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
      dbgs() << dbgstr.str();
    });

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Top.ResourceModel->isResourceAvailable(SU, true)) {
      IsAvailableAmt = (PriorityTwo + PriorityThree);
      ResCount += IsAvailableAmt;
      LLVM_DEBUG(if (verbose) dbgs() << "A|");
    } else
      LLVM_DEBUG(if (verbose) dbgs() << " |");
  } else {
    if (Bot.isLatencyBound(SU)) {
      LLVM_DEBUG(if (verbose) dbgs() << "LB|");
      ResCount += (SU->getDepth() * ScaleTwo);
    }

    LLVM_DEBUG(if (verbose) {
      std::stringstream dbgstr;
      dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
      dbgs() << dbgstr.str();
    });

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Bot.ResourceModel->isResourceAvailable(SU, false)) {
      IsAvailableAmt = (PriorityTwo + PriorityThree);
      ResCount += IsAvailableAmt;
      LLVM_DEBUG(if (verbose) dbgs() << "A|");
    } else
      LLVM_DEBUG(if (verbose) dbgs() << " |");
  }

  unsigned NumNodesBlocking = 0;
  if (Q.getID() == TopQID) {
    // How many SUs does it block from scheduling?
    // Look at all of the successors of this node.
    // Count the number of nodes that
    // this node is the sole unscheduled node for.
    if (Top.isLatencyBound(SU))
      for (const SDep &SI : SU->Succs)
        if (isSingleUnscheduledPred(SI.getSUnit(), SU))
          ++NumNodesBlocking;
  } else {
    // How many unscheduled predecessors block this node?
    if (Bot.isLatencyBound(SU))
      for (const SDep &PI : SU->Preds)
        if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
          ++NumNodesBlocking;
  }
  ResCount += (NumNodesBlocking * ScaleTwo);

  LLVM_DEBUG(if (verbose) {
    std::stringstream dbgstr;
    dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
    dbgs() << dbgstr.str();
  });

  // Factor in reg pressure as a heuristic.
  if (!IgnoreBBRegPressure) {
    // Decrease priority by the amount that register pressure exceeds the limit.
    ResCount -= (Delta.Excess.getUnitInc() * PriorityOne);
    // Decrease priority if register pressure exceeds the limit.
    ResCount -= (Delta.CriticalMax.getUnitInc() * PriorityOne);
    // Decrease priority slightly if register pressure would increase over the
    // current maximum.
    ResCount -= (Delta.CurrentMax.getUnitInc() * PriorityTwo);
    // If there are register pressure issues, then we remove the value added for
    // the instruction being available. The rationale is that we really don't
    // want to schedule an instruction that causes a spill.
    if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 &&
        (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() ||
         Delta.CurrentMax.getUnitInc()))
      ResCount -= IsAvailableAmt;
    LLVM_DEBUG(if (verbose) {
      dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
             << Delta.CriticalMax.getUnitInc() << "/"
             << Delta.CurrentMax.getUnitInc() << ")|";
    });
  }

  // Give preference to a zero latency instruction if the dependent
  // instruction is in the current packet.
  if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) {
    for (const SDep &PI : SU->Preds) {
      if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
          PI.getLatency() == 0 &&
          Top.ResourceModel->isInPacket(PI.getSUnit())) {
        ResCount += PriorityThree;
        LLVM_DEBUG(if (verbose) dbgs() << "Z|");
      }
    }
  } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) {
    for (const SDep &SI : SU->Succs) {
      if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
          SI.getLatency() == 0 &&
          Bot.ResourceModel->isInPacket(SI.getSUnit())) {
        ResCount += PriorityThree;
        LLVM_DEBUG(if (verbose) dbgs() << "Z|");
      }
    }
  }

  // If the instruction has a non-zero latency dependence with an instruction in
  // the current packet, then it should not be scheduled yet. The case occurs
  // when the dependent instruction is scheduled in a new packet, so the
  // scheduler updates the current cycle and pending instructions become
  // available.
  if (CheckEarlyAvail) {
    if (Q.getID() == TopQID) {
      for (const auto &PI : SU->Preds) {
        if (PI.getLatency() > 0 &&
            Top.ResourceModel->isInPacket(PI.getSUnit())) {
          ResCount -= PriorityOne;
          LLVM_DEBUG(if (verbose) dbgs() << "D|");
        }
      }
    } else {
      for (const auto &SI : SU->Succs) {
        if (SI.getLatency() > 0 &&
            Bot.ResourceModel->isInPacket(SI.getSUnit())) {
          ResCount -= PriorityOne;
          LLVM_DEBUG(if (verbose) dbgs() << "D|");
        }
      }
    }
  }

  LLVM_DEBUG(if (verbose) {
    std::stringstream dbgstr;
    dbgstr << "Total " << std::setw(4) << ResCount << ")";
    dbgs() << dbgstr.str();
  });

  return ResCount;
}
759 
/// Pick the best candidate from the top queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
///
/// \param Zone      Boundary whose Available queue is scanned.
/// \param RPTracker Pressure tracker (temporarily mutated to compute deltas).
/// \param Candidate In/out current best candidate; updated when a scanned
///                  node wins one of the comparison stages below.
/// \returns How the winning candidate was selected (NoCand if unchanged).
ConvergingVLIWScheduler::CandResult
ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary &Zone,
                                           const RegPressureTracker &RPTracker,
                                           SchedCandidate &Candidate) {
  ReadyQueue &Q = Zone.Available;
  LLVM_DEBUG(if (SchedDebugVerboseLevel > 1)
                 readyQueueVerboseDump(RPTracker, Candidate, Q);
             else Q.dump(););

  // getMaxPressureDelta temporarily modifies the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);

  // BestSU remains NULL if no top candidates beat the best existing candidate.
  CandResult FoundCandidate = NoCand;
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    RegPressureDelta RPDelta;
    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
                                    DAG->getRegionCriticalPSets(),
                                    DAG->getRegPressure().MaxSetPressure);

    int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);

    // Initialize the candidate if needed.
    if (!Candidate.SU) {
      LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = NodeOrder;
      continue;
    }

    // Choose node order for negative cost candidates. There is no good
    // candidate in this case.
    if (CurrentCost < 0 && Candidate.SCost < 0) {
      if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
          (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
        LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = NodeOrder;
      }
      continue;
    }

    // Best cost.
    if (CurrentCost > Candidate.SCost) {
      LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = BestCost;
      continue;
    }

    // Choose an instruction that does not depend on an artificial edge.
    unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID));
    unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID));
    if (CurrWeak != CandWeak) {
      if (CurrWeak < CandWeak) {
        LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = Weak;
      }
      continue;
    }

    // On a cost tie, for latency-bound nodes prefer the one with more edges
    // in the scheduling direction (it unblocks more work).
    if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) {
      unsigned CurrSize, CandSize;
      if (Q.getID() == TopQID) {
        CurrSize = (*I)->Succs.size();
        CandSize = Candidate.SU->Succs.size();
      } else {
        CurrSize = (*I)->Preds.size();
        CandSize = Candidate.SU->Preds.size();
      }
      if (CurrSize > CandSize) {
        LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = BestCost;
      }
      // Keep the old candidate if it's a better candidate. That is, don't use
      // the subsequent tie breaker.
      if (CurrSize != CandSize)
        continue;
    }

    // Tie breaker.
    // To avoid scheduling indeterminism, we need a tie breaker
    // for the case when cost is identical for two nodes.
    if (UseNewerCandidate && CurrentCost == Candidate.SCost) {
      if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
          (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
        LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = NodeOrder;
        continue;
      }
    }

    // Fall through to original instruction order.
    // Only consider node order if Candidate was chosen from this Q.
    if (FoundCandidate == NoCand)
      continue;
  }
  return FoundCandidate;
}
879 
880 /// Pick the best candidate node from either the top or bottom queue.
881 SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
882   // Schedule as far as possible in the direction of no choice. This is most
883   // efficient, but also provides the best heuristics for CriticalPSets.
884   if (SUnit *SU = Bot.pickOnlyChoice()) {
885     LLVM_DEBUG(dbgs() << "Picked only Bottom\n");
886     IsTopNode = false;
887     return SU;
888   }
889   if (SUnit *SU = Top.pickOnlyChoice()) {
890     LLVM_DEBUG(dbgs() << "Picked only Top\n");
891     IsTopNode = true;
892     return SU;
893   }
894   SchedCandidate BotCand;
895   // Prefer bottom scheduling when heuristics are silent.
896   CandResult BotResult =
897       pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
898   assert(BotResult != NoCand && "failed to find the first candidate");
899 
900   // If either Q has a single candidate that provides the least increase in
901   // Excess pressure, we can immediately schedule from that Q.
902   //
903   // RegionCriticalPSets summarizes the pressure within the scheduled region and
904   // affects picking from either Q. If scheduling in one direction must
905   // increase pressure for one of the excess PSets, then schedule in that
906   // direction first to provide more freedom in the other direction.
907   if (BotResult == SingleExcess || BotResult == SingleCritical) {
908     LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n");
909     IsTopNode = false;
910     return BotCand.SU;
911   }
912   // Check if the top Q has a better candidate.
913   SchedCandidate TopCand;
914   CandResult TopResult =
915       pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
916   assert(TopResult != NoCand && "failed to find the first candidate");
917 
918   if (TopResult == SingleExcess || TopResult == SingleCritical) {
919     LLVM_DEBUG(dbgs() << "Prefered Top Node\n");
920     IsTopNode = true;
921     return TopCand.SU;
922   }
923   // If either Q has a single candidate that minimizes pressure above the
924   // original region's pressure pick it.
925   if (BotResult == SingleMax) {
926     LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n");
927     IsTopNode = false;
928     return BotCand.SU;
929   }
930   if (TopResult == SingleMax) {
931     LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n");
932     IsTopNode = true;
933     return TopCand.SU;
934   }
935   if (TopCand.SCost > BotCand.SCost) {
936     LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n");
937     IsTopNode = true;
938     return TopCand.SU;
939   }
940   // Otherwise prefer the bottom candidate in node order.
941   LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n");
942   IsTopNode = false;
943   return BotCand.SU;
944 }
945 
946 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
947 SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
948   if (DAG->top() == DAG->bottom()) {
949     assert(Top.Available.empty() && Top.Pending.empty() &&
950            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
951     return nullptr;
952   }
953   SUnit *SU;
954   if (PreRADirection == MISched::TopDown) {
955     SU = Top.pickOnlyChoice();
956     if (!SU) {
957       SchedCandidate TopCand;
958       CandResult TopResult =
959           pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
960       assert(TopResult != NoCand && "failed to find the first candidate");
961       (void)TopResult;
962       SU = TopCand.SU;
963     }
964     IsTopNode = true;
965   } else if (PreRADirection == MISched::BottomUp) {
966     SU = Bot.pickOnlyChoice();
967     if (!SU) {
968       SchedCandidate BotCand;
969       CandResult BotResult =
970           pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
971       assert(BotResult != NoCand && "failed to find the first candidate");
972       (void)BotResult;
973       SU = BotCand.SU;
974     }
975     IsTopNode = false;
976   } else {
977     SU = pickNodeBidrectional(IsTopNode);
978   }
979   if (SU->isTopReady())
980     Top.removeReady(SU);
981   if (SU->isBottomReady())
982     Bot.removeReady(SU);
983 
984   LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
985                     << " Scheduling instruction in cycle "
986                     << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " ("
987                     << reportPackets() << ")\n";
988              DAG->dumpNode(*SU));
989   return SU;
990 }
991 
992 /// Update the scheduler's state after scheduling a node. This is the same node
993 /// that was just returned by pickNode(). However, VLIWMachineScheduler needs
994 /// to update it's state based on the current cycle before MachineSchedStrategy
995 /// does.
996 void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
997   if (IsTopNode) {
998     Top.bumpNode(SU);
999     SU->TopReadyCycle = Top.CurrCycle;
1000   } else {
1001     Bot.bumpNode(SU);
1002     SU->BotReadyCycle = Bot.CurrCycle;
1003   }
1004 }
1005