xref: /llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp (revision f1ff84c64ea7c6caeca1fd5239919aac3e624475)
1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // MachineScheduler schedules machine instructions after phi elimination. It
11 // preserves LiveIntervals so it can be invoked before register allocation.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #define DEBUG_TYPE "misched"
16 
17 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
18 #include "llvm/CodeGen/MachineScheduler.h"
19 #include "llvm/CodeGen/Passes.h"
20 #include "llvm/CodeGen/RegisterClassInfo.h"
21 #include "llvm/CodeGen/ScheduleDAGILP.h"
22 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
23 #include "llvm/Analysis/AliasAnalysis.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/ErrorHandling.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include "llvm/ADT/OwningPtr.h"
29 #include "llvm/ADT/PriorityQueue.h"
30 
31 #include <queue>
32 
33 using namespace llvm;
34 
35 namespace llvm {
36 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
37                            cl::desc("Force top-down list scheduling"));
38 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
39                             cl::desc("Force bottom-up list scheduling"));
40 }
41 
42 #ifndef NDEBUG
43 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
44   cl::desc("Pop up a window to show MISched dags after they are processed"));
45 
46 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
47   cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
48 #else
49 static bool ViewMISchedDAGs = false;
50 #endif // NDEBUG
51 
52 // Threshold to very roughly model an out-of-order processor's instruction
53 // buffers. If the actual value of this threshold matters much in practice, then
54 // it can be specified by the machine model. For now, it's an experimental
55 // tuning knob to determine when and if it matters.
56 static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden,
57   cl::desc("Allow expected latency to exceed the critical path by N cycles "
58            "before attempting to balance ILP"),
59   cl::init(10U));
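
// Usage sketch (illustrative; assumes machine scheduling is enabled for the
// target): the hidden knobs above can be combined on the llc command line to
// bisect a misched problem or probe the ILP heuristic, e.g.
//
//   llc -ilp-window=5 test.ll
//   llc -misched-cutoff=20 -view-misched-dags test.ll  (debug builds only)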
60 
61 //===----------------------------------------------------------------------===//
62 // Machine Instruction Scheduling Pass and Registry
63 //===----------------------------------------------------------------------===//
64 
65 MachineSchedContext::MachineSchedContext():
66     MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
67   RegClassInfo = new RegisterClassInfo();
68 }
69 
70 MachineSchedContext::~MachineSchedContext() {
71   delete RegClassInfo;
72 }
73 
74 namespace {
75 /// MachineScheduler runs after coalescing and before register allocation.
76 class MachineScheduler : public MachineSchedContext,
77                          public MachineFunctionPass {
78 public:
79   MachineScheduler();
80 
81   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
82 
83   virtual void releaseMemory() {}
84 
85   virtual bool runOnMachineFunction(MachineFunction&);
86 
87   virtual void print(raw_ostream &O, const Module* = 0) const;
88 
89   static char ID; // Class identification, replacement for typeinfo
90 };
91 } // namespace
92 
93 char MachineScheduler::ID = 0;
94 
95 char &llvm::MachineSchedulerID = MachineScheduler::ID;
96 
97 INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
98                       "Machine Instruction Scheduler", false, false)
99 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
100 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
101 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
102 INITIALIZE_PASS_END(MachineScheduler, "misched",
103                     "Machine Instruction Scheduler", false, false)
104 
105 MachineScheduler::MachineScheduler()
106 : MachineFunctionPass(ID) {
107   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
108 }
109 
110 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
111   AU.setPreservesCFG();
112   AU.addRequiredID(MachineDominatorsID);
113   AU.addRequired<MachineLoopInfo>();
114   AU.addRequired<AliasAnalysis>();
115   AU.addRequired<TargetPassConfig>();
116   AU.addRequired<SlotIndexes>();
117   AU.addPreserved<SlotIndexes>();
118   AU.addRequired<LiveIntervals>();
119   AU.addPreserved<LiveIntervals>();
120   MachineFunctionPass::getAnalysisUsage(AU);
121 }
122 
123 MachinePassRegistry MachineSchedRegistry::Registry;
124 
125 /// A dummy default scheduler factory, used to detect whether the scheduler
126 /// has been overridden on the command line.
127 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
128   return 0;
129 }
130 
131 /// MachineSchedOpt allows command line selection of the scheduler.
132 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
133                RegisterPassParser<MachineSchedRegistry> >
134 MachineSchedOpt("misched",
135                 cl::init(&useDefaultMachineSched), cl::Hidden,
136                 cl::desc("Machine instruction scheduler to use"));
137 
138 static MachineSchedRegistry
139 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
140                      useDefaultMachineSched);
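
// A minimal sketch of how an alternative scheduler would hook in, mirroring
// DefaultSchedRegistry above. The factory and strategy names are hypothetical;
// once registered, the scheduler is selectable with -misched=sketch:
//
//   static ScheduleDAGInstrs *createSketchSched(MachineSchedContext *C) {
//     return new ScheduleDAGMI(C, new SketchStrategy());
//   }
//   static MachineSchedRegistry
//   SketchSchedRegistry("sketch", "Illustrative scheduler for this example.",
//                       createSketchSched);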
141 
142 /// Forward declare the standard machine scheduler. This will be used as the
143 /// default scheduler if the target does not set a default.
144 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
145 
146 
147 /// Decrement this iterator until reaching the top or a non-debug instr.
148 static MachineBasicBlock::iterator
149 priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
150   assert(I != Beg && "reached the top of the region, cannot decrement");
151   while (--I != Beg) {
152     if (!I->isDebugValue())
153       break;
154   }
155   return I;
156 }
157 
158 /// If this iterator is a debug value, increment until reaching the End or a
159 /// non-debug instruction.
160 static MachineBasicBlock::iterator
161 nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
162   for (; I != End; ++I) {
163     if (!I->isDebugValue())
164       break;
165   }
166   return I;
167 }
168 
169 /// Top-level MachineScheduler pass driver.
170 ///
171 /// Visit blocks in function order. Divide each block into scheduling regions
172 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
173 /// consistent with the DAG builder, which traverses the interior of the
174 /// scheduling regions bottom-up.
175 ///
176 /// This design avoids exposing scheduling boundaries to the DAG builder,
177 /// simplifying the DAG builder's support for "special" target instructions.
178 /// At the same time the design allows target schedulers to operate across
179 /// scheduling boundaries, for example to bundle the boundary instructions
180 /// without reordering them. This creates complexity, because the target
181 /// scheduler must update the RegionBegin and RegionEnd positions cached by
182 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
183 /// design would be to split blocks at scheduling boundaries, but LLVM has a
184 /// general bias against block splitting purely for implementation simplicity.
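///
/// For example, a block whose only scheduling boundary is a call is divided
/// into two regions that are visited bottom-up:
///
///   BB: I0 I1 <call> I2 I3
///     region 1: [I2, BB->end())  scheduled first
///     region 2: [I0, <call>)     scheduled second; the call itself is the
///                                boundary and is never reordered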
185 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
186   DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
187 
188   // Initialize the context of the pass.
189   MF = &mf;
190   MLI = &getAnalysis<MachineLoopInfo>();
191   MDT = &getAnalysis<MachineDominatorTree>();
192   PassConfig = &getAnalysis<TargetPassConfig>();
193   AA = &getAnalysis<AliasAnalysis>();
194 
195   LIS = &getAnalysis<LiveIntervals>();
196   const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
197 
198   RegClassInfo->runOnMachineFunction(*MF);
199 
200   // Select the scheduler, or set the default.
201   MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
202   if (Ctor == useDefaultMachineSched) {
203     // Get the default scheduler set by the target.
204     Ctor = MachineSchedRegistry::getDefault();
205     if (!Ctor) {
206       Ctor = createConvergingSched;
207       MachineSchedRegistry::setDefault(Ctor);
208     }
209   }
210   // Instantiate the selected scheduler.
211   OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
212 
213   // Visit all machine basic blocks.
214   //
215   // TODO: Visit blocks in global postorder or postorder within the bottom-up
216   // loop tree. Then we can optionally compute global RegPressure.
217   for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
218        MBB != MBBEnd; ++MBB) {
219 
220     Scheduler->startBlock(MBB);
221 
222     // Break the block into scheduling regions [I, RegionEnd), and schedule each
223   // region as soon as it is discovered. RegionEnd points to the scheduling
224     // boundary at the bottom of the region. The DAG does not include RegionEnd,
225     // but the region does (i.e. the next RegionEnd is above the previous
226     // RegionBegin). If the current block has no terminator then RegionEnd ==
227     // MBB->end() for the bottom region.
228     //
229     // The Scheduler may insert instructions during either schedule() or
230     // exitRegion(), even for empty regions. So the local iterators 'I' and
231     // 'RegionEnd' are invalid across these calls.
232     unsigned RemainingInstrs = MBB->size();
233     for (MachineBasicBlock::iterator RegionEnd = MBB->end();
234         RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
235 
236       // Avoid decrementing RegionEnd for blocks with no terminator.
237       if (RegionEnd != MBB->end()
238           || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
239         --RegionEnd;
240         // Count the boundary instruction.
241         --RemainingInstrs;
242       }
243 
244       // The next region starts above the previous region. Look backward in the
245       // instruction stream until we find the nearest boundary.
246       MachineBasicBlock::iterator I = RegionEnd;
247       for (; I != MBB->begin(); --I, --RemainingInstrs) {
248         if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
249           break;
250       }
251       // Notify the scheduler of the region, even if we may skip scheduling
252       // it. Perhaps it still needs to be bundled.
253       Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);
254 
255       // Skip empty scheduling regions (0 or 1 schedulable instructions).
256       if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
257         // Close the current region. Bundle the terminator if needed.
258         // This invalidates 'RegionEnd' and 'I'.
259         Scheduler->exitRegion();
260         continue;
261       }
262       DEBUG(dbgs() << "********** MI Scheduling **********\n");
263       DEBUG(dbgs() << MF->getName()
264             << ":BB#" << MBB->getNumber() << "\n  From: " << *I << "    To: ";
265             if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
266             else dbgs() << "End";
267             dbgs() << " Remaining: " << RemainingInstrs << "\n");
268 
269       // Schedule a region: possibly reorder instructions.
270       // This invalidates 'RegionEnd' and 'I'.
271       Scheduler->schedule();
272 
273       // Close the current region.
274       Scheduler->exitRegion();
275 
276       // Scheduling has invalidated the current iterator 'I'. Ask the
277       // scheduler for the top of its scheduled region.
278       RegionEnd = Scheduler->begin();
279     }
280     assert(RemainingInstrs == 0 && "Instruction count mismatch!");
281     Scheduler->finishBlock();
282   }
283   Scheduler->finalizeSchedule();
284   DEBUG(LIS->print(dbgs()));
285   return true;
286 }
287 
288 void MachineScheduler::print(raw_ostream &O, const Module* m) const {
289   // unimplemented
290 }
291 
292 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
293 void ReadyQueue::dump() {
294   dbgs() << Name << ": ";
295   for (unsigned i = 0, e = Queue.size(); i < e; ++i)
296     dbgs() << Queue[i]->NodeNum << " ";
297   dbgs() << "\n";
298 }
299 #endif
300 
301 //===----------------------------------------------------------------------===//
302 // ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
303 // preservation.
304 //===----------------------------------------------------------------------===//
305 
306 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
307 /// NumPredsLeft reaches zero, release the successor node.
308 ///
309 /// FIXME: Adjust SuccSU height based on MinLatency.
310 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
311   SUnit *SuccSU = SuccEdge->getSUnit();
312 
313   if (SuccEdge->isWeak()) {
314     --SuccSU->WeakPredsLeft;
315     return;
316   }
317 #ifndef NDEBUG
318   if (SuccSU->NumPredsLeft == 0) {
319     dbgs() << "*** Scheduling failed! ***\n";
320     SuccSU->dump(this);
321     dbgs() << " has been released too many times!\n";
322     llvm_unreachable(0);
323   }
324 #endif
325   --SuccSU->NumPredsLeft;
326   if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
327     SchedImpl->releaseTopNode(SuccSU);
328 }
329 
330 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
331 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
332   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
333        I != E; ++I) {
334     releaseSucc(SU, &*I);
335   }
336 }
337 
338 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
339 /// NumSuccsLeft reaches zero, release the predecessor node.
340 ///
341 /// FIXME: Adjust PredSU height based on MinLatency.
342 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
343   SUnit *PredSU = PredEdge->getSUnit();
344 
345   if (PredEdge->isWeak()) {
346     --PredSU->WeakSuccsLeft;
347     return;
348   }
349 #ifndef NDEBUG
350   if (PredSU->NumSuccsLeft == 0) {
351     dbgs() << "*** Scheduling failed! ***\n";
352     PredSU->dump(this);
353     dbgs() << " has been released too many times!\n";
354     llvm_unreachable(0);
355   }
356 #endif
357   --PredSU->NumSuccsLeft;
358   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
359     SchedImpl->releaseBottomNode(PredSU);
360 }
361 
362 /// releasePredecessors - Call releasePred on each of SU's predecessors.
363 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
364   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
365        I != E; ++I) {
366     releasePred(SU, &*I);
367   }
368 }
369 
370 void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
371                                     MachineBasicBlock::iterator InsertPos) {
372   // Advance RegionBegin if the first instruction moves down.
373   if (&*RegionBegin == MI)
374     ++RegionBegin;
375 
376   // Update the instruction stream.
377   BB->splice(InsertPos, BB, MI);
378 
379   // Update LiveIntervals
380   LIS->handleMove(MI, /*UpdateFlags=*/true);
381 
382   // Recede RegionBegin if an instruction moves above the first.
383   if (RegionBegin == InsertPos)
384     RegionBegin = MI;
385 }
386 
387 bool ScheduleDAGMI::checkSchedLimit() {
388 #ifndef NDEBUG
389   if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
390     CurrentTop = CurrentBottom;
391     return false;
392   }
393   ++NumInstrsScheduled;
394 #endif
395   return true;
396 }
397 
398 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
399 /// crossing a scheduling boundary. [begin, end) includes all instructions in
400 /// the region, including the boundary itself and single-instruction regions
401 /// that don't get scheduled.
402 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
403                                 MachineBasicBlock::iterator begin,
404                                 MachineBasicBlock::iterator end,
405                                 unsigned endcount)
406 {
407   ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
408 
409   // For convenience remember the end of the liveness region.
410   LiveRegionEnd =
411     (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
412 }
413 
414 // Set up the register pressure trackers for the top and bottom scheduled
415 // regions.
416 void ScheduleDAGMI::initRegPressure() {
417   TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
418   BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
419 
420   // Close the RPTracker to finalize live ins.
421   RPTracker.closeRegion();
422 
423   DEBUG(RPTracker.getPressure().dump(TRI));
424 
425   // Initialize the live ins and live outs.
426   TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
427   BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
428 
429   // Close one end of the tracker so we can call
430   // getMaxUpward/DownwardPressureDelta before advancing across any
431   // instructions. This converts currently live regs into live ins/outs.
432   TopRPTracker.closeTop();
433   BotRPTracker.closeBottom();
434 
435   // Account for liveness generated by the region boundary.
436   if (LiveRegionEnd != RegionEnd)
437     BotRPTracker.recede();
438 
439   assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
440 
441   // Cache the list of excess pressure sets in this region. This will also track
442   // the max pressure in the scheduled code for these sets.
443   RegionCriticalPSets.clear();
444   std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
445   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
446     unsigned Limit = TRI->getRegPressureSetLimit(i);
447     DEBUG(dbgs() << TRI->getRegPressureSetName(i)
448           << " Limit " << Limit
449           << " Actual " << RegionPressure[i] << "\n");
450     if (RegionPressure[i] > Limit)
451       RegionCriticalPSets.push_back(PressureElement(i, 0));
452   }
453   DEBUG(dbgs() << "Excess PSets: ";
454         for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
455           dbgs() << TRI->getRegPressureSetName(
456             RegionCriticalPSets[i].PSetID) << " ";
457         dbgs() << "\n");
458 }
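
// Worked example with illustrative numbers: if this region's max pressure for
// a set, say GPR with a limit of 32, reaches 34, then GPR is recorded in
// RegionCriticalPSets and the heuristics below will avoid candidates that
// increase its pressure further.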
459 
460 // FIXME: When the pressure tracker deals in pressure differences then we won't
461 // iterate over all RegionCriticalPSets[i].
462 void ScheduleDAGMI::
463 updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
464   for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
465     unsigned ID = RegionCriticalPSets[i].PSetID;
466     int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
467     if ((int)NewMaxPressure[ID] > MaxUnits)
468       MaxUnits = NewMaxPressure[ID];
469   }
470 }
471 
472 /// schedule - Called back from MachineScheduler::runOnMachineFunction
473 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
474 /// only includes instructions that have DAG nodes, not scheduling boundaries.
475 ///
476 /// This is a skeletal driver, with all the functionality pushed into helpers,
477 /// so that it can be easily extended by experimental schedulers. Generally,
478 /// implementing MachineSchedStrategy should be sufficient to implement a new
479 /// scheduling algorithm. However, if a scheduler further subclasses
480 /// ScheduleDAGMI then it will want to override this virtual method in order to
481 /// update any specialized state.
482 void ScheduleDAGMI::schedule() {
483   buildDAGWithRegPressure();
484 
485   postprocessDAG();
486 
487   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
488           SUnits[su].dumpAll(this));
489 
490   if (ViewMISchedDAGs) viewGraph();
491 
492   initQueues();
493 
494   bool IsTopNode = false;
495   while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
496     assert(!SU->isScheduled && "Node already scheduled");
497     if (!checkSchedLimit())
498       break;
499 
500     scheduleMI(SU, IsTopNode);
501 
502     updateQueues(SU, IsTopNode);
503   }
504   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
505 
506   placeDebugValues();
507 
508   DEBUG({
509       unsigned BBNum = top()->getParent()->getNumber();
510       dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
511       dumpSchedule();
512       dbgs() << '\n';
513     });
514 }
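
// A minimal sketch of the MachineSchedStrategy interface mentioned above
// (hypothetical strategy, for illustration only): schedule bottom-up in
// reverse ready order with no heuristics at all.
//
//   struct SketchStrategy : public MachineSchedStrategy {
//     std::vector<SUnit*> ReadyQ;
//     virtual void initialize(ScheduleDAGMI *DAG) { ReadyQ.clear(); }
//     virtual SUnit *pickNode(bool &IsTopNode) {
//       IsTopNode = false;
//       if (ReadyQ.empty()) return NULL;
//       SUnit *SU = ReadyQ.back();
//       ReadyQ.pop_back();
//       return SU;
//     }
//     virtual void schedNode(SUnit *SU, bool IsTopNode) {}
//     virtual void releaseTopNode(SUnit *SU) {}
//     virtual void releaseBottomNode(SUnit *SU) { ReadyQ.push_back(SU); }
//   };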
515 
516 /// Build the DAG and set up the three register pressure trackers.
517 void ScheduleDAGMI::buildDAGWithRegPressure() {
518   // Initialize the register pressure tracker used by buildSchedGraph.
519   RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
520 
521   // Account for liveness generated by the region boundary.
522   if (LiveRegionEnd != RegionEnd)
523     RPTracker.recede();
524 
525   // Build the DAG, and compute current register pressure.
526   buildSchedGraph(AA, &RPTracker);
527   if (ViewMISchedDAGs) viewGraph();
528 
529   // Initialize top/bottom trackers after computing region pressure.
530   initRegPressure();
531 }
532 
533 /// Apply each ScheduleDAGMutation step in order.
534 void ScheduleDAGMI::postprocessDAG() {
535   for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
536     Mutations[i]->apply(this);
537   }
538 }
539 
540 // Release all DAG roots for scheduling.
541 //
542 // Nodes with unreleased weak edges can still be roots.
543 void ScheduleDAGMI::releaseRoots() {
544   SmallVector<SUnit*, 16> BotRoots;
545 
546   for (std::vector<SUnit>::iterator
547          I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
548     SUnit *SU = &(*I);
549     // A SUnit is ready to top schedule if it has no predecessors.
550     if (!I->NumPredsLeft && SU != &EntrySU)
551       SchedImpl->releaseTopNode(SU);
552     // A SUnit is ready to bottom schedule if it has no successors.
553     if (!I->NumSuccsLeft && SU != &ExitSU)
554       BotRoots.push_back(SU);
555   }
556   // Release bottom roots in reverse order so the higher priority nodes appear
557   // first. This is more natural and slightly more efficient.
558   for (SmallVectorImpl<SUnit*>::const_reverse_iterator
559          I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
560     SchedImpl->releaseBottomNode(*I);
561 }
562 
563 /// Identify DAG roots and setup scheduler queues.
564 void ScheduleDAGMI::initQueues() {
565 
566   // Initialize the strategy before modifying the DAG.
567   SchedImpl->initialize(this);
568 
569   // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
570   releaseRoots();
571 
572   releaseSuccessors(&EntrySU);
573   releasePredecessors(&ExitSU);
574 
575   SchedImpl->registerRoots();
576 
577   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
578   CurrentBottom = RegionEnd;
579 }
580 
581 /// Move an instruction and update register pressure.
582 void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
583   // Move the instruction to its new location in the instruction stream.
584   MachineInstr *MI = SU->getInstr();
585 
586   if (IsTopNode) {
587     assert(SU->isTopReady() && "node still has unscheduled dependencies");
588     if (&*CurrentTop == MI)
589       CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
590     else {
591       moveInstruction(MI, CurrentTop);
592       TopRPTracker.setPos(MI);
593     }
594 
595     // Update top scheduled pressure.
596     TopRPTracker.advance();
597     assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
598     updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
599   }
600   else {
601     assert(SU->isBottomReady() && "node still has unscheduled dependencies");
602     MachineBasicBlock::iterator priorII =
603       priorNonDebug(CurrentBottom, CurrentTop);
604     if (&*priorII == MI)
605       CurrentBottom = priorII;
606     else {
607       if (&*CurrentTop == MI) {
608         CurrentTop = nextIfDebug(++CurrentTop, priorII);
609         TopRPTracker.setPos(CurrentTop);
610       }
611       moveInstruction(MI, CurrentBottom);
612       CurrentBottom = MI;
613     }
614     // Update bottom scheduled pressure.
615     BotRPTracker.recede();
616     assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
617     updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
618   }
619 }
620 
621 /// Update scheduler queues after scheduling an instruction.
622 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
623   // Release dependent instructions for scheduling.
624   if (IsTopNode)
625     releaseSuccessors(SU);
626   else
627     releasePredecessors(SU);
628 
629   SU->isScheduled = true;
630 
631   // Notify the scheduling strategy after updating the DAG.
632   SchedImpl->schedNode(SU, IsTopNode);
633 }
634 
635 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
636 void ScheduleDAGMI::placeDebugValues() {
637   // If first instruction was a DBG_VALUE then put it back.
638   if (FirstDbgValue) {
639     BB->splice(RegionBegin, BB, FirstDbgValue);
640     RegionBegin = FirstDbgValue;
641   }
642 
643   for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
644          DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
645     std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
646     MachineInstr *DbgValue = P.first;
647     MachineBasicBlock::iterator OrigPrevMI = P.second;
648     BB->splice(++OrigPrevMI, BB, DbgValue);
649     if (OrigPrevMI == llvm::prior(RegionEnd))
650       RegionEnd = DbgValue;
651   }
652   DbgValues.clear();
653   FirstDbgValue = NULL;
654 }
655 
656 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
657 void ScheduleDAGMI::dumpSchedule() const {
658   for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
659     if (SUnit *SU = getSUnit(&(*MI)))
660       SU->dump(this);
661     else
662       dbgs() << "Missing SUnit\n";
663   }
664 }
665 #endif
666 
667 //===----------------------------------------------------------------------===//
668 // ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
669 //===----------------------------------------------------------------------===//
670 
671 namespace {
672 /// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
673 /// the schedule.
674 class ConvergingScheduler : public MachineSchedStrategy {
675 public:
676   /// Represent the type of SchedCandidate found within a single queue.
677   /// pickNodeBidirectional depends on these listed by decreasing priority.
678   enum CandReason {
679     NoCand, SingleExcess, SingleCritical, ResourceReduce, ResourceDemand,
680     BotHeightReduce, BotPathReduce, TopDepthReduce, TopPathReduce,
681     SingleMax, MultiPressure, NextDefUse, NodeOrder};
682 
683 #ifndef NDEBUG
684   static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
685 #endif
686 
687   /// Policy for scheduling the next instruction in the candidate's zone.
688   struct CandPolicy {
689     bool ReduceLatency;
690     unsigned ReduceResIdx;
691     unsigned DemandResIdx;
692 
693     CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
694   };
695 
696   /// Status of an instruction's critical resource consumption.
697   struct SchedResourceDelta {
698     // Count critical resources in the scheduled region required by SU.
699     unsigned CritResources;
700 
701     // Count critical resources from another region consumed by SU.
702     unsigned DemandedResources;
703 
704     SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
705 
706     bool operator==(const SchedResourceDelta &RHS) const {
707       return CritResources == RHS.CritResources
708         && DemandedResources == RHS.DemandedResources;
709     }
710     bool operator!=(const SchedResourceDelta &RHS) const {
711       return !operator==(RHS);
712     }
713   };
714 
715   /// Store the state used by ConvergingScheduler heuristics, required for the
716   /// lifetime of one invocation of pickNode().
717   struct SchedCandidate {
718     CandPolicy Policy;
719 
720     // The best SUnit candidate.
721     SUnit *SU;
722 
723     // The reason for this candidate.
724     CandReason Reason;
725 
726     // Register pressure values for the best candidate.
727     RegPressureDelta RPDelta;
728 
729     // Critical resource consumption of the best candidate.
730     SchedResourceDelta ResDelta;
731 
732     SchedCandidate(const CandPolicy &policy)
733     : Policy(policy), SU(NULL), Reason(NoCand) {}
734 
735     bool isValid() const { return SU; }
736 
737     // Copy the status of another candidate without changing policy.
738     void setBest(SchedCandidate &Best) {
739       assert(Best.Reason != NoCand && "uninitialized Sched candidate");
740       SU = Best.SU;
741       Reason = Best.Reason;
742       RPDelta = Best.RPDelta;
743       ResDelta = Best.ResDelta;
744     }
745 
746     void initResourceDelta(const ScheduleDAGMI *DAG,
747                            const TargetSchedModel *SchedModel);
748   };
749 
750   /// Summarize the unscheduled region.
751   struct SchedRemainder {
752     // Critical path through the DAG in expected latency.
753     unsigned CriticalPath;
754 
755     // Unscheduled resources
756     SmallVector<unsigned, 16> RemainingCounts;
757     // Critical resource for the unscheduled zone.
758     unsigned CritResIdx;
759     // Number of micro-ops left to schedule.
760     unsigned RemainingMicroOps;
761     // Is the unscheduled zone resource limited.
762     bool IsResourceLimited;
763 
764     unsigned MaxRemainingCount;
765 
766     void reset() {
767       CriticalPath = 0;
768       RemainingCounts.clear();
769       CritResIdx = 0;
770       RemainingMicroOps = 0;
771       IsResourceLimited = false;
772       MaxRemainingCount = 0;
773     }
774 
775     SchedRemainder() { reset(); }
776 
777     void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
778   };
779 
780   /// Each scheduling boundary is associated with ready queues. It tracks the
781   /// current cycle in the direction of movement, and maintains the state
782   /// of "hazards" and other interlocks at the current cycle.
783   struct SchedBoundary {
784     ScheduleDAGMI *DAG;
785     const TargetSchedModel *SchedModel;
786     SchedRemainder *Rem;
787 
788     ReadyQueue Available;
789     ReadyQueue Pending;
790     bool CheckPending;
791 
792     // For heuristics, keep a list of the nodes that immediately depend on the
793     // most recently scheduled node.
794     SmallPtrSet<const SUnit*, 8> NextSUs;
795 
796     ScheduleHazardRecognizer *HazardRec;
797 
798     unsigned CurrCycle;
799     unsigned IssueCount;
800 
801     /// MinReadyCycle - Cycle of the soonest available instruction.
802     unsigned MinReadyCycle;
803 
804     // The expected latency of the critical path in this scheduled zone.
805     unsigned ExpectedLatency;
806 
807     // Resources used in the scheduled zone beyond this boundary.
808     SmallVector<unsigned, 16> ResourceCounts;
809 
810     // Cache the critical resources ID in this scheduled zone.
811     unsigned CritResIdx;
812 
813     // Is the scheduled region resource limited vs. latency limited.
814     bool IsResourceLimited;
815 
816     unsigned ExpectedCount;
817 
818     // Policy flag: attempt to find ILP until expected latency is covered.
819     bool ShouldIncreaseILP;
820 
821 #ifndef NDEBUG
822     // Remember the greatest min operand latency.
823     unsigned MaxMinLatency;
824 #endif
825 
826     void reset() {
827       Available.clear();
828       Pending.clear();
829       CheckPending = false;
830       NextSUs.clear();
831       HazardRec = 0;
832       CurrCycle = 0;
833       IssueCount = 0;
834       MinReadyCycle = UINT_MAX;
835       ExpectedLatency = 0;
836       ResourceCounts.resize(1); // Reserve a zero-count for invalid CritResIdx.
837       assert(!ResourceCounts[0] && "nonzero count for bad resource");
838       CritResIdx = 0;
839       IsResourceLimited = false;
840       ExpectedCount = 0;
841       ShouldIncreaseILP = false;
842 #ifndef NDEBUG
843       MaxMinLatency = 0;
844 #endif
847     }
848 
849     /// Pending queues extend the ready queues with the same ID and the
850     /// PendingFlag set.
851     SchedBoundary(unsigned ID, const Twine &Name):
852       DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
853       Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") {
854       reset();
855     }
856 
857     ~SchedBoundary() { delete HazardRec; }
858 
859     void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
860               SchedRemainder *rem);
861 
862     bool isTop() const {
863       return Available.getID() == ConvergingScheduler::TopQID;
864     }
865 
866     unsigned getUnscheduledLatency(SUnit *SU) const {
867       if (isTop())
868         return SU->getHeight();
869       return SU->getDepth();
870     }
871 
872     unsigned getCriticalCount() const {
873       return ResourceCounts[CritResIdx];
874     }
875 
876     bool checkHazard(SUnit *SU);
877 
878     void checkILPPolicy();
879 
880     void releaseNode(SUnit *SU, unsigned ReadyCycle);
881 
882     void bumpCycle();
883 
884     void countResource(unsigned PIdx, unsigned Cycles);
885 
886     void bumpNode(SUnit *SU);
887 
888     void releasePending();
889 
890     void removeReady(SUnit *SU);
891 
892     SUnit *pickOnlyChoice();
893   };
894 
895 private:
896   ScheduleDAGMI *DAG;
897   const TargetSchedModel *SchedModel;
898   const TargetRegisterInfo *TRI;
899 
900   // State of the top and bottom scheduled instruction boundaries.
901   SchedRemainder Rem;
902   SchedBoundary Top;
903   SchedBoundary Bot;
904 
905 public:
906   /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
907   enum {
908     TopQID = 1,
909     BotQID = 2,
910     LogMaxQID = 2
911   };
912 
913   ConvergingScheduler():
914     DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
915 
916   virtual void initialize(ScheduleDAGMI *dag);
917 
918   virtual SUnit *pickNode(bool &IsTopNode);
919 
920   virtual void schedNode(SUnit *SU, bool IsTopNode);
921 
922   virtual void releaseTopNode(SUnit *SU);
923 
924   virtual void releaseBottomNode(SUnit *SU);
925 
926   virtual void registerRoots();
927 
928 protected:
929   void balanceZones(
930     ConvergingScheduler::SchedBoundary &CriticalZone,
931     ConvergingScheduler::SchedCandidate &CriticalCand,
932     ConvergingScheduler::SchedBoundary &OppositeZone,
933     ConvergingScheduler::SchedCandidate &OppositeCand);
934 
935   void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand,
936                            ConvergingScheduler::SchedCandidate &BotCand);
937 
938   void tryCandidate(SchedCandidate &Cand,
939                     SchedCandidate &TryCand,
940                     SchedBoundary &Zone,
941                     const RegPressureTracker &RPTracker,
942                     RegPressureTracker &TempTracker);
943 
944   SUnit *pickNodeBidirectional(bool &IsTopNode);
945 
946   void pickNodeFromQueue(SchedBoundary &Zone,
947                          const RegPressureTracker &RPTracker,
948                          SchedCandidate &Candidate);
949 
950 #ifndef NDEBUG
951   void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone);
952 #endif
953 };
954 } // namespace
955 
956 void ConvergingScheduler::SchedRemainder::
957 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
958   reset();
959   if (!SchedModel->hasInstrSchedModel())
960     return;
961   RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
962   for (std::vector<SUnit>::iterator
963          I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
964     const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
965     RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC);
966     for (TargetSchedModel::ProcResIter
967            PI = SchedModel->getWriteProcResBegin(SC),
968            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
969       unsigned PIdx = PI->ProcResourceIdx;
970       unsigned Factor = SchedModel->getResourceFactor(PIdx);
971       RemainingCounts[PIdx] += (Factor * PI->Cycles);
972     }
973   }
974 }
975 
976 void ConvergingScheduler::SchedBoundary::
977 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
978   reset();
979   DAG = dag;
980   SchedModel = smodel;
981   Rem = rem;
982   if (SchedModel->hasInstrSchedModel())
983     ResourceCounts.resize(SchedModel->getNumProcResourceKinds());
984 }
985 
986 void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
987   DAG = dag;
988   SchedModel = DAG->getSchedModel();
989   TRI = DAG->TRI;
990   Rem.init(DAG, SchedModel);
991   Top.init(DAG, SchedModel, &Rem);
992   Bot.init(DAG, SchedModel, &Rem);
993 
996   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
997   // are disabled, then these HazardRecs will be disabled.
998   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
999   const TargetMachine &TM = DAG->MF.getTarget();
1000   Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1001   Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1002 
1003   assert((!ForceTopDown || !ForceBottomUp) &&
1004          "-misched-topdown incompatible with -misched-bottomup");
1005 }
1006 
1007 void ConvergingScheduler::releaseTopNode(SUnit *SU) {
1008   if (SU->isScheduled)
1009     return;
1010 
1011   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1012        I != E; ++I) {
1013     unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
1014     unsigned MinLatency = I->getMinLatency();
1015 #ifndef NDEBUG
1016     Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
1017 #endif
1018     if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
1019       SU->TopReadyCycle = PredReadyCycle + MinLatency;
1020   }
1021   Top.releaseNode(SU, SU->TopReadyCycle);
1022 }
1023 
1024 void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
1025   if (SU->isScheduled)
1026     return;
1027 
1028   assert(SU->getInstr() && "Scheduled SUnit must have instr");
1029 
1030   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1031        I != E; ++I) {
1032     unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
1033     unsigned MinLatency = I->getMinLatency();
1034 #ifndef NDEBUG
1035     Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
1036 #endif
1037     if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
1038       SU->BotReadyCycle = SuccReadyCycle + MinLatency;
1039   }
1040   Bot.releaseNode(SU, SU->BotReadyCycle);
1041 }
1042 
1043 void ConvergingScheduler::registerRoots() {
1044   Rem.CriticalPath = DAG->ExitSU.getDepth();
1045   // Some roots may not feed into ExitSU. Check all of them in case.
1046   for (std::vector<SUnit*>::const_iterator
1047          I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
1048     if ((*I)->getDepth() > Rem.CriticalPath)
1049       Rem.CriticalPath = (*I)->getDepth();
1050   }
1051   DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
1052 }
1053 
1054 /// Does this SU have a hazard within the current instruction group?
1055 ///
1056 /// The scheduler supports two modes of hazard recognition. The first is the
1057 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1058 /// supports highly complicated in-order reservation tables
1059 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1060 ///
1061 /// The second is a streamlined mechanism that checks for hazards based on
1062 /// simple counters that the scheduler itself maintains. It explicitly checks
1063 /// for instruction dispatch limitations, including the number of micro-ops that
1064 /// can dispatch per cycle.
1065 ///
1066 /// TODO: Also check whether the SU must start a new group.
1067 bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
1068   if (HazardRec->isEnabled())
1069     return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
1070 
1071   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1072   if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) {
1073     DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
1074           << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1075     return true;
1076   }
1077   return false;
1078 }
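
// Sketch of the first mode (hypothetical target code): the general mechanism
// is enabled when the target's hook, called from ConvergingScheduler::
// initialize() above, returns an enabled recognizer, e.g.:
//
//   ScheduleHazardRecognizer *MyInstrInfo::CreateTargetMIHazardRecognizer(
//       const InstrItineraryData *II, const ScheduleDAG *DAG) const {
//     return new ScoreboardHazardRecognizer(II, DAG, "misched");
//   }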
1079 
1080 /// If expected latency is covered, disable ILP policy.
1081 void ConvergingScheduler::SchedBoundary::checkILPPolicy() {
1082   if (ShouldIncreaseILP
1083       && (IsResourceLimited || ExpectedLatency <= CurrCycle)) {
1084     ShouldIncreaseILP = false;
1085     DEBUG(dbgs() << "Disable ILP: " << Available.getName() << '\n');
1086   }
1087 }
1088 
1089 void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
1090                                                      unsigned ReadyCycle) {
1091 
1092   if (ReadyCycle < MinReadyCycle)
1093     MinReadyCycle = ReadyCycle;
1094 
1095   // Check for interlocks first. For the purpose of other heuristics, an
1096   // instruction that cannot issue appears as if it's not in the ReadyQueue.
1097   if (ReadyCycle > CurrCycle || checkHazard(SU))
1098     Pending.push(SU);
1099   else
1100     Available.push(SU);
1101 
1102   // Record this node as an immediate dependent of the scheduled node.
1103   NextSUs.insert(SU);
1104 
1105   // If CriticalPath has been computed, then check if the unscheduled nodes
1106   // exceed the ILP window. Before registerRoots, CriticalPath==0.
1107   if (Rem->CriticalPath && (ExpectedLatency + getUnscheduledLatency(SU)
1108                             > Rem->CriticalPath + ILPWindow)) {
1109     ShouldIncreaseILP = true;
1110     DEBUG(dbgs() << "Increase ILP: " << Available.getName() << " "
1111           << ExpectedLatency << " + " << getUnscheduledLatency(SU) << '\n');
1112   }
1113 }
1114 
1115 /// Move the boundary of scheduled code by one cycle.
1116 void ConvergingScheduler::SchedBoundary::bumpCycle() {
1117   unsigned Width = SchedModel->getIssueWidth();
1118   IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
1119 
1120   unsigned NextCycle = CurrCycle + 1;
1121   assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
1122   if (MinReadyCycle > NextCycle) {
1123     IssueCount = 0;
1124     NextCycle = MinReadyCycle;
1125   }
1126 
1127   if (!HazardRec->isEnabled()) {
1128     // Bypass HazardRec virtual calls.
1129     CurrCycle = NextCycle;
1130   }
1131   else {
1132     // Bypass getHazardType calls in case of long latency.
1133     for (; CurrCycle != NextCycle; ++CurrCycle) {
1134       if (isTop())
1135         HazardRec->AdvanceCycle();
1136       else
1137         HazardRec->RecedeCycle();
1138     }
1139   }
1140   CheckPending = true;
1141   IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
1142 
1143   DEBUG(dbgs() << "  *** " << Available.getName() << " cycle "
1144         << CurrCycle << '\n');
1145 }
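
// Worked example with illustrative numbers: with an issue width of 4 and
// IssueCount == 6 after a multi-uop instruction, bumping the cycle leaves
// IssueCount == 2, modeling micro-ops that spill into the next group.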
1146 
1147 /// Add the given processor resource to this scheduled zone.
1148 void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
1149                                                        unsigned Cycles) {
1150   unsigned Factor = SchedModel->getResourceFactor(PIdx);
1151   DEBUG(dbgs() << "  " << SchedModel->getProcResource(PIdx)->Name
1152         << " +(" << Cycles << "x" << Factor
1153         << ") / " << SchedModel->getLatencyFactor() << '\n');
1154 
1155   unsigned Count = Factor * Cycles;
1156   ResourceCounts[PIdx] += Count;
1157   assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
1158   Rem->RemainingCounts[PIdx] -= Count;
1159 
1160   // Reset MaxRemainingCount for sanity.
1161   Rem->MaxRemainingCount = 0;
1162 
1163   // Check if this resource exceeds the current critical resource by a full
1164   // cycle. If so, it becomes the critical resource.
1165   if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
1166       >= (int)SchedModel->getLatencyFactor()) {
1167     CritResIdx = PIdx;
1168     DEBUG(dbgs() << "  *** Critical resource "
1169           << SchedModel->getProcResource(PIdx)->Name << " x"
1170           << ResourceCounts[PIdx] << '\n');
1171   }
1172 }
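
// Worked example with illustrative numbers: if PIdx has Factor == 2 and the
// instruction holds it for Cycles == 3, the zone's count grows by 6 units.
// With a latency factor of 2, PIdx then becomes the critical resource only
// once its count leads ResourceCounts[CritResIdx] by at least 2 units, i.e.
// one full cycle.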
1173 
1174 /// Move the boundary of scheduled code by one SUnit.
1175 void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
1176   // Update the reservation table.
1177   if (HazardRec->isEnabled()) {
1178     if (!isTop() && SU->isCall) {
1179       // Calls are scheduled with their preceding instructions. For bottom-up
1180       // scheduling, clear the pipeline state before emitting.
1181       HazardRec->Reset();
1182     }
1183     HazardRec->EmitInstruction(SU);
1184   }
1185   // Update resource counts and critical resource.
1186   if (SchedModel->hasInstrSchedModel()) {
1187     const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1188     Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
1189     for (TargetSchedModel::ProcResIter
1190            PI = SchedModel->getWriteProcResBegin(SC),
1191            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1192       countResource(PI->ProcResourceIdx, PI->Cycles);
1193     }
1194   }
1195   if (isTop()) {
1196     if (SU->getDepth() > ExpectedLatency)
1197       ExpectedLatency = SU->getDepth();
1198   }
1199   else {
1200     if (SU->getHeight() > ExpectedLatency)
1201       ExpectedLatency = SU->getHeight();
1202   }
1203 
1204   IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
1205 
1206   // Check the instruction group dispatch limit.
1207   // TODO: Check if this SU must end a dispatch group.
1208   IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
1209 
1210   // checkHazard prevents scheduling more micro-ops in a cycle than the issue
1211   // width allows. However, we commonly reach the maximum. In this case
1212   // opportunistically bump the cycle to avoid uselessly checking everything in
1213   // the readyQ. Furthermore, a single instruction may produce more than one
1214   // cycle's worth of micro-ops.
1215   if (IssueCount >= SchedModel->getIssueWidth()) {
1216     DEBUG(dbgs() << "  *** Max instrs at cycle " << CurrCycle << '\n');
1217     bumpCycle();
1218   }
1219 }
1220 
1221 /// Release pending ready nodes into the available queue. This makes them
1222 /// visible to heuristics.
1223 void ConvergingScheduler::SchedBoundary::releasePending() {
1224   // If the available queue is empty, it is safe to reset MinReadyCycle.
1225   if (Available.empty())
1226     MinReadyCycle = UINT_MAX;
1227 
1228   // Check to see if any of the pending instructions are ready to issue.  If
1229   // so, add them to the available queue.
1230   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
1231     SUnit *SU = *(Pending.begin()+i);
1232     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
1233 
1234     if (ReadyCycle < MinReadyCycle)
1235       MinReadyCycle = ReadyCycle;
1236 
1237     if (ReadyCycle > CurrCycle)
1238       continue;
1239 
1240     if (checkHazard(SU))
1241       continue;
1242 
1243     Available.push(SU);
1244     Pending.remove(Pending.begin()+i);
1245     --i; --e;
1246   }
1247   DEBUG(if (!Pending.empty()) Pending.dump());
1248   CheckPending = false;
1249 }
1250 
1251 /// Remove SU from the ready set for this boundary.
1252 void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
1253   if (Available.isInQueue(SU))
1254     Available.remove(Available.find(SU));
1255   else {
1256     assert(Pending.isInQueue(SU) && "bad ready count");
1257     Pending.remove(Pending.find(SU));
1258   }
1259 }
1260 
1261 /// If this queue only has one ready candidate, return it. As a side effect,
1262 /// defer any nodes that now hit a hazard, and advance the cycle until at least
1263 /// one node is ready. If multiple instructions are ready, return NULL.
1264 SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
1265   if (CheckPending)
1266     releasePending();
1267 
1268   if (IssueCount > 0) {
1269     // Defer any ready instrs that now have a hazard.
1270     for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
1271       if (checkHazard(*I)) {
1272         Pending.push(*I);
1273         I = Available.remove(I);
1274         continue;
1275       }
1276       ++I;
1277     }
1278   }
1279   for (unsigned i = 0; Available.empty(); ++i) {
1280     assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
1281            "permanent hazard"); (void)i;
1282     bumpCycle();
1283     releasePending();
1284   }
1285   if (Available.size() == 1)
1286     return *Available.begin();
1287   return NULL;
1288 }
1289 
1290 /// Record the candidate policy for opposite zones with different critical
1291 /// resources.
1292 ///
1293 /// If the CriticalZone is latency limited, don't force a policy for the
1294 /// candidates here. Instead, when releasing each candidate, releaseNode
1295 /// compares the region's critical path to the candidate's height or depth and
1296 /// the scheduled zone's expected latency then sets ShouldIncreaseILP.
1297 void ConvergingScheduler::balanceZones(
1298   ConvergingScheduler::SchedBoundary &CriticalZone,
1299   ConvergingScheduler::SchedCandidate &CriticalCand,
1300   ConvergingScheduler::SchedBoundary &OppositeZone,
1301   ConvergingScheduler::SchedCandidate &OppositeCand) {
1302 
1303   if (!CriticalZone.IsResourceLimited)
1304     return;
1305 
1306   SchedRemainder *Rem = CriticalZone.Rem;
1307 
1308   // If the critical zone is overconsuming a resource relative to the
1309   // remainder, try to reduce it.
1310   unsigned RemainingCritCount =
1311     Rem->RemainingCounts[CriticalZone.CritResIdx];
1312   if ((int)(Rem->MaxRemainingCount - RemainingCritCount)
1313       > (int)SchedModel->getLatencyFactor()) {
1314     CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
1315     DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
1316           << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
1317           << '\n');
1318   }
1319   // If the other zone is underconsuming a resource relative to the full zone,
1320   // try to increase it.
1321   unsigned OppositeCount =
1322     OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
1323   if ((int)(OppositeZone.ExpectedCount - OppositeCount)
1324       > (int)SchedModel->getLatencyFactor()) {
1325     OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
1326     DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
1327           << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
1328           << '\n');
1329   }
1330 }
1331 
1332 /// Determine if the scheduled zones exceed resource limits or critical path and
1333 /// set each candidate's ReduceLatency and ReduceResIdx policies accordingly.
1334 void ConvergingScheduler::checkResourceLimits(
1335   ConvergingScheduler::SchedCandidate &TopCand,
1336   ConvergingScheduler::SchedCandidate &BotCand) {
1337 
1338   Bot.checkILPPolicy();
1339   Top.checkILPPolicy();
1340   if (Bot.ShouldIncreaseILP)
1341     BotCand.Policy.ReduceLatency = true;
1342   if (Top.ShouldIncreaseILP)
1343     TopCand.Policy.ReduceLatency = true;
1344 
1345   // Handle resource-limited regions.
1346   if (Top.IsResourceLimited && Bot.IsResourceLimited
1347       && Top.CritResIdx == Bot.CritResIdx) {
1348     // If the scheduled critical resource in both zones is no longer the
1349     // critical remaining resource, attempt to reduce resource height both ways.
1350     if (Top.CritResIdx != Rem.CritResIdx) {
1351       TopCand.Policy.ReduceResIdx = Top.CritResIdx;
1352       BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
1353       DEBUG(dbgs() << "Reduce scheduled "
1354             << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
1355     }
1356     return;
1357   }
1358   // Handle latency-limited regions.
1359   if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
1360     // If the total scheduled expected latency exceeds the region's critical
1361     // path then reduce latency both ways.
1362     //
1363     // Just because a zone is not resource limited does not mean it is latency
1364     // limited. Unbuffered resources, such as max micro-ops, may cause CurrCycle
1365     // to exceed expected latency.
1366     if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
1367         && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
1368       TopCand.Policy.ReduceLatency = true;
1369       BotCand.Policy.ReduceLatency = true;
1370       DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
1371             << " + " << Bot.ExpectedLatency << '\n');
1372     }
1373     return;
1374   }
1375   // The critical resource is different in each zone, so request balancing.
1376 
1377   // Compute the cost of each zone.
1378   Rem.MaxRemainingCount = std::max(
1379     Rem.RemainingMicroOps * SchedModel->getMicroOpFactor(),
1380     Rem.RemainingCounts[Rem.CritResIdx]);
1381   Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
1382   Top.ExpectedCount = std::max(
1383     Top.getCriticalCount(),
1384     Top.ExpectedCount * SchedModel->getLatencyFactor());
1385   Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
1386   Bot.ExpectedCount = std::max(
1387     Bot.getCriticalCount(),
1388     Bot.ExpectedCount * SchedModel->getLatencyFactor());
1389 
1390   balanceZones(Top, TopCand, Bot, BotCand);
1391   balanceZones(Bot, BotCand, Top, TopCand);
1392 }
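
// In equation form, the per-zone cost computed above is
//
//   ExpectedCount = max(CriticalCount,
//                       max(ExpectedLatency, CurrCycle) * LatencyFactor)
//
// which expresses latency-limited zones in the same resource units as
// resource-limited ones before the two zones are balanced.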
1393 
1394 void ConvergingScheduler::SchedCandidate::
1395 initResourceDelta(const ScheduleDAGMI *DAG,
1396                   const TargetSchedModel *SchedModel) {
1397   if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
1398     return;
1399 
1400   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1401   for (TargetSchedModel::ProcResIter
1402          PI = SchedModel->getWriteProcResBegin(SC),
1403          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1404     if (PI->ProcResourceIdx == Policy.ReduceResIdx)
1405       ResDelta.CritResources += PI->Cycles;
1406     if (PI->ProcResourceIdx == Policy.DemandResIdx)
1407       ResDelta.DemandedResources += PI->Cycles;
1408   }
1409 }
1410 
1411 /// Return true if this heuristic determines order.
1412 static bool tryLess(unsigned TryVal, unsigned CandVal,
1413                     ConvergingScheduler::SchedCandidate &TryCand,
1414                     ConvergingScheduler::SchedCandidate &Cand,
1415                     ConvergingScheduler::CandReason Reason) {
1416   if (TryVal < CandVal) {
1417     TryCand.Reason = Reason;
1418     return true;
1419   }
1420   if (TryVal > CandVal) {
1421     if (Cand.Reason > Reason)
1422       Cand.Reason = Reason;
1423     return true;
1424   }
1425   return false;
1426 }
1427 static bool tryGreater(unsigned TryVal, unsigned CandVal,
1428                        ConvergingScheduler::SchedCandidate &TryCand,
1429                        ConvergingScheduler::SchedCandidate &Cand,
1430                        ConvergingScheduler::CandReason Reason) {
1431   if (TryVal > CandVal) {
1432     TryCand.Reason = Reason;
1433     return true;
1434   }
1435   if (TryVal < CandVal) {
1436     if (Cand.Reason > Reason)
1437       Cand.Reason = Reason;
1438     return true;
1439   }
1440   return false;
1441 }
1442 
1443 /// Apply a set of heuristics to a new candidate. Heuristics are currently
1444 /// hierarchical. This may be more efficient than a graduated cost model because
1445 /// we don't need to evaluate all aspects of the model for each node in the
1446 /// queue. But it's really done to make the heuristics easier to debug and
1447 /// statistically analyze.
1448 ///
1449 /// \param Cand provides the policy and current best candidate.
1450 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
1451 /// \param Zone describes the scheduled zone that we are extending.
1452 /// \param RPTracker describes reg pressure within the scheduled zone.
1453 /// \param TempTracker is a scratch pressure tracker to reuse in queries.
1454 void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
1455                                        SchedCandidate &TryCand,
1456                                        SchedBoundary &Zone,
1457                                        const RegPressureTracker &RPTracker,
1458                                        RegPressureTracker &TempTracker) {
1459 
1460   // Always initialize TryCand's RPDelta.
1461   TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
1462                                   DAG->getRegionCriticalPSets(),
1463                                   DAG->getRegPressure().MaxSetPressure);
1464 
1465   // Initialize the candidate if needed.
1466   if (!Cand.isValid()) {
1467     TryCand.Reason = NodeOrder;
1468     return;
1469   }
1470   // Avoid exceeding the target's limit.
1471   if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
1472               Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
1473     return;
1474   if (Cand.Reason == SingleExcess)
1475     Cand.Reason = MultiPressure;
1476 
1477   // Avoid increasing the max critical pressure in the scheduled region.
1478   if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
1479               Cand.RPDelta.CriticalMax.UnitIncrease,
1480               TryCand, Cand, SingleCritical))
1481     return;
1482   if (Cand.Reason == SingleCritical)
1483     Cand.Reason = MultiPressure;
1484 
1485   // Avoid critical resource consumption and balance the schedule.
1486   TryCand.initResourceDelta(DAG, SchedModel);
1487   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
1488               TryCand, Cand, ResourceReduce))
1489     return;
1490   if (tryGreater(TryCand.ResDelta.DemandedResources,
1491                  Cand.ResDelta.DemandedResources,
1492                  TryCand, Cand, ResourceDemand))
1493     return;
1494 
1495   // Avoid serializing long latency dependence chains.
1496   if (Cand.Policy.ReduceLatency) {
1497     if (Zone.isTop()) {
1498       if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
1499           > Zone.ExpectedCount) {
1500         if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
1501                     TryCand, Cand, TopDepthReduce))
1502           return;
1503       }
1504       if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
1505                      TryCand, Cand, TopPathReduce))
1506         return;
1507     }
1508     else {
1509       if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
1510           > Zone.ExpectedCount) {
1511         if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
1512                     TryCand, Cand, BotHeightReduce))
1513           return;
1514       }
1515       if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
1516                      TryCand, Cand, BotPathReduce))
1517         return;
1518     }
1519   }
1520 
1521   // Avoid increasing the max pressure of the entire region.
1522   if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
1523               Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
1524     return;
1525   if (Cand.Reason == SingleMax)
1526     Cand.Reason = MultiPressure;
1527 
1528   // Prefer immediate defs/users of the last scheduled instruction. This is a
1529   // nice pressure avoidance strategy that also conserves the processor's
1530   // register renaming resources and keeps the machine code readable.
1531   if (Zone.NextSUs.count(TryCand.SU) && !Zone.NextSUs.count(Cand.SU)) {
1532     TryCand.Reason = NextDefUse;
1533     return;
1534   }
1535   if (!Zone.NextSUs.count(TryCand.SU) && Zone.NextSUs.count(Cand.SU)) {
1536     if (Cand.Reason > NextDefUse)
1537       Cand.Reason = NextDefUse;
1538     return;
1539   }
1540   // Fall through to original instruction order.
1541   if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
1542       || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
1543     TryCand.Reason = NodeOrder;
1544   }
1545 }
1546 
1547 /// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is
1548 /// more desirable than RHS from a scheduling standpoint.
1549 static bool compareRPDelta(const RegPressureDelta &LHS,
1550                            const RegPressureDelta &RHS) {
1551   // Compare each component of pressure in decreasing order of importance
1552   // without checking if any are valid. Invalid PressureElements are assumed to
1553   // have UnitIncrease==0, so are neutral.
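  // In particular, if neither delta changes any tracked pressure set, the
  // comparison returns false and neither direction is preferred.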
1554 
1555   // Avoid exceeding the target's pressure set limit.
1556   if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
1557     DEBUG(dbgs() << "RP excess top - bot: "
1558           << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
1559     return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
1560   }
1561   // Avoid increasing the max critical pressure in the scheduled region.
1562   if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
1563     DEBUG(dbgs() << "RP critical top - bot: "
1564           << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
1565           << '\n');
1566     return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
1567   }
1568   // Avoid increasing the max pressure of the entire region.
1569   if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
1570     DEBUG(dbgs() << "RP current top - bot: "
1571           << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
1572           << '\n');
1573     return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
1574   }
1575   return false;
1576 }
1577 
1578 #ifndef NDEBUG
1579 const char *ConvergingScheduler::getReasonStr(
1580   ConvergingScheduler::CandReason Reason) {
1581   switch (Reason) {
1582   case NoCand:         return "NOCAND    ";
1583   case SingleExcess:   return "REG-EXCESS";
1584   case SingleCritical: return "REG-CRIT  ";
1585   case SingleMax:      return "REG-MAX   ";
1586   case MultiPressure:  return "REG-MULTI ";
1587   case ResourceReduce: return "RES-REDUCE";
1588   case ResourceDemand: return "RES-DEMAND";
1589   case TopDepthReduce: return "TOP-DEPTH ";
1590   case TopPathReduce:  return "TOP-PATH  ";
1591   case BotHeightReduce:return "BOT-HEIGHT";
1592   case BotPathReduce:  return "BOT-PATH  ";
1593   case NextDefUse:     return "DEF-USE   ";
1594   case NodeOrder:      return "ORDER     ";
1595   }
1596   llvm_unreachable("Unknown reason!");
1597 }
1598 
1599 void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
1600                                          const SchedBoundary &Zone) {
1601   const char *Label = getReasonStr(Cand.Reason);
1602   PressureElement P;
1603   unsigned ResIdx = 0;
1604   unsigned Latency = 0;
1605   switch (Cand.Reason) {
1606   default:
1607     break;
1608   case SingleExcess:
1609     P = Cand.RPDelta.Excess;
1610     break;
1611   case SingleCritical:
1612     P = Cand.RPDelta.CriticalMax;
1613     break;
1614   case SingleMax:
1615     P = Cand.RPDelta.CurrentMax;
1616     break;
1617   case ResourceReduce:
1618     ResIdx = Cand.Policy.ReduceResIdx;
1619     break;
1620   case ResourceDemand:
1621     ResIdx = Cand.Policy.DemandResIdx;
1622     break;
1623   case TopDepthReduce:
1624     Latency = Cand.SU->getDepth();
1625     break;
1626   case TopPathReduce:
1627     Latency = Cand.SU->getHeight();
1628     break;
1629   case BotHeightReduce:
1630     Latency = Cand.SU->getHeight();
1631     break;
1632   case BotPathReduce:
1633     Latency = Cand.SU->getDepth();
1634     break;
1635   }
1636   dbgs() << Label << " " << Zone.Available.getName() << " ";
1637   if (P.isValid())
1638     dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
1639            << " ";
1640   else
1641     dbgs() << "     ";
1642   if (ResIdx)
1643     dbgs() << SchedModel->getProcResource(ResIdx)->Name << " ";
1644   else
1645     dbgs() << "        ";
1646   if (Latency)
1647     dbgs() << Latency << " cycles ";
1648   else
1649     dbgs() << "         ";
1650   Cand.SU->dump(DAG);
1651 }
1652 #endif
1653 
1654 /// Pick the best candidate from the given ready queue.
1655 ///
1656 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
1657 /// DAG building. To adjust for the current scheduling location we need to
1658 /// maintain the number of vreg uses remaining to be top-scheduled.
1659 void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
1660                                             const RegPressureTracker &RPTracker,
1661                                             SchedCandidate &Cand) {
1662   ReadyQueue &Q = Zone.Available;
1663 
1664   DEBUG(Q.dump());
1665 
1666   // getMaxPressureDelta temporarily modifies the tracker.
1667   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
1668 
1669   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
1670 
1671     SchedCandidate TryCand(Cand.Policy);
1672     TryCand.SU = *I;
1673     tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
1674     if (TryCand.Reason != NoCand) {
1675       // Initialize resource delta if needed in case future heuristics query it.
1676       if (TryCand.ResDelta == SchedResourceDelta())
1677         TryCand.initResourceDelta(DAG, SchedModel);
1678       Cand.setBest(TryCand);
1679       DEBUG(traceCandidate(Cand, Zone));
1680     }
1682   }
1683 }
1684 
1685 static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
1686                       bool IsTop) {
1687   DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot")
1688         << " SU(" << Cand.SU->NodeNum << ") "
1689         << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
1690 }
1691 
1692 /// Pick the best candidate node from either the top or bottom queue.
1693 SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
1694   // Schedule as far as possible in the direction of no choice. This is most
1695   // efficient, but also provides the best heuristics for CriticalPSets.
1696   if (SUnit *SU = Bot.pickOnlyChoice()) {
1697     IsTopNode = false;
1698     return SU;
1699   }
1700   if (SUnit *SU = Top.pickOnlyChoice()) {
1701     IsTopNode = true;
1702     return SU;
1703   }
1704   CandPolicy NoPolicy;
1705   SchedCandidate BotCand(NoPolicy);
1706   SchedCandidate TopCand(NoPolicy);
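  // Let resource pressure drive the policy: checkResourceLimits may request
  // latency reduction or resource balancing in each candidate's policy.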
1707   checkResourceLimits(TopCand, BotCand);
1708 
1709   // Prefer bottom scheduling when heuristics are silent.
1710   pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
1711   assert(BotCand.Reason != NoCand && "failed to find the first candidate");
1712 
1713   // If either Q has a single candidate that provides the least increase in
1714   // Excess pressure, we can immediately schedule from that Q.
1715   //
1716   // RegionCriticalPSets summarizes the pressure within the scheduled region and
1717   // affects picking from either Q. If scheduling in one direction must
1718   // increase pressure for one of the excess PSets, then schedule in that
1719   // direction first to provide more freedom in the other direction.
1720   if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
1721     IsTopNode = false;
1722     tracePick(BotCand, IsTopNode);
1723     return BotCand.SU;
1724   }
1725   // Check if the top Q has a better candidate.
1726   pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
1727   assert(TopCand.Reason != NoCand && "failed to find the first candidate");
1728 
1729   // If either Q has a single candidate that minimizes pressure above the
1730   // original region's pressure, pick it.
1731   if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
1732     if (TopCand.Reason < BotCand.Reason) {
1733       IsTopNode = true;
1734       tracePick(TopCand, IsTopNode);
1735       return TopCand.SU;
1736     }
1737     IsTopNode = false;
1738     tracePick(BotCand, IsTopNode);
1739     return BotCand.SU;
1740   }
1741   // Check for a salient pressure difference and pick the best from either side.
1742   if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
1743     IsTopNode = true;
1744     tracePick(TopCand, IsTopNode);
1745     return TopCand.SU;
1746   }
1747   // Otherwise pick the side with the stronger reason; prefer bottom on a tie.
1748   if (TopCand.Reason < BotCand.Reason) {
1749     IsTopNode = true;
1750     tracePick(TopCand, IsTopNode);
1751     return TopCand.SU;
1752   }
1753   IsTopNode = false;
1754   tracePick(BotCand, IsTopNode);
1755   return BotCand.SU;
1756 }
1757 
1758 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
1759 SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
1760   if (DAG->top() == DAG->bottom()) {
1761     assert(Top.Available.empty() && Top.Pending.empty() &&
1762            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
1763     return NULL;
1764   }
1765   SUnit *SU;
1766   do {
1767     if (ForceTopDown) {
1768       SU = Top.pickOnlyChoice();
1769       if (!SU) {
1770         CandPolicy NoPolicy;
1771         SchedCandidate TopCand(NoPolicy);
1772         pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
1773         assert(TopCand.Reason != NoCand && "failed to find the first candidate");
1774         SU = TopCand.SU;
1775       }
1776       IsTopNode = true;
1777     }
1778     else if (ForceBottomUp) {
1779       SU = Bot.pickOnlyChoice();
1780       if (!SU) {
1781         CandPolicy NoPolicy;
1782         SchedCandidate BotCand(NoPolicy);
1783         pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
1784         assert(BotCand.Reason != NoCand && "failed to find the first candidate");
1785         SU = BotCand.SU;
1786       }
1787       IsTopNode = false;
1788     }
1789     else {
1790       SU = pickNodeBidirectional(IsTopNode);
1791     }
1792   } while (SU->isScheduled);
1793 
1794   if (SU->isTopReady())
1795     Top.removeReady(SU);
1796   if (SU->isBottomReady())
1797     Bot.removeReady(SU);
1798 
1799   DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
1800         << " Scheduling Instruction in cycle "
1801         << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
1802         SU->dump(DAG));
1803   return SU;
1804 }
1805 
1806 /// Update the scheduler's state after scheduling a node. This is the same node
1807 /// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
1808 /// its state based on the current cycle before MachineSchedStrategy does.
1809 void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
1810   if (IsTopNode) {
1811     SU->TopReadyCycle = Top.CurrCycle;
1812     Top.bumpNode(SU);
1813   }
1814   else {
1815     SU->BotReadyCycle = Bot.CurrCycle;
1816     Bot.bumpNode(SU);
1817   }
1818 }
1819 
1820 /// Create the standard converging machine scheduler. This will be used as the
1821 /// default scheduler if the target does not set a default.
1822 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
1823   assert((!ForceTopDown || !ForceBottomUp) &&
1824          "-misched-topdown incompatible with -misched-bottomup");
1825   return new ScheduleDAGMI(C, new ConvergingScheduler());
1826 }
1827 static MachineSchedRegistry
1828 ConvergingSchedRegistry("converge", "Standard converging scheduler.",
1829                         createConvergingSched);
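// The registry name selects this scheduler on the command line, e.g.
// -misched=converge (via the scheduler option registered earlier in this
// file).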
1830 
1831 //===----------------------------------------------------------------------===//
1832 // ILP Scheduler. Currently for experimental analysis of heuristics.
1833 //===----------------------------------------------------------------------===//
1834 
1835 namespace {
1836 /// \brief Order nodes by the ILP metric.
1837 struct ILPOrder {
1838   ScheduleDAGILP *ILP;
1839   bool MaximizeILP;
1840 
1841   ILPOrder(ScheduleDAGILP *ilp, bool MaxILP): ILP(ilp), MaximizeILP(MaxILP) {}
1842 
1843   /// \brief Apply a less-than relation on node priority.
1844   bool operator()(const SUnit *A, const SUnit *B) const {
1845     // Return true if A comes after B in the Q.
1846     if (MaximizeILP)
1847       return ILP->getILP(A) < ILP->getILP(B);
1848     else
1849       return ILP->getILP(A) > ILP->getILP(B);
1850   }
1851 };
1852 
1853 /// \brief Schedule based on the ILP metric.
1854 class ILPScheduler : public MachineSchedStrategy {
1855   ScheduleDAGILP ILP;
1856   ILPOrder Cmp;
1857 
1858   std::vector<SUnit*> ReadyQ;
1859 public:
1860   ILPScheduler(bool MaximizeILP)
1861   : ILP(/*BottomUp=*/true), Cmp(&ILP, MaximizeILP) {}
1862 
1863   virtual void initialize(ScheduleDAGMI *DAG) {
1864     ReadyQ.clear();
1865     ILP.resize(DAG->SUnits.size());
1866   }
1867 
1868   virtual void registerRoots() {
1869     for (std::vector<SUnit*>::const_iterator
1870            I = ReadyQ.begin(), E = ReadyQ.end(); I != E; ++I) {
1871       ILP.computeILP(*I);
1872     }
1873   }
1874 
1875   /// Implement MachineSchedStrategy interface.
1876   /// -----------------------------------------
1877 
1878   virtual SUnit *pickNode(bool &IsTopNode) {
1879     if (ReadyQ.empty()) return NULL;
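    // std::pop_heap moves the highest-priority node under Cmp to the back
    // of the vector, where it can be popped off.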
1880     std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
1881     SUnit *SU = ReadyQ.back();
1882     ReadyQ.pop_back();
1883     IsTopNode = false;
1884     DEBUG(dbgs() << "*** Scheduling " << *SU->getInstr()
1885           << " ILP: " << ILP.getILP(SU) << '\n');
1886     return SU;
1887   }
1888 
1889   virtual void schedNode(SUnit *, bool) {}
1890 
1891   virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
1892 
1893   virtual void releaseBottomNode(SUnit *SU) {
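    // Keep ReadyQ heap-ordered under Cmp so pickNode can always pop the
    // current best candidate.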
1894     ReadyQ.push_back(SU);
1895     std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
1896   }
1897 };
1898 } // namespace
1899 
1900 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
1901   return new ScheduleDAGMI(C, new ILPScheduler(true));
1902 }
1903 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
1904   return new ScheduleDAGMI(C, new ILPScheduler(false));
1905 }
1906 static MachineSchedRegistry ILPMaxRegistry(
1907   "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
1908 static MachineSchedRegistry ILPMinRegistry(
1909   "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
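// For experiments these can be requested by name, e.g. -misched=ilpmax or
// -misched=ilpmin.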
1910 
1911 //===----------------------------------------------------------------------===//
1912 // Machine Instruction Shuffler for Correctness Testing
1913 //===----------------------------------------------------------------------===//
1914 
1915 #ifndef NDEBUG
1916 namespace {
1917 /// Apply a less-than relation on the node order, which corresponds to the
1918 /// instruction order prior to scheduling. IsReverse implements greater-than.
1919 template<bool IsReverse>
1920 struct SUnitOrder {
1921   bool operator()(SUnit *A, SUnit *B) const {
1922     if (IsReverse)
1923       return A->NodeNum > B->NodeNum;
1924     else
1925       return A->NodeNum < B->NodeNum;
1926   }
1927 };
1928 
1929 /// Reorder instructions as much as possible.
1930 class InstructionShuffler : public MachineSchedStrategy {
1931   bool IsAlternating;
1932   bool IsTopDown;
1933 
1934   // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
1935   // gives nodes with higher NodeNums higher priority, causing the latest
1936   // instructions to be scheduled first.
1937   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
1938     TopQ;
1939   // When scheduling bottom-up, use greater-than as the queue priority.
1940   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
1941     BottomQ;
1942 public:
1943   InstructionShuffler(bool alternate, bool topdown)
1944     : IsAlternating(alternate), IsTopDown(topdown) {}
1945 
1946   virtual void initialize(ScheduleDAGMI *) {
1947     TopQ.clear();
1948     BottomQ.clear();
1949   }
1950 
1951   /// Implement MachineSchedStrategy interface.
1952   /// -----------------------------------------
1953 
1954   virtual SUnit *pickNode(bool &IsTopNode) {
1955     SUnit *SU;
1956     if (IsTopDown) {
1957       do {
1958         if (TopQ.empty()) return NULL;
1959         SU = TopQ.top();
1960         TopQ.pop();
1961       } while (SU->isScheduled);
1962       IsTopNode = true;
1963     }
1964     else {
1965       do {
1966         if (BottomQ.empty()) return NULL;
1967         SU = BottomQ.top();
1968         BottomQ.pop();
1969       } while (SU->isScheduled);
1970       IsTopNode = false;
1971     }
1972     if (IsAlternating)
1973       IsTopDown = !IsTopDown;
1974     return SU;
1975   }
1976 
1977   virtual void schedNode(SUnit *, bool) {}
1978 
1979   virtual void releaseTopNode(SUnit *SU) {
1980     TopQ.push(SU);
1981   }
1982   virtual void releaseBottomNode(SUnit *SU) {
1983     BottomQ.push(SU);
1984   }
1985 };
1986 } // namespace
1987 
1988 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
1989   bool Alternate = !ForceTopDown && !ForceBottomUp;
1990   bool TopDown = !ForceBottomUp;
1991   assert((TopDown || !ForceTopDown) &&
1992          "-misched-topdown incompatible with -misched-bottomup");
1993   return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
1994 }
1995 static MachineSchedRegistry ShufflerRegistry(
1996   "shuffle", "Shuffle machine instructions alternating directions",
1997   createInstructionShuffler);
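// Debug builds only: -misched=shuffle stress-tests dependence tracking by
// reordering instructions as aggressively as the DAG allows.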
1998 #endif // !NDEBUG
1999