xref: /llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp (revision a7714a0ff986950eb42bd2417486d58ab4620d78)
1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // MachineScheduler schedules machine instructions after phi elimination. It
11 // preserves LiveIntervals so it can be invoked before register allocation.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #define DEBUG_TYPE "misched"
16 
17 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
18 #include "llvm/CodeGen/MachineScheduler.h"
19 #include "llvm/CodeGen/Passes.h"
20 #include "llvm/CodeGen/RegisterClassInfo.h"
21 #include "llvm/CodeGen/ScheduleDAGILP.h"
22 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
23 #include "llvm/Analysis/AliasAnalysis.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/ErrorHandling.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include "llvm/ADT/OwningPtr.h"
29 #include "llvm/ADT/PriorityQueue.h"
30 
31 #include <queue>
32 
33 using namespace llvm;
34 
35 namespace llvm {
36 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
37                            cl::desc("Force top-down list scheduling"));
38 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
39                             cl::desc("Force bottom-up list scheduling"));
40 }
41 
42 #ifndef NDEBUG
43 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
44   cl::desc("Pop up a window to show MISched dags after they are processed"));
45 
46 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
47   cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
48 #else
49 static bool ViewMISchedDAGs = false;
50 #endif // NDEBUG
51 
52 // Threshold to very roughly model an out-of-order processor's instruction
53 // buffers. If the actual value of this threshold matters much in practice, then
54 // it can be specified by the machine model. For now, it's an experimental
55 // tuning knob to determine when and if it matters.
56 static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden,
57   cl::desc("Allow expected latency to exceed the critical path by N cycles "
58            "before attempting to balance ILP"),
59   cl::init(10U));
60 
61 // Experimental heuristics
62 static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
63   cl::desc("Enable load clustering."));
64 
65 //===----------------------------------------------------------------------===//
66 // Machine Instruction Scheduling Pass and Registry
67 //===----------------------------------------------------------------------===//
68 
69 MachineSchedContext::MachineSchedContext():
70     MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
71   RegClassInfo = new RegisterClassInfo();
72 }
73 
74 MachineSchedContext::~MachineSchedContext() {
75   delete RegClassInfo;
76 }
77 
78 namespace {
79 /// MachineScheduler runs after coalescing and before register allocation.
80 class MachineScheduler : public MachineSchedContext,
81                          public MachineFunctionPass {
82 public:
83   MachineScheduler();
84 
85   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
86 
87   virtual void releaseMemory() {}
88 
89   virtual bool runOnMachineFunction(MachineFunction&);
90 
91   virtual void print(raw_ostream &O, const Module* = 0) const;
92 
93   static char ID; // Class identification, replacement for typeinfo
94 };
95 } // namespace
96 
97 char MachineScheduler::ID = 0;
98 
99 char &llvm::MachineSchedulerID = MachineScheduler::ID;
100 
101 INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
102                       "Machine Instruction Scheduler", false, false)
103 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
104 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
105 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
106 INITIALIZE_PASS_END(MachineScheduler, "misched",
107                     "Machine Instruction Scheduler", false, false)
108 
109 MachineScheduler::MachineScheduler()
110 : MachineFunctionPass(ID) {
111   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
112 }
113 
114 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
115   AU.setPreservesCFG();
116   AU.addRequiredID(MachineDominatorsID);
117   AU.addRequired<MachineLoopInfo>();
118   AU.addRequired<AliasAnalysis>();
119   AU.addRequired<TargetPassConfig>();
120   AU.addRequired<SlotIndexes>();
121   AU.addPreserved<SlotIndexes>();
122   AU.addRequired<LiveIntervals>();
123   AU.addPreserved<LiveIntervals>();
124   MachineFunctionPass::getAnalysisUsage(AU);
125 }
126 
127 MachinePassRegistry MachineSchedRegistry::Registry;
128 
129 /// A dummy default scheduler factory, used as a sentinel to detect whether
130 /// the scheduler has been overridden on the command line.
131 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
132   return 0;
133 }
134 
135 /// MachineSchedOpt allows command line selection of the scheduler.
136 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
137                RegisterPassParser<MachineSchedRegistry> >
138 MachineSchedOpt("misched",
139                 cl::init(&useDefaultMachineSched), cl::Hidden,
140                 cl::desc("Machine instruction scheduler to use"));
141 
142 static MachineSchedRegistry
143 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
144                      useDefaultMachineSched);
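
// Illustrative sketch (not part of this revision): a target or plugin can hook
// in its own scheduler through the same registry. Assuming a
// MachineSchedStrategy subclass named MyStrategy, the registration would look
// roughly like:
//
//   static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
//     return new ScheduleDAGMI(C, new MyStrategy());
//   }
//   static MachineSchedRegistry MySchedRegistry("my-sched",
//     "Experimental scheduler.", createMySched);
//
// which then becomes selectable on the command line with -misched=my-sched.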
145 
146 /// Forward declare the standard machine scheduler. This will be used as the
147 /// default scheduler if the target does not set a default.
148 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
149 
150 
151 /// Decrement this iterator until reaching the top or a non-debug instr.
152 static MachineBasicBlock::iterator
153 priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
154   assert(I != Beg && "reached the top of the region, cannot decrement");
155   while (--I != Beg) {
156     if (!I->isDebugValue())
157       break;
158   }
159   return I;
160 }
161 
162 /// If this iterator is a debug value, increment until reaching the End or a
163 /// non-debug instruction.
164 static MachineBasicBlock::iterator
165 nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
166   for (; I != End; ++I) {
167     if (!I->isDebugValue())
168       break;
169   }
170   return I;
171 }
172 
173 /// Top-level MachineScheduler pass driver.
174 ///
175 /// Visit blocks in function order. Divide each block into scheduling regions
176 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
177 /// consistent with the DAG builder, which traverses the interior of the
178 /// scheduling regions bottom-up.
179 ///
180 /// This design avoids exposing scheduling boundaries to the DAG builder,
181 /// simplifying the DAG builder's support for "special" target instructions.
182 /// At the same time the design allows target schedulers to operate across
183 /// scheduling boundaries, for example to bundle the boundary instructions
184 /// without reordering them. This creates complexity, because the target
185 /// scheduler must update the RegionBegin and RegionEnd positions cached by
186 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
187 /// design would be to split blocks at scheduling boundaries, but LLVM has a
188 /// general bias against block splitting purely for implementation simplicity.
189 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
190   DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
191 
192   // Initialize the context of the pass.
193   MF = &mf;
194   MLI = &getAnalysis<MachineLoopInfo>();
195   MDT = &getAnalysis<MachineDominatorTree>();
196   PassConfig = &getAnalysis<TargetPassConfig>();
197   AA = &getAnalysis<AliasAnalysis>();
198 
199   LIS = &getAnalysis<LiveIntervals>();
200   const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
201 
202   RegClassInfo->runOnMachineFunction(*MF);
203 
204   // Select the scheduler, or set the default.
205   MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
206   if (Ctor == useDefaultMachineSched) {
207     // Get the default scheduler set by the target.
208     Ctor = MachineSchedRegistry::getDefault();
209     if (!Ctor) {
210       Ctor = createConvergingSched;
211       MachineSchedRegistry::setDefault(Ctor);
212     }
213   }
214   // Instantiate the selected scheduler.
215   OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
216 
217   // Visit all machine basic blocks.
218   //
219   // TODO: Visit blocks in global postorder or postorder within the bottom-up
220   // loop tree. Then we can optionally compute global RegPressure.
221   for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
222        MBB != MBBEnd; ++MBB) {
223 
224     Scheduler->startBlock(MBB);
225 
226     // Break the block into scheduling regions [I, RegionEnd), and schedule each
227     // region as soon as it is discovered. RegionEnd points to the scheduling
228     // boundary at the bottom of the region. The DAG does not include RegionEnd,
229     // but the region does (i.e. the next RegionEnd is above the previous
230     // RegionBegin). If the current block has no terminator then RegionEnd ==
231     // MBB->end() for the bottom region.
232     //
233     // The Scheduler may insert instructions during either schedule() or
234     // exitRegion(), even for empty regions. So the local iterators 'I' and
235     // 'RegionEnd' are invalid across these calls.
236     unsigned RemainingInstrs = MBB->size();
237     for (MachineBasicBlock::iterator RegionEnd = MBB->end();
238         RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
239 
240       // Avoid decrementing RegionEnd for blocks with no terminator.
241       if (RegionEnd != MBB->end()
242           || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
243         --RegionEnd;
244         // Count the boundary instruction.
245         --RemainingInstrs;
246       }
247 
248       // The next region starts above the previous region. Look backward in the
249       // instruction stream until we find the nearest boundary.
250       MachineBasicBlock::iterator I = RegionEnd;
251       for (; I != MBB->begin(); --I, --RemainingInstrs) {
252         if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
253           break;
254       }
255       // Notify the scheduler of the region, even if we may skip scheduling
256       // it. Perhaps it still needs to be bundled.
257       Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);
258 
259       // Skip empty scheduling regions (0 or 1 schedulable instructions).
260       if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
261         // Close the current region. Bundle the terminator if needed.
262         // This invalidates 'RegionEnd' and 'I'.
263         Scheduler->exitRegion();
264         continue;
265       }
266       DEBUG(dbgs() << "********** MI Scheduling **********\n");
267       DEBUG(dbgs() << MF->getName()
268             << ":BB#" << MBB->getNumber() << "\n  From: " << *I << "    To: ";
269             if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
270             else dbgs() << "End";
271             dbgs() << " Remaining: " << RemainingInstrs << "\n");
272 
273       // Schedule a region: possibly reorder instructions.
274       // This invalidates 'RegionEnd' and 'I'.
275       Scheduler->schedule();
276 
277       // Close the current region.
278       Scheduler->exitRegion();
279 
280       // Scheduling has invalidated the current iterator 'I'. Ask the
281       // scheduler for the top of its scheduled region.
282       RegionEnd = Scheduler->begin();
283     }
284     assert(RemainingInstrs == 0 && "Instruction count mismatch!");
285     Scheduler->finishBlock();
286   }
287   Scheduler->finalizeSchedule();
288   DEBUG(LIS->print(dbgs()));
289   return true;
290 }
291 
292 void MachineScheduler::print(raw_ostream &O, const Module* m) const {
293   // unimplemented
294 }
295 
296 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
297 void ReadyQueue::dump() {
298   dbgs() << Name << ": ";
299   for (unsigned i = 0, e = Queue.size(); i < e; ++i)
300     dbgs() << Queue[i]->NodeNum << " ";
301   dbgs() << "\n";
302 }
303 #endif
304 
305 //===----------------------------------------------------------------------===//
306 // ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
307 // preservation.
308 //===----------------------------------------------------------------------===//
309 
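/// Add a DAG edge giving SuccSU the predecessor described by PredDep. Returns
/// false if the predecessor is already reachable from SuccSU, since the new
/// edge would create a cycle; otherwise it updates the topological ordering,
/// records the dependence on SuccSU, and returns true.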
310 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
311   // Do not use WillCreateCycle, it assumes SD scheduling.
312   // If Pred is reachable from Succ, then the edge creates a cycle.
313   if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
314     return false;
315   Topo.AddPred(SuccSU, PredDep.getSUnit());
316   SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
317   // Return true regardless of whether a new edge needed to be inserted.
318   return true;
319 }
320 
321 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
322 /// NumPredsLeft reaches zero, release the successor node.
323 ///
324 /// FIXME: Adjust SuccSU height based on MinLatency.
325 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
326   SUnit *SuccSU = SuccEdge->getSUnit();
327 
328   if (SuccEdge->isWeak()) {
329     --SuccSU->WeakPredsLeft;
330     if (SuccEdge->isCluster())
331       NextClusterSucc = SuccSU;
332     return;
333   }
334 #ifndef NDEBUG
335   if (SuccSU->NumPredsLeft == 0) {
336     dbgs() << "*** Scheduling failed! ***\n";
337     SuccSU->dump(this);
338     dbgs() << " has been released too many times!\n";
339     llvm_unreachable(0);
340   }
341 #endif
342   --SuccSU->NumPredsLeft;
343   if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
344     SchedImpl->releaseTopNode(SuccSU);
345 }
346 
347 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
348 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
349   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
350        I != E; ++I) {
351     releaseSucc(SU, &*I);
352   }
353 }
354 
355 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
356 /// NumSuccsLeft reaches zero, release the predecessor node.
357 ///
358 /// FIXME: Adjust PredSU height based on MinLatency.
359 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
360   SUnit *PredSU = PredEdge->getSUnit();
361 
362   if (PredEdge->isWeak()) {
363     --PredSU->WeakSuccsLeft;
364     if (PredEdge->isCluster())
365       NextClusterPred = PredSU;
366     return;
367   }
368 #ifndef NDEBUG
369   if (PredSU->NumSuccsLeft == 0) {
370     dbgs() << "*** Scheduling failed! ***\n";
371     PredSU->dump(this);
372     dbgs() << " has been released too many times!\n";
373     llvm_unreachable(0);
374   }
375 #endif
376   --PredSU->NumSuccsLeft;
377   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
378     SchedImpl->releaseBottomNode(PredSU);
379 }
380 
381 /// releasePredecessors - Call releasePred on each of SU's predecessors.
382 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
383   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
384        I != E; ++I) {
385     releasePred(SU, &*I);
386   }
387 }
388 
389 void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
390                                     MachineBasicBlock::iterator InsertPos) {
391   // Advance RegionBegin if the first instruction moves down.
392   if (&*RegionBegin == MI)
393     ++RegionBegin;
394 
395   // Update the instruction stream.
396   BB->splice(InsertPos, BB, MI);
397 
398   // Update LiveIntervals
399   LIS->handleMove(MI, /*UpdateFlags=*/true);
400 
401   // Recede RegionBegin if an instruction moves above the first.
402   if (RegionBegin == InsertPos)
403     RegionBegin = MI;
404 }
405 
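/// Debug-only safety valve for -misched-cutoff: once the cutoff number of
/// instructions has been scheduled, collapse the unscheduled zone and return
/// false so the caller stops scheduling the current region.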
406 bool ScheduleDAGMI::checkSchedLimit() {
407 #ifndef NDEBUG
408   if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
409     CurrentTop = CurrentBottom;
410     return false;
411   }
412   ++NumInstrsScheduled;
413 #endif
414   return true;
415 }
416 
417 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
418 /// crossing a scheduling boundary. [begin, end) includes all instructions in
419 /// the region, including the boundary itself and single-instruction regions
420 /// that don't get scheduled.
421 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
422                                 MachineBasicBlock::iterator begin,
423                                 MachineBasicBlock::iterator end,
424                                 unsigned endcount)
425 {
426   ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
427 
428   // For convenience remember the end of the liveness region.
429   LiveRegionEnd =
430     (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
431 }
432 
433 // Set up the register pressure trackers for the top scheduled and bottom
434 // scheduled regions.
435 void ScheduleDAGMI::initRegPressure() {
436   TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
437   BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
438 
439   // Close the RPTracker to finalize live ins.
440   RPTracker.closeRegion();
441 
442   DEBUG(RPTracker.getPressure().dump(TRI));
443 
444   // Initialize the live ins and live outs.
445   TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
446   BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
447 
448   // Close one end of the tracker so we can call
449   // getMaxUpward/DownwardPressureDelta before advancing across any
450   // instructions. This converts currently live regs into live ins/outs.
451   TopRPTracker.closeTop();
452   BotRPTracker.closeBottom();
453 
454   // Account for liveness generated by the region boundary.
455   if (LiveRegionEnd != RegionEnd)
456     BotRPTracker.recede();
457 
458   assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
459 
460   // Cache the list of excess pressure sets in this region. This will also track
461   // the max pressure in the scheduled code for these sets.
462   RegionCriticalPSets.clear();
463   std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
464   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
465     unsigned Limit = TRI->getRegPressureSetLimit(i);
466     DEBUG(dbgs() << TRI->getRegPressureSetName(i)
467           << " Limit " << Limit
468           << " Actual " << RegionPressure[i] << "\n");
469     if (RegionPressure[i] > Limit)
470       RegionCriticalPSets.push_back(PressureElement(i, 0));
471   }
472   DEBUG(dbgs() << "Excess PSets: ";
473         for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
474           dbgs() << TRI->getRegPressureSetName(
475             RegionCriticalPSets[i].PSetID) << " ";
476         dbgs() << "\n");
477 }
478 
479 // FIXME: When the pressure tracker deals in pressure differences then we won't
480 // iterate over all RegionCriticalPSets[i].
481 void ScheduleDAGMI::
482 updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
483   for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
484     unsigned ID = RegionCriticalPSets[i].PSetID;
485     int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
486     if ((int)NewMaxPressure[ID] > MaxUnits)
487       MaxUnits = NewMaxPressure[ID];
488   }
489 }
490 
491 /// schedule - Called back from MachineScheduler::runOnMachineFunction
492 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
493 /// only includes instructions that have DAG nodes, not scheduling boundaries.
494 ///
495 /// This is a skeletal driver, with all the functionality pushed into helpers,
496 /// so that it can be easily extended by experimental schedulers. Generally,
497 /// implementing MachineSchedStrategy should be sufficient to implement a new
498 /// scheduling algorithm. However, if a scheduler further subclasses
499 /// ScheduleDAGMI then it will want to override this virtual method in order to
500 /// update any specialized state.
501 void ScheduleDAGMI::schedule() {
502   buildDAGWithRegPressure();
503 
504   Topo.InitDAGTopologicalSorting();
505 
506   postprocessDAG();
507 
508   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
509           SUnits[su].dumpAll(this));
510 
511   if (ViewMISchedDAGs) viewGraph();
512 
513   initQueues();
514 
515   bool IsTopNode = false;
516   while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
517     assert(!SU->isScheduled && "Node already scheduled");
518     if (!checkSchedLimit())
519       break;
520 
521     scheduleMI(SU, IsTopNode);
522 
523     updateQueues(SU, IsTopNode);
524   }
525   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
526 
527   placeDebugValues();
528 
529   DEBUG({
530       unsigned BBNum = top()->getParent()->getNumber();
531       dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
532       dumpSchedule();
533       dbgs() << '\n';
534     });
535 }
536 
537 /// Build the DAG and setup three register pressure trackers.
538 void ScheduleDAGMI::buildDAGWithRegPressure() {
539   // Initialize the register pressure tracker used by buildSchedGraph.
540   RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
541 
542   // Account for liveness generated by the region boundary.
543   if (LiveRegionEnd != RegionEnd)
544     RPTracker.recede();
545 
546   // Build the DAG, and compute current register pressure.
547   buildSchedGraph(AA, &RPTracker);
548   if (ViewMISchedDAGs) viewGraph();
549 
550   // Initialize top/bottom trackers after computing region pressure.
551   initRegPressure();
552 }
553 
554 /// Apply each ScheduleDAGMutation step in order.
555 void ScheduleDAGMI::postprocessDAG() {
556   for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
557     Mutations[i]->apply(this);
558   }
559 }
560 
561 // Release all DAG roots for scheduling.
562 //
563 // Nodes with unreleased weak edges can still be roots.
564 void ScheduleDAGMI::releaseRoots() {
565   SmallVector<SUnit*, 16> BotRoots;
566 
567   for (std::vector<SUnit>::iterator
568          I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
569     SUnit *SU = &(*I);
570     // A SUnit is ready to top schedule if it has no predecessors.
571     if (!I->NumPredsLeft && SU != &EntrySU)
572       SchedImpl->releaseTopNode(SU);
573     // A SUnit is ready to bottom schedule if it has no successors.
574     if (!I->NumSuccsLeft && SU != &ExitSU)
575       BotRoots.push_back(SU);
576   }
577   // Release bottom roots in reverse order so the higher priority nodes appear
578   // first. This is more natural and slightly more efficient.
579   for (SmallVectorImpl<SUnit*>::const_reverse_iterator
580          I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
581     SchedImpl->releaseBottomNode(*I);
582 }
583 
584 /// Identify DAG roots and setup scheduler queues.
585 void ScheduleDAGMI::initQueues() {
586   NextClusterSucc = NULL;
587   NextClusterPred = NULL;
588 
589   // Initialize the strategy before modifying the DAG.
590   SchedImpl->initialize(this);
591 
592   // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
593   releaseRoots();
594 
595   releaseSuccessors(&EntrySU);
596   releasePredecessors(&ExitSU);
597 
598   SchedImpl->registerRoots();
599 
600   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
601   CurrentBottom = RegionEnd;
602 }
603 
604 /// Move an instruction and update register pressure.
605 void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
606   // Move the instruction to its new location in the instruction stream.
607   MachineInstr *MI = SU->getInstr();
608 
609   if (IsTopNode) {
610     assert(SU->isTopReady() && "node still has unscheduled dependencies");
611     if (&*CurrentTop == MI)
612       CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
613     else {
614       moveInstruction(MI, CurrentTop);
615       TopRPTracker.setPos(MI);
616     }
617 
618     // Update top scheduled pressure.
619     TopRPTracker.advance();
620     assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
621     updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
622   }
623   else {
624     assert(SU->isBottomReady() && "node still has unscheduled dependencies");
625     MachineBasicBlock::iterator priorII =
626       priorNonDebug(CurrentBottom, CurrentTop);
627     if (&*priorII == MI)
628       CurrentBottom = priorII;
629     else {
630       if (&*CurrentTop == MI) {
631         CurrentTop = nextIfDebug(++CurrentTop, priorII);
632         TopRPTracker.setPos(CurrentTop);
633       }
634       moveInstruction(MI, CurrentBottom);
635       CurrentBottom = MI;
636     }
637     // Update bottom scheduled pressure.
638     BotRPTracker.recede();
639     assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
640     updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
641   }
642 }
643 
644 /// Update scheduler queues after scheduling an instruction.
645 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
646   // Release dependent instructions for scheduling.
647   if (IsTopNode)
648     releaseSuccessors(SU);
649   else
650     releasePredecessors(SU);
651 
652   SU->isScheduled = true;
653 
654   // Notify the scheduling strategy after updating the DAG.
655   SchedImpl->schedNode(SU, IsTopNode);
656 }
657 
658 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
659 void ScheduleDAGMI::placeDebugValues() {
660   // If first instruction was a DBG_VALUE then put it back.
661   if (FirstDbgValue) {
662     BB->splice(RegionBegin, BB, FirstDbgValue);
663     RegionBegin = FirstDbgValue;
664   }
665 
666   for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
667          DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
668     std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
669     MachineInstr *DbgValue = P.first;
670     MachineBasicBlock::iterator OrigPrevMI = P.second;
671     BB->splice(++OrigPrevMI, BB, DbgValue);
672     if (OrigPrevMI == llvm::prior(RegionEnd))
673       RegionEnd = DbgValue;
674   }
675   DbgValues.clear();
676   FirstDbgValue = NULL;
677 }
678 
679 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
680 void ScheduleDAGMI::dumpSchedule() const {
681   for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
682     if (SUnit *SU = getSUnit(&(*MI)))
683       SU->dump(this);
684     else
685       dbgs() << "Missing SUnit\n";
686   }
687 }
688 #endif
689 
690 namespace {
691 /// \brief Post-process the DAG to create cluster edges between neighboring
692 /// loads.
693 class LoadClusterMutation : public ScheduleDAGMutation {
694   struct LoadInfo {
695     SUnit *SU;
696     unsigned BaseReg;
697     unsigned Offset;
698     LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
699       : SU(su), BaseReg(reg), Offset(ofs) {}
700   };
701   static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
702                            const LoadClusterMutation::LoadInfo &RHS);
703 
704   const TargetInstrInfo *TII;
705   const TargetRegisterInfo *TRI;
706 public:
707   LoadClusterMutation(const TargetInstrInfo *tii,
708                       const TargetRegisterInfo *tri)
709     : TII(tii), TRI(tri) {}
710 
711   virtual void apply(ScheduleDAGMI *DAG);
712 protected:
713   void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
714 };
715 } // anonymous
716 
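/// Sort key for load clustering: order by base register first, then by offset,
/// so loads off the same base end up adjacent after sorting.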
717 bool LoadClusterMutation::LoadInfoLess(
718   const LoadClusterMutation::LoadInfo &LHS,
719   const LoadClusterMutation::LoadInfo &RHS) {
720   if (LHS.BaseReg != RHS.BaseReg)
721     return LHS.BaseReg < RHS.BaseReg;
722   return LHS.Offset < RHS.Offset;
723 }
724 
725 void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
726                                                   ScheduleDAGMI *DAG) {
727   SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
728   for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
729     SUnit *SU = Loads[Idx];
730     unsigned BaseReg;
731     unsigned Offset;
732     if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
733       LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
734   }
735   if (LoadRecords.size() < 2)
736     return;
737   std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
738   unsigned ClusterLength = 1;
739   for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
740     if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
741       ClusterLength = 1;
742       continue;
743     }
744 
745     SUnit *SUa = LoadRecords[Idx].SU;
746     SUnit *SUb = LoadRecords[Idx+1].SU;
747     if (TII->shouldScheduleLoadsNear(SUa->getInstr(), SUb->getInstr(),
748                                      ClusterLength)
749         && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
750 
751       DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
752             << SUb->NodeNum << ")\n");
753       // Copy successor edges from SUa to SUb. Interleaving computation
754       // dependent on SUa can prevent load combining due to register reuse.
755       // Predecessor edges do not need to be copied from SUb to SUa since nearby
756       // loads should have effectively the same inputs.
757       for (SUnit::const_succ_iterator
758              SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
759         if (SI->getSUnit() == SUb)
760           continue;
761         DEBUG(dbgs() << "  Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
762         DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
763       }
764       ++ClusterLength;
765     }
766     else
767       ClusterLength = 1;
768   }
769 }
770 
771 /// \brief Callback from DAG postProcessing to create cluster edges for loads.
772 void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
773   // Map DAG NodeNum to store chain ID.
774   DenseMap<unsigned, unsigned> StoreChainIDs;
775   // Map each store chain to a set of dependent loads.
776   SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
777   for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
778     SUnit *SU = &DAG->SUnits[Idx];
779     if (!SU->getInstr()->mayLoad())
780       continue;
781     unsigned ChainPredID = DAG->SUnits.size();
782     for (SUnit::const_pred_iterator
783            PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
784       if (PI->isCtrl()) {
785         ChainPredID = PI->getSUnit()->NodeNum;
786         break;
787       }
788     }
789     // Check if this chain-like pred has been seen
790     // before. ChainPredID==SUnits.size() for loads at the top of the schedule.
791     unsigned NumChains = StoreChainDependents.size();
792     std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
793       StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
794     if (Result.second)
795       StoreChainDependents.resize(NumChains + 1);
796     StoreChainDependents[Result.first->second].push_back(SU);
797   }
798   // Iterate over the store chains.
799   for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
800     clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
801 }
802 
803 //===----------------------------------------------------------------------===//
804 // ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
805 //===----------------------------------------------------------------------===//
806 
807 namespace {
808 /// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
809 /// the schedule.
810 class ConvergingScheduler : public MachineSchedStrategy {
811 public:
812   /// Represent the type of SchedCandidate found within a single queue.
813   /// pickNodeBidirectional depends on these listed by decreasing priority.
814   enum CandReason {
815     NoCand, SingleExcess, SingleCritical, Cluster,
816     ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
817     TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
818     NodeOrder};
819 
820 #ifndef NDEBUG
821   static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
822 #endif
823 
824   /// Policy for scheduling the next instruction in the candidate's zone.
825   struct CandPolicy {
826     bool ReduceLatency;
827     unsigned ReduceResIdx;
828     unsigned DemandResIdx;
829 
830     CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
831   };
832 
833   /// Status of an instruction's critical resource consumption.
834   struct SchedResourceDelta {
835     // Count critical resources in the scheduled region required by SU.
836     unsigned CritResources;
837 
838     // Count critical resources from another region consumed by SU.
839     unsigned DemandedResources;
840 
841     SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
842 
843     bool operator==(const SchedResourceDelta &RHS) const {
844       return CritResources == RHS.CritResources
845         && DemandedResources == RHS.DemandedResources;
846     }
847     bool operator!=(const SchedResourceDelta &RHS) const {
848       return !operator==(RHS);
849     }
850   };
851 
852   /// Store the state used by ConvergingScheduler heuristics, required for the
853   /// lifetime of one invocation of pickNode().
854   struct SchedCandidate {
855     CandPolicy Policy;
856 
857     // The best SUnit candidate.
858     SUnit *SU;
859 
860     // The reason for this candidate.
861     CandReason Reason;
862 
863     // Register pressure values for the best candidate.
864     RegPressureDelta RPDelta;
865 
866     // Critical resource consumption of the best candidate.
867     SchedResourceDelta ResDelta;
868 
869     SchedCandidate(const CandPolicy &policy)
870     : Policy(policy), SU(NULL), Reason(NoCand) {}
871 
872     bool isValid() const { return SU; }
873 
874     // Copy the status of another candidate without changing policy.
875     void setBest(SchedCandidate &Best) {
876       assert(Best.Reason != NoCand && "uninitialized Sched candidate");
877       SU = Best.SU;
878       Reason = Best.Reason;
879       RPDelta = Best.RPDelta;
880       ResDelta = Best.ResDelta;
881     }
882 
883     void initResourceDelta(const ScheduleDAGMI *DAG,
884                            const TargetSchedModel *SchedModel);
885   };
886 
887   /// Summarize the unscheduled region.
888   struct SchedRemainder {
889     // Critical path through the DAG in expected latency.
890     unsigned CriticalPath;
891 
892     // Unscheduled resources
893     SmallVector<unsigned, 16> RemainingCounts;
894     // Critical resource for the unscheduled zone.
895     unsigned CritResIdx;
896     // Number of micro-ops left to schedule.
897     unsigned RemainingMicroOps;
898     // Is the unscheduled zone resource limited.
899     bool IsResourceLimited;
900 
901     unsigned MaxRemainingCount;
902 
903     void reset() {
904       CriticalPath = 0;
905       RemainingCounts.clear();
906       CritResIdx = 0;
907       RemainingMicroOps = 0;
908       IsResourceLimited = false;
909       MaxRemainingCount = 0;
910     }
911 
912     SchedRemainder() { reset(); }
913 
914     void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
915   };
916 
917   /// Each scheduling boundary is associated with ready queues. It tracks the
918   /// current cycle in the direction of movement, and maintains the state
919   /// of "hazards" and other interlocks at the current cycle.
920   struct SchedBoundary {
921     ScheduleDAGMI *DAG;
922     const TargetSchedModel *SchedModel;
923     SchedRemainder *Rem;
924 
925     ReadyQueue Available;
926     ReadyQueue Pending;
927     bool CheckPending;
928 
929     // For heuristics, keep a list of the nodes that immediately depend on the
930     // most recently scheduled node.
931     SmallPtrSet<const SUnit*, 8> NextSUs;
932 
933     ScheduleHazardRecognizer *HazardRec;
934 
935     unsigned CurrCycle;
936     unsigned IssueCount;
937 
938     /// MinReadyCycle - Cycle of the soonest available instruction.
939     unsigned MinReadyCycle;
940 
941     // The expected latency of the critical path in this scheduled zone.
942     unsigned ExpectedLatency;
943 
944     // Resources used in the scheduled zone beyond this boundary.
945     SmallVector<unsigned, 16> ResourceCounts;
946 
947     // Cache the critical resources ID in this scheduled zone.
948     unsigned CritResIdx;
949 
950     // Is the scheduled region resource limited vs. latency limited.
951     bool IsResourceLimited;
952 
953     unsigned ExpectedCount;
954 
955     // Policy flag: attempt to find ILP until expected latency is covered.
956     bool ShouldIncreaseILP;
957 
958 #ifndef NDEBUG
959     // Remember the greatest min operand latency.
960     unsigned MaxMinLatency;
961 #endif
962 
963     void reset() {
964       Available.clear();
965       Pending.clear();
966       CheckPending = false;
967       NextSUs.clear();
968       HazardRec = 0;
969       CurrCycle = 0;
970       IssueCount = 0;
971       MinReadyCycle = UINT_MAX;
972       ExpectedLatency = 0;
973       ResourceCounts.resize(1);
974       assert(!ResourceCounts[0] && "nonzero count for bad resource");
975       CritResIdx = 0;
976       IsResourceLimited = false;
977       ExpectedCount = 0;
978       ShouldIncreaseILP = false;
979 #ifndef NDEBUG
980       MaxMinLatency = 0;
981 #endif
982       // Reserve a zero-count for invalid CritResIdx.
983       ResourceCounts.resize(1);
984     }
985 
986     /// Pending queues extend the ready queues with the same ID and the
987     /// PendingFlag set.
988     SchedBoundary(unsigned ID, const Twine &Name):
989       DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
990       Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") {
991       reset();
992     }
993 
994     ~SchedBoundary() { delete HazardRec; }
995 
996     void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
997               SchedRemainder *rem);
998 
999     bool isTop() const {
1000       return Available.getID() == ConvergingScheduler::TopQID;
1001     }
1002 
1003     unsigned getUnscheduledLatency(SUnit *SU) const {
1004       if (isTop())
1005         return SU->getHeight();
1006       return SU->getDepth();
1007     }
1008 
1009     unsigned getCriticalCount() const {
1010       return ResourceCounts[CritResIdx];
1011     }
1012 
1013     bool checkHazard(SUnit *SU);
1014 
1015     void checkILPPolicy();
1016 
1017     void releaseNode(SUnit *SU, unsigned ReadyCycle);
1018 
1019     void bumpCycle();
1020 
1021     void countResource(unsigned PIdx, unsigned Cycles);
1022 
1023     void bumpNode(SUnit *SU);
1024 
1025     void releasePending();
1026 
1027     void removeReady(SUnit *SU);
1028 
1029     SUnit *pickOnlyChoice();
1030   };
1031 
1032 private:
1033   ScheduleDAGMI *DAG;
1034   const TargetSchedModel *SchedModel;
1035   const TargetRegisterInfo *TRI;
1036 
1037   // State of the top and bottom scheduled instruction boundaries.
1038   SchedRemainder Rem;
1039   SchedBoundary Top;
1040   SchedBoundary Bot;
1041 
1042 public:
1043   /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
1044   enum {
1045     TopQID = 1,
1046     BotQID = 2,
1047     LogMaxQID = 2
1048   };
1049 
1050   ConvergingScheduler():
1051     DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
1052 
1053   virtual void initialize(ScheduleDAGMI *dag);
1054 
1055   virtual SUnit *pickNode(bool &IsTopNode);
1056 
1057   virtual void schedNode(SUnit *SU, bool IsTopNode);
1058 
1059   virtual void releaseTopNode(SUnit *SU);
1060 
1061   virtual void releaseBottomNode(SUnit *SU);
1062 
1063   virtual void registerRoots();
1064 
1065 protected:
1066   void balanceZones(
1067     ConvergingScheduler::SchedBoundary &CriticalZone,
1068     ConvergingScheduler::SchedCandidate &CriticalCand,
1069     ConvergingScheduler::SchedBoundary &OppositeZone,
1070     ConvergingScheduler::SchedCandidate &OppositeCand);
1071 
1072   void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand,
1073                            ConvergingScheduler::SchedCandidate &BotCand);
1074 
1075   void tryCandidate(SchedCandidate &Cand,
1076                     SchedCandidate &TryCand,
1077                     SchedBoundary &Zone,
1078                     const RegPressureTracker &RPTracker,
1079                     RegPressureTracker &TempTracker);
1080 
1081   SUnit *pickNodeBidirectional(bool &IsTopNode);
1082 
1083   void pickNodeFromQueue(SchedBoundary &Zone,
1084                          const RegPressureTracker &RPTracker,
1085                          SchedCandidate &Candidate);
1086 
1087 #ifndef NDEBUG
1088   void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone);
1089 #endif
1090 };
1091 } // namespace
1092 
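/// Initialize the remainder for a new region: when the target provides an
/// instruction scheduling model, sum the micro-ops and the processor resource
/// cycles (scaled by each resource's factor) required by all SUnits in the DAG.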
1093 void ConvergingScheduler::SchedRemainder::
1094 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1095   reset();
1096   if (!SchedModel->hasInstrSchedModel())
1097     return;
1098   RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1099   for (std::vector<SUnit>::iterator
1100          I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
1101     const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
1102     RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC);
1103     for (TargetSchedModel::ProcResIter
1104            PI = SchedModel->getWriteProcResBegin(SC),
1105            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1106       unsigned PIdx = PI->ProcResourceIdx;
1107       unsigned Factor = SchedModel->getResourceFactor(PIdx);
1108       RemainingCounts[PIdx] += (Factor * PI->Cycles);
1109     }
1110   }
1111 }
1112 
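/// Bind this boundary to the DAG, machine model, and shared remainder, sizing
/// the per-resource counters when an instruction scheduling model is present.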
1113 void ConvergingScheduler::SchedBoundary::
1114 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1115   reset();
1116   DAG = dag;
1117   SchedModel = smodel;
1118   Rem = rem;
1119   if (SchedModel->hasInstrSchedModel())
1120     ResourceCounts.resize(SchedModel->getNumProcResourceKinds());
1121 }
1122 
1123 void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
1124   DAG = dag;
1125   SchedModel = DAG->getSchedModel();
1126   TRI = DAG->TRI;
1127   Rem.init(DAG, SchedModel);
1128   Top.init(DAG, SchedModel, &Rem);
1129   Bot.init(DAG, SchedModel, &Rem);
1130 
1131   // Initialize resource counts.
1132 
1133   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
1134   // are disabled, then these HazardRecs will be disabled.
1135   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
1136   const TargetMachine &TM = DAG->MF.getTarget();
1137   Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1138   Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1139 
1140   assert((!ForceTopDown || !ForceBottomUp) &&
1141          "-misched-topdown incompatible with -misched-bottomup");
1142 }
1143 
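/// Update SU's TopReadyCycle for top-down scheduling from its predecessors'
/// ready cycles plus minimum latency, then release it to the Top boundary.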
1144 void ConvergingScheduler::releaseTopNode(SUnit *SU) {
1145   if (SU->isScheduled)
1146     return;
1147 
1148   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1149        I != E; ++I) {
1150     unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
1151     unsigned MinLatency = I->getMinLatency();
1152 #ifndef NDEBUG
1153     Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
1154 #endif
1155     if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
1156       SU->TopReadyCycle = PredReadyCycle + MinLatency;
1157   }
1158   Top.releaseNode(SU, SU->TopReadyCycle);
1159 }
1160 
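/// Bottom-up counterpart of releaseTopNode: derive BotReadyCycle from the
/// successors (ignoring weak edges) and release SU to the Bot boundary.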
1161 void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
1162   if (SU->isScheduled)
1163     return;
1164 
1165   assert(SU->getInstr() && "Scheduled SUnit must have instr");
1166 
1167   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1168        I != E; ++I) {
1169     if (I->isWeak())
1170       continue;
1171     unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
1172     unsigned MinLatency = I->getMinLatency();
1173 #ifndef NDEBUG
1174     Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
1175 #endif
1176     if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
1177       SU->BotReadyCycle = SuccReadyCycle + MinLatency;
1178   }
1179   Bot.releaseNode(SU, SU->BotReadyCycle);
1180 }
1181 
1182 void ConvergingScheduler::registerRoots() {
1183   Rem.CriticalPath = DAG->ExitSU.getDepth();
1184   // Some roots may not feed into ExitSU. Check all of them in case.
1185   for (std::vector<SUnit*>::const_iterator
1186          I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
1187     if ((*I)->getDepth() > Rem.CriticalPath)
1188       Rem.CriticalPath = (*I)->getDepth();
1189   }
1190   DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
1191 }
1192 
1193 /// Does this SU have a hazard within the current instruction group.
1194 ///
1195 /// The scheduler supports two modes of hazard recognition. The first is the
1196 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1197 /// supports highly complicated in-order reservation tables
1198 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1199 ///
1200 /// The second is a streamlined mechanism that checks for hazards based on
1201 /// simple counters that the scheduler itself maintains. It explicitly checks
1202 /// for instruction dispatch limitations, including the number of micro-ops that
1203 /// can dispatch per cycle.
1204 ///
1205 /// TODO: Also check whether the SU must start a new group.
1206 bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
1207   if (HazardRec->isEnabled())
1208     return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
1209 
1210   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1211   if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) {
1212     DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
1213           << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1214     return true;
1215   }
1216   return false;
1217 }
1218 
1219 /// If expected latency is covered, disable ILP policy.
1220 void ConvergingScheduler::SchedBoundary::checkILPPolicy() {
1221   if (ShouldIncreaseILP
1222       && (IsResourceLimited || ExpectedLatency <= CurrCycle)) {
1223     ShouldIncreaseILP = false;
1224     DEBUG(dbgs() << "Disable ILP: " << Available.getName() << '\n');
1225   }
1226 }
1227 
1228 void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
1229                                                      unsigned ReadyCycle) {
1230 
1231   if (ReadyCycle < MinReadyCycle)
1232     MinReadyCycle = ReadyCycle;
1233 
1234   // Check for interlocks first. For the purpose of other heuristics, an
1235   // instruction that cannot issue appears as if it's not in the ReadyQueue.
1236   if (ReadyCycle > CurrCycle || checkHazard(SU))
1237     Pending.push(SU);
1238   else
1239     Available.push(SU);
1240 
1241   // Record this node as an immediate dependent of the scheduled node.
1242   NextSUs.insert(SU);
1243 
1244   // If CriticalPath has been computed, then check if the unscheduled nodes
1245   // exceed the ILP window. Before registerRoots, CriticalPath==0.
1246   if (Rem->CriticalPath && (ExpectedLatency + getUnscheduledLatency(SU)
1247                             > Rem->CriticalPath + ILPWindow)) {
1248     ShouldIncreaseILP = true;
1249     DEBUG(dbgs() << "Increase ILP: " << Available.getName() << " "
1250           << ExpectedLatency << " + " << getUnscheduledLatency(SU) << '\n');
1251   }
1252 }
1253 
1254 /// Move the boundary of scheduled code by one cycle.
1255 void ConvergingScheduler::SchedBoundary::bumpCycle() {
1256   unsigned Width = SchedModel->getIssueWidth();
1257   IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
1258 
1259   unsigned NextCycle = CurrCycle + 1;
1260   assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
1261   if (MinReadyCycle > NextCycle) {
1262     IssueCount = 0;
1263     NextCycle = MinReadyCycle;
1264   }
1265 
1266   if (!HazardRec->isEnabled()) {
1267     // Bypass HazardRec virtual calls.
1268     CurrCycle = NextCycle;
1269   }
1270   else {
1271     // Bypass getHazardType calls in case of long latency.
1272     for (; CurrCycle != NextCycle; ++CurrCycle) {
1273       if (isTop())
1274         HazardRec->AdvanceCycle();
1275       else
1276         HazardRec->RecedeCycle();
1277     }
1278   }
1279   CheckPending = true;
1280   IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
1281 
1282   DEBUG(dbgs() << "  *** " << Available.getName() << " cycle "
1283         << CurrCycle << '\n');
1284 }
1285 
1286 /// Add the given processor resource to this scheduled zone.
1287 void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
1288                                                        unsigned Cycles) {
1289   unsigned Factor = SchedModel->getResourceFactor(PIdx);
1290   DEBUG(dbgs() << "  " << SchedModel->getProcResource(PIdx)->Name
1291         << " +(" << Cycles << "x" << Factor
1292         << ") / " << SchedModel->getLatencyFactor() << '\n');
1293 
1294   unsigned Count = Factor * Cycles;
1295   ResourceCounts[PIdx] += Count;
1296   assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
1297   Rem->RemainingCounts[PIdx] -= Count;
1298 
1299   // Reset MaxRemainingCount for sanity.
1300   Rem->MaxRemainingCount = 0;
1301 
1302   // Check if this resource exceeds the current critical resource by a full
1303   // cycle. If so, it becomes the critical resource.
1304   if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
1305       >= (int)SchedModel->getLatencyFactor()) {
1306     CritResIdx = PIdx;
1307     DEBUG(dbgs() << "  *** Critical resource "
1308           << SchedModel->getProcResource(PIdx)->Name << " x"
1309           << ResourceCounts[PIdx] << '\n');
1310   }
1311 }
1312 
1313 /// Move the boundary of scheduled code by one SUnit.
1314 void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
1315   // Update the reservation table.
1316   if (HazardRec->isEnabled()) {
1317     if (!isTop() && SU->isCall) {
1318       // Calls are scheduled with their preceding instructions. For bottom-up
1319       // scheduling, clear the pipeline state before emitting.
1320       HazardRec->Reset();
1321     }
1322     HazardRec->EmitInstruction(SU);
1323   }
1324   // Update resource counts and critical resource.
1325   if (SchedModel->hasInstrSchedModel()) {
1326     const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1327     Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
1328     for (TargetSchedModel::ProcResIter
1329            PI = SchedModel->getWriteProcResBegin(SC),
1330            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1331       countResource(PI->ProcResourceIdx, PI->Cycles);
1332     }
1333   }
1334   if (isTop()) {
1335     if (SU->getDepth() > ExpectedLatency)
1336       ExpectedLatency = SU->getDepth();
1337   }
1338   else {
1339     if (SU->getHeight() > ExpectedLatency)
1340       ExpectedLatency = SU->getHeight();
1341   }
1342 
1343   IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
1344 
1345   // Check the instruction group dispatch limit.
1346   // TODO: Check if this SU must end a dispatch group.
1347   IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
1348 
1349   // checkHazard prevents scheduling multiple instructions per cycle that exceed
1350   // issue width. However, we commonly reach the maximum. In this case
1351   // opportunistically bump the cycle to avoid uselessly checking everything in
1352   // the readyQ. Furthermore, a single instruction may produce more than one
1353   // cycle's worth of micro-ops.
1354   if (IssueCount >= SchedModel->getIssueWidth()) {
1355     DEBUG(dbgs() << "  *** Max instrs at cycle " << CurrCycle << '\n');
1356     bumpCycle();
1357   }
1358 }
1359 
1360 /// Release pending ready nodes into the available queue. This makes them
1361 /// visible to heuristics.
1362 void ConvergingScheduler::SchedBoundary::releasePending() {
1363   // If the available queue is empty, it is safe to reset MinReadyCycle.
1364   if (Available.empty())
1365     MinReadyCycle = UINT_MAX;
1366 
1367   // Check to see if any of the pending instructions are ready to issue.  If
1368   // so, add them to the available queue.
1369   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
1370     SUnit *SU = *(Pending.begin()+i);
1371     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
1372 
1373     if (ReadyCycle < MinReadyCycle)
1374       MinReadyCycle = ReadyCycle;
1375 
1376     if (ReadyCycle > CurrCycle)
1377       continue;
1378 
1379     if (checkHazard(SU))
1380       continue;
1381 
1382     Available.push(SU);
1383     Pending.remove(Pending.begin()+i);
1384     --i; --e;
1385   }
1386   DEBUG(if (!Pending.empty()) Pending.dump());
1387   CheckPending = false;
1388 }
1389 
1390 /// Remove SU from the ready set for this boundary.
1391 void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
1392   if (Available.isInQueue(SU))
1393     Available.remove(Available.find(SU));
1394   else {
1395     assert(Pending.isInQueue(SU) && "bad ready count");
1396     Pending.remove(Pending.find(SU));
1397   }
1398 }
1399 
1400 /// If this queue only has one ready candidate, return it. As a side effect,
1401 /// defer any nodes that now hit a hazard, and advance the cycle until at least
1402 /// one node is ready. If multiple instructions are ready, return NULL.
1403 SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
1404   if (CheckPending)
1405     releasePending();
1406 
1407   if (IssueCount > 0) {
1408     // Defer any ready instrs that now have a hazard.
1409     for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
1410       if (checkHazard(*I)) {
1411         Pending.push(*I);
1412         I = Available.remove(I);
1413         continue;
1414       }
1415       ++I;
1416     }
1417   }
1418   for (unsigned i = 0; Available.empty(); ++i) {
1419     assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
1420            "permanent hazard"); (void)i;
1421     bumpCycle();
1422     releasePending();
1423   }
1424   if (Available.size() == 1)
1425     return *Available.begin();
1426   return NULL;
1427 }
1428 
1429 /// Record the candidate policy for opposite zones with different critical
1430 /// resources.
1431 ///
1432 /// If the CriticalZone is latency limited, don't force a policy for the
1433 /// candidates here. Instead, when releasing each candidate, releaseNode
1434 /// compares the region's critical path to the candidate's height or depth and
1435 /// the scheduled zone's expected latency then sets ShouldIncreaseILP.
1436 void ConvergingScheduler::balanceZones(
1437   ConvergingScheduler::SchedBoundary &CriticalZone,
1438   ConvergingScheduler::SchedCandidate &CriticalCand,
1439   ConvergingScheduler::SchedBoundary &OppositeZone,
1440   ConvergingScheduler::SchedCandidate &OppositeCand) {
1441 
1442   if (!CriticalZone.IsResourceLimited)
1443     return;
1444 
1445   SchedRemainder *Rem = CriticalZone.Rem;
1446 
1447   // If the critical zone is overconsuming a resource relative to the
1448   // remainder, try to reduce it.
1449   unsigned RemainingCritCount =
1450     Rem->RemainingCounts[CriticalZone.CritResIdx];
1451   if ((int)(Rem->MaxRemainingCount - RemainingCritCount)
1452       > (int)SchedModel->getLatencyFactor()) {
1453     CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
1454     DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
1455           << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
1456           << '\n');
1457   }
1458   // If the other zone is underconsuming a resource relative to the full zone,
1459   // try to increase it.
1460   unsigned OppositeCount =
1461     OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
1462   if ((int)(OppositeZone.ExpectedCount - OppositeCount)
1463       > (int)SchedModel->getLatencyFactor()) {
1464     OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
1465     DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
1466           << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
1467           << '\n');
1468   }
1469 }
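
// Worked example for balanceZones (illustrative numbers only, not from any
// real machine model): suppose the critical zone's critical resource has
// Rem->MaxRemainingCount == 24, RemainingCritCount == 18, and the latency
// factor is 2. Then 24 - 18 == 6 > 2, so the critical zone's candidate is
// asked to reduce that resource. If the opposite zone's ExpectedCount is 20
// but it has only accumulated 16 cycles on the same resource, 20 - 16 == 4 > 2
// and its candidate is asked to demand the resource instead.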
1470 
1471 /// Determine if the scheduled zones exceed resource limits or the critical
1472 /// path, and set each candidate's ReduceLatency/ReduceResIdx policy accordingly.
1473 void ConvergingScheduler::checkResourceLimits(
1474   ConvergingScheduler::SchedCandidate &TopCand,
1475   ConvergingScheduler::SchedCandidate &BotCand) {
1476 
1477   Bot.checkILPPolicy();
1478   Top.checkILPPolicy();
1479   if (Bot.ShouldIncreaseILP)
1480     BotCand.Policy.ReduceLatency = true;
1481   if (Top.ShouldIncreaseILP)
1482     TopCand.Policy.ReduceLatency = true;
1483 
1484   // Handle resource-limited regions.
1485   if (Top.IsResourceLimited && Bot.IsResourceLimited
1486       && Top.CritResIdx == Bot.CritResIdx) {
1487     // If the scheduled critical resource in both zones is no longer the
1488     // critical remaining resource, attempt to reduce resource height both ways.
1489     if (Top.CritResIdx != Rem.CritResIdx) {
1490       TopCand.Policy.ReduceResIdx = Top.CritResIdx;
1491       BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
1492       DEBUG(dbgs() << "Reduce scheduled "
1493             << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
1494     }
1495     return;
1496   }
1497   // Handle latency-limited regions.
1498   if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
1499     // If the total scheduled expected latency exceeds the region's critical
1500     // path then reduce latency both ways.
1501     //
1502     // Just because a zone is not resource limited does not mean it is latency
1503     // limited. Unbuffered resources, such as max micro-ops, may cause CurrCycle
1504     // to exceed expected latency.
1505     if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
1506         && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
1507       TopCand.Policy.ReduceLatency = true;
1508       BotCand.Policy.ReduceLatency = true;
1509       DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
1510             << " + " << Bot.ExpectedLatency << '\n');
1511     }
1512     return;
1513   }
1514   // The critical resource is different in each zone, so request balancing.
1515 
1516   // Compute the cost of each zone.
1517   Rem.MaxRemainingCount = std::max(
1518     Rem.RemainingMicroOps * SchedModel->getMicroOpFactor(),
1519     Rem.RemainingCounts[Rem.CritResIdx]);
1520   Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
1521   Top.ExpectedCount = std::max(
1522     Top.getCriticalCount(),
1523     Top.ExpectedCount * SchedModel->getLatencyFactor());
1524   Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
1525   Bot.ExpectedCount = std::max(
1526     Bot.getCriticalCount(),
1527     Bot.ExpectedCount * SchedModel->getLatencyFactor());
1528 
1529   balanceZones(Top, TopCand, Bot, BotCand);
1530   balanceZones(Bot, BotCand, Top, TopCand);
1531 }
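
// Illustrative arithmetic for the ExpectedCount computation above
// (hypothetical latency factor of 2): a zone whose max(ExpectedLatency,
// CurrCycle) is 7 cycles gets ExpectedCount == max(getCriticalCount(), 7 * 2),
// i.e. at least 14. Scaling cycles by the latency factor is what makes them
// directly comparable with per-resource issue counts when balanceZones runs.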
1532 
1533 void ConvergingScheduler::SchedCandidate::
1534 initResourceDelta(const ScheduleDAGMI *DAG,
1535                   const TargetSchedModel *SchedModel) {
1536   if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
1537     return;
1538 
1539   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1540   for (TargetSchedModel::ProcResIter
1541          PI = SchedModel->getWriteProcResBegin(SC),
1542          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1543     if (PI->ProcResourceIdx == Policy.ReduceResIdx)
1544       ResDelta.CritResources += PI->Cycles;
1545     if (PI->ProcResourceIdx == Policy.DemandResIdx)
1546       ResDelta.DemandedResources += PI->Cycles;
1547   }
1548 }
1549 
1550 /// Return true if this heuristic determines order.
1551 static bool tryLess(unsigned TryVal, unsigned CandVal,
1552                     ConvergingScheduler::SchedCandidate &TryCand,
1553                     ConvergingScheduler::SchedCandidate &Cand,
1554                     ConvergingScheduler::CandReason Reason) {
1555   if (TryVal < CandVal) {
1556     TryCand.Reason = Reason;
1557     return true;
1558   }
1559   if (TryVal > CandVal) {
1560     if (Cand.Reason > Reason)
1561       Cand.Reason = Reason;
1562     return true;
1563   }
1564   return false;
1565 }
1566 
1567 static bool tryGreater(unsigned TryVal, unsigned CandVal,
1568                        ConvergingScheduler::SchedCandidate &TryCand,
1569                        ConvergingScheduler::SchedCandidate &Cand,
1570                        ConvergingScheduler::CandReason Reason) {
1571   if (TryVal > CandVal) {
1572     TryCand.Reason = Reason;
1573     return true;
1574   }
1575   if (TryVal < CandVal) {
1576     if (Cand.Reason > Reason)
1577       Cand.Reason = Reason;
1578     return true;
1579   }
1580   return false;
1581 }
1582 
1583 static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
1584   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
1585 }
1586 
1587 /// Apply a set of heuristics to a new candidate. Heuristics are currently
1588 /// hierarchical. This may be more efficient than a graduated cost model because
1589 /// we don't need to evaluate all aspects of the model for each node in the
1590 /// queue. But it's really done to make the heuristics easier to debug and
1591 /// statistically analyze.
1592 ///
1593 /// \param Cand provides the policy and current best candidate.
1594 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
1595 /// \param Zone describes the scheduled zone that we are extending.
1596 /// \param RPTracker describes reg pressure within the scheduled zone.
1597 /// \param TempTracker is a scratch pressure tracker to reuse in queries.
1598 void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
1599                                        SchedCandidate &TryCand,
1600                                        SchedBoundary &Zone,
1601                                        const RegPressureTracker &RPTracker,
1602                                        RegPressureTracker &TempTracker) {
1603 
1604   // Always initialize TryCand's RPDelta.
1605   TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
1606                                   DAG->getRegionCriticalPSets(),
1607                                   DAG->getRegPressure().MaxSetPressure);
1608 
1609   // Initialize the candidate if needed.
1610   if (!Cand.isValid()) {
1611     TryCand.Reason = NodeOrder;
1612     return;
1613   }
1614   // Avoid exceeding the target's limit.
1615   if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
1616               Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
1617     return;
1618   if (Cand.Reason == SingleExcess)
1619     Cand.Reason = MultiPressure;
1620 
1621   // Avoid increasing the max critical pressure in the scheduled region.
1622   if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
1623               Cand.RPDelta.CriticalMax.UnitIncrease,
1624               TryCand, Cand, SingleCritical))
1625     return;
1626   if (Cand.Reason == SingleCritical)
1627     Cand.Reason = MultiPressure;
1628 
1629   // Keep clustered nodes together to encourage downstream peephole
1630   // optimizations which may reduce resource requirements.
1631   //
1632   // This is a best effort to set things up for a post-RA pass. Optimizations
1633   // like generating loads of multiple registers should ideally be done within
1634   // the scheduler pass by combining the loads during DAG postprocessing.
1635   const SUnit *NextClusterSU =
1636     Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
1637   if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
1638                  TryCand, Cand, Cluster))
1639     return;
1640   // Currently, weak edges are for clustering, so we hard-code that reason.
1641   // However, deferring the current TryCand will not change Cand's reason.
1642   CandReason OrigReason = Cand.Reason;
1643   if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
1644               getWeakLeft(Cand.SU, Zone.isTop()),
1645               TryCand, Cand, Cluster)) {
1646     Cand.Reason = OrigReason;
1647     return;
1648   }
1649   // Avoid critical resource consumption and balance the schedule.
1650   TryCand.initResourceDelta(DAG, SchedModel);
1651   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
1652               TryCand, Cand, ResourceReduce))
1653     return;
1654   if (tryGreater(TryCand.ResDelta.DemandedResources,
1655                  Cand.ResDelta.DemandedResources,
1656                  TryCand, Cand, ResourceDemand))
1657     return;
1658 
1659   // Avoid serializing long latency dependence chains.
1660   if (Cand.Policy.ReduceLatency) {
1661     if (Zone.isTop()) {
1662       if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
1663           > Zone.ExpectedCount) {
1664         if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
1665                     TryCand, Cand, TopDepthReduce))
1666           return;
1667       }
1668       if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
1669                      TryCand, Cand, TopPathReduce))
1670         return;
1671     }
1672     else {
1673       if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
1674           > Zone.ExpectedCount) {
1675         if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
1676                     TryCand, Cand, BotHeightReduce))
1677           return;
1678       }
1679       if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
1680                      TryCand, Cand, BotPathReduce))
1681         return;
1682     }
1683   }
1684 
1685   // Avoid increasing the max pressure of the entire region.
1686   if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
1687               Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
1688     return;
1689   if (Cand.Reason == SingleMax)
1690     Cand.Reason = MultiPressure;
1691 
1692   // Prefer immediate defs/users of the last scheduled instruction. This is a
1693   // nice pressure avoidance strategy that also conserves the processor's
1694   // register renaming resources and keeps the machine code readable.
1695   if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
1696                  TryCand, Cand, NextDefUse))
1697     return;
1698 
1699   // Fall through to original instruction order.
1700   if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
1701       || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
1702     TryCand.Reason = NodeOrder;
1703   }
1704 }
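
// The hierarchy above runs from strongest to weakest reason: register excess,
// critical-set pressure, clustering (including weak edges), critical resource
// reduction, resource demand, latency (depth/height per direction), overall
// max pressure, def/use locality, and finally original node order. As a small
// illustration with hypothetical SUnits: if scheduling SU(5) next would push a
// register set past the target's limit while SU(9) would not, SU(9) wins on
// SingleExcess and none of the later heuristics are consulted for that pair.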
1705 
1706 /// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is
1707 /// more desirable than RHS from scheduling standpoint.
1708 static bool compareRPDelta(const RegPressureDelta &LHS,
1709                            const RegPressureDelta &RHS) {
1710   // Compare each component of pressure in decreasing order of importance
1711   // without checking if any are valid. Invalid PressureElements are assumed to
1712   // have UnitIncrease==0, so are neutral.
1713 
1714   // Avoid increasing the max critical pressure in the scheduled region.
1715   if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
1716     DEBUG(dbgs() << "RP excess top - bot: "
1717           << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
1718     return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
1719   }
1720   // Avoid increasing the max critical pressure in the scheduled region.
1721   if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
1722     DEBUG(dbgs() << "RP critical top - bot: "
1723           << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
1724           << '\n');
1725     return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
1726   }
1727   // Avoid increasing the max pressure of the entire region.
1728   if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
1729     DEBUG(dbgs() << "RP current top - bot: "
1730           << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
1731           << '\n');
1732     return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
1733   }
1734   return false;
1735 }
1736 
1737 #ifndef NDEBUG
1738 const char *ConvergingScheduler::getReasonStr(
1739   ConvergingScheduler::CandReason Reason) {
1740   switch (Reason) {
1741   case NoCand:         return "NOCAND    ";
1742   case SingleExcess:   return "REG-EXCESS";
1743   case SingleCritical: return "REG-CRIT  ";
1744   case Cluster:        return "CLUSTER   ";
1745   case SingleMax:      return "REG-MAX   ";
1746   case MultiPressure:  return "REG-MULTI ";
1747   case ResourceReduce: return "RES-REDUCE";
1748   case ResourceDemand: return "RES-DEMAND";
1749   case TopDepthReduce: return "TOP-DEPTH ";
1750   case TopPathReduce:  return "TOP-PATH  ";
1751   case BotHeightReduce:return "BOT-HEIGHT";
1752   case BotPathReduce:  return "BOT-PATH  ";
1753   case NextDefUse:     return "DEF-USE   ";
1754   case NodeOrder:      return "ORDER     ";
1755   }
1756   llvm_unreachable("Unknown reason!");
1757 }
1758 
1759 void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
1760                                          const SchedBoundary &Zone) {
1761   const char *Label = getReasonStr(Cand.Reason);
1762   PressureElement P;
1763   unsigned ResIdx = 0;
1764   unsigned Latency = 0;
1765   switch (Cand.Reason) {
1766   default:
1767     break;
1768   case SingleExcess:
1769     P = Cand.RPDelta.Excess;
1770     break;
1771   case SingleCritical:
1772     P = Cand.RPDelta.CriticalMax;
1773     break;
1774   case SingleMax:
1775     P = Cand.RPDelta.CurrentMax;
1776     break;
1777   case ResourceReduce:
1778     ResIdx = Cand.Policy.ReduceResIdx;
1779     break;
1780   case ResourceDemand:
1781     ResIdx = Cand.Policy.DemandResIdx;
1782     break;
1783   case TopDepthReduce:
1784     Latency = Cand.SU->getDepth();
1785     break;
1786   case TopPathReduce:
1787     Latency = Cand.SU->getHeight();
1788     break;
1789   case BotHeightReduce:
1790     Latency = Cand.SU->getHeight();
1791     break;
1792   case BotPathReduce:
1793     Latency = Cand.SU->getDepth();
1794     break;
1795   }
1796   dbgs() << Label << " " << Zone.Available.getName() << " ";
1797   if (P.isValid())
1798     dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
1799            << " ";
1800   else
1801     dbgs() << "     ";
1802   if (ResIdx)
1803     dbgs() << SchedModel->getProcResource(ResIdx)->Name << " ";
1804   else
1805     dbgs() << "        ";
1806   if (Latency)
1807     dbgs() << Latency << " cycles ";
1808   else
1809     dbgs() << "         ";
1810   Cand.SU->dump(DAG);
1811 }
1812 #endif
1813 
1814 /// Pick the best candidate from the given ready queue.
1815 ///
1816 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
1817 /// DAG building. To adjust for the current scheduling location we need to
1818 /// maintain the number of vreg uses remaining to be top-scheduled.
1819 void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
1820                                             const RegPressureTracker &RPTracker,
1821                                             SchedCandidate &Cand) {
1822   ReadyQueue &Q = Zone.Available;
1823 
1824   DEBUG(Q.dump());
1825 
1826   // getMaxPressureDelta temporarily modifies the tracker.
1827   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
1828 
1829   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
1830 
1831     SchedCandidate TryCand(Cand.Policy);
1832     TryCand.SU = *I;
1833     tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
1834     if (TryCand.Reason != NoCand) {
1835       // Initialize resource delta if needed in case future heuristics query it.
1836       if (TryCand.ResDelta == SchedResourceDelta())
1837         TryCand.initResourceDelta(DAG, SchedModel);
1838       Cand.setBest(TryCand);
1839       DEBUG(traceCandidate(Cand, Zone));
1840     }
1841     TryCand.SU = *I;
1842   }
1843 }
1844 
1845 static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
1846                       bool IsTop) {
1847   DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot")
1848         << " SU(" << Cand.SU->NodeNum << ") "
1849         << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
1850 }
1851 
1852 /// Pick the best candidate node from either the top or bottom queue.
1853 SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
1854   // Schedule as far as possible in the direction of no choice. This is most
1855   // efficient, but also provides the best heuristics for CriticalPSets.
1856   if (SUnit *SU = Bot.pickOnlyChoice()) {
1857     IsTopNode = false;
1858     return SU;
1859   }
1860   if (SUnit *SU = Top.pickOnlyChoice()) {
1861     IsTopNode = true;
1862     return SU;
1863   }
1864   CandPolicy NoPolicy;
1865   SchedCandidate BotCand(NoPolicy);
1866   SchedCandidate TopCand(NoPolicy);
1867   checkResourceLimits(TopCand, BotCand);
1868 
1869   // Prefer bottom scheduling when heuristics are silent.
1870   pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
1871   assert(BotCand.Reason != NoCand && "failed to find the first candidate");
1872 
1873   // If either Q has a single candidate that provides the least increase in
1874   // Excess pressure, we can immediately schedule from that Q.
1875   //
1876   // RegionCriticalPSets summarizes the pressure within the scheduled region and
1877   // affects picking from either Q. If scheduling in one direction must
1878   // increase pressure for one of the excess PSets, then schedule in that
1879   // direction first to provide more freedom in the other direction.
1880   if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
1881     IsTopNode = false;
1882     tracePick(BotCand, IsTopNode);
1883     return BotCand.SU;
1884   }
1885   // Check if the top Q has a better candidate.
1886   pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
1887   assert(TopCand.Reason != NoCand && "failed to find the first candidate");
1888 
1889   // If either Q has a single candidate that minimizes pressure above the
1890   // original region's pressure, pick it.
1891   if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
1892     if (TopCand.Reason < BotCand.Reason) {
1893       IsTopNode = true;
1894       tracePick(TopCand, IsTopNode);
1895       return TopCand.SU;
1896     }
1897     IsTopNode = false;
1898     tracePick(BotCand, IsTopNode);
1899     return BotCand.SU;
1900   }
1901   // Check for a salient pressure difference and pick the best from either side.
1902   if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
1903     IsTopNode = true;
1904     tracePick(TopCand, IsTopNode);
1905     return TopCand.SU;
1906   }
1907   // Otherwise prefer the bottom candidate, in node order if all else failed.
1908   if (TopCand.Reason < BotCand.Reason) {
1909     IsTopNode = true;
1910     tracePick(TopCand, IsTopNode);
1911     return TopCand.SU;
1912   }
1913   IsTopNode = false;
1914   tracePick(BotCand, IsTopNode);
1915   return BotCand.SU;
1916 }
1917 
1918 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
1919 SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
1920   if (DAG->top() == DAG->bottom()) {
1921     assert(Top.Available.empty() && Top.Pending.empty() &&
1922            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
1923     return NULL;
1924   }
1925   SUnit *SU;
1926   do {
1927     if (ForceTopDown) {
1928       SU = Top.pickOnlyChoice();
1929       if (!SU) {
1930         CandPolicy NoPolicy;
1931         SchedCandidate TopCand(NoPolicy);
1932         pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
1933         assert(TopCand.Reason != NoCand && "failed to find the first candidate");
1934         SU = TopCand.SU;
1935       }
1936       IsTopNode = true;
1937     }
1938     else if (ForceBottomUp) {
1939       SU = Bot.pickOnlyChoice();
1940       if (!SU) {
1941         CandPolicy NoPolicy;
1942         SchedCandidate BotCand(NoPolicy);
1943         pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
1944         assert(BotCand.Reason != NoCand && "failed to find the first candidate");
1945         SU = BotCand.SU;
1946       }
1947       IsTopNode = false;
1948     }
1949     else {
1950       SU = pickNodeBidirectional(IsTopNode);
1951     }
1952   } while (SU->isScheduled);
1953 
1954   if (SU->isTopReady())
1955     Top.removeReady(SU);
1956   if (SU->isBottomReady())
1957     Bot.removeReady(SU);
1958 
1959   DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
1960         << " Scheduling Instruction in cycle "
1961         << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
1962         SU->dump(DAG));
1963   return SU;
1964 }
1965 
1966 /// Update the scheduler's state after scheduling a node. This is the same node
1967 /// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
1968 /// its state based on the current cycle before MachineSchedStrategy does.
1969 void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
1970   if (IsTopNode) {
1971     SU->TopReadyCycle = Top.CurrCycle;
1972     Top.bumpNode(SU);
1973   }
1974   else {
1975     SU->BotReadyCycle = Bot.CurrCycle;
1976     Bot.bumpNode(SU);
1977   }
1978 }
1979 
1980 /// Create the standard converging machine scheduler. This will be used as the
1981 /// default scheduler if the target does not set a default.
1982 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
1983   assert((!ForceTopDown || !ForceBottomUp) &&
1984          "-misched-topdown incompatible with -misched-bottomup");
1985   ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
1986   // Register DAG post-processors.
1987   if (EnableLoadCluster)
1988     DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
1989   return DAG;
1990 }
1991 static MachineSchedRegistry
1992 ConvergingSchedRegistry("converge", "Standard converging scheduler.",
1993                         createConvergingSched);
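
// The string passed to MachineSchedRegistry is the name used to select a
// scheduler from the command line. For example (assuming a build where the
// machine scheduler is enabled, e.g. via -enable-misched):
//
//   llc -enable-misched -misched=converge foo.ll
//
// picks this converging scheduler explicitly, while -misched=ilpmax,
// -misched=ilpmin, or -misched=shuffle select the experimental strategies
// registered below.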
1994 
1995 //===----------------------------------------------------------------------===//
1996 // ILP Scheduler. Currently for experimental analysis of heuristics.
1997 //===----------------------------------------------------------------------===//
1998 
1999 namespace {
2000 /// \brief Order nodes by the ILP metric.
2001 struct ILPOrder {
2002   ScheduleDAGILP *ILP;
2003   bool MaximizeILP;
2004 
2005   ILPOrder(ScheduleDAGILP *ilp, bool MaxILP): ILP(ilp), MaximizeILP(MaxILP) {}
2006 
2007   /// \brief Apply a less-than relation on node priority.
2008   bool operator()(const SUnit *A, const SUnit *B) const {
2009     // Return true if A comes after B in the Q.
2010     if (MaximizeILP)
2011       return ILP->getILP(A) < ILP->getILP(B);
2012     else
2013       return ILP->getILP(A) > ILP->getILP(B);
2014   }
2015 };
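
// ILPOrder is used with std::push_heap/std::pop_heap on ReadyQ below, so it
// is a "less than" in priority: when MaximizeILP is set, the node with the
// largest ILP value surfaces at the front of the heap. A minimal standalone
// sketch of the same mechanism with plain integers (hypothetical values, not
// tied to ScheduleDAGILP):
//
//   std::vector<int> Heap = {3, 7, 1};
//   std::make_heap(Heap.begin(), Heap.end()); // max-heap: 7 at Heap.front()
//   std::pop_heap(Heap.begin(), Heap.end());  // moves 7 to Heap.back()
//   int Best = Heap.back(); Heap.pop_back();  // Best == 7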
2016 
2017 /// \brief Schedule based on the ILP metric.
2018 class ILPScheduler : public MachineSchedStrategy {
2019   ScheduleDAGILP ILP;
2020   ILPOrder Cmp;
2021 
2022   std::vector<SUnit*> ReadyQ;
2023 public:
2024   ILPScheduler(bool MaximizeILP)
2025   : ILP(/*BottomUp=*/true), Cmp(&ILP, MaximizeILP) {}
2026 
2027   virtual void initialize(ScheduleDAGMI *DAG) {
2028     ReadyQ.clear();
2029     ILP.resize(DAG->SUnits.size());
2030   }
2031 
2032   virtual void registerRoots() {
2033     for (std::vector<SUnit*>::const_iterator
2034            I = ReadyQ.begin(), E = ReadyQ.end(); I != E; ++I) {
2035       ILP.computeILP(*I);
2036     }
2037   }
2038 
2039   /// Implement MachineSchedStrategy interface.
2040   /// -----------------------------------------
2041 
2042   virtual SUnit *pickNode(bool &IsTopNode) {
2043     if (ReadyQ.empty()) return NULL;
2044     std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2045     SUnit *SU = ReadyQ.back();
2046     ReadyQ.pop_back();
2047     IsTopNode = false;
2048     DEBUG(dbgs() << "*** Scheduling " << *SU->getInstr()
2049           << " ILP: " << ILP.getILP(SU) << '\n');
2050     return SU;
2051   }
2052 
2053   virtual void schedNode(SUnit *, bool) {}
2054 
2055   virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
2056 
2057   virtual void releaseBottomNode(SUnit *SU) {
2058     ReadyQ.push_back(SU);
2059     std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2060   }
2061 };
2062 } // namespace
2063 
2064 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
2065   return new ScheduleDAGMI(C, new ILPScheduler(true));
2066 }
2067 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
2068   return new ScheduleDAGMI(C, new ILPScheduler(false));
2069 }
2070 static MachineSchedRegistry ILPMaxRegistry(
2071   "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
2072 static MachineSchedRegistry ILPMinRegistry(
2073   "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
2074 
2075 //===----------------------------------------------------------------------===//
2076 // Machine Instruction Shuffler for Correctness Testing
2077 //===----------------------------------------------------------------------===//
2078 
2079 #ifndef NDEBUG
2080 namespace {
2081 /// Apply a less-than relation on the node order, which corresponds to the
2082 /// instruction order prior to scheduling. IsReverse implements greater-than.
2083 template<bool IsReverse>
2084 struct SUnitOrder {
2085   bool operator()(SUnit *A, SUnit *B) const {
2086     if (IsReverse)
2087       return A->NodeNum > B->NodeNum;
2088     else
2089       return A->NodeNum < B->NodeNum;
2090   }
2091 };
2092 
2093 /// Reorder instructions as much as possible.
2094 class InstructionShuffler : public MachineSchedStrategy {
2095   bool IsAlternating;
2096   bool IsTopDown;
2097 
2098   // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
2099   // gives nodes with a higher number higher priority, causing the latest
2100   // instructions to be scheduled first.
2101   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
2102     TopQ;
2103   // When scheduling bottom-up, use greater-than as the queue priority.
2104   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
2105     BottomQ;
2106 public:
2107   InstructionShuffler(bool alternate, bool topdown)
2108     : IsAlternating(alternate), IsTopDown(topdown) {}
2109 
2110   virtual void initialize(ScheduleDAGMI *) {
2111     TopQ.clear();
2112     BottomQ.clear();
2113   }
2114 
2115   /// Implement MachineSchedStrategy interface.
2116   /// -----------------------------------------
2117 
2118   virtual SUnit *pickNode(bool &IsTopNode) {
2119     SUnit *SU;
2120     if (IsTopDown) {
2121       do {
2122         if (TopQ.empty()) return NULL;
2123         SU = TopQ.top();
2124         TopQ.pop();
2125       } while (SU->isScheduled);
2126       IsTopNode = true;
2127     }
2128     else {
2129       do {
2130         if (BottomQ.empty()) return NULL;
2131         SU = BottomQ.top();
2132         BottomQ.pop();
2133       } while (SU->isScheduled);
2134       IsTopNode = false;
2135     }
2136     if (IsAlternating)
2137       IsTopDown = !IsTopDown;
2138     return SU;
2139   }
2140 
2141   virtual void schedNode(SUnit *SU, bool IsTopNode) {}
2142 
2143   virtual void releaseTopNode(SUnit *SU) {
2144     TopQ.push(SU);
2145   }
2146   virtual void releaseBottomNode(SUnit *SU) {
2147     BottomQ.push(SU);
2148   }
2149 };
2150 } // namespace
2151 
2152 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
2153   bool Alternate = !ForceTopDown && !ForceBottomUp;
2154   bool TopDown = !ForceBottomUp;
2155   assert((TopDown || !ForceTopDown) &&
2156          "-misched-topdown incompatible with -misched-bottomup");
2157   return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
2158 }
2159 static MachineSchedRegistry ShufflerRegistry(
2160   "shuffle", "Shuffle machine instructions alternating directions",
2161   createInstructionShuffler);
2162 #endif // !NDEBUG
2163