xref: /llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp (revision 44f750a3e58eb6ae84a4476aa4befc0bb406dffa)
1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // MachineScheduler schedules machine instructions after phi elimination. It
11 // preserves LiveIntervals so it can be invoked before register allocation.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #define DEBUG_TYPE "misched"
16 
17 #include "llvm/CodeGen/MachineScheduler.h"
18 #include "llvm/ADT/OwningPtr.h"
19 #include "llvm/ADT/PriorityQueue.h"
20 #include "llvm/Analysis/AliasAnalysis.h"
21 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
22 #include "llvm/CodeGen/Passes.h"
23 #include "llvm/CodeGen/RegisterClassInfo.h"
24 #include "llvm/CodeGen/ScheduleDFS.h"
25 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/ErrorHandling.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <queue>
31 
32 using namespace llvm;
33 
34 namespace llvm {
35 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
36                            cl::desc("Force top-down list scheduling"));
37 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
38                             cl::desc("Force bottom-up list scheduling"));
39 }
40 
41 #ifndef NDEBUG
42 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
43   cl::desc("Pop up a window to show MISched dags after they are processed"));
44 
45 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
46   cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
47 #else
48 static bool ViewMISchedDAGs = false;
49 #endif // NDEBUG
50 
51 // Experimental heuristics
52 static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
53   cl::desc("Enable load clustering."), cl::init(true));
54 
55 // Experimental heuristics
56 static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
57   cl::desc("Enable scheduling for macro fusion."), cl::init(true));
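// Usage sketch (illustrative): the cl::opts above and below are ordinary llc
// options, e.g. "llc ... -misched-topdown -misched-cutoff=20 -view-misched-dags";
// -misched-cutoff and -view-misched-dags require a build with assertions
// enabled (NDEBUG not defined).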
58 
59 // DAG subtrees must have at least this many nodes.
60 static const unsigned MinSubtreeSize = 8;
61 
62 //===----------------------------------------------------------------------===//
63 // Machine Instruction Scheduling Pass and Registry
64 //===----------------------------------------------------------------------===//
65 
66 MachineSchedContext::MachineSchedContext():
67     MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
68   RegClassInfo = new RegisterClassInfo();
69 }
70 
71 MachineSchedContext::~MachineSchedContext() {
72   delete RegClassInfo;
73 }
74 
75 namespace {
76 /// MachineScheduler runs after coalescing and before register allocation.
77 class MachineScheduler : public MachineSchedContext,
78                          public MachineFunctionPass {
79 public:
80   MachineScheduler();
81 
82   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
83 
84   virtual void releaseMemory() {}
85 
86   virtual bool runOnMachineFunction(MachineFunction&);
87 
88   virtual void print(raw_ostream &O, const Module* = 0) const;
89 
90   static char ID; // Class identification, replacement for typeinfo
91 };
92 } // namespace
93 
94 char MachineScheduler::ID = 0;
95 
96 char &llvm::MachineSchedulerID = MachineScheduler::ID;
97 
98 INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
99                       "Machine Instruction Scheduler", false, false)
100 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
101 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
102 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
103 INITIALIZE_PASS_END(MachineScheduler, "misched",
104                     "Machine Instruction Scheduler", false, false)
105 
106 MachineScheduler::MachineScheduler()
107 : MachineFunctionPass(ID) {
108   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
109 }
110 
111 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
112   AU.setPreservesCFG();
113   AU.addRequiredID(MachineDominatorsID);
114   AU.addRequired<MachineLoopInfo>();
115   AU.addRequired<AliasAnalysis>();
116   AU.addRequired<TargetPassConfig>();
117   AU.addRequired<SlotIndexes>();
118   AU.addPreserved<SlotIndexes>();
119   AU.addRequired<LiveIntervals>();
120   AU.addPreserved<LiveIntervals>();
121   MachineFunctionPass::getAnalysisUsage(AU);
122 }
123 
124 MachinePassRegistry MachineSchedRegistry::Registry;
125 
126 /// A dummy default scheduler factory indicates whether the scheduler
127 /// is overridden on the command line.
128 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
129   return 0;
130 }
131 
132 /// MachineSchedOpt allows command line selection of the scheduler.
133 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
134                RegisterPassParser<MachineSchedRegistry> >
135 MachineSchedOpt("misched",
136                 cl::init(&useDefaultMachineSched), cl::Hidden,
137                 cl::desc("Machine instruction scheduler to use"));
138 
139 static MachineSchedRegistry
140 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
141                      useDefaultMachineSched);
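// A hypothetical out-of-tree scheduler registers itself the same way
// ("my-sched" and createMySched are illustrative names, not part of LLVM):
//
//   static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
//     return new ScheduleDAGMI(C, new ConvergingScheduler());
//   }
//   static MachineSchedRegistry
//   MySchedRegistry("my-sched", "Experimental scheduler.", createMySched);
//
// after which it can be selected with -misched=my-sched.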
142 
143 /// Forward declare the standard machine scheduler. This will be used as the
144 /// default scheduler if the target does not set a default.
145 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
146 
147 
148 /// Decrement this iterator until reaching the top or a non-debug instr.
149 static MachineBasicBlock::iterator
150 priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
151   assert(I != Beg && "reached the top of the region, cannot decrement");
152   while (--I != Beg) {
153     if (!I->isDebugValue())
154       break;
155   }
156   return I;
157 }
158 
159 /// If this iterator is a debug value, increment until reaching the End or a
160 /// non-debug instruction.
161 static MachineBasicBlock::iterator
162 nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
163   for (; I != End; ++I) {
164     if (!I->isDebugValue())
165       break;
166   }
167   return I;
168 }
169 
170 /// Top-level MachineScheduler pass driver.
171 ///
172 /// Visit blocks in function order. Divide each block into scheduling regions
173 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
174 /// consistent with the DAG builder, which traverses the interior of the
175 /// scheduling regions bottom-up.
176 ///
177 /// This design avoids exposing scheduling boundaries to the DAG builder,
178 /// simplifying the DAG builder's support for "special" target instructions.
179 /// At the same time the design allows target schedulers to operate across
180 /// scheduling boundaries, for example to bundle the boundary instructions
181 /// without reordering them. This creates complexity, because the target
182 /// scheduler must update the RegionBegin and RegionEnd positions cached by
183 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
184 /// design would be to split blocks at scheduling boundaries, but LLVM has a
185 /// general bias against block splitting purely for implementation simplicity.
186 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
187   DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
188 
189   // Initialize the context of the pass.
190   MF = &mf;
191   MLI = &getAnalysis<MachineLoopInfo>();
192   MDT = &getAnalysis<MachineDominatorTree>();
193   PassConfig = &getAnalysis<TargetPassConfig>();
194   AA = &getAnalysis<AliasAnalysis>();
195 
196   LIS = &getAnalysis<LiveIntervals>();
197   const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
198 
199   RegClassInfo->runOnMachineFunction(*MF);
200 
201   // Select the scheduler, or set the default.
202   MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
203   if (Ctor == useDefaultMachineSched) {
204     // Get the default scheduler set by the target.
205     Ctor = MachineSchedRegistry::getDefault();
206     if (!Ctor) {
207       Ctor = createConvergingSched;
208       MachineSchedRegistry::setDefault(Ctor);
209     }
210   }
211   // Instantiate the selected scheduler.
212   OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
213 
214   // Visit all machine basic blocks.
215   //
216   // TODO: Visit blocks in global postorder or postorder within the bottom-up
217   // loop tree. Then we can optionally compute global RegPressure.
218   for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
219        MBB != MBBEnd; ++MBB) {
220 
221     Scheduler->startBlock(MBB);
222 
223     // Break the block into scheduling regions [I, RegionEnd), and schedule each
224     // region as soon as it is discovered. RegionEnd points to the scheduling
225     // boundary at the bottom of the region. The DAG does not include RegionEnd,
226     // but the region does (i.e. the next RegionEnd is above the previous
227     // RegionBegin). If the current block has no terminator then RegionEnd ==
228     // MBB->end() for the bottom region.
229     //
230     // The Scheduler may insert instructions during either schedule() or
231     // exitRegion(), even for empty regions. So the local iterators 'I' and
232     // 'RegionEnd' are invalid across these calls.
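    // For illustration only, in a block of the form:
    //   %a = ...              <- RegionBegin of the second region visited
    //   %b = ...
    //   <scheduling boundary> <- RegionEnd of that region
    //   %c = ...              <- RegionBegin of the first (bottom) region
    //   <terminator>          <- RegionEnd of the first region
    // the loop below schedules [%c, terminator) first, then [%a, boundary);
    // the boundary instructions themselves are never reordered.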
233     unsigned RemainingInstrs = MBB->size();
234     for (MachineBasicBlock::iterator RegionEnd = MBB->end();
235         RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
236 
237       // Avoid decrementing RegionEnd for blocks with no terminator.
238       if (RegionEnd != MBB->end()
239           || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
240         --RegionEnd;
241         // Count the boundary instruction.
242         --RemainingInstrs;
243       }
244 
245       // The next region starts above the previous region. Look backward in the
246       // instruction stream until we find the nearest boundary.
247       MachineBasicBlock::iterator I = RegionEnd;
248       for (; I != MBB->begin(); --I, --RemainingInstrs) {
249         if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
250           break;
251       }
252       // Notify the scheduler of the region, even if we may skip scheduling
253       // it. Perhaps it still needs to be bundled.
254       Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);
255 
256       // Skip empty scheduling regions (0 or 1 schedulable instructions).
257       if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
258         // Close the current region. Bundle the terminator if needed.
259         // This invalidates 'RegionEnd' and 'I'.
260         Scheduler->exitRegion();
261         continue;
262       }
263       DEBUG(dbgs() << "********** MI Scheduling **********\n");
264       DEBUG(dbgs() << MF->getName()
265             << ":BB#" << MBB->getNumber() << "\n  From: " << *I << "    To: ";
266             if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
267             else dbgs() << "End";
268             dbgs() << " Remaining: " << RemainingInstrs << "\n");
269 
270       // Schedule a region: possibly reorder instructions.
271       // This invalidates 'RegionEnd' and 'I'.
272       Scheduler->schedule();
273 
274       // Close the current region.
275       Scheduler->exitRegion();
276 
277       // Scheduling has invalidated the current iterator 'I'. Ask the
278       // scheduler for the top of its scheduled region.
279       RegionEnd = Scheduler->begin();
280     }
281     assert(RemainingInstrs == 0 && "Instruction count mismatch!");
282     Scheduler->finishBlock();
283   }
284   Scheduler->finalizeSchedule();
285   DEBUG(LIS->print(dbgs()));
286   return true;
287 }
288 
289 void MachineScheduler::print(raw_ostream &O, const Module* m) const {
290   // unimplemented
291 }
292 
293 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
294 void ReadyQueue::dump() {
295   dbgs() << Name << ": ";
296   for (unsigned i = 0, e = Queue.size(); i < e; ++i)
297     dbgs() << Queue[i]->NodeNum << " ";
298   dbgs() << "\n";
299 }
300 #endif
301 
302 //===----------------------------------------------------------------------===//
303 // ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
304 // preservation.
305 //===----------------------------------------------------------------------===//
306 
307 ScheduleDAGMI::~ScheduleDAGMI() {
308   delete DFSResult;
309   DeleteContainerPointers(Mutations);
310   delete SchedImpl;
311 }
312 
313 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
314   if (SuccSU != &ExitSU) {
315     // Do not use WillCreateCycle, it assumes SD scheduling.
316     // If Pred is reachable from Succ, then the edge creates a cycle.
317     if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
318       return false;
319     Topo.AddPred(SuccSU, PredDep.getSUnit());
320   }
321   SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
322   // Return true regardless of whether a new edge needed to be inserted.
323   return true;
324 }
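// For example, the DAG mutations below request cluster edges through this
// interface, e.g. DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)), and simply
// fall back to normal scheduling when the edge would create a cycle.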
325 
326 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
327 /// NumPredsLeft reaches zero, release the successor node.
328 ///
329 /// FIXME: Adjust SuccSU height based on MinLatency.
330 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
331   SUnit *SuccSU = SuccEdge->getSUnit();
332 
333   if (SuccEdge->isWeak()) {
334     --SuccSU->WeakPredsLeft;
335     if (SuccEdge->isCluster())
336       NextClusterSucc = SuccSU;
337     return;
338   }
339 #ifndef NDEBUG
340   if (SuccSU->NumPredsLeft == 0) {
341     dbgs() << "*** Scheduling failed! ***\n";
342     SuccSU->dump(this);
343     dbgs() << " has been released too many times!\n";
344     llvm_unreachable(0);
345   }
346 #endif
347   --SuccSU->NumPredsLeft;
348   if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
349     SchedImpl->releaseTopNode(SuccSU);
350 }
351 
352 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
353 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
354   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
355        I != E; ++I) {
356     releaseSucc(SU, &*I);
357   }
358 }
359 
360 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
361 /// NumSuccsLeft reaches zero, release the predecessor node.
362 ///
363 /// FIXME: Adjust PredSU height based on MinLatency.
364 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
365   SUnit *PredSU = PredEdge->getSUnit();
366 
367   if (PredEdge->isWeak()) {
368     --PredSU->WeakSuccsLeft;
369     if (PredEdge->isCluster())
370       NextClusterPred = PredSU;
371     return;
372   }
373 #ifndef NDEBUG
374   if (PredSU->NumSuccsLeft == 0) {
375     dbgs() << "*** Scheduling failed! ***\n";
376     PredSU->dump(this);
377     dbgs() << " has been released too many times!\n";
378     llvm_unreachable(0);
379   }
380 #endif
381   --PredSU->NumSuccsLeft;
382   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
383     SchedImpl->releaseBottomNode(PredSU);
384 }
385 
386 /// releasePredecessors - Call releasePred on each of SU's predecessors.
387 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
388   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
389        I != E; ++I) {
390     releasePred(SU, &*I);
391   }
392 }
393 
394 void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
395                                     MachineBasicBlock::iterator InsertPos) {
396   // Advance RegionBegin if the first instruction moves down.
397   if (&*RegionBegin == MI)
398     ++RegionBegin;
399 
400   // Update the instruction stream.
401   BB->splice(InsertPos, BB, MI);
402 
403   // Update LiveIntervals
404   LIS->handleMove(MI, /*UpdateFlags=*/true);
405 
406   // Recede RegionBegin if an instruction moves above the first.
407   if (RegionBegin == InsertPos)
408     RegionBegin = MI;
409 }
410 
411 bool ScheduleDAGMI::checkSchedLimit() {
412 #ifndef NDEBUG
413   if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
414     CurrentTop = CurrentBottom;
415     return false;
416   }
417   ++NumInstrsScheduled;
418 #endif
419   return true;
420 }
421 
422 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
423 /// crossing a scheduling boundary. [begin, end) includes all instructions in
424 /// the region, including the boundary itself and single-instruction regions
425 /// that don't get scheduled.
426 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
427                                 MachineBasicBlock::iterator begin,
428                                 MachineBasicBlock::iterator end,
429                                 unsigned endcount)
430 {
431   ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
432 
433   // For convenience remember the end of the liveness region.
434   LiveRegionEnd =
435     (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
436 }
437 
438 // Set up the register pressure trackers for the top and bottom scheduled
439 // regions.
440 void ScheduleDAGMI::initRegPressure() {
441   TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
442   BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
443 
444   // Close the RPTracker to finalize live ins.
445   RPTracker.closeRegion();
446 
447   DEBUG(RPTracker.getPressure().dump(TRI));
448 
449   // Initialize the live ins and live outs.
450   TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
451   BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
452 
453   // Close one end of the tracker so we can call
454   // getMaxUpward/DownwardPressureDelta before advancing across any
455   // instructions. This converts currently live regs into live ins/outs.
456   TopRPTracker.closeTop();
457   BotRPTracker.closeBottom();
458 
459   // Account for liveness generated by the region boundary.
460   if (LiveRegionEnd != RegionEnd)
461     BotRPTracker.recede();
462 
463   assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
464 
465   // Cache the list of excess pressure sets in this region. This will also track
466   // the max pressure in the scheduled code for these sets.
467   RegionCriticalPSets.clear();
468   std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
469   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
470     unsigned Limit = TRI->getRegPressureSetLimit(i);
471     DEBUG(dbgs() << TRI->getRegPressureSetName(i)
472           << " Limit " << Limit
473           << " Actual " << RegionPressure[i] << "\n");
474     if (RegionPressure[i] > Limit)
475       RegionCriticalPSets.push_back(PressureElement(i, 0));
476   }
477   DEBUG(dbgs() << "Excess PSets: ";
478         for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
479           dbgs() << TRI->getRegPressureSetName(
480             RegionCriticalPSets[i].PSetID) << " ";
481         dbgs() << "\n");
482 }
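// Worked example (numbers illustrative): if a pressure set has a limit of 12
// and the region's max pressure for it is 14, the set is added to
// RegionCriticalPSets above, and updateScheduledPressure below then records
// the worst max pressure reached for that set while scheduling.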
483 
484 // FIXME: When the pressure tracker deals in pressure differences then we won't
485 // iterate over all RegionCriticalPSets[i].
486 void ScheduleDAGMI::
487 updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
488   for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
489     unsigned ID = RegionCriticalPSets[i].PSetID;
490     int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
491     if ((int)NewMaxPressure[ID] > MaxUnits)
492       MaxUnits = NewMaxPressure[ID];
493   }
494 }
495 
496 /// schedule - Called back from MachineScheduler::runOnMachineFunction
497 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
498 /// only includes instructions that have DAG nodes, not scheduling boundaries.
499 ///
500 /// This is a skeletal driver, with all the functionality pushed into helpers,
501 /// so that it can be easily extended by experimental schedulers. Generally,
502 /// implementing MachineSchedStrategy should be sufficient to implement a new
503 /// scheduling algorithm. However, if a scheduler further subclasses
504 /// ScheduleDAGMI then it will want to override this virtual method in order to
505 /// update any specialized state.
506 void ScheduleDAGMI::schedule() {
507   buildDAGWithRegPressure();
508 
509   Topo.InitDAGTopologicalSorting();
510 
511   postprocessDAG();
512 
513   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
514           SUnits[su].dumpAll(this));
515 
516   initQueues();
517 
518   bool IsTopNode = false;
519   while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
520     assert(!SU->isScheduled && "Node already scheduled");
521     if (!checkSchedLimit())
522       break;
523 
524     scheduleMI(SU, IsTopNode);
525 
526     updateQueues(SU, IsTopNode);
527   }
528   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
529 
530   placeDebugValues();
531 
532   DEBUG({
533       unsigned BBNum = begin()->getParent()->getNumber();
534       dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
535       dumpSchedule();
536       dbgs() << '\n';
537     });
538 }
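// A minimal strategy sketch (illustrative only, not part of this file); the
// hooks are the MachineSchedStrategy virtual methods that ConvergingScheduler
// also overrides further down:
//
//   struct TrivialSchedStrategy : public MachineSchedStrategy {
//     std::vector<SUnit*> Ready;
//     virtual void initialize(ScheduleDAGMI *DAG) {}
//     virtual SUnit *pickNode(bool &IsTopNode) {
//       if (Ready.empty()) return NULL;
//       IsTopNode = false;              // schedule purely bottom-up
//       SUnit *SU = Ready.back();
//       Ready.pop_back();
//       return SU;
//     }
//     virtual void schedNode(SUnit *SU, bool IsTopNode) {}
//     virtual void releaseTopNode(SUnit *SU) {}
//     virtual void releaseBottomNode(SUnit *SU) { Ready.push_back(SU); }
//   };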
539 
540 /// Build the DAG and set up three register pressure trackers.
541 void ScheduleDAGMI::buildDAGWithRegPressure() {
542   // Initialize the register pressure tracker used by buildSchedGraph.
543   RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
544 
545   // Account for liveness generated by the region boundary.
546   if (LiveRegionEnd != RegionEnd)
547     RPTracker.recede();
548 
549   // Build the DAG, and compute current register pressure.
550   buildSchedGraph(AA, &RPTracker);
551   if (ViewMISchedDAGs) viewGraph();
552 
553   // Initialize top/bottom trackers after computing region pressure.
554   initRegPressure();
555 }
556 
557 /// Apply each ScheduleDAGMutation step in order.
558 void ScheduleDAGMI::postprocessDAG() {
559   for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
560     Mutations[i]->apply(this);
561   }
562 }
563 
564 void ScheduleDAGMI::initDFSResult() {
565   if (!DFSResult)
566     DFSResult = new SchedDFSResult(/*BottomUp=*/true, MinSubtreeSize);
567   DFSResult->clear();
568   DFSResult->resize(SUnits.size());
569   ScheduledTrees.clear();
570 }
571 
572 void ScheduleDAGMI::computeDFSResult(ArrayRef<SUnit*> Roots) {
573   DFSResult->compute(Roots);
574   ScheduledTrees.resize(DFSResult->getNumSubtrees());
575 }
576 
577 // Release all DAG roots for scheduling.
578 //
579 // Nodes with unreleased weak edges can still be roots.
580 void ScheduleDAGMI::releaseRoots() {
581   SmallVector<SUnit*, 16> BotRoots;
582 
583   for (std::vector<SUnit>::iterator
584          I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
585     SUnit *SU = &(*I);
586 
587     // Order predecessors so DFSResult follows the critical path.
588     SU->biasCriticalPath();
589 
590     // A SUnit is ready to top schedule if it has no predecessors.
591     if (!I->NumPredsLeft && SU != &EntrySU)
592       SchedImpl->releaseTopNode(SU);
593     // A SUnit is ready to bottom schedule if it has no successors.
594     if (!I->NumSuccsLeft && SU != &ExitSU)
595       BotRoots.push_back(SU);
596   }
597   // Release bottom roots in reverse order so the higher priority nodes appear
598   // first. This is more natural and slightly more efficient.
599   for (SmallVectorImpl<SUnit*>::const_reverse_iterator
600          I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
601     SchedImpl->releaseBottomNode(*I);
602 }
603 
604 /// Identify DAG roots and set up scheduler queues.
605 void ScheduleDAGMI::initQueues() {
606   NextClusterSucc = NULL;
607   NextClusterPred = NULL;
608 
609   // Initialize the strategy before modifying the DAG.
610   SchedImpl->initialize(this);
611 
612   // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
613   releaseRoots();
614 
615   releaseSuccessors(&EntrySU);
616   releasePredecessors(&ExitSU);
617 
618   SchedImpl->registerRoots();
619 
620   // Advance past initial DebugValues.
621   assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
622   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
623   TopRPTracker.setPos(CurrentTop);
624 
625   CurrentBottom = RegionEnd;
626 }
627 
628 /// Move an instruction and update register pressure.
629 void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
630   // Move the instruction to its new location in the instruction stream.
631   MachineInstr *MI = SU->getInstr();
632 
633   if (IsTopNode) {
634     assert(SU->isTopReady() && "node still has unscheduled dependencies");
635     if (&*CurrentTop == MI)
636       CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
637     else {
638       moveInstruction(MI, CurrentTop);
639       TopRPTracker.setPos(MI);
640     }
641 
642     // Update top scheduled pressure.
643     TopRPTracker.advance();
644     assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
645     updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
646   }
647   else {
648     assert(SU->isBottomReady() && "node still has unscheduled dependencies");
649     MachineBasicBlock::iterator priorII =
650       priorNonDebug(CurrentBottom, CurrentTop);
651     if (&*priorII == MI)
652       CurrentBottom = priorII;
653     else {
654       if (&*CurrentTop == MI) {
655         CurrentTop = nextIfDebug(++CurrentTop, priorII);
656         TopRPTracker.setPos(CurrentTop);
657       }
658       moveInstruction(MI, CurrentBottom);
659       CurrentBottom = MI;
660     }
661     // Update bottom scheduled pressure.
662     BotRPTracker.recede();
663     assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
664     updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
665   }
666 }
667 
668 /// Update scheduler queues after scheduling an instruction.
669 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
670   // Release dependent instructions for scheduling.
671   if (IsTopNode)
672     releaseSuccessors(SU);
673   else
674     releasePredecessors(SU);
675 
676   SU->isScheduled = true;
677 
678   if (DFSResult) {
679     unsigned SubtreeID = DFSResult->getSubtreeID(SU);
680     if (!ScheduledTrees.test(SubtreeID)) {
681       ScheduledTrees.set(SubtreeID);
682       DFSResult->scheduleTree(SubtreeID);
683       SchedImpl->scheduleTree(SubtreeID);
684     }
685   }
686 
687   // Notify the scheduling strategy after updating the DAG.
688   SchedImpl->schedNode(SU, IsTopNode);
689 }
690 
691 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
692 void ScheduleDAGMI::placeDebugValues() {
693   // If first instruction was a DBG_VALUE then put it back.
694   // If the first instruction was a DBG_VALUE, put it back.
695     BB->splice(RegionBegin, BB, FirstDbgValue);
696     RegionBegin = FirstDbgValue;
697   }
698 
699   for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
700          DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
701     std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
702     MachineInstr *DbgValue = P.first;
703     MachineBasicBlock::iterator OrigPrevMI = P.second;
704     if (&*RegionBegin == DbgValue)
705       ++RegionBegin;
706     BB->splice(++OrigPrevMI, BB, DbgValue);
707     if (OrigPrevMI == llvm::prior(RegionEnd))
708       RegionEnd = DbgValue;
709   }
710   DbgValues.clear();
711   FirstDbgValue = NULL;
712 }
713 
714 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
715 void ScheduleDAGMI::dumpSchedule() const {
716   for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
717     if (SUnit *SU = getSUnit(&(*MI)))
718       SU->dump(this);
719     else
720       dbgs() << "Missing SUnit\n";
721   }
722 }
723 #endif
724 
725 //===----------------------------------------------------------------------===//
726 // LoadClusterMutation - DAG post-processing to cluster loads.
727 //===----------------------------------------------------------------------===//
728 
729 namespace {
730 /// \brief Post-process the DAG to create cluster edges between neighboring
731 /// loads.
732 class LoadClusterMutation : public ScheduleDAGMutation {
733   struct LoadInfo {
734     SUnit *SU;
735     unsigned BaseReg;
736     unsigned Offset;
737     LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
738       : SU(su), BaseReg(reg), Offset(ofs) {}
739   };
740   static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
741                            const LoadClusterMutation::LoadInfo &RHS);
742 
743   const TargetInstrInfo *TII;
744   const TargetRegisterInfo *TRI;
745 public:
746   LoadClusterMutation(const TargetInstrInfo *tii,
747                       const TargetRegisterInfo *tri)
748     : TII(tii), TRI(tri) {}
749 
750   virtual void apply(ScheduleDAGMI *DAG);
751 protected:
752   void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
753 };
754 } // anonymous
755 
756 bool LoadClusterMutation::LoadInfoLess(
757   const LoadClusterMutation::LoadInfo &LHS,
758   const LoadClusterMutation::LoadInfo &RHS) {
759   if (LHS.BaseReg != RHS.BaseReg)
760     return LHS.BaseReg < RHS.BaseReg;
761   return LHS.Offset < RHS.Offset;
762 }
763 
764 void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
765                                                   ScheduleDAGMI *DAG) {
766   SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
767   for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
768     SUnit *SU = Loads[Idx];
769     unsigned BaseReg;
770     unsigned Offset;
771     if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
772       LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
773   }
774   if (LoadRecords.size() < 2)
775     return;
776   std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
777   unsigned ClusterLength = 1;
778   for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
779     if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
780       ClusterLength = 1;
781       continue;
782     }
783 
784     SUnit *SUa = LoadRecords[Idx].SU;
785     SUnit *SUb = LoadRecords[Idx+1].SU;
786     if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
787         && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
788 
789       DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
790             << SUb->NodeNum << ")\n");
791       // Copy successor edges from SUa to SUb. Interleaving computation
792       // dependent on SUa can prevent load combining due to register reuse.
793       // Predecessor edges do not need to be copied from SUb to SUa since nearby
794       // loads should have effectively the same inputs.
795       for (SUnit::const_succ_iterator
796              SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
797         if (SI->getSUnit() == SUb)
798           continue;
799         DEBUG(dbgs() << "  Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
800         DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
801       }
802       ++ClusterLength;
803     }
804     else
805       ClusterLength = 1;
806   }
807 }
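// Worked example (illustrative): given loads at [r0+8], [r0+0], [r1+4], and
// [r0+4], the sort above orders them (r0,0) (r0,4) (r0,8) (r1,4); subject to
// shouldClusterLoads, cluster edges are added for (r0,0)->(r0,4) and
// (r0,4)->(r0,8), while the r1 load restarts with ClusterLength = 1.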
808 
809 /// \brief Callback from DAG postProcessing to create cluster edges for loads.
810 void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
811   // Map DAG NodeNum to store chain ID.
812   DenseMap<unsigned, unsigned> StoreChainIDs;
813   // Map each store chain to a set of dependent loads.
814   SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
815   for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
816     SUnit *SU = &DAG->SUnits[Idx];
817     if (!SU->getInstr()->mayLoad())
818       continue;
819     unsigned ChainPredID = DAG->SUnits.size();
820     for (SUnit::const_pred_iterator
821            PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
822       if (PI->isCtrl()) {
823         ChainPredID = PI->getSUnit()->NodeNum;
824         break;
825       }
826     }
827     // Check if this chain-like pred has been seen
828     // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
829     unsigned NumChains = StoreChainDependents.size();
830     std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
831       StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
832     if (Result.second)
833       StoreChainDependents.resize(NumChains + 1);
834     StoreChainDependents[Result.first->second].push_back(SU);
835   }
836   // Iterate over the store chains.
837   for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
838     clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
839 }
840 
841 //===----------------------------------------------------------------------===//
842 // MacroFusion - DAG post-processing to encourage fusion of macro ops.
843 //===----------------------------------------------------------------------===//
844 
845 namespace {
846 /// \brief Post-process the DAG to create cluster edges between instructions
847 /// that may be fused by the processor into a single operation.
848 class MacroFusion : public ScheduleDAGMutation {
849   const TargetInstrInfo *TII;
850 public:
851   MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
852 
853   virtual void apply(ScheduleDAGMI *DAG);
854 };
855 } // anonymous
856 
857 /// \brief Callback from DAG postProcessing to create cluster edges to encourage
858 /// fused operations.
859 void MacroFusion::apply(ScheduleDAGMI *DAG) {
860   // For now, assume targets can only fuse with the branch.
861   MachineInstr *Branch = DAG->ExitSU.getInstr();
862   if (!Branch)
863     return;
864 
865   for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
866     SUnit *SU = &DAG->SUnits[--Idx];
867     if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
868       continue;
869 
870     // Create a single weak edge from SU to ExitSU. The only effect is to cause
871     // bottom-up scheduling to heavily prioritize the clustered SU.  There is no
872     // need to copy predecessor edges from ExitSU to SU, since top-down
873     // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
874     // of SU, we could create an artificial edge from the deepest root, but it
875     // hasn't been needed yet.
876     bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
877     (void)Success;
878     assert(Success && "No DAG nodes should be reachable from ExitSU");
879 
880     DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
881     break;
882   }
883 }
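// A typical target example (illustrative): an x86 compare or test feeding the
// conditional branch reports shouldScheduleAdjacent, so the weak cluster edge
// created above keeps the pair adjacent and lets the hardware macro-fuse them.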
884 
885 //===----------------------------------------------------------------------===//
886 // ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
887 //===----------------------------------------------------------------------===//
888 
889 namespace {
890 /// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
891 /// the schedule.
892 class ConvergingScheduler : public MachineSchedStrategy {
893 public:
894   /// Represent the type of SchedCandidate found within a single queue.
895   /// pickNodeBidirectional depends on these being listed in decreasing priority.
896   enum CandReason {
897     NoCand, SingleExcess, SingleCritical, Cluster,
898     ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
899     TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
900     NodeOrder};
901 
902 #ifndef NDEBUG
903   static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
904 #endif
905 
906   /// Policy for scheduling the next instruction in the candidate's zone.
907   struct CandPolicy {
908     bool ReduceLatency;
909     unsigned ReduceResIdx;
910     unsigned DemandResIdx;
911 
912     CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
913   };
914 
915   /// Status of an instruction's critical resource consumption.
916   struct SchedResourceDelta {
917     // Count critical resources in the scheduled region required by SU.
918     unsigned CritResources;
919 
920     // Count critical resources from another region consumed by SU.
921     unsigned DemandedResources;
922 
923     SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
924 
925     bool operator==(const SchedResourceDelta &RHS) const {
926       return CritResources == RHS.CritResources
927         && DemandedResources == RHS.DemandedResources;
928     }
929     bool operator!=(const SchedResourceDelta &RHS) const {
930       return !operator==(RHS);
931     }
932   };
933 
934   /// Store the state used by ConvergingScheduler heuristics, required for the
935   /// lifetime of one invocation of pickNode().
936   struct SchedCandidate {
937     CandPolicy Policy;
938 
939     // The best SUnit candidate.
940     SUnit *SU;
941 
942     // The reason for this candidate.
943     CandReason Reason;
944 
945     // Register pressure values for the best candidate.
946     RegPressureDelta RPDelta;
947 
948     // Critical resource consumption of the best candidate.
949     SchedResourceDelta ResDelta;
950 
951     SchedCandidate(const CandPolicy &policy)
952     : Policy(policy), SU(NULL), Reason(NoCand) {}
953 
954     bool isValid() const { return SU; }
955 
956     // Copy the status of another candidate without changing policy.
957     void setBest(SchedCandidate &Best) {
958       assert(Best.Reason != NoCand && "uninitialized Sched candidate");
959       SU = Best.SU;
960       Reason = Best.Reason;
961       RPDelta = Best.RPDelta;
962       ResDelta = Best.ResDelta;
963     }
964 
965     void initResourceDelta(const ScheduleDAGMI *DAG,
966                            const TargetSchedModel *SchedModel);
967   };
968 
969   /// Summarize the unscheduled region.
970   struct SchedRemainder {
971     // Critical path through the DAG in expected latency.
972     unsigned CriticalPath;
973 
974     // Unscheduled resources
975     SmallVector<unsigned, 16> RemainingCounts;
976     // Critical resource for the unscheduled zone.
977     unsigned CritResIdx;
978     // Number of micro-ops left to schedule.
979     unsigned RemainingMicroOps;
980 
981     void reset() {
982       CriticalPath = 0;
983       RemainingCounts.clear();
984       CritResIdx = 0;
985       RemainingMicroOps = 0;
986     }
987 
988     SchedRemainder() { reset(); }
989 
990     void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
991 
992     unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const {
993       if (!SchedModel->hasInstrSchedModel())
994         return 0;
995 
996       return std::max(
997         RemainingMicroOps * SchedModel->getMicroOpFactor(),
998         RemainingCounts[CritResIdx]);
999     }
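    // Worked example (values illustrative): with RemainingMicroOps = 10, a
    // micro-op factor of 2, and 25 normalized units left on the critical
    // resource, getMaxRemainingCount returns max(10 * 2, 25) = 25, i.e. the
    // remainder is still bounded by the critical resource rather than by
    // issue bandwidth.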
1000   };
1001 
1002   /// Each scheduling boundary is associated with ready queues. It tracks the
1003   /// current cycle in the direction of movement, and maintains the state
1004   /// of "hazards" and other interlocks at the current cycle.
1005   struct SchedBoundary {
1006     ScheduleDAGMI *DAG;
1007     const TargetSchedModel *SchedModel;
1008     SchedRemainder *Rem;
1009 
1010     ReadyQueue Available;
1011     ReadyQueue Pending;
1012     bool CheckPending;
1013 
1014     // For heuristics, keep a list of the nodes that immediately depend on the
1015     // most recently scheduled node.
1016     SmallPtrSet<const SUnit*, 8> NextSUs;
1017 
1018     ScheduleHazardRecognizer *HazardRec;
1019 
1020     unsigned CurrCycle;
1021     unsigned IssueCount;
1022 
1023     /// MinReadyCycle - Cycle of the soonest available instruction.
1024     unsigned MinReadyCycle;
1025 
1026     // The expected latency of the critical path in this scheduled zone.
1027     unsigned ExpectedLatency;
1028 
1029     // Resources used in the scheduled zone beyond this boundary.
1030     SmallVector<unsigned, 16> ResourceCounts;
1031 
1032     // Cache the critical resources ID in this scheduled zone.
1033     unsigned CritResIdx;
1034 
1035     // Is the scheduled region resource limited vs. latency limited.
1036     bool IsResourceLimited;
1037 
1038     unsigned ExpectedCount;
1039 
1040 #ifndef NDEBUG
1041     // Remember the greatest min operand latency.
1042     unsigned MaxMinLatency;
1043 #endif
1044 
1045     void reset() {
1046       Available.clear();
1047       Pending.clear();
1048       CheckPending = false;
1049       NextSUs.clear();
1050       HazardRec = 0;
1051       CurrCycle = 0;
1052       IssueCount = 0;
1053       MinReadyCycle = UINT_MAX;
1054       ExpectedLatency = 0;
1055       ResourceCounts.resize(1);
1056       assert(!ResourceCounts[0] && "nonzero count for bad resource");
1057       CritResIdx = 0;
1058       IsResourceLimited = false;
1059       ExpectedCount = 0;
1060 #ifndef NDEBUG
1061       MaxMinLatency = 0;
1062 #endif
1063       // Reserve a zero-count for invalid CritResIdx.
1064       ResourceCounts.resize(1);
1065     }
1066 
1067     /// Pending queues extend the ready queues with the same ID and the
1068     /// PendingFlag set.
1069     SchedBoundary(unsigned ID, const Twine &Name):
1070       DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
1071       Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") {
1072       reset();
1073     }
1074 
1075     ~SchedBoundary() { delete HazardRec; }
1076 
1077     void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
1078               SchedRemainder *rem);
1079 
1080     bool isTop() const {
1081       return Available.getID() == ConvergingScheduler::TopQID;
1082     }
1083 
1084     unsigned getUnscheduledLatency(SUnit *SU) const {
1085       if (isTop())
1086         return SU->getHeight();
1087       return SU->getDepth() + SU->Latency;
1088     }
1089 
1090     unsigned getCriticalCount() const {
1091       return ResourceCounts[CritResIdx];
1092     }
1093 
1094     bool checkHazard(SUnit *SU);
1095 
1096     void setLatencyPolicy(CandPolicy &Policy);
1097 
1098     void releaseNode(SUnit *SU, unsigned ReadyCycle);
1099 
1100     void bumpCycle();
1101 
1102     void countResource(unsigned PIdx, unsigned Cycles);
1103 
1104     void bumpNode(SUnit *SU);
1105 
1106     void releasePending();
1107 
1108     void removeReady(SUnit *SU);
1109 
1110     SUnit *pickOnlyChoice();
1111   };
1112 
1113 private:
1114   ScheduleDAGMI *DAG;
1115   const TargetSchedModel *SchedModel;
1116   const TargetRegisterInfo *TRI;
1117 
1118   // State of the top and bottom scheduled instruction boundaries.
1119   SchedRemainder Rem;
1120   SchedBoundary Top;
1121   SchedBoundary Bot;
1122 
1123 public:
1124   /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
1125   enum {
1126     TopQID = 1,
1127     BotQID = 2,
1128     LogMaxQID = 2
1129   };
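  // Note: with LogMaxQID = 2, the SchedBoundary constructor above gives the
  // pending queues IDs TopQID << 2 = 4 and BotQID << 2 = 8, so the four
  // queues (Top/Bot, Available/Pending) use disjoint ID bits 1, 2, 4, and 8.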
1130 
1131   ConvergingScheduler():
1132     DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
1133 
1134   virtual void initialize(ScheduleDAGMI *dag);
1135 
1136   virtual SUnit *pickNode(bool &IsTopNode);
1137 
1138   virtual void schedNode(SUnit *SU, bool IsTopNode);
1139 
1140   virtual void releaseTopNode(SUnit *SU);
1141 
1142   virtual void releaseBottomNode(SUnit *SU);
1143 
1144   virtual void registerRoots();
1145 
1146 protected:
1147   void balanceZones(
1148     ConvergingScheduler::SchedBoundary &CriticalZone,
1149     ConvergingScheduler::SchedCandidate &CriticalCand,
1150     ConvergingScheduler::SchedBoundary &OppositeZone,
1151     ConvergingScheduler::SchedCandidate &OppositeCand);
1152 
1153   void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand,
1154                            ConvergingScheduler::SchedCandidate &BotCand);
1155 
1156   void tryCandidate(SchedCandidate &Cand,
1157                     SchedCandidate &TryCand,
1158                     SchedBoundary &Zone,
1159                     const RegPressureTracker &RPTracker,
1160                     RegPressureTracker &TempTracker);
1161 
1162   SUnit *pickNodeBidirectional(bool &IsTopNode);
1163 
1164   void pickNodeFromQueue(SchedBoundary &Zone,
1165                          const RegPressureTracker &RPTracker,
1166                          SchedCandidate &Candidate);
1167 
1168 #ifndef NDEBUG
1169   void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone);
1170 #endif
1171 };
1172 } // namespace
1173 
1174 void ConvergingScheduler::SchedRemainder::
1175 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1176   reset();
1177   if (!SchedModel->hasInstrSchedModel())
1178     return;
1179   RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1180   for (std::vector<SUnit>::iterator
1181          I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
1182     const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
1183     RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC);
1184     for (TargetSchedModel::ProcResIter
1185            PI = SchedModel->getWriteProcResBegin(SC),
1186            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1187       unsigned PIdx = PI->ProcResourceIdx;
1188       unsigned Factor = SchedModel->getResourceFactor(PIdx);
1189       RemainingCounts[PIdx] += (Factor * PI->Cycles);
1190     }
1191   }
1192   for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds();
1193        PIdx != PEnd; ++PIdx) {
1194     if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx])
1195         >= (int)SchedModel->getLatencyFactor()) {
1196       CritResIdx = PIdx;
1197     }
1198   }
1199 }
1200 
1201 void ConvergingScheduler::SchedBoundary::
1202 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1203   reset();
1204   DAG = dag;
1205   SchedModel = smodel;
1206   Rem = rem;
1207   if (SchedModel->hasInstrSchedModel())
1208     ResourceCounts.resize(SchedModel->getNumProcResourceKinds());
1209 }
1210 
1211 void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
1212   DAG = dag;
1213   SchedModel = DAG->getSchedModel();
1214   TRI = DAG->TRI;
1215   Rem.init(DAG, SchedModel);
1216   Top.init(DAG, SchedModel, &Rem);
1217   Bot.init(DAG, SchedModel, &Rem);
1218 
1219   DAG->initDFSResult();
1220 
1221   // Initialize resource counts.
1222 
1223   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
1224   // are disabled, then these HazardRecs will be disabled.
1225   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
1226   const TargetMachine &TM = DAG->MF.getTarget();
1227   Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1228   Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1229 
1230   assert((!ForceTopDown || !ForceBottomUp) &&
1231          "-misched-topdown incompatible with -misched-bottomup");
1232 }
1233 
1234 void ConvergingScheduler::releaseTopNode(SUnit *SU) {
1235   if (SU->isScheduled)
1236     return;
1237 
1238   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1239        I != E; ++I) {
1240     unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
1241     unsigned MinLatency = I->getMinLatency();
1242 #ifndef NDEBUG
1243     Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
1244 #endif
1245     if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
1246       SU->TopReadyCycle = PredReadyCycle + MinLatency;
1247   }
1248   Top.releaseNode(SU, SU->TopReadyCycle);
1249 }
1250 
1251 void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
1252   if (SU->isScheduled)
1253     return;
1254 
1255   assert(SU->getInstr() && "Scheduled SUnit must have instr");
1256 
1257   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1258        I != E; ++I) {
1259     if (I->isWeak())
1260       continue;
1261     unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
1262     unsigned MinLatency = I->getMinLatency();
1263 #ifndef NDEBUG
1264     Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
1265 #endif
1266     if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
1267       SU->BotReadyCycle = SuccReadyCycle + MinLatency;
1268   }
1269   Bot.releaseNode(SU, SU->BotReadyCycle);
1270 }
1271 
1272 void ConvergingScheduler::registerRoots() {
1273   Rem.CriticalPath = DAG->ExitSU.getDepth();
1274   // Some roots may not feed into ExitSU. Check all of them in case.
1275   for (std::vector<SUnit*>::const_iterator
1276          I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
1277     if ((*I)->getDepth() > Rem.CriticalPath)
1278       Rem.CriticalPath = (*I)->getDepth();
1279   }
1280   DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
1281 
1282   DAG->computeDFSResult(Bot.Available.elements());
1283 }
1284 
1285 /// Does this SU have a hazard within the current instruction group?
1286 ///
1287 /// The scheduler supports two modes of hazard recognition. The first is the
1288 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1289 /// supports highly complicated in-order reservation tables
1290 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1291 ///
1292 /// The second is a streamlined mechanism that checks for hazards based on
1293 /// simple counters that the scheduler itself maintains. It explicitly checks
1294 /// for instruction dispatch limitations, including the number of micro-ops that
1295 /// can dispatch per cycle.
1296 ///
1297 /// TODO: Also check whether the SU must start a new group.
1298 bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
1299   if (HazardRec->isEnabled())
1300     return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
1301 
1302   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1303   if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) {
1304     DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
1305           << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1306     return true;
1307   }
1308   return false;
1309 }
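// For example (numbers illustrative): with an issue width of 4, if 3 micro-ops
// have already issued this cycle and SU expands to 2 micro-ops, then
// IssueCount + uops = 5 > 4 and the node is held back until bumpCycle().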
1310 
1311 /// Compute the remaining latency to determine whether ILP should be increased.
1312 void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
1313   // FIXME: compile time. In all, we visit four queues here; we should only
1314   // need to visit the one that was last popped if we cache the result.
1315   unsigned RemLatency = 0;
1316   for (ReadyQueue::iterator I = Available.begin(), E = Available.end();
1317        I != E; ++I) {
1318     unsigned L = getUnscheduledLatency(*I);
1319     if (L > RemLatency)
1320       RemLatency = L;
1321   }
1322   for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end();
1323        I != E; ++I) {
1324     unsigned L = getUnscheduledLatency(*I);
1325     if (L > RemLatency)
1326       RemLatency = L;
1327   }
1328   unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow();
1329   if (RemLatency + ExpectedLatency >= CriticalPathLimit
1330       && RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
1331     Policy.ReduceLatency = true;
1332     DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n');
1333   }
1334 }
1335 
1336 void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
1337                                                      unsigned ReadyCycle) {
1338 
1339   if (ReadyCycle < MinReadyCycle)
1340     MinReadyCycle = ReadyCycle;
1341 
1342   // Check for interlocks first. For the purpose of other heuristics, an
1343   // instruction that cannot issue appears as if it's not in the ReadyQueue.
1344   if (ReadyCycle > CurrCycle || checkHazard(SU))
1345     Pending.push(SU);
1346   else
1347     Available.push(SU);
1348 
1349   // Record this node as an immediate dependent of the scheduled node.
1350   NextSUs.insert(SU);
1351 }
1352 
1353 /// Move the boundary of scheduled code by one cycle.
1354 void ConvergingScheduler::SchedBoundary::bumpCycle() {
1355   unsigned Width = SchedModel->getIssueWidth();
1356   IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
1357 
1358   unsigned NextCycle = CurrCycle + 1;
1359   assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
1360   if (MinReadyCycle > NextCycle) {
1361     IssueCount = 0;
1362     NextCycle = MinReadyCycle;
1363   }
1364 
1365   if (!HazardRec->isEnabled()) {
1366     // Bypass HazardRec virtual calls.
1367     CurrCycle = NextCycle;
1368   }
1369   else {
1370     // Bypass getHazardType calls in case of long latency.
1371     for (; CurrCycle != NextCycle; ++CurrCycle) {
1372       if (isTop())
1373         HazardRec->AdvanceCycle();
1374       else
1375         HazardRec->RecedeCycle();
1376     }
1377   }
1378   CheckPending = true;
1379   IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
1380 
1381   DEBUG(dbgs() << "  *** " << Available.getName() << " cycle "
1382         << CurrCycle << '\n');
1383 }
1384 
1385 /// Add the given processor resource to this scheduled zone.
1386 void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
1387                                                        unsigned Cycles) {
1388   unsigned Factor = SchedModel->getResourceFactor(PIdx);
1389   DEBUG(dbgs() << "  " << SchedModel->getProcResource(PIdx)->Name
1390         << " +(" << Cycles << "x" << Factor
1391         << ") / " << SchedModel->getLatencyFactor() << '\n');
1392 
1393   unsigned Count = Factor * Cycles;
1394   ResourceCounts[PIdx] += Count;
1395   assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
1396   Rem->RemainingCounts[PIdx] -= Count;
1397 
1398   // Check if this resource exceeds the current critical resource by a full
1399   // cycle. If so, it becomes the critical resource.
1400   if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
1401       >= (int)SchedModel->getLatencyFactor()) {
1402     CritResIdx = PIdx;
1403     DEBUG(dbgs() << "  *** Critical resource "
1404           << SchedModel->getProcResource(PIdx)->Name << " x"
1405           << ResourceCounts[PIdx] << '\n');
1406   }
1407 }
1408 
1409 /// Move the boundary of scheduled code by one SUnit.
1410 void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
1411   // Update the reservation table.
1412   if (HazardRec->isEnabled()) {
1413     if (!isTop() && SU->isCall) {
1414       // Calls are scheduled with their preceding instructions. For bottom-up
1415       // scheduling, clear the pipeline state before emitting.
1416       HazardRec->Reset();
1417     }
1418     HazardRec->EmitInstruction(SU);
1419   }
1420   // Update resource counts and critical resource.
1421   if (SchedModel->hasInstrSchedModel()) {
1422     const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1423     Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
1424     for (TargetSchedModel::ProcResIter
1425            PI = SchedModel->getWriteProcResBegin(SC),
1426            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1427       countResource(PI->ProcResourceIdx, PI->Cycles);
1428     }
1429   }
1430   if (isTop()) {
1431     if (SU->getDepth() > ExpectedLatency)
1432       ExpectedLatency = SU->getDepth();
1433   }
1434   else {
1435     if (SU->getHeight() > ExpectedLatency)
1436       ExpectedLatency = SU->getHeight();
1437   }
1438 
1439   IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
1440 
1441   // Check the instruction group dispatch limit.
1442   // TODO: Check if this SU must end a dispatch group.
1443   IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
1444 
1445   // checkHazard prevents scheduling multiple instructions per cycle that exceed
1446   // issue width. However, we commonly reach the maximum. In this case
1447   // opportunistically bump the cycle to avoid uselessly checking everything in
1448   // the readyQ. Furthermore, a single instruction may produce more than one
1449   // cycle's worth of micro-ops.
1450   if (IssueCount >= SchedModel->getIssueWidth()) {
1451     DEBUG(dbgs() << "  *** Max instrs at cycle " << CurrCycle << '\n');
1452     bumpCycle();
1453   }
1454 }
1455 
1456 /// Release pending ready nodes into the available queue. This makes them
1457 /// visible to heuristics.
1458 void ConvergingScheduler::SchedBoundary::releasePending() {
1459   // If the available queue is empty, it is safe to reset MinReadyCycle.
1460   if (Available.empty())
1461     MinReadyCycle = UINT_MAX;
1462 
1463   // Check to see if any of the pending instructions are ready to issue.  If
1464   // so, add them to the available queue.
1465   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
1466     SUnit *SU = *(Pending.begin()+i);
1467     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
1468 
1469     if (ReadyCycle < MinReadyCycle)
1470       MinReadyCycle = ReadyCycle;
1471 
1472     if (ReadyCycle > CurrCycle)
1473       continue;
1474 
1475     if (checkHazard(SU))
1476       continue;
1477 
1478     Available.push(SU);
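    // Removing from Pending refills this slot with another element (the queue
    // is expected to swap in its last entry), so back up the index and bound
    // to revisit position i.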
1479     Pending.remove(Pending.begin()+i);
1480     --i; --e;
1481   }
1482   DEBUG(if (!Pending.empty()) Pending.dump());
1483   CheckPending = false;
1484 }
1485 
1486 /// Remove SU from the ready set for this boundary.
1487 void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
1488   if (Available.isInQueue(SU))
1489     Available.remove(Available.find(SU));
1490   else {
1491     assert(Pending.isInQueue(SU) && "bad ready count");
1492     Pending.remove(Pending.find(SU));
1493   }
1494 }
1495 
1496 /// If this queue only has one ready candidate, return it. As a side effect,
1497 /// defer any nodes that now hit a hazard, and advance the cycle until at least
1498 /// one node is ready. If multiple instructions are ready, return NULL.
1499 SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
1500   if (CheckPending)
1501     releasePending();
1502 
1503   if (IssueCount > 0) {
1504     // Defer any ready instrs that now have a hazard.
1505     for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
1506       if (checkHazard(*I)) {
1507         Pending.push(*I);
1508         I = Available.remove(I);
1509         continue;
1510       }
1511       ++I;
1512     }
1513   }
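  // If nothing is available, advance the cycle and release pending nodes until
  // something becomes ready. The assert bounds the number of bumped cycles to
  // catch a permanent hazard.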
1514   for (unsigned i = 0; Available.empty(); ++i) {
1515     assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
1516            "permanent hazard"); (void)i;
1517     bumpCycle();
1518     releasePending();
1519   }
1520   if (Available.size() == 1)
1521     return *Available.begin();
1522   return NULL;
1523 }
1524 
1525 /// Record the candidate policy for opposite zones with different critical
1526 /// resources.
1527 ///
1528 /// If the CriticalZone is latency limited, don't force a policy for the
1529 /// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed.
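///
/// Otherwise, if the critical zone is overconsuming its critical resource
/// relative to the remaining work, ask its candidate to reduce that resource;
/// if the opposite zone is underconsuming the same resource, ask the opposite
/// candidate to demand it.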
1530 void ConvergingScheduler::balanceZones(
1531   ConvergingScheduler::SchedBoundary &CriticalZone,
1532   ConvergingScheduler::SchedCandidate &CriticalCand,
1533   ConvergingScheduler::SchedBoundary &OppositeZone,
1534   ConvergingScheduler::SchedCandidate &OppositeCand) {
1535 
1536   if (!CriticalZone.IsResourceLimited)
1537     return;
1538   assert(SchedModel->hasInstrSchedModel() && "required schedmodel");
1539 
1540   SchedRemainder *Rem = CriticalZone.Rem;
1541 
1542   // If the critical zone is overconsuming a resource relative to the
1543   // remainder, try to reduce it.
1544   unsigned RemainingCritCount =
1545     Rem->RemainingCounts[CriticalZone.CritResIdx];
1546   if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount)
1547       > (int)SchedModel->getLatencyFactor()) {
1548     CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
1549     DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
1550           << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
1551           << '\n');
1552   }
1553   // If the other zone is underconsuming a resource relative to the full zone,
1554   // try to increase it.
1555   unsigned OppositeCount =
1556     OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
1557   if ((int)(OppositeZone.ExpectedCount - OppositeCount)
1558       > (int)SchedModel->getLatencyFactor()) {
1559     OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
1560     DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
1561           << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
1562           << '\n');
1563   }
1564 }
1565 
1566 /// Determine if the scheduled zones exceed resource limits or the critical
1567 /// path and set each candidate's resource and latency policies accordingly.
1568 void ConvergingScheduler::checkResourceLimits(
1569   ConvergingScheduler::SchedCandidate &TopCand,
1570   ConvergingScheduler::SchedCandidate &BotCand) {
1571 
1572   // Set ReduceLatency to true if needed.
1573   Bot.setLatencyPolicy(BotCand.Policy);
1574   Top.setLatencyPolicy(TopCand.Policy);
1575 
1576   // Handle resource-limited regions.
1577   if (Top.IsResourceLimited && Bot.IsResourceLimited
1578       && Top.CritResIdx == Bot.CritResIdx) {
1579     // If the scheduled critical resource in both zones is no longer the
1580     // critical remaining resource, attempt to reduce resource height both ways.
1581     if (Top.CritResIdx != Rem.CritResIdx) {
1582       TopCand.Policy.ReduceResIdx = Top.CritResIdx;
1583       BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
1584       DEBUG(dbgs() << "Reduce scheduled "
1585             << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
1586     }
1587     return;
1588   }
1589   // Handle latency-limited regions.
1590   if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
1591     // If the total scheduled expected latency exceeds the region's critical
1592     // path then reduce latency both ways.
1593     //
1594     // Just because a zone is not resource limited does not mean it is latency
1595     // limited. Unbuffered resources, such as the max micro-ops per cycle, may
1596     // cause CurrCycle to exceed expected latency.
1597     if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
1598         && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
1599       TopCand.Policy.ReduceLatency = true;
1600       BotCand.Policy.ReduceLatency = true;
1601       DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
1602             << " + " << Bot.ExpectedLatency << '\n');
1603     }
1604     return;
1605   }
1606   // The critical resource is different in each zone, so request balancing.
1607 
1608   // Compute the cost of each zone.
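  // ExpectedCount ends up in scaled resource units: the cycle bound is
  // multiplied by the latency factor before being compared against the
  // critical resource count.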
1609   Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
1610   Top.ExpectedCount = std::max(
1611     Top.getCriticalCount(),
1612     Top.ExpectedCount * SchedModel->getLatencyFactor());
1613   Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
1614   Bot.ExpectedCount = std::max(
1615     Bot.getCriticalCount(),
1616     Bot.ExpectedCount * SchedModel->getLatencyFactor());
1617 
1618   balanceZones(Top, TopCand, Bot, BotCand);
1619   balanceZones(Bot, BotCand, Top, TopCand);
1620 }
1621 
1622 void ConvergingScheduler::SchedCandidate::
1623 initResourceDelta(const ScheduleDAGMI *DAG,
1624                   const TargetSchedModel *SchedModel) {
1625   if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
1626     return;
1627 
1628   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1629   for (TargetSchedModel::ProcResIter
1630          PI = SchedModel->getWriteProcResBegin(SC),
1631          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1632     if (PI->ProcResourceIdx == Policy.ReduceResIdx)
1633       ResDelta.CritResources += PI->Cycles;
1634     if (PI->ProcResourceIdx == Policy.DemandResIdx)
1635       ResDelta.DemandedResources += PI->Cycles;
1636   }
1637 }
1638 
1639 /// Return true if this heuristic determines order.
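/// TryCand wins with \p Reason when TryVal compares better; when it compares
/// worse, the existing candidate keeps winning but its reason is capped at
/// \p Reason (lower CandReason values rank higher). tryGreater below applies
/// the same convention with the comparison reversed.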
1640 static bool tryLess(unsigned TryVal, unsigned CandVal,
1641                     ConvergingScheduler::SchedCandidate &TryCand,
1642                     ConvergingScheduler::SchedCandidate &Cand,
1643                     ConvergingScheduler::CandReason Reason) {
1644   if (TryVal < CandVal) {
1645     TryCand.Reason = Reason;
1646     return true;
1647   }
1648   if (TryVal > CandVal) {
1649     if (Cand.Reason > Reason)
1650       Cand.Reason = Reason;
1651     return true;
1652   }
1653   return false;
1654 }
1655 
1656 static bool tryGreater(unsigned TryVal, unsigned CandVal,
1657                        ConvergingScheduler::SchedCandidate &TryCand,
1658                        ConvergingScheduler::SchedCandidate &Cand,
1659                        ConvergingScheduler::CandReason Reason) {
1660   if (TryVal > CandVal) {
1661     TryCand.Reason = Reason;
1662     return true;
1663   }
1664   if (TryVal < CandVal) {
1665     if (Cand.Reason > Reason)
1666       Cand.Reason = Reason;
1667     return true;
1668   }
1669   return false;
1670 }
1671 
1672 static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
1673   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
1674 }
1675 
1676 /// Apply a set of heuristics to a new candidate. Heuristics are currently
1677 /// hierarchical. This may be more efficient than a graduated cost model because
1678 /// we don't need to evaluate all aspects of the model for each node in the
1679 /// queue. But it's really done to make the heuristics easier to debug and
1680 /// statistically analyze.
1681 ///
1682 /// \param Cand provides the policy and current best candidate.
1683 /// \param TryCand refers to the next SUnit candidate; otherwise uninitialized.
1684 /// \param Zone describes the scheduled zone that we are extending.
1685 /// \param RPTracker describes reg pressure within the scheduled zone.
1686 /// \param TempTracker is a scratch pressure tracker to reuse in queries.
1687 void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
1688                                        SchedCandidate &TryCand,
1689                                        SchedBoundary &Zone,
1690                                        const RegPressureTracker &RPTracker,
1691                                        RegPressureTracker &TempTracker) {
1692 
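  // Heuristics below are applied in strict priority order: excess pressure,
  // critical-set pressure, clustering and weak edges, resource reduction and
  // demand, latency (only when the policy requests it), overall max pressure,
  // next def/use, and finally the original instruction order.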
1693   // Always initialize TryCand's RPDelta.
1694   TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
1695                                   DAG->getRegionCriticalPSets(),
1696                                   DAG->getRegPressure().MaxSetPressure);
1697 
1698   // Initialize the candidate if needed.
1699   if (!Cand.isValid()) {
1700     TryCand.Reason = NodeOrder;
1701     return;
1702   }
1703   // Avoid exceeding the target's limit.
1704   if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
1705               Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
1706     return;
1707   if (Cand.Reason == SingleExcess)
1708     Cand.Reason = MultiPressure;
1709 
1710   // Avoid increasing the max critical pressure in the scheduled region.
1711   if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
1712               Cand.RPDelta.CriticalMax.UnitIncrease,
1713               TryCand, Cand, SingleCritical))
1714     return;
1715   if (Cand.Reason == SingleCritical)
1716     Cand.Reason = MultiPressure;
1717 
1718   // Keep clustered nodes together to encourage downstream peephole
1719   // optimizations which may reduce resource requirements.
1720   //
1721   // This is a best effort to set things up for a post-RA pass. Optimizations
1722   // like generating loads of multiple registers should ideally be done within
1723   // the scheduler pass by combining the loads during DAG postprocessing.
1724   const SUnit *NextClusterSU =
1725     Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
1726   if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
1727                  TryCand, Cand, Cluster))
1728     return;
1729   // Currently, weak edges are for clustering, so we hard-code that reason.
1730   // However, deferring the current TryCand will not change Cand's reason.
1731   CandReason OrigReason = Cand.Reason;
1732   if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
1733               getWeakLeft(Cand.SU, Zone.isTop()),
1734               TryCand, Cand, Cluster)) {
1735     Cand.Reason = OrigReason;
1736     return;
1737   }
1738   // Avoid critical resource consumption and balance the schedule.
1739   TryCand.initResourceDelta(DAG, SchedModel);
1740   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
1741               TryCand, Cand, ResourceReduce))
1742     return;
1743   if (tryGreater(TryCand.ResDelta.DemandedResources,
1744                  Cand.ResDelta.DemandedResources,
1745                  TryCand, Cand, ResourceDemand))
1746     return;
1747 
1748   // Avoid serializing long latency dependence chains.
1749   if (Cand.Policy.ReduceLatency) {
1750     if (Zone.isTop()) {
1751       if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
1752           > Zone.ExpectedCount) {
1753         if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
1754                     TryCand, Cand, TopDepthReduce))
1755           return;
1756       }
1757       if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
1758                      TryCand, Cand, TopPathReduce))
1759         return;
1760     }
1761     else {
1762       if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
1763           > Zone.ExpectedCount) {
1764         if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
1765                     TryCand, Cand, BotHeightReduce))
1766           return;
1767       }
1768       if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
1769                      TryCand, Cand, BotPathReduce))
1770         return;
1771     }
1772   }
1773 
1774   // Avoid increasing the max pressure of the entire region.
1775   if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
1776               Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
1777     return;
1778   if (Cand.Reason == SingleMax)
1779     Cand.Reason = MultiPressure;
1780 
1781   // Prefer immediate defs/users of the last scheduled instruction. This is a
1782   // nice pressure avoidance strategy that also conserves the processor's
1783   // register renaming resources and keeps the machine code readable.
1784   if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
1785                  TryCand, Cand, NextDefUse))
1786     return;
1787 
1788   // Fall through to original instruction order.
1789   if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
1790       || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
1791     TryCand.Reason = NodeOrder;
1792   }
1793 }
1794 
1795 /// pickNodeBidirectional helper that returns true if the LHS reg pressure
1796 /// effect is more desirable than RHS from a scheduling standpoint.
1797 static bool compareRPDelta(const RegPressureDelta &LHS,
1798                            const RegPressureDelta &RHS) {
1799   // Compare each component of pressure in decreasing order of importance
1800   // without checking if any are valid. Invalid PressureElements are assumed to
1801   // have UnitIncrease==0, so are neutral.
1802 
1803   // Avoid exceeding the target's register pressure limit.
1804   if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
1805     DEBUG(dbgs() << "RP excess top - bot: "
1806           << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
1807     return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
1808   }
1809   // Avoid increasing the max critical pressure in the scheduled region.
1810   if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
1811     DEBUG(dbgs() << "RP critical top - bot: "
1812           << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
1813           << '\n');
1814     return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
1815   }
1816   // Avoid increasing the max pressure of the entire region.
1817   if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
1818     DEBUG(dbgs() << "RP current top - bot: "
1819           << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
1820           << '\n');
1821     return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
1822   }
1823   return false;
1824 }
1825 
1826 #ifndef NDEBUG
1827 const char *ConvergingScheduler::getReasonStr(
1828   ConvergingScheduler::CandReason Reason) {
1829   switch (Reason) {
1830   case NoCand:         return "NOCAND    ";
1831   case SingleExcess:   return "REG-EXCESS";
1832   case SingleCritical: return "REG-CRIT  ";
1833   case Cluster:        return "CLUSTER   ";
1834   case SingleMax:      return "REG-MAX   ";
1835   case MultiPressure:  return "REG-MULTI ";
1836   case ResourceReduce: return "RES-REDUCE";
1837   case ResourceDemand: return "RES-DEMAND";
1838   case TopDepthReduce: return "TOP-DEPTH ";
1839   case TopPathReduce:  return "TOP-PATH  ";
1840   case BotHeightReduce:return "BOT-HEIGHT";
1841   case BotPathReduce:  return "BOT-PATH  ";
1842   case NextDefUse:     return "DEF-USE   ";
1843   case NodeOrder:      return "ORDER     ";
1844   }
1845   llvm_unreachable("Unknown reason!");
1846 }
1847 
1848 void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
1849                                          const SchedBoundary &Zone) {
1850   const char *Label = getReasonStr(Cand.Reason);
1851   PressureElement P;
1852   unsigned ResIdx = 0;
1853   unsigned Latency = 0;
1854   switch (Cand.Reason) {
1855   default:
1856     break;
1857   case SingleExcess:
1858     P = Cand.RPDelta.Excess;
1859     break;
1860   case SingleCritical:
1861     P = Cand.RPDelta.CriticalMax;
1862     break;
1863   case SingleMax:
1864     P = Cand.RPDelta.CurrentMax;
1865     break;
1866   case ResourceReduce:
1867     ResIdx = Cand.Policy.ReduceResIdx;
1868     break;
1869   case ResourceDemand:
1870     ResIdx = Cand.Policy.DemandResIdx;
1871     break;
1872   case TopDepthReduce:
1873     Latency = Cand.SU->getDepth();
1874     break;
1875   case TopPathReduce:
1876     Latency = Cand.SU->getHeight();
1877     break;
1878   case BotHeightReduce:
1879     Latency = Cand.SU->getHeight();
1880     break;
1881   case BotPathReduce:
1882     Latency = Cand.SU->getDepth();
1883     break;
1884   }
1885   dbgs() << Label << " " << Zone.Available.getName() << " ";
1886   if (P.isValid())
1887     dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
1888            << " ";
1889   else
1890     dbgs() << "     ";
1891   if (ResIdx)
1892     dbgs() << SchedModel->getProcResource(ResIdx)->Name << " ";
1893   else
1894     dbgs() << "        ";
1895   if (Latency)
1896     dbgs() << Latency << " cycles ";
1897   else
1898     dbgs() << "         ";
1899   Cand.SU->dump(DAG);
1900 }
1901 #endif
1902 
1903 /// Pick the best candidate from the ready queue of the given zone.
1904 ///
1905 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
1906 /// DAG building. To adjust for the current scheduling location we need to
1907 /// maintain the number of vreg uses remaining to be top-scheduled.
1908 void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
1909                                             const RegPressureTracker &RPTracker,
1910                                             SchedCandidate &Cand) {
1911   ReadyQueue &Q = Zone.Available;
1912 
1913   DEBUG(Q.dump());
1914 
1915   // getMaxPressureDelta temporarily modifies the tracker.
1916   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
1917 
1918   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
1919 
1920     SchedCandidate TryCand(Cand.Policy);
1921     TryCand.SU = *I;
1922     tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
1923     if (TryCand.Reason != NoCand) {
1924       // Initialize resource delta if needed in case future heuristics query it.
1925       if (TryCand.ResDelta == SchedResourceDelta())
1926         TryCand.initResourceDelta(DAG, SchedModel);
1927       Cand.setBest(TryCand);
1928       DEBUG(traceCandidate(Cand, Zone));
1929     }
1930   }
1931 }
1932 
1933 static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
1934                       bool IsTop) {
1935   DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot")
1936         << " SU(" << Cand.SU->NodeNum << ") "
1937         << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
1938 }
1939 
1940 /// Pick the best candidate node from either the top or bottom queue.
1941 SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
1942   // Schedule as far as possible in the direction of no choice. This is most
1943   // efficient, but also provides the best heuristics for CriticalPSets.
1944   if (SUnit *SU = Bot.pickOnlyChoice()) {
1945     IsTopNode = false;
1946     return SU;
1947   }
1948   if (SUnit *SU = Top.pickOnlyChoice()) {
1949     IsTopNode = true;
1950     return SU;
1951   }
1952   CandPolicy NoPolicy;
1953   SchedCandidate BotCand(NoPolicy);
1954   SchedCandidate TopCand(NoPolicy);
1955   checkResourceLimits(TopCand, BotCand);
1956 
1957   // Prefer bottom scheduling when heuristics are silent.
1958   pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
1959   assert(BotCand.Reason != NoCand && "failed to find the first candidate");
1960 
1961   // If either Q has a single candidate that provides the least increase in
1962   // Excess pressure, we can immediately schedule from that Q.
1963   //
1964   // RegionCriticalPSets summarizes the pressure within the scheduled region and
1965   // affects picking from either Q. If scheduling in one direction must
1966   // increase pressure for one of the excess PSets, then schedule in that
1967   // direction first to provide more freedom in the other direction.
1968   if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
1969     IsTopNode = false;
1970     tracePick(BotCand, IsTopNode);
1971     return BotCand.SU;
1972   }
1973   // Check if the top Q has a better candidate.
1974   pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
1975   assert(TopCand.Reason != NoCand && "failed to find the first candidate");
1976 
1977   // If either Q has a single candidate that minimizes pressure above the
1978   // original region's pressure, pick it.
1979   if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
1980     if (TopCand.Reason < BotCand.Reason) {
1981       IsTopNode = true;
1982       tracePick(TopCand, IsTopNode);
1983       return TopCand.SU;
1984     }
1985     IsTopNode = false;
1986     tracePick(BotCand, IsTopNode);
1987     return BotCand.SU;
1988   }
1989   // Check for a salient pressure difference and pick the best from either side.
1990   if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
1991     IsTopNode = true;
1992     tracePick(TopCand, IsTopNode);
1993     return TopCand.SU;
1994   }
1995   // Otherwise prefer the bottom candidate, in node order if all else failed.
1996   if (TopCand.Reason < BotCand.Reason) {
1997     IsTopNode = true;
1998     tracePick(TopCand, IsTopNode);
1999     return TopCand.SU;
2000   }
2001   IsTopNode = false;
2002   tracePick(BotCand, IsTopNode);
2003   return BotCand.SU;
2004 }
2005 
2006 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
2007 SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
2008   if (DAG->top() == DAG->bottom()) {
2009     assert(Top.Available.empty() && Top.Pending.empty() &&
2010            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
2011     return NULL;
2012   }
2013   SUnit *SU;
2014   do {
2015     if (ForceTopDown) {
2016       SU = Top.pickOnlyChoice();
2017       if (!SU) {
2018         CandPolicy NoPolicy;
2019         SchedCandidate TopCand(NoPolicy);
2020         pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
2021         assert(TopCand.Reason != NoCand && "failed to find the first candidate");
2022         SU = TopCand.SU;
2023       }
2024       IsTopNode = true;
2025     }
2026     else if (ForceBottomUp) {
2027       SU = Bot.pickOnlyChoice();
2028       if (!SU) {
2029         CandPolicy NoPolicy;
2030         SchedCandidate BotCand(NoPolicy);
2031         pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
2032         assert(BotCand.Reason != NoCand && "failed to find the first candidate");
2033         SU = BotCand.SU;
2034       }
2035       IsTopNode = false;
2036     }
2037     else {
2038       SU = pickNodeBidirectional(IsTopNode);
2039     }
2040   } while (SU->isScheduled);
2041 
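  // Drop the picked node from the boundaries where it is still marked ready so
  // the queues stay consistent.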
2042   if (SU->isTopReady())
2043     Top.removeReady(SU);
2044   if (SU->isBottomReady())
2045     Bot.removeReady(SU);
2046 
2047   DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
2048         << " Scheduling Instruction in cycle "
2049         << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
2050         SU->dump(DAG));
2051   return SU;
2052 }
2053 
2054 /// Update the scheduler's state after scheduling a node. This is the same node
2055 /// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
2056 /// its state based on the current cycle before MachineSchedStrategy does.
2057 void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
2058   if (IsTopNode) {
2059     SU->TopReadyCycle = Top.CurrCycle;
2060     Top.bumpNode(SU);
2061   }
2062   else {
2063     SU->BotReadyCycle = Bot.CurrCycle;
2064     Bot.bumpNode(SU);
2065   }
2066 }
2067 
2068 /// Create the standard converging machine scheduler. This will be used as the
2069 /// default scheduler if the target does not set a default.
2070 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
2071   assert((!ForceTopDown || !ForceBottomUp) &&
2072          "-misched-topdown incompatible with -misched-bottomup");
2073   ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
2074   // Register DAG post-processors.
2075   if (EnableLoadCluster)
2076     DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
2077   if (EnableMacroFusion)
2078     DAG->addMutation(new MacroFusion(DAG->TII));
2079   return DAG;
2080 }
2081 static MachineSchedRegistry
2082 ConvergingSchedRegistry("converge", "Standard converging scheduler.",
2083                         createConvergingSched);
2084 
2085 //===----------------------------------------------------------------------===//
2086 // ILP Scheduler. Currently for experimental analysis of heuristics.
2087 //===----------------------------------------------------------------------===//
2088 
2089 namespace {
2090 /// \brief Order nodes by the ILP metric.
2091 struct ILPOrder {
2092   const SchedDFSResult *DFSResult;
2093   const BitVector *ScheduledTrees;
2094   bool MaximizeILP;
2095 
2096   ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {}
2097 
2098   /// \brief Apply a less-than relation on node priority.
2099   ///
2100   /// (Return true if A comes after B in the Q.)
2101   bool operator()(const SUnit *A, const SUnit *B) const {
2102     unsigned SchedTreeA = DFSResult->getSubtreeID(A);
2103     unsigned SchedTreeB = DFSResult->getSubtreeID(B);
2104     if (SchedTreeA != SchedTreeB) {
2105       // Unscheduled trees have lower priority.
2106       if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
2107         return ScheduledTrees->test(SchedTreeB);
2108 
2109       // Trees with shallower connections have lower priority.
2110       if (DFSResult->getSubtreeLevel(SchedTreeA)
2111           != DFSResult->getSubtreeLevel(SchedTreeB)) {
2112         return DFSResult->getSubtreeLevel(SchedTreeA)
2113           < DFSResult->getSubtreeLevel(SchedTreeB);
2114       }
2115     }
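    // Otherwise, order purely by the ILP metric.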
2116     if (MaximizeILP)
2117       return DFSResult->getILP(A) < DFSResult->getILP(B);
2118     else
2119       return DFSResult->getILP(A) > DFSResult->getILP(B);
2120   }
2121 };
2122 
2123 /// \brief Schedule based on the ILP metric.
2124 class ILPScheduler : public MachineSchedStrategy {
2125   /// In case all subtrees are eventually connected to a common root through
2126   /// data dependence (e.g. reduction), place an upper limit on their size.
2127   ///
2128   /// FIXME: A subtree limit is generally good, but in the situation commented
2129   /// above, where multiple similar subtrees feed a common root, we should
2130   /// only split at a point where the resulting subtrees will be balanced.
2131   /// (a motivating test case must be found).
2132   static const unsigned SubtreeLimit = 16;
2133 
2134   ScheduleDAGMI *DAG;
2135   ILPOrder Cmp;
2136 
2137   std::vector<SUnit*> ReadyQ;
2138 public:
2139   ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {}
2140 
2141   virtual void initialize(ScheduleDAGMI *dag) {
2142     DAG = dag;
2143     DAG->initDFSResult();
2144     Cmp.DFSResult = DAG->getDFSResult();
2145     Cmp.ScheduledTrees = &DAG->getScheduledTrees();
2146     ReadyQ.clear();
2147   }
2148 
2149   virtual void registerRoots() {
2150     DAG->computeDFSResult(ReadyQ);
2151     // Restore the heap in ReadyQ with the updated DFS results.
2152     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2153   }
2154 
2155   /// Implement MachineSchedStrategy interface.
2156   /// -----------------------------------------
2157 
2158   /// Callback to select the highest priority node from the ready Q.
2159   virtual SUnit *pickNode(bool &IsTopNode) {
2160     if (ReadyQ.empty()) return NULL;
2161     std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2162     SUnit *SU = ReadyQ.back();
2163     ReadyQ.pop_back();
2164     IsTopNode = false;
2165     DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): "
2166           << *SU->getInstr()
2167           << " ILP: " << DAG->getDFSResult()->getILP(SU)
2168           << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
2169           << DAG->getDFSResult()->getSubtreeLevel(
2170             DAG->getDFSResult()->getSubtreeID(SU)) << '\n');
2171     return SU;
2172   }
2173 
2174   /// \brief Scheduler callback to notify that a new subtree is scheduled.
2175   virtual void scheduleTree(unsigned SubtreeID) {
2176     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2177   }
2178 
2179   /// Callback after a node is scheduled. Resorting of the priority Q for newly
2180   /// scheduled trees is handled by scheduleTree(); here we only check direction.
2181   virtual void schedNode(SUnit *SU, bool IsTopNode) {
2182     assert(!IsTopNode && "SchedDFSResult needs bottom-up");
2183   }
2184 
2185   virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
2186 
2187   virtual void releaseBottomNode(SUnit *SU) {
2188     ReadyQ.push_back(SU);
2189     std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2190   }
2191 };
2192 } // namespace
2193 
2194 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
2195   return new ScheduleDAGMI(C, new ILPScheduler(true));
2196 }
2197 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
2198   return new ScheduleDAGMI(C, new ILPScheduler(false));
2199 }
2200 static MachineSchedRegistry ILPMaxRegistry(
2201   "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
2202 static MachineSchedRegistry ILPMinRegistry(
2203   "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
2204 
2205 //===----------------------------------------------------------------------===//
2206 // Machine Instruction Shuffler for Correctness Testing
2207 //===----------------------------------------------------------------------===//
2208 
2209 #ifndef NDEBUG
2210 namespace {
2211 /// Apply a less-than relation on the node order, which corresponds to the
2212 /// instruction order prior to scheduling. IsReverse implements greater-than.
2213 template<bool IsReverse>
2214 struct SUnitOrder {
2215   bool operator()(SUnit *A, SUnit *B) const {
2216     if (IsReverse)
2217       return A->NodeNum > B->NodeNum;
2218     else
2219       return A->NodeNum < B->NodeNum;
2220   }
2221 };
2222 
2223 /// Reorder instructions as much as possible.
2224 class InstructionShuffler : public MachineSchedStrategy {
2225   bool IsAlternating;
2226   bool IsTopDown;
2227 
2228   // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
2229   // gives nodes with a higher number higher priority, causing the latest
2230   // instructions to be scheduled first.
2231   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
2232     TopQ;
2233   // When scheduling bottom-up, use greater-than as the queue priority.
2234   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
2235     BottomQ;
2236 public:
2237   InstructionShuffler(bool alternate, bool topdown)
2238     : IsAlternating(alternate), IsTopDown(topdown) {}
2239 
2240   virtual void initialize(ScheduleDAGMI *) {
2241     TopQ.clear();
2242     BottomQ.clear();
2243   }
2244 
2245   /// Implement MachineSchedStrategy interface.
2246   /// -----------------------------------------
2247 
2248   virtual SUnit *pickNode(bool &IsTopNode) {
2249     SUnit *SU;
2250     if (IsTopDown) {
2251       do {
2252         if (TopQ.empty()) return NULL;
2253         SU = TopQ.top();
2254         TopQ.pop();
2255       } while (SU->isScheduled);
2256       IsTopNode = true;
2257     }
2258     else {
2259       do {
2260         if (BottomQ.empty()) return NULL;
2261         SU = BottomQ.top();
2262         BottomQ.pop();
2263       } while (SU->isScheduled);
2264       IsTopNode = false;
2265     }
2266     if (IsAlternating)
2267       IsTopDown = !IsTopDown;
2268     return SU;
2269   }
2270 
2271   virtual void schedNode(SUnit *SU, bool IsTopNode) {}
2272 
2273   virtual void releaseTopNode(SUnit *SU) {
2274     TopQ.push(SU);
2275   }
2276   virtual void releaseBottomNode(SUnit *SU) {
2277     BottomQ.push(SU);
2278   }
2279 };
2280 } // namespace
2281 
2282 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
2283   bool Alternate = !ForceTopDown && !ForceBottomUp;
2284   bool TopDown = !ForceBottomUp;
2285   assert((TopDown || !ForceTopDown) &&
2286          "-misched-topdown incompatible with -misched-bottomup");
2287   return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
2288 }
2289 static MachineSchedRegistry ShufflerRegistry(
2290   "shuffle", "Shuffle machine instructions alternating directions",
2291   createInstructionShuffler);
2292 #endif // !NDEBUG
2293