1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // MachineScheduler schedules machine instructions after phi elimination. It
11 // preserves LiveIntervals so it can be invoked before register allocation.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/BitVector.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/iterator_range.h"
19 #include "llvm/ADT/PriorityQueue.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/Analysis/AliasAnalysis.h"
23 #include "llvm/CodeGen/LiveInterval.h"
24 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
25 #include "llvm/CodeGen/MachineBasicBlock.h"
26 #include "llvm/CodeGen/MachineDominators.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstr.h"
30 #include "llvm/CodeGen/MachineLoopInfo.h"
31 #include "llvm/CodeGen/MachineOperand.h"
32 #include "llvm/CodeGen/MachinePassRegistry.h"
33 #include "llvm/CodeGen/RegisterPressure.h"
34 #include "llvm/CodeGen/MachineRegisterInfo.h"
35 #include "llvm/CodeGen/MachineScheduler.h"
36 #include "llvm/CodeGen/MachineValueType.h"
37 #include "llvm/CodeGen/Passes.h"
38 #include "llvm/CodeGen/RegisterClassInfo.h"
39 #include "llvm/CodeGen/ScheduleDAG.h"
40 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
41 #include "llvm/CodeGen/ScheduleDAGMutation.h"
42 #include "llvm/CodeGen/ScheduleDFS.h"
43 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
44 #include "llvm/CodeGen/SlotIndexes.h"
45 #include "llvm/CodeGen/TargetPassConfig.h"
46 #include "llvm/CodeGen/TargetSchedule.h"
47 #include "llvm/MC/LaneBitmask.h"
48 #include "llvm/Pass.h"
49 #include "llvm/Support/CommandLine.h"
50 #include "llvm/Support/Compiler.h"
51 #include "llvm/Support/Debug.h"
52 #include "llvm/Support/ErrorHandling.h"
53 #include "llvm/Support/GraphWriter.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include "llvm/Target/TargetInstrInfo.h"
56 #include "llvm/Target/TargetLowering.h"
57 #include "llvm/Target/TargetRegisterInfo.h"
58 #include "llvm/Target/TargetSubtargetInfo.h"
59 #include <algorithm>
60 #include <cassert>
61 #include <cstdint>
62 #include <iterator>
63 #include <limits>
64 #include <memory>
65 #include <string>
66 #include <tuple>
67 #include <utility>
68 #include <vector>
69 
70 using namespace llvm;
71 
72 #define DEBUG_TYPE "misched"
73 
74 namespace llvm {
75 
76 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
77                            cl::desc("Force top-down list scheduling"));
78 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
79                             cl::desc("Force bottom-up list scheduling"));
80 cl::opt<bool>
81 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
82                        cl::desc("Print critical path length to stdout"));
83 
84 } // end namespace llvm
85 
86 #ifndef NDEBUG
87 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
88   cl::desc("Pop up a window to show MISched dags after they are processed"));
89 
90 /// In some situations a few uninteresting nodes depend on nearly all other
91 /// nodes in the graph; provide a cutoff to hide them.
92 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
93   cl::desc("Hide nodes with more predecessor/successor than cutoff"));
94 
95 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
96   cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
97 
98 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
99   cl::desc("Only schedule this function"));
100 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
101   cl::desc("Only schedule this MBB#"));
102 #else
103 static bool ViewMISchedDAGs = false;
104 #endif // NDEBUG
105 
106 /// Avoid quadratic complexity in unusually large basic blocks by limiting the
107 /// size of the ready lists.
108 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
109   cl::desc("Limit ready list to N instructions"), cl::init(256));
110 
111 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
112   cl::desc("Enable register pressure scheduling."), cl::init(true));
113 
114 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
115   cl::desc("Enable cyclic critical path analysis."), cl::init(true));
116 
117 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
118                                         cl::desc("Enable memop clustering."),
119                                         cl::init(true));
120 
121 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
122   cl::desc("Verify machine instrs before and after machine scheduling"));
123 
124 // DAG subtrees must have at least this many nodes.
125 static const unsigned MinSubtreeSize = 8;
126 
127 // Pin the vtables to this file.
128 void MachineSchedStrategy::anchor() {}
129 
130 void ScheduleDAGMutation::anchor() {}
131 
132 //===----------------------------------------------------------------------===//
133 // Machine Instruction Scheduling Pass and Registry
134 //===----------------------------------------------------------------------===//
135 
136 MachineSchedContext::MachineSchedContext() {
137   RegClassInfo = new RegisterClassInfo();
138 }
139 
140 MachineSchedContext::~MachineSchedContext() {
141   delete RegClassInfo;
142 }
143 
144 namespace {
145 
146 /// Base class for a machine scheduler pass that can run at any point.
147 class MachineSchedulerBase : public MachineSchedContext,
148                              public MachineFunctionPass {
149 public:
150   MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
151 
152   void print(raw_ostream &O, const Module* = nullptr) const override;
153 
154 protected:
155   void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
156 };
157 
158 /// MachineScheduler runs after coalescing and before register allocation.
159 class MachineScheduler : public MachineSchedulerBase {
160 public:
161   MachineScheduler();
162 
163   void getAnalysisUsage(AnalysisUsage &AU) const override;
164 
165   bool runOnMachineFunction(MachineFunction&) override;
166 
167   static char ID; // Class identification, replacement for typeinfo
168 
169 protected:
170   ScheduleDAGInstrs *createMachineScheduler();
171 };
172 
173 /// PostMachineScheduler runs after register allocation, shortly before code emission.
174 class PostMachineScheduler : public MachineSchedulerBase {
175 public:
176   PostMachineScheduler();
177 
178   void getAnalysisUsage(AnalysisUsage &AU) const override;
179 
180   bool runOnMachineFunction(MachineFunction&) override;
181 
182   static char ID; // Class identification, replacement for typeinfo
183 
184 protected:
185   ScheduleDAGInstrs *createPostMachineScheduler();
186 };
187 
188 } // end anonymous namespace
189 
190 char MachineScheduler::ID = 0;
191 
192 char &llvm::MachineSchedulerID = MachineScheduler::ID;
193 
194 INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler",
195                       "Machine Instruction Scheduler", false, false)
196 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
197 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
198 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
199 INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler",
200                     "Machine Instruction Scheduler", false, false)
201 
202 MachineScheduler::MachineScheduler()
203 : MachineSchedulerBase(ID) {
204   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
205 }
206 
207 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
208   AU.setPreservesCFG();
209   AU.addRequiredID(MachineDominatorsID);
210   AU.addRequired<MachineLoopInfo>();
211   AU.addRequired<AAResultsWrapperPass>();
212   AU.addRequired<TargetPassConfig>();
213   AU.addRequired<SlotIndexes>();
214   AU.addPreserved<SlotIndexes>();
215   AU.addRequired<LiveIntervals>();
216   AU.addPreserved<LiveIntervals>();
217   MachineFunctionPass::getAnalysisUsage(AU);
218 }
219 
220 char PostMachineScheduler::ID = 0;
221 
222 char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
223 
224 INITIALIZE_PASS(PostMachineScheduler, "postmisched",
225                 "PostRA Machine Instruction Scheduler", false, false)
226 
227 PostMachineScheduler::PostMachineScheduler()
228 : MachineSchedulerBase(ID) {
229   initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
230 }
231 
232 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
233   AU.setPreservesCFG();
234   AU.addRequiredID(MachineDominatorsID);
235   AU.addRequired<MachineLoopInfo>();
236   AU.addRequired<TargetPassConfig>();
237   MachineFunctionPass::getAnalysisUsage(AU);
238 }
239 
240 MachinePassRegistry MachineSchedRegistry::Registry;
241 
242 /// A dummy default scheduler factory indicates whether the scheduler
243 /// is overridden on the command line.
244 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
245   return nullptr;
246 }
247 
248 /// MachineSchedOpt allows command line selection of the scheduler.
249 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
250                RegisterPassParser<MachineSchedRegistry>>
251 MachineSchedOpt("misched",
252                 cl::init(&useDefaultMachineSched), cl::Hidden,
253                 cl::desc("Machine instruction scheduler to use"));
254 
255 static MachineSchedRegistry
256 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
257                      useDefaultMachineSched);
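// Illustrative sketch (not in the original source): a target or an
// experimental scheduler can make itself selectable via -misched=<name> by
// registering a factory with MachineSchedRegistry, mirroring
// DefaultSchedRegistry above. The name "my-custom-sched" and the factory below
// are hypothetical and reuse createGenericSchedLive as a placeholder.
#if 0
static ScheduleDAGInstrs *createMyCustomSched(MachineSchedContext *C) {
  // A real entry point would construct its own ScheduleDAGMI subclass here.
  return createGenericSchedLive(C);
}
static MachineSchedRegistry
MyCustomSchedRegistry("my-custom-sched", "Example custom scheduler entry.",
                      createMyCustomSched);
#endif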
258 
259 static cl::opt<bool> EnableMachineSched(
260     "enable-misched",
261     cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
262     cl::Hidden);
263 
264 static cl::opt<bool> EnablePostRAMachineSched(
265     "enable-post-misched",
266     cl::desc("Enable the post-ra machine instruction scheduling pass."),
267     cl::init(true), cl::Hidden);
268 
269 /// Decrement this iterator until reaching the top or a non-debug instr.
270 static MachineBasicBlock::const_iterator
271 priorNonDebug(MachineBasicBlock::const_iterator I,
272               MachineBasicBlock::const_iterator Beg) {
273   assert(I != Beg && "reached the top of the region, cannot decrement");
274   while (--I != Beg) {
275     if (!I->isDebugValue())
276       break;
277   }
278   return I;
279 }
280 
281 /// Non-const version.
282 static MachineBasicBlock::iterator
283 priorNonDebug(MachineBasicBlock::iterator I,
284               MachineBasicBlock::const_iterator Beg) {
285   return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
286       .getNonConstIterator();
287 }
288 
289 /// If this iterator is a debug value, increment until reaching the End or a
290 /// non-debug instruction.
291 static MachineBasicBlock::const_iterator
292 nextIfDebug(MachineBasicBlock::const_iterator I,
293             MachineBasicBlock::const_iterator End) {
294   for(; I != End; ++I) {
295     if (!I->isDebugValue())
296       break;
297   }
298   return I;
299 }
300 
301 /// Non-const version.
302 static MachineBasicBlock::iterator
303 nextIfDebug(MachineBasicBlock::iterator I,
304             MachineBasicBlock::const_iterator End) {
305   return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
306       .getNonConstIterator();
307 }
308 
309 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
310 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
311   // Select the scheduler, or set the default.
312   MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
313   if (Ctor != useDefaultMachineSched)
314     return Ctor(this);
315 
316   // Get the default scheduler set by the target for this function.
317   ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
318   if (Scheduler)
319     return Scheduler;
320 
321   // Default to GenericScheduler.
322   return createGenericSchedLive(this);
323 }
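// Illustrative sketch (not in the original source): instead of the registry, a
// target can hook in here by overriding TargetPassConfig::createMachineScheduler.
// "MyTargetPassConfig" is a hypothetical pass config shown only to outline the
// pattern; it drives ScheduleDAGMILive with the stock GenericScheduler strategy.
#if 0
ScheduleDAGInstrs *
MyTargetPassConfig::createMachineScheduler(MachineSchedContext *C) const {
  // Returning nullptr would fall back to the defaults chosen above.
  return new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
}
#endif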
324 
325 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
326 /// the caller. We don't have a command line option to override the postRA
327 /// scheduler. The Target must configure it.
328 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
329   // Get the postRA scheduler set by the target for this function.
330   ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
331   if (Scheduler)
332     return Scheduler;
333 
334   // Default to GenericScheduler.
335   return createGenericSchedPostRA(this);
336 }
337 
338 /// Top-level MachineScheduler pass driver.
339 ///
340 /// Visit blocks in function order. Divide each block into scheduling regions
341 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
342 /// consistent with the DAG builder, which traverses the interior of the
343 /// scheduling regions bottom-up.
344 ///
345 /// This design avoids exposing scheduling boundaries to the DAG builder,
346 /// simplifying the DAG builder's support for "special" target instructions.
347 /// At the same time the design allows target schedulers to operate across
348 /// scheduling boundaries, for example to bundle the boundary instructions
349 /// without reordering them. This creates complexity, because the target
350 /// scheduler must update the RegionBegin and RegionEnd positions cached by
351 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
352 /// design would be to split blocks at scheduling boundaries, but LLVM has a
353 /// general bias against block splitting purely for implementation simplicity.
354 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
355   if (skipFunction(*mf.getFunction()))
356     return false;
357 
358   if (EnableMachineSched.getNumOccurrences()) {
359     if (!EnableMachineSched)
360       return false;
361   } else if (!mf.getSubtarget().enableMachineScheduler())
362     return false;
363 
364   DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
365 
366   // Initialize the context of the pass.
367   MF = &mf;
368   MLI = &getAnalysis<MachineLoopInfo>();
369   MDT = &getAnalysis<MachineDominatorTree>();
370   PassConfig = &getAnalysis<TargetPassConfig>();
371   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
372 
373   LIS = &getAnalysis<LiveIntervals>();
374 
375   if (VerifyScheduling) {
376     DEBUG(LIS->dump());
377     MF->verify(this, "Before machine scheduling.");
378   }
379   RegClassInfo->runOnMachineFunction(*MF);
380 
381   // Instantiate the selected scheduler for this target, function, and
382   // optimization level.
383   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
384   scheduleRegions(*Scheduler, false);
385 
386   DEBUG(LIS->dump());
387   if (VerifyScheduling)
388     MF->verify(this, "After machine scheduling.");
389   return true;
390 }
391 
392 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
393   if (skipFunction(*mf.getFunction()))
394     return false;
395 
396   if (EnablePostRAMachineSched.getNumOccurrences()) {
397     if (!EnablePostRAMachineSched)
398       return false;
399   } else if (!mf.getSubtarget().enablePostRAScheduler()) {
400     DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
401     return false;
402   }
403   DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
404 
405   // Initialize the context of the pass.
406   MF = &mf;
407   PassConfig = &getAnalysis<TargetPassConfig>();
408 
409   if (VerifyScheduling)
410     MF->verify(this, "Before post machine scheduling.");
411 
412   // Instantiate the selected scheduler for this target, function, and
413   // optimization level.
414   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
415   scheduleRegions(*Scheduler, true);
416 
417   if (VerifyScheduling)
418     MF->verify(this, "After post machine scheduling.");
419   return true;
420 }
421 
422 /// Return true if the given instruction should not be included in a scheduling
423 /// region.
424 ///
425 /// MachineScheduler does not currently support scheduling across calls. To
426 /// handle calls, the DAG builder needs to be modified to create register
427 /// anti/output dependencies on the registers clobbered by the call's regmask
428 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
429 /// scheduling across calls. In PostRA scheduling, isCall is needed to enforce
430 /// the boundary, but there would be no benefit to postRA scheduling across
431 /// calls this late anyway.
432 static bool isSchedBoundary(MachineBasicBlock::iterator MI,
433                             MachineBasicBlock *MBB,
434                             MachineFunction *MF,
435                             const TargetInstrInfo *TII) {
436   return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
437 }
438 
439 /// Main driver for both MachineScheduler and PostMachineScheduler.
440 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
441                                            bool FixKillFlags) {
442   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
443 
444   // Visit all machine basic blocks.
445   //
446   // TODO: Visit blocks in global postorder or postorder within the bottom-up
447   // loop tree. Then we can optionally compute global RegPressure.
448   for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
449        MBB != MBBEnd; ++MBB) {
450 
451     Scheduler.startBlock(&*MBB);
452 
453 #ifndef NDEBUG
454     if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
455       continue;
456     if (SchedOnlyBlock.getNumOccurrences()
457         && (int)SchedOnlyBlock != MBB->getNumber())
458       continue;
459 #endif
460 
461     // Break the block into scheduling regions [I, RegionEnd), and schedule each
462 // region as soon as it is discovered. RegionEnd points to the scheduling
463     // boundary at the bottom of the region. The DAG does not include RegionEnd,
464     // but the region does (i.e. the next RegionEnd is above the previous
465     // RegionBegin). If the current block has no terminator then RegionEnd ==
466     // MBB->end() for the bottom region.
467     //
468     // The Scheduler may insert instructions during either schedule() or
469     // exitRegion(), even for empty regions. So the local iterators 'I' and
470     // 'RegionEnd' are invalid across these calls.
471     //
472     // MBB::size() uses instr_iterator to count. Here we need a bundle to count
473     // as a single instruction.
474     for(MachineBasicBlock::iterator RegionEnd = MBB->end();
475         RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) {
476 
477       // Avoid decrementing RegionEnd for blocks with no terminator.
478       if (RegionEnd != MBB->end() ||
479           isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
480         --RegionEnd;
481       }
482 
483       // The next region starts above the previous region. Look backward in the
484       // instruction stream until we find the nearest boundary.
485       unsigned NumRegionInstrs = 0;
486       MachineBasicBlock::iterator I = RegionEnd;
487       for (; I != MBB->begin(); --I) {
488         MachineInstr &MI = *std::prev(I);
489         if (isSchedBoundary(&MI, &*MBB, MF, TII))
490           break;
491         if (!MI.isDebugValue())
492           ++NumRegionInstrs;
493       }
494       // Notify the scheduler of the region, even if we may skip scheduling
495       // it. Perhaps it still needs to be bundled.
496       Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
497 
498       // Skip empty scheduling regions (0 or 1 schedulable instructions).
499       if (I == RegionEnd || I == std::prev(RegionEnd)) {
500         // Close the current region. Bundle the terminator if needed.
501         // This invalidates 'RegionEnd' and 'I'.
502         Scheduler.exitRegion();
503         continue;
504       }
505       DEBUG(dbgs() << "********** MI Scheduling **********\n");
506       DEBUG(dbgs() << MF->getName()
507             << ":BB#" << MBB->getNumber() << " " << MBB->getName()
508             << "\n  From: " << *I << "    To: ";
509             if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
510             else dbgs() << "End";
511             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
512       if (DumpCriticalPathLength) {
513         errs() << MF->getName();
514         errs() << ":BB# " << MBB->getNumber();
515         errs() << " " << MBB->getName() << " \n";
516       }
517 
518       // Schedule a region: possibly reorder instructions.
519       // This invalidates 'RegionEnd' and 'I'.
520       Scheduler.schedule();
521 
522       // Close the current region.
523       Scheduler.exitRegion();
524 
525       // Scheduling has invalidated the current iterator 'I'. Ask the
526 // scheduler for the top of its scheduled region.
527       RegionEnd = Scheduler.begin();
528     }
529     Scheduler.finishBlock();
530     // FIXME: Ideally, no further passes should rely on kill flags. However,
531     // thumb2 size reduction is currently an exception, so the PostMIScheduler
532     // needs to do this.
533     if (FixKillFlags)
534         Scheduler.fixupKills(&*MBB);
535   }
536   Scheduler.finalizeSchedule();
537 }
538 
539 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
540   // unimplemented
541 }
542 
543 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
544 LLVM_DUMP_METHOD void ReadyQueue::dump() {
545   dbgs() << "Queue " << Name << ": ";
546   for (unsigned i = 0, e = Queue.size(); i < e; ++i)
547     dbgs() << Queue[i]->NodeNum << " ";
548   dbgs() << "\n";
549 }
550 #endif
551 
552 //===----------------------------------------------------------------------===//
553 // ScheduleDAGMI - Basic machine instruction scheduling. This is
554 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
555 // virtual registers.
556 //===----------------------------------------------------------------------===//
557 
558 // Provide a vtable anchor.
559 ScheduleDAGMI::~ScheduleDAGMI() = default;
560 
561 bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
562   return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
563 }
564 
565 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
566   if (SuccSU != &ExitSU) {
567     // Do not use WillCreateCycle, it assumes SD scheduling.
568     // If Pred is reachable from Succ, then the edge creates a cycle.
569     if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
570       return false;
571     Topo.AddPred(SuccSU, PredDep.getSUnit());
572   }
573   SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
574   // Return true regardless of whether a new edge needed to be inserted.
575   return true;
576 }
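// Illustrative sketch (not in the original source): DAG mutations typically use
// canAddEdge()/addEdge() to insert artificial ordering edges without creating
// cycles. "MyOrderingMutation" is hypothetical and exists only to show the API;
// real mutations appear later in this file (e.g. BaseMemOpClusterMutation).
#if 0
class MyOrderingMutation : public ScheduleDAGMutation {
public:
  void apply(ScheduleDAGInstrs *DAGInstrs) override {
    ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI *>(DAGInstrs);
    if (DAG->SUnits.size() < 2)
      return;
    SUnit *Pred = &DAG->SUnits.front();
    SUnit *Succ = &DAG->SUnits.back();
    // Only add the artificial edge if it cannot complete a cycle.
    if (DAG->canAddEdge(Succ, Pred))
      DAG->addEdge(Succ, SDep(Pred, SDep::Artificial));
  }
};
#endif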
577 
578 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
579 /// NumPredsLeft reaches zero, release the successor node.
580 ///
581 /// FIXME: Adjust SuccSU height based on MinLatency.
582 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
583   SUnit *SuccSU = SuccEdge->getSUnit();
584 
585   if (SuccEdge->isWeak()) {
586     --SuccSU->WeakPredsLeft;
587     if (SuccEdge->isCluster())
588       NextClusterSucc = SuccSU;
589     return;
590   }
591 #ifndef NDEBUG
592   if (SuccSU->NumPredsLeft == 0) {
593     dbgs() << "*** Scheduling failed! ***\n";
594     SuccSU->dump(this);
595     dbgs() << " has been released too many times!\n";
596     llvm_unreachable(nullptr);
597   }
598 #endif
599   // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
600   // CurrCycle may have advanced since then.
601   if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
602     SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
603 
604   --SuccSU->NumPredsLeft;
605   if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
606     SchedImpl->releaseTopNode(SuccSU);
607 }
608 
609 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
610 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
611   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
612        I != E; ++I) {
613     releaseSucc(SU, &*I);
614   }
615 }
616 
617 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
618 /// NumSuccsLeft reaches zero, release the predecessor node.
619 ///
620 /// FIXME: Adjust PredSU height based on MinLatency.
621 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
622   SUnit *PredSU = PredEdge->getSUnit();
623 
624   if (PredEdge->isWeak()) {
625     --PredSU->WeakSuccsLeft;
626     if (PredEdge->isCluster())
627       NextClusterPred = PredSU;
628     return;
629   }
630 #ifndef NDEBUG
631   if (PredSU->NumSuccsLeft == 0) {
632     dbgs() << "*** Scheduling failed! ***\n";
633     PredSU->dump(this);
634     dbgs() << " has been released too many times!\n";
635     llvm_unreachable(nullptr);
636   }
637 #endif
638   // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
639   // CurrCycle may have advanced since then.
640   if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
641     PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
642 
643   --PredSU->NumSuccsLeft;
644   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
645     SchedImpl->releaseBottomNode(PredSU);
646 }
647 
648 /// releasePredecessors - Call releasePred on each of SU's predecessors.
649 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
650   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
651        I != E; ++I) {
652     releasePred(SU, &*I);
653   }
654 }
655 
656 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
657 /// crossing a scheduling boundary. [begin, end) includes all instructions in
658 /// the region, including the boundary itself and single-instruction regions
659 /// that don't get scheduled.
660 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
661                                      MachineBasicBlock::iterator begin,
662                                      MachineBasicBlock::iterator end,
663                                      unsigned regioninstrs)
664 {
665   ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
666 
667   SchedImpl->initPolicy(begin, end, regioninstrs);
668 }
669 
670 /// This is normally called from the main scheduler loop but may also be invoked
671 /// by the scheduling strategy to perform additional code motion.
672 void ScheduleDAGMI::moveInstruction(
673   MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
674   // Advance RegionBegin if the first instruction moves down.
675   if (&*RegionBegin == MI)
676     ++RegionBegin;
677 
678   // Update the instruction stream.
679   BB->splice(InsertPos, BB, MI);
680 
681   // Update LiveIntervals
682   if (LIS)
683     LIS->handleMove(*MI, /*UpdateFlags=*/true);
684 
685   // Recede RegionBegin if an instruction moves above the first.
686   if (RegionBegin == InsertPos)
687     RegionBegin = MI;
688 }
689 
690 bool ScheduleDAGMI::checkSchedLimit() {
691 #ifndef NDEBUG
692   if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
693     CurrentTop = CurrentBottom;
694     return false;
695   }
696   ++NumInstrsScheduled;
697 #endif
698   return true;
699 }
700 
701 /// Per-region scheduling driver, called back from
702 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
703 /// does not consider liveness or register pressure. It is useful for PostRA
704 /// scheduling and potentially other custom schedulers.
705 void ScheduleDAGMI::schedule() {
706   DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
707   DEBUG(SchedImpl->dumpPolicy());
708 
709   // Build the DAG.
710   buildSchedGraph(AA);
711 
712   Topo.InitDAGTopologicalSorting();
713 
714   postprocessDAG();
715 
716   SmallVector<SUnit*, 8> TopRoots, BotRoots;
717   findRootsAndBiasEdges(TopRoots, BotRoots);
718 
719   // Initialize the strategy before modifying the DAG.
720   // This may initialize a DFSResult to be used for queue priority.
721   SchedImpl->initialize(this);
722 
723   DEBUG(
724     if (EntrySU.getInstr() != nullptr)
725       EntrySU.dumpAll(this);
726     for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
727       SUnits[su].dumpAll(this);
728     if (ExitSU.getInstr() != nullptr)
729       ExitSU.dumpAll(this);
730   );
731   if (ViewMISchedDAGs) viewGraph();
732 
733   // Initialize ready queues now that the DAG and priority data are finalized.
734   initQueues(TopRoots, BotRoots);
735 
736   bool IsTopNode = false;
737   while (true) {
738     DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
739     SUnit *SU = SchedImpl->pickNode(IsTopNode);
740     if (!SU) break;
741 
742     assert(!SU->isScheduled && "Node already scheduled");
743     if (!checkSchedLimit())
744       break;
745 
746     MachineInstr *MI = SU->getInstr();
747     if (IsTopNode) {
748       assert(SU->isTopReady() && "node still has unscheduled dependencies");
749       if (&*CurrentTop == MI)
750         CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
751       else
752         moveInstruction(MI, CurrentTop);
753     } else {
754       assert(SU->isBottomReady() && "node still has unscheduled dependencies");
755       MachineBasicBlock::iterator priorII =
756         priorNonDebug(CurrentBottom, CurrentTop);
757       if (&*priorII == MI)
758         CurrentBottom = priorII;
759       else {
760         if (&*CurrentTop == MI)
761           CurrentTop = nextIfDebug(++CurrentTop, priorII);
762         moveInstruction(MI, CurrentBottom);
763         CurrentBottom = MI;
764       }
765     }
766     // Notify the scheduling strategy before updating the DAG.
767     // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
768     // runs, it can then use the accurate ReadyCycle time to determine whether
769     // newly released nodes can move to the readyQ.
770     SchedImpl->schedNode(SU, IsTopNode);
771 
772     updateQueues(SU, IsTopNode);
773   }
774   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
775 
776   placeDebugValues();
777 
778   DEBUG({
779       unsigned BBNum = begin()->getParent()->getNumber();
780       dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
781       dumpSchedule();
782       dbgs() << '\n';
783     });
784 }
785 
786 /// Apply each ScheduleDAGMutation step in order.
787 void ScheduleDAGMI::postprocessDAG() {
788   for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
789     Mutations[i]->apply(this);
790   }
791 }
792 
793 void ScheduleDAGMI::
794 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
795                       SmallVectorImpl<SUnit*> &BotRoots) {
796   for (std::vector<SUnit>::iterator
797          I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
798     SUnit *SU = &(*I);
799     assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
800 
801     // Order predecessors so DFSResult follows the critical path.
802     SU->biasCriticalPath();
803 
804     // A SUnit is ready to top schedule if it has no predecessors.
805     if (!I->NumPredsLeft)
806       TopRoots.push_back(SU);
807     // A SUnit is ready to bottom schedule if it has no successors.
808     if (!I->NumSuccsLeft)
809       BotRoots.push_back(SU);
810   }
811   ExitSU.biasCriticalPath();
812 }
813 
814 /// Identify DAG roots and setup scheduler queues.
815 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
816                                ArrayRef<SUnit*> BotRoots) {
817   NextClusterSucc = nullptr;
818   NextClusterPred = nullptr;
819 
820   // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
821   //
822   // Nodes with unreleased weak edges can still be roots.
823   // Release top roots in forward order.
824   for (SmallVectorImpl<SUnit*>::const_iterator
825          I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
826     SchedImpl->releaseTopNode(*I);
827   }
828   // Release bottom roots in reverse order so the higher priority nodes appear
829   // first. This is more natural and slightly more efficient.
830   for (SmallVectorImpl<SUnit*>::const_reverse_iterator
831          I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
832     SchedImpl->releaseBottomNode(*I);
833   }
834 
835   releaseSuccessors(&EntrySU);
836   releasePredecessors(&ExitSU);
837 
838   SchedImpl->registerRoots();
839 
840   // Advance past initial DebugValues.
841   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
842   CurrentBottom = RegionEnd;
843 }
844 
845 /// Update scheduler queues after scheduling an instruction.
846 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
847   // Release dependent instructions for scheduling.
848   if (IsTopNode)
849     releaseSuccessors(SU);
850   else
851     releasePredecessors(SU);
852 
853   SU->isScheduled = true;
854 }
855 
856 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
857 void ScheduleDAGMI::placeDebugValues() {
858   // If first instruction was a DBG_VALUE then put it back.
859   if (FirstDbgValue) {
860     BB->splice(RegionBegin, BB, FirstDbgValue);
861     RegionBegin = FirstDbgValue;
862   }
863 
864   for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
865          DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
866     std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
867     MachineInstr *DbgValue = P.first;
868     MachineBasicBlock::iterator OrigPrevMI = P.second;
869     if (&*RegionBegin == DbgValue)
870       ++RegionBegin;
871     BB->splice(++OrigPrevMI, BB, DbgValue);
872     if (OrigPrevMI == std::prev(RegionEnd))
873       RegionEnd = DbgValue;
874   }
875   DbgValues.clear();
876   FirstDbgValue = nullptr;
877 }
878 
879 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
880 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
881   for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
882     if (SUnit *SU = getSUnit(&(*MI)))
883       SU->dump(this);
884     else
885       dbgs() << "Missing SUnit\n";
886   }
887 }
888 #endif
889 
890 //===----------------------------------------------------------------------===//
891 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
892 // preservation.
893 //===----------------------------------------------------------------------===//
894 
895 ScheduleDAGMILive::~ScheduleDAGMILive() {
896   delete DFSResult;
897 }
898 
899 void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
900   const MachineInstr &MI = *SU.getInstr();
901   for (const MachineOperand &MO : MI.operands()) {
902     if (!MO.isReg())
903       continue;
904     if (!MO.readsReg())
905       continue;
906     if (TrackLaneMasks && !MO.isUse())
907       continue;
908 
909     unsigned Reg = MO.getReg();
910     if (!TargetRegisterInfo::isVirtualRegister(Reg))
911       continue;
912 
913     // Ignore re-defs.
914     if (TrackLaneMasks) {
915       bool FoundDef = false;
916       for (const MachineOperand &MO2 : MI.operands()) {
917         if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
918           FoundDef = true;
919           break;
920         }
921       }
922       if (FoundDef)
923         continue;
924     }
925 
926     // Record this local VReg use.
927     VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
928     for (; UI != VRegUses.end(); ++UI) {
929       if (UI->SU == &SU)
930         break;
931     }
932     if (UI == VRegUses.end())
933       VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
934   }
935 }
936 
937 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
938 /// crossing a scheduling boundary. [begin, end) includes all instructions in
939 /// the region, including the boundary itself and single-instruction regions
940 /// that don't get scheduled.
941 void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
942                                 MachineBasicBlock::iterator begin,
943                                 MachineBasicBlock::iterator end,
944                                 unsigned regioninstrs)
945 {
946   // ScheduleDAGMI initializes SchedImpl's per-region policy.
947   ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
948 
949   // For convenience remember the end of the liveness region.
950   LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
951 
952   SUPressureDiffs.clear();
953 
954   ShouldTrackPressure = SchedImpl->shouldTrackPressure();
955   ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
956 
957   assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
958          "ShouldTrackLaneMasks requires ShouldTrackPressure");
959 }
960 
961 // Set up the register pressure trackers for the top scheduled and bottom
962 // scheduled regions.
963 void ScheduleDAGMILive::initRegPressure() {
964   VRegUses.clear();
965   VRegUses.setUniverse(MRI.getNumVirtRegs());
966   for (SUnit &SU : SUnits)
967     collectVRegUses(SU);
968 
969   TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
970                     ShouldTrackLaneMasks, false);
971   BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
972                     ShouldTrackLaneMasks, false);
973 
974   // Close the RPTracker to finalize live ins.
975   RPTracker.closeRegion();
976 
977   DEBUG(RPTracker.dump());
978 
979   // Initialize the live ins and live outs.
980   TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
981   BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
982 
983   // Close one end of the tracker so we can call
984   // getMaxUpward/DownwardPressureDelta before advancing across any
985   // instructions. This converts currently live regs into live ins/outs.
986   TopRPTracker.closeTop();
987   BotRPTracker.closeBottom();
988 
989   BotRPTracker.initLiveThru(RPTracker);
990   if (!BotRPTracker.getLiveThru().empty()) {
991     TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
992     DEBUG(dbgs() << "Live Thru: ";
993           dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
994   }
995 
996   // For each live out vreg reduce the pressure change associated with other
997   // uses of the same vreg below the live-out reaching def.
998   updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
999 
1000   // Account for liveness generated by the region boundary.
1001   if (LiveRegionEnd != RegionEnd) {
1002     SmallVector<RegisterMaskPair, 8> LiveUses;
1003     BotRPTracker.recede(&LiveUses);
1004     updatePressureDiffs(LiveUses);
1005   }
1006 
1007   DEBUG(
1008     dbgs() << "Top Pressure:\n";
1009     dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1010     dbgs() << "Bottom Pressure:\n";
1011     dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
1012   );
1013 
1014   assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
1015 
1016   // Cache the list of excess pressure sets in this region. This will also track
1017   // the max pressure in the scheduled code for these sets.
1018   RegionCriticalPSets.clear();
1019   const std::vector<unsigned> &RegionPressure =
1020     RPTracker.getPressure().MaxSetPressure;
1021   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1022     unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1023     if (RegionPressure[i] > Limit) {
1024       DEBUG(dbgs() << TRI->getRegPressureSetName(i)
1025             << " Limit " << Limit
1026             << " Actual " << RegionPressure[i] << "\n");
1027       RegionCriticalPSets.push_back(PressureChange(i));
1028     }
1029   }
1030   DEBUG(dbgs() << "Excess PSets: ";
1031         for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
1032           dbgs() << TRI->getRegPressureSetName(
1033             RegionCriticalPSets[i].getPSet()) << " ";
1034         dbgs() << "\n");
1035 }
1036 
1037 void ScheduleDAGMILive::
1038 updateScheduledPressure(const SUnit *SU,
1039                         const std::vector<unsigned> &NewMaxPressure) {
1040   const PressureDiff &PDiff = getPressureDiff(SU);
1041   unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1042   for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end();
1043        I != E; ++I) {
1044     if (!I->isValid())
1045       break;
1046     unsigned ID = I->getPSet();
1047     while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1048       ++CritIdx;
1049     if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1050       if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1051           && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1052         RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1053     }
1054     unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1055     if (NewMaxPressure[ID] >= Limit - 2) {
1056       DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
1057             << NewMaxPressure[ID]
1058             << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ") << Limit
1059             << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
1060     }
1061   }
1062 }
1063 
1064 /// Update the PressureDiff array for liveness after scheduling this
1065 /// instruction.
1066 void ScheduleDAGMILive::updatePressureDiffs(
1067     ArrayRef<RegisterMaskPair> LiveUses) {
1068   for (const RegisterMaskPair &P : LiveUses) {
1069     unsigned Reg = P.RegUnit;
1070     /// FIXME: Currently assuming single-use physregs.
1071     if (!TRI->isVirtualRegister(Reg))
1072       continue;
1073 
1074     if (ShouldTrackLaneMasks) {
1075       // If the register has just become live then other uses won't change
1076       // this fact anymore => decrement pressure.
1077       // If the register has just become dead then other uses make it come
1078       // back to life => increment pressure.
1079       bool Decrement = P.LaneMask.any();
1080 
1081       for (const VReg2SUnit &V2SU
1082            : make_range(VRegUses.find(Reg), VRegUses.end())) {
1083         SUnit &SU = *V2SU.SU;
1084         if (SU.isScheduled || &SU == &ExitSU)
1085           continue;
1086 
1087         PressureDiff &PDiff = getPressureDiff(&SU);
1088         PDiff.addPressureChange(P, Decrement, &MRI);
1089         DEBUG(
1090           dbgs() << "  UpdateRegP: SU(" << SU.NodeNum << ") "
1091                  << PrintReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask)
1092                  << ' ' << *SU.getInstr();
1093           dbgs() << "              to ";
1094           PDiff.dump(*TRI);
1095         );
1096       }
1097     } else {
1098       assert(P.LaneMask.any());
1099       DEBUG(dbgs() << "  LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
1100       // This may be called before CurrentBottom has been initialized. However,
1101       // BotRPTracker must have a valid position. We want the value live into the
1102       // instruction or live out of the block, so ask for the previous
1103       // instruction's live-out.
1104       const LiveInterval &LI = LIS->getInterval(Reg);
1105       VNInfo *VNI;
1106       MachineBasicBlock::const_iterator I =
1107         nextIfDebug(BotRPTracker.getPos(), BB->end());
1108       if (I == BB->end())
1109         VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1110       else {
1111         LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1112         VNI = LRQ.valueIn();
1113       }
1114       // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1115       assert(VNI && "No live value at use.");
1116       for (const VReg2SUnit &V2SU
1117            : make_range(VRegUses.find(Reg), VRegUses.end())) {
1118         SUnit *SU = V2SU.SU;
1119         // If this use comes before the reaching def, it cannot be a last use,
1120         // so decrease its pressure change.
1121         if (!SU->isScheduled && SU != &ExitSU) {
1122           LiveQueryResult LRQ =
1123               LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1124           if (LRQ.valueIn() == VNI) {
1125             PressureDiff &PDiff = getPressureDiff(SU);
1126             PDiff.addPressureChange(P, true, &MRI);
1127             DEBUG(
1128               dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
1129                      << *SU->getInstr();
1130               dbgs() << "              to ";
1131               PDiff.dump(*TRI);
1132             );
1133           }
1134         }
1135       }
1136     }
1137   }
1138 }
1139 
1140 /// schedule - Called back from MachineScheduler::runOnMachineFunction
1141 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1142 /// only includes instructions that have DAG nodes, not scheduling boundaries.
1143 ///
1144 /// This is a skeletal driver, with all the functionality pushed into helpers,
1145 /// so that it can be easily extended by experimental schedulers. Generally,
1146 /// implementing MachineSchedStrategy should be sufficient to implement a new
1147 /// scheduling algorithm. However, if a scheduler further subclasses
1148 /// ScheduleDAGMILive then it will want to override this virtual method in order
1149 /// to update any specialized state.
1150 void ScheduleDAGMILive::schedule() {
1151   DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1152   DEBUG(SchedImpl->dumpPolicy());
1153   buildDAGWithRegPressure();
1154 
1155   Topo.InitDAGTopologicalSorting();
1156 
1157   postprocessDAG();
1158 
1159   SmallVector<SUnit*, 8> TopRoots, BotRoots;
1160   findRootsAndBiasEdges(TopRoots, BotRoots);
1161 
1162   // Initialize the strategy before modifying the DAG.
1163   // This may initialize a DFSResult to be used for queue priority.
1164   SchedImpl->initialize(this);
1165 
1166   DEBUG(
1167     if (EntrySU.getInstr() != nullptr)
1168       EntrySU.dumpAll(this);
1169     for (const SUnit &SU : SUnits) {
1170       SU.dumpAll(this);
1171       if (ShouldTrackPressure) {
1172         dbgs() << "  Pressure Diff      : ";
1173         getPressureDiff(&SU).dump(*TRI);
1174       }
1175       dbgs() << '\n';
1176     }
1177     if (ExitSU.getInstr() != nullptr)
1178       ExitSU.dumpAll(this);
1179   );
1180   if (ViewMISchedDAGs) viewGraph();
1181 
1182   // Initialize ready queues now that the DAG and priority data are finalized.
1183   initQueues(TopRoots, BotRoots);
1184 
1185   bool IsTopNode = false;
1186   while (true) {
1187     DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1188     SUnit *SU = SchedImpl->pickNode(IsTopNode);
1189     if (!SU) break;
1190 
1191     assert(!SU->isScheduled && "Node already scheduled");
1192     if (!checkSchedLimit())
1193       break;
1194 
1195     scheduleMI(SU, IsTopNode);
1196 
1197     if (DFSResult) {
1198       unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1199       if (!ScheduledTrees.test(SubtreeID)) {
1200         ScheduledTrees.set(SubtreeID);
1201         DFSResult->scheduleTree(SubtreeID);
1202         SchedImpl->scheduleTree(SubtreeID);
1203       }
1204     }
1205 
1206     // Notify the scheduling strategy after updating the DAG.
1207     SchedImpl->schedNode(SU, IsTopNode);
1208 
1209     updateQueues(SU, IsTopNode);
1210   }
1211   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1212 
1213   placeDebugValues();
1214 
1215   DEBUG({
1216       unsigned BBNum = begin()->getParent()->getNumber();
1217       dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
1218       dumpSchedule();
1219       dbgs() << '\n';
1220     });
1221 }
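// Illustrative sketch (not in the original source): as the comment on
// schedule() above says, implementing MachineSchedStrategy is normally enough
// to get a new scheduling algorithm. "TrivialTopDownStrategy" is a hypothetical,
// minimal strategy that simply issues nodes in the order they become top-ready;
// it ignores latency, register pressure, and the bottom boundary entirely.
#if 0
class TrivialTopDownStrategy : public MachineSchedStrategy {
  std::vector<SUnit *> Available;

public:
  void initialize(ScheduleDAGMI *DAG) override { Available.clear(); }

  SUnit *pickNode(bool &IsTopNode) override {
    if (Available.empty())
      return nullptr;
    IsTopNode = true;
    SUnit *SU = Available.back();
    Available.pop_back();
    return SU;
  }

  void schedNode(SUnit *SU, bool IsTopNode) override {}

  // The DAG driver calls these as nodes' dependencies become satisfied.
  void releaseTopNode(SUnit *SU) override { Available.push_back(SU); }
  void releaseBottomNode(SUnit *SU) override {}
};
#endif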
1222 
1223 /// Build the DAG and setup three register pressure trackers.
1224 void ScheduleDAGMILive::buildDAGWithRegPressure() {
1225   if (!ShouldTrackPressure) {
1226     RPTracker.reset();
1227     RegionCriticalPSets.clear();
1228     buildSchedGraph(AA);
1229     return;
1230   }
1231 
1232   // Initialize the register pressure tracker used by buildSchedGraph.
1233   RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1234                  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1235 
1236   // Account for liveness generated by the region boundary.
1237   if (LiveRegionEnd != RegionEnd)
1238     RPTracker.recede();
1239 
1240   // Build the DAG, and compute current register pressure.
1241   buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1242 
1243   // Initialize top/bottom trackers after computing region pressure.
1244   initRegPressure();
1245 }
1246 
1247 void ScheduleDAGMILive::computeDFSResult() {
1248   if (!DFSResult)
1249     DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1250   DFSResult->clear();
1251   ScheduledTrees.clear();
1252   DFSResult->resize(SUnits.size());
1253   DFSResult->compute(SUnits);
1254   ScheduledTrees.resize(DFSResult->getNumSubtrees());
1255 }
1256 
1257 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1258 /// only provides the critical path for single block loops. To handle loops that
1259 /// span blocks, we could use the vreg path latencies provided by
1260 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1261 /// available for use in the scheduler.
1262 ///
1263 /// The cyclic path estimation identifies a def-use pair that crosses the back
1264 /// edge and considers the depth and height of the nodes. For example, consider
1265 /// the following instruction sequence where each instruction has unit latency
1266 /// and defines an eponymous virtual register:
1267 ///
1268 /// a->b(a,c)->c(b)->d(c)->exit
1269 ///
1270 /// The cyclic critical path is two cycles: b->c->b
1271 /// The acyclic critical path is four cycles: a->b->c->d->exit
1272 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1273 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1274 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1275 /// LiveInDepth = depth(b) = len(a->b) = 1
1276 ///
1277 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1278 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1279 /// CyclicCriticalPath = min(2, 2) = 2
1280 ///
1281 /// This could be relevant to PostRA scheduling, but is currently implemented
1282 /// assuming LiveIntervals.
1283 unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1284   // This only applies to single block loops.
1285   if (!BB->isSuccessor(BB))
1286     return 0;
1287 
1288   unsigned MaxCyclicLatency = 0;
1289   // Visit each live out vreg def to find def/use pairs that cross iterations.
1290   for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1291     unsigned Reg = P.RegUnit;
1292     if (!TRI->isVirtualRegister(Reg))
1293       continue;
1294     const LiveInterval &LI = LIS->getInterval(Reg);
1295     const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1296     if (!DefVNI)
1297       continue;
1298 
1299     MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1300     const SUnit *DefSU = getSUnit(DefMI);
1301     if (!DefSU)
1302       continue;
1303 
1304     unsigned LiveOutHeight = DefSU->getHeight();
1305     unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1306     // Visit all local users of the vreg def.
1307     for (const VReg2SUnit &V2SU
1308          : make_range(VRegUses.find(Reg), VRegUses.end())) {
1309       SUnit *SU = V2SU.SU;
1310       if (SU == &ExitSU)
1311         continue;
1312 
1313       // Only consider uses of the phi.
1314       LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1315       if (!LRQ.valueIn()->isPHIDef())
1316         continue;
1317 
1318       // Assume that a path spanning two iterations is a cycle, which could
1319       // overestimate in strange cases. This allows cyclic latency to be
1320       // estimated as the minimum slack of the vreg's depth or height.
1321       unsigned CyclicLatency = 0;
1322       if (LiveOutDepth > SU->getDepth())
1323         CyclicLatency = LiveOutDepth - SU->getDepth();
1324 
1325       unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1326       if (LiveInHeight > LiveOutHeight) {
1327         if (LiveInHeight - LiveOutHeight < CyclicLatency)
1328           CyclicLatency = LiveInHeight - LiveOutHeight;
1329       } else
1330         CyclicLatency = 0;
1331 
1332       DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1333             << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1334       if (CyclicLatency > MaxCyclicLatency)
1335         MaxCyclicLatency = CyclicLatency;
1336     }
1337   }
1338   DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1339   return MaxCyclicLatency;
1340 }
1341 
1342 /// Release ExitSU predecessors and setup scheduler queues. Re-position
1343 /// the Top RP tracker in case the region beginning has changed.
1344 void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1345                                    ArrayRef<SUnit*> BotRoots) {
1346   ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1347   if (ShouldTrackPressure) {
1348     assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1349     TopRPTracker.setPos(CurrentTop);
1350   }
1351 }
1352 
1353 /// Move an instruction and update register pressure.
1354 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1355   // Move the instruction to its new location in the instruction stream.
1356   MachineInstr *MI = SU->getInstr();
1357 
1358   if (IsTopNode) {
1359     assert(SU->isTopReady() && "node still has unscheduled dependencies");
1360     if (&*CurrentTop == MI)
1361       CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1362     else {
1363       moveInstruction(MI, CurrentTop);
1364       TopRPTracker.setPos(MI);
1365     }
1366 
1367     if (ShouldTrackPressure) {
1368       // Update top scheduled pressure.
1369       RegisterOperands RegOpers;
1370       RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1371       if (ShouldTrackLaneMasks) {
1372         // Adjust liveness and add missing dead+read-undef flags.
1373         SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1374         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1375       } else {
1376         // Adjust for missing dead-def flags.
1377         RegOpers.detectDeadDefs(*MI, *LIS);
1378       }
1379 
1380       TopRPTracker.advance(RegOpers);
1381       assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1382       DEBUG(
1383         dbgs() << "Top Pressure:\n";
1384         dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1385       );
1386 
1387       updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1388     }
1389   } else {
1390     assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1391     MachineBasicBlock::iterator priorII =
1392       priorNonDebug(CurrentBottom, CurrentTop);
1393     if (&*priorII == MI)
1394       CurrentBottom = priorII;
1395     else {
1396       if (&*CurrentTop == MI) {
1397         CurrentTop = nextIfDebug(++CurrentTop, priorII);
1398         TopRPTracker.setPos(CurrentTop);
1399       }
1400       moveInstruction(MI, CurrentBottom);
1401       CurrentBottom = MI;
1402     }
1403     if (ShouldTrackPressure) {
1404       RegisterOperands RegOpers;
1405       RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1406       if (ShouldTrackLaneMasks) {
1407         // Adjust liveness and add missing dead+read-undef flags.
1408         SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1409         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1410       } else {
1411         // Adjust for missing dead-def flags.
1412         RegOpers.detectDeadDefs(*MI, *LIS);
1413       }
1414 
1415       BotRPTracker.recedeSkipDebugValues();
1416       SmallVector<RegisterMaskPair, 8> LiveUses;
1417       BotRPTracker.recede(RegOpers, &LiveUses);
1418       assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1419       DEBUG(
1420         dbgs() << "Bottom Pressure:\n";
1421         dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
1422       );
1423 
1424       updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1425       updatePressureDiffs(LiveUses);
1426     }
1427   }
1428 }
1429 
1430 //===----------------------------------------------------------------------===//
1431 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1432 //===----------------------------------------------------------------------===//
1433 
1434 namespace {
1435 
1436 /// \brief Post-process the DAG to create cluster edges between neighboring
1437 /// loads or between neighboring stores.
1438 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1439   struct MemOpInfo {
1440     SUnit *SU;
1441     unsigned BaseReg;
1442     int64_t Offset;
1443 
1444     MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
1445         : SU(su), BaseReg(reg), Offset(ofs) {}
1446 
1447     bool operator<(const MemOpInfo &RHS) const {
1448       return std::tie(BaseReg, Offset, SU->NodeNum) <
1449              std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum);
1450     }
1451   };
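  // Note: sorting MemOpInfo records with the operator< above groups
  // candidates by BaseReg and then by increasing Offset, so e.g. records for
  // (%vreg0,+0), (%vreg0,+8), (%vreg1,+4) place the two %vreg0 accesses next
  // to each other; clusterNeighboringMemOps then only compares neighbors.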
1452 
1453   const TargetInstrInfo *TII;
1454   const TargetRegisterInfo *TRI;
1455   bool IsLoad;
1456 
1457 public:
1458   BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1459                            const TargetRegisterInfo *tri, bool IsLoad)
1460       : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1461 
1462   void apply(ScheduleDAGInstrs *DAGInstrs) override;
1463 
1464 protected:
1465   void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
1466 };
1467 
1468 class StoreClusterMutation : public BaseMemOpClusterMutation {
1469 public:
1470   StoreClusterMutation(const TargetInstrInfo *tii,
1471                        const TargetRegisterInfo *tri)
1472       : BaseMemOpClusterMutation(tii, tri, false) {}
1473 };
1474 
1475 class LoadClusterMutation : public BaseMemOpClusterMutation {
1476 public:
1477   LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1478       : BaseMemOpClusterMutation(tii, tri, true) {}
1479 };
1480 
1481 } // end anonymous namespace
1482 
1483 namespace llvm {
1484 
1485 std::unique_ptr<ScheduleDAGMutation>
1486 createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1487                              const TargetRegisterInfo *TRI) {
1488   return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
1489                             : nullptr;
1490 }
1491 
1492 std::unique_ptr<ScheduleDAGMutation>
1493 createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1494                               const TargetRegisterInfo *TRI) {
1495   return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
1496                             : nullptr;
1497 }
1498 
1499 } // end namespace llvm
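
// Illustrative usage (not part of this file): a target's createMachineScheduler
// hook would typically attach these mutations to the DAG it creates, e.g.
//   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
//   DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
// The factories return null when EnableMemOpCluster is off.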
1500 
1501 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1502     ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
1503   SmallVector<MemOpInfo, 32> MemOpRecords;
1504   for (unsigned Idx = 0, End = MemOps.size(); Idx != End; ++Idx) {
1505     SUnit *SU = MemOps[Idx];
1506     unsigned BaseReg;
1507     int64_t Offset;
1508     if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
1509       MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
1510   }
1511   if (MemOpRecords.size() < 2)
1512     return;
1513 
1514   std::sort(MemOpRecords.begin(), MemOpRecords.end());
1515   unsigned ClusterLength = 1;
1516   for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1517     if (MemOpRecords[Idx].BaseReg != MemOpRecords[Idx+1].BaseReg) {
1518       ClusterLength = 1;
1519       continue;
1520     }
1521 
1522     SUnit *SUa = MemOpRecords[Idx].SU;
1523     SUnit *SUb = MemOpRecords[Idx+1].SU;
1524     if (TII->shouldClusterMemOps(*SUa->getInstr(), *SUb->getInstr(),
1525                                  ClusterLength) &&
1526         DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1527       DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1528             << SUb->NodeNum << ")\n");
1529       // Copy successor edges from SUa to SUb. Interleaving computation
1530       // dependent on SUa can prevent load combining due to register reuse.
1531       // Predecessor edges do not need to be copied from SUb to SUa since nearby
1532       // loads should have effectively the same inputs.
1533       for (SUnit::const_succ_iterator
1534              SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
1535         if (SI->getSUnit() == SUb)
1536           continue;
1537         DEBUG(dbgs() << "  Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
1538         DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
1539       }
1540       ++ClusterLength;
1541     } else
1542       ClusterLength = 1;
1543   }
1544 }
1545 
1546 /// \brief Callback from DAG postProcessing to create cluster edges for loads or stores.
1547 void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
1548 
1549   ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1550 
1551   // Map DAG NodeNum to store chain ID.
1552   DenseMap<unsigned, unsigned> StoreChainIDs;
1553   // Map each store chain to a set of dependent MemOps.
1554   SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
1555   for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
1556     SUnit *SU = &DAG->SUnits[Idx];
1557     if ((IsLoad && !SU->getInstr()->mayLoad()) ||
1558         (!IsLoad && !SU->getInstr()->mayStore()))
1559       continue;
1560 
1561     unsigned ChainPredID = DAG->SUnits.size();
1562     for (SUnit::const_pred_iterator
1563            PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
1564       if (PI->isCtrl()) {
1565         ChainPredID = PI->getSUnit()->NodeNum;
1566         break;
1567       }
1568     }
1569     // Check if this chain-like pred has been seen
1570     // before. ChainPredID==MaxNodeID at the top of the schedule.
1571     unsigned NumChains = StoreChainDependents.size();
1572     std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
1573       StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
1574     if (Result.second)
1575       StoreChainDependents.resize(NumChains + 1);
1576     StoreChainDependents[Result.first->second].push_back(SU);
1577   }
1578 
1579   // Iterate over the store chains.
1580   for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
1581     clusterNeighboringMemOps(StoreChainDependents[Idx], DAG);
1582 }
1583 
1584 //===----------------------------------------------------------------------===//
1585 // CopyConstrain - DAG post-processing to encourage copy elimination.
1586 //===----------------------------------------------------------------------===//
1587 
1588 namespace {
1589 
1590 /// \brief Post-process the DAG to create weak edges from all uses of a copy to
1591 /// the one use that defines the copy's source vreg, most likely an induction
1592 /// variable increment.
1593 class CopyConstrain : public ScheduleDAGMutation {
1594   // Transient state.
1595   SlotIndex RegionBeginIdx;
1596   // RegionEndIdx is the slot index of the last non-debug instruction in the
1597   // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1598   SlotIndex RegionEndIdx;
1599 
1600 public:
1601   CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1602 
1603   void apply(ScheduleDAGInstrs *DAGInstrs) override;
1604 
1605 protected:
1606   void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1607 };
1608 
1609 } // end anonymous namespace
1610 
1611 namespace llvm {
1612 
1613 std::unique_ptr<ScheduleDAGMutation>
1614 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1615                                const TargetRegisterInfo *TRI) {
1616   return llvm::make_unique<CopyConstrain>(TII, TRI);
1617 }
1618 
1619 } // end namespace llvm
1620 
1621 /// constrainLocalCopy handles two possibilities:
1622 /// 1) Local src:
1623 /// I0:     = dst
1624 /// I1: src = ...
1625 /// I2:     = dst
1626 /// I3: dst = src (copy)
1627 /// (create pred->succ edges I0->I1, I2->I1)
1628 ///
1629 /// 2) Local copy:
1630 /// I0: dst = src (copy)
1631 /// I1:     = dst
1632 /// I2: src = ...
1633 /// I3:     = dst
1634 /// (create pred->succ edges I1->I2, I3->I2)
1635 ///
1636 /// Although the MachineScheduler is currently constrained to single blocks,
1637 /// this algorithm should handle extended blocks. An EBB is a set of
1638 /// contiguously numbered blocks such that the previous block in the EBB is
1639 /// always the single predecessor.
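///
/// For illustration, in case 1 above the weak edges I0->I1 and I2->I1 bias
/// the scheduler toward keeping the uses of dst (I0, I2) above the def of
/// src (I1), so dst and src do not interfere and the copy dst = src can be
/// coalesced. Because the edges are SDep::Weak, they only feed the Weak
/// candidate heuristic and may be ignored when honoring them is not
/// profitable or not possible.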
1640 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1641   LiveIntervals *LIS = DAG->getLIS();
1642   MachineInstr *Copy = CopySU->getInstr();
1643 
1644   // Check for pure vreg copies.
1645   const MachineOperand &SrcOp = Copy->getOperand(1);
1646   unsigned SrcReg = SrcOp.getReg();
1647   if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1648     return;
1649 
1650   const MachineOperand &DstOp = Copy->getOperand(0);
1651   unsigned DstReg = DstOp.getReg();
1652   if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
1653     return;
1654 
1655   // Check if either the dest or source is local. If it's live across a back
1656   // edge, it's not local. Note that if both vregs are live across the back
1657   // edge, we cannot successfully constrain the copy without cyclic scheduling.
1658   // If both the copy's source and dest are local live intervals, then we
1659   // should treat the dest as the global for the purpose of adding
1660   // constraints. This adds edges from source's other uses to the copy.
1661   unsigned LocalReg = SrcReg;
1662   unsigned GlobalReg = DstReg;
1663   LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1664   if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1665     LocalReg = DstReg;
1666     GlobalReg = SrcReg;
1667     LocalLI = &LIS->getInterval(LocalReg);
1668     if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1669       return;
1670   }
1671   LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1672 
1673   // Find the global segment after the start of the local LI.
1674   LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1675   // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1676   // local live range. We could create edges from other global uses to the local
1677   // start, but the coalescer should have already eliminated these cases, so
1678   // don't bother dealing with it.
1679   if (GlobalSegment == GlobalLI->end())
1680     return;
1681 
1682   // If GlobalSegment is killed at the LocalLI->start, the call to find()
1683   // returned the next global segment. But if GlobalSegment overlaps with
1684   // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1685   // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1686   if (GlobalSegment->contains(LocalLI->beginIndex()))
1687     ++GlobalSegment;
1688 
1689   if (GlobalSegment == GlobalLI->end())
1690     return;
1691 
1692   // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1693   if (GlobalSegment != GlobalLI->begin()) {
1694     // Two address defs have no hole.
1695     if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1696                                GlobalSegment->start)) {
1697       return;
1698     }
1699     // If the prior global segment may be defined by the same two-address
1700     // instruction that also defines LocalLI, then we can't make a hole here.
1701     if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1702                                LocalLI->beginIndex())) {
1703       return;
1704     }
1705     // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1706     // it would be a disconnected component in the live range.
1707     assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1708            "Disconnected LRG within the scheduling region.");
1709   }
1710   MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1711   if (!GlobalDef)
1712     return;
1713 
1714   SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1715   if (!GlobalSU)
1716     return;
1717 
1718   // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1719   // constraining the uses of the last local def to precede GlobalDef.
1720   SmallVector<SUnit*,8> LocalUses;
1721   const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1722   MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1723   SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1724   for (SUnit::const_succ_iterator
1725          I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
1726        I != E; ++I) {
1727     if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
1728       continue;
1729     if (I->getSUnit() == GlobalSU)
1730       continue;
1731     if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
1732       return;
1733     LocalUses.push_back(I->getSUnit());
1734   }
1735   // Open the top of the GlobalLI hole by constraining any earlier global uses
1736   // to precede the start of LocalLI.
1737   SmallVector<SUnit*,8> GlobalUses;
1738   MachineInstr *FirstLocalDef =
1739     LIS->getInstructionFromIndex(LocalLI->beginIndex());
1740   SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1741   for (SUnit::const_pred_iterator
1742          I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
1743     if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
1744       continue;
1745     if (I->getSUnit() == FirstLocalSU)
1746       continue;
1747     if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
1748       return;
1749     GlobalUses.push_back(I->getSUnit());
1750   }
1751   DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1752   // Add the weak edges.
1753   for (SmallVectorImpl<SUnit*>::const_iterator
1754          I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1755     DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
1756           << GlobalSU->NodeNum << ")\n");
1757     DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1758   }
1759   for (SmallVectorImpl<SUnit*>::const_iterator
1760          I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1761     DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
1762           << FirstLocalSU->NodeNum << ")\n");
1763     DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1764   }
1765 }
1766 
1767 /// \brief Callback from DAG postProcessing to create weak edges to encourage
1768 /// copy elimination.
1769 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1770   ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1771   assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1772 
1773   MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1774   if (FirstPos == DAG->end())
1775     return;
1776   RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1777   RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1778       *priorNonDebug(DAG->end(), DAG->begin()));
1779 
1780   for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
1781     SUnit *SU = &DAG->SUnits[Idx];
1782     if (!SU->getInstr()->isCopy())
1783       continue;
1784 
1785     constrainLocalCopy(SU, static_cast<ScheduleDAGMILive*>(DAG));
1786   }
1787 }
1788 
1789 //===----------------------------------------------------------------------===//
1790 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1791 // and possibly other custom schedulers.
1792 //===----------------------------------------------------------------------===//
1793 
1794 static const unsigned InvalidCycle = ~0U;
1795 
1796 SchedBoundary::~SchedBoundary() { delete HazardRec; }
1797 
1798 void SchedBoundary::reset() {
1799   // A new HazardRec is created for each DAG and owned by SchedBoundary.
1800   // Destroying and reconstructing it is very expensive though. So keep
1801   // invalid, placeholder HazardRecs.
1802   if (HazardRec && HazardRec->isEnabled()) {
1803     delete HazardRec;
1804     HazardRec = nullptr;
1805   }
1806   Available.clear();
1807   Pending.clear();
1808   CheckPending = false;
1809   CurrCycle = 0;
1810   CurrMOps = 0;
1811   MinReadyCycle = std::numeric_limits<unsigned>::max();
1812   ExpectedLatency = 0;
1813   DependentLatency = 0;
1814   RetiredMOps = 0;
1815   MaxExecutedResCount = 0;
1816   ZoneCritResIdx = 0;
1817   IsResourceLimited = false;
1818   ReservedCycles.clear();
1819 #ifndef NDEBUG
1820   // Track the maximum number of stall cycles that could arise either from the
1821   // latency of a DAG edge or the number of cycles that a processor resource is
1822   // reserved (SchedBoundary::ReservedCycles).
1823   MaxObservedStall = 0;
1824 #endif
1825   // Reserve a zero-count for invalid CritResIdx.
1826   ExecutedResCounts.resize(1);
1827   assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1828 }
1829 
1830 void SchedRemainder::
1831 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1832   reset();
1833   if (!SchedModel->hasInstrSchedModel())
1834     return;
1835   RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1836   for (std::vector<SUnit>::iterator
1837          I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
1838     const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
1839     RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
1840       * SchedModel->getMicroOpFactor();
1841     for (TargetSchedModel::ProcResIter
1842            PI = SchedModel->getWriteProcResBegin(SC),
1843            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1844       unsigned PIdx = PI->ProcResourceIdx;
1845       unsigned Factor = SchedModel->getResourceFactor(PIdx);
1846       RemainingCounts[PIdx] += (Factor * PI->Cycles);
1847     }
1848   }
1849 }
1850 
1851 void SchedBoundary::
1852 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1853   reset();
1854   DAG = dag;
1855   SchedModel = smodel;
1856   Rem = rem;
1857   if (SchedModel->hasInstrSchedModel()) {
1858     ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
1859     ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
1860   }
1861 }
1862 
1863 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1864 /// these "soft stalls" differently than the hard stall cycles based on CPU
1865 /// resources and computed by checkHazard(). A fully in-order model
1866 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
1867 /// available for scheduling until they are ready. However, a weaker in-order
1868 /// model may use this for heuristics. For example, if a processor has in-order
1869 /// behavior when reading certain resources, this may come into play.
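/// For example (illustrative numbers): an unbuffered SU that becomes ready at
/// cycle 7 while the zone is at cycle 5 reports a 2-cycle soft stall, whereas
/// a buffered SU always reports 0.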
1870 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
1871   if (!SU->isUnbuffered)
1872     return 0;
1873 
1874   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1875   if (ReadyCycle > CurrCycle)
1876     return ReadyCycle - CurrCycle;
1877   return 0;
1878 }
1879 
1880 /// Compute the next cycle at which the given processor resource can be
1881 /// scheduled.
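/// For example (illustrative numbers): if a reserved resource was last
/// recorded busy through cycle 7, a top-down candidate cannot use it before
/// cycle 7, while a bottom-up candidate needing it for 2 cycles is pushed out
/// to cycle 9, since the current operation's cycles are added below.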
1882 unsigned SchedBoundary::
1883 getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1884   unsigned NextUnreserved = ReservedCycles[PIdx];
1885   // If this resource has never been used, always return cycle zero.
1886   if (NextUnreserved == InvalidCycle)
1887     return 0;
1888   // For bottom-up scheduling add the cycles needed for the current operation.
1889   if (!isTop())
1890     NextUnreserved += Cycles;
1891   return NextUnreserved;
1892 }
1893 
1894 /// Does this SU have a hazard within the current instruction group.
1895 ///
1896 /// The scheduler supports two modes of hazard recognition. The first is the
1897 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1898 /// supports highly complicated in-order reservation tables
1899 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1900 ///
1901 /// The second is a streamlined mechanism that checks for hazards based on
1902 /// simple counters that the scheduler itself maintains. It explicitly checks
1903 /// for instruction dispatch limitations, including the number of micro-ops that
1904 /// can dispatch per cycle.
1905 ///
1906 /// TODO: Also check whether the SU must start a new group.
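///
/// Example of the counter-based check (illustrative numbers): with an issue
/// width of 4 and CurrMOps == 3, an SU that decodes to 2 micro-ops cannot
/// join the current group, so it is reported as a hazard and deferred to the
/// next cycle.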
1907 bool SchedBoundary::checkHazard(SUnit *SU) {
1908   if (HazardRec->isEnabled()
1909       && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
1910     return true;
1911   }
1912   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1913   if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
1914     DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
1915           << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1916     return true;
1917   }
1918   if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
1919     const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1920     for (TargetSchedModel::ProcResIter
1921            PI = SchedModel->getWriteProcResBegin(SC),
1922            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1923       unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles);
1924       if (NRCycle > CurrCycle) {
1925 #ifndef NDEBUG
1926         MaxObservedStall = std::max(PI->Cycles, MaxObservedStall);
1927 #endif
1928         DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "
1929               << SchedModel->getResourceName(PI->ProcResourceIdx)
1930               << "=" << NRCycle << "c\n");
1931         return true;
1932       }
1933     }
1934   }
1935   return false;
1936 }
1937 
1938 // Find the unscheduled node in ReadySUs with the highest latency.
1939 unsigned SchedBoundary::
1940 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
1941   SUnit *LateSU = nullptr;
1942   unsigned RemLatency = 0;
1943   for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
1944        I != E; ++I) {
1945     unsigned L = getUnscheduledLatency(*I);
1946     if (L > RemLatency) {
1947       RemLatency = L;
1948       LateSU = *I;
1949     }
1950   }
1951   if (LateSU) {
1952     DEBUG(dbgs() << Available.getName() << " RemLatency SU("
1953           << LateSU->NodeNum << ") " << RemLatency << "c\n");
1954   }
1955   return RemLatency;
1956 }
1957 
1958 // Count resources in this zone and the remaining unscheduled
1959 // instructions. Return the max count, scaled. Set OtherCritIdx to the critical
1960 // resource index, or zero if the zone is issue limited.
1961 unsigned SchedBoundary::
1962 getOtherResourceCount(unsigned &OtherCritIdx) {
1963   OtherCritIdx = 0;
1964   if (!SchedModel->hasInstrSchedModel())
1965     return 0;
1966 
1967   unsigned OtherCritCount = Rem->RemIssueCount
1968     + (RetiredMOps * SchedModel->getMicroOpFactor());
1969   DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
1970         << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
1971   for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
1972        PIdx != PEnd; ++PIdx) {
1973     unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
1974     if (OtherCount > OtherCritCount) {
1975       OtherCritCount = OtherCount;
1976       OtherCritIdx = PIdx;
1977     }
1978   }
1979   if (OtherCritIdx) {
1980     DEBUG(dbgs() << "  " << Available.getName() << " + Remain CritRes: "
1981           << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
1982           << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
1983   }
1984   return OtherCritCount;
1985 }
1986 
1987 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
1988   assert(SU->getInstr() && "Scheduled SUnit must have instr");
1989 
1990 #ifndef NDEBUG
1991   // ReadyCycle was bumped up to CurrCycle when this node was
1992   // scheduled, but CurrCycle may have been eagerly advanced immediately after
1993   // scheduling, so may now be greater than ReadyCycle.
1994   if (ReadyCycle > CurrCycle)
1995     MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
1996 #endif
1997 
1998   if (ReadyCycle < MinReadyCycle)
1999     MinReadyCycle = ReadyCycle;
2000 
2001   // Check for interlocks first. For the purpose of other heuristics, an
2002   // instruction that cannot issue appears as if it's not in the ReadyQueue.
2003   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2004   if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
2005       Available.size() >= ReadyListLimit)
2006     Pending.push(SU);
2007   else
2008     Available.push(SU);
2009 }
2010 
2011 /// Move the boundary of scheduled code by one cycle.
2012 void SchedBoundary::bumpCycle(unsigned NextCycle) {
2013   if (SchedModel->getMicroOpBufferSize() == 0) {
2014     assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2015            "MinReadyCycle uninitialized");
2016     if (MinReadyCycle > NextCycle)
2017       NextCycle = MinReadyCycle;
2018   }
2019   // Update the current micro-ops, which will issue in the next cycle.
2020   unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2021   CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2022 
2023   // Decrement DependentLatency based on the next cycle.
2024   if ((NextCycle - CurrCycle) > DependentLatency)
2025     DependentLatency = 0;
2026   else
2027     DependentLatency -= (NextCycle - CurrCycle);
2028 
2029   if (!HazardRec->isEnabled()) {
2030     // Bypass HazardRec virtual calls.
2031     CurrCycle = NextCycle;
2032   } else {
2033     // Bypass getHazardType calls in case of long latency.
2034     for (; CurrCycle != NextCycle; ++CurrCycle) {
2035       if (isTop())
2036         HazardRec->AdvanceCycle();
2037       else
2038         HazardRec->RecedeCycle();
2039     }
2040   }
2041   CheckPending = true;
2042   unsigned LFactor = SchedModel->getLatencyFactor();
2043   IsResourceLimited =
2044     (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
2045     > (int)LFactor;
2046 
2047   DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
2048 }
2049 
2050 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2051   ExecutedResCounts[PIdx] += Count;
2052   if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2053     MaxExecutedResCount = ExecutedResCounts[PIdx];
2054 }
2055 
2056 /// Add the given processor resource to this scheduled zone.
2057 ///
2058 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2059 /// during which this resource is consumed.
2060 ///
2061 /// \return the next cycle at which the instruction may execute without
2062 /// oversubscribing resources.
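///
/// Counts are kept in scaled resource units so different kinds of resources
/// remain comparable: e.g. (illustrative numbers), with a resource factor of
/// 2, consuming the resource for 3 cycles adds 6 units to the executed count
/// and subtracts the same 6 units from the remaining count.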
2063 unsigned SchedBoundary::
2064 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
2065   unsigned Factor = SchedModel->getResourceFactor(PIdx);
2066   unsigned Count = Factor * Cycles;
2067   DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx)
2068         << " +" << Cycles << "x" << Factor << "u\n");
2069 
2070   // Update Executed resources counts.
2071   incExecutedResources(PIdx, Count);
2072   assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2073   Rem->RemainingCounts[PIdx] -= Count;
2074 
2075   // Check if this resource exceeds the current critical resource. If so, it
2076   // becomes the critical resource.
2077   if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2078     ZoneCritResIdx = PIdx;
2079     DEBUG(dbgs() << "  *** Critical resource "
2080           << SchedModel->getResourceName(PIdx) << ": "
2081           << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
2082   }
2083   // For reserved resources, record the highest cycle using the resource.
2084   unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
2085   if (NextAvailable > CurrCycle) {
2086     DEBUG(dbgs() << "  Resource conflict: "
2087           << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
2088           << NextAvailable << "\n");
2089   }
2090   return NextAvailable;
2091 }
2092 
2093 /// Move the boundary of scheduled code by one SUnit.
2094 void SchedBoundary::bumpNode(SUnit *SU) {
2095   // Update the reservation table.
2096   if (HazardRec->isEnabled()) {
2097     if (!isTop() && SU->isCall) {
2098       // Calls are scheduled with their preceding instructions. For bottom-up
2099       // scheduling, clear the pipeline state before emitting.
2100       HazardRec->Reset();
2101     }
2102     HazardRec->EmitInstruction(SU);
2103   }
2104   // checkHazard should prevent scheduling multiple instructions per cycle that
2105   // exceed the issue width.
2106   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2107   unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2108   assert(
2109       (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2110       "Cannot schedule this instruction's MicroOps in the current cycle.");
2111 
2112   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2113   DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
2114 
2115   unsigned NextCycle = CurrCycle;
2116   switch (SchedModel->getMicroOpBufferSize()) {
2117   case 0:
2118     assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2119     break;
2120   case 1:
2121     if (ReadyCycle > NextCycle) {
2122       NextCycle = ReadyCycle;
2123       DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
2124     }
2125     break;
2126   default:
2127     // We don't currently model the OOO reorder buffer, so consider all
2128     // scheduled MOps to be "retired". We do loosely model in-order resource
2129     // latency. If this instruction uses an in-order resource, account for any
2130     // likely stall cycles.
2131     if (SU->isUnbuffered && ReadyCycle > NextCycle)
2132       NextCycle = ReadyCycle;
2133     break;
2134   }
2135   RetiredMOps += IncMOps;
2136 
2137   // Update resource counts and critical resource.
2138   if (SchedModel->hasInstrSchedModel()) {
2139     unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2140     assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2141     Rem->RemIssueCount -= DecRemIssue;
2142     if (ZoneCritResIdx) {
2143       // Scale scheduled micro-ops for comparing with the critical resource.
2144       unsigned ScaledMOps =
2145         RetiredMOps * SchedModel->getMicroOpFactor();
2146 
2147       // If scaled micro-ops are now more than the previous critical resource by
2148       // a full cycle, then micro-ops issue becomes critical.
2149       if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2150           >= (int)SchedModel->getLatencyFactor()) {
2151         ZoneCritResIdx = 0;
2152         DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
2153               << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
2154       }
2155     }
2156     for (TargetSchedModel::ProcResIter
2157            PI = SchedModel->getWriteProcResBegin(SC),
2158            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2159       unsigned RCycle =
2160         countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
2161       if (RCycle > NextCycle)
2162         NextCycle = RCycle;
2163     }
2164     if (SU->hasReservedResource) {
2165       // For reserved resources, record the highest cycle using the resource.
2166       // For top-down scheduling, this is the cycle in which we schedule this
2167     // instruction plus the number of cycles the operation reserves the
2168     // resource. For bottom-up it is simply the instruction's cycle.
2169       for (TargetSchedModel::ProcResIter
2170              PI = SchedModel->getWriteProcResBegin(SC),
2171              PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2172         unsigned PIdx = PI->ProcResourceIdx;
2173         if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2174           if (isTop()) {
2175             ReservedCycles[PIdx] =
2176               std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
2177           }
2178           else
2179             ReservedCycles[PIdx] = NextCycle;
2180         }
2181       }
2182     }
2183   }
2184   // Update ExpectedLatency and DependentLatency.
2185   unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2186   unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2187   if (SU->getDepth() > TopLatency) {
2188     TopLatency = SU->getDepth();
2189     DEBUG(dbgs() << "  " << Available.getName()
2190           << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
2191   }
2192   if (SU->getHeight() > BotLatency) {
2193     BotLatency = SU->getHeight();
2194     DEBUG(dbgs() << "  " << Available.getName()
2195           << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
2196   }
2197   // If we stall for any reason, bump the cycle.
2198   if (NextCycle > CurrCycle) {
2199     bumpCycle(NextCycle);
2200   } else {
2201     // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2202     // resource limited. If a stall occurred, bumpCycle does this.
2203     unsigned LFactor = SchedModel->getLatencyFactor();
2204     IsResourceLimited =
2205       (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
2206       > (int)LFactor;
2207   }
2208   // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2209   // resets CurrMOps. Loop to handle instructions with more MOps than can issue
2210   // in one cycle. Since we commonly reach the max MOps here, opportunistically
2211   // bump the cycle to avoid uselessly checking everything in the readyQ.
2212   CurrMOps += IncMOps;
2213   while (CurrMOps >= SchedModel->getIssueWidth()) {
2214     DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
2215           << " at cycle " << CurrCycle << '\n');
2216     bumpCycle(++NextCycle);
2217   }
2218   DEBUG(dumpScheduledState());
2219 }
2220 
2221 /// Release pending ready nodes into the available queue. This makes them
2222 /// visible to heuristics.
2223 void SchedBoundary::releasePending() {
2224   // If the available queue is empty, it is safe to reset MinReadyCycle.
2225   if (Available.empty())
2226     MinReadyCycle = std::numeric_limits<unsigned>::max();
2227 
2228   // Check to see if any of the pending instructions are ready to issue.  If
2229   // so, add them to the available queue.
2230   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2231   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
2232     SUnit *SU = *(Pending.begin()+i);
2233     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2234 
2235     if (ReadyCycle < MinReadyCycle)
2236       MinReadyCycle = ReadyCycle;
2237 
2238     if (!IsBuffered && ReadyCycle > CurrCycle)
2239       continue;
2240 
2241     if (checkHazard(SU))
2242       continue;
2243 
2244     if (Available.size() >= ReadyListLimit)
2245       break;
2246 
2247     Available.push(SU);
2248     Pending.remove(Pending.begin()+i);
2249     --i; --e;
2250   }
2251   CheckPending = false;
2252 }
2253 
2254 /// Remove SU from the ready set for this boundary.
2255 void SchedBoundary::removeReady(SUnit *SU) {
2256   if (Available.isInQueue(SU))
2257     Available.remove(Available.find(SU));
2258   else {
2259     assert(Pending.isInQueue(SU) && "bad ready count");
2260     Pending.remove(Pending.find(SU));
2261   }
2262 }
2263 
2264 /// If this queue only has one ready candidate, return it. As a side effect,
2265 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2266 /// one node is ready. If multiple instructions are ready, return NULL.
2267 SUnit *SchedBoundary::pickOnlyChoice() {
2268   if (CheckPending)
2269     releasePending();
2270 
2271   if (CurrMOps > 0) {
2272     // Defer any ready instrs that now have a hazard.
2273     for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2274       if (checkHazard(*I)) {
2275         Pending.push(*I);
2276         I = Available.remove(I);
2277         continue;
2278       }
2279       ++I;
2280     }
2281   }
2282   for (unsigned i = 0; Available.empty(); ++i) {
2283 //  FIXME: Re-enable assert once PR20057 is resolved.
2284 //    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2285 //           "permanent hazard");
2286     (void)i;
2287     bumpCycle(CurrCycle + 1);
2288     releasePending();
2289   }
2290 
2291   DEBUG(Pending.dump());
2292   DEBUG(Available.dump());
2293 
2294   if (Available.size() == 1)
2295     return *Available.begin();
2296   return nullptr;
2297 }
2298 
2299 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2300 // This is useful information to dump after bumpNode.
2301 // Note that the Queue contents are more useful before pickNodeFromQueue.
2302 LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() {
2303   unsigned ResFactor;
2304   unsigned ResCount;
2305   if (ZoneCritResIdx) {
2306     ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2307     ResCount = getResourceCount(ZoneCritResIdx);
2308   } else {
2309     ResFactor = SchedModel->getMicroOpFactor();
2310     ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
2311   }
2312   unsigned LFactor = SchedModel->getLatencyFactor();
2313   dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2314          << "  Retired: " << RetiredMOps;
2315   dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
2316   dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
2317          << ResCount / ResFactor << " "
2318          << SchedModel->getResourceName(ZoneCritResIdx)
2319          << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
2320          << (IsResourceLimited ? "  - Resource" : "  - Latency")
2321          << " limited.\n";
2322 }
2323 #endif
2324 
2325 //===----------------------------------------------------------------------===//
2326 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2327 //===----------------------------------------------------------------------===//
2328 
2329 void GenericSchedulerBase::SchedCandidate::
2330 initResourceDelta(const ScheduleDAGMI *DAG,
2331                   const TargetSchedModel *SchedModel) {
2332   if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2333     return;
2334 
2335   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2336   for (TargetSchedModel::ProcResIter
2337          PI = SchedModel->getWriteProcResBegin(SC),
2338          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2339     if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2340       ResDelta.CritResources += PI->Cycles;
2341     if (PI->ProcResourceIdx == Policy.DemandResIdx)
2342       ResDelta.DemandedResources += PI->Cycles;
2343   }
2344 }
2345 
2346 /// Set the CandPolicy for a scheduling zone given the current resources and
2347 /// latencies inside and outside the zone.
2348 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2349                                      SchedBoundary &CurrZone,
2350                                      SchedBoundary *OtherZone) {
2351   // Apply preemptive heuristics based on the total latency and resources
2352   // inside and outside this zone. Potential stalls should be considered before
2353   // following this policy.
2354 
2355   // Compute remaining latency. We need this both to determine whether the
2356   // overall schedule has become latency-limited and whether the instructions
2357   // outside this zone are resource or latency limited.
2358   //
2359   // The "dependent" latency is updated incrementally during scheduling as the
2360   // max height/depth of scheduled nodes minus the cycles since it was
2361   // scheduled:
2362   //   DLat = max(N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
2363   //
2364   // The "independent" latency is the max ready queue depth:
2365   //   ILat = max N.depth for N in Available|Pending
2366   //
2367   // RemainingLatency is the greater of independent and dependent latency.
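  //
  // Illustrative numbers: if the deepest node scheduled in this zone has
  // depth 10 and became ready 4 cycles ago, DLat = 10 - 4 = 6; if the deepest
  // node still in Available/Pending has depth 8, RemainingLatency is
  // max(6, 8) = 8.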
2368   unsigned RemLatency = CurrZone.getDependentLatency();
2369   RemLatency = std::max(RemLatency,
2370                         CurrZone.findMaxLatency(CurrZone.Available.elements()));
2371   RemLatency = std::max(RemLatency,
2372                         CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2373 
2374   // Compute the critical resource outside the zone.
2375   unsigned OtherCritIdx = 0;
2376   unsigned OtherCount =
2377     OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2378 
2379   bool OtherResLimited = false;
2380   if (SchedModel->hasInstrSchedModel()) {
2381     unsigned LFactor = SchedModel->getLatencyFactor();
2382     OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
2383   }
2384   // Schedule aggressively for latency in PostRA mode. We don't check for
2385   // acyclic latency during PostRA, and highly out-of-order processors will
2386   // skip PostRA scheduling.
2387   if (!OtherResLimited) {
2388     if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
2389       Policy.ReduceLatency |= true;
2390       DEBUG(dbgs() << "  " << CurrZone.Available.getName()
2391             << " RemainingLatency " << RemLatency << " + "
2392             << CurrZone.getCurrCycle() << "c > CritPath "
2393             << Rem.CriticalPath << "\n");
2394     }
2395   }
2396   // If the same resource is limiting inside and outside the zone, do nothing.
2397   if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2398     return;
2399 
2400   DEBUG(
2401     if (CurrZone.isResourceLimited()) {
2402       dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "
2403              << SchedModel->getResourceName(CurrZone.getZoneCritResIdx())
2404              << "\n";
2405     }
2406     if (OtherResLimited)
2407       dbgs() << "  RemainingLimit: "
2408              << SchedModel->getResourceName(OtherCritIdx) << "\n";
2409     if (!CurrZone.isResourceLimited() && !OtherResLimited)
2410       dbgs() << "  Latency limited both directions.\n");
2411 
2412   if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2413     Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2414 
2415   if (OtherResLimited)
2416     Policy.DemandResIdx = OtherCritIdx;
2417 }
2418 
2419 #ifndef NDEBUG
2420 const char *GenericSchedulerBase::getReasonStr(
2421   GenericSchedulerBase::CandReason Reason) {
2422   switch (Reason) {
2423   case NoCand:         return "NOCAND    ";
2424   case Only1:          return "ONLY1     ";
2425   case PhysRegCopy:    return "PREG-COPY ";
2426   case RegExcess:      return "REG-EXCESS";
2427   case RegCritical:    return "REG-CRIT  ";
2428   case Stall:          return "STALL     ";
2429   case Cluster:        return "CLUSTER   ";
2430   case Weak:           return "WEAK      ";
2431   case RegMax:         return "REG-MAX   ";
2432   case ResourceReduce: return "RES-REDUCE";
2433   case ResourceDemand: return "RES-DEMAND";
2434   case TopDepthReduce: return "TOP-DEPTH ";
2435   case TopPathReduce:  return "TOP-PATH  ";
2436   case BotHeightReduce:return "BOT-HEIGHT";
2437   case BotPathReduce:  return "BOT-PATH  ";
2438   case NextDefUse:     return "DEF-USE   ";
2439   case NodeOrder:      return "ORDER     ";
2440   }
2441   llvm_unreachable("Unknown reason!");
2442 }
2443 
2444 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2445   PressureChange P;
2446   unsigned ResIdx = 0;
2447   unsigned Latency = 0;
2448   switch (Cand.Reason) {
2449   default:
2450     break;
2451   case RegExcess:
2452     P = Cand.RPDelta.Excess;
2453     break;
2454   case RegCritical:
2455     P = Cand.RPDelta.CriticalMax;
2456     break;
2457   case RegMax:
2458     P = Cand.RPDelta.CurrentMax;
2459     break;
2460   case ResourceReduce:
2461     ResIdx = Cand.Policy.ReduceResIdx;
2462     break;
2463   case ResourceDemand:
2464     ResIdx = Cand.Policy.DemandResIdx;
2465     break;
2466   case TopDepthReduce:
2467     Latency = Cand.SU->getDepth();
2468     break;
2469   case TopPathReduce:
2470     Latency = Cand.SU->getHeight();
2471     break;
2472   case BotHeightReduce:
2473     Latency = Cand.SU->getHeight();
2474     break;
2475   case BotPathReduce:
2476     Latency = Cand.SU->getDepth();
2477     break;
2478   }
2479   dbgs() << "  Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2480   if (P.isValid())
2481     dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2482            << ":" << P.getUnitInc() << " ";
2483   else
2484     dbgs() << "      ";
2485   if (ResIdx)
2486     dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2487   else
2488     dbgs() << "         ";
2489   if (Latency)
2490     dbgs() << " " << Latency << " cycles ";
2491   else
2492     dbgs() << "          ";
2493   dbgs() << '\n';
2494 }
2495 #endif
2496 
2497 /// Return true if this heuristic determines order.
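/// A true return means the comparison is decided: either TryCand wins and its
/// Reason is set, or Cand wins and its Reason may be strengthened to this
/// one. A false return means the two candidates tie on this criterion and the
/// next heuristic should be consulted.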
2498 static bool tryLess(int TryVal, int CandVal,
2499                     GenericSchedulerBase::SchedCandidate &TryCand,
2500                     GenericSchedulerBase::SchedCandidate &Cand,
2501                     GenericSchedulerBase::CandReason Reason) {
2502   if (TryVal < CandVal) {
2503     TryCand.Reason = Reason;
2504     return true;
2505   }
2506   if (TryVal > CandVal) {
2507     if (Cand.Reason > Reason)
2508       Cand.Reason = Reason;
2509     return true;
2510   }
2511   return false;
2512 }
2513 
2514 static bool tryGreater(int TryVal, int CandVal,
2515                        GenericSchedulerBase::SchedCandidate &TryCand,
2516                        GenericSchedulerBase::SchedCandidate &Cand,
2517                        GenericSchedulerBase::CandReason Reason) {
2518   if (TryVal > CandVal) {
2519     TryCand.Reason = Reason;
2520     return true;
2521   }
2522   if (TryVal < CandVal) {
2523     if (Cand.Reason > Reason)
2524       Cand.Reason = Reason;
2525     return true;
2526   }
2527   return false;
2528 }
2529 
2530 static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2531                        GenericSchedulerBase::SchedCandidate &Cand,
2532                        SchedBoundary &Zone) {
2533   if (Zone.isTop()) {
2534     if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2535       if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2536                   TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2537         return true;
2538     }
2539     if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2540                    TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2541       return true;
2542   } else {
2543     if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2544       if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2545                   TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2546         return true;
2547     }
2548     if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2549                    TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2550       return true;
2551   }
2552   return false;
2553 }
2554 
2555 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2556   DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2557         << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2558 }
2559 
2560 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
2561   tracePick(Cand.Reason, Cand.AtTop);
2562 }
2563 
2564 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2565   assert(dag->hasVRegLiveness() &&
2566          "(PreRA)GenericScheduler needs vreg liveness");
2567   DAG = static_cast<ScheduleDAGMILive*>(dag);
2568   SchedModel = DAG->getSchedModel();
2569   TRI = DAG->TRI;
2570 
2571   Rem.init(DAG, SchedModel);
2572   Top.init(DAG, SchedModel, &Rem);
2573   Bot.init(DAG, SchedModel, &Rem);
2574 
2575   // Initialize resource counts.
2576 
2577   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2578   // are disabled, then these HazardRecs will be disabled.
2579   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2580   if (!Top.HazardRec) {
2581     Top.HazardRec =
2582         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2583             Itin, DAG);
2584   }
2585   if (!Bot.HazardRec) {
2586     Bot.HazardRec =
2587         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2588             Itin, DAG);
2589   }
2590   TopCand.SU = nullptr;
2591   BotCand.SU = nullptr;
2592 }
2593 
2594 /// Initialize the per-region scheduling policy.
2595 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2596                                   MachineBasicBlock::iterator End,
2597                                   unsigned NumRegionInstrs) {
2598   const MachineFunction &MF = *Begin->getParent()->getParent();
2599   const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2600 
2601   // Avoid setting up the register pressure tracker for small regions to save
2602   // compile time. As a rough heuristic, only track pressure when the number of
2603   // schedulable instructions exceeds half the integer register file.
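  // (Illustrative numbers: with 32 allocatable registers in the legal integer
  // class considered below, pressure is tracked for regions of more than 16
  // instructions.)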
2604   RegionPolicy.ShouldTrackPressure = true;
2605   for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2606     MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2607     if (TLI->isTypeLegal(LegalIntVT)) {
2608       unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2609         TLI->getRegClassFor(LegalIntVT));
2610       RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2611     }
2612   }
2613 
2614   // For generic targets, we default to bottom-up, because it's simpler and more
2615   // compile-time optimizations have been implemented in that direction.
2616   RegionPolicy.OnlyBottomUp = true;
2617 
2618   // Allow the subtarget to override default policy.
2619   MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2620 
2621   // After subtarget overrides, apply command line options.
2622   if (!EnableRegPressure)
2623     RegionPolicy.ShouldTrackPressure = false;
2624 
2625   // Check if -misched-topdown/bottomup can force or unforce scheduling direction.
2626   // e.g. -misched-bottomup=false allows scheduling in both directions.
2627   assert((!ForceTopDown || !ForceBottomUp) &&
2628          "-misched-topdown incompatible with -misched-bottomup");
2629   if (ForceBottomUp.getNumOccurrences() > 0) {
2630     RegionPolicy.OnlyBottomUp = ForceBottomUp;
2631     if (RegionPolicy.OnlyBottomUp)
2632       RegionPolicy.OnlyTopDown = false;
2633   }
2634   if (ForceTopDown.getNumOccurrences() > 0) {
2635     RegionPolicy.OnlyTopDown = ForceTopDown;
2636     if (RegionPolicy.OnlyTopDown)
2637       RegionPolicy.OnlyBottomUp = false;
2638   }
2639 }
2640 
2641 void GenericScheduler::dumpPolicy() {
2642   // Cannot completely remove virtual function even in release mode.
2643 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2644   dbgs() << "GenericScheduler RegionPolicy: "
2645          << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2646          << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2647          << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2648          << "\n";
2649 #endif
2650 }
2651 
2652 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2653 /// critical path by more cycles than it takes to drain the instruction buffer.
2654 /// We estimate an upper bound on in-flight instructions as:
2655 ///
2656 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2657 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2658 /// InFlightResources = InFlightIterations * LoopResources
2659 ///
2660 /// TODO: Check execution resources in addition to IssueCount.
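///
/// Worked example (illustrative numbers, latency and micro-op factors of 1):
/// with CyclicCritPath = 10c, CriticalPath = 30c, and 8 micro-ops issued per
/// iteration, IterCount = max(10, 8) = 10 and InFlightCount =
/// ceil(30 * 8 / 10) = 24 micro-ops. A 16-entry micro-op buffer is exceeded,
/// so the schedule is flagged as acyclic-latency limited.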
2661 void GenericScheduler::checkAcyclicLatency() {
2662   if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2663     return;
2664 
2665   // Scaled number of cycles per loop iteration.
2666   unsigned IterCount =
2667     std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2668              Rem.RemIssueCount);
2669   // Scaled acyclic critical path.
2670   unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2671   // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2672   unsigned InFlightCount =
2673     (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2674   unsigned BufferLimit =
2675     SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2676 
2677   Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2678 
2679   DEBUG(dbgs() << "IssueCycles="
2680         << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2681         << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2682         << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
2683         << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2684         << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2685         if (Rem.IsAcyclicLatencyLimited)
2686           dbgs() << "  ACYCLIC LATENCY LIMIT\n");
2687 }
2688 
2689 void GenericScheduler::registerRoots() {
2690   Rem.CriticalPath = DAG->ExitSU.getDepth();
2691 
2692   // Some roots may not feed into ExitSU. Check all of them in case.
2693   for (std::vector<SUnit*>::const_iterator
2694          I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
2695     if ((*I)->getDepth() > Rem.CriticalPath)
2696       Rem.CriticalPath = (*I)->getDepth();
2697   }
2698   DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
2699   if (DumpCriticalPathLength) {
2700     errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
2701   }
2702 
2703   if (EnableCyclicPath) {
2704     Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2705     checkAcyclicLatency();
2706   }
2707 }
2708 
2709 static bool tryPressure(const PressureChange &TryP,
2710                         const PressureChange &CandP,
2711                         GenericSchedulerBase::SchedCandidate &TryCand,
2712                         GenericSchedulerBase::SchedCandidate &Cand,
2713                         GenericSchedulerBase::CandReason Reason,
2714                         const TargetRegisterInfo *TRI,
2715                         const MachineFunction &MF) {
2716   // If one candidate decreases and the other increases, go with it.
2717   // Invalid candidates have UnitInc==0.
2718   if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2719                  Reason)) {
2720     return true;
2721   }
2722   // Do not compare the magnitude of pressure changes between top and bottom
2723   // boundary.
2724   if (Cand.AtTop != TryCand.AtTop)
2725     return false;
2726 
2727   // If both candidates affect the same set in the same boundary, go with the
2728   // smallest increase.
2729   unsigned TryPSet = TryP.getPSetOrMax();
2730   unsigned CandPSet = CandP.getPSetOrMax();
2731   if (TryPSet == CandPSet) {
2732     return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2733                    Reason);
2734   }
2735 
2736   int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
2737                                  std::numeric_limits<int>::max();
2738 
2739   int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
2740                                    std::numeric_limits<int>::max();
2741 
2742   // If the candidates are decreasing pressure, reverse priority.
2743   if (TryP.getUnitInc() < 0)
2744     std::swap(TryRank, CandRank);
2745   return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2746 }
2747 
2748 static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2749   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2750 }
2751 
2752 /// Minimize physical register live ranges. Regalloc wants them adjacent to
2753 /// their physreg def/use.
2754 ///
2755 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2756 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2757 /// with the operation that produces or consumes the physreg. We'll do this when
2758 /// regalloc has support for parallel copies.
2759 static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
2760   const MachineInstr *MI = SU->getInstr();
2761   if (!MI->isCopy())
2762     return 0;
2763 
2764   unsigned ScheduledOper = isTop ? 1 : 0;
2765   unsigned UnscheduledOper = isTop ? 0 : 1;
2766   // If we have already scheduled the physreg producer/consumer, immediately
2767   // schedule the copy.
2768   if (TargetRegisterInfo::isPhysicalRegister(
2769         MI->getOperand(ScheduledOper).getReg()))
2770     return 1;
2771   // If the physreg is at the boundary, defer it. Otherwise schedule it
2772   // immediately to free the dependent. We can hoist the copy later.
2773   bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2774   if (TargetRegisterInfo::isPhysicalRegister(
2775         MI->getOperand(UnscheduledOper).getReg()))
2776     return AtBoundary ? -1 : 1;
2777   return 0;
2778 }
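
// Hedged example of the bias above (the register names are invented): when
// scheduling top-down, operand 1 (the copy's source) lies on the already
// scheduled side, so for "%vreg = COPY %EAX" the function returns +1 and the
// copy is picked right away, keeping it next to the producer of %EAX. A copy
// whose physreg operand is on the unscheduled side returns -1 (defer) only
// while it still sits at the region boundary, i.e. it has no remaining succs
// (top-down) or preds (bottom-up) on that side, and returns +1 otherwise.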
2779 
2780 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
2781                                      bool AtTop,
2782                                      const RegPressureTracker &RPTracker,
2783                                      RegPressureTracker &TempTracker) {
2784   Cand.SU = SU;
2785   Cand.AtTop = AtTop;
2786   if (DAG->isTrackingPressure()) {
2787     if (AtTop) {
2788       TempTracker.getMaxDownwardPressureDelta(
2789         Cand.SU->getInstr(),
2790         Cand.RPDelta,
2791         DAG->getRegionCriticalPSets(),
2792         DAG->getRegPressure().MaxSetPressure);
2793     } else {
2794       if (VerifyScheduling) {
2795         TempTracker.getMaxUpwardPressureDelta(
2796           Cand.SU->getInstr(),
2797           &DAG->getPressureDiff(Cand.SU),
2798           Cand.RPDelta,
2799           DAG->getRegionCriticalPSets(),
2800           DAG->getRegPressure().MaxSetPressure);
2801       } else {
2802         RPTracker.getUpwardPressureDelta(
2803           Cand.SU->getInstr(),
2804           DAG->getPressureDiff(Cand.SU),
2805           Cand.RPDelta,
2806           DAG->getRegionCriticalPSets(),
2807           DAG->getRegPressure().MaxSetPressure);
2808       }
2809     }
2810   }
2811   DEBUG(if (Cand.RPDelta.Excess.isValid())
2812           dbgs() << "  Try  SU(" << Cand.SU->NodeNum << ") "
2813                  << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet())
2814                  << ":" << Cand.RPDelta.Excess.getUnitInc() << "\n");
2815 }
2816 
2817 /// Apply a set of heuristics to a new candidate. Heuristics are currently
2818 /// hierarchical. This may be more efficient than a graduated cost model because
2819 /// we don't need to evaluate all aspects of the model for each node in the
2820 /// queue. But it's really done to make the heuristics easier to debug and
2821 /// statistically analyze.
2822 ///
2823 /// \param Cand provides the policy and current best candidate.
2824 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2825 /// \param Zone describes the scheduled zone that we are extending, or nullptr
2826 ///             if Cand is from a different zone than TryCand.
2827 void GenericScheduler::tryCandidate(SchedCandidate &Cand,
2828                                     SchedCandidate &TryCand,
2829                                     SchedBoundary *Zone) {
2830   // Initialize the candidate if needed.
2831   if (!Cand.isValid()) {
2832     TryCand.Reason = NodeOrder;
2833     return;
2834   }
2835 
2836   if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
2837                  biasPhysRegCopy(Cand.SU, Cand.AtTop),
2838                  TryCand, Cand, PhysRegCopy))
2839     return;
2840 
2841   // Avoid exceeding the target's limit.
2842   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
2843                                                Cand.RPDelta.Excess,
2844                                                TryCand, Cand, RegExcess, TRI,
2845                                                DAG->MF))
2846     return;
2847 
2848   // Avoid increasing the max critical pressure in the scheduled region.
2849   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
2850                                                Cand.RPDelta.CriticalMax,
2851                                                TryCand, Cand, RegCritical, TRI,
2852                                                DAG->MF))
2853     return;
2854 
2855   // We only compare a subset of features when comparing nodes between the
2856   // Top and Bottom boundaries. Some properties are simply incomparable; in
2857   // many other instances we should only override the other boundary if
2858   // something is clearly a good pick on one boundary. Skip heuristics that
2859   // are more "tie-breaking" in nature.
2860   bool SameBoundary = Zone != nullptr;
2861   if (SameBoundary) {
2862     // For loops that are acyclic path limited, aggressively schedule for
2863     // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
2864     // heuristics to take precedence.
2865     if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
2866         tryLatency(TryCand, Cand, *Zone))
2867       return;
2868 
2869     // Prioritize instructions that read unbuffered resources by stall cycles.
2870     if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
2871                 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
2872       return;
2873   }
2874 
2875   // Keep clustered nodes together to encourage downstream peephole
2876   // optimizations which may reduce resource requirements.
2877   //
2878   // This is a best effort to set things up for a post-RA pass. Optimizations
2879   // like generating loads of multiple registers should ideally be done within
2880   // the scheduler pass by combining the loads during DAG postprocessing.
2881   const SUnit *CandNextClusterSU =
2882     Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2883   const SUnit *TryCandNextClusterSU =
2884     TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2885   if (tryGreater(TryCand.SU == TryCandNextClusterSU,
2886                  Cand.SU == CandNextClusterSU,
2887                  TryCand, Cand, Cluster))
2888     return;
2889 
2890   if (SameBoundary) {
2891     // Weak edges are for clustering and other constraints.
2892     if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
2893                 getWeakLeft(Cand.SU, Cand.AtTop),
2894                 TryCand, Cand, Weak))
2895       return;
2896   }
2897 
2898   // Avoid increasing the max pressure of the entire region.
2899   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
2900                                                Cand.RPDelta.CurrentMax,
2901                                                TryCand, Cand, RegMax, TRI,
2902                                                DAG->MF))
2903     return;
2904 
2905   if (SameBoundary) {
2906     // Avoid critical resource consumption and balance the schedule.
2907     TryCand.initResourceDelta(DAG, SchedModel);
2908     if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
2909                 TryCand, Cand, ResourceReduce))
2910       return;
2911     if (tryGreater(TryCand.ResDelta.DemandedResources,
2912                    Cand.ResDelta.DemandedResources,
2913                    TryCand, Cand, ResourceDemand))
2914       return;
2915 
2916     // Avoid serializing long latency dependence chains.
2917     // For acyclic path limited loops, latency was already checked above.
2918     if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
2919         !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
2920       return;
2921 
2922     // Fall through to original instruction order.
2923     if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
2924         || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
2925       TryCand.Reason = NodeOrder;
2926     }
2927   }
2928 }
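
// For reference, the heuristics above apply in this priority order:
// PhysRegCopy > RegExcess > RegCritical > acyclic latency > Stall > Cluster >
// Weak > RegMax > ResourceReduce > ResourceDemand > latency > NodeOrder.
// The checks guarded by SameBoundary are skipped when the two candidates come
// from different scheduling zones.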
2929 
2930 /// Pick the best candidate from the queue.
2931 ///
2932 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
2933 /// DAG building. To adjust for the current scheduling location we need to
2934 /// maintain the number of vreg uses remaining to be top-scheduled.
2935 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
2936                                          const CandPolicy &ZonePolicy,
2937                                          const RegPressureTracker &RPTracker,
2938                                          SchedCandidate &Cand) {
2939   // getMaxPressureDelta temporarily modifies the tracker.
2940   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
2941 
2942   ReadyQueue &Q = Zone.Available;
2943   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
2944 
2945     SchedCandidate TryCand(ZonePolicy);
2946     initCandidate(TryCand, *I, Zone.isTop(), RPTracker, TempTracker);
2947     // Pass SchedBoundary only when comparing nodes from the same boundary.
2948     SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
2949     tryCandidate(Cand, TryCand, ZoneArg);
2950     if (TryCand.Reason != NoCand) {
2951       // Initialize resource delta if needed in case future heuristics query it.
2952       if (TryCand.ResDelta == SchedResourceDelta())
2953         TryCand.initResourceDelta(DAG, SchedModel);
2954       Cand.setBest(TryCand);
2955       DEBUG(traceCandidate(Cand));
2956     }
2957   }
2958 }
2959 
2960 /// Pick the best candidate node from either the top or bottom queue.
2961 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
2962   // Schedule as far as possible in the direction of no choice. This is most
2963   // efficient, but also provides the best heuristics for CriticalPSets.
2964   if (SUnit *SU = Bot.pickOnlyChoice()) {
2965     IsTopNode = false;
2966     tracePick(Only1, false);
2967     return SU;
2968   }
2969   if (SUnit *SU = Top.pickOnlyChoice()) {
2970     IsTopNode = true;
2971     tracePick(Only1, true);
2972     return SU;
2973   }
2974   // Set the bottom-up policy based on the state of the current bottom zone and
2975   // the instructions outside the zone, including the top zone.
2976   CandPolicy BotPolicy;
2977   setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
2978   // Set the top-down policy based on the state of the current top zone and
2979   // the instructions outside the zone, including the bottom zone.
2980   CandPolicy TopPolicy;
2981   setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
2982 
2983   // See if BotCand is still valid (because we previously scheduled from Top).
2984   DEBUG(dbgs() << "Picking from Bot:\n");
2985   if (!BotCand.isValid() || BotCand.SU->isScheduled ||
2986       BotCand.Policy != BotPolicy) {
2987     BotCand.reset(CandPolicy());
2988     pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
2989     assert(BotCand.Reason != NoCand && "failed to find the first candidate");
2990   } else {
2991     DEBUG(traceCandidate(BotCand));
2992 #ifndef NDEBUG
2993     if (VerifyScheduling) {
2994       SchedCandidate TCand;
2995       TCand.reset(CandPolicy());
2996       pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
2997       assert(TCand.SU == BotCand.SU &&
2998              "Last pick result should correspond to re-picking right now");
2999     }
3000 #endif
3001   }
3002 
3003   // Check if the top Q has a better candidate.
3004   DEBUG(dbgs() << "Picking from Top:\n");
3005   if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3006       TopCand.Policy != TopPolicy) {
3007     TopCand.reset(CandPolicy());
3008     pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3009     assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3010   } else {
3011     DEBUG(traceCandidate(TopCand));
3012 #ifndef NDEBUG
3013     if (VerifyScheduling) {
3014       SchedCandidate TCand;
3015       TCand.reset(CandPolicy());
3016       pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3017       assert(TCand.SU == TopCand.SU &&
3018            "Last pick result should correspond to re-picking right now");
3019     }
3020 #endif
3021   }
3022 
3023   // Pick best from BotCand and TopCand.
3024   assert(BotCand.isValid());
3025   assert(TopCand.isValid());
3026   SchedCandidate Cand = BotCand;
3027   TopCand.Reason = NoCand;
3028   tryCandidate(Cand, TopCand, nullptr);
3029   if (TopCand.Reason != NoCand) {
3030     Cand.setBest(TopCand);
3031     DEBUG(traceCandidate(Cand));
3032   }
3033 
3034   IsTopNode = Cand.AtTop;
3035   tracePick(Cand);
3036   return Cand.SU;
3037 }
3038 
3039 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3040 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3041   if (DAG->top() == DAG->bottom()) {
3042     assert(Top.Available.empty() && Top.Pending.empty() &&
3043            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3044     return nullptr;
3045   }
3046   SUnit *SU;
3047   do {
3048     if (RegionPolicy.OnlyTopDown) {
3049       SU = Top.pickOnlyChoice();
3050       if (!SU) {
3051         CandPolicy NoPolicy;
3052         TopCand.reset(NoPolicy);
3053         pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3054         assert(TopCand.Reason != NoCand && "failed to find a candidate");
3055         tracePick(TopCand);
3056         SU = TopCand.SU;
3057       }
3058       IsTopNode = true;
3059     } else if (RegionPolicy.OnlyBottomUp) {
3060       SU = Bot.pickOnlyChoice();
3061       if (!SU) {
3062         CandPolicy NoPolicy;
3063         BotCand.reset(NoPolicy);
3064         pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3065         assert(BotCand.Reason != NoCand && "failed to find a candidate");
3066         tracePick(BotCand);
3067         SU = BotCand.SU;
3068       }
3069       IsTopNode = false;
3070     } else {
3071       SU = pickNodeBidirectional(IsTopNode);
3072     }
3073   } while (SU->isScheduled);
3074 
3075   if (SU->isTopReady())
3076     Top.removeReady(SU);
3077   if (SU->isBottomReady())
3078     Bot.removeReady(SU);
3079 
3080   DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
3081   return SU;
3082 }
3083 
3084 void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
3085   MachineBasicBlock::iterator InsertPos = SU->getInstr();
3086   if (!isTop)
3087     ++InsertPos;
3088   SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3089 
3090   // Find already scheduled copies with a single physreg dependence and move
3091   // them just above the scheduled instruction.
3092   for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
3093        I != E; ++I) {
3094     if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
3095       continue;
3096     SUnit *DepSU = I->getSUnit();
3097     if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3098       continue;
3099     MachineInstr *Copy = DepSU->getInstr();
3100     if (!Copy->isCopy())
3101       continue;
3102     DEBUG(dbgs() << "  Rescheduling physreg copy ";
3103           I->getSUnit()->dump(DAG));
3104     DAG->moveInstruction(Copy, InsertPos);
3105   }
3106 }
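
// Hedged illustration (the register name is invented): after a top-down pick
// of an SU that uses %EAX, a previously scheduled "%EAX = COPY %vreg" whose
// only data successor is that SU is moved by moveInstruction() so it sits
// immediately above the newly scheduled instruction, keeping the physreg live
// range as short as possible for the register allocator.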
3107 
3108 /// Update the scheduler's state after scheduling a node. This is the same node
3109 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3110 /// update its state based on the current cycle before MachineSchedStrategy
3111 /// does.
3112 ///
3113 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3114 /// them here. See comments in biasPhysRegCopy.
3115 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3116   if (IsTopNode) {
3117     SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3118     Top.bumpNode(SU);
3119     if (SU->hasPhysRegUses)
3120       reschedulePhysRegCopies(SU, true);
3121   } else {
3122     SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3123     Bot.bumpNode(SU);
3124     if (SU->hasPhysRegDefs)
3125       reschedulePhysRegCopies(SU, false);
3126   }
3127 }
3128 
3129 /// Create the standard converging machine scheduler. This will be used as the
3130 /// default scheduler if the target does not set a default.
3131 ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3132   ScheduleDAGMILive *DAG =
3133       new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
3134   // Register DAG post-processors.
3135   //
3136   // FIXME: extend the mutation API to allow earlier mutations to instantiate
3137   // data and pass it to later mutations. Have a single mutation that gathers
3138   // the interesting nodes in one pass.
3139   DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3140   return DAG;
3141 }
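
// Illustrative sketch (hedged, not part of this file) of how a target would
// typically reuse this builder from its TargetPassConfig hook and append its
// own DAG mutations. MyTargetPassConfig and createMyTargetClusterMutation()
// are hypothetical names; the latter is assumed to return a
// std::unique_ptr<ScheduleDAGMutation>:
//
//   ScheduleDAGInstrs *
//   MyTargetPassConfig::createMachineScheduler(MachineSchedContext *C) const {
//     ScheduleDAGMILive *DAG = createGenericSchedLive(C);
//     DAG->addMutation(createMyTargetClusterMutation());
//     return DAG;
//   }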
3142 
3143 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
3144   return createGenericSchedLive(C);
3145 }
3146 
3147 static MachineSchedRegistry
3148 GenericSchedRegistry("converge", "Standard converging scheduler.",
3149                      createConvergingSched);
3150 
3151 //===----------------------------------------------------------------------===//
3152 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3153 //===----------------------------------------------------------------------===//
3154 
3155 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3156   DAG = Dag;
3157   SchedModel = DAG->getSchedModel();
3158   TRI = DAG->TRI;
3159 
3160   Rem.init(DAG, SchedModel);
3161   Top.init(DAG, SchedModel, &Rem);
3162   BotRoots.clear();
3163 
3164   // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3165   // or are disabled, then these HazardRecs will be disabled.
3166   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3167   if (!Top.HazardRec) {
3168     Top.HazardRec =
3169         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3170             Itin, DAG);
3171   }
3172 }
3173 
3174 void PostGenericScheduler::registerRoots() {
3175   Rem.CriticalPath = DAG->ExitSU.getDepth();
3176 
3177   // Some roots may not feed into ExitSU. Check all of them just in case.
3178   for (SmallVectorImpl<SUnit*>::const_iterator
3179          I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) {
3180     if ((*I)->getDepth() > Rem.CriticalPath)
3181       Rem.CriticalPath = (*I)->getDepth();
3182   }
3183   DEBUG(dbgs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << '\n');
3184   if (DumpCriticalPathLength) {
3185     errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3186   }
3187 }
3188 
3189 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
3190 ///
3191 /// \param Cand provides the policy and current best candidate.
3192 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3193 void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3194                                         SchedCandidate &TryCand) {
3195 
3196   // Initialize the candidate if needed.
3197   if (!Cand.isValid()) {
3198     TryCand.Reason = NodeOrder;
3199     return;
3200   }
3201 
3202   // Prioritize instructions that read unbuffered resources by stall cycles.
3203   if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3204               Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3205     return;
3206 
3207   // Avoid critical resource consumption and balance the schedule.
3208   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3209               TryCand, Cand, ResourceReduce))
3210     return;
3211   if (tryGreater(TryCand.ResDelta.DemandedResources,
3212                  Cand.ResDelta.DemandedResources,
3213                  TryCand, Cand, ResourceDemand))
3214     return;
3215 
3216   // Avoid serializing long latency dependence chains.
3217   if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3218     return;
3219   }
3220 
3221   // Fall through to original instruction order.
3222   if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3223     TryCand.Reason = NodeOrder;
3224 }
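
// For reference, the post-RA priority order above is simply:
// Stall > ResourceReduce > ResourceDemand > latency > NodeOrder. Register
// pressure is not considered because this strategy runs after allocation.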
3225 
3226 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3227   ReadyQueue &Q = Top.Available;
3228   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
3229     SchedCandidate TryCand(Cand.Policy);
3230     TryCand.SU = *I;
3231     TryCand.AtTop = true;
3232     TryCand.initResourceDelta(DAG, SchedModel);
3233     tryCandidate(Cand, TryCand);
3234     if (TryCand.Reason != NoCand) {
3235       Cand.setBest(TryCand);
3236       DEBUG(traceCandidate(Cand));
3237     }
3238   }
3239 }
3240 
3241 /// Pick the next node to schedule.
3242 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3243   if (DAG->top() == DAG->bottom()) {
3244     assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3245     return nullptr;
3246   }
3247   SUnit *SU;
3248   do {
3249     SU = Top.pickOnlyChoice();
3250     if (SU) {
3251       tracePick(Only1, true);
3252     } else {
3253       CandPolicy NoPolicy;
3254       SchedCandidate TopCand(NoPolicy);
3255       // Set the top-down policy based on the state of the current top zone and
3256       // the instructions outside the zone, including the bottom zone.
3257       setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3258       pickNodeFromQueue(TopCand);
3259       assert(TopCand.Reason != NoCand && "failed to find a candidate");
3260       tracePick(TopCand);
3261       SU = TopCand.SU;
3262     }
3263   } while (SU->isScheduled);
3264 
3265   IsTopNode = true;
3266   Top.removeReady(SU);
3267 
3268   DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
3269   return SU;
3270 }
3271 
3272 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3273 /// scheduled/remaining flags in the DAG nodes.
3274 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3275   SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3276   Top.bumpNode(SU);
3277 }
3278 
3279 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
3280   return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
3281                            /*RemoveKillFlags=*/true);
3282 }
3283 
3284 //===----------------------------------------------------------------------===//
3285 // ILP Scheduler. Currently for experimental analysis of heuristics.
3286 //===----------------------------------------------------------------------===//
3287 
3288 namespace {
3289 
3290 /// \brief Order nodes by the ILP metric.
3291 struct ILPOrder {
3292   const SchedDFSResult *DFSResult = nullptr;
3293   const BitVector *ScheduledTrees = nullptr;
3294   bool MaximizeILP;
3295 
3296   ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3297 
3298   /// \brief Apply a less-than relation on node priority.
3299   ///
3300   /// (Return true if A comes after B in the Q.)
3301   bool operator()(const SUnit *A, const SUnit *B) const {
3302     unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3303     unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3304     if (SchedTreeA != SchedTreeB) {
3305       // Unscheduled trees have lower priority.
3306       if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3307         return ScheduledTrees->test(SchedTreeB);
3308 
3309       // Trees with shallower connections have lower priority.
3310       if (DFSResult->getSubtreeLevel(SchedTreeA)
3311           != DFSResult->getSubtreeLevel(SchedTreeB)) {
3312         return DFSResult->getSubtreeLevel(SchedTreeA)
3313           < DFSResult->getSubtreeLevel(SchedTreeB);
3314       }
3315     }
3316     if (MaximizeILP)
3317       return DFSResult->getILP(A) < DFSResult->getILP(B);
3318     else
3319       return DFSResult->getILP(A) > DFSResult->getILP(B);
3320   }
3321 };
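
// Worked example (the ILP values are invented): for two nodes in the same DFS
// subtree, if DFSResult->getILP(A) == 5 and getILP(B) == 3, then with
// MaximizeILP the comparator returns false for (A, B) and true for (B, A), so
// the heap operations in ILPScheduler below hand out A, the higher-ILP node,
// first; with MaximizeILP == false the ordering is inverted.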
3322 
3323 /// \brief Schedule based on the ILP metric.
3324 class ILPScheduler : public MachineSchedStrategy {
3325   ScheduleDAGMILive *DAG = nullptr;
3326   ILPOrder Cmp;
3327 
3328   std::vector<SUnit*> ReadyQ;
3329 
3330 public:
3331   ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3332 
3333   void initialize(ScheduleDAGMI *dag) override {
3334     assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3335     DAG = static_cast<ScheduleDAGMILive*>(dag);
3336     DAG->computeDFSResult();
3337     Cmp.DFSResult = DAG->getDFSResult();
3338     Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3339     ReadyQ.clear();
3340   }
3341 
3342   void registerRoots() override {
3343     // Restore the heap in ReadyQ with the updated DFS results.
3344     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3345   }
3346 
3347   /// Implement MachineSchedStrategy interface.
3348   /// -----------------------------------------
3349 
3350   /// Callback to select the highest priority node from the ready Q.
3351   SUnit *pickNode(bool &IsTopNode) override {
3352     if (ReadyQ.empty()) return nullptr;
3353     std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3354     SUnit *SU = ReadyQ.back();
3355     ReadyQ.pop_back();
3356     IsTopNode = false;
3357     DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
3358           << " ILP: " << DAG->getDFSResult()->getILP(SU)
3359           << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
3360           << DAG->getDFSResult()->getSubtreeLevel(
3361             DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
3362           << "Scheduling " << *SU->getInstr());
3363     return SU;
3364   }
3365 
3366   /// \brief Scheduler callback to notify that a new subtree is scheduled.
3367   void scheduleTree(unsigned SubtreeID) override {
3368     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3369   }
3370 
3371   /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3372   /// DFSResults, and resort the priority Q.
3373   void schedNode(SUnit *SU, bool IsTopNode) override {
3374     assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3375   }
3376 
3377   void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3378 
3379   void releaseBottomNode(SUnit *SU) override {
3380     ReadyQ.push_back(SU);
3381     std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3382   }
3383 };
3384 
3385 } // end anonymous namespace
3386 
3387 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3388   return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
3389 }
3390 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3391   return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
3392 }
3393 
3394 static MachineSchedRegistry ILPMaxRegistry(
3395   "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3396 static MachineSchedRegistry ILPMinRegistry(
3397   "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
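
// Usage note (hedged: this assumes the standard -misched=<name> option that
// MachineSchedRegistry entries plug into): the experimental strategies above
// can be selected from the llc command line, e.g.
//
//   llc -enable-misched -misched=ilpmax foo.ll
//
// to schedule each region bottom-up for maximum ILP.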
3398 
3399 //===----------------------------------------------------------------------===//
3400 // Machine Instruction Shuffler for Correctness Testing
3401 //===----------------------------------------------------------------------===//
3402 
3403 #ifndef NDEBUG
3404 namespace {
3405 
3406 /// Apply a less-than relation on the node order, which corresponds to the
3407 /// instruction order prior to scheduling. IsReverse implements greater-than.
3408 template<bool IsReverse>
3409 struct SUnitOrder {
3410   bool operator()(SUnit *A, SUnit *B) const {
3411     if (IsReverse)
3412       return A->NodeNum > B->NodeNum;
3413     else
3414       return A->NodeNum < B->NodeNum;
3415   }
3416 };
3417 
3418 /// Reorder instructions as much as possible.
3419 class InstructionShuffler : public MachineSchedStrategy {
3420   bool IsAlternating;
3421   bool IsTopDown;
3422 
3423   // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3424   // gives nodes with a higher number higher priority, causing the latest
3425   // instructions to be scheduled first.
3426   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
3427     TopQ;
3428   // When scheduling bottom-up, use greater-than as the queue priority.
3429   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
3430     BottomQ;
3431 
3432 public:
3433   InstructionShuffler(bool alternate, bool topdown)
3434     : IsAlternating(alternate), IsTopDown(topdown) {}
3435 
3436   void initialize(ScheduleDAGMI*) override {
3437     TopQ.clear();
3438     BottomQ.clear();
3439   }
3440 
3441   /// Implement MachineSchedStrategy interface.
3442   /// -----------------------------------------
3443 
3444   SUnit *pickNode(bool &IsTopNode) override {
3445     SUnit *SU;
3446     if (IsTopDown) {
3447       do {
3448         if (TopQ.empty()) return nullptr;
3449         SU = TopQ.top();
3450         TopQ.pop();
3451       } while (SU->isScheduled);
3452       IsTopNode = true;
3453     } else {
3454       do {
3455         if (BottomQ.empty()) return nullptr;
3456         SU = BottomQ.top();
3457         BottomQ.pop();
3458       } while (SU->isScheduled);
3459       IsTopNode = false;
3460     }
3461     if (IsAlternating)
3462       IsTopDown = !IsTopDown;
3463     return SU;
3464   }
3465 
3466   void schedNode(SUnit *SU, bool IsTopNode) override {}
3467 
3468   void releaseTopNode(SUnit *SU) override {
3469     TopQ.push(SU);
3470   }
3471   void releaseBottomNode(SUnit *SU) override {
3472     BottomQ.push(SU);
3473   }
3474 };
3475 
3476 } // end anonymous namespace
3477 
3478 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3479   bool Alternate = !ForceTopDown && !ForceBottomUp;
3480   bool TopDown = !ForceBottomUp;
3481   assert((TopDown || !ForceTopDown) &&
3482          "-misched-topdown incompatible with -misched-bottomup");
3483   return new ScheduleDAGMILive(
3484       C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
3485 }
3486 
3487 static MachineSchedRegistry ShufflerRegistry(
3488   "shuffle", "Shuffle machine instructions alternating directions",
3489   createInstructionShuffler);
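
// Usage note (hedged, same assumption about the -misched option as above): in
// debug builds the shuffler can be selected with -misched=shuffle; combining
// it with the -misched-topdown or -misched-bottomup flags declared at the top
// of this file pins a single direction instead of alternating.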
3490 #endif // !NDEBUG
3491 
3492 //===----------------------------------------------------------------------===//
3493 // GraphWriter support for ScheduleDAGMILive.
3494 //===----------------------------------------------------------------------===//
3495 
3496 #ifndef NDEBUG
3497 namespace llvm {
3498 
3499 template<> struct GraphTraits<
3500   ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3501 
3502 template<>
3503 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3504   DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
3505 
3506   static std::string getGraphName(const ScheduleDAG *G) {
3507     return G->MF.getName();
3508   }
3509 
3510   static bool renderGraphFromBottomUp() {
3511     return true;
3512   }
3513 
3514   static bool isNodeHidden(const SUnit *Node) {
3515     if (ViewMISchedCutoff == 0)
3516       return false;
3517     return (Node->Preds.size() > ViewMISchedCutoff
3518          || Node->Succs.size() > ViewMISchedCutoff);
3519   }
3520 
3521   /// If you want to override the dot attributes printed for a particular
3522   /// edge, override this method.
3523   static std::string getEdgeAttributes(const SUnit *Node,
3524                                        SUnitIterator EI,
3525                                        const ScheduleDAG *Graph) {
3526     if (EI.isArtificialDep())
3527       return "color=cyan,style=dashed";
3528     if (EI.isCtrlDep())
3529       return "color=blue,style=dashed";
3530     return "";
3531   }
3532 
3533   static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3534     std::string Str;
3535     raw_string_ostream SS(Str);
3536     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3537     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3538       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3539     SS << "SU:" << SU->NodeNum;
3540     if (DFS)
3541       SS << " I:" << DFS->getNumInstrs(SU);
3542     return SS.str();
3543   }
3544   static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3545     return G->getGraphNodeLabel(SU);
3546   }
3547 
3548   static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3549     std::string Str("shape=Mrecord");
3550     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3551     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3552       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3553     if (DFS) {
3554       Str += ",style=filled,fillcolor=\"#";
3555       Str += DOT::getColorString(DFS->getSubtreeID(N));
3556       Str += '"';
3557     }
3558     return Str;
3559   }
3560 };
3561 
3562 } // end namespace llvm
3563 #endif // NDEBUG
3564 
3565 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3566 /// rendered using 'dot'.
3567 ///
3568 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3569 #ifndef NDEBUG
3570   ViewGraph(this, Name, false, Title);
3571 #else
3572   errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3573          << "systems with Graphviz or gv!\n";
3574 #endif  // NDEBUG
3575 }
3576 
3577 /// Out-of-line implementation with no arguments is handy for gdb.
3578 void ScheduleDAGMI::viewGraph() {
3579   viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3580 }
3581