xref: /llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp (revision 44be5a7fdc20a7f90d63dc18699a470e900bd3ba)
1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // MachineScheduler schedules machine instructions after phi elimination. It
10 // preserves LiveIntervals so it can be invoked before register allocation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/CodeGen/MachineScheduler.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/BitVector.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/PriorityQueue.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/ADT/iterator_range.h"
23 #include "llvm/Analysis/AliasAnalysis.h"
24 #include "llvm/CodeGen/LiveInterval.h"
25 #include "llvm/CodeGen/LiveIntervals.h"
26 #include "llvm/CodeGen/MachineBasicBlock.h"
27 #include "llvm/CodeGen/MachineDominators.h"
28 #include "llvm/CodeGen/MachineFunction.h"
29 #include "llvm/CodeGen/MachineFunctionPass.h"
30 #include "llvm/CodeGen/MachineInstr.h"
31 #include "llvm/CodeGen/MachineLoopInfo.h"
32 #include "llvm/CodeGen/MachineOperand.h"
33 #include "llvm/CodeGen/MachinePassRegistry.h"
34 #include "llvm/CodeGen/MachineRegisterInfo.h"
35 #include "llvm/CodeGen/RegisterClassInfo.h"
36 #include "llvm/CodeGen/RegisterPressure.h"
37 #include "llvm/CodeGen/ScheduleDAG.h"
38 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
39 #include "llvm/CodeGen/ScheduleDAGMutation.h"
40 #include "llvm/CodeGen/ScheduleDFS.h"
41 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
42 #include "llvm/CodeGen/SlotIndexes.h"
43 #include "llvm/CodeGen/TargetFrameLowering.h"
44 #include "llvm/CodeGen/TargetInstrInfo.h"
45 #include "llvm/CodeGen/TargetLowering.h"
46 #include "llvm/CodeGen/TargetPassConfig.h"
47 #include "llvm/CodeGen/TargetRegisterInfo.h"
48 #include "llvm/CodeGen/TargetSchedule.h"
49 #include "llvm/CodeGen/TargetSubtargetInfo.h"
50 #include "llvm/CodeGenTypes/MachineValueType.h"
51 #include "llvm/Config/llvm-config.h"
52 #include "llvm/InitializePasses.h"
53 #include "llvm/MC/LaneBitmask.h"
54 #include "llvm/Pass.h"
55 #include "llvm/Support/CommandLine.h"
56 #include "llvm/Support/Compiler.h"
57 #include "llvm/Support/Debug.h"
58 #include "llvm/Support/ErrorHandling.h"
59 #include "llvm/Support/GraphWriter.h"
60 #include "llvm/Support/raw_ostream.h"
61 #include <algorithm>
62 #include <cassert>
63 #include <cstdint>
64 #include <iterator>
65 #include <limits>
66 #include <memory>
67 #include <string>
68 #include <tuple>
69 #include <utility>
70 #include <vector>
71 
72 using namespace llvm;
73 
74 #define DEBUG_TYPE "machine-scheduler"
75 
76 STATISTIC(NumClustered, "Number of load/store pairs clustered");
77 
78 namespace llvm {
79 
80 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
81                            cl::desc("Force top-down list scheduling"));
82 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
83                             cl::desc("Force bottom-up list scheduling"));
84 namespace MISchedPostRASched {
85 enum Direction {
86   TopDown,
87   BottomUp,
88 };
89 } // end namespace MISchedPostRASched
90 cl::opt<MISchedPostRASched::Direction> PostRADirection(
91     "misched-postra-direction", cl::Hidden,
92     cl::desc("Post reg-alloc list scheduling direction"),
93     // Default to top-down because it was implemented first and existing targets
94     // expect that behavior by default.
95     cl::init(MISchedPostRASched::TopDown),
96     cl::values(clEnumValN(MISchedPostRASched::TopDown, "topdown",
97                           "Force top-down post reg-alloc list scheduling"),
98                clEnumValN(MISchedPostRASched::BottomUp, "bottomup",
99                           "Force bottom-up post reg-alloc list scheduling")));
100 cl::opt<bool>
101 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
102                        cl::desc("Print critical path length to stdout"));
103 
104 cl::opt<bool> VerifyScheduling(
105     "verify-misched", cl::Hidden,
106     cl::desc("Verify machine instrs before and after machine scheduling"));
107 
108 #ifndef NDEBUG
109 cl::opt<bool> ViewMISchedDAGs(
110     "view-misched-dags", cl::Hidden,
111     cl::desc("Pop up a window to show MISched dags after they are processed"));
112 cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
113                         cl::desc("Print schedule DAGs"));
114 cl::opt<bool> MISchedDumpReservedCycles(
115     "misched-dump-reserved-cycles", cl::Hidden, cl::init(false),
116     cl::desc("Dump resource usage at schedule boundary."));
117 cl::opt<bool> MischedDetailResourceBooking(
118     "misched-detail-resource-booking", cl::Hidden, cl::init(false),
119     cl::desc("Show details of invoking getNextResourceCycle."));
120 #else
121 const bool ViewMISchedDAGs = false;
122 const bool PrintDAGs = false;
123 const bool MischedDetailResourceBooking = false;
124 #ifdef LLVM_ENABLE_DUMP
125 const bool MISchedDumpReservedCycles = false;
126 #endif // LLVM_ENABLE_DUMP
127 #endif // NDEBUG
128 
129 } // end namespace llvm
130 
131 #ifndef NDEBUG
132 /// In some situations a few uninteresting nodes depend on nearly all other
133 /// nodes in the graph; provide a cutoff to hide them.
134 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
135   cl::desc("Hide nodes with more predecessors/successors than cutoff"));
136 
137 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
138   cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
139 
140 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
141   cl::desc("Only schedule this function"));
142 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
143                                         cl::desc("Only schedule this MBB#"));
144 #endif // NDEBUG
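
// In builds with assertions enabled, the debug-only options above can be
// combined to narrow down a scheduling problem. A hypothetical invocation
// (input file, function name, block number, and cutoff are placeholders):
//
//   llc foo.ll -misched-only-func=foo -misched-only-block=2 -misched-cutoff=20
//
// restricts scheduling to block #2 of function 'foo' and stops scheduling
// after roughly 20 instructions.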
145 
146 /// Avoid quadratic complexity in unusually large basic blocks by limiting the
147 /// size of the ready lists.
148 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
149   cl::desc("Limit ready list to N instructions"), cl::init(256));
150 
151 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
152   cl::desc("Enable register pressure scheduling."), cl::init(true));
153 
154 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
155   cl::desc("Enable cyclic critical path analysis."), cl::init(true));
156 
157 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
158                                         cl::desc("Enable memop clustering."),
159                                         cl::init(true));
160 static cl::opt<bool>
161     ForceFastCluster("force-fast-cluster", cl::Hidden,
162                      cl::desc("Switch to fast cluster algorithm with the loss "
163                               "of some fusion opportunities"),
164                      cl::init(false));
165 static cl::opt<unsigned>
166     FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
167                          cl::desc("The threshold for fast cluster"),
168                          cl::init(1000));
169 
170 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
171 static cl::opt<bool> MISchedDumpScheduleTrace(
172     "misched-dump-schedule-trace", cl::Hidden, cl::init(false),
173     cl::desc("Dump resource usage at schedule boundary."));
174 static cl::opt<unsigned>
175     HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
176                    cl::desc("Set width of the columns with "
177                             "the resources and schedule units"),
178                    cl::init(19));
179 static cl::opt<unsigned>
180     ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
181              cl::desc("Set width of the columns showing resource booking."),
182              cl::init(5));
183 static cl::opt<bool> MISchedSortResourcesInTrace(
184     "misched-sort-resources-in-trace", cl::Hidden, cl::init(true),
185     cl::desc("Sort the resources printed in the dump trace"));
186 #endif
187 
188 static cl::opt<unsigned>
189     MIResourceCutOff("misched-resource-cutoff", cl::Hidden,
190                      cl::desc("Number of intervals to track"), cl::init(10));
191 
192 // DAG subtrees must have at least this many nodes.
193 static const unsigned MinSubtreeSize = 8;
194 
195 // Pin the vtables to this file.
196 void MachineSchedStrategy::anchor() {}
197 
198 void ScheduleDAGMutation::anchor() {}
199 
200 //===----------------------------------------------------------------------===//
201 // Machine Instruction Scheduling Pass and Registry
202 //===----------------------------------------------------------------------===//
203 
204 MachineSchedContext::MachineSchedContext() {
205   RegClassInfo = new RegisterClassInfo();
206 }
207 
208 MachineSchedContext::~MachineSchedContext() {
209   delete RegClassInfo;
210 }
211 
212 namespace {
213 
214 /// Base class for a machine scheduler class that can run at any point.
215 class MachineSchedulerBase : public MachineSchedContext,
216                              public MachineFunctionPass {
217 public:
218   MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
219 
220   void print(raw_ostream &O, const Module* = nullptr) const override;
221 
222 protected:
223   void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
224 };
225 
226 /// MachineScheduler runs after coalescing and before register allocation.
227 class MachineScheduler : public MachineSchedulerBase {
228 public:
229   MachineScheduler();
230 
231   void getAnalysisUsage(AnalysisUsage &AU) const override;
232 
233   bool runOnMachineFunction(MachineFunction&) override;
234 
235   static char ID; // Class identification, replacement for typeinfo
236 
237 protected:
238   ScheduleDAGInstrs *createMachineScheduler();
239 };
240 
241 /// PostMachineScheduler runs shortly before code emission.
242 class PostMachineScheduler : public MachineSchedulerBase {
243 public:
244   PostMachineScheduler();
245 
246   void getAnalysisUsage(AnalysisUsage &AU) const override;
247 
248   bool runOnMachineFunction(MachineFunction&) override;
249 
250   static char ID; // Class identification, replacement for typeinfo
251 
252 protected:
253   ScheduleDAGInstrs *createPostMachineScheduler();
254 };
255 
256 } // end anonymous namespace
257 
258 char MachineScheduler::ID = 0;
259 
260 char &llvm::MachineSchedulerID = MachineScheduler::ID;
261 
262 INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
263                       "Machine Instruction Scheduler", false, false)
264 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
265 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
266 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
267 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
268 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
269 INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
270                     "Machine Instruction Scheduler", false, false)
271 
272 MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
273   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
274 }
275 
276 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
277   AU.setPreservesCFG();
278   AU.addRequired<MachineDominatorTree>();
279   AU.addRequired<MachineLoopInfo>();
280   AU.addRequired<AAResultsWrapperPass>();
281   AU.addRequired<TargetPassConfig>();
282   AU.addRequired<SlotIndexes>();
283   AU.addPreserved<SlotIndexes>();
284   AU.addRequired<LiveIntervals>();
285   AU.addPreserved<LiveIntervals>();
286   MachineFunctionPass::getAnalysisUsage(AU);
287 }
288 
289 char PostMachineScheduler::ID = 0;
290 
291 char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
292 
293 INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",
294                       "PostRA Machine Instruction Scheduler", false, false)
295 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
296 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
297 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
298 INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",
299                     "PostRA Machine Instruction Scheduler", false, false)
300 
301 PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
302   initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
303 }
304 
305 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
306   AU.setPreservesCFG();
307   AU.addRequired<MachineDominatorTree>();
308   AU.addRequired<MachineLoopInfo>();
309   AU.addRequired<AAResultsWrapperPass>();
310   AU.addRequired<TargetPassConfig>();
311   MachineFunctionPass::getAnalysisUsage(AU);
312 }
313 
314 MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>
315     MachineSchedRegistry::Registry;
316 
317 /// A dummy default scheduler factory indicates whether the scheduler
318 /// is overridden on the command line.
319 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
320   return nullptr;
321 }
322 
323 /// MachineSchedOpt allows command line selection of the scheduler.
324 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
325                RegisterPassParser<MachineSchedRegistry>>
326 MachineSchedOpt("misched",
327                 cl::init(&useDefaultMachineSched), cl::Hidden,
328                 cl::desc("Machine instruction scheduler to use"));
329 
330 static MachineSchedRegistry
331 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
332                      useDefaultMachineSched);
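
// As an illustration of the registry mechanism, a target or plugin can make
// its own scheduler selectable via -misched. The names createMySched and
// MySchedRegistry below are hypothetical; MachineSchedRegistry,
// ScheduleDAGMILive, and GenericScheduler are the existing types:
//
//   static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
//     // Reuse the generic strategy; a custom MachineSchedStrategy could be
//     // passed instead.
//     return new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
//   }
//   static MachineSchedRegistry
//   MySchedRegistry("my-sched", "Illustrative scheduler registration",
//                   createMySched);
//
// After registration, -misched=my-sched selects it instead of the default.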
333 
334 static cl::opt<bool> EnableMachineSched(
335     "enable-misched",
336     cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
337     cl::Hidden);
338 
339 static cl::opt<bool> EnablePostRAMachineSched(
340     "enable-post-misched",
341     cl::desc("Enable the post-ra machine instruction scheduling pass."),
342     cl::init(true), cl::Hidden);
343 
344 /// Decrement this iterator until reaching the top or a non-debug instr.
345 static MachineBasicBlock::const_iterator
346 priorNonDebug(MachineBasicBlock::const_iterator I,
347               MachineBasicBlock::const_iterator Beg) {
348   assert(I != Beg && "reached the top of the region, cannot decrement");
349   while (--I != Beg) {
350     if (!I->isDebugOrPseudoInstr())
351       break;
352   }
353   return I;
354 }
355 
356 /// Non-const version.
357 static MachineBasicBlock::iterator
358 priorNonDebug(MachineBasicBlock::iterator I,
359               MachineBasicBlock::const_iterator Beg) {
360   return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
361       .getNonConstIterator();
362 }
363 
364 /// If this iterator is a debug value, increment until reaching the End or a
365 /// non-debug instruction.
366 static MachineBasicBlock::const_iterator
367 nextIfDebug(MachineBasicBlock::const_iterator I,
368             MachineBasicBlock::const_iterator End) {
369   for(; I != End; ++I) {
370     if (!I->isDebugOrPseudoInstr())
371       break;
372   }
373   return I;
374 }
375 
376 /// Non-const version.
377 static MachineBasicBlock::iterator
378 nextIfDebug(MachineBasicBlock::iterator I,
379             MachineBasicBlock::const_iterator End) {
380   return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
381       .getNonConstIterator();
382 }
383 
384 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
385 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
386   // Select the scheduler, or set the default.
387   MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
388   if (Ctor != useDefaultMachineSched)
389     return Ctor(this);
390 
391   // Get the default scheduler set by the target for this function.
392   ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
393   if (Scheduler)
394     return Scheduler;
395 
396   // Default to GenericScheduler.
397   return createGenericSchedLive(this);
398 }
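
// A sketch of the second step above: a target usually overrides
// TargetPassConfig::createMachineScheduler to hand back its preferred DAG.
// MyTargetPassConfig is a hypothetical name; createGenericSchedLive,
// addMutation, and createLoadClusterDAGMutation are existing APIs.
//
//   ScheduleDAGInstrs *
//   MyTargetPassConfig::createMachineScheduler(MachineSchedContext *C) const {
//     ScheduleDAGMILive *DAG = createGenericSchedLive(C);
//     // Attach target-specific DAG mutations, e.g. memop clustering.
//     DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
//     return DAG;
//   }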
399 
400 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
401 /// the caller. We don't have a command line option to override the postRA
402 /// scheduler. The Target must configure it.
403 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
404   // Get the postRA scheduler set by the target for this function.
405   ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
406   if (Scheduler)
407     return Scheduler;
408 
409   // Default to GenericScheduler.
410   return createGenericSchedPostRA(this);
411 }
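
// The post-RA hook is analogous; a minimal sketch with a hypothetical
// MyTargetPassConfig, explicitly returning the generic post-RA scheduler:
//
//   ScheduleDAGInstrs *MyTargetPassConfig::createPostMachineScheduler(
//       MachineSchedContext *C) const {
//     return createGenericSchedPostRA(C);
//   }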
412 
413 /// Top-level MachineScheduler pass driver.
414 ///
415 /// Visit blocks in function order. Divide each block into scheduling regions
416 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
417 /// consistent with the DAG builder, which traverses the interior of the
418 /// scheduling regions bottom-up.
419 ///
420 /// This design avoids exposing scheduling boundaries to the DAG builder,
421 /// simplifying the DAG builder's support for "special" target instructions.
422 /// At the same time the design allows target schedulers to operate across
423 /// scheduling boundaries, for example to bundle the boundary instructions
424 /// without reordering them. This creates complexity, because the target
425 /// scheduler must update the RegionBegin and RegionEnd positions cached by
426 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
427 /// design would be to split blocks at scheduling boundaries, but LLVM has a
428 /// general bias against block splitting purely for implementation simplicity.
429 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
430   if (skipFunction(mf.getFunction()))
431     return false;
432 
433   if (EnableMachineSched.getNumOccurrences()) {
434     if (!EnableMachineSched)
435       return false;
436   } else if (!mf.getSubtarget().enableMachineScheduler())
437     return false;
438 
439   LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
440 
441   // Initialize the context of the pass.
442   MF = &mf;
443   MLI = &getAnalysis<MachineLoopInfo>();
444   MDT = &getAnalysis<MachineDominatorTree>();
445   PassConfig = &getAnalysis<TargetPassConfig>();
446   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
447 
448   LIS = &getAnalysis<LiveIntervals>();
449 
450   if (VerifyScheduling) {
451     LLVM_DEBUG(LIS->dump());
452     MF->verify(this, "Before machine scheduling.");
453   }
454   RegClassInfo->runOnMachineFunction(*MF);
455 
456   // Instantiate the selected scheduler for this target, function, and
457   // optimization level.
458   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
459   ScheduleDAGMI::DumpDirection D;
460   if (ForceTopDown)
461     D = ScheduleDAGMI::DumpDirection::TopDown;
462   else if (ForceBottomUp)
463     D = ScheduleDAGMI::DumpDirection::BottomUp;
464   else
465     D = ScheduleDAGMI::DumpDirection::Bidirectional;
466   Scheduler->setDumpDirection(D);
467   scheduleRegions(*Scheduler, false);
468 
469   LLVM_DEBUG(LIS->dump());
470   if (VerifyScheduling)
471     MF->verify(this, "After machine scheduling.");
472   return true;
473 }
474 
475 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
476   if (skipFunction(mf.getFunction()))
477     return false;
478 
479   if (EnablePostRAMachineSched.getNumOccurrences()) {
480     if (!EnablePostRAMachineSched)
481       return false;
482   } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
483     LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
484     return false;
485   }
486   LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
487 
488   // Initialize the context of the pass.
489   MF = &mf;
490   MLI = &getAnalysis<MachineLoopInfo>();
491   PassConfig = &getAnalysis<TargetPassConfig>();
492   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
493 
494   if (VerifyScheduling)
495     MF->verify(this, "Before post machine scheduling.");
496 
497   // Instantiate the selected scheduler for this target, function, and
498   // optimization level.
499   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
500   ScheduleDAGMI::DumpDirection D;
501   if (PostRADirection == MISchedPostRASched::TopDown)
502     D = ScheduleDAGMI::DumpDirection::TopDown;
503   else
504     D = ScheduleDAGMI::DumpDirection::BottomUp;
505   Scheduler->setDumpDirection(D);
506   scheduleRegions(*Scheduler, true);
507 
508   if (VerifyScheduling)
509     MF->verify(this, "After post machine scheduling.");
510   return true;
511 }
512 
513 /// Return true if the given instruction should not be included in a scheduling
514 /// region.
515 ///
516 /// MachineScheduler does not currently support scheduling across calls. To
517 /// handle calls, the DAG builder needs to be modified to create register
518 /// anti/output dependencies on the registers clobbered by the call's regmask
519 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
520 /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
521 /// the boundary, but there would be no benefit to postRA scheduling across
522 /// calls this late anyway.
523 static bool isSchedBoundary(MachineBasicBlock::iterator MI,
524                             MachineBasicBlock *MBB,
525                             MachineFunction *MF,
526                             const TargetInstrInfo *TII) {
527   return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
528 }
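
// For illustration, a block shaped like
//
//   %a = LOAD ...
//   %b = ADD %a, ...
//   CALL @fn
//   %c = MUL ...
//
// yields two scheduling regions: one ending at the CALL boundary that contains
// the LOAD and ADD, and one from the MUL to the end of the block. The CALL is
// a boundary, so it is never moved by the scheduler.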
529 
530 /// A region of an MBB for scheduling.
531 namespace {
532 struct SchedRegion {
533   /// RegionBegin is the first instruction in the scheduling region, and
534   /// RegionEnd is either MBB->end() or the scheduling boundary after the
535   /// last instruction in the scheduling region. These iterators cannot refer
536   /// to instructions outside of the identified scheduling region because
537   /// those may be reordered before scheduling this region.
538   MachineBasicBlock::iterator RegionBegin;
539   MachineBasicBlock::iterator RegionEnd;
540   unsigned NumRegionInstrs;
541 
542   SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
543               unsigned N) :
544     RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
545 };
546 } // end anonymous namespace
547 
548 using MBBRegionsVector = SmallVector<SchedRegion, 16>;
549 
550 static void
551 getSchedRegions(MachineBasicBlock *MBB,
552                 MBBRegionsVector &Regions,
553                 bool RegionsTopDown) {
554   MachineFunction *MF = MBB->getParent();
555   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
556 
557   MachineBasicBlock::iterator I = nullptr;
558   for(MachineBasicBlock::iterator RegionEnd = MBB->end();
559       RegionEnd != MBB->begin(); RegionEnd = I) {
560 
561     // Avoid decrementing RegionEnd for blocks with no terminator.
562     if (RegionEnd != MBB->end() ||
563         isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
564       --RegionEnd;
565     }
566 
567     // The next region starts above the previous region. Look backward in the
568     // instruction stream until we find the nearest boundary.
569     unsigned NumRegionInstrs = 0;
570     I = RegionEnd;
571     for (;I != MBB->begin(); --I) {
572       MachineInstr &MI = *std::prev(I);
573       if (isSchedBoundary(&MI, &*MBB, MF, TII))
574         break;
575       if (!MI.isDebugOrPseudoInstr()) {
576         // MBB::size() uses instr_iterator to count. Here we need a bundle to
577         // count as a single instruction.
578         ++NumRegionInstrs;
579       }
580     }
581 
582     // It's possible we found a scheduling region that only has debug
583     // instructions. Don't bother scheduling these.
584     if (NumRegionInstrs != 0)
585       Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
586   }
587 
588   if (RegionsTopDown)
589     std::reverse(Regions.begin(), Regions.end());
590 }
591 
592 /// Main driver for both MachineScheduler and PostMachineScheduler.
593 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
594                                            bool FixKillFlags) {
595   // Visit all machine basic blocks.
596   //
597   // TODO: Visit blocks in global postorder or postorder within the bottom-up
598   // loop tree. Then we can optionally compute global RegPressure.
599   for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
600        MBB != MBBEnd; ++MBB) {
601 
602     Scheduler.startBlock(&*MBB);
603 
604 #ifndef NDEBUG
605     if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
606       continue;
607     if (SchedOnlyBlock.getNumOccurrences()
608         && (int)SchedOnlyBlock != MBB->getNumber())
609       continue;
610 #endif
611 
612     // Break the block into scheduling regions [I, RegionEnd). RegionEnd
613     // points to the scheduling boundary at the bottom of the region. The DAG
614     // does not include RegionEnd, but the region does (i.e. the next
615     // RegionEnd is above the previous RegionBegin). If the current block has
616     // no terminator then RegionEnd == MBB->end() for the bottom region.
617     //
618     // All the regions of MBB are first found and stored in MBBRegions, which
619     // will be processed (MBB) top-down if initialized with true.
620     //
621     // The Scheduler may insert instructions during either schedule() or
622     // exitRegion(), even for empty regions. So the local iterators 'I' and
623     // 'RegionEnd' are invalid across these calls. Instructions must not be
624     // added to other regions than the current one without updating MBBRegions.
625 
626     MBBRegionsVector MBBRegions;
627     getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
628     for (const SchedRegion &R : MBBRegions) {
629       MachineBasicBlock::iterator I = R.RegionBegin;
630       MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
631       unsigned NumRegionInstrs = R.NumRegionInstrs;
632 
633       // Notify the scheduler of the region, even if we may skip scheduling
634       // it. Perhaps it still needs to be bundled.
635       Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
636 
637       // Skip empty scheduling regions (0 or 1 schedulable instructions).
638       if (I == RegionEnd || I == std::prev(RegionEnd)) {
639         // Close the current region. Bundle the terminator if needed.
640         // This invalidates 'RegionEnd' and 'I'.
641         Scheduler.exitRegion();
642         continue;
643       }
644       LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
645       LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
646                         << " " << MBB->getName() << "\n  From: " << *I
647                         << "    To: ";
648                  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
649                  else dbgs() << "End\n";
650                  dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
651       if (DumpCriticalPathLength) {
652         errs() << MF->getName();
653         errs() << ":%bb. " << MBB->getNumber();
654         errs() << " " << MBB->getName() << " \n";
655       }
656 
657       // Schedule a region: possibly reorder instructions.
658       // This invalidates the original region iterators.
659       Scheduler.schedule();
660 
661       // Close the current region.
662       Scheduler.exitRegion();
663     }
664     Scheduler.finishBlock();
665     // FIXME: Ideally, no further passes should rely on kill flags. However,
666     // thumb2 size reduction is currently an exception, so the PostMIScheduler
667     // needs to do this.
668     if (FixKillFlags)
669       Scheduler.fixupKills(*MBB);
670   }
671   Scheduler.finalizeSchedule();
672 }
673 
674 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
675   // unimplemented
676 }
677 
678 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
679 LLVM_DUMP_METHOD void ReadyQueue::dump() const {
680   dbgs() << "Queue " << Name << ": ";
681   for (const SUnit *SU : Queue)
682     dbgs() << SU->NodeNum << " ";
683   dbgs() << "\n";
684 }
685 #endif
686 
687 //===----------------------------------------------------------------------===//
688 // ScheduleDAGMI - Basic machine instruction scheduling. This is
689 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
690 // virtual registers.
691 //===----------------------------------------------------------------------===//
692 
693 // Provide a vtable anchor.
694 ScheduleDAGMI::~ScheduleDAGMI() = default;
695 
696 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
697 /// NumPredsLeft reaches zero, release the successor node.
698 ///
699 /// FIXME: Adjust SuccSU height based on MinLatency.
700 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
701   SUnit *SuccSU = SuccEdge->getSUnit();
702 
703   if (SuccEdge->isWeak()) {
704     --SuccSU->WeakPredsLeft;
705     if (SuccEdge->isCluster())
706       NextClusterSucc = SuccSU;
707     return;
708   }
709 #ifndef NDEBUG
710   if (SuccSU->NumPredsLeft == 0) {
711     dbgs() << "*** Scheduling failed! ***\n";
712     dumpNode(*SuccSU);
713     dbgs() << " has been released too many times!\n";
714     llvm_unreachable(nullptr);
715   }
716 #endif
717   // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
718   // CurrCycle may have advanced since then.
719   if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
720     SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
721 
722   --SuccSU->NumPredsLeft;
723   if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
724     SchedImpl->releaseTopNode(SuccSU);
725 }
726 
727 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
728 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
729   for (SDep &Succ : SU->Succs)
730     releaseSucc(SU, &Succ);
731 }
732 
733 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
734 /// NumSuccsLeft reaches zero, release the predecessor node.
735 ///
736 /// FIXME: Adjust PredSU height based on MinLatency.
737 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
738   SUnit *PredSU = PredEdge->getSUnit();
739 
740   if (PredEdge->isWeak()) {
741     --PredSU->WeakSuccsLeft;
742     if (PredEdge->isCluster())
743       NextClusterPred = PredSU;
744     return;
745   }
746 #ifndef NDEBUG
747   if (PredSU->NumSuccsLeft == 0) {
748     dbgs() << "*** Scheduling failed! ***\n";
749     dumpNode(*PredSU);
750     dbgs() << " has been released too many times!\n";
751     llvm_unreachable(nullptr);
752   }
753 #endif
754   // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
755   // CurrCycle may have advanced since then.
756   if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
757     PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
758 
759   --PredSU->NumSuccsLeft;
760   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
761     SchedImpl->releaseBottomNode(PredSU);
762 }
763 
764 /// releasePredecessors - Call releasePred on each of SU's predecessors.
765 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
766   for (SDep &Pred : SU->Preds)
767     releasePred(SU, &Pred);
768 }
769 
770 void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
771   ScheduleDAGInstrs::startBlock(bb);
772   SchedImpl->enterMBB(bb);
773 }
774 
775 void ScheduleDAGMI::finishBlock() {
776   SchedImpl->leaveMBB();
777   ScheduleDAGInstrs::finishBlock();
778 }
779 
780 /// enterRegion - Called back from PostMachineScheduler::runOnMachineFunction
781 /// after crossing a scheduling boundary. [begin, end) includes all instructions
782 /// in the region, including the boundary itself and single-instruction regions
783 /// that don't get scheduled.
784 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
785                                      MachineBasicBlock::iterator begin,
786                                      MachineBasicBlock::iterator end,
787                                      unsigned regioninstrs)
788 {
789   ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
790 
791   SchedImpl->initPolicy(begin, end, regioninstrs);
792 }
793 
794 /// This is normally called from the main scheduler loop but may also be invoked
795 /// by the scheduling strategy to perform additional code motion.
796 void ScheduleDAGMI::moveInstruction(
797   MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
798   // Advance RegionBegin if the first instruction moves down.
799   if (&*RegionBegin == MI)
800     ++RegionBegin;
801 
802   // Update the instruction stream.
803   BB->splice(InsertPos, BB, MI);
804 
805   // Update LiveIntervals
806   if (LIS)
807     LIS->handleMove(*MI, /*UpdateFlags=*/true);
808 
809   // Recede RegionBegin if an instruction moves above the first.
810   if (RegionBegin == InsertPos)
811     RegionBegin = MI;
812 }
813 
814 bool ScheduleDAGMI::checkSchedLimit() {
815 #if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
816   if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
817     CurrentTop = CurrentBottom;
818     return false;
819   }
820   ++NumInstrsScheduled;
821 #endif
822   return true;
823 }
824 
825 /// Per-region scheduling driver, called back from
826 /// PostMachineScheduler::runOnMachineFunction. This is a simplified driver
827 /// that does not consider liveness or register pressure. It is useful for
828 /// PostRA scheduling and potentially other custom schedulers.
829 void ScheduleDAGMI::schedule() {
830   LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
831   LLVM_DEBUG(SchedImpl->dumpPolicy());
832 
833   // Build the DAG.
834   buildSchedGraph(AA);
835 
836   postProcessDAG();
837 
838   SmallVector<SUnit*, 8> TopRoots, BotRoots;
839   findRootsAndBiasEdges(TopRoots, BotRoots);
840 
841   LLVM_DEBUG(dump());
842   if (PrintDAGs) dump();
843   if (ViewMISchedDAGs) viewGraph();
844 
845   // Initialize the strategy before modifying the DAG.
846   // This may initialize a DFSResult to be used for queue priority.
847   SchedImpl->initialize(this);
848 
849   // Initialize ready queues now that the DAG and priority data are finalized.
850   initQueues(TopRoots, BotRoots);
851 
852   bool IsTopNode = false;
853   while (true) {
854     LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
855     SUnit *SU = SchedImpl->pickNode(IsTopNode);
856     if (!SU) break;
857 
858     assert(!SU->isScheduled && "Node already scheduled");
859     if (!checkSchedLimit())
860       break;
861 
862     MachineInstr *MI = SU->getInstr();
863     if (IsTopNode) {
864       assert(SU->isTopReady() && "node still has unscheduled dependencies");
865       if (&*CurrentTop == MI)
866         CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
867       else
868         moveInstruction(MI, CurrentTop);
869     } else {
870       assert(SU->isBottomReady() && "node still has unscheduled dependencies");
871       MachineBasicBlock::iterator priorII =
872         priorNonDebug(CurrentBottom, CurrentTop);
873       if (&*priorII == MI)
874         CurrentBottom = priorII;
875       else {
876         if (&*CurrentTop == MI)
877           CurrentTop = nextIfDebug(++CurrentTop, priorII);
878         moveInstruction(MI, CurrentBottom);
879         CurrentBottom = MI;
880       }
881     }
882     // Notify the scheduling strategy before updating the DAG.
883     // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
884     // runs, it can then use the accurate ReadyCycle time to determine whether
885     // newly released nodes can move to the readyQ.
886     SchedImpl->schedNode(SU, IsTopNode);
887 
888     updateQueues(SU, IsTopNode);
889   }
890   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
891 
892   placeDebugValues();
893 
894   LLVM_DEBUG({
895     dbgs() << "*** Final schedule for "
896            << printMBBReference(*begin()->getParent()) << " ***\n";
897     dumpSchedule();
898     dbgs() << '\n';
899   });
900 }
901 
902 /// Apply each ScheduleDAGMutation step in order.
903 void ScheduleDAGMI::postProcessDAG() {
904   for (auto &m : Mutations)
905     m->apply(this);
906 }
907 
908 void ScheduleDAGMI::
909 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
910                       SmallVectorImpl<SUnit*> &BotRoots) {
911   for (SUnit &SU : SUnits) {
912     assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
913 
914     // Order predecessors so DFSResult follows the critical path.
915     SU.biasCriticalPath();
916 
917     // A SUnit is ready to top schedule if it has no predecessors.
918     if (!SU.NumPredsLeft)
919       TopRoots.push_back(&SU);
920     // A SUnit is ready to bottom schedule if it has no successors.
921     if (!SU.NumSuccsLeft)
922       BotRoots.push_back(&SU);
923   }
924   ExitSU.biasCriticalPath();
925 }
926 
927 /// Identify DAG roots and setup scheduler queues.
928 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
929                                ArrayRef<SUnit*> BotRoots) {
930   NextClusterSucc = nullptr;
931   NextClusterPred = nullptr;
932 
933   // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
934   //
935   // Nodes with unreleased weak edges can still be roots.
936   // Release top roots in forward order.
937   for (SUnit *SU : TopRoots)
938     SchedImpl->releaseTopNode(SU);
939 
940   // Release bottom roots in reverse order so the higher priority nodes appear
941   // first. This is more natural and slightly more efficient.
942   for (SmallVectorImpl<SUnit*>::const_reverse_iterator
943          I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
944     SchedImpl->releaseBottomNode(*I);
945   }
946 
947   releaseSuccessors(&EntrySU);
948   releasePredecessors(&ExitSU);
949 
950   SchedImpl->registerRoots();
951 
952   // Advance past initial DebugValues.
953   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
954   CurrentBottom = RegionEnd;
955 }
956 
957 /// Update scheduler queues after scheduling an instruction.
958 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
959   // Release dependent instructions for scheduling.
960   if (IsTopNode)
961     releaseSuccessors(SU);
962   else
963     releasePredecessors(SU);
964 
965   SU->isScheduled = true;
966 }
967 
968 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
969 void ScheduleDAGMI::placeDebugValues() {
970   // If first instruction was a DBG_VALUE then put it back.
971   if (FirstDbgValue) {
972     BB->splice(RegionBegin, BB, FirstDbgValue);
973     RegionBegin = FirstDbgValue;
974   }
975 
976   for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
977          DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
978     std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
979     MachineInstr *DbgValue = P.first;
980     MachineBasicBlock::iterator OrigPrevMI = P.second;
981     if (&*RegionBegin == DbgValue)
982       ++RegionBegin;
983     BB->splice(std::next(OrigPrevMI), BB, DbgValue);
984     if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
985       RegionEnd = DbgValue;
986   }
987 }
988 
989 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
990 static const char *scheduleTableLegend = "  i: issue\n  x: resource booked";
991 
992 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceTopDown() const {
993   // Bail out when there is no schedule model to query.
994   if (!SchedModel.hasInstrSchedModel())
995     return;
996 
997   // Nothing to show if there are fewer than two instructions.
998   if (BB->size() < 2)
999     return;
1000 
1001   dbgs() << " * Schedule table (TopDown):\n";
1002   dbgs() << scheduleTableLegend << "\n";
1003   const unsigned FirstCycle = getSUnit(&*(std::begin(*this)))->TopReadyCycle;
1004   unsigned LastCycle = getSUnit(&*(std::prev(std::end(*this))))->TopReadyCycle;
1005   for (MachineInstr &MI : *this) {
1006     SUnit *SU = getSUnit(&MI);
1007     if (!SU)
1008       continue;
1009     const MCSchedClassDesc *SC = getSchedClass(SU);
1010     for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
1011                                        PE = SchedModel.getWriteProcResEnd(SC);
1012          PI != PE; ++PI) {
1013       if (SU->TopReadyCycle + PI->ReleaseAtCycle - 1 > LastCycle)
1014         LastCycle = SU->TopReadyCycle + PI->ReleaseAtCycle - 1;
1015     }
1016   }
1017   // Print the header with the cycles
1018   dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
1019   for (unsigned C = FirstCycle; C <= LastCycle; ++C)
1020     dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
1021   dbgs() << "|\n";
1022 
1023   for (MachineInstr &MI : *this) {
1024     SUnit *SU = getSUnit(&MI);
1025     if (!SU) {
1026       dbgs() << "Missing SUnit\n";
1027       continue;
1028     }
1029     std::string NodeName("SU(");
1030     NodeName += std::to_string(SU->NodeNum) + ")";
1031     dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
1032     unsigned C = FirstCycle;
1033     for (; C <= LastCycle; ++C) {
1034       if (C == SU->TopReadyCycle)
1035         dbgs() << llvm::left_justify("| i", ColWidth);
1036       else
1037         dbgs() << llvm::left_justify("|", ColWidth);
1038     }
1039     dbgs() << "|\n";
1040     const MCSchedClassDesc *SC = getSchedClass(SU);
1041 
1042     SmallVector<MCWriteProcResEntry, 4> ResourcesIt(
1043         make_range(SchedModel.getWriteProcResBegin(SC),
1044                    SchedModel.getWriteProcResEnd(SC)));
1045 
1046     if (MISchedSortResourcesInTrace)
1047       llvm::stable_sort(ResourcesIt,
1048                         [](const MCWriteProcResEntry &LHS,
1049                            const MCWriteProcResEntry &RHS) -> bool {
1050                           return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||
1051                                  (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
1052                                   LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
1053                         });
1054     for (const MCWriteProcResEntry &PI : ResourcesIt) {
1055       C = FirstCycle;
1056       const std::string ResName =
1057           SchedModel.getResourceName(PI.ProcResourceIdx);
1058       dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
1059       for (; C < SU->TopReadyCycle + PI.AcquireAtCycle; ++C) {
1060         dbgs() << llvm::left_justify("|", ColWidth);
1061       }
1062       for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1063            ++I, ++C)
1064         dbgs() << llvm::left_justify("| x", ColWidth);
1065       while (C++ <= LastCycle)
1066         dbgs() << llvm::left_justify("|", ColWidth);
1067       // Place end char
1068       dbgs() << "| \n";
1069     }
1070   }
1071 }
1072 
1073 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const {
1074   // Bail out when there is no schedule model to query.
1075   if (!SchedModel.hasInstrSchedModel())
1076     return;
1077 
1078   // Nothing to show if there are fewer than two instructions.
1079   if (BB->size() < 2)
1080     return;
1081 
1082   dbgs() << " * Schedule table (BottomUp):\n";
1083   dbgs() << scheduleTableLegend << "\n";
1084 
1085   const int FirstCycle = getSUnit(&*(std::begin(*this)))->BotReadyCycle;
1086   int LastCycle = getSUnit(&*(std::prev(std::end(*this))))->BotReadyCycle;
1087   for (MachineInstr &MI : *this) {
1088     SUnit *SU = getSUnit(&MI);
1089     if (!SU)
1090       continue;
1091     const MCSchedClassDesc *SC = getSchedClass(SU);
1092     for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
1093                                        PE = SchedModel.getWriteProcResEnd(SC);
1094          PI != PE; ++PI) {
1095       if ((int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1 < LastCycle)
1096         LastCycle = (int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1;
1097     }
1098   }
1099   // Print the header with the cycles
1100   dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
1101   for (int C = FirstCycle; C >= LastCycle; --C)
1102     dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
1103   dbgs() << "|\n";
1104 
1105   for (MachineInstr &MI : *this) {
1106     SUnit *SU = getSUnit(&MI);
1107     if (!SU) {
1108       dbgs() << "Missing SUnit\n";
1109       continue;
1110     }
1111     std::string NodeName("SU(");
1112     NodeName += std::to_string(SU->NodeNum) + ")";
1113     dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
1114     int C = FirstCycle;
1115     for (; C >= LastCycle; --C) {
1116       if (C == (int)SU->BotReadyCycle)
1117         dbgs() << llvm::left_justify("| i", ColWidth);
1118       else
1119         dbgs() << llvm::left_justify("|", ColWidth);
1120     }
1121     dbgs() << "|\n";
1122     const MCSchedClassDesc *SC = getSchedClass(SU);
1123     SmallVector<MCWriteProcResEntry, 4> ResourcesIt(
1124         make_range(SchedModel.getWriteProcResBegin(SC),
1125                    SchedModel.getWriteProcResEnd(SC)));
1126 
1127     if (MISchedSortResourcesInTrace)
1128       llvm::stable_sort(ResourcesIt,
1129                         [](const MCWriteProcResEntry &LHS,
1130                            const MCWriteProcResEntry &RHS) -> bool {
1131                           return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||
1132                                  (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
1133                                   LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
1134                         });
1135     for (const MCWriteProcResEntry &PI : ResourcesIt) {
1136       C = FirstCycle;
1137       const std::string ResName =
1138           SchedModel.getResourceName(PI.ProcResourceIdx);
1139       dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
1140       for (; C > ((int)SU->BotReadyCycle - (int)PI.AcquireAtCycle); --C) {
1141         dbgs() << llvm::left_justify("|", ColWidth);
1142       }
1143       for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1144            ++I, --C)
1145         dbgs() << llvm::left_justify("| x", ColWidth);
1146       while (C-- >= LastCycle)
1147         dbgs() << llvm::left_justify("|", ColWidth);
1148       // Place end char
1149       dbgs() << "| \n";
1150     }
1151   }
1152 }
1153 #endif
1154 
1155 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1156 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
1157   if (MISchedDumpScheduleTrace) {
1158     if (DumpDir == DumpDirection::TopDown)
1159       dumpScheduleTraceTopDown();
1160     else if (DumpDir == DumpDirection::BottomUp)
1161       dumpScheduleTraceBottomUp();
1162     else if (DumpDir == DumpDirection::Bidirectional) {
1163       dbgs() << "* Schedule table (Bidirectional): not implemented\n";
1164     } else {
1165       dbgs() << "* Schedule table: DumpDirection not set.\n";
1166     }
1167   }
1168 
1169   for (MachineInstr &MI : *this) {
1170     if (SUnit *SU = getSUnit(&MI))
1171       dumpNode(*SU);
1172     else
1173       dbgs() << "Missing SUnit\n";
1174   }
1175 }
1176 #endif
1177 
1178 //===----------------------------------------------------------------------===//
1179 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
1180 // preservation.
1181 //===----------------------------------------------------------------------===//
1182 
1183 ScheduleDAGMILive::~ScheduleDAGMILive() {
1184   delete DFSResult;
1185 }
1186 
1187 void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
1188   const MachineInstr &MI = *SU.getInstr();
1189   for (const MachineOperand &MO : MI.operands()) {
1190     if (!MO.isReg())
1191       continue;
1192     if (!MO.readsReg())
1193       continue;
1194     if (TrackLaneMasks && !MO.isUse())
1195       continue;
1196 
1197     Register Reg = MO.getReg();
1198     if (!Reg.isVirtual())
1199       continue;
1200 
1201     // Ignore re-defs.
1202     if (TrackLaneMasks) {
1203       bool FoundDef = false;
1204       for (const MachineOperand &MO2 : MI.all_defs()) {
1205         if (MO2.getReg() == Reg && !MO2.isDead()) {
1206           FoundDef = true;
1207           break;
1208         }
1209       }
1210       if (FoundDef)
1211         continue;
1212     }
1213 
1214     // Record this local VReg use.
1215     VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
1216     for (; UI != VRegUses.end(); ++UI) {
1217       if (UI->SU == &SU)
1218         break;
1219     }
1220     if (UI == VRegUses.end())
1221       VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
1222   }
1223 }
1224 
1225 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
1226 /// crossing a scheduling boundary. [begin, end) includes all instructions in
1227 /// the region, including the boundary itself and single-instruction regions
1228 /// that don't get scheduled.
1229 void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
1230                                 MachineBasicBlock::iterator begin,
1231                                 MachineBasicBlock::iterator end,
1232                                 unsigned regioninstrs)
1233 {
1234   // ScheduleDAGMI initializes SchedImpl's per-region policy.
1235   ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
1236 
1237   // For convenience remember the end of the liveness region.
1238   LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
1239 
1240   SUPressureDiffs.clear();
1241 
1242   ShouldTrackPressure = SchedImpl->shouldTrackPressure();
1243   ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
1244 
1245   assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
1246          "ShouldTrackLaneMasks requires ShouldTrackPressure");
1247 }
1248 
1249 // Set up the register pressure trackers for the top scheduled and bottom
1250 // scheduled regions.
1251 void ScheduleDAGMILive::initRegPressure() {
1252   VRegUses.clear();
1253   VRegUses.setUniverse(MRI.getNumVirtRegs());
1254   for (SUnit &SU : SUnits)
1255     collectVRegUses(SU);
1256 
1257   TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
1258                     ShouldTrackLaneMasks, false);
1259   BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1260                     ShouldTrackLaneMasks, false);
1261 
1262   // Close the RPTracker to finalize live ins.
1263   RPTracker.closeRegion();
1264 
1265   LLVM_DEBUG(RPTracker.dump());
1266 
1267   // Initialize the live ins and live outs.
1268   TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1269   BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1270 
1271   // Close one end of the tracker so we can call
1272   // getMaxUpward/DownwardPressureDelta before advancing across any
1273   // instructions. This converts currently live regs into live ins/outs.
1274   TopRPTracker.closeTop();
1275   BotRPTracker.closeBottom();
1276 
1277   BotRPTracker.initLiveThru(RPTracker);
1278   if (!BotRPTracker.getLiveThru().empty()) {
1279     TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
1280     LLVM_DEBUG(dbgs() << "Live Thru: ";
1281                dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1282   }
1283 
1284   // For each live out vreg reduce the pressure change associated with other
1285   // uses of the same vreg below the live-out reaching def.
1286   updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1287 
1288   // Account for liveness generated by the region boundary.
1289   if (LiveRegionEnd != RegionEnd) {
1290     SmallVector<RegisterMaskPair, 8> LiveUses;
1291     BotRPTracker.recede(&LiveUses);
1292     updatePressureDiffs(LiveUses);
1293   }
1294 
1295   LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1296              dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1297              dbgs() << "Bottom Pressure:\n";
1298              dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
1299 
1300   assert((BotRPTracker.getPos() == RegionEnd ||
1301           (RegionEnd->isDebugInstr() &&
1302            BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
1303          "Can't find the region bottom");
1304 
1305   // Cache the list of excess pressure sets in this region. This will also track
1306   // the max pressure in the scheduled code for these sets.
1307   RegionCriticalPSets.clear();
1308   const std::vector<unsigned> &RegionPressure =
1309     RPTracker.getPressure().MaxSetPressure;
1310   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1311     unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1312     if (RegionPressure[i] > Limit) {
1313       LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1314                         << " Actual " << RegionPressure[i] << "\n");
1315       RegionCriticalPSets.push_back(PressureChange(i));
1316     }
1317   }
1318   LLVM_DEBUG(dbgs() << "Excess PSets: ";
1319              for (const PressureChange &RCPS
1320                   : RegionCriticalPSets) dbgs()
1321              << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1322              dbgs() << "\n");
1323 }
1324 
1325 void ScheduleDAGMILive::
1326 updateScheduledPressure(const SUnit *SU,
1327                         const std::vector<unsigned> &NewMaxPressure) {
1328   const PressureDiff &PDiff = getPressureDiff(SU);
1329   unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1330   for (const PressureChange &PC : PDiff) {
1331     if (!PC.isValid())
1332       break;
1333     unsigned ID = PC.getPSet();
1334     while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1335       ++CritIdx;
1336     if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1337       if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1338           && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1339         RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1340     }
1341     unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1342     if (NewMaxPressure[ID] >= Limit - 2) {
1343       LLVM_DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
1344                         << NewMaxPressure[ID]
1345                         << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1346                         << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1347                         << " livethru)\n");
1348     }
1349   }
1350 }
1351 
1352 /// Update the PressureDiff array for liveness after scheduling this
1353 /// instruction.
1354 void ScheduleDAGMILive::updatePressureDiffs(
1355     ArrayRef<RegisterMaskPair> LiveUses) {
1356   for (const RegisterMaskPair &P : LiveUses) {
1357     Register Reg = P.RegUnit;
1358     /// FIXME: Currently assuming single-use physregs.
1359     if (!Reg.isVirtual())
1360       continue;
1361 
1362     if (ShouldTrackLaneMasks) {
1363       // If the register has just become live then other uses won't change
1364       // this fact anymore => decrement pressure.
1365       // If the register has just become dead then other uses make it come
1366       // back to life => increment pressure.
1367       bool Decrement = P.LaneMask.any();
1368 
1369       for (const VReg2SUnit &V2SU
1370            : make_range(VRegUses.find(Reg), VRegUses.end())) {
1371         SUnit &SU = *V2SU.SU;
1372         if (SU.isScheduled || &SU == &ExitSU)
1373           continue;
1374 
1375         PressureDiff &PDiff = getPressureDiff(&SU);
1376         PDiff.addPressureChange(Reg, Decrement, &MRI);
1377         LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU.NodeNum << ") "
1378                           << printReg(Reg, TRI) << ':'
1379                           << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1380                    dbgs() << "              to "; PDiff.dump(*TRI););
1381       }
1382     } else {
1383       assert(P.LaneMask.any());
1384       LLVM_DEBUG(dbgs() << "  LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1385       // This may be called before CurrentBottom has been initialized. However,
1386       // BotRPTracker must have a valid position. We want the value live into the
1387       // instruction or live out of the block, so ask for the previous
1388       // instruction's live-out.
1389       const LiveInterval &LI = LIS->getInterval(Reg);
1390       VNInfo *VNI;
1391       MachineBasicBlock::const_iterator I =
1392         nextIfDebug(BotRPTracker.getPos(), BB->end());
1393       if (I == BB->end())
1394         VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1395       else {
1396         LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1397         VNI = LRQ.valueIn();
1398       }
1399       // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1400       assert(VNI && "No live value at use.");
1401       for (const VReg2SUnit &V2SU
1402            : make_range(VRegUses.find(Reg), VRegUses.end())) {
1403         SUnit *SU = V2SU.SU;
1404         // If this use comes before the reaching def, it cannot be a last use,
1405         // so decrease its pressure change.
1406         if (!SU->isScheduled && SU != &ExitSU) {
1407           LiveQueryResult LRQ =
1408               LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1409           if (LRQ.valueIn() == VNI) {
1410             PressureDiff &PDiff = getPressureDiff(SU);
1411             PDiff.addPressureChange(Reg, true, &MRI);
1412             LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
1413                               << *SU->getInstr();
1414                        dbgs() << "              to "; PDiff.dump(*TRI););
1415           }
1416         }
1417       }
1418     }
1419   }
1420 }
1421 
1422 void ScheduleDAGMILive::dump() const {
1423 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1424   if (EntrySU.getInstr() != nullptr)
1425     dumpNodeAll(EntrySU);
1426   for (const SUnit &SU : SUnits) {
1427     dumpNodeAll(SU);
1428     if (ShouldTrackPressure) {
1429       dbgs() << "  Pressure Diff      : ";
1430       getPressureDiff(&SU).dump(*TRI);
1431     }
1432     dbgs() << "  Single Issue       : ";
1433     if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1434         SchedModel.mustEndGroup(SU.getInstr()))
1435       dbgs() << "true;";
1436     else
1437       dbgs() << "false;";
1438     dbgs() << '\n';
1439   }
1440   if (ExitSU.getInstr() != nullptr)
1441     dumpNodeAll(ExitSU);
1442 #endif
1443 }
1444 
1445 /// schedule - Called back from MachineScheduler::runOnMachineFunction
1446 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1447 /// only includes instructions that have DAG nodes, not scheduling boundaries.
1448 ///
1449 /// This is a skeletal driver, with all the functionality pushed into helpers,
1450 /// so that it can be easily extended by experimental schedulers. Generally,
1451 /// implementing MachineSchedStrategy should be sufficient to implement a new
1452 /// scheduling algorithm. However, if a scheduler further subclasses
1453 /// ScheduleDAGMILive then it will want to override this virtual method in order
1454 /// to update any specialized state.
1455 void ScheduleDAGMILive::schedule() {
1456   LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1457   LLVM_DEBUG(SchedImpl->dumpPolicy());
1458   buildDAGWithRegPressure();
1459 
1460   postProcessDAG();
1461 
1462   SmallVector<SUnit*, 8> TopRoots, BotRoots;
1463   findRootsAndBiasEdges(TopRoots, BotRoots);
1464 
1465   // Initialize the strategy before modifying the DAG.
1466   // This may initialize a DFSResult to be used for queue priority.
1467   SchedImpl->initialize(this);
1468 
1469   LLVM_DEBUG(dump());
1470   if (PrintDAGs) dump();
1471   if (ViewMISchedDAGs) viewGraph();
1472 
1473   // Initialize ready queues now that the DAG and priority data are finalized.
1474   initQueues(TopRoots, BotRoots);
1475 
1476   bool IsTopNode = false;
1477   while (true) {
1478     LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1479     SUnit *SU = SchedImpl->pickNode(IsTopNode);
1480     if (!SU) break;
1481 
1482     assert(!SU->isScheduled && "Node already scheduled");
1483     if (!checkSchedLimit())
1484       break;
1485 
1486     scheduleMI(SU, IsTopNode);
1487 
1488     if (DFSResult) {
1489       unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1490       if (!ScheduledTrees.test(SubtreeID)) {
1491         ScheduledTrees.set(SubtreeID);
1492         DFSResult->scheduleTree(SubtreeID);
1493         SchedImpl->scheduleTree(SubtreeID);
1494       }
1495     }
1496 
1497     // Notify the scheduling strategy after updating the DAG.
1498     SchedImpl->schedNode(SU, IsTopNode);
1499 
1500     updateQueues(SU, IsTopNode);
1501   }
1502   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1503 
1504   placeDebugValues();
1505 
1506   LLVM_DEBUG({
1507     dbgs() << "*** Final schedule for "
1508            << printMBBReference(*begin()->getParent()) << " ***\n";
1509     dumpSchedule();
1510     dbgs() << '\n';
1511   });
1512 }
1513 
1514 /// Build the DAG and set up three register pressure trackers.
1515 void ScheduleDAGMILive::buildDAGWithRegPressure() {
1516   if (!ShouldTrackPressure) {
1517     RPTracker.reset();
1518     RegionCriticalPSets.clear();
1519     buildSchedGraph(AA);
1520     return;
1521   }
1522 
1523   // Initialize the register pressure tracker used by buildSchedGraph.
1524   RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1525                  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1526 
1527   // Account for liveness generated by the region boundary.
1528   if (LiveRegionEnd != RegionEnd)
1529     RPTracker.recede();
1530 
1531   // Build the DAG, and compute current register pressure.
1532   buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1533 
1534   // Initialize top/bottom trackers after computing region pressure.
1535   initRegPressure();
1536 }
1537 
1538 void ScheduleDAGMILive::computeDFSResult() {
1539   if (!DFSResult)
1540     DFSResult = new SchedDFSResult(/*BottomUp*/true, MinSubtreeSize);
1541   DFSResult->clear();
1542   ScheduledTrees.clear();
1543   DFSResult->resize(SUnits.size());
1544   DFSResult->compute(SUnits);
1545   ScheduledTrees.resize(DFSResult->getNumSubtrees());
1546 }
1547 
1548 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1549 /// only provides the critical path for single block loops. To handle loops that
1550 /// span blocks, we could use the vreg path latencies provided by
1551 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1552 /// available for use in the scheduler.
1553 ///
1554 /// The cyclic path estimation identifies a def-use pair that crosses the back
1555 /// edge and considers the depth and height of the nodes. For example, consider
1556 /// the following instruction sequence where each instruction has unit latency
1557 /// and defines an eponymous virtual register:
1558 ///
1559 /// a->b(a,c)->c(b)->d(c)->exit
1560 ///
1561 /// The cyclic critical path is two cycles: b->c->b
1562 /// The acyclic critical path is four cycles: a->b->c->d->exit
1563 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1564 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1565 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1566 /// LiveInDepth = depth(b) = len(a->b) = 1
1567 ///
1568 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1569 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1570 /// CyclicCriticalPath = min(2, 2) = 2
1571 ///
1572 /// This could be relevant to PostRA scheduling, but is currently implemented
1573 /// assuming LiveIntervals.
1574 unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1575   // This only applies to single-block loops.
1576   if (!BB->isSuccessor(BB))
1577     return 0;
1578 
1579   unsigned MaxCyclicLatency = 0;
1580   // Visit each live out vreg def to find def/use pairs that cross iterations.
1581   for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1582     Register Reg = P.RegUnit;
1583     if (!Reg.isVirtual())
1584       continue;
1585     const LiveInterval &LI = LIS->getInterval(Reg);
1586     const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1587     if (!DefVNI)
1588       continue;
1589 
1590     MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1591     const SUnit *DefSU = getSUnit(DefMI);
1592     if (!DefSU)
1593       continue;
1594 
1595     unsigned LiveOutHeight = DefSU->getHeight();
1596     unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1597     // Visit all local users of the vreg def.
1598     for (const VReg2SUnit &V2SU
1599          : make_range(VRegUses.find(Reg), VRegUses.end())) {
1600       SUnit *SU = V2SU.SU;
1601       if (SU == &ExitSU)
1602         continue;
1603 
1604       // Only consider uses of the phi.
1605       LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1606       if (!LRQ.valueIn()->isPHIDef())
1607         continue;
1608 
1609       // Assume that a path spanning two iterations is a cycle, which could
1610       // overestimate in strange cases. This allows cyclic latency to be
1611       // estimated as the minimum slack of the vreg's depth or height.
1612       unsigned CyclicLatency = 0;
1613       if (LiveOutDepth > SU->getDepth())
1614         CyclicLatency = LiveOutDepth - SU->getDepth();
1615 
1616       unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1617       if (LiveInHeight > LiveOutHeight) {
1618         if (LiveInHeight - LiveOutHeight < CyclicLatency)
1619           CyclicLatency = LiveInHeight - LiveOutHeight;
1620       } else
1621         CyclicLatency = 0;
1622 
1623       LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1624                         << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1625       if (CyclicLatency > MaxCyclicLatency)
1626         MaxCyclicLatency = CyclicLatency;
1627     }
1628   }
1629   LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1630   return MaxCyclicLatency;
1631 }
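// Edge-case illustration for the loop above (hypothetical depths/heights): a
// def/use pair contributes nothing when there is no slack in either direction.
//
//   LiveOutDepth = 3, depth(use) = 4    -> no depth slack, CyclicLatency = 0.
//   LiveInHeight = 2, LiveOutHeight = 2 -> no height slack either, so the pair
//                                          leaves MaxCyclicLatency unchanged.
//
// Only pairs where both slacks are positive can raise MaxCyclicLatency, and
// then the smaller slack is used, as in the worked example in the function
// comment above.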
1632 
1633 /// Release ExitSU predecessors and set up scheduler queues. Re-position
1634 /// the Top RP tracker in case the region beginning has changed.
1635 void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1636                                    ArrayRef<SUnit*> BotRoots) {
1637   ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1638   if (ShouldTrackPressure) {
1639     assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1640     TopRPTracker.setPos(CurrentTop);
1641   }
1642 }
1643 
1644 /// Move an instruction and update register pressure.
1645 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1646   // Move the instruction to its new location in the instruction stream.
1647   MachineInstr *MI = SU->getInstr();
1648 
1649   if (IsTopNode) {
1650     assert(SU->isTopReady() && "node still has unscheduled dependencies");
1651     if (&*CurrentTop == MI)
1652       CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1653     else {
1654       moveInstruction(MI, CurrentTop);
1655       TopRPTracker.setPos(MI);
1656     }
1657 
1658     if (ShouldTrackPressure) {
1659       // Update top scheduled pressure.
1660       RegisterOperands RegOpers;
1661       RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1662       if (ShouldTrackLaneMasks) {
1663         // Adjust liveness and add missing dead+read-undef flags.
1664         SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1665         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1666       } else {
1667         // Adjust for missing dead-def flags.
1668         RegOpers.detectDeadDefs(*MI, *LIS);
1669       }
1670 
1671       TopRPTracker.advance(RegOpers);
1672       assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1673       LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1674                      TopRPTracker.getRegSetPressureAtPos(), TRI););
1675 
1676       updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1677     }
1678   } else {
1679     assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1680     MachineBasicBlock::iterator priorII =
1681       priorNonDebug(CurrentBottom, CurrentTop);
1682     if (&*priorII == MI)
1683       CurrentBottom = priorII;
1684     else {
1685       if (&*CurrentTop == MI) {
1686         CurrentTop = nextIfDebug(++CurrentTop, priorII);
1687         TopRPTracker.setPos(CurrentTop);
1688       }
1689       moveInstruction(MI, CurrentBottom);
1690       CurrentBottom = MI;
1691       BotRPTracker.setPos(CurrentBottom);
1692     }
1693     if (ShouldTrackPressure) {
1694       RegisterOperands RegOpers;
1695       RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1696       if (ShouldTrackLaneMasks) {
1697         // Adjust liveness and add missing dead+read-undef flags.
1698         SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1699         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1700       } else {
1701         // Adjust for missing dead-def flags.
1702         RegOpers.detectDeadDefs(*MI, *LIS);
1703       }
1704 
1705       if (BotRPTracker.getPos() != CurrentBottom)
1706         BotRPTracker.recedeSkipDebugValues();
1707       SmallVector<RegisterMaskPair, 8> LiveUses;
1708       BotRPTracker.recede(RegOpers, &LiveUses);
1709       assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1710       LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1711                      BotRPTracker.getRegSetPressureAtPos(), TRI););
1712 
1713       updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1714       updatePressureDiffs(LiveUses);
1715     }
1716   }
1717 }
1718 
1719 //===----------------------------------------------------------------------===//
1720 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1721 //===----------------------------------------------------------------------===//
1722 
1723 namespace {
1724 
1725 /// Post-process the DAG to create cluster edges between neighboring
1726 /// loads or between neighboring stores.
1727 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1728   struct MemOpInfo {
1729     SUnit *SU;
1730     SmallVector<const MachineOperand *, 4> BaseOps;
1731     int64_t Offset;
1732     LocationSize Width;
1733     bool OffsetIsScalable;
1734 
1735     MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
1736               int64_t Offset, bool OffsetIsScalable, LocationSize Width)
1737         : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
1738           Width(Width), OffsetIsScalable(OffsetIsScalable) {}
1739 
1740     static bool Compare(const MachineOperand *const &A,
1741                         const MachineOperand *const &B) {
1742       if (A->getType() != B->getType())
1743         return A->getType() < B->getType();
1744       if (A->isReg())
1745         return A->getReg() < B->getReg();
1746       if (A->isFI()) {
1747         const MachineFunction &MF = *A->getParent()->getParent()->getParent();
1748         const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
1749         bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1750                               TargetFrameLowering::StackGrowsDown;
1751         return StackGrowsDown ? A->getIndex() > B->getIndex()
1752                               : A->getIndex() < B->getIndex();
1753       }
1754 
1755       llvm_unreachable("MemOpClusterMutation only supports register or frame "
1756                        "index bases.");
1757     }
1758 
1759     bool operator<(const MemOpInfo &RHS) const {
1760       // FIXME: Don't compare everything twice. Maybe use C++20 three way
1761       // comparison instead when it's available.
1762       if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
1763                                        RHS.BaseOps.begin(), RHS.BaseOps.end(),
1764                                        Compare))
1765         return true;
1766       if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
1767                                        BaseOps.begin(), BaseOps.end(), Compare))
1768         return false;
1769       if (Offset != RHS.Offset)
1770         return Offset < RHS.Offset;
1771       return SU->NodeNum < RHS.SU->NodeNum;
1772     }
1773   };
1774 
1775   const TargetInstrInfo *TII;
1776   const TargetRegisterInfo *TRI;
1777   bool IsLoad;
1778   bool ReorderWhileClustering;
1779 
1780 public:
1781   BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1782                            const TargetRegisterInfo *tri, bool IsLoad,
1783                            bool ReorderWhileClustering)
1784       : TII(tii), TRI(tri), IsLoad(IsLoad),
1785         ReorderWhileClustering(ReorderWhileClustering) {}
1786 
1787   void apply(ScheduleDAGInstrs *DAGInstrs) override;
1788 
1789 protected:
1790   void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
1791                                 ScheduleDAGInstrs *DAG);
1792   void collectMemOpRecords(std::vector<SUnit> &SUnits,
1793                            SmallVectorImpl<MemOpInfo> &MemOpRecords);
1794   bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1795                    DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups);
1796 };
1797 
1798 class StoreClusterMutation : public BaseMemOpClusterMutation {
1799 public:
1800   StoreClusterMutation(const TargetInstrInfo *tii,
1801                        const TargetRegisterInfo *tri,
1802                        bool ReorderWhileClustering)
1803       : BaseMemOpClusterMutation(tii, tri, false, ReorderWhileClustering) {}
1804 };
1805 
1806 class LoadClusterMutation : public BaseMemOpClusterMutation {
1807 public:
1808   LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri,
1809                       bool ReorderWhileClustering)
1810       : BaseMemOpClusterMutation(tii, tri, true, ReorderWhileClustering) {}
1811 };
1812 
1813 } // end anonymous namespace
1814 
1815 namespace llvm {
1816 
1817 std::unique_ptr<ScheduleDAGMutation>
1818 createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1819                              const TargetRegisterInfo *TRI,
1820                              bool ReorderWhileClustering) {
1821   return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(
1822                                   TII, TRI, ReorderWhileClustering)
1823                             : nullptr;
1824 }
1825 
1826 std::unique_ptr<ScheduleDAGMutation>
1827 createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1828                               const TargetRegisterInfo *TRI,
1829                               bool ReorderWhileClustering) {
1830   return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(
1831                                   TII, TRI, ReorderWhileClustering)
1832                             : nullptr;
1833 }
1834 
1835 } // end namespace llvm
1836 
1837 // Sort all the loads/stores first. Then, for each load/store, check the
1838 // following loads/stores one by one until reaching the first non-dependent
1839 // one, and call the target hook to see whether they can be clustered.
1840 // If FastCluster is enabled, we assume that all the loads/stores have been
1841 // preprocessed and no longer have dependencies on each other.
1842 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1843     ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
1844     ScheduleDAGInstrs *DAG) {
1845   // Keep track of the current cluster length and bytes for each SUnit.
1846   DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo;
1847 
1848   // At this point, the `MemOpRecords` array must hold at least two mem ops.
1849   // Try to cluster the mem ops collected within it.
1850   for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1851     // The decision to cluster mem ops is made based on target-dependent logic.
1852     auto MemOpa = MemOpRecords[Idx];
1853 
1854     // Look for the next load/store to cluster with.
1855     unsigned NextIdx = Idx + 1;
1856     for (; NextIdx < End; ++NextIdx)
1857       // Skip if MemOpb has already been clustered or has a dependency on
1858       // MemOpa.
1859       if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
1860           (FastCluster ||
1861            (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
1862             !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
1863         break;
1864     if (NextIdx == End)
1865       continue;
1866 
1867     auto MemOpb = MemOpRecords[NextIdx];
1868     unsigned ClusterLength = 2;
1869     unsigned CurrentClusterBytes = MemOpa.Width.getValue().getKnownMinValue() +
1870                                    MemOpb.Width.getValue().getKnownMinValue();
1871     if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
1872       ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
1873       CurrentClusterBytes = SUnit2ClusterInfo[MemOpa.SU->NodeNum].second +
1874                             MemOpb.Width.getValue().getKnownMinValue();
1875     }
1876 
1877     if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,
1878                                   MemOpa.OffsetIsScalable, MemOpb.BaseOps,
1879                                   MemOpb.Offset, MemOpb.OffsetIsScalable,
1880                                   ClusterLength, CurrentClusterBytes))
1881       continue;
1882 
1883     SUnit *SUa = MemOpa.SU;
1884     SUnit *SUb = MemOpb.SU;
1885     if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)
1886       std::swap(SUa, SUb);
1887 
1888     // FIXME: Is this check really required?
1889     if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
1890       continue;
1891 
1892     LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1893                       << SUb->NodeNum << ")\n");
1894     ++NumClustered;
1895 
1896     if (IsLoad) {
1897       // Copy successor edges from SUa to SUb. Interleaving computation
1898       // dependent on SUa can prevent load combining due to register reuse.
1899       // Predecessor edges do not need to be copied from SUb to SUa since
1900       // nearby loads should have effectively the same inputs.
1901       for (const SDep &Succ : SUa->Succs) {
1902         if (Succ.getSUnit() == SUb)
1903           continue;
1904         LLVM_DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum
1905                           << ")\n");
1906         DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1907       }
1908     } else {
1909       // Copy predecessor edges from SUb to SUa to avoid the SUnits that SUb
1910       // depends on being scheduled in between SUb and SUa. Successor edges do
1911       // not need to be copied from SUa to SUb since nothing will depend on
1912       // stores.
1913       // Note that we don't need to care about memory dependencies: we won't
1914       // try to cluster two mem ops if they have any memory dependency.
1915       for (const SDep &Pred : SUb->Preds) {
1916         if (Pred.getSUnit() == SUa)
1917           continue;
1918         LLVM_DEBUG(dbgs() << "  Copy Pred SU(" << Pred.getSUnit()->NodeNum
1919                           << ")\n");
1920         DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
1921       }
1922     }
1923 
1924     SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
1925                                              CurrentClusterBytes};
1926 
1927     LLVM_DEBUG(dbgs() << "  Curr cluster length: " << ClusterLength
1928                       << ", Curr cluster bytes: " << CurrentClusterBytes
1929                       << "\n");
1930   }
1931 }
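// Illustrative walk (hypothetical SUnits, offsets and widths, not from a real
// DAG): after sorting, suppose MemOpRecords holds three 4-byte accesses off
// the same base register, SU0 @0, SU1 @4 and SU2 @8, with no dependencies.
//
//   Idx 0: candidate pair (SU0, SU1), ClusterLength = 2, bytes = 8; if
//          TII->shouldClusterMemOps agrees, record SU1 -> {2, 8}.
//   Idx 1: MemOpa is SU1, already present in SUnit2ClusterInfo, so the pair
//          (SU1, SU2) is offered as an extension: ClusterLength = 3,
//          bytes = 12; on success record SU2 -> {3, 12}.
//
// Each accepted pair adds a single SDep::Cluster edge, so a cluster grows as a
// chain rather than a clique.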
1932 
1933 void BaseMemOpClusterMutation::collectMemOpRecords(
1934     std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
1935   for (auto &SU : SUnits) {
1936     if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1937         (!IsLoad && !SU.getInstr()->mayStore()))
1938       continue;
1939 
1940     const MachineInstr &MI = *SU.getInstr();
1941     SmallVector<const MachineOperand *, 4> BaseOps;
1942     int64_t Offset;
1943     bool OffsetIsScalable;
1944     LocationSize Width = 0;
1945     if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
1946                                            OffsetIsScalable, Width, TRI)) {
1947       MemOpRecords.push_back(
1948           MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width));
1949 
1950       LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
1951                         << Offset << ", OffsetIsScalable: " << OffsetIsScalable
1952                         << ", Width: " << Width << "\n");
1953     }
1954 #ifndef NDEBUG
1955     for (const auto *Op : BaseOps)
1956       assert(Op);
1957 #endif
1958   }
1959 }
1960 
1961 bool BaseMemOpClusterMutation::groupMemOps(
1962     ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1963     DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) {
1964   bool FastCluster =
1965       ForceFastCluster ||
1966       MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
1967 
1968   for (const auto &MemOp : MemOps) {
1969     unsigned ChainPredID = DAG->SUnits.size();
1970     if (FastCluster) {
1971       for (const SDep &Pred : MemOp.SU->Preds) {
1972         // We only want to cluster mem ops that have the same ctrl (non-data)
1973         // pred, so that they have no ctrl dependency on each other. But for
1974         // store instrs, we can still cluster them if the pred is a load instr.
1975         if ((Pred.isCtrl() &&
1976              (IsLoad ||
1977               (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
1978             !Pred.isArtificial()) {
1979           ChainPredID = Pred.getSUnit()->NodeNum;
1980           break;
1981         }
1982       }
1983     } else
1984       ChainPredID = 0;
1985 
1986     Groups[ChainPredID].push_back(MemOp);
1987   }
1988   return FastCluster;
1989 }
1990 
1991 /// Callback from DAG postProcessing to create cluster edges for loads/stores.
1992 void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
1993   // Collect all the clusterable loads/stores
1994   SmallVector<MemOpInfo, 32> MemOpRecords;
1995   collectMemOpRecords(DAG->SUnits, MemOpRecords);
1996 
1997   if (MemOpRecords.size() < 2)
1998     return;
1999 
2000   // If the DAG is too complex, use a heuristic to put loads/stores without
2001   // dependencies into the same group, to avoid a compile-time blowup.
2002   // Note that some fusion pairs could be lost with this.
2003   DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups;
2004   bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
2005 
2006   for (auto &Group : Groups) {
2007     // Sort the loads/stores so that we can stop clustering as early as
2008     // possible.
2009     llvm::sort(Group.second);
2010 
2011     // Try to cluster all the neighboring loads/stores.
2012     clusterNeighboringMemOps(Group.second, FastCluster, DAG);
2013   }
2014 }
2015 
2016 //===----------------------------------------------------------------------===//
2017 // CopyConstrain - DAG post-processing to encourage copy elimination.
2018 //===----------------------------------------------------------------------===//
2019 
2020 namespace {
2021 
2022 /// Post-process the DAG to create weak edges from all uses of a copy to
2023 /// the one use that defines the copy's source vreg, most likely an induction
2024 /// variable increment.
2025 class CopyConstrain : public ScheduleDAGMutation {
2026   // Transient state.
2027   SlotIndex RegionBeginIdx;
2028 
2029   // RegionEndIdx is the slot index of the last non-debug instruction in the
2030   // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
2031   SlotIndex RegionEndIdx;
2032 
2033 public:
2034   CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
2035 
2036   void apply(ScheduleDAGInstrs *DAGInstrs) override;
2037 
2038 protected:
2039   void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
2040 };
2041 
2042 } // end anonymous namespace
2043 
2044 namespace llvm {
2045 
2046 std::unique_ptr<ScheduleDAGMutation>
2047 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
2048                                const TargetRegisterInfo *TRI) {
2049   return std::make_unique<CopyConstrain>(TII, TRI);
2050 }
2051 
2052 } // end namespace llvm
2053 
2054 /// constrainLocalCopy handles two possibilities:
2055 /// 1) Local src:
2056 /// I0:     = dst
2057 /// I1: src = ...
2058 /// I2:     = dst
2059 /// I3: dst = src (copy)
2060 /// (create pred->succ edges I0->I1, I2->I1)
2061 ///
2062 /// 2) Local copy:
2063 /// I0: dst = src (copy)
2064 /// I1:     = dst
2065 /// I2: src = ...
2066 /// I3:     = dst
2067 /// (create pred->succ edges I1->I2, I3->I2)
2068 ///
2069 /// Although the MachineScheduler is currently constrained to single blocks,
2070 /// this algorithm should handle extended blocks. An EBB is a set of
2071 /// contiguously numbered blocks such that the previous block in the EBB is
2072 /// always the single predecessor.
2073 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
2074   LiveIntervals *LIS = DAG->getLIS();
2075   MachineInstr *Copy = CopySU->getInstr();
2076 
2077   // Check for pure vreg copies.
2078   const MachineOperand &SrcOp = Copy->getOperand(1);
2079   Register SrcReg = SrcOp.getReg();
2080   if (!SrcReg.isVirtual() || !SrcOp.readsReg())
2081     return;
2082 
2083   const MachineOperand &DstOp = Copy->getOperand(0);
2084   Register DstReg = DstOp.getReg();
2085   if (!DstReg.isVirtual() || DstOp.isDead())
2086     return;
2087 
2088   // Check if either the dest or source is local. If it's live across a back
2089   // edge, it's not local. Note that if both vregs are live across the back
2090   // edge, we cannot successfully constrain the copy without cyclic scheduling.
2091   // If both the copy's source and dest are local live intervals, then we
2092   // should treat the dest as the global for the purpose of adding
2093   // constraints. This adds edges from source's other uses to the copy.
2094   unsigned LocalReg = SrcReg;
2095   unsigned GlobalReg = DstReg;
2096   LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
2097   if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
2098     LocalReg = DstReg;
2099     GlobalReg = SrcReg;
2100     LocalLI = &LIS->getInterval(LocalReg);
2101     if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
2102       return;
2103   }
2104   LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
2105 
2106   // Find the global segment after the start of the local LI.
2107   LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
2108   // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
2109   // local live range. We could create edges from other global uses to the local
2110   // start, but the coalescer should have already eliminated these cases, so
2111   // don't bother dealing with it.
2112   if (GlobalSegment == GlobalLI->end())
2113     return;
2114 
2115   // If GlobalSegment is killed at the LocalLI->start, the call to find()
2116   // returned the next global segment. But if GlobalSegment overlaps with
2117   // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
2118   // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
2119   if (GlobalSegment->contains(LocalLI->beginIndex()))
2120     ++GlobalSegment;
2121 
2122   if (GlobalSegment == GlobalLI->end())
2123     return;
2124 
2125   // Check if GlobalLI contains a hole in the vicinity of LocalLI.
2126   if (GlobalSegment != GlobalLI->begin()) {
2127     // Two address defs have no hole.
2128     if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
2129                                GlobalSegment->start)) {
2130       return;
2131     }
2132     // If the prior global segment may be defined by the same two-address
2133     // instruction that also defines LocalLI, then we can't make a hole here.
2134     if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
2135                                LocalLI->beginIndex())) {
2136       return;
2137     }
2138     // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
2139     // it would be a disconnected component in the live range.
2140     assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
2141            "Disconnected LRG within the scheduling region.");
2142   }
2143   MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
2144   if (!GlobalDef)
2145     return;
2146 
2147   SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
2148   if (!GlobalSU)
2149     return;
2150 
2151   // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
2152   // constraining the uses of the last local def to precede GlobalDef.
2153   SmallVector<SUnit*,8> LocalUses;
2154   const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
2155   MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
2156   SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
2157   for (const SDep &Succ : LastLocalSU->Succs) {
2158     if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
2159       continue;
2160     if (Succ.getSUnit() == GlobalSU)
2161       continue;
2162     if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
2163       return;
2164     LocalUses.push_back(Succ.getSUnit());
2165   }
2166   // Open the top of the GlobalLI hole by constraining any earlier global uses
2167   // to precede the start of LocalLI.
2168   SmallVector<SUnit*,8> GlobalUses;
2169   MachineInstr *FirstLocalDef =
2170     LIS->getInstructionFromIndex(LocalLI->beginIndex());
2171   SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
2172   for (const SDep &Pred : GlobalSU->Preds) {
2173     if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
2174       continue;
2175     if (Pred.getSUnit() == FirstLocalSU)
2176       continue;
2177     if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
2178       return;
2179     GlobalUses.push_back(Pred.getSUnit());
2180   }
2181   LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
2182   // Add the weak edges.
2183   for (SUnit *LU : LocalUses) {
2184     LLVM_DEBUG(dbgs() << "  Local use SU(" << LU->NodeNum << ") -> SU("
2185                       << GlobalSU->NodeNum << ")\n");
2186     DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));
2187   }
2188   for (SUnit *GU : GlobalUses) {
2189     LLVM_DEBUG(dbgs() << "  Global use SU(" << GU->NodeNum << ") -> SU("
2190                       << FirstLocalSU->NodeNum << ")\n");
2191     DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));
2192   }
2193 }
2194 
2195 /// Callback from DAG postProcessing to create weak edges to encourage
2196 /// copy elimination.
2197 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
2198   ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
2199   assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
2200 
2201   MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
2202   if (FirstPos == DAG->end())
2203     return;
2204   RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
2205   RegionEndIdx = DAG->getLIS()->getInstructionIndex(
2206       *priorNonDebug(DAG->end(), DAG->begin()));
2207 
2208   for (SUnit &SU : DAG->SUnits) {
2209     if (!SU.getInstr()->isCopy())
2210       continue;
2211 
2212     constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
2213   }
2214 }
2215 
2216 //===----------------------------------------------------------------------===//
2217 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
2218 // and possibly other custom schedulers.
2219 //===----------------------------------------------------------------------===//
2220 
2221 static const unsigned InvalidCycle = ~0U;
2222 
2223 SchedBoundary::~SchedBoundary() { delete HazardRec; }
2224 
2225 /// Given a Count of resource usage and a Latency value, return true if a
2226 /// SchedBoundary becomes resource limited.
2227 /// If we are checking after scheduling a node, we should return true when
2228 /// we just reach the resource limit.
2229 static bool checkResourceLimit(unsigned LFactor, unsigned Count,
2230                                unsigned Latency, bool AfterSchedNode) {
2231   int ResCntFactor = (int)(Count - (Latency * LFactor));
2232   if (AfterSchedNode)
2233     return ResCntFactor >= (int)LFactor;
2234   else
2235     return ResCntFactor > (int)LFactor;
2236 }
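// Worked example with made-up numbers: LFactor = 2, Count = 10, Latency = 4
// gives ResCntFactor = 10 - 4 * 2 = 2 == LFactor. Right after scheduling a
// node (AfterSchedNode == true) this already counts as resource limited
// (>= LFactor); when merely probing (AfterSchedNode == false) it does not
// (> LFactor fails), so the zone is still treated as latency limited.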
2237 
2238 void SchedBoundary::reset() {
2239   // A new HazardRec is created for each DAG and owned by SchedBoundary.
2240   // Destroying and reconstructing it is very expensive though. So keep
2241   // invalid, placeholder HazardRecs.
2242   if (HazardRec && HazardRec->isEnabled()) {
2243     delete HazardRec;
2244     HazardRec = nullptr;
2245   }
2246   Available.clear();
2247   Pending.clear();
2248   CheckPending = false;
2249   CurrCycle = 0;
2250   CurrMOps = 0;
2251   MinReadyCycle = std::numeric_limits<unsigned>::max();
2252   ExpectedLatency = 0;
2253   DependentLatency = 0;
2254   RetiredMOps = 0;
2255   MaxExecutedResCount = 0;
2256   ZoneCritResIdx = 0;
2257   IsResourceLimited = false;
2258   ReservedCycles.clear();
2259   ReservedResourceSegments.clear();
2260   ReservedCyclesIndex.clear();
2261   ResourceGroupSubUnitMasks.clear();
2262 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
2263   // Track the maximum number of stall cycles that could arise either from the
2264   // latency of a DAG edge or the number of cycles that a processor resource is
2265   // reserved (SchedBoundary::ReservedCycles).
2266   MaxObservedStall = 0;
2267 #endif
2268   // Reserve a zero-count for invalid CritResIdx.
2269   ExecutedResCounts.resize(1);
2270   assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
2271 }
2272 
2273 void SchedRemainder::
2274 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
2275   reset();
2276   if (!SchedModel->hasInstrSchedModel())
2277     return;
2278   RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
2279   for (SUnit &SU : DAG->SUnits) {
2280     const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
2281     RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
2282       * SchedModel->getMicroOpFactor();
2283     for (TargetSchedModel::ProcResIter
2284            PI = SchedModel->getWriteProcResBegin(SC),
2285            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2286       unsigned PIdx = PI->ProcResourceIdx;
2287       unsigned Factor = SchedModel->getResourceFactor(PIdx);
2288       assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle);
2289       RemainingCounts[PIdx] +=
2290           (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle));
2291     }
2292   }
2293 }
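// Bookkeeping sketch with a made-up resource entry: if an SUnit's sched class
// has a write to resource R with ReleaseAtCycle = 3, AcquireAtCycle = 1 and
// getResourceFactor(R) = 2, it adds (3 - 1) * 2 = 4 scaled units to
// RemainingCounts[R]. RemIssueCount likewise accumulates
// NumMicroOps * MicroOpFactor, so remaining issue slots and remaining resource
// usage can later be compared on a common scale.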
2294 
2295 void SchedBoundary::
2296 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
2297   reset();
2298   DAG = dag;
2299   SchedModel = smodel;
2300   Rem = rem;
2301   if (SchedModel->hasInstrSchedModel()) {
2302     unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2303     ReservedCyclesIndex.resize(ResourceCount);
2304     ExecutedResCounts.resize(ResourceCount);
2305     ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
2306     unsigned NumUnits = 0;
2307 
2308     for (unsigned i = 0; i < ResourceCount; ++i) {
2309       ReservedCyclesIndex[i] = NumUnits;
2310       NumUnits += SchedModel->getProcResource(i)->NumUnits;
2311       if (isUnbufferedGroup(i)) {
2312         auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;
2313         for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;
2314              U != UE; ++U)
2315           ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
2316       }
2317     }
2318 
2319     ReservedCycles.resize(NumUnits, InvalidCycle);
2320   }
2321 }
2322 
2323 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
2324 /// these "soft stalls" differently than the hard stall cycles based on CPU
2325 /// resources and computed by checkHazard(). A fully in-order model
2326 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
2327 /// available for scheduling until they are ready. However, a weaker in-order
2328 /// model may use this for heuristics. For example, if a processor has in-order
2329 /// behavior when reading certain resources, this may come into play.
2330 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
2331   if (!SU->isUnbuffered)
2332     return 0;
2333 
2334   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2335   if (ReadyCycle > CurrCycle)
2336     return ReadyCycle - CurrCycle;
2337   return 0;
2338 }
2339 
2340 /// Compute the next cycle at which the given processor resource unit
2341 /// can be scheduled.
2342 unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
2343                                                        unsigned ReleaseAtCycle,
2344                                                        unsigned AcquireAtCycle) {
2345   if (SchedModel && SchedModel->enableIntervals()) {
2346     if (isTop())
2347       return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop(
2348           CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2349 
2350     return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom(
2351         CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2352   }
2353 
2354   unsigned NextUnreserved = ReservedCycles[InstanceIdx];
2355   // If this resource has never been used, always return cycle zero.
2356   if (NextUnreserved == InvalidCycle)
2357     return CurrCycle;
2358   // For bottom-up scheduling, add the cycles needed for the current operation.
2359   if (!isTop())
2360     NextUnreserved = std::max(CurrCycle, NextUnreserved + ReleaseAtCycle);
2361   return NextUnreserved;
2362 }
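// Example of the non-interval path with hypothetical values: say
// ReservedCycles[InstanceIdx] == 7, CurrCycle == 5 and ReleaseAtCycle == 2.
// Top-down, the instance is simply busy until cycle 7. Bottom-up, the current
// operation itself occupies the instance, so the result is
// max(CurrCycle, 7 + 2) = 9. An InvalidCycle entry means the instance has
// never been used, and CurrCycle is returned directly.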
2363 
2364 /// Compute the next cycle at which the given processor resource can be
2365 /// scheduled.  Returns the next cycle and the index of the processor resource
2366 /// instance in the reserved cycles vector.
2367 std::pair<unsigned, unsigned>
2368 SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
2369                                     unsigned ReleaseAtCycle,
2370                                     unsigned AcquireAtCycle) {
2371   if (MischedDetailResourceBooking) {
2372     LLVM_DEBUG(dbgs() << "  Resource booking (@" << CurrCycle << "c): \n");
2373     LLVM_DEBUG(dumpReservedCycles());
2374     LLVM_DEBUG(dbgs() << "  getNextResourceCycle (@" << CurrCycle << "c): \n");
2375   }
2376   unsigned MinNextUnreserved = InvalidCycle;
2377   unsigned InstanceIdx = 0;
2378   unsigned StartIndex = ReservedCyclesIndex[PIdx];
2379   unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
2380   assert(NumberOfInstances > 0 &&
2381          "Cannot have zero instances of a ProcResource");
2382 
2383   if (isUnbufferedGroup(PIdx)) {
2384     // If any subunits are used by the instruction, report that the
2385     // subunits of the resource group are available at the first cycle
2386     // in which the unit is available, effectively removing the group
2387     // record from hazarding and basing the hazarding decisions on the
2388     // subunit records. Otherwise, choose the first available instance
2389     // from among the subunits.  Specifications which assign cycles to
2390     // both the subunits and the group or which use an unbuffered
2391     // group with buffered subunits will appear to schedule
2392     // strangely. In the first case, the additional cycles for the
2393     // group will be ignored.  In the second, the group will be
2394     // ignored entirely.
2395     for (const MCWriteProcResEntry &PE :
2396          make_range(SchedModel->getWriteProcResBegin(SC),
2397                     SchedModel->getWriteProcResEnd(SC)))
2398       if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
2399         return std::make_pair(getNextResourceCycleByInstance(
2400                                   StartIndex, ReleaseAtCycle, AcquireAtCycle),
2401                               StartIndex);
2402 
2403     auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
2404     for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
2405       unsigned NextUnreserved, NextInstanceIdx;
2406       std::tie(NextUnreserved, NextInstanceIdx) =
2407           getNextResourceCycle(SC, SubUnits[I], ReleaseAtCycle, AcquireAtCycle);
2408       if (MinNextUnreserved > NextUnreserved) {
2409         InstanceIdx = NextInstanceIdx;
2410         MinNextUnreserved = NextUnreserved;
2411       }
2412     }
2413     return std::make_pair(MinNextUnreserved, InstanceIdx);
2414   }
2415 
2416   for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
2417        ++I) {
2418     unsigned NextUnreserved =
2419         getNextResourceCycleByInstance(I, ReleaseAtCycle, AcquireAtCycle);
2420     if (MischedDetailResourceBooking)
2421       LLVM_DEBUG(dbgs() << "    Instance " << I - StartIndex << " available @"
2422                         << NextUnreserved << "c\n");
2423     if (MinNextUnreserved > NextUnreserved) {
2424       InstanceIdx = I;
2425       MinNextUnreserved = NextUnreserved;
2426     }
2427   }
2428   if (MischedDetailResourceBooking)
2429     LLVM_DEBUG(dbgs() << "    selecting " << SchedModel->getResourceName(PIdx)
2430                       << "[" << InstanceIdx - StartIndex << "]"
2431                       << " available @" << MinNextUnreserved << "c"
2432                       << "\n");
2433   return std::make_pair(MinNextUnreserved, InstanceIdx);
2434 }
2435 
2436 /// Does this SU have a hazard within the current instruction group.
2437 ///
2438 /// The scheduler supports two modes of hazard recognition. The first is the
2439 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
2440 /// supports highly complicated in-order reservation tables
2441 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
2442 ///
2443 /// The second is a streamlined mechanism that checks for hazards based on
2444 /// simple counters that the scheduler itself maintains. It explicitly checks
2445 /// for instruction dispatch limitations, including the number of micro-ops that
2446 /// can dispatch per cycle.
2447 ///
2448 /// TODO: Also check whether the SU must start a new group.
2449 bool SchedBoundary::checkHazard(SUnit *SU) {
2450   if (HazardRec->isEnabled()
2451       && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
2452     return true;
2453   }
2454 
2455   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
2456   if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2457     LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
2458                       << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
2459     return true;
2460   }
2461 
2462   if (CurrMOps > 0 &&
2463       ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2464        (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2465     LLVM_DEBUG(dbgs() << "  hazard: SU(" << SU->NodeNum << ") must "
2466                       << (isTop() ? "begin" : "end") << " group\n");
2467     return true;
2468   }
2469 
2470   if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
2471     const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2472     for (const MCWriteProcResEntry &PE :
2473           make_range(SchedModel->getWriteProcResBegin(SC),
2474                      SchedModel->getWriteProcResEnd(SC))) {
2475       unsigned ResIdx = PE.ProcResourceIdx;
2476       unsigned ReleaseAtCycle = PE.ReleaseAtCycle;
2477       unsigned AcquireAtCycle = PE.AcquireAtCycle;
2478       unsigned NRCycle, InstanceIdx;
2479       std::tie(NRCycle, InstanceIdx) =
2480           getNextResourceCycle(SC, ResIdx, ReleaseAtCycle, AcquireAtCycle);
2481       if (NRCycle > CurrCycle) {
2482 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
2483         MaxObservedStall = std::max(ReleaseAtCycle, MaxObservedStall);
2484 #endif
2485         LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "
2486                           << SchedModel->getResourceName(ResIdx)
2487                           << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx]  << ']'
2488                           << "=" << NRCycle << "c\n");
2489         return true;
2490       }
2491     }
2492   }
2493   return false;
2494 }
2495 
2496 // Find the unscheduled node in ReadySUs with the highest latency.
2497 unsigned SchedBoundary::
2498 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
2499   SUnit *LateSU = nullptr;
2500   unsigned RemLatency = 0;
2501   for (SUnit *SU : ReadySUs) {
2502     unsigned L = getUnscheduledLatency(SU);
2503     if (L > RemLatency) {
2504       RemLatency = L;
2505       LateSU = SU;
2506     }
2507   }
2508   if (LateSU) {
2509     LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2510                       << LateSU->NodeNum << ") " << RemLatency << "c\n");
2511   }
2512   return RemLatency;
2513 }
2514 
2515 // Count resources in this zone and the remaining unscheduled
2516 // instructions. Return the max count, scaled. Set OtherCritIdx to the critical
2517 // resource index, or zero if the zone is issue limited.
2518 unsigned SchedBoundary::
2519 getOtherResourceCount(unsigned &OtherCritIdx) {
2520   OtherCritIdx = 0;
2521   if (!SchedModel->hasInstrSchedModel())
2522     return 0;
2523 
2524   unsigned OtherCritCount = Rem->RemIssueCount
2525     + (RetiredMOps * SchedModel->getMicroOpFactor());
2526   LLVM_DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
2527                     << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2528   for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2529        PIdx != PEnd; ++PIdx) {
2530     unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2531     if (OtherCount > OtherCritCount) {
2532       OtherCritCount = OtherCount;
2533       OtherCritIdx = PIdx;
2534     }
2535   }
2536   if (OtherCritIdx) {
2537     LLVM_DEBUG(
2538         dbgs() << "  " << Available.getName() << " + Remain CritRes: "
2539                << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2540                << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2541   }
2542   return OtherCritCount;
2543 }
2544 
2545 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
2546                                 unsigned Idx) {
2547   assert(SU->getInstr() && "Scheduled SUnit must have instr");
2548 
2549 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
2550   // ReadyCycle was bumped up to the CurrCycle when this node was
2551   // scheduled, but CurrCycle may have been eagerly advanced immediately after
2552   // scheduling, so may now be greater than ReadyCycle.
2553   if (ReadyCycle > CurrCycle)
2554     MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2555 #endif
2556 
2557   if (ReadyCycle < MinReadyCycle)
2558     MinReadyCycle = ReadyCycle;
2559 
2560   // Check for interlocks first. For the purpose of other heuristics, an
2561   // instruction that cannot issue appears as if it's not in the ReadyQueue.
2562   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2563   bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
2564                         checkHazard(SU) || (Available.size() >= ReadyListLimit);
2565 
2566   if (!HazardDetected) {
2567     Available.push(SU);
2568 
2569     if (InPQueue)
2570       Pending.remove(Pending.begin() + Idx);
2571     return;
2572   }
2573 
2574   if (!InPQueue)
2575     Pending.push(SU);
2576 }
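// In short, the gating above is:
//   HazardDetected = (fully in-order && ReadyCycle > CurrCycle)   // stall
//                    || checkHazard(SU)                           // hazard
//                    || Available.size() >= ReadyListLimit;       // overflow
// Hazard-free nodes go straight to Available (and are dropped from Pending if
// they were being re-examined from there); everything else waits in Pending
// until a later cycle retries it via releasePending().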
2577 
2578 /// Move the boundary of scheduled code by one cycle.
2579 void SchedBoundary::bumpCycle(unsigned NextCycle) {
2580   if (SchedModel->getMicroOpBufferSize() == 0) {
2581     assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2582            "MinReadyCycle uninitialized");
2583     if (MinReadyCycle > NextCycle)
2584       NextCycle = MinReadyCycle;
2585   }
2586   // Update the current micro-ops, which will issue in the next cycle.
2587   unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2588   CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2589 
2590   // Decrement DependentLatency based on the next cycle.
2591   if ((NextCycle - CurrCycle) > DependentLatency)
2592     DependentLatency = 0;
2593   else
2594     DependentLatency -= (NextCycle - CurrCycle);
2595 
2596   if (!HazardRec->isEnabled()) {
2597     // Bypass HazardRec virtual calls.
2598     CurrCycle = NextCycle;
2599   } else {
2600     // Bypass getHazardType calls in case of long latency.
2601     for (; CurrCycle != NextCycle; ++CurrCycle) {
2602       if (isTop())
2603         HazardRec->AdvanceCycle();
2604       else
2605         HazardRec->RecedeCycle();
2606     }
2607   }
2608   CheckPending = true;
2609   IsResourceLimited =
2610       checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2611                          getScheduledLatency(), true);
2612 
2613   LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2614                     << '\n');
2615 }
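// Example with a hypothetical 4-wide machine: CurrCycle = 10, CurrMOps = 3 and
// NextCycle = 12 give DecMOps = 4 * (12 - 10) = 8, so CurrMOps drops to 0 and
// the next instruction starts a fresh issue group. DependentLatency shrinks by
// the same two cycles (saturating at 0), since advancing the cycle has already
// "paid for" that much of the pending latency.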
2616 
2617 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2618   ExecutedResCounts[PIdx] += Count;
2619   if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2620     MaxExecutedResCount = ExecutedResCounts[PIdx];
2621 }
2622 
2623 /// Add the given processor resource to this scheduled zone.
2624 ///
2625 /// \param ReleaseAtCycle indicates the number of consecutive (non-pipelined)
2626 /// cycles during which this resource is released.
2627 ///
2628 /// \param AcquireAtCycle indicates the number of consecutive (non-pipelined)
2629 /// cycles at which the resource is acquired after issue (assuming no stalls).
2630 ///
2631 /// \return the next cycle at which the instruction may execute without
2632 /// oversubscribing resources.
2633 unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
2634                                       unsigned ReleaseAtCycle,
2635                                       unsigned NextCycle,
2636                                       unsigned AcquireAtCycle) {
2637   unsigned Factor = SchedModel->getResourceFactor(PIdx);
2638   unsigned Count = Factor * (ReleaseAtCycle - AcquireAtCycle);
2639   LLVM_DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx) << " +"
2640                     << ReleaseAtCycle << "x" << Factor << "u\n");
2641 
2642   // Update Executed resources counts.
2643   incExecutedResources(PIdx, Count);
2644   assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2645   Rem->RemainingCounts[PIdx] -= Count;
2646 
2647   // Check if this resource exceeds the current critical resource. If so, it
2648   // becomes the critical resource.
2649   if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2650     ZoneCritResIdx = PIdx;
2651     LLVM_DEBUG(dbgs() << "  *** Critical resource "
2652                       << SchedModel->getResourceName(PIdx) << ": "
2653                       << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2654                       << "c\n");
2655   }
2656   // For reserved resources, record the highest cycle using the resource.
2657   unsigned NextAvailable, InstanceIdx;
2658   std::tie(NextAvailable, InstanceIdx) =
2659       getNextResourceCycle(SC, PIdx, ReleaseAtCycle, AcquireAtCycle);
2660   if (NextAvailable > CurrCycle) {
2661     LLVM_DEBUG(dbgs() << "  Resource conflict: "
2662                       << SchedModel->getResourceName(PIdx)
2663                       << '[' << InstanceIdx - ReservedCyclesIndex[PIdx]  << ']'
2664                       << " reserved until @" << NextAvailable << "\n");
2665   }
2666   return NextAvailable;
2667 }
2668 
2669 /// Move the boundary of scheduled code by one SUnit.
2670 void SchedBoundary::bumpNode(SUnit *SU) {
2671   // Update the reservation table.
2672   if (HazardRec->isEnabled()) {
2673     if (!isTop() && SU->isCall) {
2674       // Calls are scheduled with their preceding instructions. For bottom-up
2675       // scheduling, clear the pipeline state before emitting.
2676       HazardRec->Reset();
2677     }
2678     HazardRec->EmitInstruction(SU);
2679     // Scheduling an instruction may have made pending instructions available.
2680     CheckPending = true;
2681   }
2682   // checkHazard should prevent scheduling multiple instructions per cycle that
2683   // exceed the issue width.
2684   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2685   unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2686   assert(
2687       (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2688       "Cannot schedule this instruction's MicroOps in the current cycle.");
2689 
2690   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2691   LLVM_DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
2692 
2693   unsigned NextCycle = CurrCycle;
2694   switch (SchedModel->getMicroOpBufferSize()) {
2695   case 0:
2696     assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2697     break;
2698   case 1:
2699     if (ReadyCycle > NextCycle) {
2700       NextCycle = ReadyCycle;
2701       LLVM_DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
2702     }
2703     break;
2704   default:
2705     // We don't currently model the OOO reorder buffer, so consider all
2706     // scheduled MOps to be "retired". We do loosely model in-order resource
2707     // latency. If this instruction uses an in-order resource, account for any
2708     // likely stall cycles.
2709     if (SU->isUnbuffered && ReadyCycle > NextCycle)
2710       NextCycle = ReadyCycle;
2711     break;
2712   }
2713   RetiredMOps += IncMOps;
2714 
2715   // Update resource counts and critical resource.
2716   if (SchedModel->hasInstrSchedModel()) {
2717     unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2718     assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2719     Rem->RemIssueCount -= DecRemIssue;
2720     if (ZoneCritResIdx) {
2721       // Scale scheduled micro-ops for comparing with the critical resource.
2722       unsigned ScaledMOps =
2723         RetiredMOps * SchedModel->getMicroOpFactor();
2724 
2725       // If scaled micro-ops are now more than the previous critical resource by
2726       // a full cycle, then micro-ops issue becomes critical.
2727       if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2728           >= (int)SchedModel->getLatencyFactor()) {
2729         ZoneCritResIdx = 0;
2730         LLVM_DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
2731                           << ScaledMOps / SchedModel->getLatencyFactor()
2732                           << "c\n");
2733       }
2734     }
2735     for (TargetSchedModel::ProcResIter
2736            PI = SchedModel->getWriteProcResBegin(SC),
2737            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2738       unsigned RCycle =
2739           countResource(SC, PI->ProcResourceIdx, PI->ReleaseAtCycle, NextCycle,
2740                         PI->AcquireAtCycle);
2741       if (RCycle > NextCycle)
2742         NextCycle = RCycle;
2743     }
2744     if (SU->hasReservedResource) {
2745       // For reserved resources, record the highest cycle using the resource.
2746       // For top-down scheduling, this is the cycle in which we schedule this
2747       // instruction plus the number of cycles the operation reserves the
2748       // resource. For bottom-up, it is simply the instruction's cycle.
2749       for (TargetSchedModel::ProcResIter
2750              PI = SchedModel->getWriteProcResBegin(SC),
2751              PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2752         unsigned PIdx = PI->ProcResourceIdx;
2753         if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2754 
2755           if (SchedModel && SchedModel->enableIntervals()) {
2756             unsigned ReservedUntil, InstanceIdx;
2757             std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
2758                 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
2759             if (isTop()) {
2760               ReservedResourceSegments[InstanceIdx].add(
2761                   ResourceSegments::getResourceIntervalTop(
2762                       NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
2763                   MIResourceCutOff);
2764             } else {
2765               ReservedResourceSegments[InstanceIdx].add(
2766                   ResourceSegments::getResourceIntervalBottom(
2767                       NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
2768                   MIResourceCutOff);
2769             }
2770           } else {
2771 
2772             unsigned ReservedUntil, InstanceIdx;
2773             std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(
2774                 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
2775             if (isTop()) {
2776               ReservedCycles[InstanceIdx] =
2777                   std::max(ReservedUntil, NextCycle + PI->ReleaseAtCycle);
2778             } else
2779               ReservedCycles[InstanceIdx] = NextCycle;
2780           }
2781         }
2782       }
2783     }
2784   }
2785   // Update ExpectedLatency and DependentLatency.
2786   unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2787   unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2788   if (SU->getDepth() > TopLatency) {
2789     TopLatency = SU->getDepth();
2790     LLVM_DEBUG(dbgs() << "  " << Available.getName() << " TopLatency SU("
2791                       << SU->NodeNum << ") " << TopLatency << "c\n");
2792   }
2793   if (SU->getHeight() > BotLatency) {
2794     BotLatency = SU->getHeight();
2795     LLVM_DEBUG(dbgs() << "  " << Available.getName() << " BotLatency SU("
2796                       << SU->NodeNum << ") " << BotLatency << "c\n");
2797   }
2798   // If we stall for any reason, bump the cycle.
2799   if (NextCycle > CurrCycle)
2800     bumpCycle(NextCycle);
2801   else
2802     // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2803     // resource limited. If a stall occurred, bumpCycle does this.
2804     IsResourceLimited =
2805         checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2806                            getScheduledLatency(), true);
2807 
2808   // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2809   // resets CurrMOps. Loop to handle instructions with more MOps than can issue
2810   // in one cycle. Since we commonly reach the max MOps here, opportunistically
2811   // bump the cycle to avoid uselessly checking everything in the readyQ.
2812   CurrMOps += IncMOps;
2813 
2814   // Bump the cycle count for issue group constraints.
2815   // This must be done after NextCycle has been adjusted for all other stalls.
2816   // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2817   // CurrCycle to X.
2818   if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2819       (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2820     LLVM_DEBUG(dbgs() << "  Bump cycle to " << (isTop() ? "end" : "begin")
2821                       << " group\n");
2822     bumpCycle(++NextCycle);
2823   }
2824 
2825   while (CurrMOps >= SchedModel->getIssueWidth()) {
2826     LLVM_DEBUG(dbgs() << "  *** Max MOps " << CurrMOps << " at cycle "
2827                       << CurrCycle << '\n');
2828     bumpCycle(++NextCycle);
2829   }
2830   LLVM_DEBUG(dumpScheduledState());
2831 }
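// Illustrative example (hypothetical machine with an issue width of 2):
// scheduling an SU that decodes to 3 micro-ops from CurrMOps == 0 leaves
// CurrMOps == 3, so the loop above calls bumpCycle() once; per the comment
// above, that drops one issue group (2 micro-ops) from CurrMOps, leaving 1
// micro-op counted against the new cycle.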
2832 
2833 /// Release pending ready nodes into the available queue. This makes them
2834 /// visible to heuristics.
2835 void SchedBoundary::releasePending() {
2836   // If the available queue is empty, it is safe to reset MinReadyCycle.
2837   if (Available.empty())
2838     MinReadyCycle = std::numeric_limits<unsigned>::max();
2839 
2840   // Check to see if any of the pending instructions are ready to issue.  If
2841   // so, add them to the available queue.
2842   for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
2843     SUnit *SU = *(Pending.begin() + I);
2844     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2845 
2846     if (ReadyCycle < MinReadyCycle)
2847       MinReadyCycle = ReadyCycle;
2848 
2849     if (Available.size() >= ReadyListLimit)
2850       break;
2851 
2852     releaseNode(SU, ReadyCycle, true, I);
2853     if (E != Pending.size()) {
2854       --I;
2855       --E;
2856     }
2857   }
2858   CheckPending = false;
2859 }
2860 
2861 /// Remove SU from the ready set for this boundary.
2862 void SchedBoundary::removeReady(SUnit *SU) {
2863   if (Available.isInQueue(SU))
2864     Available.remove(Available.find(SU));
2865   else {
2866     assert(Pending.isInQueue(SU) && "bad ready count");
2867     Pending.remove(Pending.find(SU));
2868   }
2869 }
2870 
2871 /// If this queue only has one ready candidate, return it. As a side effect,
2872 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2873 /// one node is ready. If multiple instructions are ready, return NULL.
2874 SUnit *SchedBoundary::pickOnlyChoice() {
2875   if (CheckPending)
2876     releasePending();
2877 
2878   // Defer any ready instrs that now have a hazard.
2879   for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2880     if (checkHazard(*I)) {
2881       Pending.push(*I);
2882       I = Available.remove(I);
2883       continue;
2884     }
2885     ++I;
2886   }
2887   for (unsigned i = 0; Available.empty(); ++i) {
2888 //  FIXME: Re-enable assert once PR20057 is resolved.
2889 //    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2890 //           "permanent hazard");
2891     (void)i;
2892     bumpCycle(CurrCycle + 1);
2893     releasePending();
2894   }
2895 
2896   LLVM_DEBUG(Pending.dump());
2897   LLVM_DEBUG(Available.dump());
2898 
2899   if (Available.size() == 1)
2900     return *Available.begin();
2901   return nullptr;
2902 }
2903 
2904 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2905 
2906 /// Dump the content of the \ref ReservedCycles vector for the
2907 /// resources that are used in the basic block.
2908 ///
2909 LLVM_DUMP_METHOD void SchedBoundary::dumpReservedCycles() const {
2910   if (!SchedModel->hasInstrSchedModel())
2911     return;
2912 
2913   unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2914   unsigned StartIdx = 0;
2915 
2916   for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) {
2917     const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits;
2918     std::string ResName = SchedModel->getResourceName(ResIdx);
2919     for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {
2920       dbgs() << ResName << "(" << UnitIdx << ") = ";
2921       if (SchedModel && SchedModel->enableIntervals()) {
2922         if (ReservedResourceSegments.count(StartIdx + UnitIdx))
2923           dbgs() << ReservedResourceSegments.at(StartIdx + UnitIdx);
2924         else
2925           dbgs() << "{ }\n";
2926       } else
2927         dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n";
2928     }
2929     StartIdx += NumUnits;
2930   }
2931 }
2932 
2933 // This is useful information to dump after bumpNode.
2934 // Note that the Queue contents are more useful before pickNodeFromQueue.
2935 LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2936   unsigned ResFactor;
2937   unsigned ResCount;
2938   if (ZoneCritResIdx) {
2939     ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2940     ResCount = getResourceCount(ZoneCritResIdx);
2941   } else {
2942     ResFactor = SchedModel->getMicroOpFactor();
2943     ResCount = RetiredMOps * ResFactor;
2944   }
2945   unsigned LFactor = SchedModel->getLatencyFactor();
2946   dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2947          << "  Retired: " << RetiredMOps;
2948   dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
2949   dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
2950          << ResCount / ResFactor << " "
2951          << SchedModel->getResourceName(ZoneCritResIdx)
2952          << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
2953          << (IsResourceLimited ? "  - Resource" : "  - Latency")
2954          << " limited.\n";
2955   if (MISchedDumpReservedCycles)
2956     dumpReservedCycles();
2957 }
2958 #endif
2959 
2960 //===----------------------------------------------------------------------===//
2961 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2962 //===----------------------------------------------------------------------===//
2963 
2964 void GenericSchedulerBase::SchedCandidate::
2965 initResourceDelta(const ScheduleDAGMI *DAG,
2966                   const TargetSchedModel *SchedModel) {
2967   if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2968     return;
2969 
2970   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2971   for (TargetSchedModel::ProcResIter
2972          PI = SchedModel->getWriteProcResBegin(SC),
2973          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2974     if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2975       ResDelta.CritResources += PI->ReleaseAtCycle;
2976     if (PI->ProcResourceIdx == Policy.DemandResIdx)
2977       ResDelta.DemandedResources += PI->ReleaseAtCycle;
2978   }
2979 }
2980 
2981 /// Compute remaining latency. We need this both to determine whether the
2982 /// overall schedule has become latency-limited and whether the instructions
2983 /// outside this zone are resource or latency limited.
2984 ///
2985 /// The "dependent" latency is updated incrementally during scheduling as the
2986 /// max height/depth of scheduled nodes minus the cycles since it was
2987 /// scheduled:
2988 ///   DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2989 ///
2990 /// The "independent" latency is the max ready queue depth:
2991 ///   ILat = max N.depth for N in Available|Pending
2992 ///
2993 /// RemainingLatency is the greater of independent and dependent latency.
2994 ///
2995 /// These computations are expensive, especially in DAGs with many edges, so
2996 /// only do them if necessary.
2997 static unsigned computeRemLatency(SchedBoundary &CurrZone) {
2998   unsigned RemLatency = CurrZone.getDependentLatency();
2999   RemLatency = std::max(RemLatency,
3000                         CurrZone.findMaxLatency(CurrZone.Available.elements()));
3001   RemLatency = std::max(RemLatency,
3002                         CurrZone.findMaxLatency(CurrZone.Pending.elements()));
3003   return RemLatency;
3004 }
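// Illustrative example (hypothetical numbers): a scheduled node of depth 7
// that became ready at cycle 2, observed at CurrCycle 5, contributes a
// dependent latency of 7 - (5 - 2) = 4; if the deepest node still in
// Available/Pending has depth 6, the independent latency is 6, so
// RemLatency = max(4, 6) = 6.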
3005 
3006 /// Returns true if the current cycle plus remaining latency is greater than
3007 /// the critical path in the scheduling region.
3008 bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
3009                                                SchedBoundary &CurrZone,
3010                                                bool ComputeRemLatency,
3011                                                unsigned &RemLatency) const {
3012   // The current cycle is already greater than the critical path, so we are
3013   // already latency limited and don't need to compute the remaining latency.
3014   if (CurrZone.getCurrCycle() > Rem.CriticalPath)
3015     return true;
3016 
3017   // If we haven't scheduled anything yet, then we aren't latency limited.
3018   if (CurrZone.getCurrCycle() == 0)
3019     return false;
3020 
3021   if (ComputeRemLatency)
3022     RemLatency = computeRemLatency(CurrZone);
3023 
3024   return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
3025 }
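// For example (hypothetical values): with CurrCycle == 4, RemLatency == 6 and
// Rem.CriticalPath == 9, 6 + 4 > 9 holds, so the zone is considered latency
// limited and setPolicy() below will set Policy.ReduceLatency.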
3026 
3027 /// Set the CandPolicy for a scheduling zone given the current resources and
3028 /// latencies inside and outside the zone.
3029 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
3030                                      SchedBoundary &CurrZone,
3031                                      SchedBoundary *OtherZone) {
3032   // Apply preemptive heuristics based on the total latency and resources
3033   // inside and outside this zone. Potential stalls should be considered before
3034   // following this policy.
3035 
3036   // Compute the critical resource outside the zone.
3037   unsigned OtherCritIdx = 0;
3038   unsigned OtherCount =
3039     OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
3040 
3041   bool OtherResLimited = false;
3042   unsigned RemLatency = 0;
3043   bool RemLatencyComputed = false;
3044   if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
3045     RemLatency = computeRemLatency(CurrZone);
3046     RemLatencyComputed = true;
3047     OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
3048                                          OtherCount, RemLatency, false);
3049   }
3050 
3051   // Schedule aggressively for latency in PostRA mode. We don't check for
3052   // acyclic latency during PostRA, and highly out-of-order processors will
3053   // skip PostRA scheduling.
3054   if (!OtherResLimited &&
3055       (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
3056                                        RemLatency))) {
3057     Policy.ReduceLatency |= true;
3058     LLVM_DEBUG(dbgs() << "  " << CurrZone.Available.getName()
3059                       << " RemainingLatency " << RemLatency << " + "
3060                       << CurrZone.getCurrCycle() << "c > CritPath "
3061                       << Rem.CriticalPath << "\n");
3062   }
3063   // If the same resource is limiting inside and outside the zone, do nothing.
3064   if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
3065     return;
3066 
3067   LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
3068     dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "
3069            << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
3070   } if (OtherResLimited) dbgs()
3071                  << "  RemainingLimit: "
3072                  << SchedModel->getResourceName(OtherCritIdx) << "\n";
3073              if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
3074              << "  Latency limited both directions.\n");
3075 
3076   if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
3077     Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
3078 
3079   if (OtherResLimited)
3080     Policy.DemandResIdx = OtherCritIdx;
3081 }
3082 
3083 #ifndef NDEBUG
3084 const char *GenericSchedulerBase::getReasonStr(
3085   GenericSchedulerBase::CandReason Reason) {
3086   switch (Reason) {
3087   case NoCand:         return "NOCAND    ";
3088   case Only1:          return "ONLY1     ";
3089   case PhysReg:        return "PHYS-REG  ";
3090   case RegExcess:      return "REG-EXCESS";
3091   case RegCritical:    return "REG-CRIT  ";
3092   case Stall:          return "STALL     ";
3093   case Cluster:        return "CLUSTER   ";
3094   case Weak:           return "WEAK      ";
3095   case RegMax:         return "REG-MAX   ";
3096   case ResourceReduce: return "RES-REDUCE";
3097   case ResourceDemand: return "RES-DEMAND";
3098   case TopDepthReduce: return "TOP-DEPTH ";
3099   case TopPathReduce:  return "TOP-PATH  ";
3100   case BotHeightReduce:return "BOT-HEIGHT";
3101   case BotPathReduce:  return "BOT-PATH  ";
3102   case NextDefUse:     return "DEF-USE   ";
3103   case NodeOrder:      return "ORDER     ";
3104   }
3105   llvm_unreachable("Unknown reason!");
3106 }
3107 
3108 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
3109   PressureChange P;
3110   unsigned ResIdx = 0;
3111   unsigned Latency = 0;
3112   switch (Cand.Reason) {
3113   default:
3114     break;
3115   case RegExcess:
3116     P = Cand.RPDelta.Excess;
3117     break;
3118   case RegCritical:
3119     P = Cand.RPDelta.CriticalMax;
3120     break;
3121   case RegMax:
3122     P = Cand.RPDelta.CurrentMax;
3123     break;
3124   case ResourceReduce:
3125     ResIdx = Cand.Policy.ReduceResIdx;
3126     break;
3127   case ResourceDemand:
3128     ResIdx = Cand.Policy.DemandResIdx;
3129     break;
3130   case TopDepthReduce:
3131     Latency = Cand.SU->getDepth();
3132     break;
3133   case TopPathReduce:
3134     Latency = Cand.SU->getHeight();
3135     break;
3136   case BotHeightReduce:
3137     Latency = Cand.SU->getHeight();
3138     break;
3139   case BotPathReduce:
3140     Latency = Cand.SU->getDepth();
3141     break;
3142   }
3143   dbgs() << "  Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
3144   if (P.isValid())
3145     dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
3146            << ":" << P.getUnitInc() << " ";
3147   else
3148     dbgs() << "      ";
3149   if (ResIdx)
3150     dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
3151   else
3152     dbgs() << "         ";
3153   if (Latency)
3154     dbgs() << " " << Latency << " cycles ";
3155   else
3156     dbgs() << "          ";
3157   dbgs() << '\n';
3158 }
3159 #endif
3160 
3161 namespace llvm {
3162 /// Return true if this heuristic determines order.
3163 /// TODO: Consider refactoring the return type of these functions to an integer
3164 /// or enum, as we may need to differentiate whether TryCand is better than Cand.
3165 bool tryLess(int TryVal, int CandVal,
3166              GenericSchedulerBase::SchedCandidate &TryCand,
3167              GenericSchedulerBase::SchedCandidate &Cand,
3168              GenericSchedulerBase::CandReason Reason) {
3169   if (TryVal < CandVal) {
3170     TryCand.Reason = Reason;
3171     return true;
3172   }
3173   if (TryVal > CandVal) {
3174     if (Cand.Reason > Reason)
3175       Cand.Reason = Reason;
3176     return true;
3177   }
3178   return false;
3179 }
3180 
3181 bool tryGreater(int TryVal, int CandVal,
3182                 GenericSchedulerBase::SchedCandidate &TryCand,
3183                 GenericSchedulerBase::SchedCandidate &Cand,
3184                 GenericSchedulerBase::CandReason Reason) {
3185   if (TryVal > CandVal) {
3186     TryCand.Reason = Reason;
3187     return true;
3188   }
3189   if (TryVal < CandVal) {
3190     if (Cand.Reason > Reason)
3191       Cand.Reason = Reason;
3192     return true;
3193   }
3194   return false;
3195 }
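// Illustrative semantics (hypothetical values): tryGreater(3, 1, TryCand,
// Cand, Cluster) sets TryCand.Reason = Cluster and returns true (TryCand
// wins); tryGreater(1, 3, ...) returns true while leaving TryCand.Reason as
// NoCand, which callers interpret as "Cand wins"; tryGreater(2, 2, ...)
// returns false, so the next heuristic in the hierarchy breaks the tie.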
3196 
3197 bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
3198                 GenericSchedulerBase::SchedCandidate &Cand,
3199                 SchedBoundary &Zone) {
3200   if (Zone.isTop()) {
3201     // Prefer the candidate with the lesser depth, but only if one of them has
3202     // depth greater than the total latency scheduled so far, otherwise either
3203     // of them could be scheduled now with no stall.
3204     if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
3205         Zone.getScheduledLatency()) {
3206       if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3207                   TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
3208         return true;
3209     }
3210     if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3211                    TryCand, Cand, GenericSchedulerBase::TopPathReduce))
3212       return true;
3213   } else {
3214     // Prefer the candidate with the lesser height, but only if one of them has
3215     // height greater than the total latency scheduled so far, otherwise either
3216     // of them could be scheduled now with no stall.
3217     if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
3218         Zone.getScheduledLatency()) {
3219       if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3220                   TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
3221         return true;
3222     }
3223     if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3224                    TryCand, Cand, GenericSchedulerBase::BotPathReduce))
3225       return true;
3226   }
3227   return false;
3228 }
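// Illustrative top-down case (hypothetical numbers): with a scheduled latency
// of 5, TryCand depth 7 vs. Cand depth 6 exceeds that latency, so tryLess
// keeps the shallower Cand (TopDepthReduce); if both depths were <= 5, the
// height comparison decides instead, preferring the taller candidate
// (TopPathReduce).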
3229 } // end namespace llvm
3230 
3231 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
3232   LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
3233                     << GenericSchedulerBase::getReasonStr(Reason) << '\n');
3234 }
3235 
3236 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
3237   tracePick(Cand.Reason, Cand.AtTop);
3238 }
3239 
3240 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
3241   assert(dag->hasVRegLiveness() &&
3242          "(PreRA)GenericScheduler needs vreg liveness");
3243   DAG = static_cast<ScheduleDAGMILive*>(dag);
3244   SchedModel = DAG->getSchedModel();
3245   TRI = DAG->TRI;
3246 
3247   if (RegionPolicy.ComputeDFSResult)
3248     DAG->computeDFSResult();
3249 
3250   Rem.init(DAG, SchedModel);
3251   Top.init(DAG, SchedModel, &Rem);
3252   Bot.init(DAG, SchedModel, &Rem);
3253 
3254   // Initialize resource counts.
3255 
3256   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
3257   // are disabled, then these HazardRecs will be disabled.
3258   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3259   if (!Top.HazardRec) {
3260     Top.HazardRec =
3261         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3262             Itin, DAG);
3263   }
3264   if (!Bot.HazardRec) {
3265     Bot.HazardRec =
3266         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3267             Itin, DAG);
3268   }
3269   TopCand.SU = nullptr;
3270   BotCand.SU = nullptr;
3271 }
3272 
3273 /// Initialize the per-region scheduling policy.
3274 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
3275                                   MachineBasicBlock::iterator End,
3276                                   unsigned NumRegionInstrs) {
3277   const MachineFunction &MF = *Begin->getMF();
3278   const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
3279 
3280   // Avoid setting up the register pressure tracker for small regions to save
3281   // compile time. As a rough heuristic, only track pressure when the number of
3282   // schedulable instructions exceeds half the integer register file.
3283   RegionPolicy.ShouldTrackPressure = true;
3284   for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
3285     MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
3286     if (TLI->isTypeLegal(LegalIntVT)) {
3287       unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
3288         TLI->getRegClassFor(LegalIntVT));
3289       RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
3290     }
3291   }
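  // For example, on a hypothetical target whose widest legal integer type maps
  // to a register class with 32 allocatable registers, only regions with more
  // than 16 schedulable instructions get pressure tracking.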
3292 
3293 // For generic targets, we default to bottom-up, because it's simpler, and more
3294   // compile-time optimizations have been implemented in that direction.
3295   RegionPolicy.OnlyBottomUp = true;
3296 
3297   // Allow the subtarget to override default policy.
3298   MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
3299 
3300   // After subtarget overrides, apply command line options.
3301   if (!EnableRegPressure) {
3302     RegionPolicy.ShouldTrackPressure = false;
3303     RegionPolicy.ShouldTrackLaneMasks = false;
3304   }
3305 
3306   // -misched-topdown/bottomup can force or unforce the scheduling direction.
3307   // e.g. -misched-bottomup=false allows scheduling in both directions.
3308   assert((!ForceTopDown || !ForceBottomUp) &&
3309          "-misched-topdown incompatible with -misched-bottomup");
3310   if (ForceBottomUp.getNumOccurrences() > 0) {
3311     RegionPolicy.OnlyBottomUp = ForceBottomUp;
3312     if (RegionPolicy.OnlyBottomUp)
3313       RegionPolicy.OnlyTopDown = false;
3314   }
3315   if (ForceTopDown.getNumOccurrences() > 0) {
3316     RegionPolicy.OnlyTopDown = ForceTopDown;
3317     if (RegionPolicy.OnlyTopDown)
3318       RegionPolicy.OnlyBottomUp = false;
3319   }
3320 }
3321 
3322 void GenericScheduler::dumpPolicy() const {
3323   // Cannot completely remove virtual function even in release mode.
3324 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3325   dbgs() << "GenericScheduler RegionPolicy: "
3326          << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
3327          << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
3328          << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
3329          << "\n";
3330 #endif
3331 }
3332 
3333 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
3334 /// critical path by more cycles than it takes to drain the instruction buffer.
3335 /// We estimate an upper bound on in-flight instructions as:
3336 ///
3337 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
3338 /// InFlightIterations = AcyclicPath / CyclesPerIteration
3339 /// InFlightResources = InFlightIterations * LoopResources
3340 ///
3341 /// TODO: Check execution resources in addition to IssueCount.
3342 void GenericScheduler::checkAcyclicLatency() {
3343   if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
3344     return;
3345 
3346   // Scaled number of cycles per loop iteration.
3347   unsigned IterCount =
3348     std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
3349              Rem.RemIssueCount);
3350   // Scaled acyclic critical path.
3351   unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
3352   // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
3353   unsigned InFlightCount =
3354     (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
3355   unsigned BufferLimit =
3356     SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
3357 
3358   Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
3359 
3360   LLVM_DEBUG(
3361       dbgs() << "IssueCycles="
3362              << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
3363              << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
3364              << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3365              << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
3366              << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
3367       if (Rem.IsAcyclicLatencyLimited) dbgs() << "  ACYCLIC LATENCY LIMIT\n");
3368 }
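// Worked example (hypothetical counts, latency/micro-op factors of 1): with
// CyclicCritPath 4, RemIssueCount 6 and CriticalPath 12, IterCount =
// max(4, 6) = 6 and AcyclicCount = 12, so InFlightCount = (12 * 6 + 5) / 6 =
// 12; with a micro-op buffer of 8, BufferLimit = 8 and 12 > 8 marks the loop
// acyclic-latency limited.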
3369 
3370 void GenericScheduler::registerRoots() {
3371   Rem.CriticalPath = DAG->ExitSU.getDepth();
3372 
3373   // Some roots may not feed into ExitSU. Check all of them in case.
3374   for (const SUnit *SU : Bot.Available) {
3375     if (SU->getDepth() > Rem.CriticalPath)
3376       Rem.CriticalPath = SU->getDepth();
3377   }
3378   LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
3379   if (DumpCriticalPathLength) {
3380     errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
3381   }
3382 
3383   if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
3384     Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
3385     checkAcyclicLatency();
3386   }
3387 }
3388 
3389 namespace llvm {
3390 bool tryPressure(const PressureChange &TryP,
3391                  const PressureChange &CandP,
3392                  GenericSchedulerBase::SchedCandidate &TryCand,
3393                  GenericSchedulerBase::SchedCandidate &Cand,
3394                  GenericSchedulerBase::CandReason Reason,
3395                  const TargetRegisterInfo *TRI,
3396                  const MachineFunction &MF) {
3397   // If one candidate decreases pressure and the other increases it, prefer the
3398   // one that decreases. Invalid candidates have UnitInc==0.
3399   if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
3400                  Reason)) {
3401     return true;
3402   }
3403   // Do not compare the magnitude of pressure changes between top and bottom
3404   // boundary.
3405   if (Cand.AtTop != TryCand.AtTop)
3406     return false;
3407 
3408   // If both candidates affect the same set in the same boundary, go with the
3409   // smallest increase.
3410   unsigned TryPSet = TryP.getPSetOrMax();
3411   unsigned CandPSet = CandP.getPSetOrMax();
3412   if (TryPSet == CandPSet) {
3413     return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
3414                    Reason);
3415   }
3416 
3417   int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
3418                                  std::numeric_limits<int>::max();
3419 
3420   int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
3421                                    std::numeric_limits<int>::max();
3422 
3423   // If the candidates are decreasing pressure, reverse priority.
3424   if (TryP.getUnitInc() < 0)
3425     std::swap(TryRank, CandRank);
3426   return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
3427 }
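// Illustrative example (hypothetical pressure deltas): if TryP reduces a
// pressure set by 1 unit (UnitInc == -1) while CandP increases one by 2, the
// initial tryGreater(true, false, ...) immediately prefers TryCand; only when
// both candidates move pressure in the same direction do the set-identity and
// magnitude comparisons in tryPressure come into play.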
3428 
3429 unsigned getWeakLeft(const SUnit *SU, bool isTop) {
3430   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
3431 }
3432 
3433 /// Minimize physical register live ranges. Regalloc wants them adjacent to
3434 /// their physreg def/use.
3435 ///
3436 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
3437 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
3438 /// with the operation that produces or consumes the physreg. We'll do this when
3439 /// regalloc has support for parallel copies.
3440 int biasPhysReg(const SUnit *SU, bool isTop) {
3441   const MachineInstr *MI = SU->getInstr();
3442 
3443   if (MI->isCopy()) {
3444     unsigned ScheduledOper = isTop ? 1 : 0;
3445     unsigned UnscheduledOper = isTop ? 0 : 1;
3446     // If we have already scheduled the physreg producer/consumer, immediately
3447     // schedule the copy.
3448     if (MI->getOperand(ScheduledOper).getReg().isPhysical())
3449       return 1;
3450     // If the physreg is at the boundary, defer it. Otherwise schedule it
3451     // immediately to free the dependent. We can hoist the copy later.
3452     bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
3453     if (MI->getOperand(UnscheduledOper).getReg().isPhysical())
3454       return AtBoundary ? -1 : 1;
3455   }
3456 
3457   if (MI->isMoveImmediate()) {
3458     // If we have a move immediate and all successors have been assigned, bias
3459     // towards scheduling this later. Make sure all register defs are to
3460     // physical registers.
3461     bool DoBias = true;
3462     for (const MachineOperand &Op : MI->defs()) {
3463       if (Op.isReg() && !Op.getReg().isPhysical()) {
3464         DoBias = false;
3465         break;
3466       }
3467     }
3468 
3469     if (DoBias)
3470       return isTop ? -1 : 1;
3471   }
3472 
3473   return 0;
3474 }
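// Illustrative bottom-up case (hypothetical copies): for "$physreg = COPY
// %vreg" the already-scheduled side (operand 0) is physical, so the copy is
// scheduled immediately (+1) next to its already-scheduled physreg user; for
// "%vreg = COPY $physreg" the unscheduled side is physical, so the copy is
// deferred (-1) when it has no unscheduled predecessors (it sits at the
// region boundary) and otherwise pulled forward (+1).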
3475 } // end namespace llvm
3476 
3477 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
3478                                      bool AtTop,
3479                                      const RegPressureTracker &RPTracker,
3480                                      RegPressureTracker &TempTracker) {
3481   Cand.SU = SU;
3482   Cand.AtTop = AtTop;
3483   if (DAG->isTrackingPressure()) {
3484     if (AtTop) {
3485       TempTracker.getMaxDownwardPressureDelta(
3486         Cand.SU->getInstr(),
3487         Cand.RPDelta,
3488         DAG->getRegionCriticalPSets(),
3489         DAG->getRegPressure().MaxSetPressure);
3490     } else {
3491       if (VerifyScheduling) {
3492         TempTracker.getMaxUpwardPressureDelta(
3493           Cand.SU->getInstr(),
3494           &DAG->getPressureDiff(Cand.SU),
3495           Cand.RPDelta,
3496           DAG->getRegionCriticalPSets(),
3497           DAG->getRegPressure().MaxSetPressure);
3498       } else {
3499         RPTracker.getUpwardPressureDelta(
3500           Cand.SU->getInstr(),
3501           DAG->getPressureDiff(Cand.SU),
3502           Cand.RPDelta,
3503           DAG->getRegionCriticalPSets(),
3504           DAG->getRegPressure().MaxSetPressure);
3505       }
3506     }
3507   }
3508   LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
3509              << "  Try  SU(" << Cand.SU->NodeNum << ") "
3510              << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
3511              << Cand.RPDelta.Excess.getUnitInc() << "\n");
3512 }
3513 
3514 /// Apply a set of heuristics to a new candidate. Heuristics are currently
3515 /// hierarchical. This may be more efficient than a graduated cost model because
3516 /// we don't need to evaluate all aspects of the model for each node in the
3517 /// queue. But it's really done to make the heuristics easier to debug and
3518 /// statistically analyze.
3519 ///
3520 /// \param Cand provides the policy and current best candidate.
3521 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3522 /// \param Zone describes the scheduled zone that we are extending, or nullptr
3523 ///             if Cand is from a different zone than TryCand.
3524 /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3525 bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
3526                                     SchedCandidate &TryCand,
3527                                     SchedBoundary *Zone) const {
3528   // Initialize the candidate if needed.
3529   if (!Cand.isValid()) {
3530     TryCand.Reason = NodeOrder;
3531     return true;
3532   }
3533 
3534   // Bias PhysReg Defs and copies to their uses and definitions, respectively.
3535   if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
3536                  biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
3537     return TryCand.Reason != NoCand;
3538 
3539   // Avoid exceeding the target's limit.
3540   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3541                                                Cand.RPDelta.Excess,
3542                                                TryCand, Cand, RegExcess, TRI,
3543                                                DAG->MF))
3544     return TryCand.Reason != NoCand;
3545 
3546   // Avoid increasing the max critical pressure in the scheduled region.
3547   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3548                                                Cand.RPDelta.CriticalMax,
3549                                                TryCand, Cand, RegCritical, TRI,
3550                                                DAG->MF))
3551     return TryCand.Reason != NoCand;
3552 
3553   // We only compare a subset of features when comparing nodes between
3554   // Top and Bottom boundary. Some properties are simply incomparable, in many
3555   // other instances we should only override the other boundary if something
3556   // is a clear good pick on one boundary. Skip heuristics that are more
3557   // "tie-breaking" in nature.
3558   bool SameBoundary = Zone != nullptr;
3559   if (SameBoundary) {
3560     // For loops that are acyclic path limited, aggressively schedule for
3561     // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
3562     // heuristics to take precedence.
3563     if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3564         tryLatency(TryCand, Cand, *Zone))
3565       return TryCand.Reason != NoCand;
3566 
3567     // Prioritize instructions that read unbuffered resources by stall cycles.
3568     if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3569                 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3570       return TryCand.Reason != NoCand;
3571   }
3572 
3573   // Keep clustered nodes together to encourage downstream peephole
3574   // optimizations which may reduce resource requirements.
3575   //
3576   // This is a best effort to set things up for a post-RA pass. Optimizations
3577   // like generating loads of multiple registers should ideally be done within
3578   // the scheduler pass by combining the loads during DAG postprocessing.
3579   const SUnit *CandNextClusterSU =
3580     Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3581   const SUnit *TryCandNextClusterSU =
3582     TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3583   if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3584                  Cand.SU == CandNextClusterSU,
3585                  TryCand, Cand, Cluster))
3586     return TryCand.Reason != NoCand;
3587 
3588   if (SameBoundary) {
3589     // Weak edges are for clustering and other constraints.
3590     if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3591                 getWeakLeft(Cand.SU, Cand.AtTop),
3592                 TryCand, Cand, Weak))
3593       return TryCand.Reason != NoCand;
3594   }
3595 
3596   // Avoid increasing the max pressure of the entire region.
3597   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3598                                                Cand.RPDelta.CurrentMax,
3599                                                TryCand, Cand, RegMax, TRI,
3600                                                DAG->MF))
3601     return TryCand.Reason != NoCand;
3602 
3603   if (SameBoundary) {
3604     // Avoid critical resource consumption and balance the schedule.
3605     TryCand.initResourceDelta(DAG, SchedModel);
3606     if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3607                 TryCand, Cand, ResourceReduce))
3608       return TryCand.Reason != NoCand;
3609     if (tryGreater(TryCand.ResDelta.DemandedResources,
3610                    Cand.ResDelta.DemandedResources,
3611                    TryCand, Cand, ResourceDemand))
3612       return TryCand.Reason != NoCand;
3613 
3614     // Avoid serializing long latency dependence chains.
3615     // For acyclic path limited loops, latency was already checked above.
3616     if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3617         !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3618       return TryCand.Reason != NoCand;
3619 
3620     // Fall through to original instruction order.
3621     if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3622         || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3623       TryCand.Reason = NodeOrder;
3624       return true;
3625     }
3626   }
3627 
3628   return false;
3629 }
3630 
3631 /// Pick the best candidate from the queue.
3632 ///
3633 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3634 /// DAG building. To adjust for the current scheduling location we need to
3635 /// maintain the number of vreg uses remaining to be top-scheduled.
3636 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3637                                          const CandPolicy &ZonePolicy,
3638                                          const RegPressureTracker &RPTracker,
3639                                          SchedCandidate &Cand) {
3640   // getMaxPressureDelta temporarily modifies the tracker.
3641   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3642 
3643   ReadyQueue &Q = Zone.Available;
3644   for (SUnit *SU : Q) {
3645 
3646     SchedCandidate TryCand(ZonePolicy);
3647     initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3648     // Pass SchedBoundary only when comparing nodes from the same boundary.
3649     SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3650     if (tryCandidate(Cand, TryCand, ZoneArg)) {
3651       // Initialize resource delta if needed in case future heuristics query it.
3652       if (TryCand.ResDelta == SchedResourceDelta())
3653         TryCand.initResourceDelta(DAG, SchedModel);
3654       Cand.setBest(TryCand);
3655       LLVM_DEBUG(traceCandidate(Cand));
3656     }
3657   }
3658 }
3659 
3660 /// Pick the best candidate node from either the top or bottom queue.
3661 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3662   // Schedule as far as possible in the direction of no choice. This is most
3663   // efficient, but also provides the best heuristics for CriticalPSets.
3664   if (SUnit *SU = Bot.pickOnlyChoice()) {
3665     IsTopNode = false;
3666     tracePick(Only1, false);
3667     return SU;
3668   }
3669   if (SUnit *SU = Top.pickOnlyChoice()) {
3670     IsTopNode = true;
3671     tracePick(Only1, true);
3672     return SU;
3673   }
3674   // Set the bottom-up policy based on the state of the current bottom zone and
3675   // the instructions outside the zone, including the top zone.
3676   CandPolicy BotPolicy;
3677   setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3678   // Set the top-down policy based on the state of the current top zone and
3679   // the instructions outside the zone, including the bottom zone.
3680   CandPolicy TopPolicy;
3681   setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3682 
3683   // See if BotCand is still valid (because we previously scheduled from Top).
3684   LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3685   if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3686       BotCand.Policy != BotPolicy) {
3687     BotCand.reset(CandPolicy());
3688     pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3689     assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3690   } else {
3691     LLVM_DEBUG(traceCandidate(BotCand));
3692 #ifndef NDEBUG
3693     if (VerifyScheduling) {
3694       SchedCandidate TCand;
3695       TCand.reset(CandPolicy());
3696       pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3697       assert(TCand.SU == BotCand.SU &&
3698              "Last pick result should correspond to re-picking right now");
3699     }
3700 #endif
3701   }
3702 
3703   // Check if the top Q has a better candidate.
3704   LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3705   if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3706       TopCand.Policy != TopPolicy) {
3707     TopCand.reset(CandPolicy());
3708     pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3709     assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3710   } else {
3711     LLVM_DEBUG(traceCandidate(TopCand));
3712 #ifndef NDEBUG
3713     if (VerifyScheduling) {
3714       SchedCandidate TCand;
3715       TCand.reset(CandPolicy());
3716       pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3717       assert(TCand.SU == TopCand.SU &&
3718            "Last pick result should correspond to re-picking right now");
3719     }
3720 #endif
3721   }
3722 
3723   // Pick best from BotCand and TopCand.
3724   assert(BotCand.isValid());
3725   assert(TopCand.isValid());
3726   SchedCandidate Cand = BotCand;
3727   TopCand.Reason = NoCand;
3728   if (tryCandidate(Cand, TopCand, nullptr)) {
3729     Cand.setBest(TopCand);
3730     LLVM_DEBUG(traceCandidate(Cand));
3731   }
3732 
3733   IsTopNode = Cand.AtTop;
3734   tracePick(Cand);
3735   return Cand.SU;
3736 }
3737 
3738 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3739 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3740   if (DAG->top() == DAG->bottom()) {
3741     assert(Top.Available.empty() && Top.Pending.empty() &&
3742            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3743     return nullptr;
3744   }
3745   SUnit *SU;
3746   do {
3747     if (RegionPolicy.OnlyTopDown) {
3748       SU = Top.pickOnlyChoice();
3749       if (!SU) {
3750         CandPolicy NoPolicy;
3751         TopCand.reset(NoPolicy);
3752         pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3753         assert(TopCand.Reason != NoCand && "failed to find a candidate");
3754         tracePick(TopCand);
3755         SU = TopCand.SU;
3756       }
3757       IsTopNode = true;
3758     } else if (RegionPolicy.OnlyBottomUp) {
3759       SU = Bot.pickOnlyChoice();
3760       if (!SU) {
3761         CandPolicy NoPolicy;
3762         BotCand.reset(NoPolicy);
3763         pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3764         assert(BotCand.Reason != NoCand && "failed to find a candidate");
3765         tracePick(BotCand);
3766         SU = BotCand.SU;
3767       }
3768       IsTopNode = false;
3769     } else {
3770       SU = pickNodeBidirectional(IsTopNode);
3771     }
3772   } while (SU->isScheduled);
3773 
3774   if (SU->isTopReady())
3775     Top.removeReady(SU);
3776   if (SU->isBottomReady())
3777     Bot.removeReady(SU);
3778 
3779   LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3780                     << *SU->getInstr());
3781   return SU;
3782 }
3783 
3784 void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
3785   MachineBasicBlock::iterator InsertPos = SU->getInstr();
3786   if (!isTop)
3787     ++InsertPos;
3788   SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3789 
3790   // Find already scheduled copies with a single physreg dependence and move
3791   // them just above the scheduled instruction.
3792   for (SDep &Dep : Deps) {
3793     if (Dep.getKind() != SDep::Data ||
3794         !Register::isPhysicalRegister(Dep.getReg()))
3795       continue;
3796     SUnit *DepSU = Dep.getSUnit();
3797     if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3798       continue;
3799     MachineInstr *Copy = DepSU->getInstr();
3800     if (!Copy->isCopy() && !Copy->isMoveImmediate())
3801       continue;
3802     LLVM_DEBUG(dbgs() << "  Rescheduling physreg copy ";
3803                DAG->dumpNode(*Dep.getSUnit()));
3804     DAG->moveInstruction(Copy, InsertPos);
3805   }
3806 }
3807 
3808 /// Update the scheduler's state after scheduling a node. This is the same node
3809 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3810 /// update its state based on the current cycle before MachineSchedStrategy
3811 /// does.
3812 ///
3813 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3814 /// them here. See comments in biasPhysReg.
3815 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3816   if (IsTopNode) {
3817     SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3818     Top.bumpNode(SU);
3819     if (SU->hasPhysRegUses)
3820       reschedulePhysReg(SU, true);
3821   } else {
3822     SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3823     Bot.bumpNode(SU);
3824     if (SU->hasPhysRegDefs)
3825       reschedulePhysReg(SU, false);
3826   }
3827 }
3828 
3829 /// Create the standard converging machine scheduler. This will be used as the
3830 /// default scheduler if the target does not set a default.
3831 ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3832   ScheduleDAGMILive *DAG =
3833       new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
3834   // Register DAG post-processors.
3835   //
3836   // FIXME: extend the mutation API to allow earlier mutations to instantiate
3837   // data and pass it to later mutations. Have a single mutation that gathers
3838   // the interesting nodes in one pass.
3839   DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3840 
3841   const TargetSubtargetInfo &STI = C->MF->getSubtarget();
3842   // Add MacroFusion mutation if fusions are not empty.
3843   const auto &MacroFusions = STI.getMacroFusions();
3844   if (!MacroFusions.empty())
3845     DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
3846   return DAG;
3847 }
3848 
3849 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
3850   return createGenericSchedLive(C);
3851 }
3852 
3853 static MachineSchedRegistry
3854 GenericSchedRegistry("converge", "Standard converging scheduler.",
3855                      createConvergingSched);
3856 
3857 //===----------------------------------------------------------------------===//
3858 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3859 //===----------------------------------------------------------------------===//
3860 
3861 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3862   DAG = Dag;
3863   SchedModel = DAG->getSchedModel();
3864   TRI = DAG->TRI;
3865 
3866   Rem.init(DAG, SchedModel);
3867   Top.init(DAG, SchedModel, &Rem);
3868   Bot.init(DAG, SchedModel, &Rem);
3869 
3870   // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3871   // or are disabled, then these HazardRecs will be disabled.
3872   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3873   if (!Top.HazardRec) {
3874     Top.HazardRec =
3875         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3876             Itin, DAG);
3877   }
3878   if (!Bot.HazardRec) {
3879     Bot.HazardRec =
3880         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3881             Itin, DAG);
3882   }
3883 }
3884 
3885 void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
3886                                       MachineBasicBlock::iterator End,
3887                                       unsigned NumRegionInstrs) {
3888   if (PostRADirection == MISchedPostRASched::TopDown) {
3889     RegionPolicy.OnlyTopDown = true;
3890     RegionPolicy.OnlyBottomUp = false;
3891   } else if (PostRADirection == MISchedPostRASched::BottomUp) {
3892     RegionPolicy.OnlyTopDown = false;
3893     RegionPolicy.OnlyBottomUp = true;
3894   }
3895 }
3896 
3897 void PostGenericScheduler::registerRoots() {
3898   Rem.CriticalPath = DAG->ExitSU.getDepth();
3899 
3900   // Some roots may not feed into ExitSU. Check all of them in case.
3901   for (const SUnit *SU : Bot.Available) {
3902     if (SU->getDepth() > Rem.CriticalPath)
3903       Rem.CriticalPath = SU->getDepth();
3904   }
3905   LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3906   if (DumpCriticalPathLength) {
3907     errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3908   }
3909 }
3910 
3911 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
3912 ///
3913 /// \param Cand provides the policy and current best candidate.
3914 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3915 /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3916 bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3917                                         SchedCandidate &TryCand) {
3918   // Initialize the candidate if needed.
3919   if (!Cand.isValid()) {
3920     TryCand.Reason = NodeOrder;
3921     return true;
3922   }
3923 
3924   // Prioritize instructions that read unbuffered resources by stall cycles.
3925   if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3926               Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3927     return TryCand.Reason != NoCand;
3928 
3929   // Keep clustered nodes together.
3930   if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3931                  Cand.SU == DAG->getNextClusterSucc(),
3932                  TryCand, Cand, Cluster))
3933     return TryCand.Reason != NoCand;
3934 
3935   // Avoid critical resource consumption and balance the schedule.
3936   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3937               TryCand, Cand, ResourceReduce))
3938     return TryCand.Reason != NoCand;
3939   if (tryGreater(TryCand.ResDelta.DemandedResources,
3940                  Cand.ResDelta.DemandedResources,
3941                  TryCand, Cand, ResourceDemand))
3942     return TryCand.Reason != NoCand;
3943 
3944   // Avoid serializing long latency dependence chains.
3945   if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3946     return TryCand.Reason != NoCand;
3947   }
3948 
3949   // Fall through to original instruction order.
3950   if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
3951     TryCand.Reason = NodeOrder;
3952     return true;
3953   }
3954 
3955   return false;
3956 }
3957 
3958 void PostGenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3959                                              SchedCandidate &Cand) {
3960   ReadyQueue &Q = Zone.Available;
3961   for (SUnit *SU : Q) {
3962     SchedCandidate TryCand(Cand.Policy);
3963     TryCand.SU = SU;
3964     TryCand.AtTop = Zone.isTop();
3965     TryCand.initResourceDelta(DAG, SchedModel);
3966     if (tryCandidate(Cand, TryCand)) {
3967       Cand.setBest(TryCand);
3968       LLVM_DEBUG(traceCandidate(Cand));
3969     }
3970   }
3971 }
3972 
3973 /// Pick the next node to schedule.
3974 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3975   if (DAG->top() == DAG->bottom()) {
3976     assert(Top.Available.empty() && Top.Pending.empty() &&
3977            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3978     return nullptr;
3979   }
3980   SUnit *SU;
3981   do {
3982     if (RegionPolicy.OnlyBottomUp) {
3983       assert(!RegionPolicy.OnlyTopDown);
3984       SU = Bot.pickOnlyChoice();
3985       if (SU) {
3986         tracePick(Only1, true);
3987       } else {
3988         CandPolicy NoPolicy;
3989         SchedCandidate BotCand(NoPolicy);
3990         // Set the bottom-up policy based on the state of the current bottom
3991         // zone and the instructions outside the zone, including the top zone.
3992         setPolicy(BotCand.Policy, /*IsPostRA=*/true, Bot, nullptr);
3993         pickNodeFromQueue(Bot, BotCand);
3994         assert(BotCand.Reason != NoCand && "failed to find a candidate");
3995         tracePick(BotCand);
3996         SU = BotCand.SU;
3997       }
3998       IsTopNode = false;
3999     } else {
4001       assert(RegionPolicy.OnlyTopDown);
4002       SU = Top.pickOnlyChoice();
4003       if (SU) {
4004         tracePick(Only1, true);
4005       } else {
4006         CandPolicy NoPolicy;
4007         SchedCandidate TopCand(NoPolicy);
4008         // Set the top-down policy based on the state of the current top zone
4009         // and the instructions outside the zone, including the bottom zone.
4010         setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
4011         pickNodeFromQueue(Top, TopCand);
4012         assert(TopCand.Reason != NoCand && "failed to find a candidate");
4013         tracePick(TopCand);
4014         SU = TopCand.SU;
4015       }
4016       IsTopNode = true;
4017     }
4018   } while (SU->isScheduled);
4019 
4020   if (SU->isTopReady())
4021     Top.removeReady(SU);
4022   if (SU->isBottomReady())
4023     Bot.removeReady(SU);
4024 
4025   LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
4026                     << *SU->getInstr());
4027   return SU;
4028 }
4029 
4030 /// Called after ScheduleDAGMI has scheduled an instruction and updated
4031 /// scheduled/remaining flags in the DAG nodes.
4032 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
4033   if (IsTopNode) {
4034     SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
4035     Top.bumpNode(SU);
4036   } else {
4037     SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
4038     Bot.bumpNode(SU);
4039   }
4040 }
4041 
4042 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
4043   ScheduleDAGMI *DAG =
4044       new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
4045                         /*RemoveKillFlags=*/true);
4046   const TargetSubtargetInfo &STI = C->MF->getSubtarget();
4047   // Add MacroFusion mutation if fusions are not empty.
4048   const auto &MacroFusions = STI.getMacroFusions();
4049   if (!MacroFusions.empty())
4050     DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
4051   return DAG;
4052 }
4053 
4054 //===----------------------------------------------------------------------===//
4055 // ILP Scheduler. Currently for experimental analysis of heuristics.
4056 //===----------------------------------------------------------------------===//
4057 
4058 namespace {
4059 
4060 /// Order nodes by the ILP metric.
4061 struct ILPOrder {
4062   const SchedDFSResult *DFSResult = nullptr;
4063   const BitVector *ScheduledTrees = nullptr;
4064   bool MaximizeILP;
4065 
4066   ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
4067 
4068   /// Apply a less-than relation on node priority.
4069   ///
4070   /// (Return true if A comes after B in the Q.)
4071   bool operator()(const SUnit *A, const SUnit *B) const {
4072     unsigned SchedTreeA = DFSResult->getSubtreeID(A);
4073     unsigned SchedTreeB = DFSResult->getSubtreeID(B);
4074     if (SchedTreeA != SchedTreeB) {
4075       // Unscheduled trees have lower priority.
4076       if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
4077         return ScheduledTrees->test(SchedTreeB);
4078 
4079       // Trees with shallower connections have lower priority.
4080       if (DFSResult->getSubtreeLevel(SchedTreeA)
4081           != DFSResult->getSubtreeLevel(SchedTreeB)) {
4082         return DFSResult->getSubtreeLevel(SchedTreeA)
4083           < DFSResult->getSubtreeLevel(SchedTreeB);
4084       }
4085     }
4086     if (MaximizeILP)
4087       return DFSResult->getILP(A) < DFSResult->getILP(B);
4088     else
4089       return DFSResult->getILP(A) > DFSResult->getILP(B);
4090   }
4091 };
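
// Illustrative ordering under ILPOrder, with assumed DFS values: when both
// nodes sit in the same subtree and MaximizeILP is true, ILP(A) = 3 and
// ILP(B) = 5 make operator()(A, B) return true, so A sorts after B and B is
// popped from the heap first. If the nodes were in different subtrees and
// only B's subtree had already been scheduled, B would win outright,
// regardless of the ILP values.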
4092 
4093 /// Schedule based on the ILP metric.
4094 class ILPScheduler : public MachineSchedStrategy {
4095   ScheduleDAGMILive *DAG = nullptr;
4096   ILPOrder Cmp;
4097 
4098   std::vector<SUnit*> ReadyQ;
4099 
4100 public:
4101   ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
4102 
4103   void initialize(ScheduleDAGMI *dag) override {
4104     assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
4105     DAG = static_cast<ScheduleDAGMILive*>(dag);
4106     DAG->computeDFSResult();
4107     Cmp.DFSResult = DAG->getDFSResult();
4108     Cmp.ScheduledTrees = &DAG->getScheduledTrees();
4109     ReadyQ.clear();
4110   }
4111 
4112   void registerRoots() override {
4113     // Restore the heap in ReadyQ with the updated DFS results.
4114     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4115   }
4116 
4117   /// Implement MachineSchedStrategy interface.
4118   /// -----------------------------------------
4119 
4120   /// Callback to select the highest priority node from the ready Q.
4121   SUnit *pickNode(bool &IsTopNode) override {
4122     if (ReadyQ.empty()) return nullptr;
4123     std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4124     SUnit *SU = ReadyQ.back();
4125     ReadyQ.pop_back();
4126     IsTopNode = false;
4127     LLVM_DEBUG(dbgs() << "Pick node "
4128                       << "SU(" << SU->NodeNum << ") "
4129                       << " ILP: " << DAG->getDFSResult()->getILP(SU)
4130                       << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
4131                       << " @"
4132                       << DAG->getDFSResult()->getSubtreeLevel(
4133                              DAG->getDFSResult()->getSubtreeID(SU))
4134                       << '\n'
4135                       << "Scheduling " << *SU->getInstr());
4136     return SU;
4137   }
4138 
4139   /// Scheduler callback to notify that a new subtree is scheduled.
4140   void scheduleTree(unsigned SubtreeID) override {
4141     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4142   }
4143 
4144   /// Callback after a node is scheduled. The DAG marks the subtree and calls
4145   /// scheduleTree() to resort the priority Q; here we only check direction.
4146   void schedNode(SUnit *SU, bool IsTopNode) override {
4147     assert(!IsTopNode && "SchedDFSResult needs bottom-up");
4148   }
4149 
4150   void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
4151 
4152   void releaseBottomNode(SUnit *SU) override {
4153     ReadyQ.push_back(SU);
4154     std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4155   }
4156 };
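
// Sketch of the queue lifecycle implied above: releaseBottomNode() pushes a
// newly ready SUnit and restores the heap with std::push_heap, pickNode()
// extracts the best node with std::pop_heap, and scheduleTree() re-heapifies
// the whole ReadyQ with std::make_heap because marking a subtree as scheduled
// changes ILPOrder's answer for every pending element, which a push/pop pair
// alone would not repair.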
4157 
4158 } // end anonymous namespace
4159 
4160 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
4161   return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
4162 }
4163 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
4164   return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
4165 }
4166 
4167 static MachineSchedRegistry ILPMaxRegistry(
4168   "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
4169 static MachineSchedRegistry ILPMinRegistry(
4170   "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
4171 
4172 //===----------------------------------------------------------------------===//
4173 // Machine Instruction Shuffler for Correctness Testing
4174 //===----------------------------------------------------------------------===//
4175 
4176 #ifndef NDEBUG
4177 namespace {
4178 
4179 /// Apply a less-than relation on the node order, which corresponds to the
4180 /// instruction order prior to scheduling. IsReverse implements greater-than.
4181 template<bool IsReverse>
4182 struct SUnitOrder {
4183   bool operator()(SUnit *A, SUnit *B) const {
4184     if (IsReverse)
4185       return A->NodeNum > B->NodeNum;
4186     else
4187       return A->NodeNum < B->NodeNum;
4188   }
4189 };
4190 
4191 /// Reorder instructions as much as possible.
4192 class InstructionShuffler : public MachineSchedStrategy {
4193   bool IsAlternating;
4194   bool IsTopDown;
4195 
4196   // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
4197   // gives nodes with a higher number higher priority, causing the latest
4198   // instructions to be scheduled first.
4199   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
4200     TopQ;
4201 
4202   // When scheduling bottom-up, use greater-than as the queue priority.
4203   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
4204     BottomQ;
4205 
4206 public:
4207   InstructionShuffler(bool alternate, bool topdown)
4208     : IsAlternating(alternate), IsTopDown(topdown) {}
4209 
4210   void initialize(ScheduleDAGMI*) override {
4211     TopQ.clear();
4212     BottomQ.clear();
4213   }
4214 
4215   /// Implement MachineSchedStrategy interface.
4216   /// -----------------------------------------
4217 
4218   SUnit *pickNode(bool &IsTopNode) override {
4219     SUnit *SU;
4220     if (IsTopDown) {
4221       do {
4222         if (TopQ.empty()) return nullptr;
4223         SU = TopQ.top();
4224         TopQ.pop();
4225       } while (SU->isScheduled);
4226       IsTopNode = true;
4227     } else {
4228       do {
4229         if (BottomQ.empty()) return nullptr;
4230         SU = BottomQ.top();
4231         BottomQ.pop();
4232       } while (SU->isScheduled);
4233       IsTopNode = false;
4234     }
4235     if (IsAlternating)
4236       IsTopDown = !IsTopDown;
4237     return SU;
4238   }
4239 
4240   void schedNode(SUnit *SU, bool IsTopNode) override {}
4241 
4242   void releaseTopNode(SUnit *SU) override {
4243     TopQ.push(SU);
4244   }
4245   void releaseBottomNode(SUnit *SU) override {
4246     BottomQ.push(SU);
4247   }
4248 };
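
// Illustrative behaviour with assumed NodeNums (this strategy is debug-only):
// if TopQ currently holds SU0, SU1 and SU2, SUnitOrder<false> makes SU2 the
// top element, so the latest ready instruction is scheduled first; in
// alternating mode the next pick comes from BottomQ, where SUnitOrder<true>
// favours the lowest NodeNum instead.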
4249 
4250 } // end anonymous namespace
4251 
4252 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
4253   bool Alternate = !ForceTopDown && !ForceBottomUp;
4254   bool TopDown = !ForceBottomUp;
4255   assert((TopDown || !ForceTopDown) &&
4256          "-misched-topdown incompatible with -misched-bottomup");
4257   return new ScheduleDAGMILive(
4258       C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
4259 }
4260 
4261 static MachineSchedRegistry ShufflerRegistry(
4262   "shuffle", "Shuffle machine instructions alternating directions",
4263   createInstructionShuffler);
4264 #endif // !NDEBUG
4265 
4266 //===----------------------------------------------------------------------===//
4267 // GraphWriter support for ScheduleDAGMILive.
4268 //===----------------------------------------------------------------------===//
4269 
4270 #ifndef NDEBUG
4271 namespace llvm {
4272 
4273 template<> struct GraphTraits<
4274   ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
4275 
4276 template<>
4277 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
4278   DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
4279 
4280   static std::string getGraphName(const ScheduleDAG *G) {
4281     return std::string(G->MF.getName());
4282   }
4283 
4284   static bool renderGraphFromBottomUp() {
4285     return true;
4286   }
4287 
4288   static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
4289     if (ViewMISchedCutoff == 0)
4290       return false;
4291     return (Node->Preds.size() > ViewMISchedCutoff
4292          || Node->Succs.size() > ViewMISchedCutoff);
4293   }
4294 
4295   /// If you want to override the dot attributes printed for a particular
4296   /// edge, override this method.
4297   static std::string getEdgeAttributes(const SUnit *Node,
4298                                        SUnitIterator EI,
4299                                        const ScheduleDAG *Graph) {
4300     if (EI.isArtificialDep())
4301       return "color=cyan,style=dashed";
4302     if (EI.isCtrlDep())
4303       return "color=blue,style=dashed";
4304     return "";
4305   }
4306 
4307   static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
4308     std::string Str;
4309     raw_string_ostream SS(Str);
4310     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4311     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4312       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4313     SS << "SU:" << SU->NodeNum;
4314     if (DFS)
4315       SS << " I:" << DFS->getNumInstrs(SU);
4316     return SS.str();
4317   }
4318 
4319   static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
4320     return G->getGraphNodeLabel(SU);
4321   }
4322 
4323   static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
4324     std::string Str("shape=Mrecord");
4325     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4326     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4327       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4328     if (DFS) {
4329       Str += ",style=filled,fillcolor=\"#";
4330       Str += DOT::getColorString(DFS->getSubtreeID(N));
4331       Str += '"';
4332     }
4333     return Str;
4334   }
4335 };
4336 
4337 } // end namespace llvm
4338 #endif // NDEBUG
4339 
4340 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
4341 /// rendered using 'dot'.
4342 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
4343 #ifndef NDEBUG
4344   ViewGraph(this, Name, false, Title);
4345 #else
4346   errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
4347          << "systems with Graphviz or gv!\n";
4348 #endif  // NDEBUG
4349 }
4350 
4351 /// Out-of-line implementation with no arguments is handy for gdb.
4352 void ScheduleDAGMI::viewGraph() {
4353   viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
4354 }
4355 
4356 /// Sort predicate for the intervals stored in an instance of
4357 /// ResourceSegments. Intervals are always disjoint (no two intervals
4358 /// intersect), therefore the whole collection can be sorted by comparing
4359 /// their left boundaries alone.
4360 static bool sortIntervals(const ResourceSegments::IntervalTy &A,
4361                           const ResourceSegments::IntervalTy &B) {
4362   return A.first < B.first;
4363 }
4364 
4365 unsigned ResourceSegments::getFirstAvailableAt(
4366     unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle,
4367     std::function<ResourceSegments::IntervalTy(unsigned, unsigned, unsigned)>
4368         IntervalBuilder) const {
4369   assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals),
4370                         sortIntervals) &&
4371          "Cannot execute on an un-sorted set of intervals.");
4372 
4373   // Zero resource usage is allowed by TargetSchedule.td but we do not construct
4374   // a ResourceSegment interval for that situation.
4375   if (AcquireAtCycle == ReleaseAtCycle)
4376     return CurrCycle;
4377 
4378   unsigned RetCycle = CurrCycle;
4379   ResourceSegments::IntervalTy NewInterval =
4380       IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4381   for (auto &Interval : _Intervals) {
4382     if (!intersects(NewInterval, Interval))
4383       continue;
4384 
4385     // Move the candidate interval so that it sits just past the upper end
4386     // of the interval it intersects.
4387     assert(Interval.second > NewInterval.first &&
4388            "Invalid intervals configuration.");
4389     RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first;
4390     NewInterval = IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
4391   }
4392   return RetCycle;
4393 }
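
// Worked example for getFirstAvailableAt, assuming an IntervalBuilder that
// maps a start cycle C to the half-open interval
// [C + AcquireAtCycle, C + ReleaseAtCycle): with _Intervals = {[2, 4)},
// CurrCycle = 3, AcquireAtCycle = 0 and ReleaseAtCycle = 2, the candidate
// interval [3, 5) intersects [2, 4), so RetCycle is bumped by 4 - 3 = 1 and
// the rebuilt candidate [4, 6) no longer conflicts; the function returns 4.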
4394 
4395 void ResourceSegments::add(ResourceSegments::IntervalTy A,
4396                            const unsigned CutOff) {
4397   assert(A.first <= A.second && "Cannot add negative resource usage");
4398   assert(CutOff > 0 && "0-size interval history has no use.");
4399   // Zero resource usage is allowed by TargetSchedule.td, in the case that the
4400   // instruction needed the resource to be available but does not use it.
4401   // However, ResourceSegments stores half-open intervals [first, second); an
4402   // interval with first == second is empty and carries no occupancy
4403   // information, so do not add it to _Intervals.
4404   if (A.first == A.second)
4405     return;
4406 
4407   assert(all_of(_Intervals,
4408                 [&A](const ResourceSegments::IntervalTy &Interval) -> bool {
4409                   return !intersects(A, Interval);
4410                 }) &&
4411          "A resource is being overwritten");
4412   _Intervals.push_back(A);
4413 
4414   sortAndMerge();
4415 
4416   // Do not keep the full history of the intervals, just the
4417   // latest #CutOff.
4418   while (_Intervals.size() > CutOff)
4419     _Intervals.pop_front();
4420 }
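
// Illustrative trace for add() with an assumed CutOff of 2: starting from
// _Intervals = {[1, 2), [3, 4)}, adding [5, 6) passes the overlap assertion,
// sortAndMerge() keeps the three disjoint segments as they are, and the
// history trimming then drops the oldest (leftmost) entry, leaving
// {[3, 4), [5, 6)}.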
4421 
4422 bool ResourceSegments::intersects(ResourceSegments::IntervalTy A,
4423                                   ResourceSegments::IntervalTy B) {
4424   assert(A.first <= A.second && "Invalid interval");
4425   assert(B.first <= B.second && "Invalid interval");
4426 
4427   // Share one boundary.
4428   if ((A.first == B.first) || (A.second == B.second))
4429     return true;
4430 
4431   // full intersect:    [    ***     )  B
4432   //                        [***)       A
4433   if ((A.first > B.first) && (A.second < B.second))
4434     return true;
4435 
4436   // right intersect: [     ***)        B
4437   //                       [***      )  A
4438   if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second))
4439     return true;
4440 
4441   // left intersect:      [***      )  B
4442   //                 [     ***)        A
4443   if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first))
4444     return true;
4445 
4446   return false;
4447 }
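
// Concrete cases for intersects(), with values chosen purely for
// illustration: [2, 5) and [4, 6) overlap via the "left intersect" branch;
// [2, 4) and [4, 6) merely touch and are reported as disjoint; [2, 10)
// versus [4, 6) is also caught by the "left intersect" branch, whose last
// condition only requires B to be non-empty, so B fully contained in A is
// covered as well.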
4448 
4449 void ResourceSegments::sortAndMerge() {
4450   if (_Intervals.size() <= 1)
4451     return;
4452 
4453   // First sort the collection.
4454   _Intervals.sort(sortIntervals);
4455 
4456   // We can use std::next() because the list has at least two elements.
4457   auto next = std::next(std::begin(_Intervals));
4458   auto E = std::end(_Intervals);
4459   for (; next != E; ++next) {
4460     if (std::prev(next)->second >= next->first) {
4461       next->first = std::prev(next)->first;
4462       _Intervals.erase(std::prev(next));
4463       continue;
4464     }
4465   }
4466 }
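
// Illustrative merge with assumed inputs: {[3, 6), [1, 3), [7, 9)} is first
// sorted to {[1, 3), [3, 6), [7, 9)}; since 3 >= 3 the first two segments
// are coalesced into [1, 6), while [7, 9) stays separate because 6 < 7,
// giving {[1, 6), [7, 9)}.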
4467