xref: /llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp (revision 62c0bdabfa32f6e32b17b45425b15309db340dfb)
1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // MachineScheduler schedules machine instructions after phi elimination. It
10 // preserves LiveIntervals so it can be invoked before register allocation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/CodeGen/MachineScheduler.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/BitVector.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/PriorityQueue.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/ADT/iterator_range.h"
23 #include "llvm/Analysis/AliasAnalysis.h"
24 #include "llvm/CodeGen/LiveInterval.h"
25 #include "llvm/CodeGen/LiveIntervals.h"
26 #include "llvm/CodeGen/MachineBasicBlock.h"
27 #include "llvm/CodeGen/MachineDominators.h"
28 #include "llvm/CodeGen/MachineFunction.h"
29 #include "llvm/CodeGen/MachineFunctionPass.h"
30 #include "llvm/CodeGen/MachineInstr.h"
31 #include "llvm/CodeGen/MachineLoopInfo.h"
32 #include "llvm/CodeGen/MachineOperand.h"
33 #include "llvm/CodeGen/MachinePassRegistry.h"
34 #include "llvm/CodeGen/MachineRegisterInfo.h"
35 #include "llvm/CodeGen/MachineValueType.h"
36 #include "llvm/CodeGen/RegisterClassInfo.h"
37 #include "llvm/CodeGen/RegisterPressure.h"
38 #include "llvm/CodeGen/ScheduleDAG.h"
39 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
40 #include "llvm/CodeGen/ScheduleDAGMutation.h"
41 #include "llvm/CodeGen/ScheduleDFS.h"
42 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
43 #include "llvm/CodeGen/SlotIndexes.h"
44 #include "llvm/CodeGen/TargetFrameLowering.h"
45 #include "llvm/CodeGen/TargetInstrInfo.h"
46 #include "llvm/CodeGen/TargetLowering.h"
47 #include "llvm/CodeGen/TargetPassConfig.h"
48 #include "llvm/CodeGen/TargetRegisterInfo.h"
49 #include "llvm/CodeGen/TargetSchedule.h"
50 #include "llvm/CodeGen/TargetSubtargetInfo.h"
51 #include "llvm/Config/llvm-config.h"
52 #include "llvm/InitializePasses.h"
53 #include "llvm/MC/LaneBitmask.h"
54 #include "llvm/Pass.h"
55 #include "llvm/Support/CommandLine.h"
56 #include "llvm/Support/Compiler.h"
57 #include "llvm/Support/Debug.h"
58 #include "llvm/Support/ErrorHandling.h"
59 #include "llvm/Support/GraphWriter.h"
60 #include "llvm/Support/raw_ostream.h"
61 #include <algorithm>
62 #include <cassert>
63 #include <cstdint>
64 #include <iterator>
65 #include <limits>
66 #include <memory>
67 #include <string>
68 #include <tuple>
69 #include <utility>
70 #include <vector>
71 
72 using namespace llvm;
73 
74 #define DEBUG_TYPE "machine-scheduler"
75 
76 STATISTIC(NumClustered, "Number of load/store pairs clustered");
77 
78 namespace llvm {
79 
80 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
81                            cl::desc("Force top-down list scheduling"));
82 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
83                             cl::desc("Force bottom-up list scheduling"));
84 cl::opt<bool>
85 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
86                        cl::desc("Print critical path length to stdout"));
87 
88 cl::opt<bool> VerifyScheduling(
89     "verify-misched", cl::Hidden,
90     cl::desc("Verify machine instrs before and after machine scheduling"));
91 
92 #ifndef NDEBUG
93 cl::opt<bool> ViewMISchedDAGs(
94     "view-misched-dags", cl::Hidden,
95     cl::desc("Pop up a window to show MISched dags after they are processed"));
96 cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
97                         cl::desc("Print schedule DAGs"));
98 cl::opt<bool> MISchedDumpReservedCycles(
99     "misched-dump-reserved-cycles", cl::Hidden, cl::init(false),
100     cl::desc("Dump resource usage at schedule boundary."));
101 #else
102 const bool ViewMISchedDAGs = false;
103 const bool PrintDAGs = false;
104 #ifdef LLVM_ENABLE_DUMP
105 const bool MISchedDumpReservedCycles = false;
106 #endif // LLVM_ENABLE_DUMP
107 #endif // NDEBUG
108 
109 } // end namespace llvm
110 
111 #ifndef NDEBUG
112 /// In some situations a few uninteresting nodes depend on nearly all other
113 /// nodes in the graph; provide a cutoff to hide them.
114 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
115   cl::desc("Hide nodes with more predecessors/successors than the cutoff"));
116 
117 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
118   cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
119 
120 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
121   cl::desc("Only schedule this function"));
122 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
123                                         cl::desc("Only schedule this MBB#"));
124 #endif // NDEBUG
125 
126 /// Avoid quadratic complexity in unusually large basic blocks by limiting the
127 /// size of the ready lists.
128 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
129   cl::desc("Limit ready list to N instructions"), cl::init(256));
130 
131 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
132   cl::desc("Enable register pressure scheduling."), cl::init(true));
133 
134 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
135   cl::desc("Enable cyclic critical path analysis."), cl::init(true));
136 
137 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
138                                         cl::desc("Enable memop clustering."),
139                                         cl::init(true));
140 static cl::opt<bool>
141     ForceFastCluster("force-fast-cluster", cl::Hidden,
142                      cl::desc("Switch to fast cluster algorithm with the loss "
143                               "of some fusion opportunities"),
144                      cl::init(false));
145 static cl::opt<unsigned>
146     FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
147                          cl::desc("The threshold for fast cluster"),
148                          cl::init(1000));
149 
150 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
151 static cl::opt<bool> MISchedDumpScheduleTrace(
152     "misched-dump-schedule-trace", cl::Hidden, cl::init(false),
153     cl::desc("Dump resource usage at schedule boundary."));
154 static cl::opt<unsigned>
155     HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
156                    cl::desc("Set width of the columns with "
157                             "the resources and schedule units"),
158                    cl::init(19));
159 static cl::opt<unsigned>
160     ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
161              cl::desc("Set width of the columns showing resource booking."),
162              cl::init(5));
163 #endif
164 
165 static cl::opt<unsigned>
166     MIResourceCutOff("misched-resource-cutoff", cl::Hidden,
167                      cl::desc("Number of intervals to track"), cl::init(10));
168 
169 // DAG subtrees must have at least this many nodes.
170 static const unsigned MinSubtreeSize = 8;
171 
172 // Pin the vtables to this file.
173 void MachineSchedStrategy::anchor() {}
174 
175 void ScheduleDAGMutation::anchor() {}
176 
177 //===----------------------------------------------------------------------===//
178 // Machine Instruction Scheduling Pass and Registry
179 //===----------------------------------------------------------------------===//
180 
181 MachineSchedContext::MachineSchedContext() {
182   RegClassInfo = new RegisterClassInfo();
183 }
184 
185 MachineSchedContext::~MachineSchedContext() {
186   delete RegClassInfo;
187 }
188 
189 namespace {
190 
191 /// Base class for a machine scheduler class that can run at any point.
192 class MachineSchedulerBase : public MachineSchedContext,
193                              public MachineFunctionPass {
194 public:
195   MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
196 
197   void print(raw_ostream &O, const Module* = nullptr) const override;
198 
199 protected:
200   void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
201 };
202 
203 /// MachineScheduler runs after coalescing and before register allocation.
204 class MachineScheduler : public MachineSchedulerBase {
205 public:
206   MachineScheduler();
207 
208   void getAnalysisUsage(AnalysisUsage &AU) const override;
209 
210   bool runOnMachineFunction(MachineFunction&) override;
211 
212   static char ID; // Class identification, replacement for typeinfo
213 
214 protected:
215   ScheduleDAGInstrs *createMachineScheduler();
216 };
217 
218 /// PostMachineScheduler runs shortly before code emission.
219 class PostMachineScheduler : public MachineSchedulerBase {
220 public:
221   PostMachineScheduler();
222 
223   void getAnalysisUsage(AnalysisUsage &AU) const override;
224 
225   bool runOnMachineFunction(MachineFunction&) override;
226 
227   static char ID; // Class identification, replacement for typeinfo
228 
229 protected:
230   ScheduleDAGInstrs *createPostMachineScheduler();
231 };
232 
233 } // end anonymous namespace
234 
235 char MachineScheduler::ID = 0;
236 
237 char &llvm::MachineSchedulerID = MachineScheduler::ID;
238 
239 INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
240                       "Machine Instruction Scheduler", false, false)
241 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
242 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
243 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
244 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
245 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
246 INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
247                     "Machine Instruction Scheduler", false, false)
248 
249 MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
250   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
251 }
252 
253 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
254   AU.setPreservesCFG();
255   AU.addRequired<MachineDominatorTree>();
256   AU.addRequired<MachineLoopInfo>();
257   AU.addRequired<AAResultsWrapperPass>();
258   AU.addRequired<TargetPassConfig>();
259   AU.addRequired<SlotIndexes>();
260   AU.addPreserved<SlotIndexes>();
261   AU.addRequired<LiveIntervals>();
262   AU.addPreserved<LiveIntervals>();
263   MachineFunctionPass::getAnalysisUsage(AU);
264 }
265 
266 char PostMachineScheduler::ID = 0;
267 
268 char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
269 
270 INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",
271                       "PostRA Machine Instruction Scheduler", false, false)
272 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
273 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
274 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
275 INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",
276                     "PostRA Machine Instruction Scheduler", false, false)
277 
278 PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
279   initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
280 }
281 
282 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
283   AU.setPreservesCFG();
284   AU.addRequired<MachineDominatorTree>();
285   AU.addRequired<MachineLoopInfo>();
286   AU.addRequired<AAResultsWrapperPass>();
287   AU.addRequired<TargetPassConfig>();
288   MachineFunctionPass::getAnalysisUsage(AU);
289 }
290 
291 MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>
292     MachineSchedRegistry::Registry;
293 
294 /// A dummy default scheduler factory indicates whether the scheduler
295 /// is overridden on the command line.
296 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
297   return nullptr;
298 }
299 
300 /// MachineSchedOpt allows command line selection of the scheduler.
301 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
302                RegisterPassParser<MachineSchedRegistry>>
303 MachineSchedOpt("misched",
304                 cl::init(&useDefaultMachineSched), cl::Hidden,
305                 cl::desc("Machine instruction scheduler to use"));
306 
307 static MachineSchedRegistry
308 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
309                      useDefaultMachineSched);
310 
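// A registry entry makes a scheduler selectable with -misched=<name>. As a
// minimal sketch only (the "my-custom" name and factory are hypothetical; the
// types are the real ones used by this pass), a custom entry reusing the
// default GenericScheduler strategy would look like:
//
//   static ScheduleDAGInstrs *createMyCustomSched(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
//   }
//   static MachineSchedRegistry
//       MyCustomSchedRegistry("my-custom", "Sketch of a custom scheduler.",
//                             createMyCustomSched);
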
311 static cl::opt<bool> EnableMachineSched(
312     "enable-misched",
313     cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
314     cl::Hidden);
315 
316 static cl::opt<bool> EnablePostRAMachineSched(
317     "enable-post-misched",
318     cl::desc("Enable the post-ra machine instruction scheduling pass."),
319     cl::init(true), cl::Hidden);
320 
321 /// Decrement this iterator until reaching the top or a non-debug instr.
322 static MachineBasicBlock::const_iterator
323 priorNonDebug(MachineBasicBlock::const_iterator I,
324               MachineBasicBlock::const_iterator Beg) {
325   assert(I != Beg && "reached the top of the region, cannot decrement");
326   while (--I != Beg) {
327     if (!I->isDebugOrPseudoInstr())
328       break;
329   }
330   return I;
331 }
332 
333 /// Non-const version.
334 static MachineBasicBlock::iterator
335 priorNonDebug(MachineBasicBlock::iterator I,
336               MachineBasicBlock::const_iterator Beg) {
337   return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
338       .getNonConstIterator();
339 }
340 
341 /// If this iterator is a debug value, increment until reaching the End or a
342 /// non-debug instruction.
343 static MachineBasicBlock::const_iterator
344 nextIfDebug(MachineBasicBlock::const_iterator I,
345             MachineBasicBlock::const_iterator End) {
346   for(; I != End; ++I) {
347     if (!I->isDebugOrPseudoInstr())
348       break;
349   }
350   return I;
351 }
352 
353 /// Non-const version.
354 static MachineBasicBlock::iterator
355 nextIfDebug(MachineBasicBlock::iterator I,
356             MachineBasicBlock::const_iterator End) {
357   return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
358       .getNonConstIterator();
359 }
360 
361 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
362 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
363   // Select the scheduler, or set the default.
364   MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
365   if (Ctor != useDefaultMachineSched)
366     return Ctor(this);
367 
368   // Get the default scheduler set by the target for this function.
369   ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
370   if (Scheduler)
371     return Scheduler;
372 
373   // Default to GenericScheduler.
374   return createGenericSchedLive(this);
375 }
376 
377 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
378 /// the caller. We don't have a command line option to override the postRA
379 /// scheduler. The Target must configure it.
380 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
381   // Get the postRA scheduler set by the target for this function.
382   ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
383   if (Scheduler)
384     return Scheduler;
385 
386   // Default to GenericScheduler.
387   return createGenericSchedPostRA(this);
388 }
389 
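// Both factories above defer to TargetPassConfig. As a sketch of the usual
// target-side counterpart (MyTargetPassConfig and createMyTargetDAGMutation
// are hypothetical; the virtual hook and addMutation() are real):
//
//   class MyTargetPassConfig : public TargetPassConfig {
//     ScheduleDAGInstrs *
//     createMachineScheduler(MachineSchedContext *C) const override {
//       ScheduleDAGMILive *DAG = createGenericSchedLive(C);
//       DAG->addMutation(createMyTargetDAGMutation());
//       return DAG;
//     }
//   };
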
390 /// Top-level MachineScheduler pass driver.
391 ///
392 /// Visit blocks in function order. Divide each block into scheduling regions
393 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
394 /// consistent with the DAG builder, which traverses the interior of the
395 /// scheduling regions bottom-up.
396 ///
397 /// This design avoids exposing scheduling boundaries to the DAG builder,
398 /// simplifying the DAG builder's support for "special" target instructions.
399 /// At the same time the design allows target schedulers to operate across
400 /// scheduling boundaries, for example to bundle the boundary instructions
401 /// without reordering them. This creates complexity, because the target
402 /// scheduler must update the RegionBegin and RegionEnd positions cached by
403 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
404 /// design would be to split blocks at scheduling boundaries, but LLVM has a
405 /// general bias against block splitting purely for implementation simplicity.
406 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
407   if (skipFunction(mf.getFunction()))
408     return false;
409 
410   if (EnableMachineSched.getNumOccurrences()) {
411     if (!EnableMachineSched)
412       return false;
413   } else if (!mf.getSubtarget().enableMachineScheduler())
414     return false;
415 
416   LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
417 
418   // Initialize the context of the pass.
419   MF = &mf;
420   MLI = &getAnalysis<MachineLoopInfo>();
421   MDT = &getAnalysis<MachineDominatorTree>();
422   PassConfig = &getAnalysis<TargetPassConfig>();
423   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
424 
425   LIS = &getAnalysis<LiveIntervals>();
426 
427   if (VerifyScheduling) {
428     LLVM_DEBUG(LIS->dump());
429     MF->verify(this, "Before machine scheduling.");
430   }
431   RegClassInfo->runOnMachineFunction(*MF);
432 
433   // Instantiate the selected scheduler for this target, function, and
434   // optimization level.
435   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
436   scheduleRegions(*Scheduler, false);
437 
438   LLVM_DEBUG(LIS->dump());
439   if (VerifyScheduling)
440     MF->verify(this, "After machine scheduling.");
441   return true;
442 }
443 
444 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
445   if (skipFunction(mf.getFunction()))
446     return false;
447 
448   if (EnablePostRAMachineSched.getNumOccurrences()) {
449     if (!EnablePostRAMachineSched)
450       return false;
451   } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
452     LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
453     return false;
454   }
455   LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
456 
457   // Initialize the context of the pass.
458   MF = &mf;
459   MLI = &getAnalysis<MachineLoopInfo>();
460   PassConfig = &getAnalysis<TargetPassConfig>();
461   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
462 
463   if (VerifyScheduling)
464     MF->verify(this, "Before post machine scheduling.");
465 
466   // Instantiate the selected scheduler for this target, function, and
467   // optimization level.
468   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
469   scheduleRegions(*Scheduler, true);
470 
471   if (VerifyScheduling)
472     MF->verify(this, "After post machine scheduling.");
473   return true;
474 }
475 
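// Whether either pass runs by default is a subtarget decision, queried above
// via enableMachineScheduler()/enablePostRAMachineScheduler(). A minimal
// sketch of the target side (MyTargetSubtarget is hypothetical; real targets
// override these hooks on their generated subtarget class):
//
//   bool MyTargetSubtarget::enableMachineScheduler() const { return true; }
//   bool MyTargetSubtarget::enablePostRAMachineScheduler() const {
//     return true;
//   }
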
476 /// Return true if the given instruction should not be included in a scheduling
477 /// region.
478 ///
479 /// MachineScheduler does not currently support scheduling across calls. To
480 /// handle calls, the DAG builder needs to be modified to create register
481 /// anti/output dependencies on the registers clobbered by the call's regmask
482 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
483 /// scheduling across calls. In PostRA scheduling, we need the isCall check to
484 /// enforce the boundary, but there would be no benefit to postRA scheduling
485 /// across calls this late anyway.
486 static bool isSchedBoundary(MachineBasicBlock::iterator MI,
487                             MachineBasicBlock *MBB,
488                             MachineFunction *MF,
489                             const TargetInstrInfo *TII) {
490   return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
491 }
492 
493 /// A region of an MBB for scheduling.
494 namespace {
495 struct SchedRegion {
496   /// RegionBegin is the first instruction in the scheduling region, and
497   /// RegionEnd is either MBB->end() or the scheduling boundary after the
498   /// last instruction in the scheduling region. These iterators cannot refer
499   /// to instructions outside of the identified scheduling region because
500   /// those may be reordered before scheduling this region.
501   MachineBasicBlock::iterator RegionBegin;
502   MachineBasicBlock::iterator RegionEnd;
503   unsigned NumRegionInstrs;
504 
505   SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
506               unsigned N) :
507     RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
508 };
509 } // end anonymous namespace
510 
511 using MBBRegionsVector = SmallVector<SchedRegion, 16>;
512 
513 static void
514 getSchedRegions(MachineBasicBlock *MBB,
515                 MBBRegionsVector &Regions,
516                 bool RegionsTopDown) {
517   MachineFunction *MF = MBB->getParent();
518   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
519 
520   MachineBasicBlock::iterator I = nullptr;
521   for(MachineBasicBlock::iterator RegionEnd = MBB->end();
522       RegionEnd != MBB->begin(); RegionEnd = I) {
523 
524     // Avoid decrementing RegionEnd for blocks with no terminator.
525     if (RegionEnd != MBB->end() ||
526         isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
527       --RegionEnd;
528     }
529 
530     // The next region starts above the previous region. Look backward in the
531     // instruction stream until we find the nearest boundary.
532     unsigned NumRegionInstrs = 0;
533     I = RegionEnd;
534     for (;I != MBB->begin(); --I) {
535       MachineInstr &MI = *std::prev(I);
536       if (isSchedBoundary(&MI, &*MBB, MF, TII))
537         break;
538       if (!MI.isDebugOrPseudoInstr()) {
539         // MBB::size() uses instr_iterator to count. Here we need a bundle to
540         // count as a single instruction.
541         ++NumRegionInstrs;
542       }
543     }
544 
545     // It's possible we found a scheduling region that only has debug
546     // instructions. Don't bother scheduling these.
547     if (NumRegionInstrs != 0)
548       Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
549   }
550 
551   if (RegionsTopDown)
552     std::reverse(Regions.begin(), Regions.end());
553 }
554 
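// To illustrate the splitting above (example only, not code in this file):
// a block of the form
//
//   %a = ...
//   %b = ...
//   CALL @f
//   %c = ...
//   %d = ...
//
// yields two scheduling regions, [%a, CALL) and [%c, MBB->end()), assuming no
// other boundaries. The CALL itself is a boundary (isSchedBoundary), is never
// added to a region, and therefore never appears in a scheduling DAG.
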
555 /// Main driver for both MachineScheduler and PostMachineScheduler.
556 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
557                                            bool FixKillFlags) {
558   // Visit all machine basic blocks.
559   //
560   // TODO: Visit blocks in global postorder or postorder within the bottom-up
561   // loop tree. Then we can optionally compute global RegPressure.
562   for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
563        MBB != MBBEnd; ++MBB) {
564 
565     Scheduler.startBlock(&*MBB);
566 
567 #ifndef NDEBUG
568     if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
569       continue;
570     if (SchedOnlyBlock.getNumOccurrences()
571         && (int)SchedOnlyBlock != MBB->getNumber())
572       continue;
573 #endif
574 
575     // Break the block into scheduling regions [I, RegionEnd). RegionEnd
576     // points to the scheduling boundary at the bottom of the region. The DAG
577     // does not include RegionEnd, but the region does (i.e. the next
578     // RegionEnd is above the previous RegionBegin). If the current block has
579     // no terminator then RegionEnd == MBB->end() for the bottom region.
580     //
581     // All the regions of MBB are first found and stored in MBBRegions, which
582     // will be processed top-down if doMBBSchedRegionsTopDown() returns true.
583     //
584     // The Scheduler may insert instructions during either schedule() or
585     // exitRegion(), even for empty regions. So the local iterators 'I' and
586     // 'RegionEnd' are invalid across these calls. Instructions must not be
587     // added to other regions than the current one without updating MBBRegions.
588 
589     MBBRegionsVector MBBRegions;
590     getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
591     for (const SchedRegion &R : MBBRegions) {
592       MachineBasicBlock::iterator I = R.RegionBegin;
593       MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
594       unsigned NumRegionInstrs = R.NumRegionInstrs;
595 
596       // Notify the scheduler of the region, even if we may skip scheduling
597       // it. Perhaps it still needs to be bundled.
598       Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
599 
600       // Skip empty scheduling regions (0 or 1 schedulable instructions).
601       if (I == RegionEnd || I == std::prev(RegionEnd)) {
602         // Close the current region. Bundle the terminator if needed.
603         // This invalidates 'RegionEnd' and 'I'.
604         Scheduler.exitRegion();
605         continue;
606       }
607       LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
608       LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
609                         << " " << MBB->getName() << "\n  From: " << *I
610                         << "    To: ";
611                  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
612                  else dbgs() << "End\n";
613                  dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
614       if (DumpCriticalPathLength) {
615         errs() << MF->getName();
616         errs() << ":%bb. " << MBB->getNumber();
617         errs() << " " << MBB->getName() << " \n";
618       }
619 
620       // Schedule a region: possibly reorder instructions.
621       // This invalidates the original region iterators.
622       Scheduler.schedule();
623 
624       // Close the current region.
625       Scheduler.exitRegion();
626     }
627     Scheduler.finishBlock();
628     // FIXME: Ideally, no further passes should rely on kill flags. However,
629     // thumb2 size reduction is currently an exception, so the PostMIScheduler
630     // needs to do this.
631     if (FixKillFlags)
632       Scheduler.fixupKills(*MBB);
633   }
634   Scheduler.finalizeSchedule();
635 }
636 
637 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
638   // unimplemented
639 }
640 
641 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
642 LLVM_DUMP_METHOD void ReadyQueue::dump() const {
643   dbgs() << "Queue " << Name << ": ";
644   for (const SUnit *SU : Queue)
645     dbgs() << SU->NodeNum << " ";
646   dbgs() << "\n";
647 }
648 #endif
649 
650 //===----------------------------------------------------------------------===//
651 // ScheduleDAGMI - Basic machine instruction scheduling. This is
652 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
653 // virtual registers.
654 //===----------------------------------------------------------------------===//
655 
656 // Provide a vtable anchor.
657 ScheduleDAGMI::~ScheduleDAGMI() = default;
658 
659 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
660 /// NumPredsLeft reaches zero, release the successor node.
661 ///
662 /// FIXME: Adjust SuccSU height based on MinLatency.
663 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
664   SUnit *SuccSU = SuccEdge->getSUnit();
665 
666   if (SuccEdge->isWeak()) {
667     --SuccSU->WeakPredsLeft;
668     if (SuccEdge->isCluster())
669       NextClusterSucc = SuccSU;
670     return;
671   }
672 #ifndef NDEBUG
673   if (SuccSU->NumPredsLeft == 0) {
674     dbgs() << "*** Scheduling failed! ***\n";
675     dumpNode(*SuccSU);
676     dbgs() << " has been released too many times!\n";
677     llvm_unreachable(nullptr);
678   }
679 #endif
680   // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
681   // CurrCycle may have advanced since then.
682   if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
683     SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
684 
685   --SuccSU->NumPredsLeft;
686   if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
687     SchedImpl->releaseTopNode(SuccSU);
688 }
689 
690 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
691 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
692   for (SDep &Succ : SU->Succs)
693     releaseSucc(SU, &Succ);
694 }
695 
696 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
697 /// NumSuccsLeft reaches zero, release the predecessor node.
698 ///
699 /// FIXME: Adjust PredSU height based on MinLatency.
700 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
701   SUnit *PredSU = PredEdge->getSUnit();
702 
703   if (PredEdge->isWeak()) {
704     --PredSU->WeakSuccsLeft;
705     if (PredEdge->isCluster())
706       NextClusterPred = PredSU;
707     return;
708   }
709 #ifndef NDEBUG
710   if (PredSU->NumSuccsLeft == 0) {
711     dbgs() << "*** Scheduling failed! ***\n";
712     dumpNode(*PredSU);
713     dbgs() << " has been released too many times!\n";
714     llvm_unreachable(nullptr);
715   }
716 #endif
717   // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
718   // CurrCycle may have advanced since then.
719   if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
720     PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
721 
722   --PredSU->NumSuccsLeft;
723   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
724     SchedImpl->releaseBottomNode(PredSU);
725 }
726 
727 /// releasePredecessors - Call releasePred on each of SU's predecessors.
728 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
729   for (SDep &Pred : SU->Preds)
730     releasePred(SU, &Pred);
731 }
732 
733 void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
734   ScheduleDAGInstrs::startBlock(bb);
735   SchedImpl->enterMBB(bb);
736 }
737 
738 void ScheduleDAGMI::finishBlock() {
739   SchedImpl->leaveMBB();
740   ScheduleDAGInstrs::finishBlock();
741 }
742 
743 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
744 /// crossing a scheduling boundary. [begin, end) includes all instructions in
745 /// the region, including the boundary itself and single-instruction regions
746 /// that don't get scheduled.
747 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
748                                      MachineBasicBlock::iterator begin,
749                                      MachineBasicBlock::iterator end,
750                                      unsigned regioninstrs)
751 {
752   ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
753 
754   SchedImpl->initPolicy(begin, end, regioninstrs);
755 }
756 
757 /// This is normally called from the main scheduler loop but may also be invoked
758 /// by the scheduling strategy to perform additional code motion.
759 void ScheduleDAGMI::moveInstruction(
760   MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
761   // Advance RegionBegin if the first instruction moves down.
762   if (&*RegionBegin == MI)
763     ++RegionBegin;
764 
765   // Update the instruction stream.
766   BB->splice(InsertPos, BB, MI);
767 
768   // Update LiveIntervals
769   if (LIS)
770     LIS->handleMove(*MI, /*UpdateFlags=*/true);
771 
772   // Recede RegionBegin if an instruction moves above the first.
773   if (RegionBegin == InsertPos)
774     RegionBegin = MI;
775 }
776 
777 bool ScheduleDAGMI::checkSchedLimit() {
778 #if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
779   if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
780     CurrentTop = CurrentBottom;
781     return false;
782   }
783   ++NumInstrsScheduled;
784 #endif
785   return true;
786 }
787 
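// Note that checkSchedLimit() exists purely for debugging: passing
// -misched-cutoff=N stops reordering after N instructions, which can be used
// to bisect a miscompile or performance regression down to an individual
// scheduling decision.
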
788 /// Per-region scheduling driver, called back from
789 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
790 /// does not consider liveness or register pressure. It is useful for PostRA
791 /// scheduling and potentially other custom schedulers.
792 void ScheduleDAGMI::schedule() {
793   LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
794   LLVM_DEBUG(SchedImpl->dumpPolicy());
795 
796   // Build the DAG.
797   buildSchedGraph(AA);
798 
799   postProcessDAG();
800 
801   SmallVector<SUnit*, 8> TopRoots, BotRoots;
802   findRootsAndBiasEdges(TopRoots, BotRoots);
803 
804   LLVM_DEBUG(dump());
805   if (PrintDAGs) dump();
806   if (ViewMISchedDAGs) viewGraph();
807 
808   // Initialize the strategy before modifying the DAG.
809   // This may initialize a DFSResult to be used for queue priority.
810   SchedImpl->initialize(this);
811 
812   // Initialize ready queues now that the DAG and priority data are finalized.
813   initQueues(TopRoots, BotRoots);
814 
815   bool IsTopNode = false;
816   while (true) {
817     LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
818     SUnit *SU = SchedImpl->pickNode(IsTopNode);
819     if (!SU) break;
820 
821     assert(!SU->isScheduled && "Node already scheduled");
822     if (!checkSchedLimit())
823       break;
824 
825     MachineInstr *MI = SU->getInstr();
826     if (IsTopNode) {
827       assert(SU->isTopReady() && "node still has unscheduled dependencies");
828       if (&*CurrentTop == MI)
829         CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
830       else
831         moveInstruction(MI, CurrentTop);
832     } else {
833       assert(SU->isBottomReady() && "node still has unscheduled dependencies");
834       MachineBasicBlock::iterator priorII =
835         priorNonDebug(CurrentBottom, CurrentTop);
836       if (&*priorII == MI)
837         CurrentBottom = priorII;
838       else {
839         if (&*CurrentTop == MI)
840           CurrentTop = nextIfDebug(++CurrentTop, priorII);
841         moveInstruction(MI, CurrentBottom);
842         CurrentBottom = MI;
843       }
844     }
845     // Notify the scheduling strategy before updating the DAG.
846     // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
847     // runs, it can then use the accurate ReadyCycle time to determine whether
848     // newly released nodes can move to the readyQ.
849     SchedImpl->schedNode(SU, IsTopNode);
850 
851     updateQueues(SU, IsTopNode);
852   }
853   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
854 
855   placeDebugValues();
856 
857   LLVM_DEBUG({
858     dbgs() << "*** Final schedule for "
859            << printMBBReference(*begin()->getParent()) << " ***\n";
860     dumpSchedule();
861     dbgs() << '\n';
862   });
863 }
864 
865 /// Apply each ScheduleDAGMutation step in order.
866 void ScheduleDAGMI::postProcessDAG() {
867   for (auto &m : Mutations)
868     m->apply(this);
869 }
870 
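// Mutations are the intended way to add target-specific DAG edges (e.g. for
// macro fusion or load/store clustering) without writing a whole scheduler.
// A minimal sketch (MyMutation is hypothetical; apply() and addMutation() are
// the real hooks):
//
//   struct MyMutation : ScheduleDAGMutation {
//     void apply(ScheduleDAGInstrs *DAG) override {
//       // e.g. walk DAG->SUnits and call DAG->addEdge(...) to keep selected
//       // pairs of instructions together.
//     }
//   };
//   // Typically installed from the target's createMachineScheduler() hook:
//   //   DAG->addMutation(std::make_unique<MyMutation>());
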
871 void ScheduleDAGMI::
872 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
873                       SmallVectorImpl<SUnit*> &BotRoots) {
874   for (SUnit &SU : SUnits) {
875     assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
876 
877     // Order predecessors so DFSResult follows the critical path.
878     SU.biasCriticalPath();
879 
880     // A SUnit is ready to top schedule if it has no predecessors.
881     if (!SU.NumPredsLeft)
882       TopRoots.push_back(&SU);
883     // A SUnit is ready to bottom schedule if it has no successors.
884     if (!SU.NumSuccsLeft)
885       BotRoots.push_back(&SU);
886   }
887   ExitSU.biasCriticalPath();
888 }
889 
890 /// Identify DAG roots and set up the scheduler queues.
891 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
892                                ArrayRef<SUnit*> BotRoots) {
893   NextClusterSucc = nullptr;
894   NextClusterPred = nullptr;
895 
896   // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
897   //
898   // Nodes with unreleased weak edges can still be roots.
899   // Release top roots in forward order.
900   for (SUnit *SU : TopRoots)
901     SchedImpl->releaseTopNode(SU);
902 
903   // Release bottom roots in reverse order so the higher priority nodes appear
904   // first. This is more natural and slightly more efficient.
905   for (SmallVectorImpl<SUnit*>::const_reverse_iterator
906          I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
907     SchedImpl->releaseBottomNode(*I);
908   }
909 
910   releaseSuccessors(&EntrySU);
911   releasePredecessors(&ExitSU);
912 
913   SchedImpl->registerRoots();
914 
915   // Advance past initial DebugValues.
916   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
917   CurrentBottom = RegionEnd;
918 }
919 
920 /// Update scheduler queues after scheduling an instruction.
921 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
922   // Release dependent instructions for scheduling.
923   if (IsTopNode)
924     releaseSuccessors(SU);
925   else
926     releasePredecessors(SU);
927 
928   SU->isScheduled = true;
929 }
930 
931 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
932 void ScheduleDAGMI::placeDebugValues() {
933   // If the first instruction was a DBG_VALUE then put it back.
934   if (FirstDbgValue) {
935     BB->splice(RegionBegin, BB, FirstDbgValue);
936     RegionBegin = FirstDbgValue;
937   }
938 
939   for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
940          DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
941     std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
942     MachineInstr *DbgValue = P.first;
943     MachineBasicBlock::iterator OrigPrevMI = P.second;
944     if (&*RegionBegin == DbgValue)
945       ++RegionBegin;
946     BB->splice(std::next(OrigPrevMI), BB, DbgValue);
947     if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
948       RegionEnd = DbgValue;
949   }
950 }
951 
952 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
953 static const char *scheduleTableLegend = "  i: issue\n  x: resource booked";
954 
955 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceTopDown() const {
956   // Nothing to show if there are no instructions or just one.
957   if (BB->size() < 2)
958     return;
959 
960   dbgs() << " * Schedule table (TopDown):\n";
961   dbgs() << scheduleTableLegend << "\n";
962   const unsigned FirstCycle = getSUnit(&*(std::begin(*this)))->TopReadyCycle;
963   unsigned LastCycle = getSUnit(&*(std::prev(std::end(*this))))->TopReadyCycle;
964   for (MachineInstr &MI : *this) {
965     SUnit *SU = getSUnit(&MI);
966     if (!SU)
967       continue;
968     const MCSchedClassDesc *SC = getSchedClass(SU);
969     for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
970                                        PE = SchedModel.getWriteProcResEnd(SC);
971          PI != PE; ++PI) {
972       if (SU->TopReadyCycle + PI->Cycles - 1 > LastCycle)
973         LastCycle = SU->TopReadyCycle + PI->Cycles - 1;
974     }
975   }
976   // Print the header with the cycles
977   dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
978   for (unsigned C = FirstCycle; C <= LastCycle; ++C)
979     dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
980   dbgs() << "|\n";
981 
982   for (MachineInstr &MI : *this) {
983     SUnit *SU = getSUnit(&MI);
984     if (!SU) {
985       dbgs() << "Missing SUnit\n";
986       continue;
987     }
988     std::string NodeName("SU(");
989     NodeName += std::to_string(SU->NodeNum) + ")";
990     dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
991     unsigned C = FirstCycle;
992     for (; C <= LastCycle; ++C) {
993       if (C == SU->TopReadyCycle)
994         dbgs() << llvm::left_justify("| i", ColWidth);
995       else
996         dbgs() << llvm::left_justify("|", ColWidth);
997     }
998     dbgs() << "|\n";
999     const MCSchedClassDesc *SC = getSchedClass(SU);
1000     for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
1001                                        PE = SchedModel.getWriteProcResEnd(SC);
1002          PI != PE; ++PI) {
1003       C = FirstCycle;
1004       const std::string ResName =
1005           SchedModel.getResourceName(PI->ProcResourceIdx);
1006       dbgs() << llvm::left_justify(ResName, HeaderColWidth);
1007       for (; C < SU->TopReadyCycle; ++C) {
1008         dbgs() << llvm::left_justify("|", ColWidth);
1009       }
1010       for (unsigned i = 0; i < PI->Cycles; ++i, ++C)
1011         dbgs() << llvm::left_justify("| x", ColWidth);
1012       while (C++ <= LastCycle)
1013         dbgs() << llvm::left_justify("|", ColWidth);
1014       // Place end char
1015       dbgs() << "| \n";
1016     }
1017   }
1018 }
1019 
1020 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const {
1021   // Nothing to show if there are no instructions or just one.
1022   if (BB->size() < 2)
1023     return;
1024 
1025   dbgs() << " * Schedule table (BottomUp):\n";
1026   dbgs() << scheduleTableLegend << "\n";
1027 
1028   const int FirstCycle = getSUnit(&*(std::begin(*this)))->BotReadyCycle;
1029   int LastCycle = getSUnit(&*(std::prev(std::end(*this))))->BotReadyCycle;
1030   for (MachineInstr &MI : *this) {
1031     SUnit *SU = getSUnit(&MI);
1032     if (!SU)
1033       continue;
1034     const MCSchedClassDesc *SC = getSchedClass(SU);
1035     for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
1036                                        PE = SchedModel.getWriteProcResEnd(SC);
1037          PI != PE; ++PI) {
1038       if ((int)SU->BotReadyCycle - PI->Cycles + 1 < LastCycle)
1039         LastCycle = (int)SU->BotReadyCycle - PI->Cycles + 1;
1040     }
1041   }
1042   // Print the header with the cycles
1043   dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
1044   for (int C = FirstCycle; C >= LastCycle; --C)
1045     dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
1046   dbgs() << "|\n";
1047 
1048   for (MachineInstr &MI : *this) {
1049     SUnit *SU = getSUnit(&MI);
1050     if (!SU) {
1051       dbgs() << "Missing SUnit\n";
1052       continue;
1053     }
1054     std::string NodeName("SU(");
1055     NodeName += std::to_string(SU->NodeNum) + ")";
1056     dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
1057     int C = FirstCycle;
1058     for (; C >= LastCycle; --C) {
1059       if (C == (int)SU->BotReadyCycle)
1060         dbgs() << llvm::left_justify("| i", ColWidth);
1061       else
1062         dbgs() << llvm::left_justify("|", ColWidth);
1063     }
1064     dbgs() << "|\n";
1065     const MCSchedClassDesc *SC = getSchedClass(SU);
1066     for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
1067                                        PE = SchedModel.getWriteProcResEnd(SC);
1068          PI != PE; ++PI) {
1069       C = FirstCycle;
1070       const std::string ResName =
1071           SchedModel.getResourceName(PI->ProcResourceIdx);
1072       dbgs() << llvm::left_justify(ResName, HeaderColWidth);
1073       for (; C > (int)SU->BotReadyCycle; --C) {
1074         dbgs() << llvm::left_justify("|", ColWidth);
1075       }
1076       for (unsigned i = 0; i < PI->Cycles; ++i, --C)
1077         dbgs() << llvm::left_justify("| x", ColWidth);
1078       while (C-- >= LastCycle)
1079         dbgs() << llvm::left_justify("|", ColWidth);
1080       // Place end char
1081       dbgs() << "| \n";
1082     }
1083   }
1084 }
1085 #endif
1086 
1087 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1088 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
1089   if (MISchedDumpScheduleTrace) {
1090     if (ForceTopDown)
1091       dumpScheduleTraceTopDown();
1092     else if (ForceBottomUp)
1093       dumpScheduleTraceBottomUp();
1094     else {
1095       dbgs() << "* Schedule table (Bidirectional): not implemented\n";
1096     }
1097   }
1098 
1099   for (MachineInstr &MI : *this) {
1100     if (SUnit *SU = getSUnit(&MI))
1101       dumpNode(*SU);
1102     else
1103       dbgs() << "Missing SUnit\n";
1104   }
1105 }
1106 #endif
1107 
1108 //===----------------------------------------------------------------------===//
1109 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
1110 // preservation.
1111 //===----------------------------------------------------------------------===//
1112 
1113 ScheduleDAGMILive::~ScheduleDAGMILive() {
1114   delete DFSResult;
1115 }
1116 
1117 void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
1118   const MachineInstr &MI = *SU.getInstr();
1119   for (const MachineOperand &MO : MI.operands()) {
1120     if (!MO.isReg())
1121       continue;
1122     if (!MO.readsReg())
1123       continue;
1124     if (TrackLaneMasks && !MO.isUse())
1125       continue;
1126 
1127     Register Reg = MO.getReg();
1128     if (!Reg.isVirtual())
1129       continue;
1130 
1131     // Ignore re-defs.
1132     if (TrackLaneMasks) {
1133       bool FoundDef = false;
1134       for (const MachineOperand &MO2 : MI.all_defs()) {
1135         if (MO2.getReg() == Reg && !MO2.isDead()) {
1136           FoundDef = true;
1137           break;
1138         }
1139       }
1140       if (FoundDef)
1141         continue;
1142     }
1143 
1144     // Record this local VReg use.
1145     VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
1146     for (; UI != VRegUses.end(); ++UI) {
1147       if (UI->SU == &SU)
1148         break;
1149     }
1150     if (UI == VRegUses.end())
1151       VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
1152   }
1153 }
1154 
1155 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
1156 /// crossing a scheduling boundary. [begin, end) includes all instructions in
1157 /// the region, including the boundary itself and single-instruction regions
1158 /// that don't get scheduled.
1159 void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
1160                                 MachineBasicBlock::iterator begin,
1161                                 MachineBasicBlock::iterator end,
1162                                 unsigned regioninstrs)
1163 {
1164   // ScheduleDAGMI initializes SchedImpl's per-region policy.
1165   ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
1166 
1167   // For convenience remember the end of the liveness region.
1168   LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
1169 
1170   SUPressureDiffs.clear();
1171 
1172   ShouldTrackPressure = SchedImpl->shouldTrackPressure();
1173   ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
1174 
1175   assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
1176          "ShouldTrackLaneMasks requires ShouldTrackPressure");
1177 }
1178 
1179 // Set up the register pressure trackers for the top scheduled and bottom
1180 // scheduled regions.
1181 void ScheduleDAGMILive::initRegPressure() {
1182   VRegUses.clear();
1183   VRegUses.setUniverse(MRI.getNumVirtRegs());
1184   for (SUnit &SU : SUnits)
1185     collectVRegUses(SU);
1186 
1187   TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
1188                     ShouldTrackLaneMasks, false);
1189   BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1190                     ShouldTrackLaneMasks, false);
1191 
1192   // Close the RPTracker to finalize live ins.
1193   RPTracker.closeRegion();
1194 
1195   LLVM_DEBUG(RPTracker.dump());
1196 
1197   // Initialize the live ins and live outs.
1198   TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1199   BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1200 
1201   // Close one end of the tracker so we can call
1202   // getMaxUpward/DownwardPressureDelta before advancing across any
1203   // instructions. This converts currently live regs into live ins/outs.
1204   TopRPTracker.closeTop();
1205   BotRPTracker.closeBottom();
1206 
1207   BotRPTracker.initLiveThru(RPTracker);
1208   if (!BotRPTracker.getLiveThru().empty()) {
1209     TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
1210     LLVM_DEBUG(dbgs() << "Live Thru: ";
1211                dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1212   }
1213 
1214   // For each live out vreg reduce the pressure change associated with other
1215   // uses of the same vreg below the live-out reaching def.
1216   updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1217 
1218   // Account for liveness generated by the region boundary.
1219   if (LiveRegionEnd != RegionEnd) {
1220     SmallVector<RegisterMaskPair, 8> LiveUses;
1221     BotRPTracker.recede(&LiveUses);
1222     updatePressureDiffs(LiveUses);
1223   }
1224 
1225   LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1226              dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1227              dbgs() << "Bottom Pressure:\n";
1228              dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
1229 
1230   assert((BotRPTracker.getPos() == RegionEnd ||
1231           (RegionEnd->isDebugInstr() &&
1232            BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
1233          "Can't find the region bottom");
1234 
1235   // Cache the list of excess pressure sets in this region. This will also track
1236   // the max pressure in the scheduled code for these sets.
1237   RegionCriticalPSets.clear();
1238   const std::vector<unsigned> &RegionPressure =
1239     RPTracker.getPressure().MaxSetPressure;
1240   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1241     unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1242     if (RegionPressure[i] > Limit) {
1243       LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1244                         << " Actual " << RegionPressure[i] << "\n");
1245       RegionCriticalPSets.push_back(PressureChange(i));
1246     }
1247   }
1248   LLVM_DEBUG(dbgs() << "Excess PSets: ";
1249              for (const PressureChange &RCPS
1250                   : RegionCriticalPSets) dbgs()
1251              << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1252              dbgs() << "\n");
1253 }
1254 
1255 void ScheduleDAGMILive::
1256 updateScheduledPressure(const SUnit *SU,
1257                         const std::vector<unsigned> &NewMaxPressure) {
1258   const PressureDiff &PDiff = getPressureDiff(SU);
1259   unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1260   for (const PressureChange &PC : PDiff) {
1261     if (!PC.isValid())
1262       break;
1263     unsigned ID = PC.getPSet();
1264     while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1265       ++CritIdx;
1266     if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1267       if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1268           && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1269         RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1270     }
1271     unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1272     if (NewMaxPressure[ID] >= Limit - 2) {
1273       LLVM_DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
1274                         << NewMaxPressure[ID]
1275                         << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1276                         << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1277                         << " livethru)\n");
1278     }
1279   }
1280 }
1281 
1282 /// Update the PressureDiff array for liveness after scheduling this
1283 /// instruction.
1284 void ScheduleDAGMILive::updatePressureDiffs(
1285     ArrayRef<RegisterMaskPair> LiveUses) {
1286   for (const RegisterMaskPair &P : LiveUses) {
1287     Register Reg = P.RegUnit;
1288     /// FIXME: Currently assuming single-use physregs.
1289     if (!Reg.isVirtual())
1290       continue;
1291 
1292     if (ShouldTrackLaneMasks) {
1293       // If the register has just become live then other uses won't change
1294       // this fact anymore => decrement pressure.
1295       // If the register has just become dead then other uses make it come
1296       // back to life => increment pressure.
1297       bool Decrement = P.LaneMask.any();
1298 
1299       for (const VReg2SUnit &V2SU
1300            : make_range(VRegUses.find(Reg), VRegUses.end())) {
1301         SUnit &SU = *V2SU.SU;
1302         if (SU.isScheduled || &SU == &ExitSU)
1303           continue;
1304 
1305         PressureDiff &PDiff = getPressureDiff(&SU);
1306         PDiff.addPressureChange(Reg, Decrement, &MRI);
1307         LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU.NodeNum << ") "
1308                           << printReg(Reg, TRI) << ':'
1309                           << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1310                    dbgs() << "              to "; PDiff.dump(*TRI););
1311       }
1312     } else {
1313       assert(P.LaneMask.any());
1314       LLVM_DEBUG(dbgs() << "  LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1315       // This may be called before CurrentBottom has been initialized. However,
1316       // BotRPTracker must have a valid position. We want the value live into the
1317       // instruction or live out of the block, so ask for the previous
1318       // instruction's live-out.
1319       const LiveInterval &LI = LIS->getInterval(Reg);
1320       VNInfo *VNI;
1321       MachineBasicBlock::const_iterator I =
1322         nextIfDebug(BotRPTracker.getPos(), BB->end());
1323       if (I == BB->end())
1324         VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1325       else {
1326         LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1327         VNI = LRQ.valueIn();
1328       }
1329       // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1330       assert(VNI && "No live value at use.");
1331       for (const VReg2SUnit &V2SU
1332            : make_range(VRegUses.find(Reg), VRegUses.end())) {
1333         SUnit *SU = V2SU.SU;
1334         // If this use comes before the reaching def, it cannot be a last use,
1335         // so decrease its pressure change.
1336         if (!SU->isScheduled && SU != &ExitSU) {
1337           LiveQueryResult LRQ =
1338               LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1339           if (LRQ.valueIn() == VNI) {
1340             PressureDiff &PDiff = getPressureDiff(SU);
1341             PDiff.addPressureChange(Reg, true, &MRI);
1342             LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
1343                               << *SU->getInstr();
1344                        dbgs() << "              to "; PDiff.dump(*TRI););
1345           }
1346         }
1347       }
1348     }
1349   }
1350 }
1351 
1352 void ScheduleDAGMILive::dump() const {
1353 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1354   if (EntrySU.getInstr() != nullptr)
1355     dumpNodeAll(EntrySU);
1356   for (const SUnit &SU : SUnits) {
1357     dumpNodeAll(SU);
1358     if (ShouldTrackPressure) {
1359       dbgs() << "  Pressure Diff      : ";
1360       getPressureDiff(&SU).dump(*TRI);
1361     }
1362     dbgs() << "  Single Issue       : ";
1363     if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1364         SchedModel.mustEndGroup(SU.getInstr()))
1365       dbgs() << "true;";
1366     else
1367       dbgs() << "false;";
1368     dbgs() << '\n';
1369   }
1370   if (ExitSU.getInstr() != nullptr)
1371     dumpNodeAll(ExitSU);
1372 #endif
1373 }
1374 
1375 /// schedule - Called back from MachineScheduler::runOnMachineFunction
1376 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1377 /// only includes instructions that have DAG nodes, not scheduling boundaries.
1378 ///
1379 /// This is a skeletal driver, with all the functionality pushed into helpers,
1380 /// so that it can be easily extended by experimental schedulers. Generally,
1381 /// implementing MachineSchedStrategy should be sufficient to implement a new
1382 /// scheduling algorithm. However, if a scheduler further subclasses
1383 /// ScheduleDAGMILive then it will want to override this virtual method in order
1384 /// to update any specialized state.
1385 void ScheduleDAGMILive::schedule() {
1386   LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1387   LLVM_DEBUG(SchedImpl->dumpPolicy());
1388   buildDAGWithRegPressure();
1389 
1390   postProcessDAG();
1391 
1392   SmallVector<SUnit*, 8> TopRoots, BotRoots;
1393   findRootsAndBiasEdges(TopRoots, BotRoots);
1394 
1395   // Initialize the strategy before modifying the DAG.
1396   // This may initialize a DFSResult to be used for queue priority.
1397   SchedImpl->initialize(this);
1398 
1399   LLVM_DEBUG(dump());
1400   if (PrintDAGs) dump();
1401   if (ViewMISchedDAGs) viewGraph();
1402 
1403   // Initialize ready queues now that the DAG and priority data are finalized.
1404   initQueues(TopRoots, BotRoots);
1405 
1406   bool IsTopNode = false;
1407   while (true) {
1408     LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1409     SUnit *SU = SchedImpl->pickNode(IsTopNode);
1410     if (!SU) break;
1411 
1412     assert(!SU->isScheduled && "Node already scheduled");
1413     if (!checkSchedLimit())
1414       break;
1415 
1416     scheduleMI(SU, IsTopNode);
1417 
1418     if (DFSResult) {
1419       unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1420       if (!ScheduledTrees.test(SubtreeID)) {
1421         ScheduledTrees.set(SubtreeID);
1422         DFSResult->scheduleTree(SubtreeID);
1423         SchedImpl->scheduleTree(SubtreeID);
1424       }
1425     }
1426 
1427     // Notify the scheduling strategy after updating the DAG.
1428     SchedImpl->schedNode(SU, IsTopNode);
1429 
1430     updateQueues(SU, IsTopNode);
1431   }
1432   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1433 
1434   placeDebugValues();
1435 
1436   LLVM_DEBUG({
1437     dbgs() << "*** Final schedule for "
1438            << printMBBReference(*begin()->getParent()) << " ***\n";
1439     dumpSchedule();
1440     dbgs() << '\n';
1441   });
1442 }
1443 
1444 /// Build the DAG and set up three register pressure trackers.
1445 void ScheduleDAGMILive::buildDAGWithRegPressure() {
1446   if (!ShouldTrackPressure) {
1447     RPTracker.reset();
1448     RegionCriticalPSets.clear();
1449     buildSchedGraph(AA);
1450     return;
1451   }
1452 
1453   // Initialize the register pressure tracker used by buildSchedGraph.
1454   RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1455                  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1456 
1457   // Account for liveness generated by the region boundary.
1458   if (LiveRegionEnd != RegionEnd)
1459     RPTracker.recede();
1460 
1461   // Build the DAG, and compute current register pressure.
1462   buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1463 
1464   // Initialize top/bottom trackers after computing region pressure.
1465   initRegPressure();
1466 }
1467 
1468 void ScheduleDAGMILive::computeDFSResult() {
1469   if (!DFSResult)
1470     DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1471   DFSResult->clear();
1472   ScheduledTrees.clear();
1473   DFSResult->resize(SUnits.size());
1474   DFSResult->compute(SUnits);
1475   ScheduledTrees.resize(DFSResult->getNumSubtrees());
1476 }
1477 
1478 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1479 /// only provides the critical path for single block loops. To handle loops that
1480 /// span blocks, we could use the vreg path latencies provided by
1481 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1482 /// available for use in the scheduler.
1483 ///
1484 /// The cyclic path estimation identifies a def-use pair that crosses the back
1485 /// edge and considers the depth and height of the nodes. For example, consider
1486 /// the following instruction sequence where each instruction has unit latency
1487 /// and defines an eponymous virtual register:
1488 ///
1489 /// a->b(a,c)->c(b)->d(c)->exit
1490 ///
1491 /// The cyclic critical path is two cycles: b->c->b
1492 /// The acyclic critical path is four cycles: a->b->c->d->exit
1493 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1494 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1495 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1496 /// LiveInDepth = depth(b) = len(a->b) = 1
1497 ///
1498 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1499 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1500 /// CyclicCriticalPath = min(2, 2) = 2
1501 ///
1502 /// This could be relevant to PostRA scheduling, but is currently implemented
1503 /// assuming LiveIntervals.
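///
/// In general (restating the computation below): for each def-use pair that
/// crosses the back edge,
///   CyclicLatency = min(LiveOutDepth - UseDepth, LiveInHeight - LiveOutHeight)
/// with negative terms clamped to zero, and the cyclic critical path is the
/// maximum CyclicLatency over all such pairs.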
1504 unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1505   // This only applies to single-block loops.
1506   if (!BB->isSuccessor(BB))
1507     return 0;
1508 
1509   unsigned MaxCyclicLatency = 0;
1510   // Visit each live out vreg def to find def/use pairs that cross iterations.
1511   for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1512     Register Reg = P.RegUnit;
1513     if (!Reg.isVirtual())
1514       continue;
1515     const LiveInterval &LI = LIS->getInterval(Reg);
1516     const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1517     if (!DefVNI)
1518       continue;
1519 
1520     MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1521     const SUnit *DefSU = getSUnit(DefMI);
1522     if (!DefSU)
1523       continue;
1524 
1525     unsigned LiveOutHeight = DefSU->getHeight();
1526     unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1527     // Visit all local users of the vreg def.
1528     for (const VReg2SUnit &V2SU
1529          : make_range(VRegUses.find(Reg), VRegUses.end())) {
1530       SUnit *SU = V2SU.SU;
1531       if (SU == &ExitSU)
1532         continue;
1533 
1534       // Only consider uses of the phi.
1535       LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1536       if (!LRQ.valueIn()->isPHIDef())
1537         continue;
1538 
1539       // Assume that a path spanning two iterations is a cycle, which could
1540       // overestimate in strange cases. This allows cyclic latency to be
1541       // estimated as the minimum slack of the vreg's depth or height.
1542       unsigned CyclicLatency = 0;
1543       if (LiveOutDepth > SU->getDepth())
1544         CyclicLatency = LiveOutDepth - SU->getDepth();
1545 
1546       unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1547       if (LiveInHeight > LiveOutHeight) {
1548         if (LiveInHeight - LiveOutHeight < CyclicLatency)
1549           CyclicLatency = LiveInHeight - LiveOutHeight;
1550       } else
1551         CyclicLatency = 0;
1552 
1553       LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1554                         << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1555       if (CyclicLatency > MaxCyclicLatency)
1556         MaxCyclicLatency = CyclicLatency;
1557     }
1558   }
1559   LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1560   return MaxCyclicLatency;
1561 }
1562 
1563 /// Release ExitSU predecessors and set up scheduler queues. Re-position
1564 /// the Top RP tracker in case the region beginning has changed.
1565 void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1566                                    ArrayRef<SUnit*> BotRoots) {
1567   ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1568   if (ShouldTrackPressure) {
1569     assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1570     TopRPTracker.setPos(CurrentTop);
1571   }
1572 }
1573 
1574 /// Move an instruction and update register pressure.
1575 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1576   // Move the instruction to its new location in the instruction stream.
1577   MachineInstr *MI = SU->getInstr();
1578 
1579   if (IsTopNode) {
1580     assert(SU->isTopReady() && "node still has unscheduled dependencies");
1581     if (&*CurrentTop == MI)
1582       CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1583     else {
1584       moveInstruction(MI, CurrentTop);
1585       TopRPTracker.setPos(MI);
1586     }
1587 
1588     if (ShouldTrackPressure) {
1589       // Update top scheduled pressure.
1590       RegisterOperands RegOpers;
1591       RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1592       if (ShouldTrackLaneMasks) {
1593         // Adjust liveness and add missing dead+read-undef flags.
1594         SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1595         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1596       } else {
1597         // Adjust for missing dead-def flags.
1598         RegOpers.detectDeadDefs(*MI, *LIS);
1599       }
1600 
1601       TopRPTracker.advance(RegOpers);
1602       assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1603       LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1604                      TopRPTracker.getRegSetPressureAtPos(), TRI););
1605 
1606       updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1607     }
1608   } else {
1609     assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1610     MachineBasicBlock::iterator priorII =
1611       priorNonDebug(CurrentBottom, CurrentTop);
1612     if (&*priorII == MI)
1613       CurrentBottom = priorII;
1614     else {
1615       if (&*CurrentTop == MI) {
1616         CurrentTop = nextIfDebug(++CurrentTop, priorII);
1617         TopRPTracker.setPos(CurrentTop);
1618       }
1619       moveInstruction(MI, CurrentBottom);
1620       CurrentBottom = MI;
1621       BotRPTracker.setPos(CurrentBottom);
1622     }
1623     if (ShouldTrackPressure) {
1624       RegisterOperands RegOpers;
1625       RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1626       if (ShouldTrackLaneMasks) {
1627         // Adjust liveness and add missing dead+read-undef flags.
1628         SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1629         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1630       } else {
1631         // Adjust for missing dead-def flags.
1632         RegOpers.detectDeadDefs(*MI, *LIS);
1633       }
1634 
1635       if (BotRPTracker.getPos() != CurrentBottom)
1636         BotRPTracker.recedeSkipDebugValues();
1637       SmallVector<RegisterMaskPair, 8> LiveUses;
1638       BotRPTracker.recede(RegOpers, &LiveUses);
1639       assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1640       LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1641                      BotRPTracker.getRegSetPressureAtPos(), TRI););
1642 
1643       updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1644       updatePressureDiffs(LiveUses);
1645     }
1646   }
1647 }
1648 
1649 //===----------------------------------------------------------------------===//
1650 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1651 //===----------------------------------------------------------------------===//
1652 
1653 namespace {
1654 
1655 /// Post-process the DAG to create cluster edges between neighboring
1656 /// loads or between neighboring stores.
1657 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1658   struct MemOpInfo {
1659     SUnit *SU;
1660     SmallVector<const MachineOperand *, 4> BaseOps;
1661     int64_t Offset;
1662     unsigned Width;
1663 
1664     MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
1665               int64_t Offset, unsigned Width)
1666         : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
1667           Width(Width) {}
1668 
1669     static bool Compare(const MachineOperand *const &A,
1670                         const MachineOperand *const &B) {
1671       if (A->getType() != B->getType())
1672         return A->getType() < B->getType();
1673       if (A->isReg())
1674         return A->getReg() < B->getReg();
1675       if (A->isFI()) {
1676         const MachineFunction &MF = *A->getParent()->getParent()->getParent();
1677         const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
1678         bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1679                               TargetFrameLowering::StackGrowsDown;
1680         return StackGrowsDown ? A->getIndex() > B->getIndex()
1681                               : A->getIndex() < B->getIndex();
1682       }
1683 
1684       llvm_unreachable("MemOpClusterMutation only supports register or frame "
1685                        "index bases.");
1686     }
1687 
1688     bool operator<(const MemOpInfo &RHS) const {
1689       // FIXME: Don't compare everything twice. Maybe use C++20 three way
1690       // comparison instead when it's available.
1691       if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
1692                                        RHS.BaseOps.begin(), RHS.BaseOps.end(),
1693                                        Compare))
1694         return true;
1695       if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
1696                                        BaseOps.begin(), BaseOps.end(), Compare))
1697         return false;
1698       if (Offset != RHS.Offset)
1699         return Offset < RHS.Offset;
1700       return SU->NodeNum < RHS.SU->NodeNum;
1701     }
1702   };
1703 
1704   const TargetInstrInfo *TII;
1705   const TargetRegisterInfo *TRI;
1706   bool IsLoad;
1707 
1708 public:
1709   BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1710                            const TargetRegisterInfo *tri, bool IsLoad)
1711       : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1712 
1713   void apply(ScheduleDAGInstrs *DAGInstrs) override;
1714 
1715 protected:
1716   void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
1717                                 ScheduleDAGInstrs *DAG);
1718   void collectMemOpRecords(std::vector<SUnit> &SUnits,
1719                            SmallVectorImpl<MemOpInfo> &MemOpRecords);
1720   bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1721                    DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups);
1722 };
1723 
1724 class StoreClusterMutation : public BaseMemOpClusterMutation {
1725 public:
1726   StoreClusterMutation(const TargetInstrInfo *tii,
1727                        const TargetRegisterInfo *tri)
1728       : BaseMemOpClusterMutation(tii, tri, false) {}
1729 };
1730 
1731 class LoadClusterMutation : public BaseMemOpClusterMutation {
1732 public:
1733   LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1734       : BaseMemOpClusterMutation(tii, tri, true) {}
1735 };
1736 
1737 } // end anonymous namespace
1738 
1739 namespace llvm {
1740 
1741 std::unique_ptr<ScheduleDAGMutation>
1742 createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1743                              const TargetRegisterInfo *TRI) {
1744   return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(TII, TRI)
1745                             : nullptr;
1746 }
1747 
1748 std::unique_ptr<ScheduleDAGMutation>
1749 createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1750                               const TargetRegisterInfo *TRI) {
1751   return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(TII, TRI)
1752                             : nullptr;
1753 }
1754 
1755 } // end namespace llvm
1756 
1757 // Sort all the loads/stores first. Then, for each load/store, check the
1758 // following loads/stores one by one until the first non-dependent one is
1759 // reached, and call the target hook to see whether they can be clustered.
1760 // If FastCluster is enabled, we assume that all the loads/stores have been
1761 // preprocessed and no longer have dependencies on each other.
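//
// As a purely illustrative example (hypothetical offsets, not taken from any
// target): given sorted records with the same base register
//   SU(0): off 0, width 8;  SU(1): off 8, width 8;  SU(2): off 16, width 8
// the first iteration may cluster SU(0)-SU(1) (length 2, 16 bytes); on the
// next iteration SU(1) is found in SUnit2ClusterInfo, so SU(1)-SU(2) is
// offered to the target hook as a growing cluster (length 3, 24 bytes).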
1762 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1763     ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
1764     ScheduleDAGInstrs *DAG) {
1765   // Keep track of the current cluster length and bytes for each SUnit.
1766   DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo;
1767 
1768   // At this point, the `MemOpRecords` array must hold at least two mem ops.
1769   // Try to cluster the mem ops collected within it.
1770   for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1771     // The decision to cluster mem ops is taken based on target-dependent logic.
1772     auto MemOpa = MemOpRecords[Idx];
1773 
1774     // Seek the next load/store to cluster with.
1775     unsigned NextIdx = Idx + 1;
1776     for (; NextIdx < End; ++NextIdx)
1777       // Skip if MemOpb has been clustered already or has a dependency on
1778       // MemOpa.
1779       if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
1780           (FastCluster ||
1781            (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
1782             !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
1783         break;
1784     if (NextIdx == End)
1785       continue;
1786 
1787     auto MemOpb = MemOpRecords[NextIdx];
1788     unsigned ClusterLength = 2;
1789     unsigned CurrentClusterBytes = MemOpa.Width + MemOpb.Width;
1790     if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
1791       ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
1792       CurrentClusterBytes =
1793           SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width;
1794     }
1795 
1796     if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
1797                                   CurrentClusterBytes))
1798       continue;
1799 
1800     SUnit *SUa = MemOpa.SU;
1801     SUnit *SUb = MemOpb.SU;
1802     if (SUa->NodeNum > SUb->NodeNum)
1803       std::swap(SUa, SUb);
1804 
1805     // FIXME: Is this check really required?
1806     if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
1807       continue;
1808 
1809     LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1810                       << SUb->NodeNum << ")\n");
1811     ++NumClustered;
1812 
1813     if (IsLoad) {
1814       // Copy successor edges from SUa to SUb. Interleaving computation
1815       // dependent on SUa can prevent load combining due to register reuse.
1816       // Predecessor edges do not need to be copied from SUb to SUa since
1817       // nearby loads should have effectively the same inputs.
1818       for (const SDep &Succ : SUa->Succs) {
1819         if (Succ.getSUnit() == SUb)
1820           continue;
1821         LLVM_DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum
1822                           << ")\n");
1823         DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1824       }
1825     } else {
1826       // Copy predecessor edges from SUb to SUa so that the SUnits SUb
1827       // depends on cannot be scheduled in between SUb and SUa. Successor
1828       // edges do not need to be copied from SUa to SUb since nothing will
1829       // depend on stores.
1830       // Note that we don't need to care about memory dependencies: we
1831       // won't try to cluster two mem ops if they have any memory dependency.
1832       for (const SDep &Pred : SUb->Preds) {
1833         if (Pred.getSUnit() == SUa)
1834           continue;
1835         LLVM_DEBUG(dbgs() << "  Copy Pred SU(" << Pred.getSUnit()->NodeNum
1836                           << ")\n");
1837         DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
1838       }
1839     }
1840 
1841     SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
1842                                              CurrentClusterBytes};
1843 
1844     LLVM_DEBUG(dbgs() << "  Curr cluster length: " << ClusterLength
1845                       << ", Curr cluster bytes: " << CurrentClusterBytes
1846                       << "\n");
1847   }
1848 }
1849 
1850 void BaseMemOpClusterMutation::collectMemOpRecords(
1851     std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
1852   for (auto &SU : SUnits) {
1853     if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1854         (!IsLoad && !SU.getInstr()->mayStore()))
1855       continue;
1856 
1857     const MachineInstr &MI = *SU.getInstr();
1858     SmallVector<const MachineOperand *, 4> BaseOps;
1859     int64_t Offset;
1860     bool OffsetIsScalable;
1861     unsigned Width;
1862     if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
1863                                            OffsetIsScalable, Width, TRI)) {
1864       MemOpRecords.push_back(MemOpInfo(&SU, BaseOps, Offset, Width));
1865 
1866       LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
1867                         << Offset << ", OffsetIsScalable: " << OffsetIsScalable
1868                         << ", Width: " << Width << "\n");
1869     }
1870 #ifndef NDEBUG
1871     for (const auto *Op : BaseOps)
1872       assert(Op);
1873 #endif
1874   }
1875 }
1876 
1877 bool BaseMemOpClusterMutation::groupMemOps(
1878     ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1879     DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) {
1880   bool FastCluster =
1881       ForceFastCluster ||
1882       MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
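  // For example (illustrative numbers only): 200 mem op records in a region
  // with 10000 SUnits give 200 * 10000 / 1000 = 2000; if that exceeds
  // FastClusterThreshold, the cheaper FastCluster path is taken.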
1883 
1884   for (const auto &MemOp : MemOps) {
1885     unsigned ChainPredID = DAG->SUnits.size();
1886     if (FastCluster) {
1887       for (const SDep &Pred : MemOp.SU->Preds) {
1888         // We only want to cluster mem ops that have the same ctrl (non-data)
1889         // pred so that they have no ctrl dependency on each other. But for
1890         // store instrs, we can still cluster them if the pred is a load instr.
1891         if ((Pred.isCtrl() &&
1892              (IsLoad ||
1893               (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
1894             !Pred.isArtificial()) {
1895           ChainPredID = Pred.getSUnit()->NodeNum;
1896           break;
1897         }
1898       }
1899     } else
1900       ChainPredID = 0;
1901 
1902     Groups[ChainPredID].push_back(MemOp);
1903   }
1904   return FastCluster;
1905 }
1906 
1907 /// Callback from DAG postProcessing to create cluster edges for loads/stores.
1908 void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
1909   // Collect all the clusterable loads/stores.
1910   SmallVector<MemOpInfo, 32> MemOpRecords;
1911   collectMemOpRecords(DAG->SUnits, MemOpRecords);
1912 
1913   if (MemOpRecords.size() < 2)
1914     return;
1915 
1916   // If the DAG is too complex, use a heuristic to put loads/stores without
1917   // dependencies into the same group, to avoid a compile-time blow-up.
1918   // Note that some fusion pairs may be lost because of this.
1919   DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups;
1920   bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
1921 
1922   for (auto &Group : Groups) {
1923     // Sort the loads/stores so that we can stop clustering as early as
1924     // possible.
1925     llvm::sort(Group.second);
1926 
1927     // Try to cluster all the neighboring loads/stores.
1928     clusterNeighboringMemOps(Group.second, FastCluster, DAG);
1929   }
1930 }
1931 
1932 //===----------------------------------------------------------------------===//
1933 // CopyConstrain - DAG post-processing to encourage copy elimination.
1934 //===----------------------------------------------------------------------===//
1935 
1936 namespace {
1937 
1938 /// Post-process the DAG to create weak edges from all uses of a copy to
1939 /// the one use that defines the copy's source vreg, most likely an induction
1940 /// variable increment.
1941 class CopyConstrain : public ScheduleDAGMutation {
1942   // Transient state.
1943   SlotIndex RegionBeginIdx;
1944 
1945   // RegionEndIdx is the slot index of the last non-debug instruction in the
1946   // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1947   SlotIndex RegionEndIdx;
1948 
1949 public:
1950   CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1951 
1952   void apply(ScheduleDAGInstrs *DAGInstrs) override;
1953 
1954 protected:
1955   void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1956 };
1957 
1958 } // end anonymous namespace
1959 
1960 namespace llvm {
1961 
1962 std::unique_ptr<ScheduleDAGMutation>
1963 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1964                                const TargetRegisterInfo *TRI) {
1965   return std::make_unique<CopyConstrain>(TII, TRI);
1966 }
1967 
1968 } // end namespace llvm
1969 
1970 /// constrainLocalCopy handles two possibilities:
1971 /// 1) Local src:
1972 /// I0:     = dst
1973 /// I1: src = ...
1974 /// I2:     = dst
1975 /// I3: dst = src (copy)
1976 /// (create pred->succ edges I0->I1, I2->I1)
1977 ///
1978 /// 2) Local copy:
1979 /// I0: dst = src (copy)
1980 /// I1:     = dst
1981 /// I2: src = ...
1982 /// I3:     = dst
1983 /// (create pred->succ edges I1->I2, I3->I2)
1984 ///
1985 /// Although the MachineScheduler is currently constrained to single blocks,
1986 /// this algorithm should handle extended blocks. An EBB is a set of
1987 /// contiguously numbered blocks such that the previous block in the EBB is
1988 /// always the single predecessor.
1989 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1990   LiveIntervals *LIS = DAG->getLIS();
1991   MachineInstr *Copy = CopySU->getInstr();
1992 
1993   // Check for pure vreg copies.
1994   const MachineOperand &SrcOp = Copy->getOperand(1);
1995   Register SrcReg = SrcOp.getReg();
1996   if (!SrcReg.isVirtual() || !SrcOp.readsReg())
1997     return;
1998 
1999   const MachineOperand &DstOp = Copy->getOperand(0);
2000   Register DstReg = DstOp.getReg();
2001   if (!DstReg.isVirtual() || DstOp.isDead())
2002     return;
2003 
2004   // Check if either the dest or source is local. If it's live across a back
2005   // edge, it's not local. Note that if both vregs are live across the back
2006   // edge, we cannot successfully constrain the copy without cyclic scheduling.
2007   // If both the copy's source and dest are local live intervals, then we
2008   // should treat the dest as the global for the purpose of adding
2009   // constraints. This adds edges from source's other uses to the copy.
2010   unsigned LocalReg = SrcReg;
2011   unsigned GlobalReg = DstReg;
2012   LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
2013   if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
2014     LocalReg = DstReg;
2015     GlobalReg = SrcReg;
2016     LocalLI = &LIS->getInterval(LocalReg);
2017     if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
2018       return;
2019   }
2020   LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
2021 
2022   // Find the global segment after the start of the local LI.
2023   LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
2024   // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
2025   // local live range. We could create edges from other global uses to the local
2026   // start, but the coalescer should have already eliminated these cases, so
2027   // don't bother dealing with it.
2028   if (GlobalSegment == GlobalLI->end())
2029     return;
2030 
2031   // If GlobalSegment is killed at the LocalLI->start, the call to find()
2032   // returned the next global segment. But if GlobalSegment overlaps with
2033   // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
2034   // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
2035   if (GlobalSegment->contains(LocalLI->beginIndex()))
2036     ++GlobalSegment;
2037 
2038   if (GlobalSegment == GlobalLI->end())
2039     return;
2040 
2041   // Check if GlobalLI contains a hole in the vicinity of LocalLI.
2042   if (GlobalSegment != GlobalLI->begin()) {
2043     // Two address defs have no hole.
2044     if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
2045                                GlobalSegment->start)) {
2046       return;
2047     }
2048     // If the prior global segment may be defined by the same two-address
2049     // instruction that also defines LocalLI, then we can't make a hole here.
2050     if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
2051                                LocalLI->beginIndex())) {
2052       return;
2053     }
2054     // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
2055     // it would be a disconnected component in the live range.
2056     assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
2057            "Disconnected LRG within the scheduling region.");
2058   }
2059   MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
2060   if (!GlobalDef)
2061     return;
2062 
2063   SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
2064   if (!GlobalSU)
2065     return;
2066 
2067   // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
2068   // constraining the uses of the last local def to precede GlobalDef.
2069   SmallVector<SUnit*,8> LocalUses;
2070   const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
2071   MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
2072   SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
2073   for (const SDep &Succ : LastLocalSU->Succs) {
2074     if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
2075       continue;
2076     if (Succ.getSUnit() == GlobalSU)
2077       continue;
2078     if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
2079       return;
2080     LocalUses.push_back(Succ.getSUnit());
2081   }
2082   // Open the top of the GlobalLI hole by constraining any earlier global uses
2083   // to precede the start of LocalLI.
2084   SmallVector<SUnit*,8> GlobalUses;
2085   MachineInstr *FirstLocalDef =
2086     LIS->getInstructionFromIndex(LocalLI->beginIndex());
2087   SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
2088   for (const SDep &Pred : GlobalSU->Preds) {
2089     if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
2090       continue;
2091     if (Pred.getSUnit() == FirstLocalSU)
2092       continue;
2093     if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
2094       return;
2095     GlobalUses.push_back(Pred.getSUnit());
2096   }
2097   LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
2098   // Add the weak edges.
2099   for (SUnit *LU : LocalUses) {
2100     LLVM_DEBUG(dbgs() << "  Local use SU(" << LU->NodeNum << ") -> SU("
2101                       << GlobalSU->NodeNum << ")\n");
2102     DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));
2103   }
2104   for (SUnit *GU : GlobalUses) {
2105     LLVM_DEBUG(dbgs() << "  Global use SU(" << GU->NodeNum << ") -> SU("
2106                       << FirstLocalSU->NodeNum << ")\n");
2107     DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));
2108   }
2109 }
2110 
2111 /// Callback from DAG postProcessing to create weak edges to encourage
2112 /// copy elimination.
2113 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
2114   ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
2115   assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
2116 
2117   MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
2118   if (FirstPos == DAG->end())
2119     return;
2120   RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
2121   RegionEndIdx = DAG->getLIS()->getInstructionIndex(
2122       *priorNonDebug(DAG->end(), DAG->begin()));
2123 
2124   for (SUnit &SU : DAG->SUnits) {
2125     if (!SU.getInstr()->isCopy())
2126       continue;
2127 
2128     constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
2129   }
2130 }
2131 
2132 //===----------------------------------------------------------------------===//
2133 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
2134 // and possibly other custom schedulers.
2135 //===----------------------------------------------------------------------===//
2136 
2137 static const unsigned InvalidCycle = ~0U;
2138 
2139 SchedBoundary::~SchedBoundary() { delete HazardRec; }
2140 
2141 /// Given a Count of resource usage and a Latency value, return true if a
2142 /// SchedBoundary becomes resource limited.
2143 /// If we are checking after scheduling a node, we should return true when
2144 /// we just reach the resource limit.
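///
/// For example (illustrative numbers): with LFactor = 2, Latency = 3 and
/// Count = 8, ResCntFactor = 8 - 3 * 2 = 2. After scheduling a node this
/// reaches the limit (2 >= 2 is true), while before scheduling it does not
/// (2 > 2 is false).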
2145 static bool checkResourceLimit(unsigned LFactor, unsigned Count,
2146                                unsigned Latency, bool AfterSchedNode) {
2147   int ResCntFactor = (int)(Count - (Latency * LFactor));
2148   if (AfterSchedNode)
2149     return ResCntFactor >= (int)LFactor;
2150   else
2151     return ResCntFactor > (int)LFactor;
2152 }
2153 
2154 void SchedBoundary::reset() {
2155   // A new HazardRec is created for each DAG and owned by SchedBoundary.
2156   // Destroying and reconstructing it is very expensive though. So keep
2157   // invalid, placeholder HazardRecs.
2158   if (HazardRec && HazardRec->isEnabled()) {
2159     delete HazardRec;
2160     HazardRec = nullptr;
2161   }
2162   Available.clear();
2163   Pending.clear();
2164   CheckPending = false;
2165   CurrCycle = 0;
2166   CurrMOps = 0;
2167   MinReadyCycle = std::numeric_limits<unsigned>::max();
2168   ExpectedLatency = 0;
2169   DependentLatency = 0;
2170   RetiredMOps = 0;
2171   MaxExecutedResCount = 0;
2172   ZoneCritResIdx = 0;
2173   IsResourceLimited = false;
2174   ReservedCycles.clear();
2175   ReservedResourceSegments.clear();
2176   ReservedCyclesIndex.clear();
2177   ResourceGroupSubUnitMasks.clear();
2178 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
2179   // Track the maximum number of stall cycles that could arise either from the
2180   // latency of a DAG edge or the number of cycles that a processor resource is
2181   // reserved (SchedBoundary::ReservedCycles).
2182   MaxObservedStall = 0;
2183 #endif
2184   // Reserve a zero-count for invalid CritResIdx.
2185   ExecutedResCounts.resize(1);
2186   assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
2187 }
2188 
2189 void SchedRemainder::
2190 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
2191   reset();
2192   if (!SchedModel->hasInstrSchedModel())
2193     return;
2194   RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
2195   for (SUnit &SU : DAG->SUnits) {
2196     const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
2197     RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
2198       * SchedModel->getMicroOpFactor();
2199     for (TargetSchedModel::ProcResIter
2200            PI = SchedModel->getWriteProcResBegin(SC),
2201            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2202       unsigned PIdx = PI->ProcResourceIdx;
2203       unsigned Factor = SchedModel->getResourceFactor(PIdx);
2204       assert(PI->Cycles >= PI->StartAtCycle);
2205       RemainingCounts[PIdx] += (Factor * (PI->Cycles - PI->StartAtCycle));
2206     }
2207   }
2208 }
2209 
2210 void SchedBoundary::
2211 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
2212   reset();
2213   DAG = dag;
2214   SchedModel = smodel;
2215   Rem = rem;
2216   if (SchedModel->hasInstrSchedModel()) {
2217     unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2218     ReservedCyclesIndex.resize(ResourceCount);
2219     ExecutedResCounts.resize(ResourceCount);
2220     ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
2221     unsigned NumUnits = 0;
2222 
2223     for (unsigned i = 0; i < ResourceCount; ++i) {
2224       ReservedCyclesIndex[i] = NumUnits;
2225       NumUnits += SchedModel->getProcResource(i)->NumUnits;
2226       if (isUnbufferedGroup(i)) {
2227         auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;
2228         for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;
2229              U != UE; ++U)
2230           ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
2231       }
2232     }
2233 
2234     ReservedCycles.resize(NumUnits, InvalidCycle);
2235   }
2236 }
2237 
2238 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
2239 /// these "soft stalls" differently than the hard stall cycles based on CPU
2240 /// resources and computed by checkHazard(). A fully in-order model
2241 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
2242 /// available for scheduling until they are ready. However, a weaker in-order
2243 /// model may use this for heuristics. For example, if a processor has in-order
2244 /// behavior when reading certain resources, this may come into play.
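///
/// For example (illustrative numbers): an unbuffered SU with BotReadyCycle = 8
/// while the bottom zone is at CurrCycle = 5 yields 3 soft-stall cycles;
/// SUs that are not unbuffered always yield 0.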
2245 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
2246   if (!SU->isUnbuffered)
2247     return 0;
2248 
2249   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2250   if (ReadyCycle > CurrCycle)
2251     return ReadyCycle - CurrCycle;
2252   return 0;
2253 }
2254 
2255 /// Compute the next cycle at which the given processor resource unit
2256 /// can be scheduled.
2257 unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
2258                                                        unsigned Cycles,
2259                                                        unsigned StartAtCycle) {
2260   if (SchedModel && SchedModel->enableIntervals()) {
2261     if (isTop())
2262       return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop(
2263           CurrCycle, StartAtCycle, Cycles);
2264 
2265     return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom(
2266         CurrCycle, StartAtCycle, Cycles);
2267   }
2268 
2269   unsigned NextUnreserved = ReservedCycles[InstanceIdx];
2270   // If this resource has never been used, always return cycle zero.
2271   if (NextUnreserved == InvalidCycle)
2272     return 0;
2273   // For bottom-up scheduling add the cycles needed for the current operation.
2274   if (!isTop())
2275     NextUnreserved += Cycles;
2276   return NextUnreserved;
2277 }
2278 
2279 /// Compute the next cycle at which the given processor resource can be
2280 /// scheduled.  Returns the next cycle and the index of the processor resource
2281 /// instance in the reserved cycles vector.
2282 std::pair<unsigned, unsigned>
2283 SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
2284                                     unsigned Cycles, unsigned StartAtCycle) {
2285 
2286   unsigned MinNextUnreserved = InvalidCycle;
2287   unsigned InstanceIdx = 0;
2288   unsigned StartIndex = ReservedCyclesIndex[PIdx];
2289   unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
2290   assert(NumberOfInstances > 0 &&
2291          "Cannot have zero instances of a ProcResource");
2292 
2293   if (isUnbufferedGroup(PIdx)) {
2294     // If any subunits are used by the instruction, report that the resource
2295     // group is available at 0, effectively removing the group record from
2296     // hazarding and basing the hazarding decisions on the subunit records.
2297     // Otherwise, choose the first available instance from among the subunits.
2298     // Specifications which assign cycles to both the subunits and the group or
2299     // which use an unbuffered group with buffered subunits will appear to
2300     // schedule strangely. In the first case, the additional cycles for the
2301     // group will be ignored.  In the second, the group will be ignored
2302     // entirely.
2303     for (const MCWriteProcResEntry &PE :
2304          make_range(SchedModel->getWriteProcResBegin(SC),
2305                     SchedModel->getWriteProcResEnd(SC)))
2306       if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
2307         return std::make_pair(0u, StartIndex);
2308 
2309     auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
2310     for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
2311       unsigned NextUnreserved, NextInstanceIdx;
2312       std::tie(NextUnreserved, NextInstanceIdx) =
2313           getNextResourceCycle(SC, SubUnits[I], Cycles, StartAtCycle);
2314       if (MinNextUnreserved > NextUnreserved) {
2315         InstanceIdx = NextInstanceIdx;
2316         MinNextUnreserved = NextUnreserved;
2317       }
2318     }
2319     return std::make_pair(MinNextUnreserved, InstanceIdx);
2320   }
2321 
2322   for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
2323        ++I) {
2324     unsigned NextUnreserved =
2325         getNextResourceCycleByInstance(I, Cycles, StartAtCycle);
2326     if (MinNextUnreserved > NextUnreserved) {
2327       InstanceIdx = I;
2328       MinNextUnreserved = NextUnreserved;
2329     }
2330   }
2331   return std::make_pair(MinNextUnreserved, InstanceIdx);
2332 }
2333 
2334 /// Does this SU have a hazard within the current instruction group.
2335 ///
2336 /// The scheduler supports two modes of hazard recognition. The first is the
2337 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
2338 /// supports highly complicated in-order reservation tables
2339 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
2340 ///
2341 /// The second is a streamlined mechanism that checks for hazards based on
2342 /// simple counters that the scheduler itself maintains. It explicitly checks
2343 /// for instruction dispatch limitations, including the number of micro-ops that
2344 /// can dispatch per cycle.
2345 ///
2346 /// TODO: Also check whether the SU must start a new group.
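///
/// For example (illustrative numbers): with an issue width of 2, if one
/// micro-op has already issued in the current cycle (CurrMOps == 1) and the
/// candidate SU needs 2 micro-ops, then 1 + 2 > 2 and the SU is reported as
/// a hazard for this cycle.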
2347 bool SchedBoundary::checkHazard(SUnit *SU) {
2348   if (HazardRec->isEnabled()
2349       && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
2350     return true;
2351   }
2352 
2353   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
2354   if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2355     LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
2356                       << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
2357     return true;
2358   }
2359 
2360   if (CurrMOps > 0 &&
2361       ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2362        (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2363     LLVM_DEBUG(dbgs() << "  hazard: SU(" << SU->NodeNum << ") must "
2364                       << (isTop() ? "begin" : "end") << " group\n");
2365     return true;
2366   }
2367 
2368   if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
2369     const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2370     for (const MCWriteProcResEntry &PE :
2371           make_range(SchedModel->getWriteProcResBegin(SC),
2372                      SchedModel->getWriteProcResEnd(SC))) {
2373       unsigned ResIdx = PE.ProcResourceIdx;
2374       unsigned Cycles = PE.Cycles;
2375       unsigned StartAtCycle = PE.StartAtCycle;
2376       unsigned NRCycle, InstanceIdx;
2377       std::tie(NRCycle, InstanceIdx) =
2378           getNextResourceCycle(SC, ResIdx, Cycles, StartAtCycle);
2379       if (NRCycle > CurrCycle) {
2380 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
2381         MaxObservedStall = std::max(Cycles, MaxObservedStall);
2382 #endif
2383         LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "
2384                           << SchedModel->getResourceName(ResIdx)
2385                           << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx]  << ']'
2386                           << "=" << NRCycle << "c\n");
2387         return true;
2388       }
2389     }
2390   }
2391   return false;
2392 }
2393 
2394 // Find the unscheduled node in ReadySUs with the highest latency.
2395 unsigned SchedBoundary::
2396 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
2397   SUnit *LateSU = nullptr;
2398   unsigned RemLatency = 0;
2399   for (SUnit *SU : ReadySUs) {
2400     unsigned L = getUnscheduledLatency(SU);
2401     if (L > RemLatency) {
2402       RemLatency = L;
2403       LateSU = SU;
2404     }
2405   }
2406   if (LateSU) {
2407     LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2408                       << LateSU->NodeNum << ") " << RemLatency << "c\n");
2409   }
2410   return RemLatency;
2411 }
2412 
2413 // Count resources in this zone and the remaining unscheduled
2414 // instructions. Return the max count, scaled. Set OtherCritIdx to the critical
2415 // resource index, or zero if the zone is issue limited.
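//
// For example (illustrative numbers): with a micro-op factor of 1,
// RemIssueCount = 20 and RetiredMOps = 10 give an issue count of 30; a
// resource whose executed-plus-remaining count is 36 then becomes the "other"
// critical resource, its index is stored in OtherCritIdx, and 36 is returned.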
2416 unsigned SchedBoundary::
2417 getOtherResourceCount(unsigned &OtherCritIdx) {
2418   OtherCritIdx = 0;
2419   if (!SchedModel->hasInstrSchedModel())
2420     return 0;
2421 
2422   unsigned OtherCritCount = Rem->RemIssueCount
2423     + (RetiredMOps * SchedModel->getMicroOpFactor());
2424   LLVM_DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
2425                     << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2426   for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2427        PIdx != PEnd; ++PIdx) {
2428     unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2429     if (OtherCount > OtherCritCount) {
2430       OtherCritCount = OtherCount;
2431       OtherCritIdx = PIdx;
2432     }
2433   }
2434   if (OtherCritIdx) {
2435     LLVM_DEBUG(
2436         dbgs() << "  " << Available.getName() << " + Remain CritRes: "
2437                << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2438                << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2439   }
2440   return OtherCritCount;
2441 }
2442 
2443 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
2444                                 unsigned Idx) {
2445   assert(SU->getInstr() && "Scheduled SUnit must have instr");
2446 
2447 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
2448   // ReadyCycle had been bumped up to CurrCycle when this node was
2449   // scheduled, but CurrCycle may have been eagerly advanced immediately after
2450   // scheduling, so it may now be greater than ReadyCycle.
2451   if (ReadyCycle > CurrCycle)
2452     MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2453 #endif
2454 
2455   if (ReadyCycle < MinReadyCycle)
2456     MinReadyCycle = ReadyCycle;
2457 
2458   // Check for interlocks first. For the purpose of other heuristics, an
2459   // instruction that cannot issue appears as if it's not in the ReadyQueue.
2460   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2461   bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
2462                         checkHazard(SU) || (Available.size() >= ReadyListLimit);
2463 
2464   if (!HazardDetected) {
2465     Available.push(SU);
2466 
2467     if (InPQueue)
2468       Pending.remove(Pending.begin() + Idx);
2469     return;
2470   }
2471 
2472   if (!InPQueue)
2473     Pending.push(SU);
2474 }
2475 
2476 /// Move the boundary of scheduled code by one cycle.
2477 void SchedBoundary::bumpCycle(unsigned NextCycle) {
2478   if (SchedModel->getMicroOpBufferSize() == 0) {
2479     assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2480            "MinReadyCycle uninitialized");
2481     if (MinReadyCycle > NextCycle)
2482       NextCycle = MinReadyCycle;
2483   }
2484   // Update the current micro-ops, which will issue in the next cycle.
2485   unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2486   CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2487 
2488   // Decrement DependentLatency based on the next cycle.
2489   if ((NextCycle - CurrCycle) > DependentLatency)
2490     DependentLatency = 0;
2491   else
2492     DependentLatency -= (NextCycle - CurrCycle);
2493 
2494   if (!HazardRec->isEnabled()) {
2495     // Bypass HazardRec virtual calls.
2496     CurrCycle = NextCycle;
2497   } else {
2498     // Bypass getHazardType calls in case of long latency.
2499     for (; CurrCycle != NextCycle; ++CurrCycle) {
2500       if (isTop())
2501         HazardRec->AdvanceCycle();
2502       else
2503         HazardRec->RecedeCycle();
2504     }
2505   }
2506   CheckPending = true;
2507   IsResourceLimited =
2508       checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2509                          getScheduledLatency(), true);
2510 
2511   LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2512                     << '\n');
2513 }
2514 
2515 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2516   ExecutedResCounts[PIdx] += Count;
2517   if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2518     MaxExecutedResCount = ExecutedResCounts[PIdx];
2519 }
2520 
2521 /// Add the given processor resource to this scheduled zone.
2522 ///
2523 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2524 /// during which this resource is consumed.
2525 ///
2526 /// \return the next cycle at which the instruction may execute without
2527 /// oversubscribing resources.
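///
/// For example (illustrative numbers): with a resource factor of 2,
/// Cycles = 3 and StartAtCycle = 1, the zone is charged 2 * (3 - 1) = 4
/// resource units for this instruction.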
2528 unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
2529                                       unsigned Cycles, unsigned NextCycle,
2530                                       unsigned StartAtCycle) {
2531   unsigned Factor = SchedModel->getResourceFactor(PIdx);
2532   unsigned Count = Factor * (Cycles - StartAtCycle);
2533   LLVM_DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx) << " +"
2534                     << Cycles << "x" << Factor << "u\n");
2535 
2536   // Update Executed resources counts.
2537   incExecutedResources(PIdx, Count);
2538   assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2539   Rem->RemainingCounts[PIdx] -= Count;
2540 
2541   // Check if this resource exceeds the current critical resource. If so, it
2542   // becomes the critical resource.
2543   if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2544     ZoneCritResIdx = PIdx;
2545     LLVM_DEBUG(dbgs() << "  *** Critical resource "
2546                       << SchedModel->getResourceName(PIdx) << ": "
2547                       << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2548                       << "c\n");
2549   }
2550   // For reserved resources, record the highest cycle using the resource.
2551   unsigned NextAvailable, InstanceIdx;
2552   std::tie(NextAvailable, InstanceIdx) =
2553       getNextResourceCycle(SC, PIdx, Cycles, StartAtCycle);
2554   if (NextAvailable > CurrCycle) {
2555     LLVM_DEBUG(dbgs() << "  Resource conflict: "
2556                       << SchedModel->getResourceName(PIdx)
2557                       << '[' << InstanceIdx - ReservedCyclesIndex[PIdx]  << ']'
2558                       << " reserved until @" << NextAvailable << "\n");
2559   }
2560   return NextAvailable;
2561 }
2562 
2563 /// Move the boundary of scheduled code by one SUnit.
2564 void SchedBoundary::bumpNode(SUnit *SU) {
2565   // Update the reservation table.
2566   if (HazardRec->isEnabled()) {
2567     if (!isTop() && SU->isCall) {
2568       // Calls are scheduled with their preceding instructions. For bottom-up
2569       // scheduling, clear the pipeline state before emitting.
2570       HazardRec->Reset();
2571     }
2572     HazardRec->EmitInstruction(SU);
2573     // Scheduling an instruction may have made pending instructions available.
2574     CheckPending = true;
2575   }
2576   // checkHazard should prevent scheduling multiple instructions per cycle that
2577   // exceed the issue width.
2578   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2579   unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2580   assert(
2581       (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2582       "Cannot schedule this instruction's MicroOps in the current cycle.");
2583 
2584   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2585   LLVM_DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
2586 
2587   unsigned NextCycle = CurrCycle;
2588   switch (SchedModel->getMicroOpBufferSize()) {
2589   case 0:
2590     assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2591     break;
2592   case 1:
2593     if (ReadyCycle > NextCycle) {
2594       NextCycle = ReadyCycle;
2595       LLVM_DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
2596     }
2597     break;
2598   default:
2599     // We don't currently model the OOO reorder buffer, so consider all
2600     // scheduled MOps to be "retired". We do loosely model in-order resource
2601     // latency. If this instruction uses an in-order resource, account for any
2602     // likely stall cycles.
2603     if (SU->isUnbuffered && ReadyCycle > NextCycle)
2604       NextCycle = ReadyCycle;
2605     break;
2606   }
2607   RetiredMOps += IncMOps;
2608 
2609   // Update resource counts and critical resource.
2610   if (SchedModel->hasInstrSchedModel()) {
2611     unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2612     assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2613     Rem->RemIssueCount -= DecRemIssue;
2614     if (ZoneCritResIdx) {
2615       // Scale scheduled micro-ops for comparing with the critical resource.
2616       unsigned ScaledMOps =
2617         RetiredMOps * SchedModel->getMicroOpFactor();
2618 
2619       // If scaled micro-ops are now more than the previous critical resource by
2620       // a full cycle, then micro-ops issue becomes critical.
2621       if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2622           >= (int)SchedModel->getLatencyFactor()) {
2623         ZoneCritResIdx = 0;
2624         LLVM_DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
2625                           << ScaledMOps / SchedModel->getLatencyFactor()
2626                           << "c\n");
2627       }
2628     }
2629     for (TargetSchedModel::ProcResIter
2630            PI = SchedModel->getWriteProcResBegin(SC),
2631            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2632       unsigned RCycle = countResource(SC, PI->ProcResourceIdx, PI->Cycles,
2633                                       NextCycle, PI->StartAtCycle);
2634       if (RCycle > NextCycle)
2635         NextCycle = RCycle;
2636     }
2637     if (SU->hasReservedResource) {
2638       // For reserved resources, record the highest cycle using the resource.
2639       // For top-down scheduling, this is the cycle in which we schedule this
2640       // instruction plus the number of cycles the operation reserves the
2641       // resource. For bottom-up, it is simply the instruction's cycle.
2642       for (TargetSchedModel::ProcResIter
2643              PI = SchedModel->getWriteProcResBegin(SC),
2644              PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2645         unsigned PIdx = PI->ProcResourceIdx;
2646         if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2647 
2648           if (SchedModel && SchedModel->enableIntervals()) {
2649             unsigned ReservedUntil, InstanceIdx;
2650             std::tie(ReservedUntil, InstanceIdx) =
2651                 getNextResourceCycle(SC, PIdx, PI->Cycles, PI->StartAtCycle);
2652             if (isTop()) {
2653               ReservedResourceSegments[InstanceIdx].add(
2654                   ResourceSegments::getResourceIntervalTop(
2655                       NextCycle, PI->StartAtCycle, PI->Cycles),
2656                   MIResourceCutOff);
2657             } else {
2658               ReservedResourceSegments[InstanceIdx].add(
2659                   ResourceSegments::getResourceIntervalBottom(
2660                       NextCycle, PI->StartAtCycle, PI->Cycles),
2661                   MIResourceCutOff);
2662             }
2663           } else {
2664 
2665             unsigned ReservedUntil, InstanceIdx;
2666             std::tie(ReservedUntil, InstanceIdx) =
2667                 getNextResourceCycle(SC, PIdx, 0, PI->StartAtCycle);
2668             if (isTop()) {
2669               ReservedCycles[InstanceIdx] =
2670                   std::max(ReservedUntil, NextCycle + PI->Cycles);
2671             } else
2672               ReservedCycles[InstanceIdx] = NextCycle;
2673           }
2674         }
2675       }
2676     }
2677   }
2678   // Update ExpectedLatency and DependentLatency.
2679   unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2680   unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2681   if (SU->getDepth() > TopLatency) {
2682     TopLatency = SU->getDepth();
2683     LLVM_DEBUG(dbgs() << "  " << Available.getName() << " TopLatency SU("
2684                       << SU->NodeNum << ") " << TopLatency << "c\n");
2685   }
2686   if (SU->getHeight() > BotLatency) {
2687     BotLatency = SU->getHeight();
2688     LLVM_DEBUG(dbgs() << "  " << Available.getName() << " BotLatency SU("
2689                       << SU->NodeNum << ") " << BotLatency << "c\n");
2690   }
2691   // If we stall for any reason, bump the cycle.
2692   if (NextCycle > CurrCycle)
2693     bumpCycle(NextCycle);
2694   else
2695     // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2696     // resource limited. If a stall occurred, bumpCycle does this.
2697     IsResourceLimited =
2698         checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2699                            getScheduledLatency(), true);
2700 
2701   // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2702   // resets CurrMOps. Loop to handle instructions with more MOps than can issue in
2703   // one cycle.  Since we commonly reach the max MOps here, opportunistically
2704   // bump the cycle to avoid uselessly checking everything in the readyQ.
2705   CurrMOps += IncMOps;
2706 
2707   // Bump the cycle count for issue group constraints.
2708   // This must be done after NextCycle has been adjusted for all other stalls.
2709   // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2710   // CurrCycle to X.
2711   if ((isTop() &&  SchedModel->mustEndGroup(SU->getInstr())) ||
2712       (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2713     LLVM_DEBUG(dbgs() << "  Bump cycle to " << (isTop() ? "end" : "begin")
2714                       << " group\n");
2715     bumpCycle(++NextCycle);
2716   }
2717 
2718   while (CurrMOps >= SchedModel->getIssueWidth()) {
2719     LLVM_DEBUG(dbgs() << "  *** Max MOps " << CurrMOps << " at cycle "
2720                       << CurrCycle << '\n');
2721     bumpCycle(++NextCycle);
2722   }
2723   LLVM_DEBUG(dumpScheduledState());
2724 }
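
// A hypothetical walkthrough of the issue-width handling above, assuming an
// IssueWidth of 2: if CurrMOps is 1 and the scheduled instruction adds
// IncMOps = 3, CurrMOps becomes 4, so the trailing loop calls bumpCycle twice
// (bumpCycle retires one issue group each time, per the comment above),
// leaving CurrMOps at 0 for the next cycle.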
2725 
2726 /// Release pending ready nodes into the available queue. This makes them
2727 /// visible to heuristics.
2728 void SchedBoundary::releasePending() {
2729   // If the available queue is empty, it is safe to reset MinReadyCycle.
2730   if (Available.empty())
2731     MinReadyCycle = std::numeric_limits<unsigned>::max();
2732 
2733   // Check to see if any of the pending instructions are ready to issue.  If
2734   // so, add them to the available queue.
2735   for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
2736     SUnit *SU = *(Pending.begin() + I);
2737     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2738 
2739     if (ReadyCycle < MinReadyCycle)
2740       MinReadyCycle = ReadyCycle;
2741 
2742     if (Available.size() >= ReadyListLimit)
2743       break;
2744 
2745     releaseNode(SU, ReadyCycle, true, I);
2746     if (E != Pending.size()) {
2747       --I;
2748       --E;
2749     }
2750   }
2751   CheckPending = false;
2752 }
2753 
2754 /// Remove SU from the ready set for this boundary.
2755 void SchedBoundary::removeReady(SUnit *SU) {
2756   if (Available.isInQueue(SU))
2757     Available.remove(Available.find(SU));
2758   else {
2759     assert(Pending.isInQueue(SU) && "bad ready count");
2760     Pending.remove(Pending.find(SU));
2761   }
2762 }
2763 
2764 /// If this queue only has one ready candidate, return it. As a side effect,
2765 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2766 /// one node is ready. If multiple instructions are ready, return NULL.
2767 SUnit *SchedBoundary::pickOnlyChoice() {
2768   if (CheckPending)
2769     releasePending();
2770 
2771   // Defer any ready instrs that now have a hazard.
2772   for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2773     if (checkHazard(*I)) {
2774       Pending.push(*I);
2775       I = Available.remove(I);
2776       continue;
2777     }
2778     ++I;
2779   }
2780   for (unsigned i = 0; Available.empty(); ++i) {
2781 //  FIXME: Re-enable assert once PR20057 is resolved.
2782 //    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2783 //           "permanent hazard");
2784     (void)i;
2785     bumpCycle(CurrCycle + 1);
2786     releasePending();
2787   }
2788 
2789   LLVM_DEBUG(Pending.dump());
2790   LLVM_DEBUG(Available.dump());
2791 
2792   if (Available.size() == 1)
2793     return *Available.begin();
2794   return nullptr;
2795 }
2796 
2797 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2798 
2799 /// Dump the content of the \ref ReservedCycles vector for the
2800 /// resources that are used in the basic block.
2801 ///
2802 LLVM_DUMP_METHOD void SchedBoundary::dumpReservedCycles() const {
2803   if (!SchedModel->hasInstrSchedModel())
2804     return;
2805 
2806   unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2807   unsigned StartIdx = 0;
2808 
2809   for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) {
2810     const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits;
2811     std::string ResName = SchedModel->getResourceName(ResIdx);
2812     for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {
2813       dbgs() << ResName << "(" << UnitIdx << ") = ";
2814       if (SchedModel && SchedModel->enableIntervals()) {
2815         if (ReservedResourceSegments.count(StartIdx + UnitIdx))
2816           dbgs() << ReservedResourceSegments.at(StartIdx + UnitIdx);
2817         else
2818           dbgs() << "{ }\n";
2819       } else
2820         dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n";
2821     }
2822     StartIdx += NumUnits;
2823   }
2824 }
2825 
2826 // This is useful information to dump after bumpNode.
2827 // Note that the Queue contents are more useful before pickNodeFromQueue.
2828 LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2829   unsigned ResFactor;
2830   unsigned ResCount;
2831   if (ZoneCritResIdx) {
2832     ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2833     ResCount = getResourceCount(ZoneCritResIdx);
2834   } else {
2835     ResFactor = SchedModel->getMicroOpFactor();
2836     ResCount = RetiredMOps * ResFactor;
2837   }
2838   unsigned LFactor = SchedModel->getLatencyFactor();
2839   dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2840          << "  Retired: " << RetiredMOps;
2841   dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
2842   dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
2843          << ResCount / ResFactor << " "
2844          << SchedModel->getResourceName(ZoneCritResIdx)
2845          << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
2846          << (IsResourceLimited ? "  - Resource" : "  - Latency")
2847          << " limited.\n";
2848   if (MISchedDumpReservedCycles)
2849     dumpReservedCycles();
2850 }
2851 #endif
2852 
2853 //===----------------------------------------------------------------------===//
2854 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2855 //===----------------------------------------------------------------------===//
2856 
2857 void GenericSchedulerBase::SchedCandidate::
2858 initResourceDelta(const ScheduleDAGMI *DAG,
2859                   const TargetSchedModel *SchedModel) {
2860   if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2861     return;
2862 
2863   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2864   for (TargetSchedModel::ProcResIter
2865          PI = SchedModel->getWriteProcResBegin(SC),
2866          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2867     if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2868       ResDelta.CritResources += PI->Cycles;
2869     if (PI->ProcResourceIdx == Policy.DemandResIdx)
2870       ResDelta.DemandedResources += PI->Cycles;
2871   }
2872 }
2873 
2874 /// Compute remaining latency. We need this both to determine whether the
2875 /// overall schedule has become latency-limited and whether the instructions
2876 /// outside this zone are resource or latency limited.
2877 ///
2878 /// The "dependent" latency is updated incrementally during scheduling as the
2879 /// max height/depth of scheduled nodes minus the cycles since it was
2880 /// scheduled:
2881 ///   DLat = max (N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
2882 ///
2883 /// The "independent" latency is the max ready queue depth:
2884 ///   ILat = max N.depth for N in Available|Pending
2885 ///
2886 /// RemainingLatency is the greater of independent and dependent latency.
2887 ///
2888 /// These computations are expensive, especially in DAGs with many edges, so
2889 /// only do them if necessary.
2890 static unsigned computeRemLatency(SchedBoundary &CurrZone) {
2891   unsigned RemLatency = CurrZone.getDependentLatency();
2892   RemLatency = std::max(RemLatency,
2893                         CurrZone.findMaxLatency(CurrZone.Available.elements()));
2894   RemLatency = std::max(RemLatency,
2895                         CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2896   return RemLatency;
2897 }
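
// A small hypothetical example of the computation above: with a dependent
// latency of 7 inside the zone, a maximum unscheduled latency of 9 over the
// Available queue, and 4 over the Pending queue, RemLatency = max(7, 9, 4) = 9.
// shouldReduceLatency() then compares 9 + CurrCycle against Rem.CriticalPath.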
2898 
2899 /// Returns true if the current cycle plus remaining latency is greater than
2900 /// the critical path in the scheduling region.
2901 bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
2902                                                SchedBoundary &CurrZone,
2903                                                bool ComputeRemLatency,
2904                                                unsigned &RemLatency) const {
2905   // The current cycle is already greater than the critical path, so we are
2906   // already latency limited and don't need to compute the remaining latency.
2907   if (CurrZone.getCurrCycle() > Rem.CriticalPath)
2908     return true;
2909 
2910   // If we haven't scheduled anything yet, then we aren't latency limited.
2911   if (CurrZone.getCurrCycle() == 0)
2912     return false;
2913 
2914   if (ComputeRemLatency)
2915     RemLatency = computeRemLatency(CurrZone);
2916 
2917   return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
2918 }
2919 
2920 /// Set the CandPolicy for a scheduling zone given the current resources and
2921 /// latencies inside and outside the zone.
2922 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2923                                      SchedBoundary &CurrZone,
2924                                      SchedBoundary *OtherZone) {
2925   // Apply preemptive heuristics based on the total latency and resources
2926   // inside and outside this zone. Potential stalls should be considered before
2927   // following this policy.
2928 
2929   // Compute the critical resource outside the zone.
2930   unsigned OtherCritIdx = 0;
2931   unsigned OtherCount =
2932     OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2933 
2934   bool OtherResLimited = false;
2935   unsigned RemLatency = 0;
2936   bool RemLatencyComputed = false;
2937   if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
2938     RemLatency = computeRemLatency(CurrZone);
2939     RemLatencyComputed = true;
2940     OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2941                                          OtherCount, RemLatency, false);
2942   }
2943 
2944   // Schedule aggressively for latency in PostRA mode. We don't check for
2945   // acyclic latency during PostRA, and highly out-of-order processors will
2946   // skip PostRA scheduling.
2947   if (!OtherResLimited &&
2948       (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
2949                                        RemLatency))) {
2950     Policy.ReduceLatency |= true;
2951     LLVM_DEBUG(dbgs() << "  " << CurrZone.Available.getName()
2952                       << " RemainingLatency " << RemLatency << " + "
2953                       << CurrZone.getCurrCycle() << "c > CritPath "
2954                       << Rem.CriticalPath << "\n");
2955   }
2956   // If the same resource is limiting inside and outside the zone, do nothing.
2957   if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2958     return;
2959 
2960   LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
2961     dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "
2962            << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
2963   } if (OtherResLimited) dbgs()
2964                  << "  RemainingLimit: "
2965                  << SchedModel->getResourceName(OtherCritIdx) << "\n";
2966              if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
2967              << "  Latency limited both directions.\n");
2968 
2969   if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2970     Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2971 
2972   if (OtherResLimited)
2973     Policy.DemandResIdx = OtherCritIdx;
2974 }
2975 
2976 #ifndef NDEBUG
2977 const char *GenericSchedulerBase::getReasonStr(
2978   GenericSchedulerBase::CandReason Reason) {
2979   switch (Reason) {
2980   case NoCand:         return "NOCAND    ";
2981   case Only1:          return "ONLY1     ";
2982   case PhysReg:        return "PHYS-REG  ";
2983   case RegExcess:      return "REG-EXCESS";
2984   case RegCritical:    return "REG-CRIT  ";
2985   case Stall:          return "STALL     ";
2986   case Cluster:        return "CLUSTER   ";
2987   case Weak:           return "WEAK      ";
2988   case RegMax:         return "REG-MAX   ";
2989   case ResourceReduce: return "RES-REDUCE";
2990   case ResourceDemand: return "RES-DEMAND";
2991   case TopDepthReduce: return "TOP-DEPTH ";
2992   case TopPathReduce:  return "TOP-PATH  ";
2993   case BotHeightReduce:return "BOT-HEIGHT";
2994   case BotPathReduce:  return "BOT-PATH  ";
2995   case NextDefUse:     return "DEF-USE   ";
2996   case NodeOrder:      return "ORDER     ";
2997   }
2998   llvm_unreachable("Unknown reason!");
2999 }
3000 
3001 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
3002   PressureChange P;
3003   unsigned ResIdx = 0;
3004   unsigned Latency = 0;
3005   switch (Cand.Reason) {
3006   default:
3007     break;
3008   case RegExcess:
3009     P = Cand.RPDelta.Excess;
3010     break;
3011   case RegCritical:
3012     P = Cand.RPDelta.CriticalMax;
3013     break;
3014   case RegMax:
3015     P = Cand.RPDelta.CurrentMax;
3016     break;
3017   case ResourceReduce:
3018     ResIdx = Cand.Policy.ReduceResIdx;
3019     break;
3020   case ResourceDemand:
3021     ResIdx = Cand.Policy.DemandResIdx;
3022     break;
3023   case TopDepthReduce:
3024     Latency = Cand.SU->getDepth();
3025     break;
3026   case TopPathReduce:
3027     Latency = Cand.SU->getHeight();
3028     break;
3029   case BotHeightReduce:
3030     Latency = Cand.SU->getHeight();
3031     break;
3032   case BotPathReduce:
3033     Latency = Cand.SU->getDepth();
3034     break;
3035   }
3036   dbgs() << "  Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
3037   if (P.isValid())
3038     dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
3039            << ":" << P.getUnitInc() << " ";
3040   else
3041     dbgs() << "      ";
3042   if (ResIdx)
3043     dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
3044   else
3045     dbgs() << "         ";
3046   if (Latency)
3047     dbgs() << " " << Latency << " cycles ";
3048   else
3049     dbgs() << "          ";
3050   dbgs() << '\n';
3051 }
3052 #endif
3053 
3054 namespace llvm {
3055 /// Return true if this heuristic determines order.
3056 /// TODO: Consider refactoring these functions to return an integer or enum,
3057 /// as we may need to differentiate whether TryCand is better than Cand.
3058 bool tryLess(int TryVal, int CandVal,
3059              GenericSchedulerBase::SchedCandidate &TryCand,
3060              GenericSchedulerBase::SchedCandidate &Cand,
3061              GenericSchedulerBase::CandReason Reason) {
3062   if (TryVal < CandVal) {
3063     TryCand.Reason = Reason;
3064     return true;
3065   }
3066   if (TryVal > CandVal) {
3067     if (Cand.Reason > Reason)
3068       Cand.Reason = Reason;
3069     return true;
3070   }
3071   return false;
3072 }
3073 
3074 bool tryGreater(int TryVal, int CandVal,
3075                 GenericSchedulerBase::SchedCandidate &TryCand,
3076                 GenericSchedulerBase::SchedCandidate &Cand,
3077                 GenericSchedulerBase::CandReason Reason) {
3078   if (TryVal > CandVal) {
3079     TryCand.Reason = Reason;
3080     return true;
3081   }
3082   if (TryVal < CandVal) {
3083     if (Cand.Reason > Reason)
3084       Cand.Reason = Reason;
3085     return true;
3086   }
3087   return false;
3088 }
3089 
3090 bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
3091                 GenericSchedulerBase::SchedCandidate &Cand,
3092                 SchedBoundary &Zone) {
3093   if (Zone.isTop()) {
3094     // Prefer the candidate with the lesser depth, but only if one of them has
3095     // depth greater than the total latency scheduled so far, otherwise either
3096     // of them could be scheduled now with no stall.
3097     if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
3098         Zone.getScheduledLatency()) {
3099       if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3100                   TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
3101         return true;
3102     }
3103     if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3104                    TryCand, Cand, GenericSchedulerBase::TopPathReduce))
3105       return true;
3106   } else {
3107     // Prefer the candidate with the lesser height, but only if one of them has
3108     // height greater than the total latency scheduled so far, otherwise either
3109     // of them could be scheduled now with no stall.
3110     if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
3111         Zone.getScheduledLatency()) {
3112       if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3113                   TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
3114         return true;
3115     }
3116     if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3117                    TryCand, Cand, GenericSchedulerBase::BotPathReduce))
3118       return true;
3119   }
3120   return false;
3121 }
3122 } // end namespace llvm
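
// A minimal sketch of how a custom strategy (hypothetical "MyStrategy", not
// part of this file) might chain the helpers above in its own tryCandidate
// override. Each helper returns true as soon as one side wins and records the
// winning reason, so callers test TryCand.Reason afterwards:
//
//   bool MyStrategy::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
//                                 SchedBoundary *Zone) const {
//     if (!Cand.isValid()) {
//       TryCand.Reason = NodeOrder;
//       return true;
//     }
//     // Prefer fewer stall cycles; otherwise fall back to latency.
//     if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
//                 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
//       return TryCand.Reason != NoCand;
//     if (tryLatency(TryCand, Cand, *Zone))
//       return TryCand.Reason != NoCand;
//     return false;
//   }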
3123 
3124 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
3125   LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
3126                     << GenericSchedulerBase::getReasonStr(Reason) << '\n');
3127 }
3128 
3129 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
3130   tracePick(Cand.Reason, Cand.AtTop);
3131 }
3132 
3133 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
3134   assert(dag->hasVRegLiveness() &&
3135          "(PreRA)GenericScheduler needs vreg liveness");
3136   DAG = static_cast<ScheduleDAGMILive*>(dag);
3137   SchedModel = DAG->getSchedModel();
3138   TRI = DAG->TRI;
3139 
3140   if (RegionPolicy.ComputeDFSResult)
3141     DAG->computeDFSResult();
3142 
3143   Rem.init(DAG, SchedModel);
3144   Top.init(DAG, SchedModel, &Rem);
3145   Bot.init(DAG, SchedModel, &Rem);
3146 
3147   // Initialize resource counts.
3148 
3149   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
3150   // are disabled, then these HazardRecs will be disabled.
3151   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3152   if (!Top.HazardRec) {
3153     Top.HazardRec =
3154         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3155             Itin, DAG);
3156   }
3157   if (!Bot.HazardRec) {
3158     Bot.HazardRec =
3159         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3160             Itin, DAG);
3161   }
3162   TopCand.SU = nullptr;
3163   BotCand.SU = nullptr;
3164 }
3165 
3166 /// Initialize the per-region scheduling policy.
3167 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
3168                                   MachineBasicBlock::iterator End,
3169                                   unsigned NumRegionInstrs) {
3170   const MachineFunction &MF = *Begin->getMF();
3171   const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
3172 
3173   // Avoid setting up the register pressure tracker for small regions to save
3174   // compile time. As a rough heuristic, only track pressure when the number of
3175   // schedulable instructions exceeds half the integer register file.
3176   RegionPolicy.ShouldTrackPressure = true;
3177   for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
3178     MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
3179     if (TLI->isTypeLegal(LegalIntVT)) {
3180       unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
3181         TLI->getRegClassFor(LegalIntVT));
3182       RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
3183     }
3184   }
3185 
3186   // For generic targets, we default to bottom-up, because it's simpler, and more
3187   // compile-time optimizations have been implemented in that direction.
3188   RegionPolicy.OnlyBottomUp = true;
3189 
3190   // Allow the subtarget to override default policy.
3191   MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
3192 
3193   // After subtarget overrides, apply command line options.
3194   if (!EnableRegPressure) {
3195     RegionPolicy.ShouldTrackPressure = false;
3196     RegionPolicy.ShouldTrackLaneMasks = false;
3197   }
3198 
3199   // Allow -misched-topdown/bottomup to force or unforce the scheduling direction.
3200   // e.g. -misched-bottomup=false allows scheduling in both directions.
3201   assert((!ForceTopDown || !ForceBottomUp) &&
3202          "-misched-topdown incompatible with -misched-bottomup");
3203   if (ForceBottomUp.getNumOccurrences() > 0) {
3204     RegionPolicy.OnlyBottomUp = ForceBottomUp;
3205     if (RegionPolicy.OnlyBottomUp)
3206       RegionPolicy.OnlyTopDown = false;
3207   }
3208   if (ForceTopDown.getNumOccurrences() > 0) {
3209     RegionPolicy.OnlyTopDown = ForceTopDown;
3210     if (RegionPolicy.OnlyTopDown)
3211       RegionPolicy.OnlyBottomUp = false;
3212   }
3213 }
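
// A minimal sketch of the overrideSchedPolicy() hook invoked above, for a
// hypothetical subtarget (not part of this file). Subtargets may adjust the
// region policy after the generic defaults have been computed:
//
//   void MySubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
//                                         unsigned NumRegionInstrs) const {
//     // Always track pressure and allow bidirectional scheduling.
//     Policy.ShouldTrackPressure = true;
//     Policy.OnlyBottomUp = false;
//   }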
3214 
3215 void GenericScheduler::dumpPolicy() const {
3216   // Cannot completely remove virtual function even in release mode.
3217 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3218   dbgs() << "GenericScheduler RegionPolicy: "
3219          << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
3220          << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
3221          << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
3222          << "\n";
3223 #endif
3224 }
3225 
3226 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
3227 /// critical path by more cycles than it takes to drain the instruction buffer.
3228 /// We estimate an upper bound on in-flight instructions as:
3229 ///
3230 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
3231 /// InFlightIterations = AcyclicPath / CyclesPerIteration
3232 /// InFlightResources = InFlightIterations * LoopResources
3233 ///
3234 /// TODO: Check execution resources in addition to IssueCount.
3235 void GenericScheduler::checkAcyclicLatency() {
3236   if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
3237     return;
3238 
3239   // Scaled number of cycles per loop iteration.
3240   unsigned IterCount =
3241     std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
3242              Rem.RemIssueCount);
3243   // Scaled acyclic critical path.
3244   unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
3245   // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
3246   unsigned InFlightCount =
3247     (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
3248   unsigned BufferLimit =
3249     SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
3250 
3251   Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
3252 
3253   LLVM_DEBUG(
3254       dbgs() << "IssueCycles="
3255              << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
3256              << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
3257              << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3258              << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
3259              << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
3260       if (Rem.IsAcyclicLatencyLimited) dbgs() << "  ACYCLIC LATENCY LIMIT\n");
3261 }
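
// A hypothetical numeric example of the estimate above, assuming latency and
// micro-op factors of 1: with CyclicCritPath = 4, CriticalPath = 16, and
// Rem.RemIssueCount = 8, IterCount = max(4, 8) = 8, AcyclicCount = 16, and
// InFlightCount = ceil(16 * 8 / 8) = 16 micro-ops, i.e. roughly two iterations
// in flight. With a micro-op buffer of 8, BufferLimit = 8 is exceeded and the
// region is marked acyclic-latency limited.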
3262 
3263 void GenericScheduler::registerRoots() {
3264   Rem.CriticalPath = DAG->ExitSU.getDepth();
3265 
3266   // Some roots may not feed into ExitSU. Check all of them in case.
3267   for (const SUnit *SU : Bot.Available) {
3268     if (SU->getDepth() > Rem.CriticalPath)
3269       Rem.CriticalPath = SU->getDepth();
3270   }
3271   LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
3272   if (DumpCriticalPathLength) {
3273     errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
3274   }
3275 
3276   if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
3277     Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
3278     checkAcyclicLatency();
3279   }
3280 }
3281 
3282 namespace llvm {
3283 bool tryPressure(const PressureChange &TryP,
3284                  const PressureChange &CandP,
3285                  GenericSchedulerBase::SchedCandidate &TryCand,
3286                  GenericSchedulerBase::SchedCandidate &Cand,
3287                  GenericSchedulerBase::CandReason Reason,
3288                  const TargetRegisterInfo *TRI,
3289                  const MachineFunction &MF) {
3290   // If one candidate decreases and the other increases, go with it.
3291   // Invalid candidates have UnitInc==0.
3292   if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
3293                  Reason)) {
3294     return true;
3295   }
3296   // Do not compare the magnitude of pressure changes between top and bottom
3297   // boundary.
3298   if (Cand.AtTop != TryCand.AtTop)
3299     return false;
3300 
3301   // If both candidates affect the same set in the same boundary, go with the
3302   // smallest increase.
3303   unsigned TryPSet = TryP.getPSetOrMax();
3304   unsigned CandPSet = CandP.getPSetOrMax();
3305   if (TryPSet == CandPSet) {
3306     return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
3307                    Reason);
3308   }
3309 
3310   int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
3311                                  std::numeric_limits<int>::max();
3312 
3313   int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
3314                                    std::numeric_limits<int>::max();
3315 
3316   // If the candidates are decreasing pressure, reverse priority.
3317   if (TryP.getUnitInc() < 0)
3318     std::swap(TryRank, CandRank);
3319   return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
3320 }
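
// A short hypothetical example of the ranking above: if TryCand and Cand sit
// in the same boundary but touch different pressure sets (say TryCand adds +1
// to one set and Cand adds +2 to another), the sets themselves are ranked via
// TRI->getRegPressureSetScore(), an invalid (pressure-neutral) change ranks
// highest, and tryGreater prefers the higher-ranked set; when the candidates
// are decreasing pressure, the ranks are swapped so the priority is reversed.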
3321 
3322 unsigned getWeakLeft(const SUnit *SU, bool isTop) {
3323   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
3324 }
3325 
3326 /// Minimize physical register live ranges. Regalloc wants them adjacent to
3327 /// their physreg def/use.
3328 ///
3329 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
3330 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
3331 /// with the operation that produces or consumes the physreg. We'll do this when
3332 /// regalloc has support for parallel copies.
3333 int biasPhysReg(const SUnit *SU, bool isTop) {
3334   const MachineInstr *MI = SU->getInstr();
3335 
3336   if (MI->isCopy()) {
3337     unsigned ScheduledOper = isTop ? 1 : 0;
3338     unsigned UnscheduledOper = isTop ? 0 : 1;
3339     // If we have already scheduled the physreg producer/consumer, immediately
3340     // schedule the copy.
3341     if (MI->getOperand(ScheduledOper).getReg().isPhysical())
3342       return 1;
3343     // If the physreg is at the boundary, defer it. Otherwise schedule it
3344     // immediately to free the dependent. We can hoist the copy later.
3345     bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
3346     if (MI->getOperand(UnscheduledOper).getReg().isPhysical())
3347       return AtBoundary ? -1 : 1;
3348   }
3349 
3350   if (MI->isMoveImmediate()) {
3351     // If we have a move immediate and all successors have been assigned, bias
3352     // towards scheduling this later. Make sure all register defs are to
3353     // physical registers.
3354     bool DoBias = true;
3355     for (const MachineOperand &Op : MI->defs()) {
3356       if (Op.isReg() && !Op.getReg().isPhysical()) {
3357         DoBias = false;
3358         break;
3359       }
3360     }
3361 
3362     if (DoBias)
3363       return isTop ? -1 : 1;
3364   }
3365 
3366   return 0;
3367 }
3368 } // end namespace llvm
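
// A short hypothetical example of the copy bias above, for a bottom-up zone
// (isTop == false) looking at "%x = COPY $physreg": operand 1 (the physreg
// source) is the unscheduled operand. If its definition is still inside the
// region (the copy has unscheduled predecessors), biasPhysReg returns 1 and
// the copy is scheduled immediately; if the physreg is live into the region
// (no predecessors left, i.e. at the boundary), it returns -1 and the copy is
// deferred.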
3369 
3370 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
3371                                      bool AtTop,
3372                                      const RegPressureTracker &RPTracker,
3373                                      RegPressureTracker &TempTracker) {
3374   Cand.SU = SU;
3375   Cand.AtTop = AtTop;
3376   if (DAG->isTrackingPressure()) {
3377     if (AtTop) {
3378       TempTracker.getMaxDownwardPressureDelta(
3379         Cand.SU->getInstr(),
3380         Cand.RPDelta,
3381         DAG->getRegionCriticalPSets(),
3382         DAG->getRegPressure().MaxSetPressure);
3383     } else {
3384       if (VerifyScheduling) {
3385         TempTracker.getMaxUpwardPressureDelta(
3386           Cand.SU->getInstr(),
3387           &DAG->getPressureDiff(Cand.SU),
3388           Cand.RPDelta,
3389           DAG->getRegionCriticalPSets(),
3390           DAG->getRegPressure().MaxSetPressure);
3391       } else {
3392         RPTracker.getUpwardPressureDelta(
3393           Cand.SU->getInstr(),
3394           DAG->getPressureDiff(Cand.SU),
3395           Cand.RPDelta,
3396           DAG->getRegionCriticalPSets(),
3397           DAG->getRegPressure().MaxSetPressure);
3398       }
3399     }
3400   }
3401   LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
3402              << "  Try  SU(" << Cand.SU->NodeNum << ") "
3403              << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
3404              << Cand.RPDelta.Excess.getUnitInc() << "\n");
3405 }
3406 
3407 /// Apply a set of heuristics to a new candidate. Heuristics are currently
3408 /// hierarchical. This may be more efficient than a graduated cost model because
3409 /// we don't need to evaluate all aspects of the model for each node in the
3410 /// queue. But it's really done to make the heuristics easier to debug and
3411 /// statistically analyze.
3412 ///
3413 /// \param Cand provides the policy and current best candidate.
3414 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3415 /// \param Zone describes the scheduled zone that we are extending, or nullptr
3416 ///             if Cand is from a different zone than TryCand.
3417 /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3418 bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
3419                                     SchedCandidate &TryCand,
3420                                     SchedBoundary *Zone) const {
3421   // Initialize the candidate if needed.
3422   if (!Cand.isValid()) {
3423     TryCand.Reason = NodeOrder;
3424     return true;
3425   }
3426 
3427   // Bias PhysReg Defs and copies to their uses and definitions, respectively.
3428   if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
3429                  biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
3430     return TryCand.Reason != NoCand;
3431 
3432   // Avoid exceeding the target's limit.
3433   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3434                                                Cand.RPDelta.Excess,
3435                                                TryCand, Cand, RegExcess, TRI,
3436                                                DAG->MF))
3437     return TryCand.Reason != NoCand;
3438 
3439   // Avoid increasing the max critical pressure in the scheduled region.
3440   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3441                                                Cand.RPDelta.CriticalMax,
3442                                                TryCand, Cand, RegCritical, TRI,
3443                                                DAG->MF))
3444     return TryCand.Reason != NoCand;
3445 
3446   // We only compare a subset of features when comparing nodes between
3447   // Top and Bottom boundary. Some properties are simply incomparable; in many
3448   // other instances we should only override the other boundary if something
3449   // is a clearly good pick on one boundary. Skip heuristics that are more
3450   // "tie-breaking" in nature.
3451   bool SameBoundary = Zone != nullptr;
3452   if (SameBoundary) {
3453     // For loops that are acyclic path limited, aggressively schedule for
3454     // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
3455     // heuristics to take precedence.
3456     if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3457         tryLatency(TryCand, Cand, *Zone))
3458       return TryCand.Reason != NoCand;
3459 
3460     // Prioritize instructions that read unbuffered resources by stall cycles.
3461     if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3462                 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3463       return TryCand.Reason != NoCand;
3464   }
3465 
3466   // Keep clustered nodes together to encourage downstream peephole
3467   // optimizations which may reduce resource requirements.
3468   //
3469   // This is a best effort to set things up for a post-RA pass. Optimizations
3470   // like generating loads of multiple registers should ideally be done within
3471   // the scheduler pass by combining the loads during DAG postprocessing.
3472   const SUnit *CandNextClusterSU =
3473     Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3474   const SUnit *TryCandNextClusterSU =
3475     TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3476   if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3477                  Cand.SU == CandNextClusterSU,
3478                  TryCand, Cand, Cluster))
3479     return TryCand.Reason != NoCand;
3480 
3481   if (SameBoundary) {
3482     // Weak edges are for clustering and other constraints.
3483     if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3484                 getWeakLeft(Cand.SU, Cand.AtTop),
3485                 TryCand, Cand, Weak))
3486       return TryCand.Reason != NoCand;
3487   }
3488 
3489   // Avoid increasing the max pressure of the entire region.
3490   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3491                                                Cand.RPDelta.CurrentMax,
3492                                                TryCand, Cand, RegMax, TRI,
3493                                                DAG->MF))
3494     return TryCand.Reason != NoCand;
3495 
3496   if (SameBoundary) {
3497     // Avoid critical resource consumption and balance the schedule.
3498     TryCand.initResourceDelta(DAG, SchedModel);
3499     if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3500                 TryCand, Cand, ResourceReduce))
3501       return TryCand.Reason != NoCand;
3502     if (tryGreater(TryCand.ResDelta.DemandedResources,
3503                    Cand.ResDelta.DemandedResources,
3504                    TryCand, Cand, ResourceDemand))
3505       return TryCand.Reason != NoCand;
3506 
3507     // Avoid serializing long latency dependence chains.
3508     // For acyclic path limited loops, latency was already checked above.
3509     if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3510         !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3511       return TryCand.Reason != NoCand;
3512 
3513     // Fall through to original instruction order.
3514     if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3515         || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3516       TryCand.Reason = NodeOrder;
3517       return true;
3518     }
3519   }
3520 
3521   return false;
3522 }
3523 
3524 /// Pick the best candidate from the queue.
3525 ///
3526 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3527 /// DAG building. To adjust for the current scheduling location we need to
3528 /// maintain the number of vreg uses remaining to be top-scheduled.
3529 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3530                                          const CandPolicy &ZonePolicy,
3531                                          const RegPressureTracker &RPTracker,
3532                                          SchedCandidate &Cand) {
3533   // getMaxPressureDelta temporarily modifies the tracker.
3534   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3535 
3536   ReadyQueue &Q = Zone.Available;
3537   for (SUnit *SU : Q) {
3538 
3539     SchedCandidate TryCand(ZonePolicy);
3540     initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3541     // Pass SchedBoundary only when comparing nodes from the same boundary.
3542     SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3543     if (tryCandidate(Cand, TryCand, ZoneArg)) {
3544       // Initialize resource delta if needed in case future heuristics query it.
3545       if (TryCand.ResDelta == SchedResourceDelta())
3546         TryCand.initResourceDelta(DAG, SchedModel);
3547       Cand.setBest(TryCand);
3548       LLVM_DEBUG(traceCandidate(Cand));
3549     }
3550   }
3551 }
3552 
3553 /// Pick the best candidate node from either the top or bottom queue.
3554 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3555   // Schedule as far as possible in the direction of no choice. This is most
3556   // efficient, but also provides the best heuristics for CriticalPSets.
3557   if (SUnit *SU = Bot.pickOnlyChoice()) {
3558     IsTopNode = false;
3559     tracePick(Only1, false);
3560     return SU;
3561   }
3562   if (SUnit *SU = Top.pickOnlyChoice()) {
3563     IsTopNode = true;
3564     tracePick(Only1, true);
3565     return SU;
3566   }
3567   // Set the bottom-up policy based on the state of the current bottom zone and
3568   // the instructions outside the zone, including the top zone.
3569   CandPolicy BotPolicy;
3570   setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3571   // Set the top-down policy based on the state of the current top zone and
3572   // the instructions outside the zone, including the bottom zone.
3573   CandPolicy TopPolicy;
3574   setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3575 
3576   // See if BotCand is still valid (because we previously scheduled from Top).
3577   LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3578   if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3579       BotCand.Policy != BotPolicy) {
3580     BotCand.reset(CandPolicy());
3581     pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3582     assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3583   } else {
3584     LLVM_DEBUG(traceCandidate(BotCand));
3585 #ifndef NDEBUG
3586     if (VerifyScheduling) {
3587       SchedCandidate TCand;
3588       TCand.reset(CandPolicy());
3589       pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3590       assert(TCand.SU == BotCand.SU &&
3591              "Last pick result should correspond to re-picking right now");
3592     }
3593 #endif
3594   }
3595 
3596   // Check if the top Q has a better candidate.
3597   LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3598   if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3599       TopCand.Policy != TopPolicy) {
3600     TopCand.reset(CandPolicy());
3601     pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3602     assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3603   } else {
3604     LLVM_DEBUG(traceCandidate(TopCand));
3605 #ifndef NDEBUG
3606     if (VerifyScheduling) {
3607       SchedCandidate TCand;
3608       TCand.reset(CandPolicy());
3609       pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3610       assert(TCand.SU == TopCand.SU &&
3611            "Last pick result should correspond to re-picking right now");
3612     }
3613 #endif
3614   }
3615 
3616   // Pick best from BotCand and TopCand.
3617   assert(BotCand.isValid());
3618   assert(TopCand.isValid());
3619   SchedCandidate Cand = BotCand;
3620   TopCand.Reason = NoCand;
3621   if (tryCandidate(Cand, TopCand, nullptr)) {
3622     Cand.setBest(TopCand);
3623     LLVM_DEBUG(traceCandidate(Cand));
3624   }
3625 
3626   IsTopNode = Cand.AtTop;
3627   tracePick(Cand);
3628   return Cand.SU;
3629 }
3630 
3631 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3632 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3633   if (DAG->top() == DAG->bottom()) {
3634     assert(Top.Available.empty() && Top.Pending.empty() &&
3635            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3636     return nullptr;
3637   }
3638   SUnit *SU;
3639   do {
3640     if (RegionPolicy.OnlyTopDown) {
3641       SU = Top.pickOnlyChoice();
3642       if (!SU) {
3643         CandPolicy NoPolicy;
3644         TopCand.reset(NoPolicy);
3645         pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3646         assert(TopCand.Reason != NoCand && "failed to find a candidate");
3647         tracePick(TopCand);
3648         SU = TopCand.SU;
3649       }
3650       IsTopNode = true;
3651     } else if (RegionPolicy.OnlyBottomUp) {
3652       SU = Bot.pickOnlyChoice();
3653       if (!SU) {
3654         CandPolicy NoPolicy;
3655         BotCand.reset(NoPolicy);
3656         pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3657         assert(BotCand.Reason != NoCand && "failed to find a candidate");
3658         tracePick(BotCand);
3659         SU = BotCand.SU;
3660       }
3661       IsTopNode = false;
3662     } else {
3663       SU = pickNodeBidirectional(IsTopNode);
3664     }
3665   } while (SU->isScheduled);
3666 
3667   if (SU->isTopReady())
3668     Top.removeReady(SU);
3669   if (SU->isBottomReady())
3670     Bot.removeReady(SU);
3671 
3672   LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3673                     << *SU->getInstr());
3674   return SU;
3675 }
3676 
3677 void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
3678   MachineBasicBlock::iterator InsertPos = SU->getInstr();
3679   if (!isTop)
3680     ++InsertPos;
3681   SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3682 
3683   // Find already scheduled copies with a single physreg dependence and move
3684   // them just above the scheduled instruction.
3685   for (SDep &Dep : Deps) {
3686     if (Dep.getKind() != SDep::Data ||
3687         !Register::isPhysicalRegister(Dep.getReg()))
3688       continue;
3689     SUnit *DepSU = Dep.getSUnit();
3690     if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3691       continue;
3692     MachineInstr *Copy = DepSU->getInstr();
3693     if (!Copy->isCopy() && !Copy->isMoveImmediate())
3694       continue;
3695     LLVM_DEBUG(dbgs() << "  Rescheduling physreg copy ";
3696                DAG->dumpNode(*Dep.getSUnit()));
3697     DAG->moveInstruction(Copy, InsertPos);
3698   }
3699 }
3700 
3701 /// Update the scheduler's state after scheduling a node. This is the same node
3702 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3703 /// update its state based on the current cycle before MachineSchedStrategy
3704 /// does.
3705 ///
3706 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3707 /// them here. See comments in biasPhysReg.
3708 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3709   if (IsTopNode) {
3710     SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3711     Top.bumpNode(SU);
3712     if (SU->hasPhysRegUses)
3713       reschedulePhysReg(SU, true);
3714   } else {
3715     SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3716     Bot.bumpNode(SU);
3717     if (SU->hasPhysRegDefs)
3718       reschedulePhysReg(SU, false);
3719   }
3720 }
3721 
3722 /// Create the standard converging machine scheduler. This will be used as the
3723 /// default scheduler if the target does not set a default.
3724 ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3725   ScheduleDAGMILive *DAG =
3726       new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
3727   // Register DAG post-processors.
3728   //
3729   // FIXME: extend the mutation API to allow earlier mutations to instantiate
3730   // data and pass it to later mutations. Have a single mutation that gathers
3731   // the interesting nodes in one pass.
3732   DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3733   return DAG;
3734 }
3735 
3736 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
3737   return createGenericSchedLive(C);
3738 }
3739 
3740 static MachineSchedRegistry
3741 GenericSchedRegistry("converge", "Standard converging scheduler.",
3742                      createConvergingSched);
3743 
3744 //===----------------------------------------------------------------------===//
3745 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3746 //===----------------------------------------------------------------------===//
3747 
3748 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3749   DAG = Dag;
3750   SchedModel = DAG->getSchedModel();
3751   TRI = DAG->TRI;
3752 
3753   Rem.init(DAG, SchedModel);
3754   Top.init(DAG, SchedModel, &Rem);
3755   BotRoots.clear();
3756 
3757   // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3758   // or are disabled, then these HazardRecs will be disabled.
3759   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3760   if (!Top.HazardRec) {
3761     Top.HazardRec =
3762         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3763             Itin, DAG);
3764   }
3765 }
3766 
3767 void PostGenericScheduler::registerRoots() {
3768   Rem.CriticalPath = DAG->ExitSU.getDepth();
3769 
3770   // Some roots may not feed into ExitSU. Check all of them in case.
3771   for (const SUnit *SU : BotRoots) {
3772     if (SU->getDepth() > Rem.CriticalPath)
3773       Rem.CriticalPath = SU->getDepth();
3774   }
3775   LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3776   if (DumpCriticalPathLength) {
3777     errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3778   }
3779 }
3780 
3781 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
3782 ///
3783 /// \param Cand provides the policy and current best candidate.
3784 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3785 /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3786 bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3787                                         SchedCandidate &TryCand) {
3788   // Initialize the candidate if needed.
3789   if (!Cand.isValid()) {
3790     TryCand.Reason = NodeOrder;
3791     return true;
3792   }
3793 
3794   // Prioritize instructions that read unbuffered resources by stall cycles.
3795   if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3796               Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3797     return TryCand.Reason != NoCand;
3798 
3799   // Keep clustered nodes together.
3800   if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3801                  Cand.SU == DAG->getNextClusterSucc(),
3802                  TryCand, Cand, Cluster))
3803     return TryCand.Reason != NoCand;
3804 
3805   // Avoid critical resource consumption and balance the schedule.
3806   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3807               TryCand, Cand, ResourceReduce))
3808     return TryCand.Reason != NoCand;
3809   if (tryGreater(TryCand.ResDelta.DemandedResources,
3810                  Cand.ResDelta.DemandedResources,
3811                  TryCand, Cand, ResourceDemand))
3812     return TryCand.Reason != NoCand;
3813 
3814   // Avoid serializing long latency dependence chains.
3815   if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3816     return TryCand.Reason != NoCand;
3817   }
3818 
3819   // Fall through to original instruction order.
3820   if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
3821     TryCand.Reason = NodeOrder;
3822     return true;
3823   }
3824 
3825   return false;
3826 }
3827 
3828 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3829   ReadyQueue &Q = Top.Available;
3830   for (SUnit *SU : Q) {
3831     SchedCandidate TryCand(Cand.Policy);
3832     TryCand.SU = SU;
3833     TryCand.AtTop = true;
3834     TryCand.initResourceDelta(DAG, SchedModel);
3835     if (tryCandidate(Cand, TryCand)) {
3836       Cand.setBest(TryCand);
3837       LLVM_DEBUG(traceCandidate(Cand));
3838     }
3839   }
3840 }
3841 
3842 /// Pick the next node to schedule.
3843 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3844   if (DAG->top() == DAG->bottom()) {
3845     assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3846     return nullptr;
3847   }
3848   SUnit *SU;
3849   do {
3850     SU = Top.pickOnlyChoice();
3851     if (SU) {
3852       tracePick(Only1, true);
3853     } else {
3854       CandPolicy NoPolicy;
3855       SchedCandidate TopCand(NoPolicy);
3856       // Set the top-down policy based on the state of the current top zone and
3857       // the instructions outside the zone, including the bottom zone.
3858       setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3859       pickNodeFromQueue(TopCand);
3860       assert(TopCand.Reason != NoCand && "failed to find a candidate");
3861       tracePick(TopCand);
3862       SU = TopCand.SU;
3863     }
3864   } while (SU->isScheduled);
3865 
3866   IsTopNode = true;
3867   Top.removeReady(SU);
3868 
3869   LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3870                     << *SU->getInstr());
3871   return SU;
3872 }
3873 
3874 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3875 /// scheduled/remaining flags in the DAG nodes.
3876 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3877   SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3878   Top.bumpNode(SU);
3879 }
3880 
3881 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
3882   return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
3883                            /*RemoveKillFlags=*/true);
3884 }
3885 
3886 //===----------------------------------------------------------------------===//
3887 // ILP Scheduler. Currently for experimental analysis of heuristics.
3888 //===----------------------------------------------------------------------===//
3889 
3890 namespace {
3891 
3892 /// Order nodes by the ILP metric.
3893 struct ILPOrder {
3894   const SchedDFSResult *DFSResult = nullptr;
3895   const BitVector *ScheduledTrees = nullptr;
3896   bool MaximizeILP;
3897 
3898   ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3899 
3900   /// Apply a less-than relation on node priority.
3901   ///
3902   /// (Return true if A comes after B in the Q.)
3903   bool operator()(const SUnit *A, const SUnit *B) const {
3904     unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3905     unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3906     if (SchedTreeA != SchedTreeB) {
3907       // Unscheduled trees have lower priority.
3908       if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3909         return ScheduledTrees->test(SchedTreeB);
3910 
3911       // Trees with shallower connections have lower priority.
3912       if (DFSResult->getSubtreeLevel(SchedTreeA)
3913           != DFSResult->getSubtreeLevel(SchedTreeB)) {
3914         return DFSResult->getSubtreeLevel(SchedTreeA)
3915           < DFSResult->getSubtreeLevel(SchedTreeB);
3916       }
3917     }
3918     if (MaximizeILP)
3919       return DFSResult->getILP(A) < DFSResult->getILP(B);
3920     else
3921       return DFSResult->getILP(A) > DFSResult->getILP(B);
3922   }
3923 };
3924 
3925 /// Schedule based on the ILP metric.
3926 class ILPScheduler : public MachineSchedStrategy {
3927   ScheduleDAGMILive *DAG = nullptr;
3928   ILPOrder Cmp;
3929 
3930   std::vector<SUnit*> ReadyQ;
3931 
3932 public:
3933   ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3934 
3935   void initialize(ScheduleDAGMI *dag) override {
3936     assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3937     DAG = static_cast<ScheduleDAGMILive*>(dag);
3938     DAG->computeDFSResult();
3939     Cmp.DFSResult = DAG->getDFSResult();
3940     Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3941     ReadyQ.clear();
3942   }
3943 
3944   void registerRoots() override {
3945     // Restore the heap in ReadyQ with the updated DFS results.
3946     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3947   }
3948 
3949   /// Implement MachineSchedStrategy interface.
3950   /// -----------------------------------------
3951 
3952   /// Callback to select the highest priority node from the ready Q.
3953   SUnit *pickNode(bool &IsTopNode) override {
3954     if (ReadyQ.empty()) return nullptr;
3955     std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3956     SUnit *SU = ReadyQ.back();
3957     ReadyQ.pop_back();
3958     IsTopNode = false;
3959     LLVM_DEBUG(dbgs() << "Pick node "
3960                       << "SU(" << SU->NodeNum << ") "
3961                       << " ILP: " << DAG->getDFSResult()->getILP(SU)
3962                       << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
3963                       << " @"
3964                       << DAG->getDFSResult()->getSubtreeLevel(
3965                              DAG->getDFSResult()->getSubtreeID(SU))
3966                       << '\n'
3967                       << "Scheduling " << *SU->getInstr());
3968     return SU;
3969   }
3970 
3971   /// Scheduler callback to notify that a new subtree is scheduled.
3972   void scheduleTree(unsigned SubtreeID) override {
3973     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3974   }
3975 
3976   /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3977   /// DFSResults, and resort the priority Q.
3978   void schedNode(SUnit *SU, bool IsTopNode) override {
3979     assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3980   }
3981 
3982   void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3983 
3984   void releaseBottomNode(SUnit *SU) override {
3985     ReadyQ.push_back(SU);
3986     std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3987   }
3988 };
3989 
3990 } // end anonymous namespace
3991 
3992 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3993   return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
3994 }
3995 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3996   return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
3997 }
3998 
3999 static MachineSchedRegistry ILPMaxRegistry(
4000   "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
4001 static MachineSchedRegistry ILPMinRegistry(
4002   "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
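
// The registries above make these experimental strategies selectable by name.
// A typical invocation (assuming the -misched=<name> selection flag declared
// earlier in this file) would be:
//
//   llc -enable-misched -misched=ilpmax foo.ll
//
// "ilpmin" inverts the ordering and is mainly useful for comparison when
// analyzing the ILP heuristic.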
4003 
4004 //===----------------------------------------------------------------------===//
4005 // Machine Instruction Shuffler for Correctness Testing
4006 //===----------------------------------------------------------------------===//
4007 
4008 #ifndef NDEBUG
4009 namespace {
4010 
4011 /// Apply a less-than relation on the node order, which corresponds to the
4012 /// instruction order prior to scheduling. IsReverse implements greater-than.
4013 template<bool IsReverse>
4014 struct SUnitOrder {
4015   bool operator()(SUnit *A, SUnit *B) const {
4016     if (IsReverse)
4017       return A->NodeNum > B->NodeNum;
4018     else
4019       return A->NodeNum < B->NodeNum;
4020   }
4021 };
4022 
4023 /// Reorder instructions as much as possible.
4024 class InstructionShuffler : public MachineSchedStrategy {
4025   bool IsAlternating;
4026   bool IsTopDown;
4027 
4028   // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
4029   // gives nodes with a higher number higher priority, causing the latest
4030   // instructions to be scheduled first.
4031   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
4032     TopQ;
4033 
4034   // When scheduling bottom-up, use greater-than as the queue priority.
4035   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
4036     BottomQ;
4037 
4038 public:
4039   InstructionShuffler(bool alternate, bool topdown)
4040     : IsAlternating(alternate), IsTopDown(topdown) {}
4041 
4042   void initialize(ScheduleDAGMI*) override {
4043     TopQ.clear();
4044     BottomQ.clear();
4045   }
4046 
4047   /// Implement MachineSchedStrategy interface.
4048   /// -----------------------------------------
4049 
4050   SUnit *pickNode(bool &IsTopNode) override {
4051     SUnit *SU;
4052     if (IsTopDown) {
4053       do {
4054         if (TopQ.empty()) return nullptr;
4055         SU = TopQ.top();
4056         TopQ.pop();
4057       } while (SU->isScheduled);
4058       IsTopNode = true;
4059     } else {
4060       do {
4061         if (BottomQ.empty()) return nullptr;
4062         SU = BottomQ.top();
4063         BottomQ.pop();
4064       } while (SU->isScheduled);
4065       IsTopNode = false;
4066     }
4067     if (IsAlternating)
4068       IsTopDown = !IsTopDown;
4069     return SU;
4070   }
4071 
4072   void schedNode(SUnit *SU, bool IsTopNode) override {}
4073 
4074   void releaseTopNode(SUnit *SU) override {
4075     TopQ.push(SU);
4076   }
4077   void releaseBottomNode(SUnit *SU) override {
4078     BottomQ.push(SU);
4079   }
4080 };
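// As the comment on TopQ above notes, a less-than comparator in a max-heap
// surfaces the largest NodeNum first. A compiled-out illustration of that
// property using the standard priority queue (the function name demoTop is
// arbitrary):
#if 0
#include <cassert>
#include <functional>
#include <queue>
#include <vector>

int demoTop() {
  std::priority_queue<int, std::vector<int>, std::less<int>> Q;
  Q.push(3);
  Q.push(7);
  Q.push(5);
  assert(Q.top() == 7); // Less-than comparator => largest element on top.
  return Q.top();
}
#endif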
4081 
4082 } // end anonymous namespace
4083 
4084 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
4085   bool Alternate = !ForceTopDown && !ForceBottomUp;
4086   bool TopDown = !ForceBottomUp;
4087   assert((TopDown || !ForceTopDown) &&
4088          "-misched-topdown incompatible with -misched-bottomup");
4089   return new ScheduleDAGMILive(
4090       C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
4091 }
4092 
4093 static MachineSchedRegistry ShufflerRegistry(
4094   "shuffle", "Shuffle machine instructions alternating directions",
4095   createInstructionShuffler);
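// The shuffler is registered only in asserts-enabled builds, since it exists
// purely to stress-test correctness. Assuming the -misched=<name> option
// mentioned earlier, a typical invocation might look like:
//
//   llc -misched=shuffle foo.ll                    # alternate directions
//   llc -misched=shuffle -misched-bottomup foo.ll  # force bottom-up only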
4096 #endif // !NDEBUG
4097 
4098 //===----------------------------------------------------------------------===//
4099 // GraphWriter support for ScheduleDAGMILive.
4100 //===----------------------------------------------------------------------===//
4101 
4102 #ifndef NDEBUG
4103 namespace llvm {
4104 
4105 template<> struct GraphTraits<
4106   ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
4107 
4108 template<>
4109 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
4110   DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
4111 
4112   static std::string getGraphName(const ScheduleDAG *G) {
4113     return std::string(G->MF.getName());
4114   }
4115 
4116   static bool renderGraphFromBottomUp() {
4117     return true;
4118   }
4119 
4120   static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
4121     if (ViewMISchedCutoff == 0)
4122       return false;
4123     return (Node->Preds.size() > ViewMISchedCutoff
4124          || Node->Succs.size() > ViewMISchedCutoff);
4125   }
4126 
4127   /// If you want to override the dot attributes printed for a particular
4128   /// edge, override this method.
4129   static std::string getEdgeAttributes(const SUnit *Node,
4130                                        SUnitIterator EI,
4131                                        const ScheduleDAG *Graph) {
4132     if (EI.isArtificialDep())
4133       return "color=cyan,style=dashed";
4134     if (EI.isCtrlDep())
4135       return "color=blue,style=dashed";
4136     return "";
4137   }
4138 
4139   static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
4140     std::string Str;
4141     raw_string_ostream SS(Str);
4142     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4143     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4144       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4145     SS << "SU:" << SU->NodeNum;
4146     if (DFS)
4147       SS << " I:" << DFS->getNumInstrs(SU);
4148     return SS.str();
4149   }
4150 
4151   static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
4152     return G->getGraphNodeLabel(SU);
4153   }
4154 
4155   static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
4156     std::string Str("shape=Mrecord");
4157     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4158     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4159       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4160     if (DFS) {
4161       Str += ",style=filled,fillcolor=\"#";
4162       Str += DOT::getColorString(DFS->getSubtreeID(N));
4163       Str += '"';
4164     }
4165     return Str;
4166   }
4167 };
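// Taken together, these traits label a node such as SU(5) covering three
// instructions as "SU:5 I:3" and, when DFS results are available, fill it
// with a per-subtree color via attributes of the form
// shape=Mrecord,style=filled,fillcolor="#<subtree color>".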
4168 
4169 } // end namespace llvm
4170 #endif // NDEBUG
4171 
4172 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
4173 /// rendered using 'dot'.
4174 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
4175 #ifndef NDEBUG
4176   ViewGraph(this, Name, false, Title);
4177 #else
4178   errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
4179          << "systems with Graphviz or gv!\n";
4180 #endif  // NDEBUG
4181 }
4182 
4183 /// Out-of-line implementation with no arguments is handy for gdb.
4184 void ScheduleDAGMI::viewGraph() {
4185   viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
4186 }
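// For example, from a debugger session stopped inside the scheduler
// (hypothetical session; DAG names the ScheduleDAGMI instance in scope):
//
//   (gdb) call DAG->viewGraph()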
4187 
4188 /// Sort predicate for the intervals stored in an instance of
4189 /// ResourceSegments. The intervals are always pairwise disjoint (no two
4190 /// intervals intersect), so the whole collection can be sorted by
4191 /// comparing only the left boundaries.
4192 static bool sortIntervals(const ResourceSegments::IntervalTy &A,
4193                           const ResourceSegments::IntervalTy &B) {
4194   return A.first < B.first;
4195 }
4196 
4197 unsigned ResourceSegments::getFirstAvailableAt(
4198     unsigned CurrCycle, unsigned StartAtCycle, unsigned Cycle,
4199     std::function<ResourceSegments::IntervalTy(unsigned, unsigned, unsigned)>
4200         IntervalBuilder) const {
4201   assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals),
4202                         sortIntervals) &&
4203          "Cannot execute on an un-sorted set of intervals.");
4204   unsigned RetCycle = CurrCycle;
4205   ResourceSegments::IntervalTy NewInterval =
4206       IntervalBuilder(RetCycle, StartAtCycle, Cycle);
4207   for (auto &Interval : _Intervals) {
4208     if (!intersects(NewInterval, Interval))
4209       continue;
4210 
4211     // Shift the new interval so that it begins right where the
4212     // intersecting interval ends.
4213     assert(Interval.second > NewInterval.first &&
4214            "Invalid intervals configuration.");
4215     RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first;
4216     NewInterval = IntervalBuilder(RetCycle, StartAtCycle, Cycle);
4217   }
4218   return RetCycle;
4219 }
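// A worked example of the scan above, assuming the interval builder produces
// the two-cycle interval [C, C+2) and the sorted history holds [1,3) and
// [4,7):
//   CurrCycle = 2 -> candidate [2,4) overlaps [1,3) -> bumped to cycle 3
//                 -> candidate [3,5) overlaps [4,7) -> bumped to cycle 7
//                 -> candidate [7,9) is free        -> return 7
// A single left-to-right pass suffices because the history is sorted and
// disjoint and the candidate only ever moves to the right.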
4220 
4221 void ResourceSegments::add(ResourceSegments::IntervalTy A,
4222                            const unsigned CutOff) {
4223   assert(A.first < A.second && "Cannot add empty resource usage");
4224   assert(CutOff > 0 && "0-size interval history has no use.");
4225   assert(all_of(_Intervals,
4226                 [&A](const ResourceSegments::IntervalTy &Interval) -> bool {
4227                   return !intersects(A, Interval);
4228                 }) &&
4229          "A resource is being overwritten");
4230   _Intervals.push_back(A);
4231 
4232   sortAndMerge();
4233 
4234   // Do not keep the full history of the intervals, just the
4235   // latest #CutOff.
4236   while (_Intervals.size() > CutOff)
4237     _Intervals.pop_front();
4238 }
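// For example, with CutOff == 2, adding [1,3), then [4,6), then [7,9) leaves
// only {[4,6), [7,9)}: once the sorted list grows past CutOff entries, the
// left-most (lowest-cycle) segment is dropped from the front.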
4239 
4240 bool ResourceSegments::intersects(ResourceSegments::IntervalTy A,
4241                                   ResourceSegments::IntervalTy B) {
4242   assert(A.first <= A.second && "Invalid interval");
4243   assert(B.first <= B.second && "Invalid interval");
4244 
4245   // Share one boundary.
4246   if ((A.first == B.first) || (A.second == B.second))
4247     return true;
4248 
4249   // full intersect:    [    ***     )  B
4250   //                        [***)       A
4251   if ((A.first > B.first) && (A.second < B.second))
4252     return true;
4253 
4254   // right intersect: [     ***)        B
4255   //                       [***      )  A
4256   if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second))
4257     return true;
4258 
4259   // left intersect:      [***      )  B
4260   //                 [     ***)        A
4261   if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first))
4262     return true;
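  // Note that the test above also fires when B is strictly contained in A,
  // since its third conjunct only requires B to be non-empty.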
4263 
4264   return false;
4265 }
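// A few concrete cases for the half-open intervals used here:
//   intersects([2,4), [2,6)) -> true   (shared left boundary)
//   intersects([3,5), [2,6)) -> true   (A strictly inside B)
//   intersects([4,8), [2,6)) -> true   (right intersect)
//   intersects([1,3), [2,6)) -> true   (left intersect)
//   intersects([2,6), [3,5)) -> true   (B strictly inside A; caught by the
//                                       left-intersect test)
//   intersects([1,2), [2,6)) -> false  (merely adjacent)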
4266 
4267 void ResourceSegments::sortAndMerge() {
4268   if (_Intervals.size() <= 1)
4269     return;
4270 
4271   // First sort the collection.
4272   _Intervals.sort(sortIntervals);
4273 
4274   // We can use std::next here because the list has at least two elements.
4275   auto next = std::next(std::begin(_Intervals));
4276   auto E = std::end(_Intervals);
4277   for (; next != E; ++next) {
4278     if (std::prev(next)->second >= next->first) {
4279       next->first = std::prev(next)->first;
4280       _Intervals.erase(std::prev(next));
4281       continue;
4282     }
4283   }
4284 }
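// For example, sortAndMerge() turns {[5,7), [2,4), [1,3)} into {[1,4), [5,7)}:
// after sorting, [1,3) and [2,4) overlap (3 >= 2) and are merged into [1,4),
// while the disjoint [5,7) is kept as-is. Adjacent segments, where
// prev.second == next.first, are merged as well.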
4285