Lines Matching +full:max +full:- +full:reason

1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
51 #include "llvm/Config/llvm-config.h"
74 #define DEBUG_TYPE "machine-scheduler"
80 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
81 cl::desc("Force top-down list scheduling"));
82 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
83 cl::desc("Force bottom-up list scheduling"));
92 "misched-postra-direction", cl::Hidden,
93 cl::desc("Post reg-alloc list scheduling direction"),
94 // Default to top-down because it was implemented first and existing targets
99 "Force top-down post reg-alloc list scheduling"),
101 "Force bottom-up post reg-alloc list scheduling"),
103 "Force bidirectional post reg-alloc list scheduling")));
105 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
109 "verify-misched", cl::Hidden,
114 "view-misched-dags", cl::Hidden,
116 cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
119 "misched-dump-reserved-cycles", cl::Hidden, cl::init(false),
122 "misched-detail-resource-booking", cl::Hidden, cl::init(false),
138 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
141 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
144 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
146 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
152 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
155 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
158 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
161 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
165 ForceFastCluster("force-fast-cluster", cl::Hidden,
170 FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
176 "misched-dump-schedule-trace", cl::Hidden, cl::init(false),
179 HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
184 ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
188 "misched-sort-resources-in-trace", cl::Hidden, cl::init(true),
193 MIResourceCutOff("misched-resource-cutoff", cl::Hidden,
204 //===----------------------------------------------------------------------===//
206 //===----------------------------------------------------------------------===//
339 "enable-misched",
344 "enable-post-misched",
345 cl::desc("Enable the post-ra machine instruction scheduling pass."),
348 /// Decrement this iterator until reaching the top or a non-debug instr.
353 while (--I != Beg) {
354 if (!I->isDebugOrPseudoInstr())
360 /// Non-const version.
369 /// non-debug instruction.
374 if (!I->isDebugOrPseudoInstr())
380 /// Non-const version.
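For reference, a minimal standalone sketch of the skip-debug-instructions pattern these helpers implement, using a hypothetical container of flags instead of real MachineInstr iterators:

```cpp
#include <vector>

// Walk backwards from I toward Beg and stop at the first entry that is not a
// debug/pseudo instruction; Beg itself is returned if nothing else qualifies.
static size_t priorNonDebugIndex(const std::vector<bool> &IsDebugOrPseudo,
                                 size_t I, size_t Beg) {
  while (I != Beg) {
    --I;
    if (!IsDebugOrPseudo[I])
      break;
  }
  return I;
}
```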
396 ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
409 ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
417 /// Top-level MachineScheduler pass driver.
420 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
422 /// scheduling regions bottom-up.
455 LLVM_DEBUG(LIS->dump());
456 MF->verify(this, "Before machine scheduling.");
458 RegClassInfo->runOnMachineFunction(*MF);
470 Scheduler->setDumpDirection(D);
473 LLVM_DEBUG(LIS->dump());
475 MF->verify(this, "After machine scheduling.");
487 LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
490 LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
499 MF->verify(this, "Before post machine scheduling.");
511 Scheduler->setDumpDirection(D);
515 MF->verify(this, "After post machine scheduling.");
533 return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
540 /// RegionEnd is either MBB->end() or the scheduling boundary after the
560 MachineFunction *MF = MBB->getParent();
561 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
564 for(MachineBasicBlock::iterator RegionEnd = MBB->end();
565 RegionEnd != MBB->begin(); RegionEnd = I) {
568 if (RegionEnd != MBB->end() ||
570 --RegionEnd;
577 for (;I != MBB->begin(); --I) {
603 // TODO: Visit blocks in global postorder or postorder within the bottom-up
605 for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
611 if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
614 && (int)SchedOnlyBlock != MBB->getNumber())
622 // no terminator then RegionEnd == MBB->end() for the bottom region.
625 // will be processed (MBB) top-down if initialized with true.
651 LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
652 << " " << MBB->getName() << "\n From: " << *I
654 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
658 errs() << MF->getName();
659 errs() << ":%bb. " << MBB->getNumber();
660 errs() << " " << MBB->getName() << " \n";
688 dbgs() << SU->NodeNum << " ";
693 //===----------------------------------------------------------------------===//
694 // ScheduleDAGMI - Basic machine instruction scheduling. This is
695 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
697 //===----------------------------------------------------------------------===//
702 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
707 SUnit *SuccSU = SuccEdge->getSUnit();
709 if (SuccEdge->isWeak()) {
710 --SuccSU->WeakPredsLeft;
711 if (SuccEdge->isCluster())
716 if (SuccSU->NumPredsLeft == 0) {
723 // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
725 if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
726 SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
728 --SuccSU->NumPredsLeft;
729 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
730 SchedImpl->releaseTopNode(SuccSU);
733 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
735 for (SDep &Succ : SU->Succs)
739 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
744 SUnit *PredSU = PredEdge->getSUnit();
746 if (PredEdge->isWeak()) {
747 --PredSU->WeakSuccsLeft;
748 if (PredEdge->isCluster())
753 if (PredSU->NumSuccsLeft == 0) {
760 // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
762 if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
763 PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
765 --PredSU->NumSuccsLeft;
766 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
767 SchedImpl->releaseBottomNode(PredSU);
770 /// releasePredecessors - Call releasePred on each of SU's predecessors.
772 for (SDep &Pred : SU->Preds)
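The release helpers above follow the usual list-scheduling readiness rule: a node becomes available once every strong predecessor has been scheduled. A hedged sketch of that pattern with plain indices instead of SUnit/SDep:

```cpp
#include <queue>
#include <vector>

struct Node {
  unsigned PredsLeft = 0;      // strong predecessors not yet scheduled
  std::vector<unsigned> Succs; // indices of successor nodes
};

// Decrement each successor's predecessor count; push it onto the ready queue
// once the count reaches zero (essentially Kahn's topological-sort step).
void releaseSuccessors(std::vector<Node> &Nodes, unsigned SU,
                       std::queue<unsigned> &Ready) {
  for (unsigned Succ : Nodes[SU].Succs)
    if (--Nodes[Succ].PredsLeft == 0)
      Ready.push(Succ);
}
```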
778 SchedImpl->enterMBB(bb);
782 SchedImpl->leaveMBB();
786 /// enterRegion - Called back from PostMachineScheduler::runOnMachineFunction
788 /// in the region, including the boundary itself and single-instruction regions
797 SchedImpl->initPolicy(begin, end, regioninstrs);
809 BB->splice(InsertPos, BB, MI);
813 LIS->handleMove(*MI, /*UpdateFlags=*/true);
831 /// Per-region scheduling driver, called back from
837 LLVM_DEBUG(SchedImpl->dumpPolicy());
853 SchedImpl->initialize(this);
861 SUnit *SU = SchedImpl->pickNode(IsTopNode);
864 assert(!SU->isScheduled && "Node already scheduled");
868 MachineInstr *MI = SU->getInstr();
870 assert(SU->isTopReady() && "node still has unscheduled dependencies");
876 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
892 SchedImpl->schedNode(SU, IsTopNode);
902 << printMBBReference(*begin()->getParent()) << " ***\n";
911 m->apply(this);
944 SchedImpl->releaseTopNode(SU);
950 SchedImpl->releaseBottomNode(*I);
956 SchedImpl->registerRoots();
971 SU->isScheduled = true;
978 BB->splice(RegionBegin, BB, FirstDbgValue);
983 DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
989 BB->splice(std::next(OrigPrevMI), BB, DbgValue);
990 if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
1004 if (BB->size() < 2)
1009 const unsigned FirstCycle = getSUnit(&*(std::begin(*this)))->TopReadyCycle;
1010 unsigned LastCycle = getSUnit(&*(std::prev(std::end(*this))))->TopReadyCycle;
1019 if (SU->TopReadyCycle + PI->ReleaseAtCycle - 1 > LastCycle)
1020 LastCycle = SU->TopReadyCycle + PI->ReleaseAtCycle - 1;
1036 NodeName += std::to_string(SU->NodeNum) + ")";
1040 if (C == SU->TopReadyCycle)
1055 const MCWriteProcResEntry &RHS) -> bool {
1065 for (; C < SU->TopReadyCycle + PI.AcquireAtCycle; ++C) {
1068 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1085 if (BB->size() < 2)
1091 const int FirstCycle = getSUnit(&*(std::begin(*this)))->BotReadyCycle;
1092 int LastCycle = getSUnit(&*(std::prev(std::end(*this))))->BotReadyCycle;
1101 if ((int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1 < LastCycle)
1102 LastCycle = (int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1;
1107 for (int C = FirstCycle; C >= LastCycle; --C)
1118 NodeName += std::to_string(SU->NodeNum) + ")";
1121 for (; C >= LastCycle; --C) {
1122 if (C == (int)SU->BotReadyCycle)
1136 const MCWriteProcResEntry &RHS) -> bool {
1146 for (; C > ((int)SU->BotReadyCycle - (int)PI.AcquireAtCycle); --C) {
1149 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1150 ++I, --C)
1152 while (C-- >= LastCycle)
1184 //===----------------------------------------------------------------------===//
1185 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
1187 //===----------------------------------------------------------------------===//
1207 // Ignore re-defs.
1223 if (UI->SU == &SU)
1231 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
1233 /// the region, including the boundary itself and single-instruction regions
1240 // ScheduleDAGMI initializes SchedImpl's per-region policy.
1244 LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
1248 ShouldTrackPressure = SchedImpl->shouldTrackPressure();
1249 ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
1291 // uses of the same vreg below the live-out reaching def.
1307 (RegionEnd->isDebugInstr() &&
1312 // the max pressure in the scheduled code for these sets.
1317 unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1319 LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1327 << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1344 && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1347 unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1348 if (NewMaxPressure[ID] >= Limit - 2) {
1349 LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1364 /// FIXME: Currently assuming single-use physregs.
1394 // instruction's live-out.
1395 const LiveInterval &LI = LIS->getInterval(Reg);
1398 nextIfDebug(BotRPTracker.getPos(), BB->end());
1399 if (I == BB->end())
1400 VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1402 LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1412 if (!SU->isScheduled && SU != &ExitSU) {
1414 LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1418 LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1419 << *SU->getInstr();
1451 /// schedule - Called back from MachineScheduler::runOnMachineFunction
1463 LLVM_DEBUG(SchedImpl->dumpPolicy());
1473 SchedImpl->initialize(this);
1485 SUnit *SU = SchedImpl->pickNode(IsTopNode);
1488 assert(!SU->isScheduled && "Node already scheduled");
1495 unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1498 DFSResult->scheduleTree(SubtreeID);
1499 SchedImpl->scheduleTree(SubtreeID);
1504 SchedImpl->schedNode(SU, IsTopNode);
1514 << printMBBReference(*begin()->getParent()) << " ***\n";
1547 DFSResult->clear();
1549 DFSResult->resize(SUnits.size());
1550 DFSResult->compute(SUnits);
1551 ScheduledTrees.resize(DFSResult->getNumSubtrees());
1554 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1560 /// The cyclic path estimation identifies a def-use pair that crosses the back
1565 /// a->b(a,c)->c(b)->d(c)->exit
1567 /// The cyclic critical path is a two cycles: b->c->b
1568 /// The acyclic critical path is four cycles: a->b->c->d->exit
1569 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1570 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1571 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1572 /// LiveInDepth = depth(b) = len(a->b) = 1
1574 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1575 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1582 if (!BB->isSuccessor(BB))
1591 const LiveInterval &LI = LIS->getInterval(Reg);
1592 const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1596 MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1601 unsigned LiveOutHeight = DefSU->getHeight();
1602 unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1611 LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1612 if (!LRQ.valueIn()->isPHIDef())
1619 if (LiveOutDepth > SU->getDepth())
1620 CyclicLatency = LiveOutDepth - SU->getDepth();
1622 unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1624 if (LiveInHeight - LiveOutHeight < CyclicLatency)
1625 CyclicLatency = LiveInHeight - LiveOutHeight;
1629 LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1630 << SU->NodeNum << ") = " << CyclicLatency << "c\n");
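A quick standalone check of the arithmetic in the a->b->c->d example above, with plain integers rather than the in-tree SUnit queries; both differences give the same two-cycle answer and the code takes their minimum:

```cpp
#include <algorithm>
#include <cstdio>

int main() {
  unsigned LiveOutHeight = 2; // height(c)
  unsigned LiveOutDepth = 3;  // depth(c) + 1
  unsigned LiveInHeight = 4;  // height(b) + 1
  unsigned LiveInDepth = 1;   // depth(b)
  unsigned CyclicLatency =
      std::min(LiveOutDepth - LiveInDepth, LiveInHeight - LiveOutHeight);
  std::printf("Cyclic critical path = %uc\n", CyclicLatency); // prints 2c
}
```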
1639 /// Release ExitSU predecessors and setup scheduler queues. Re-position
1653 MachineInstr *MI = SU->getInstr();
1656 assert(SU->isTopReady() && "node still has unscheduled dependencies");
1670 // Adjust liveness and add missing dead+read-undef flags.
1671 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1674 // Adjust for missing dead-def flags.
1686 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1705 // Adjust liveness and add missing dead+read-undef flags.
1706 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1709 // Adjust for missing dead-def flags.
1727 //===----------------------------------------------------------------------===//
1728 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1729 //===----------------------------------------------------------------------===//
1733 /// Post-process the DAG to create cluster edges between neighboring
1750 if (A->getType() != B->getType())
1751 return A->getType() < B->getType();
1752 if (A->isReg())
1753 return A->getReg() < B->getReg();
1754 if (A->isFI()) {
1755 const MachineFunction &MF = *A->getParent()->getParent()->getParent();
1759 return StackGrowsDown ? A->getIndex() > B->getIndex()
1760 : A->getIndex() < B->getIndex();
1779 return SU->NodeNum < RHS.SU->NodeNum;
1846 // following load/store one by one, until reaching the first non-dependent one and
1858 for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1867 if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
1869 (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
1870 !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
1879 if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
1880 ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
1881 CurrentClusterBytes = SUnit2ClusterInfo[MemOpa.SU->NodeNum].second +
1885 if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,
1893 if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)
1897 if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
1900 LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1901 << SUb->NodeNum << ")\n");
1909 for (const SDep &Succ : SUa->Succs) {
1912 LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1914 DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1918 // SUb dependent on scheduled in-between SUb and SUa. Successor edges
1923 for (const SDep &Pred : SUb->Preds) {
1926 LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum
1928 DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
1932 SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
1944 if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1945 (!IsLoad && !SU.getInstr()->mayStore()))
1953 if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
1974 MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
1977 unsigned ChainPredID = DAG->SUnits.size();
1979 for (const SDep &Pred : MemOp.SU->Preds) {
1980 // We only want to cluster the mem ops that have the same ctrl(non-data)
1985 (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
1987 ChainPredID = Pred.getSUnit()->NodeNum;
2003 collectMemOpRecords(DAG->SUnits, MemOpRecords);
2024 //===----------------------------------------------------------------------===//
2025 // CopyConstrain - DAG post-processing to encourage copy elimination.
2026 //===----------------------------------------------------------------------===//
2030 /// Post-process the DAG to create weak edges from all uses of a copy to
2037 // RegionEndIdx is the slot index of the last non-debug instruction in the
2068 /// (create pred->succ edges I0->I1, I2->I1)
2075 /// (create pred->succ edges I1->I2, I3->I2)
2082 LiveIntervals *LIS = DAG->getLIS();
2083 MachineInstr *Copy = CopySU->getInstr();
2086 const MachineOperand &SrcOp = Copy->getOperand(1);
2091 const MachineOperand &DstOp = Copy->getOperand(0);
2104 LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
2105 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
2108 LocalLI = &LIS->getInterval(LocalReg);
2109 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
2112 LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
2115 LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
2116 // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
2120 if (GlobalSegment == GlobalLI->end())
2123 // If GlobalSegment is killed at the LocalLI->start, the call to find()
2125 // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
2127 if (GlobalSegment->contains(LocalLI->beginIndex()))
2130 if (GlobalSegment == GlobalLI->end())
2134 if (GlobalSegment != GlobalLI->begin()) {
2136 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
2137 GlobalSegment->start)) {
2140 // If the prior global segment may be defined by the same two-address
2142 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
2143 LocalLI->beginIndex())) {
2148 assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
2151 MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
2155 SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
2162 const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
2163 MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
2164 SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
2165 for (const SDep &Succ : LastLocalSU->Succs) {
2170 if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
2178 LIS->getInstructionFromIndex(LocalLI->beginIndex());
2179 SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
2180 for (const SDep &Pred : GlobalSU->Preds) {
2185 if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
2189 LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
2192 LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
2193 << GlobalSU->NodeNum << ")\n");
2194 DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));
2197 LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
2198 << FirstLocalSU->NodeNum << ")\n");
2199 DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));
2207 assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
2209 MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
2210 if (FirstPos == DAG->end())
2212 RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
2213 RegionEndIdx = DAG->getLIS()->getInstructionIndex(
2214 *priorNonDebug(DAG->end(), DAG->begin()));
2216 for (SUnit &SU : DAG->SUnits) {
2217 if (!SU.getInstr()->isCopy())
2224 //===----------------------------------------------------------------------===//
2227 //===----------------------------------------------------------------------===//
2239 int ResCntFactor = (int)(Count - (Latency * LFactor));
2250 if (HazardRec && HazardRec->isEnabled()) {
2259 MinReadyCycle = std::numeric_limits<unsigned>::max();
2276 // Reserve a zero-count for invalid CritResIdx.
2284 if (!SchedModel->hasInstrSchedModel())
2286 RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
2287 for (SUnit &SU : DAG->SUnits) {
2288 const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
2289 RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
2290 * SchedModel->getMicroOpFactor();
2292 PI = SchedModel->getWriteProcResBegin(SC),
2293 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2294 unsigned PIdx = PI->ProcResourceIdx;
2295 unsigned Factor = SchedModel->getResourceFactor(PIdx);
2296 assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle);
2298 (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle));
2309 if (SchedModel->hasInstrSchedModel()) {
2310 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2318 NumUnits += SchedModel->getProcResource(i)->NumUnits;
2320 auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;
2321 for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;
2333 /// resources and computed by checkHazard(). A fully in-order model
2335 /// available for scheduling until they are ready. However, a weaker in-order
2336 /// model may use this for heuristics. For example, if a processor has in-order
2339 if (!SU->isUnbuffered)
2342 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2344 return ReadyCycle - CurrCycle;
2353 if (SchedModel && SchedModel->enableIntervals()) {
2366 // For bottom-up scheduling add the cycles needed for the current operation.
2368 NextUnreserved = std::max(CurrCycle, NextUnreserved + ReleaseAtCycle);
2387 unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
2404 make_range(SchedModel->getWriteProcResBegin(SC),
2405 SchedModel->getWriteProcResEnd(SC)))
2411 auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
2429 LLVM_DEBUG(dbgs() << " Instance " << I - StartIndex << " available @"
2437 LLVM_DEBUG(dbgs() << " selecting " << SchedModel->getResourceName(PIdx)
2438 << "[" << InstanceIdx - StartIndex << "]"
2448 /// supports highly complicated in-order reservation tables
2449 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
2453 /// for instruction dispatch limitations, including the number of micro-ops that
2458 if (HazardRec->isEnabled()
2459 && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
2463 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
2464 if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2465 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
2466 << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
2471 ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2472 (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2473 LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
2478 if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
2479 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2481 make_range(SchedModel->getWriteProcResBegin(SC),
2482 SchedModel->getWriteProcResEnd(SC))) {
2491 MaxObservedStall = std::max(ReleaseAtCycle, MaxObservedStall);
2493 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
2494 << SchedModel->getResourceName(ResIdx)
2495 << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
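The issue-width portion of the hazard check above reduces to a simple predicate; a hedged sketch with plain integers standing in for the SchedModel/SUnit queries:

```cpp
// A partially filled issue group that would overflow the machine's issue
// width must wait for the next cycle (values are illustrative, not queried
// from a real scheduling model).
bool exceedsIssueWidth(unsigned CurrMOps, unsigned SUMicroOps,
                       unsigned IssueWidth) {
  return CurrMOps > 0 && CurrMOps + SUMicroOps > IssueWidth;
}
```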
2518 << LateSU->NodeNum << ") " << RemLatency << "c\n");
2524 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2529 if (!SchedModel->hasInstrSchedModel())
2532 unsigned OtherCritCount = Rem->RemIssueCount
2533 + (RetiredMOps * SchedModel->getMicroOpFactor());
2535 << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2536 for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2538 unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2547 << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2548 << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2555 assert(SU->getInstr() && "Scheduled SUnit must have instr");
2562 MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2570 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2588 if (SchedModel->getMicroOpBufferSize() == 0) {
2589 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2594 // Update the current micro-ops, which will issue in the next cycle.
2595 unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2596 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2599 if ((NextCycle - CurrCycle) > DependentLatency)
2602 DependentLatency -= (NextCycle - CurrCycle);
2604 if (!HazardRec->isEnabled()) {
2611 HazardRec->AdvanceCycle();
2613 HazardRec->RecedeCycle();
2618 checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2633 /// \param ReleaseAtCycle indicates the number of consecutive (non-pipelined)
2636 /// \param AcquireAtCycle indicates the number of consecutive (non-pipelined)
2645 unsigned Factor = SchedModel->getResourceFactor(PIdx);
2646 unsigned Count = Factor * (ReleaseAtCycle - AcquireAtCycle);
2647 LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2652 assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2653 Rem->RemainingCounts[PIdx] -= Count;
2660 << SchedModel->getResourceName(PIdx) << ": "
2661 << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2670 << SchedModel->getResourceName(PIdx)
2671 << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
2680 if (HazardRec->isEnabled()) {
2681 if (!isTop() && SU->isCall) {
2682 // Calls are scheduled with their preceding instructions. For bottom-up
2684 HazardRec->Reset();
2686 HazardRec->EmitInstruction(SU);
2692 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2693 unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2695 (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2698 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2702 switch (SchedModel->getMicroOpBufferSize()) {
2714 // scheduled MOps to be "retired". We do loosely model in-order resource
2715 // latency. If this instruction uses an in-order resource, account for any
2717 if (SU->isUnbuffered && ReadyCycle > NextCycle)
2724 if (SchedModel->hasInstrSchedModel()) {
2725 unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2726 assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2727 Rem->RemIssueCount -= DecRemIssue;
2729 // Scale scheduled micro-ops for comparing with the critical resource.
2731 RetiredMOps * SchedModel->getMicroOpFactor();
2733 // If scaled micro-ops are now more than the previous critical resource by
2734 // a full cycle, then micro-ops issue becomes critical.
2735 if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2736 >= (int)SchedModel->getLatencyFactor()) {
2739 << ScaledMOps / SchedModel->getLatencyFactor()
2744 PI = SchedModel->getWriteProcResBegin(SC),
2745 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2747 countResource(SC, PI->ProcResourceIdx, PI->ReleaseAtCycle, NextCycle,
2748 PI->AcquireAtCycle);
2752 if (SU->hasReservedResource) {
2754 // For top-down scheduling, this is the cycle in which we schedule this
2756 // resource. For bottom-up it is simply the instruction's cycle.
2758 PI = SchedModel->getWriteProcResBegin(SC),
2759 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2760 unsigned PIdx = PI->ProcResourceIdx;
2761 if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2763 if (SchedModel && SchedModel->enableIntervals()) {
2766 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
2770 NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
2775 NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
2782 SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
2785 std::max(ReservedUntil, NextCycle + PI->ReleaseAtCycle);
2796 if (SU->getDepth() > TopLatency) {
2797 TopLatency = SU->getDepth();
2799 << SU->NodeNum << ") " << TopLatency << "c\n");
2801 if (SU->getHeight() > BotLatency) {
2802 BotLatency = SU->getHeight();
2804 << SU->NodeNum << ") " << BotLatency << "c\n");
2806 // If we stall for any reason, bump the cycle.
2813 checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2818 // one cycle. Since we commonly reach the max MOps here, opportunistically
2826 if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2827 (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2833 while (CurrMOps >= SchedModel->getIssueWidth()) {
2834 LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2846 MinReadyCycle = std::numeric_limits<unsigned>::max();
2852 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2862 --I;
2863 --E;
2896 // FIXME: Re-enable assert once PR20057 is resolved.
2897 // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2918 if (!SchedModel->hasInstrSchedModel())
2921 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2925 const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits;
2926 std::string ResName = SchedModel->getResourceName(ResIdx);
2929 if (SchedModel && SchedModel->enableIntervals()) {
2947 ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2950 ResFactor = SchedModel->getMicroOpFactor();
2953 unsigned LFactor = SchedModel->getLatencyFactor();
2959 << SchedModel->getResourceName(ZoneCritResIdx)
2961 << (IsResourceLimited ? " - Resource" : " - Latency")
2968 //===----------------------------------------------------------------------===//
2969 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2970 //===----------------------------------------------------------------------===//
2978 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2980 PI = SchedModel->getWriteProcResBegin(SC),
2981 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2982 if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2983 ResDelta.CritResources += PI->ReleaseAtCycle;
2984 if (PI->ProcResourceIdx == Policy.DemandResIdx)
2985 ResDelta.DemandedResources += PI->ReleaseAtCycle;
2990 /// overall schedule has become latency-limited and whether the instructions
2994 /// max height/depth of scheduled nodes minus the cycles since it was
2996 /// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2998 /// The "independent" latency is the max ready queue depth:
2999 /// ILat = max N.depth for N in Available|Pending
3007 RemLatency = std::max(RemLatency,
3009 RemLatency = std::max(RemLatency,
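A hedged sketch of the remaining-latency estimate described above, using a simplified Node struct rather than the real SUnit/SchedBoundary types:

```cpp
#include <algorithm>
#include <vector>

struct Node { unsigned Depth; unsigned ReadyCycle; };

// Dependent latency: what already-issued nodes in the zone still owe, given
// how many cycles have elapsed since they became ready. Independent latency:
// the raw depth of nodes still waiting in the ready/pending queues.
unsigned remLatency(const std::vector<Node> &Zone,
                    const std::vector<Node> &Queued, unsigned CurrCycle) {
  unsigned Rem = 0;
  for (const Node &N : Zone) {
    unsigned Elapsed = CurrCycle > N.ReadyCycle ? CurrCycle - N.ReadyCycle : 0;
    Rem = std::max(Rem, N.Depth > Elapsed ? N.Depth - Elapsed : 0);
  }
  for (const Node &N : Queued)
    Rem = std::max(Rem, N.Depth);
  return Rem;
}
```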
3047 OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
3052 if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
3055 OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
3060 // acyclic latency during PostRA, and highly out-of-order processors will
3077 << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
3080 << SchedModel->getResourceName(OtherCritIdx) << "\n";
3093 GenericSchedulerBase::CandReason Reason) {
3094 switch (Reason) {
3097 case PhysReg: return "PHYS-REG ";
3098 case RegExcess: return "REG-EXCESS";
3099 case RegCritical: return "REG-CRIT ";
3103 case RegMax: return "REG-MAX ";
3104 case ResourceReduce: return "RES-REDUCE";
3105 case ResourceDemand: return "RES-DEMAND";
3106 case TopDepthReduce: return "TOP-DEPTH ";
3107 case TopPathReduce: return "TOP-PATH ";
3108 case BotHeightReduce:return "BOT-HEIGHT";
3109 case BotPathReduce: return "BOT-PATH ";
3110 case NextDefUse: return "DEF-USE ";
3113 llvm_unreachable("Unknown reason!");
3120 switch (Cand.Reason) {
3139 Latency = Cand.SU->getDepth();
3142 Latency = Cand.SU->getHeight();
3145 Latency = Cand.SU->getHeight();
3148 Latency = Cand.SU->getDepth();
3151 dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
3153 dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
3158 dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
3176 GenericSchedulerBase::CandReason Reason) {
3178 TryCand.Reason = Reason;
3182 if (Cand.Reason > Reason)
3183 Cand.Reason = Reason;
3192 GenericSchedulerBase::CandReason Reason) {
3194 TryCand.Reason = Reason;
3198 if (Cand.Reason > Reason)
3199 Cand.Reason = Reason;
3212 if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
3214 if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3218 if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3225 if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
3227 if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3231 if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3239 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
3241 << GenericSchedulerBase::getReasonStr(Reason) << '\n');
3245 tracePick(Cand.Reason, Cand.AtTop);
3249 assert(dag->hasVRegLiveness() &&
3252 SchedModel = DAG->getSchedModel();
3253 TRI = DAG->TRI;
3256 DAG->computeDFSResult();
3266 const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3268 Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3271 Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3277 /// Initialize the per-region scheduling policy.
3281 const MachineFunction &MF = *Begin->getMF();
3289 for (unsigned VT = MVT::i64; VT > (unsigned)MVT::i1; --VT) {
3291 if (TLI->isTypeLegal(LegalIntVT)) {
3292 unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
3293 TLI->getRegClassFor(LegalIntVT));
3299 // For generic targets, we default to bottom-up, because it's simpler and more
3300 // compile-time optimizations have been implemented in that direction.
3312 // Check -misched-topdown/bottomup can force or unforce scheduling direction.
3313 // e.g. -misched-bottomup=false allows scheduling in both directions.
3315 "-misched-topdown incompatible with -misched-bottomup");
3341 /// We estimate an upper bound on in-flight instructions as:
3343 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
3354 std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
3357 unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
3360 (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
3362 SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
3368 << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
3369 << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
3370 << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3371 << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
3372 << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
3377 Rem.CriticalPath = DAG->ExitSU.getDepth();
3381 if (SU->getDepth() > Rem.CriticalPath)
3382 Rem.CriticalPath = SU->getDepth();
3384 LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
3386 errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
3389 if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
3390 Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
3400 GenericSchedulerBase::CandReason Reason,
3406 Reason)) {
3420 Reason);
3423 int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
3424 std::numeric_limits<int>::max();
3426 int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
3427 std::numeric_limits<int>::max();
3432 return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
3436 return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
3447 const MachineInstr *MI = SU->getInstr();
3449 if (MI->isCopy()) {
3454 if (MI->getOperand(ScheduledOper).getReg().isPhysical())
3458 bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
3459 if (MI->getOperand(UnscheduledOper).getReg().isPhysical())
3460 return AtBoundary ? -1 : 1;
3463 if (MI->isMoveImmediate()) {
3468 for (const MachineOperand &Op : MI->defs()) {
3476 return isTop ? -1 : 1;
3489 if (DAG->isTrackingPressure()) {
3492 Cand.SU->getInstr(),
3494 DAG->getRegionCriticalPSets(),
3495 DAG->getRegPressure().MaxSetPressure);
3499 Cand.SU->getInstr(),
3500 &DAG->getPressureDiff(Cand.SU),
3502 DAG->getRegionCriticalPSets(),
3503 DAG->getRegPressure().MaxSetPressure);
3506 Cand.SU->getInstr(),
3507 DAG->getPressureDiff(Cand.SU),
3509 DAG->getRegionCriticalPSets(),
3510 DAG->getRegPressure().MaxSetPressure);
3515 << " Try SU(" << Cand.SU->NodeNum << ") "
3516 << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
3530 /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3536 TryCand.Reason = NodeOrder;
3543 return TryCand.Reason != NoCand;
3546 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3549 DAG->MF))
3550 return TryCand.Reason != NoCand;
3552 // Avoid increasing the max critical pressure in the scheduled region.
3553 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3556 DAG->MF))
3557 return TryCand.Reason != NoCand;
3563 // "tie-breaking" in nature.
3569 if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3571 return TryCand.Reason != NoCand;
3574 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3575 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3576 return TryCand.Reason != NoCand;
3582 // This is a best effort to set things up for a post-RA pass. Optimizations
3586 Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3588 TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3592 return TryCand.Reason != NoCand;
3599 return TryCand.Reason != NoCand;
3602 // Avoid increasing the max pressure of the entire region.
3603 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3606 DAG->MF))
3607 return TryCand.Reason != NoCand;
3614 return TryCand.Reason != NoCand;
3618 return TryCand.Reason != NoCand;
3624 return TryCand.Reason != NoCand;
3627 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3628 || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3629 TryCand.Reason = NodeOrder;
3641 /// maintain the number of vreg uses remaining to be top-scheduled.
3680 // Set the bottom-up policy based on the state of the current bottom zone and
3684 // Set the top-down policy based on the state of the current top zone and
3691 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3694 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3695 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3702 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3704 "Last pick result should correspond to re-picking right now");
3711 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3714 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3715 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3722 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3724 "Last pick result should correspond to re-picking right now");
3733 TopCand.Reason = NoCand;
3746 if (DAG->top() == DAG->bottom()) {
3758 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3759 assert(TopCand.Reason != NoCand && "failed to find a candidate");
3769 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3770 assert(BotCand.Reason != NoCand && "failed to find a candidate");
3778 } while (SU->isScheduled);
3794 // removed so it does not get re-picked in a subsequent pickNode call.
3795 if (SU->isTopReady())
3797 if (SU->isBottomReady())
3800 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3801 << *SU->getInstr());
3806 MachineBasicBlock::iterator InsertPos = SU->getInstr();
3809 SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3818 if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3820 MachineInstr *Copy = DepSU->getInstr();
3821 if (!Copy->isCopy() && !Copy->isMoveImmediate())
3824 DAG->dumpNode(*Dep.getSUnit()));
3825 DAG->moveInstruction(Copy, InsertPos);
3838 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3840 if (SU->hasPhysRegUses)
3843 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3845 if (SU->hasPhysRegDefs)
3855 // Register DAG post-processors.
3860 DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3862 const TargetSubtargetInfo &STI = C->MF->getSubtarget();
3866 DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
3878 //===----------------------------------------------------------------------===//
3879 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3880 //===----------------------------------------------------------------------===//
3884 SchedModel = DAG->getSchedModel();
3885 TRI = DAG->TRI;
3893 const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3895 Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3898 Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3918 Rem.CriticalPath = DAG->ExitSU.getDepth();
3922 if (SU->getDepth() > Rem.CriticalPath)
3923 Rem.CriticalPath = SU->getDepth();
3925 LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3927 errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3935 /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3940 TryCand.Reason = NodeOrder;
3947 return TryCand.Reason != NoCand;
3950 if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3951 Cand.SU == DAG->getNextClusterSucc(),
3953 return TryCand.Reason != NoCand;
3958 return TryCand.Reason != NoCand;
3962 return TryCand.Reason != NoCand;
3966 return TryCand.Reason != NoCand;
3970 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
3971 TryCand.Reason = NodeOrder;
4010 // Set the bottom-up policy based on the state of the current bottom zone and
4014 // Set the top-down policy based on the state of the current top zone and
4021 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
4025 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
4034 "Last pick result should correspond to re-picking right now");
4041 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
4045 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
4054 "Last pick result should correspond to re-picking right now");
4063 TopCand.Reason = NoCand;
4076 if (DAG->top() == DAG->bottom()) {
4090 // Set the bottom-up policy based on the state of the current bottom
4094 assert(BotCand.Reason != NoCand && "failed to find a candidate");
4106 // Set the top-down policy based on the state of the current top zone
4110 assert(TopCand.Reason != NoCand && "failed to find a candidate");
4118 } while (SU->isScheduled);
4120 if (SU->isTopReady())
4122 if (SU->isBottomReady())
4125 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
4126 << *SU->getInstr());
4134 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
4137 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
4146 const TargetSubtargetInfo &STI = C->MF->getSubtarget();
4150 DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
4154 //===----------------------------------------------------------------------===//
4156 //===----------------------------------------------------------------------===//
4168 /// Apply a less-than relation on node priority.
4172 unsigned SchedTreeA = DFSResult->getSubtreeID(A);
4173 unsigned SchedTreeB = DFSResult->getSubtreeID(B);
4176 if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
4177 return ScheduledTrees->test(SchedTreeB);
4180 if (DFSResult->getSubtreeLevel(SchedTreeA)
4181 != DFSResult->getSubtreeLevel(SchedTreeB)) {
4182 return DFSResult->getSubtreeLevel(SchedTreeA)
4183 < DFSResult->getSubtreeLevel(SchedTreeB);
4187 return DFSResult->getILP(A) < DFSResult->getILP(B);
4189 return DFSResult->getILP(A) > DFSResult->getILP(B);
4204 assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
4206 DAG->computeDFSResult();
4207 Cmp.DFSResult = DAG->getDFSResult();
4208 Cmp.ScheduledTrees = &DAG->getScheduledTrees();
4218 /// -----------------------------------------
4228 << "SU(" << SU->NodeNum << ") "
4229 << " ILP: " << DAG->getDFSResult()->getILP(SU)
4230 << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
4232 << DAG->getDFSResult()->getSubtreeLevel(
4233 DAG->getDFSResult()->getSubtreeID(SU))
4235 << "Scheduling " << *SU->getInstr());
4247 assert(!IsTopNode && "SchedDFSResult needs bottom-up");
4268 "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
4270 "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
4272 //===----------------------------------------------------------------------===//
4274 //===----------------------------------------------------------------------===//
4279 /// Apply a less-than relation on the node order, which corresponds to the
4280 /// instruction order prior to scheduling. IsReverse implements greater-than.
4285 return A->NodeNum > B->NodeNum;
4287 return A->NodeNum < B->NodeNum;
4296 // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
4302 // When scheduling bottom-up, use greater-than as the queue priority.
4316 /// -----------------------------------------
4325 } while (SU->isScheduled);
4332 } while (SU->isScheduled);
4356 "-misched-topdown incompatible with -misched-bottomup");
4366 //===----------------------------------------------------------------------===//
4368 //===----------------------------------------------------------------------===//
4381 return std::string(G->MF.getName());
4391 return (Node->Preds.size() > ViewMISchedCutoff
4392 || Node->Succs.size() > ViewMISchedCutoff);
4411 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4412 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4413 SS << "SU:" << SU->NodeNum;
4415 SS << " I:" << DFS->getNumInstrs(SU);
4420 return G->getGraphNodeLabel(SU);
4426 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4427 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4430 Str += DOT::getColorString(DFS->getSubtreeID(N));
4440 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
4451 /// Out-of-line implementation with no arguments is handy for gdb.
4453 viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
4471 "Cannot execute on an un-sorted set of intervals.");
4489 RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first;
4498 assert(CutOff > 0 && "0-size interval history has no use.");
4508 [&A](const ResourceSegments::IntervalTy &Interval) -> bool {
4560 if (std::prev(next)->second >= next->first) {
4561 next->first = std::prev(next)->first;
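The last two lines above are the overlap-folding step of ResourceSegments; a hedged standalone sketch of that merge over an already-sorted list of [begin, end) cycle intervals, with a simplified stand-in for the in-tree IntervalTy:

```cpp
#include <list>
#include <utility>

using IntervalTy = std::pair<long, long>; // simplified stand-in type

// Fold each segment into its successor when the two overlap or touch,
// keeping the earliest start; assumes the list is sorted by start cycle.
void mergeSortedSegments(std::list<IntervalTy> &Segments) {
  if (Segments.size() < 2)
    return;
  for (auto Next = std::next(Segments.begin()); Next != Segments.end();
       ++Next) {
    auto Prev = std::prev(Next);
    if (Prev->second >= Next->first) {
      Next->first = Prev->first; // widen the later segment...
      Segments.erase(Prev);      // ...and drop the one it absorbed
    }
  }
}
```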