xref: /netbsd-src/external/apache2/llvm/dist/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp (revision 82d56013d7b633d116a93943de88e08335357a7c)
17330f729Sjoerg //===- GCNIterativeScheduler.cpp ------------------------------------------===//
27330f729Sjoerg //
37330f729Sjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
47330f729Sjoerg // See https://llvm.org/LICENSE.txt for license information.
57330f729Sjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
67330f729Sjoerg //
77330f729Sjoerg //===----------------------------------------------------------------------===//
8*82d56013Sjoerg ///
9*82d56013Sjoerg /// \file
10*82d56013Sjoerg /// This file implements the class GCNIterativeScheduler.
11*82d56013Sjoerg ///
12*82d56013Sjoerg //===----------------------------------------------------------------------===//
137330f729Sjoerg 
147330f729Sjoerg #include "GCNIterativeScheduler.h"
157330f729Sjoerg #include "GCNSchedStrategy.h"
167330f729Sjoerg #include "SIMachineFunctionInfo.h"
177330f729Sjoerg 
187330f729Sjoerg using namespace llvm;
197330f729Sjoerg 
207330f729Sjoerg #define DEBUG_TYPE "machine-scheduler"
217330f729Sjoerg 
227330f729Sjoerg namespace llvm {
237330f729Sjoerg 
247330f729Sjoerg std::vector<const SUnit *> makeMinRegSchedule(ArrayRef<const SUnit *> TopRoots,
257330f729Sjoerg                                               const ScheduleDAG &DAG);
267330f729Sjoerg 
277330f729Sjoerg   std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
287330f729Sjoerg     const ScheduleDAG &DAG);
297330f729Sjoerg }
307330f729Sjoerg 
317330f729Sjoerg // shim accessors for different order containers
getMachineInstr(MachineInstr * MI)327330f729Sjoerg static inline MachineInstr *getMachineInstr(MachineInstr *MI) {
337330f729Sjoerg   return MI;
347330f729Sjoerg }
getMachineInstr(const SUnit * SU)357330f729Sjoerg static inline MachineInstr *getMachineInstr(const SUnit *SU) {
367330f729Sjoerg   return SU->getInstr();
377330f729Sjoerg }
getMachineInstr(const SUnit & SU)387330f729Sjoerg static inline MachineInstr *getMachineInstr(const SUnit &SU) {
397330f729Sjoerg   return SU.getInstr();
407330f729Sjoerg }
417330f729Sjoerg 
427330f729Sjoerg #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
437330f729Sjoerg LLVM_DUMP_METHOD
printRegion(raw_ostream & OS,MachineBasicBlock::iterator Begin,MachineBasicBlock::iterator End,const LiveIntervals * LIS,unsigned MaxInstNum=std::numeric_limits<unsigned>::max ())447330f729Sjoerg static void printRegion(raw_ostream &OS,
457330f729Sjoerg                         MachineBasicBlock::iterator Begin,
467330f729Sjoerg                         MachineBasicBlock::iterator End,
477330f729Sjoerg                         const LiveIntervals *LIS,
487330f729Sjoerg                         unsigned MaxInstNum =
497330f729Sjoerg                           std::numeric_limits<unsigned>::max()) {
507330f729Sjoerg   auto BB = Begin->getParent();
517330f729Sjoerg   OS << BB->getParent()->getName() << ":" << printMBBReference(*BB) << ' '
527330f729Sjoerg      << BB->getName() << ":\n";
537330f729Sjoerg   auto I = Begin;
547330f729Sjoerg   MaxInstNum = std::max(MaxInstNum, 1u);
557330f729Sjoerg   for (; I != End && MaxInstNum; ++I, --MaxInstNum) {
567330f729Sjoerg     if (!I->isDebugInstr() && LIS)
577330f729Sjoerg       OS << LIS->getInstructionIndex(*I);
587330f729Sjoerg     OS << '\t' << *I;
597330f729Sjoerg   }
607330f729Sjoerg   if (I != End) {
617330f729Sjoerg     OS << "\t...\n";
627330f729Sjoerg     I = std::prev(End);
637330f729Sjoerg     if (!I->isDebugInstr() && LIS)
647330f729Sjoerg       OS << LIS->getInstructionIndex(*I);
657330f729Sjoerg     OS << '\t' << *I;
667330f729Sjoerg   }
677330f729Sjoerg   if (End != BB->end()) { // print boundary inst if present
687330f729Sjoerg     OS << "----\n";
697330f729Sjoerg     if (LIS) OS << LIS->getInstructionIndex(*End) << '\t';
707330f729Sjoerg     OS << *End;
717330f729Sjoerg   }
727330f729Sjoerg }
737330f729Sjoerg 
747330f729Sjoerg LLVM_DUMP_METHOD
printLivenessInfo(raw_ostream & OS,MachineBasicBlock::iterator Begin,MachineBasicBlock::iterator End,const LiveIntervals * LIS)757330f729Sjoerg static void printLivenessInfo(raw_ostream &OS,
767330f729Sjoerg                               MachineBasicBlock::iterator Begin,
777330f729Sjoerg                               MachineBasicBlock::iterator End,
787330f729Sjoerg                               const LiveIntervals *LIS) {
797330f729Sjoerg   const auto BB = Begin->getParent();
807330f729Sjoerg   const auto &MRI = BB->getParent()->getRegInfo();
817330f729Sjoerg 
827330f729Sjoerg   const auto LiveIns = getLiveRegsBefore(*Begin, *LIS);
837330f729Sjoerg   OS << "LIn RP: ";
847330f729Sjoerg   getRegPressure(MRI, LiveIns).print(OS);
857330f729Sjoerg 
867330f729Sjoerg   const auto BottomMI = End == BB->end() ? std::prev(End) : End;
877330f729Sjoerg   const auto LiveOuts = getLiveRegsAfter(*BottomMI, *LIS);
887330f729Sjoerg   OS << "LOt RP: ";
897330f729Sjoerg   getRegPressure(MRI, LiveOuts).print(OS);
907330f729Sjoerg }
917330f729Sjoerg 
927330f729Sjoerg LLVM_DUMP_METHOD
printRegions(raw_ostream & OS) const937330f729Sjoerg void GCNIterativeScheduler::printRegions(raw_ostream &OS) const {
947330f729Sjoerg   const auto &ST = MF.getSubtarget<GCNSubtarget>();
957330f729Sjoerg   for (const auto R : Regions) {
967330f729Sjoerg     OS << "Region to schedule ";
977330f729Sjoerg     printRegion(OS, R->Begin, R->End, LIS, 1);
987330f729Sjoerg     printLivenessInfo(OS, R->Begin, R->End, LIS);
997330f729Sjoerg     OS << "Max RP: ";
1007330f729Sjoerg     R->MaxPressure.print(OS, &ST);
1017330f729Sjoerg   }
1027330f729Sjoerg }
1037330f729Sjoerg 
1047330f729Sjoerg LLVM_DUMP_METHOD
printSchedResult(raw_ostream & OS,const Region * R,const GCNRegPressure & RP) const1057330f729Sjoerg void GCNIterativeScheduler::printSchedResult(raw_ostream &OS,
1067330f729Sjoerg                                              const Region *R,
1077330f729Sjoerg                                              const GCNRegPressure &RP) const {
1087330f729Sjoerg   OS << "\nAfter scheduling ";
1097330f729Sjoerg   printRegion(OS, R->Begin, R->End, LIS);
1107330f729Sjoerg   printSchedRP(OS, R->MaxPressure, RP);
1117330f729Sjoerg   OS << '\n';
1127330f729Sjoerg }
1137330f729Sjoerg 
1147330f729Sjoerg LLVM_DUMP_METHOD
printSchedRP(raw_ostream & OS,const GCNRegPressure & Before,const GCNRegPressure & After) const1157330f729Sjoerg void GCNIterativeScheduler::printSchedRP(raw_ostream &OS,
1167330f729Sjoerg                                          const GCNRegPressure &Before,
1177330f729Sjoerg                                          const GCNRegPressure &After) const {
1187330f729Sjoerg   const auto &ST = MF.getSubtarget<GCNSubtarget>();
1197330f729Sjoerg   OS << "RP before: ";
1207330f729Sjoerg   Before.print(OS, &ST);
1217330f729Sjoerg   OS << "RP after:  ";
1227330f729Sjoerg   After.print(OS, &ST);
1237330f729Sjoerg }
1247330f729Sjoerg #endif
1257330f729Sjoerg 
1267330f729Sjoerg // DAG builder helper
1277330f729Sjoerg class GCNIterativeScheduler::BuildDAG {
1287330f729Sjoerg   GCNIterativeScheduler &Sch;
1297330f729Sjoerg   SmallVector<SUnit *, 8> TopRoots;
1307330f729Sjoerg 
1317330f729Sjoerg   SmallVector<SUnit*, 8> BotRoots;
1327330f729Sjoerg public:
BuildDAG(const Region & R,GCNIterativeScheduler & _Sch)1337330f729Sjoerg   BuildDAG(const Region &R, GCNIterativeScheduler &_Sch)
1347330f729Sjoerg     : Sch(_Sch) {
1357330f729Sjoerg     auto BB = R.Begin->getParent();
1367330f729Sjoerg     Sch.BaseClass::startBlock(BB);
1377330f729Sjoerg     Sch.BaseClass::enterRegion(BB, R.Begin, R.End, R.NumRegionInstrs);
1387330f729Sjoerg 
1397330f729Sjoerg     Sch.buildSchedGraph(Sch.AA, nullptr, nullptr, nullptr,
1407330f729Sjoerg                         /*TrackLaneMask*/true);
1417330f729Sjoerg     Sch.Topo.InitDAGTopologicalSorting();
1427330f729Sjoerg     Sch.findRootsAndBiasEdges(TopRoots, BotRoots);
1437330f729Sjoerg   }
1447330f729Sjoerg 
~BuildDAG()1457330f729Sjoerg   ~BuildDAG() {
1467330f729Sjoerg     Sch.BaseClass::exitRegion();
1477330f729Sjoerg     Sch.BaseClass::finishBlock();
1487330f729Sjoerg   }
1497330f729Sjoerg 
getTopRoots() const1507330f729Sjoerg   ArrayRef<const SUnit *> getTopRoots() const {
1517330f729Sjoerg     return TopRoots;
1527330f729Sjoerg   }
getBottomRoots() const1537330f729Sjoerg   ArrayRef<SUnit*> getBottomRoots() const {
1547330f729Sjoerg     return BotRoots;
1557330f729Sjoerg   }
1567330f729Sjoerg };
1577330f729Sjoerg 
1587330f729Sjoerg class GCNIterativeScheduler::OverrideLegacyStrategy {
1597330f729Sjoerg   GCNIterativeScheduler &Sch;
1607330f729Sjoerg   Region &Rgn;
1617330f729Sjoerg   std::unique_ptr<MachineSchedStrategy> SaveSchedImpl;
1627330f729Sjoerg   GCNRegPressure SaveMaxRP;
1637330f729Sjoerg 
1647330f729Sjoerg public:
OverrideLegacyStrategy(Region & R,MachineSchedStrategy & OverrideStrategy,GCNIterativeScheduler & _Sch)1657330f729Sjoerg   OverrideLegacyStrategy(Region &R,
1667330f729Sjoerg                          MachineSchedStrategy &OverrideStrategy,
1677330f729Sjoerg                          GCNIterativeScheduler &_Sch)
1687330f729Sjoerg     : Sch(_Sch)
1697330f729Sjoerg     , Rgn(R)
1707330f729Sjoerg     , SaveSchedImpl(std::move(_Sch.SchedImpl))
1717330f729Sjoerg     , SaveMaxRP(R.MaxPressure) {
1727330f729Sjoerg     Sch.SchedImpl.reset(&OverrideStrategy);
1737330f729Sjoerg     auto BB = R.Begin->getParent();
1747330f729Sjoerg     Sch.BaseClass::startBlock(BB);
1757330f729Sjoerg     Sch.BaseClass::enterRegion(BB, R.Begin, R.End, R.NumRegionInstrs);
1767330f729Sjoerg   }
1777330f729Sjoerg 
~OverrideLegacyStrategy()1787330f729Sjoerg   ~OverrideLegacyStrategy() {
1797330f729Sjoerg     Sch.BaseClass::exitRegion();
1807330f729Sjoerg     Sch.BaseClass::finishBlock();
1817330f729Sjoerg     Sch.SchedImpl.release();
1827330f729Sjoerg     Sch.SchedImpl = std::move(SaveSchedImpl);
1837330f729Sjoerg   }
1847330f729Sjoerg 
schedule()1857330f729Sjoerg   void schedule() {
1867330f729Sjoerg     assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End);
1877330f729Sjoerg     LLVM_DEBUG(dbgs() << "\nScheduling ";
1887330f729Sjoerg                printRegion(dbgs(), Rgn.Begin, Rgn.End, Sch.LIS, 2));
1897330f729Sjoerg     Sch.BaseClass::schedule();
1907330f729Sjoerg 
1917330f729Sjoerg     // Unfortunatelly placeDebugValues incorrectly modifies RegionEnd, restore
1927330f729Sjoerg     Sch.RegionEnd = Rgn.End;
1937330f729Sjoerg     //assert(Rgn.End == Sch.RegionEnd);
1947330f729Sjoerg     Rgn.Begin = Sch.RegionBegin;
1957330f729Sjoerg     Rgn.MaxPressure.clear();
1967330f729Sjoerg   }
1977330f729Sjoerg 
restoreOrder()1987330f729Sjoerg   void restoreOrder() {
1997330f729Sjoerg     assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End);
2007330f729Sjoerg     // DAG SUnits are stored using original region's order
2017330f729Sjoerg     // so just use SUnits as the restoring schedule
2027330f729Sjoerg     Sch.scheduleRegion(Rgn, Sch.SUnits, SaveMaxRP);
2037330f729Sjoerg   }
2047330f729Sjoerg };
2057330f729Sjoerg 
2067330f729Sjoerg namespace {
2077330f729Sjoerg 
2087330f729Sjoerg // just a stub to make base class happy
2097330f729Sjoerg class SchedStrategyStub : public MachineSchedStrategy {
2107330f729Sjoerg public:
shouldTrackPressure() const2117330f729Sjoerg   bool shouldTrackPressure() const override { return false; }
shouldTrackLaneMasks() const2127330f729Sjoerg   bool shouldTrackLaneMasks() const override { return false; }
initialize(ScheduleDAGMI * DAG)2137330f729Sjoerg   void initialize(ScheduleDAGMI *DAG) override {}
pickNode(bool & IsTopNode)2147330f729Sjoerg   SUnit *pickNode(bool &IsTopNode) override { return nullptr; }
schedNode(SUnit * SU,bool IsTopNode)2157330f729Sjoerg   void schedNode(SUnit *SU, bool IsTopNode) override {}
releaseTopNode(SUnit * SU)2167330f729Sjoerg   void releaseTopNode(SUnit *SU) override {}
releaseBottomNode(SUnit * SU)2177330f729Sjoerg   void releaseBottomNode(SUnit *SU) override {}
2187330f729Sjoerg };
2197330f729Sjoerg 
2207330f729Sjoerg } // end anonymous namespace
2217330f729Sjoerg 
GCNIterativeScheduler(MachineSchedContext * C,StrategyKind S)2227330f729Sjoerg GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C,
2237330f729Sjoerg                                              StrategyKind S)
2247330f729Sjoerg   : BaseClass(C, std::make_unique<SchedStrategyStub>())
2257330f729Sjoerg   , Context(C)
2267330f729Sjoerg   , Strategy(S)
2277330f729Sjoerg   , UPTracker(*LIS) {
2287330f729Sjoerg }
2297330f729Sjoerg 
2307330f729Sjoerg // returns max pressure for a region
2317330f729Sjoerg GCNRegPressure
getRegionPressure(MachineBasicBlock::iterator Begin,MachineBasicBlock::iterator End) const2327330f729Sjoerg GCNIterativeScheduler::getRegionPressure(MachineBasicBlock::iterator Begin,
2337330f729Sjoerg                                          MachineBasicBlock::iterator End)
2347330f729Sjoerg   const {
2357330f729Sjoerg   // For the purpose of pressure tracking bottom inst of the region should
2367330f729Sjoerg   // be also processed. End is either BB end, BB terminator inst or sched
2377330f729Sjoerg   // boundary inst.
2387330f729Sjoerg   auto const BBEnd = Begin->getParent()->end();
2397330f729Sjoerg   auto const BottomMI = End == BBEnd ? std::prev(End) : End;
2407330f729Sjoerg 
2417330f729Sjoerg   // scheduleRegions walks bottom to top, so its likely we just get next
2427330f729Sjoerg   // instruction to track
2437330f729Sjoerg   auto AfterBottomMI = std::next(BottomMI);
2447330f729Sjoerg   if (AfterBottomMI == BBEnd ||
2457330f729Sjoerg       &*AfterBottomMI != UPTracker.getLastTrackedMI()) {
2467330f729Sjoerg     UPTracker.reset(*BottomMI);
2477330f729Sjoerg   } else {
2487330f729Sjoerg     assert(UPTracker.isValid());
2497330f729Sjoerg   }
2507330f729Sjoerg 
2517330f729Sjoerg   for (auto I = BottomMI; I != Begin; --I)
2527330f729Sjoerg     UPTracker.recede(*I);
2537330f729Sjoerg 
2547330f729Sjoerg   UPTracker.recede(*Begin);
2557330f729Sjoerg 
2567330f729Sjoerg   assert(UPTracker.isValid() ||
2577330f729Sjoerg          (dbgs() << "Tracked region ",
2587330f729Sjoerg           printRegion(dbgs(), Begin, End, LIS), false));
2597330f729Sjoerg   return UPTracker.moveMaxPressure();
2607330f729Sjoerg }
2617330f729Sjoerg 
2627330f729Sjoerg // returns max pressure for a tentative schedule
2637330f729Sjoerg template <typename Range> GCNRegPressure
getSchedulePressure(const Region & R,Range && Schedule) const2647330f729Sjoerg GCNIterativeScheduler::getSchedulePressure(const Region &R,
2657330f729Sjoerg                                            Range &&Schedule) const {
2667330f729Sjoerg   auto const BBEnd = R.Begin->getParent()->end();
2677330f729Sjoerg   GCNUpwardRPTracker RPTracker(*LIS);
2687330f729Sjoerg   if (R.End != BBEnd) {
2697330f729Sjoerg     // R.End points to the boundary instruction but the
2707330f729Sjoerg     // schedule doesn't include it
2717330f729Sjoerg     RPTracker.reset(*R.End);
2727330f729Sjoerg     RPTracker.recede(*R.End);
2737330f729Sjoerg   } else {
2747330f729Sjoerg     // R.End doesn't point to the boundary instruction
2757330f729Sjoerg     RPTracker.reset(*std::prev(BBEnd));
2767330f729Sjoerg   }
2777330f729Sjoerg   for (auto I = Schedule.end(), B = Schedule.begin(); I != B;) {
2787330f729Sjoerg     RPTracker.recede(*getMachineInstr(*--I));
2797330f729Sjoerg   }
2807330f729Sjoerg   return RPTracker.moveMaxPressure();
2817330f729Sjoerg }
2827330f729Sjoerg 
enterRegion(MachineBasicBlock * BB,MachineBasicBlock::iterator Begin,MachineBasicBlock::iterator End,unsigned NumRegionInstrs)2837330f729Sjoerg void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overriden
2847330f729Sjoerg                                         MachineBasicBlock::iterator Begin,
2857330f729Sjoerg                                         MachineBasicBlock::iterator End,
2867330f729Sjoerg                                         unsigned NumRegionInstrs) {
2877330f729Sjoerg   BaseClass::enterRegion(BB, Begin, End, NumRegionInstrs);
2887330f729Sjoerg   if (NumRegionInstrs > 2) {
2897330f729Sjoerg     Regions.push_back(
2907330f729Sjoerg       new (Alloc.Allocate())
2917330f729Sjoerg       Region { Begin, End, NumRegionInstrs,
2927330f729Sjoerg                getRegionPressure(Begin, End), nullptr });
2937330f729Sjoerg   }
2947330f729Sjoerg }
2957330f729Sjoerg 
schedule()2967330f729Sjoerg void GCNIterativeScheduler::schedule() { // overriden
2977330f729Sjoerg   // do nothing
2987330f729Sjoerg   LLVM_DEBUG(printLivenessInfo(dbgs(), RegionBegin, RegionEnd, LIS);
2997330f729Sjoerg              if (!Regions.empty() && Regions.back()->Begin == RegionBegin) {
3007330f729Sjoerg                dbgs() << "Max RP: ";
3017330f729Sjoerg                Regions.back()->MaxPressure.print(
3027330f729Sjoerg                    dbgs(), &MF.getSubtarget<GCNSubtarget>());
3037330f729Sjoerg              } dbgs()
3047330f729Sjoerg              << '\n';);
3057330f729Sjoerg }
3067330f729Sjoerg 
finalizeSchedule()3077330f729Sjoerg void GCNIterativeScheduler::finalizeSchedule() { // overriden
3087330f729Sjoerg   if (Regions.empty())
3097330f729Sjoerg     return;
3107330f729Sjoerg   switch (Strategy) {
3117330f729Sjoerg   case SCHEDULE_MINREGONLY: scheduleMinReg(); break;
3127330f729Sjoerg   case SCHEDULE_MINREGFORCED: scheduleMinReg(true); break;
3137330f729Sjoerg   case SCHEDULE_LEGACYMAXOCCUPANCY: scheduleLegacyMaxOccupancy(); break;
3147330f729Sjoerg   case SCHEDULE_ILP: scheduleILP(false); break;
3157330f729Sjoerg   }
3167330f729Sjoerg }
3177330f729Sjoerg 
3187330f729Sjoerg // Detach schedule from SUnits and interleave it with debug values.
3197330f729Sjoerg // Returned schedule becomes independent of DAG state.
3207330f729Sjoerg std::vector<MachineInstr*>
detachSchedule(ScheduleRef Schedule) const3217330f729Sjoerg GCNIterativeScheduler::detachSchedule(ScheduleRef Schedule) const {
3227330f729Sjoerg   std::vector<MachineInstr*> Res;
3237330f729Sjoerg   Res.reserve(Schedule.size() * 2);
3247330f729Sjoerg 
3257330f729Sjoerg   if (FirstDbgValue)
3267330f729Sjoerg     Res.push_back(FirstDbgValue);
3277330f729Sjoerg 
3287330f729Sjoerg   const auto DbgB = DbgValues.begin(), DbgE = DbgValues.end();
3297330f729Sjoerg   for (auto SU : Schedule) {
3307330f729Sjoerg     Res.push_back(SU->getInstr());
3317330f729Sjoerg     const auto &D = std::find_if(DbgB, DbgE, [SU](decltype(*DbgB) &P) {
3327330f729Sjoerg       return P.second == SU->getInstr();
3337330f729Sjoerg     });
3347330f729Sjoerg     if (D != DbgE)
3357330f729Sjoerg       Res.push_back(D->first);
3367330f729Sjoerg   }
3377330f729Sjoerg   return Res;
3387330f729Sjoerg }
3397330f729Sjoerg 
setBestSchedule(Region & R,ScheduleRef Schedule,const GCNRegPressure & MaxRP)3407330f729Sjoerg void GCNIterativeScheduler::setBestSchedule(Region &R,
3417330f729Sjoerg                                             ScheduleRef Schedule,
3427330f729Sjoerg                                             const GCNRegPressure &MaxRP) {
3437330f729Sjoerg   R.BestSchedule.reset(
3447330f729Sjoerg     new TentativeSchedule{ detachSchedule(Schedule), MaxRP });
3457330f729Sjoerg }
3467330f729Sjoerg 
scheduleBest(Region & R)3477330f729Sjoerg void GCNIterativeScheduler::scheduleBest(Region &R) {
3487330f729Sjoerg   assert(R.BestSchedule.get() && "No schedule specified");
3497330f729Sjoerg   scheduleRegion(R, R.BestSchedule->Schedule, R.BestSchedule->MaxPressure);
3507330f729Sjoerg   R.BestSchedule.reset();
3517330f729Sjoerg }
3527330f729Sjoerg 
3537330f729Sjoerg // minimal required region scheduler, works for ranges of SUnits*,
3547330f729Sjoerg // SUnits or MachineIntrs*
3557330f729Sjoerg template <typename Range>
scheduleRegion(Region & R,Range && Schedule,const GCNRegPressure & MaxRP)3567330f729Sjoerg void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,
3577330f729Sjoerg                                            const GCNRegPressure &MaxRP) {
3587330f729Sjoerg   assert(RegionBegin == R.Begin && RegionEnd == R.End);
3597330f729Sjoerg   assert(LIS != nullptr);
3607330f729Sjoerg #ifndef NDEBUG
3617330f729Sjoerg   const auto SchedMaxRP = getSchedulePressure(R, Schedule);
3627330f729Sjoerg #endif
3637330f729Sjoerg   auto BB = R.Begin->getParent();
3647330f729Sjoerg   auto Top = R.Begin;
3657330f729Sjoerg   for (const auto &I : Schedule) {
3667330f729Sjoerg     auto MI = getMachineInstr(I);
3677330f729Sjoerg     if (MI != &*Top) {
3687330f729Sjoerg       BB->remove(MI);
3697330f729Sjoerg       BB->insert(Top, MI);
3707330f729Sjoerg       if (!MI->isDebugInstr())
3717330f729Sjoerg         LIS->handleMove(*MI, true);
3727330f729Sjoerg     }
3737330f729Sjoerg     if (!MI->isDebugInstr()) {
3747330f729Sjoerg       // Reset read - undef flags and update them later.
3757330f729Sjoerg       for (auto &Op : MI->operands())
3767330f729Sjoerg         if (Op.isReg() && Op.isDef())
3777330f729Sjoerg           Op.setIsUndef(false);
3787330f729Sjoerg 
3797330f729Sjoerg       RegisterOperands RegOpers;
3807330f729Sjoerg       RegOpers.collect(*MI, *TRI, MRI, /*ShouldTrackLaneMasks*/true,
3817330f729Sjoerg                                        /*IgnoreDead*/false);
3827330f729Sjoerg       // Adjust liveness and add missing dead+read-undef flags.
3837330f729Sjoerg       auto SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
3847330f729Sjoerg       RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
3857330f729Sjoerg     }
3867330f729Sjoerg     Top = std::next(MI->getIterator());
3877330f729Sjoerg   }
3887330f729Sjoerg   RegionBegin = getMachineInstr(Schedule.front());
3897330f729Sjoerg 
3907330f729Sjoerg   // Schedule consisting of MachineInstr* is considered 'detached'
3917330f729Sjoerg   // and already interleaved with debug values
3927330f729Sjoerg   if (!std::is_same<decltype(*Schedule.begin()), MachineInstr*>::value) {
3937330f729Sjoerg     placeDebugValues();
3947330f729Sjoerg     // Unfortunatelly placeDebugValues incorrectly modifies RegionEnd, restore
3957330f729Sjoerg     //assert(R.End == RegionEnd);
3967330f729Sjoerg     RegionEnd = R.End;
3977330f729Sjoerg   }
3987330f729Sjoerg 
3997330f729Sjoerg   R.Begin = RegionBegin;
4007330f729Sjoerg   R.MaxPressure = MaxRP;
4017330f729Sjoerg 
4027330f729Sjoerg #ifndef NDEBUG
4037330f729Sjoerg   const auto RegionMaxRP = getRegionPressure(R);
4047330f729Sjoerg   const auto &ST = MF.getSubtarget<GCNSubtarget>();
4057330f729Sjoerg #endif
4067330f729Sjoerg   assert((SchedMaxRP == RegionMaxRP && (MaxRP.empty() || SchedMaxRP == MaxRP))
4077330f729Sjoerg   || (dbgs() << "Max RP mismatch!!!\n"
4087330f729Sjoerg                 "RP for schedule (calculated): ",
4097330f729Sjoerg       SchedMaxRP.print(dbgs(), &ST),
4107330f729Sjoerg       dbgs() << "RP for schedule (reported): ",
4117330f729Sjoerg       MaxRP.print(dbgs(), &ST),
4127330f729Sjoerg       dbgs() << "RP after scheduling: ",
4137330f729Sjoerg       RegionMaxRP.print(dbgs(), &ST),
4147330f729Sjoerg       false));
4157330f729Sjoerg }
4167330f729Sjoerg 
4177330f729Sjoerg // Sort recorded regions by pressure - highest at the front
sortRegionsByPressure(unsigned TargetOcc)4187330f729Sjoerg void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
4197330f729Sjoerg   const auto &ST = MF.getSubtarget<GCNSubtarget>();
4207330f729Sjoerg   llvm::sort(Regions, [&ST, TargetOcc](const Region *R1, const Region *R2) {
4217330f729Sjoerg     return R2->MaxPressure.less(ST, R1->MaxPressure, TargetOcc);
4227330f729Sjoerg   });
4237330f729Sjoerg }
4247330f729Sjoerg 
4257330f729Sjoerg ///////////////////////////////////////////////////////////////////////////////
4267330f729Sjoerg // Legacy MaxOccupancy Strategy
4277330f729Sjoerg 
4287330f729Sjoerg // Tries to increase occupancy applying minreg scheduler for a sequence of
4297330f729Sjoerg // most demanding regions. Obtained schedules are saved as BestSchedule for a
4307330f729Sjoerg // region.
4317330f729Sjoerg // TargetOcc is the best achievable occupancy for a kernel.
4327330f729Sjoerg // Returns better occupancy on success or current occupancy on fail.
4337330f729Sjoerg // BestSchedules aren't deleted on fail.
tryMaximizeOccupancy(unsigned TargetOcc)4347330f729Sjoerg unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
4357330f729Sjoerg   // TODO: assert Regions are sorted descending by pressure
4367330f729Sjoerg   const auto &ST = MF.getSubtarget<GCNSubtarget>();
4377330f729Sjoerg   const auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
4387330f729Sjoerg   LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc
4397330f729Sjoerg                     << ", current = " << Occ << '\n');
4407330f729Sjoerg 
4417330f729Sjoerg   auto NewOcc = TargetOcc;
4427330f729Sjoerg   for (auto R : Regions) {
4437330f729Sjoerg     if (R->MaxPressure.getOccupancy(ST) >= NewOcc)
4447330f729Sjoerg       break;
4457330f729Sjoerg 
4467330f729Sjoerg     LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3);
4477330f729Sjoerg                printLivenessInfo(dbgs(), R->Begin, R->End, LIS));
4487330f729Sjoerg 
4497330f729Sjoerg     BuildDAG DAG(*R, *this);
4507330f729Sjoerg     const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this);
4517330f729Sjoerg     const auto MaxRP = getSchedulePressure(*R, MinSchedule);
4527330f729Sjoerg     LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n";
4537330f729Sjoerg                printSchedRP(dbgs(), R->MaxPressure, MaxRP));
4547330f729Sjoerg 
4557330f729Sjoerg     NewOcc = std::min(NewOcc, MaxRP.getOccupancy(ST));
4567330f729Sjoerg     if (NewOcc <= Occ)
4577330f729Sjoerg       break;
4587330f729Sjoerg 
4597330f729Sjoerg     setBestSchedule(*R, MinSchedule, MaxRP);
4607330f729Sjoerg   }
4617330f729Sjoerg   LLVM_DEBUG(dbgs() << "New occupancy = " << NewOcc
4627330f729Sjoerg                     << ", prev occupancy = " << Occ << '\n');
4637330f729Sjoerg   if (NewOcc > Occ) {
4647330f729Sjoerg     SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
4657330f729Sjoerg     MFI->increaseOccupancy(MF, NewOcc);
4667330f729Sjoerg   }
4677330f729Sjoerg 
4687330f729Sjoerg   return std::max(NewOcc, Occ);
4697330f729Sjoerg }
4707330f729Sjoerg 
scheduleLegacyMaxOccupancy(bool TryMaximizeOccupancy)4717330f729Sjoerg void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
4727330f729Sjoerg   bool TryMaximizeOccupancy) {
4737330f729Sjoerg   const auto &ST = MF.getSubtarget<GCNSubtarget>();
4747330f729Sjoerg   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
4757330f729Sjoerg   auto TgtOcc = MFI->getMinAllowedOccupancy();
4767330f729Sjoerg 
4777330f729Sjoerg   sortRegionsByPressure(TgtOcc);
4787330f729Sjoerg   auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
4797330f729Sjoerg 
4807330f729Sjoerg   if (TryMaximizeOccupancy && Occ < TgtOcc)
4817330f729Sjoerg     Occ = tryMaximizeOccupancy(TgtOcc);
4827330f729Sjoerg 
4837330f729Sjoerg   // This is really weird but for some magic scheduling regions twice
4847330f729Sjoerg   // gives performance improvement
4857330f729Sjoerg   const int NumPasses = Occ < TgtOcc ? 2 : 1;
4867330f729Sjoerg 
4877330f729Sjoerg   TgtOcc = std::min(Occ, TgtOcc);
4887330f729Sjoerg   LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
4897330f729Sjoerg                        "target occupancy = "
4907330f729Sjoerg                     << TgtOcc << '\n');
4917330f729Sjoerg   GCNMaxOccupancySchedStrategy LStrgy(Context);
4927330f729Sjoerg   unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
4937330f729Sjoerg 
4947330f729Sjoerg   for (int I = 0; I < NumPasses; ++I) {
4957330f729Sjoerg     // running first pass with TargetOccupancy = 0 mimics previous scheduling
4967330f729Sjoerg     // approach and is a performance magic
4977330f729Sjoerg     LStrgy.setTargetOccupancy(I == 0 ? 0 : TgtOcc);
4987330f729Sjoerg     for (auto R : Regions) {
4997330f729Sjoerg       OverrideLegacyStrategy Ovr(*R, LStrgy, *this);
5007330f729Sjoerg 
5017330f729Sjoerg       Ovr.schedule();
5027330f729Sjoerg       const auto RP = getRegionPressure(*R);
5037330f729Sjoerg       LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
5047330f729Sjoerg 
5057330f729Sjoerg       if (RP.getOccupancy(ST) < TgtOcc) {
5067330f729Sjoerg         LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
5077330f729Sjoerg         if (R->BestSchedule.get() &&
5087330f729Sjoerg             R->BestSchedule->MaxPressure.getOccupancy(ST) >= TgtOcc) {
5097330f729Sjoerg           LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
5107330f729Sjoerg           scheduleBest(*R);
5117330f729Sjoerg         } else {
5127330f729Sjoerg           LLVM_DEBUG(dbgs() << ", restoring\n");
5137330f729Sjoerg           Ovr.restoreOrder();
5147330f729Sjoerg           assert(R->MaxPressure.getOccupancy(ST) >= TgtOcc);
5157330f729Sjoerg         }
5167330f729Sjoerg       }
5177330f729Sjoerg       FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(ST));
5187330f729Sjoerg     }
5197330f729Sjoerg   }
5207330f729Sjoerg   MFI->limitOccupancy(FinalOccupancy);
5217330f729Sjoerg }
5227330f729Sjoerg 
5237330f729Sjoerg ///////////////////////////////////////////////////////////////////////////////
5247330f729Sjoerg // Minimal Register Strategy
5257330f729Sjoerg 
scheduleMinReg(bool force)5267330f729Sjoerg void GCNIterativeScheduler::scheduleMinReg(bool force) {
5277330f729Sjoerg   const auto &ST = MF.getSubtarget<GCNSubtarget>();
5287330f729Sjoerg   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
5297330f729Sjoerg   const auto TgtOcc = MFI->getOccupancy();
5307330f729Sjoerg   sortRegionsByPressure(TgtOcc);
5317330f729Sjoerg 
5327330f729Sjoerg   auto MaxPressure = Regions.front()->MaxPressure;
5337330f729Sjoerg   for (auto R : Regions) {
5347330f729Sjoerg     if (!force && R->MaxPressure.less(ST, MaxPressure, TgtOcc))
5357330f729Sjoerg       break;
5367330f729Sjoerg 
5377330f729Sjoerg     BuildDAG DAG(*R, *this);
5387330f729Sjoerg     const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this);
5397330f729Sjoerg 
5407330f729Sjoerg     const auto RP = getSchedulePressure(*R, MinSchedule);
5417330f729Sjoerg     LLVM_DEBUG(if (R->MaxPressure.less(ST, RP, TgtOcc)) {
5427330f729Sjoerg       dbgs() << "\nWarning: Pressure becomes worse after minreg!";
5437330f729Sjoerg       printSchedRP(dbgs(), R->MaxPressure, RP);
5447330f729Sjoerg     });
5457330f729Sjoerg 
5467330f729Sjoerg     if (!force && MaxPressure.less(ST, RP, TgtOcc))
5477330f729Sjoerg       break;
5487330f729Sjoerg 
5497330f729Sjoerg     scheduleRegion(*R, MinSchedule, RP);
5507330f729Sjoerg     LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
5517330f729Sjoerg 
5527330f729Sjoerg     MaxPressure = RP;
5537330f729Sjoerg   }
5547330f729Sjoerg }
5557330f729Sjoerg 
5567330f729Sjoerg ///////////////////////////////////////////////////////////////////////////////
5577330f729Sjoerg // ILP scheduler port
5587330f729Sjoerg 
scheduleILP(bool TryMaximizeOccupancy)5597330f729Sjoerg void GCNIterativeScheduler::scheduleILP(
5607330f729Sjoerg   bool TryMaximizeOccupancy) {
5617330f729Sjoerg   const auto &ST = MF.getSubtarget<GCNSubtarget>();
5627330f729Sjoerg   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
5637330f729Sjoerg   auto TgtOcc = MFI->getMinAllowedOccupancy();
5647330f729Sjoerg 
5657330f729Sjoerg   sortRegionsByPressure(TgtOcc);
5667330f729Sjoerg   auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
5677330f729Sjoerg 
5687330f729Sjoerg   if (TryMaximizeOccupancy && Occ < TgtOcc)
5697330f729Sjoerg     Occ = tryMaximizeOccupancy(TgtOcc);
5707330f729Sjoerg 
5717330f729Sjoerg   TgtOcc = std::min(Occ, TgtOcc);
5727330f729Sjoerg   LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
5737330f729Sjoerg                        "target occupancy = "
5747330f729Sjoerg                     << TgtOcc << '\n');
5757330f729Sjoerg 
5767330f729Sjoerg   unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
5777330f729Sjoerg   for (auto R : Regions) {
5787330f729Sjoerg     BuildDAG DAG(*R, *this);
5797330f729Sjoerg     const auto ILPSchedule = makeGCNILPScheduler(DAG.getBottomRoots(), *this);
5807330f729Sjoerg 
5817330f729Sjoerg     const auto RP = getSchedulePressure(*R, ILPSchedule);
5827330f729Sjoerg     LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
5837330f729Sjoerg 
5847330f729Sjoerg     if (RP.getOccupancy(ST) < TgtOcc) {
5857330f729Sjoerg       LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
5867330f729Sjoerg       if (R->BestSchedule.get() &&
5877330f729Sjoerg         R->BestSchedule->MaxPressure.getOccupancy(ST) >= TgtOcc) {
5887330f729Sjoerg         LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
5897330f729Sjoerg         scheduleBest(*R);
5907330f729Sjoerg       }
5917330f729Sjoerg     } else {
5927330f729Sjoerg       scheduleRegion(*R, ILPSchedule, RP);
5937330f729Sjoerg       LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
5947330f729Sjoerg       FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(ST));
5957330f729Sjoerg     }
5967330f729Sjoerg   }
5977330f729Sjoerg   MFI->limitOccupancy(FinalOccupancy);
5987330f729Sjoerg }
599