xref: /freebsd-src/contrib/llvm-project/llvm/lib/CodeGen/WindowScheduler.cpp (revision 6c4b055cfb6bf549e9145dde6454cc6b178c35e4)
10fca6ea1SDimitry Andric //======----------- WindowScheduler.cpp - window scheduler -------------======//
20fca6ea1SDimitry Andric //
30fca6ea1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40fca6ea1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50fca6ea1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60fca6ea1SDimitry Andric //
70fca6ea1SDimitry Andric //===----------------------------------------------------------------------===//
80fca6ea1SDimitry Andric //
90fca6ea1SDimitry Andric // An implementation of the Window Scheduling software pipelining algorithm.
100fca6ea1SDimitry Andric //
110fca6ea1SDimitry Andric // The fundamental concept of the window scheduling algorithm involves folding
120fca6ea1SDimitry Andric // the original MBB at a specific position, followed by list scheduling on the
130fca6ea1SDimitry Andric // folded MIs. The optimal scheduling result is then chosen from various folding
140fca6ea1SDimitry Andric // positions as the final scheduling outcome.
150fca6ea1SDimitry Andric //
160fca6ea1SDimitry Andric // The primary challenge in this algorithm lies in generating the folded MIs and
170fca6ea1SDimitry Andric // establishing their dependencies. We have innovatively employed a new MBB,
180fca6ea1SDimitry Andric // created by copying the original MBB three times, known as TripleMBB. This
190fca6ea1SDimitry Andric // TripleMBB enables the convenient implementation of MI folding and dependency
200fca6ea1SDimitry Andric // establishment. To facilitate the algorithm's implementation, we have also
210fca6ea1SDimitry Andric // devised data structures such as OriMIs, TriMIs, TriToOri, and OriToCycle.
220fca6ea1SDimitry Andric //
230fca6ea1SDimitry Andric // Another challenge in the algorithm is the scheduling of phis. Semantically,
240fca6ea1SDimitry Andric // it is difficult to place the phis in the window and perform list scheduling.
250fca6ea1SDimitry Andric // Therefore, we schedule these phis separately after each list scheduling.
260fca6ea1SDimitry Andric //
270fca6ea1SDimitry Andric // The provided implementation is designed for use before the Register Allocator
280fca6ea1SDimitry Andric // (RA). If the target requires implementation after RA, it is recommended to
290fca6ea1SDimitry Andric // reimplement analyseII(), schedulePhi(), and expand(). Additionally,
300fca6ea1SDimitry Andric // target-specific logic can be added in initialize(), preProcess(), and
310fca6ea1SDimitry Andric // postProcess().
320fca6ea1SDimitry Andric //
330fca6ea1SDimitry Andric // Lastly, it is worth mentioning that getSearchIndexes() is an important
340fca6ea1SDimitry Andric // function. We have experimented with more complex heuristics on downstream
350fca6ea1SDimitry Andric // target and achieved favorable results.
360fca6ea1SDimitry Andric //
370fca6ea1SDimitry Andric //===----------------------------------------------------------------------===//
380fca6ea1SDimitry Andric #include "llvm/CodeGen/WindowScheduler.h"
390fca6ea1SDimitry Andric #include "llvm/ADT/Statistic.h"
400fca6ea1SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
410fca6ea1SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h"
420fca6ea1SDimitry Andric #include "llvm/CodeGen/MachinePipeliner.h"
430fca6ea1SDimitry Andric #include "llvm/CodeGen/ModuloSchedule.h"
440fca6ea1SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
450fca6ea1SDimitry Andric #include "llvm/Support/CommandLine.h"
460fca6ea1SDimitry Andric #include "llvm/Support/Debug.h"
470fca6ea1SDimitry Andric #include "llvm/Support/TimeProfiler.h"
480fca6ea1SDimitry Andric 
490fca6ea1SDimitry Andric using namespace llvm;
500fca6ea1SDimitry Andric 
510fca6ea1SDimitry Andric #define DEBUG_TYPE "pipeliner"
520fca6ea1SDimitry Andric 
530fca6ea1SDimitry Andric namespace {
540fca6ea1SDimitry Andric STATISTIC(NumTryWindowSchedule,
550fca6ea1SDimitry Andric           "Number of loops that we attempt to use window scheduling");
560fca6ea1SDimitry Andric STATISTIC(NumTryWindowSearch,
570fca6ea1SDimitry Andric           "Number of times that we run list schedule in the window scheduling");
580fca6ea1SDimitry Andric STATISTIC(NumWindowSchedule,
590fca6ea1SDimitry Andric           "Number of loops that we successfully use window scheduling");
600fca6ea1SDimitry Andric STATISTIC(NumFailAnalyseII,
610fca6ea1SDimitry Andric           "Window scheduling abort due to the failure of the II analysis");
620fca6ea1SDimitry Andric 
630fca6ea1SDimitry Andric cl::opt<unsigned>
640fca6ea1SDimitry Andric     WindowSearchNum("window-search-num",
650fca6ea1SDimitry Andric                     cl::desc("The number of searches per loop in the window "
660fca6ea1SDimitry Andric                              "algorithm. 0 means no search number limit."),
670fca6ea1SDimitry Andric                     cl::Hidden, cl::init(6));
680fca6ea1SDimitry Andric 
690fca6ea1SDimitry Andric cl::opt<unsigned> WindowSearchRatio(
700fca6ea1SDimitry Andric     "window-search-ratio",
710fca6ea1SDimitry Andric     cl::desc("The ratio of searches per loop in the window algorithm. 100 "
720fca6ea1SDimitry Andric              "means search all positions in the loop, while 0 means not "
730fca6ea1SDimitry Andric              "performing any search."),
740fca6ea1SDimitry Andric     cl::Hidden, cl::init(40));
750fca6ea1SDimitry Andric 
760fca6ea1SDimitry Andric cl::opt<unsigned> WindowIICoeff(
770fca6ea1SDimitry Andric     "window-ii-coeff",
780fca6ea1SDimitry Andric     cl::desc(
790fca6ea1SDimitry Andric         "The coefficient used when initializing II in the window algorithm."),
800fca6ea1SDimitry Andric     cl::Hidden, cl::init(5));
810fca6ea1SDimitry Andric 
820fca6ea1SDimitry Andric cl::opt<unsigned> WindowRegionLimit(
830fca6ea1SDimitry Andric     "window-region-limit",
840fca6ea1SDimitry Andric     cl::desc(
850fca6ea1SDimitry Andric         "The lower limit of the scheduling region in the window algorithm."),
860fca6ea1SDimitry Andric     cl::Hidden, cl::init(3));
870fca6ea1SDimitry Andric 
880fca6ea1SDimitry Andric cl::opt<unsigned> WindowDiffLimit(
890fca6ea1SDimitry Andric     "window-diff-limit",
900fca6ea1SDimitry Andric     cl::desc("The lower limit of the difference between best II and base II in "
910fca6ea1SDimitry Andric              "the window algorithm. If the difference is smaller than "
920fca6ea1SDimitry Andric              "this lower limit, window scheduling will not be performed."),
930fca6ea1SDimitry Andric     cl::Hidden, cl::init(2));
940fca6ea1SDimitry Andric } // namespace
950fca6ea1SDimitry Andric 
960fca6ea1SDimitry Andric // WindowIILimit serves as an indicator of abnormal scheduling results and could
970fca6ea1SDimitry Andric // potentially be referenced by the derived target window scheduler.
980fca6ea1SDimitry Andric cl::opt<unsigned>
990fca6ea1SDimitry Andric     WindowIILimit("window-ii-limit",
1000fca6ea1SDimitry Andric                   cl::desc("The upper limit of II in the window algorithm."),
1010fca6ea1SDimitry Andric                   cl::Hidden, cl::init(1000));
1020fca6ea1SDimitry Andric 
1030fca6ea1SDimitry Andric WindowScheduler::WindowScheduler(MachineSchedContext *C, MachineLoop &ML)
1040fca6ea1SDimitry Andric     : Context(C), MF(C->MF), MBB(ML.getHeader()), Loop(ML),
1050fca6ea1SDimitry Andric       Subtarget(&MF->getSubtarget()), TII(Subtarget->getInstrInfo()),
1060fca6ea1SDimitry Andric       TRI(Subtarget->getRegisterInfo()), MRI(&MF->getRegInfo()) {
1070fca6ea1SDimitry Andric   TripleDAG = std::unique_ptr<ScheduleDAGInstrs>(
1080fca6ea1SDimitry Andric       createMachineScheduler(/*OnlyBuildGraph=*/true));
1090fca6ea1SDimitry Andric }
1100fca6ea1SDimitry Andric 
1110fca6ea1SDimitry Andric bool WindowScheduler::run() {
1120fca6ea1SDimitry Andric   if (!initialize()) {
1130fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "The WindowScheduler failed to initialize!\n");
1140fca6ea1SDimitry Andric     return false;
1150fca6ea1SDimitry Andric   }
1160fca6ea1SDimitry Andric   // The window algorithm is time-consuming, and its compilation time should be
1170fca6ea1SDimitry Andric   // taken into consideration.
1180fca6ea1SDimitry Andric   TimeTraceScope Scope("WindowSearch");
1190fca6ea1SDimitry Andric   ++NumTryWindowSchedule;
1200fca6ea1SDimitry Andric   // Performing the relevant processing before window scheduling.
1210fca6ea1SDimitry Andric   preProcess();
1220fca6ea1SDimitry Andric   // The main window scheduling begins.
1230fca6ea1SDimitry Andric   std::unique_ptr<ScheduleDAGInstrs> SchedDAG(createMachineScheduler());
1240fca6ea1SDimitry Andric   auto SearchIndexes = getSearchIndexes(WindowSearchNum, WindowSearchRatio);
1250fca6ea1SDimitry Andric   for (unsigned Idx : SearchIndexes) {
1260fca6ea1SDimitry Andric     OriToCycle.clear();
1270fca6ea1SDimitry Andric     ++NumTryWindowSearch;
1280fca6ea1SDimitry Andric     // The scheduling starts with non-phi instruction, so SchedPhiNum needs to
1290fca6ea1SDimitry Andric     // be added to Idx.
1300fca6ea1SDimitry Andric     unsigned Offset = Idx + SchedPhiNum;
1310fca6ea1SDimitry Andric     auto Range = getScheduleRange(Offset, SchedInstrNum);
1320fca6ea1SDimitry Andric     SchedDAG->startBlock(MBB);
1330fca6ea1SDimitry Andric     SchedDAG->enterRegion(MBB, Range.begin(), Range.end(), SchedInstrNum);
1340fca6ea1SDimitry Andric     SchedDAG->schedule();
1350fca6ea1SDimitry Andric     LLVM_DEBUG(SchedDAG->dump());
1360fca6ea1SDimitry Andric     unsigned II = analyseII(*SchedDAG, Offset);
1370fca6ea1SDimitry Andric     if (II == WindowIILimit) {
1380fca6ea1SDimitry Andric       restoreTripleMBB();
1390fca6ea1SDimitry Andric       LLVM_DEBUG(dbgs() << "Can't find a valid II. Keep searching...\n");
1400fca6ea1SDimitry Andric       ++NumFailAnalyseII;
1410fca6ea1SDimitry Andric       continue;
1420fca6ea1SDimitry Andric     }
1430fca6ea1SDimitry Andric     schedulePhi(Offset, II);
1440fca6ea1SDimitry Andric     updateScheduleResult(Offset, II);
1450fca6ea1SDimitry Andric     restoreTripleMBB();
1460fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "Current window Offset is " << Offset << " and II is "
1470fca6ea1SDimitry Andric                       << II << ".\n");
1480fca6ea1SDimitry Andric   }
1490fca6ea1SDimitry Andric   // Performing the relevant processing after window scheduling.
1500fca6ea1SDimitry Andric   postProcess();
1510fca6ea1SDimitry Andric   // Check whether the scheduling result is valid.
1520fca6ea1SDimitry Andric   if (!isScheduleValid()) {
1530fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "Window scheduling is not needed!\n");
1540fca6ea1SDimitry Andric     return false;
1550fca6ea1SDimitry Andric   }
1560fca6ea1SDimitry Andric   LLVM_DEBUG(dbgs() << "\nBest window offset is " << BestOffset
1570fca6ea1SDimitry Andric                     << " and Best II is " << BestII << ".\n");
1580fca6ea1SDimitry Andric   // Expand the scheduling result to prologue, kernel, and epilogue.
1590fca6ea1SDimitry Andric   expand();
1600fca6ea1SDimitry Andric   ++NumWindowSchedule;
1610fca6ea1SDimitry Andric   return true;
1620fca6ea1SDimitry Andric }
1630fca6ea1SDimitry Andric 
1640fca6ea1SDimitry Andric ScheduleDAGInstrs *
1650fca6ea1SDimitry Andric WindowScheduler::createMachineScheduler(bool OnlyBuildGraph) {
1660fca6ea1SDimitry Andric   return OnlyBuildGraph
1670fca6ea1SDimitry Andric              ? new ScheduleDAGMI(
1680fca6ea1SDimitry Andric                    Context, std::make_unique<PostGenericScheduler>(Context),
1690fca6ea1SDimitry Andric                    true)
1700fca6ea1SDimitry Andric              : Context->PassConfig->createMachineScheduler(Context);
1710fca6ea1SDimitry Andric }
1720fca6ea1SDimitry Andric 
1730fca6ea1SDimitry Andric bool WindowScheduler::initialize() {
1740fca6ea1SDimitry Andric   if (!Subtarget->enableWindowScheduler()) {
1750fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "Target disables the window scheduling!\n");
1760fca6ea1SDimitry Andric     return false;
1770fca6ea1SDimitry Andric   }
1780fca6ea1SDimitry Andric   // Initialized the member variables used by window algorithm.
1790fca6ea1SDimitry Andric   OriMIs.clear();
1800fca6ea1SDimitry Andric   TriMIs.clear();
1810fca6ea1SDimitry Andric   TriToOri.clear();
1820fca6ea1SDimitry Andric   OriToCycle.clear();
1830fca6ea1SDimitry Andric   SchedResult.clear();
1840fca6ea1SDimitry Andric   SchedPhiNum = 0;
1850fca6ea1SDimitry Andric   SchedInstrNum = 0;
1860fca6ea1SDimitry Andric   BestII = UINT_MAX;
1870fca6ea1SDimitry Andric   BestOffset = 0;
1880fca6ea1SDimitry Andric   BaseII = 0;
1890fca6ea1SDimitry Andric   // List scheduling used in the window algorithm depends on LiveIntervals.
1900fca6ea1SDimitry Andric   if (!Context->LIS) {
1910fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "There is no LiveIntervals information!\n");
1920fca6ea1SDimitry Andric     return false;
1930fca6ea1SDimitry Andric   }
1940fca6ea1SDimitry Andric   // Check each MI in MBB.
1950fca6ea1SDimitry Andric   SmallSet<Register, 8> PrevDefs;
1960fca6ea1SDimitry Andric   SmallSet<Register, 8> PrevUses;
1970fca6ea1SDimitry Andric   auto IsLoopCarried = [&](MachineInstr &Phi) {
1980fca6ea1SDimitry Andric     // Two cases are checked here: (1)The virtual register defined by the
1990fca6ea1SDimitry Andric     // preceding phi is used by the succeeding phi;(2)The preceding phi uses the
2000fca6ea1SDimitry Andric     // virtual register defined by the succeeding phi.
2010fca6ea1SDimitry Andric     if (PrevUses.count(Phi.getOperand(0).getReg()))
2020fca6ea1SDimitry Andric       return true;
2030fca6ea1SDimitry Andric     PrevDefs.insert(Phi.getOperand(0).getReg());
2040fca6ea1SDimitry Andric     for (unsigned I = 1, E = Phi.getNumOperands(); I != E; I += 2) {
2050fca6ea1SDimitry Andric       if (PrevDefs.count(Phi.getOperand(I).getReg()))
2060fca6ea1SDimitry Andric         return true;
2070fca6ea1SDimitry Andric       PrevUses.insert(Phi.getOperand(I).getReg());
2080fca6ea1SDimitry Andric     }
2090fca6ea1SDimitry Andric     return false;
2100fca6ea1SDimitry Andric   };
2110fca6ea1SDimitry Andric   auto PLI = TII->analyzeLoopForPipelining(MBB);
2120fca6ea1SDimitry Andric   for (auto &MI : *MBB) {
2130fca6ea1SDimitry Andric     if (MI.isMetaInstruction() || MI.isTerminator())
2140fca6ea1SDimitry Andric       continue;
2150fca6ea1SDimitry Andric     if (MI.isPHI()) {
2160fca6ea1SDimitry Andric       if (IsLoopCarried(MI)) {
2170fca6ea1SDimitry Andric         LLVM_DEBUG(dbgs() << "Loop carried phis are not supported yet!\n");
2180fca6ea1SDimitry Andric         return false;
2190fca6ea1SDimitry Andric       }
2200fca6ea1SDimitry Andric       ++SchedPhiNum;
2210fca6ea1SDimitry Andric       ++BestOffset;
2220fca6ea1SDimitry Andric     } else
2230fca6ea1SDimitry Andric       ++SchedInstrNum;
2240fca6ea1SDimitry Andric     if (TII->isSchedulingBoundary(MI, MBB, *MF)) {
2250fca6ea1SDimitry Andric       LLVM_DEBUG(
2260fca6ea1SDimitry Andric           dbgs() << "Boundary MI is not allowed in window scheduling!\n");
2270fca6ea1SDimitry Andric       return false;
2280fca6ea1SDimitry Andric     }
2290fca6ea1SDimitry Andric     if (PLI->shouldIgnoreForPipelining(&MI)) {
2300fca6ea1SDimitry Andric       LLVM_DEBUG(dbgs() << "Special MI defined by target is not allowed in "
2310fca6ea1SDimitry Andric                            "window scheduling!\n");
2320fca6ea1SDimitry Andric       return false;
2330fca6ea1SDimitry Andric     }
2340fca6ea1SDimitry Andric     for (auto &Def : MI.all_defs())
235*6c4b055cSDimitry Andric       if (Def.isReg() && Def.getReg().isPhysical()) {
236*6c4b055cSDimitry Andric         LLVM_DEBUG(dbgs() << "Physical registers are not supported in "
237*6c4b055cSDimitry Andric                              "window scheduling!\n");
2380fca6ea1SDimitry Andric         return false;
2390fca6ea1SDimitry Andric       }
240*6c4b055cSDimitry Andric   }
2410fca6ea1SDimitry Andric   if (SchedInstrNum <= WindowRegionLimit) {
2420fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "There are too few MIs in the window region!\n");
2430fca6ea1SDimitry Andric     return false;
2440fca6ea1SDimitry Andric   }
2450fca6ea1SDimitry Andric   return true;
2460fca6ea1SDimitry Andric }
2470fca6ea1SDimitry Andric 
2480fca6ea1SDimitry Andric void WindowScheduler::preProcess() {
2490fca6ea1SDimitry Andric   // Prior to window scheduling, it's necessary to backup the original MBB,
2500fca6ea1SDimitry Andric   // generate a new TripleMBB, and build a TripleDAG based on the TripleMBB.
2510fca6ea1SDimitry Andric   backupMBB();
2520fca6ea1SDimitry Andric   generateTripleMBB();
2530fca6ea1SDimitry Andric   TripleDAG->startBlock(MBB);
2540fca6ea1SDimitry Andric   TripleDAG->enterRegion(
2550fca6ea1SDimitry Andric       MBB, MBB->begin(), MBB->getFirstTerminator(),
2560fca6ea1SDimitry Andric       std::distance(MBB->begin(), MBB->getFirstTerminator()));
2570fca6ea1SDimitry Andric   TripleDAG->buildSchedGraph(Context->AA);
2580fca6ea1SDimitry Andric }
2590fca6ea1SDimitry Andric 
2600fca6ea1SDimitry Andric void WindowScheduler::postProcess() {
2610fca6ea1SDimitry Andric   // After window scheduling, it's necessary to clear the TripleDAG and restore
2620fca6ea1SDimitry Andric   // to the original MBB.
2630fca6ea1SDimitry Andric   TripleDAG->exitRegion();
2640fca6ea1SDimitry Andric   TripleDAG->finishBlock();
2650fca6ea1SDimitry Andric   restoreMBB();
2660fca6ea1SDimitry Andric }
2670fca6ea1SDimitry Andric 
2680fca6ea1SDimitry Andric void WindowScheduler::backupMBB() {
2690fca6ea1SDimitry Andric   for (auto &MI : MBB->instrs())
2700fca6ea1SDimitry Andric     OriMIs.push_back(&MI);
2710fca6ea1SDimitry Andric   // Remove MIs and the corresponding live intervals.
2720fca6ea1SDimitry Andric   for (auto &MI : make_early_inc_range(*MBB)) {
2730fca6ea1SDimitry Andric     Context->LIS->getSlotIndexes()->removeMachineInstrFromMaps(MI, true);
2740fca6ea1SDimitry Andric     MBB->remove(&MI);
2750fca6ea1SDimitry Andric   }
2760fca6ea1SDimitry Andric }
2770fca6ea1SDimitry Andric 
2780fca6ea1SDimitry Andric void WindowScheduler::restoreMBB() {
2790fca6ea1SDimitry Andric   // Erase MIs and the corresponding live intervals.
2800fca6ea1SDimitry Andric   for (auto &MI : make_early_inc_range(*MBB)) {
2810fca6ea1SDimitry Andric     Context->LIS->getSlotIndexes()->removeMachineInstrFromMaps(MI, true);
2820fca6ea1SDimitry Andric     MI.eraseFromParent();
2830fca6ea1SDimitry Andric   }
2840fca6ea1SDimitry Andric   // Restore MBB to the state before window scheduling.
2850fca6ea1SDimitry Andric   for (auto *MI : OriMIs)
2860fca6ea1SDimitry Andric     MBB->push_back(MI);
2870fca6ea1SDimitry Andric   updateLiveIntervals();
2880fca6ea1SDimitry Andric }
2890fca6ea1SDimitry Andric 
2900fca6ea1SDimitry Andric void WindowScheduler::generateTripleMBB() {
2910fca6ea1SDimitry Andric   const unsigned DuplicateNum = 3;
2920fca6ea1SDimitry Andric   TriMIs.clear();
2930fca6ea1SDimitry Andric   TriToOri.clear();
2940fca6ea1SDimitry Andric   assert(OriMIs.size() > 0 && "The Original MIs were not backed up!");
2950fca6ea1SDimitry Andric   // Step 1: Performing the first copy of MBB instructions, excluding
2960fca6ea1SDimitry Andric   // terminators. At the same time, we back up the anti-register of phis.
2970fca6ea1SDimitry Andric   // DefPairs hold the old and new define register pairs.
2980fca6ea1SDimitry Andric   DenseMap<Register, Register> DefPairs;
2990fca6ea1SDimitry Andric   for (auto *MI : OriMIs) {
3000fca6ea1SDimitry Andric     if (MI->isMetaInstruction() || MI->isTerminator())
3010fca6ea1SDimitry Andric       continue;
3020fca6ea1SDimitry Andric     if (MI->isPHI())
3030fca6ea1SDimitry Andric       if (Register AntiReg = getAntiRegister(MI))
3040fca6ea1SDimitry Andric         DefPairs[MI->getOperand(0).getReg()] = AntiReg;
3050fca6ea1SDimitry Andric     auto *NewMI = MF->CloneMachineInstr(MI);
3060fca6ea1SDimitry Andric     MBB->push_back(NewMI);
3070fca6ea1SDimitry Andric     TriMIs.push_back(NewMI);
3080fca6ea1SDimitry Andric     TriToOri[NewMI] = MI;
3090fca6ea1SDimitry Andric   }
3100fca6ea1SDimitry Andric   // Step 2: Performing the remaining two copies of MBB instructions excluding
3110fca6ea1SDimitry Andric   // phis, and the last one contains terminators. At the same time, registers
3120fca6ea1SDimitry Andric   // are updated accordingly.
3130fca6ea1SDimitry Andric   for (size_t Cnt = 1; Cnt < DuplicateNum; ++Cnt) {
3140fca6ea1SDimitry Andric     for (auto *MI : OriMIs) {
3150fca6ea1SDimitry Andric       if (MI->isPHI() || MI->isMetaInstruction() ||
3160fca6ea1SDimitry Andric           (MI->isTerminator() && Cnt < DuplicateNum - 1))
3170fca6ea1SDimitry Andric         continue;
3180fca6ea1SDimitry Andric       auto *NewMI = MF->CloneMachineInstr(MI);
3190fca6ea1SDimitry Andric       DenseMap<Register, Register> NewDefs;
3200fca6ea1SDimitry Andric       // New defines are updated.
3210fca6ea1SDimitry Andric       for (auto MO : NewMI->all_defs())
3220fca6ea1SDimitry Andric         if (MO.isReg() && MO.getReg().isVirtual()) {
3230fca6ea1SDimitry Andric           Register NewDef =
3240fca6ea1SDimitry Andric               MRI->createVirtualRegister(MRI->getRegClass(MO.getReg()));
3250fca6ea1SDimitry Andric           NewMI->substituteRegister(MO.getReg(), NewDef, 0, *TRI);
3260fca6ea1SDimitry Andric           NewDefs[MO.getReg()] = NewDef;
3270fca6ea1SDimitry Andric         }
3280fca6ea1SDimitry Andric       // New uses are updated.
3290fca6ea1SDimitry Andric       for (auto DefRegPair : DefPairs)
3300fca6ea1SDimitry Andric         if (NewMI->readsRegister(DefRegPair.first, TRI)) {
3310fca6ea1SDimitry Andric           Register NewUse = DefRegPair.second;
3320fca6ea1SDimitry Andric           // Note the update process for '%1 -> %9' in '%10 = sub i32 %9, %3':
3330fca6ea1SDimitry Andric           //
3340fca6ea1SDimitry Andric           // BB.3:                                  DefPairs
3350fca6ea1SDimitry Andric           // ==================================
3360fca6ea1SDimitry Andric           // %1 = phi i32 [%2, %BB.1], [%7, %BB.3]  (%1,%7)
3370fca6ea1SDimitry Andric           // ...
3380fca6ea1SDimitry Andric           // ==================================
3390fca6ea1SDimitry Andric           // ...
3400fca6ea1SDimitry Andric           // %4 = sub i32 %1, %3
3410fca6ea1SDimitry Andric           // ...
3420fca6ea1SDimitry Andric           // %7 = add i32 %5, %6
3430fca6ea1SDimitry Andric           // ...
3440fca6ea1SDimitry Andric           // ----------------------------------
3450fca6ea1SDimitry Andric           // ...
3460fca6ea1SDimitry Andric           // %8 = sub i32 %7, %3                    (%1,%7),(%4,%8)
3470fca6ea1SDimitry Andric           // ...
3480fca6ea1SDimitry Andric           // %9 = add i32 %5, %6                    (%1,%7),(%4,%8),(%7,%9)
3490fca6ea1SDimitry Andric           // ...
3500fca6ea1SDimitry Andric           // ----------------------------------
3510fca6ea1SDimitry Andric           // ...
3520fca6ea1SDimitry Andric           // %10 = sub i32 %9, %3                   (%1,%7),(%4,%10),(%7,%9)
3530fca6ea1SDimitry Andric           // ...            ^
3540fca6ea1SDimitry Andric           // %11 = add i32 %5, %6                   (%1,%7),(%4,%10),(%7,%11)
3550fca6ea1SDimitry Andric           // ...
3560fca6ea1SDimitry Andric           // ==================================
3570fca6ea1SDimitry Andric           //          < Terminators >
3580fca6ea1SDimitry Andric           // ==================================
3590fca6ea1SDimitry Andric           if (DefPairs.count(NewUse))
3600fca6ea1SDimitry Andric             NewUse = DefPairs[NewUse];
3610fca6ea1SDimitry Andric           NewMI->substituteRegister(DefRegPair.first, NewUse, 0, *TRI);
3620fca6ea1SDimitry Andric         }
3630fca6ea1SDimitry Andric       // DefPairs is updated at last.
3640fca6ea1SDimitry Andric       for (auto &NewDef : NewDefs)
3650fca6ea1SDimitry Andric         DefPairs[NewDef.first] = NewDef.second;
3660fca6ea1SDimitry Andric       MBB->push_back(NewMI);
3670fca6ea1SDimitry Andric       TriMIs.push_back(NewMI);
3680fca6ea1SDimitry Andric       TriToOri[NewMI] = MI;
3690fca6ea1SDimitry Andric     }
3700fca6ea1SDimitry Andric   }
3710fca6ea1SDimitry Andric   // Step 3: The registers used by phis are updated, and they are generated in
3720fca6ea1SDimitry Andric   // the third copy of MBB.
3730fca6ea1SDimitry Andric   // In the privious example, the old phi is:
3740fca6ea1SDimitry Andric   // %1 = phi i32 [%2, %BB.1], [%7, %BB.3]
3750fca6ea1SDimitry Andric   // The new phi is:
3760fca6ea1SDimitry Andric   // %1 = phi i32 [%2, %BB.1], [%11, %BB.3]
3770fca6ea1SDimitry Andric   for (auto &Phi : MBB->phis()) {
3780fca6ea1SDimitry Andric     for (auto DefRegPair : DefPairs)
3790fca6ea1SDimitry Andric       if (Phi.readsRegister(DefRegPair.first, TRI))
3800fca6ea1SDimitry Andric         Phi.substituteRegister(DefRegPair.first, DefRegPair.second, 0, *TRI);
3810fca6ea1SDimitry Andric   }
3820fca6ea1SDimitry Andric   updateLiveIntervals();
3830fca6ea1SDimitry Andric }
3840fca6ea1SDimitry Andric 
3850fca6ea1SDimitry Andric void WindowScheduler::restoreTripleMBB() {
3860fca6ea1SDimitry Andric   // After list scheduling, the MBB is restored in one traversal.
3870fca6ea1SDimitry Andric   for (size_t I = 0; I < TriMIs.size(); ++I) {
3880fca6ea1SDimitry Andric     auto *MI = TriMIs[I];
3890fca6ea1SDimitry Andric     auto OldPos = MBB->begin();
3900fca6ea1SDimitry Andric     std::advance(OldPos, I);
3910fca6ea1SDimitry Andric     auto CurPos = MI->getIterator();
3920fca6ea1SDimitry Andric     if (CurPos != OldPos) {
3930fca6ea1SDimitry Andric       MBB->splice(OldPos, MBB, CurPos);
3940fca6ea1SDimitry Andric       Context->LIS->handleMove(*MI, /*UpdateFlags=*/false);
3950fca6ea1SDimitry Andric     }
3960fca6ea1SDimitry Andric   }
3970fca6ea1SDimitry Andric }
3980fca6ea1SDimitry Andric 
3990fca6ea1SDimitry Andric SmallVector<unsigned> WindowScheduler::getSearchIndexes(unsigned SearchNum,
4000fca6ea1SDimitry Andric                                                         unsigned SearchRatio) {
4010fca6ea1SDimitry Andric   // We use SearchRatio to get the index range, and then evenly get the indexes
4020fca6ea1SDimitry Andric   // according to the SearchNum. This is a simple huristic. Depending on the
4030fca6ea1SDimitry Andric   // characteristics of the target, more complex algorithms can be used for both
4040fca6ea1SDimitry Andric   // performance and compilation time.
4050fca6ea1SDimitry Andric   assert(SearchRatio <= 100 && "SearchRatio should be equal or less than 100!");
4060fca6ea1SDimitry Andric   unsigned MaxIdx = SchedInstrNum * SearchRatio / 100;
4070fca6ea1SDimitry Andric   unsigned Step = SearchNum > 0 && SearchNum <= MaxIdx ? MaxIdx / SearchNum : 1;
4080fca6ea1SDimitry Andric   SmallVector<unsigned> SearchIndexes;
4090fca6ea1SDimitry Andric   for (unsigned Idx = 0; Idx < MaxIdx; Idx += Step)
4100fca6ea1SDimitry Andric     SearchIndexes.push_back(Idx);
4110fca6ea1SDimitry Andric   return SearchIndexes;
4120fca6ea1SDimitry Andric }
4130fca6ea1SDimitry Andric 
4140fca6ea1SDimitry Andric int WindowScheduler::getEstimatedII(ScheduleDAGInstrs &DAG) {
4150fca6ea1SDimitry Andric   // Sometimes MaxDepth is 0, so it should be limited to the minimum of 1.
4160fca6ea1SDimitry Andric   unsigned MaxDepth = 1;
4170fca6ea1SDimitry Andric   for (auto &SU : DAG.SUnits)
4180fca6ea1SDimitry Andric     MaxDepth = std::max(SU.getDepth() + SU.Latency, MaxDepth);
4190fca6ea1SDimitry Andric   return MaxDepth * WindowIICoeff;
4200fca6ea1SDimitry Andric }
4210fca6ea1SDimitry Andric 
4220fca6ea1SDimitry Andric int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG,
4230fca6ea1SDimitry Andric                                        unsigned Offset) {
4240fca6ea1SDimitry Andric   int InitII = getEstimatedII(DAG);
4250fca6ea1SDimitry Andric   ResourceManager RM(Subtarget, &DAG);
4260fca6ea1SDimitry Andric   RM.init(InitII);
4270fca6ea1SDimitry Andric   // ResourceManager and DAG are used to calculate the maximum cycle for the
4280fca6ea1SDimitry Andric   // scheduled MIs. Since MIs in the Region have already been scheduled, the
4290fca6ea1SDimitry Andric   // emit cycles can be estimated in order here.
4300fca6ea1SDimitry Andric   int CurCycle = 0;
4310fca6ea1SDimitry Andric   auto Range = getScheduleRange(Offset, SchedInstrNum);
4320fca6ea1SDimitry Andric   for (auto &MI : Range) {
4330fca6ea1SDimitry Andric     auto *SU = DAG.getSUnit(&MI);
4340fca6ea1SDimitry Andric     int ExpectCycle = CurCycle;
4350fca6ea1SDimitry Andric     // The predecessors of current MI determine its earliest issue cycle.
4360fca6ea1SDimitry Andric     for (auto &Pred : SU->Preds) {
4370fca6ea1SDimitry Andric       if (Pred.isWeak())
4380fca6ea1SDimitry Andric         continue;
4390fca6ea1SDimitry Andric       auto *PredMI = Pred.getSUnit()->getInstr();
4400fca6ea1SDimitry Andric       int PredCycle = getOriCycle(PredMI);
4410fca6ea1SDimitry Andric       ExpectCycle = std::max(ExpectCycle, PredCycle + (int)Pred.getLatency());
4420fca6ea1SDimitry Andric     }
443*6c4b055cSDimitry Andric     // Zero cost instructions do not need to check resource.
444*6c4b055cSDimitry Andric     if (!TII->isZeroCost(MI.getOpcode())) {
4450fca6ea1SDimitry Andric       // ResourceManager can be used to detect resource conflicts between the
4460fca6ea1SDimitry Andric       // current MI and the previously inserted MIs.
4470fca6ea1SDimitry Andric       while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) {
4480fca6ea1SDimitry Andric         ++CurCycle;
4490fca6ea1SDimitry Andric         if (CurCycle == (int)WindowIILimit)
4500fca6ea1SDimitry Andric           return CurCycle;
4510fca6ea1SDimitry Andric       }
4520fca6ea1SDimitry Andric       RM.reserveResources(*SU, CurCycle);
453*6c4b055cSDimitry Andric     }
4540fca6ea1SDimitry Andric     OriToCycle[getOriMI(&MI)] = CurCycle;
4550fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "\tCycle " << CurCycle << " [S."
4560fca6ea1SDimitry Andric                       << getOriStage(getOriMI(&MI), Offset) << "]: " << MI);
4570fca6ea1SDimitry Andric   }
4580fca6ea1SDimitry Andric   LLVM_DEBUG(dbgs() << "MaxCycle is " << CurCycle << ".\n");
4590fca6ea1SDimitry Andric   return CurCycle;
4600fca6ea1SDimitry Andric }
4610fca6ea1SDimitry Andric 
4620fca6ea1SDimitry Andric // By utilizing TripleDAG, we can easily establish dependencies between A and B.
4630fca6ea1SDimitry Andric // Based on the MaxCycle and the issue cycle of A and B, we can determine
4640fca6ea1SDimitry Andric // whether it is necessary to add a stall cycle. This is because, without
4650fca6ea1SDimitry Andric // inserting the stall cycle, the latency constraint between A and B cannot be
4660fca6ea1SDimitry Andric // satisfied. The details are as follows:
4670fca6ea1SDimitry Andric //
4680fca6ea1SDimitry Andric // New MBB:
4690fca6ea1SDimitry Andric // ========================================
4700fca6ea1SDimitry Andric //                 < Phis >
4710fca6ea1SDimitry Andric // ========================================     (sliding direction)
4720fca6ea1SDimitry Andric // MBB copy 1                                            |
4730fca6ea1SDimitry Andric //                                                       V
4740fca6ea1SDimitry Andric //
4750fca6ea1SDimitry Andric // ~~~~~~~~~~~~~~~~~~~|~~~~~~~~~~~~~~~~~~~~  ----schedule window-----
4760fca6ea1SDimitry Andric //                    |                                  |
4770fca6ea1SDimitry Andric // ===================V====================              |
4780fca6ea1SDimitry Andric // MBB copy 2      < MI B >                              |
4790fca6ea1SDimitry Andric //                                                       |
4800fca6ea1SDimitry Andric //                 < MI A >                              V
4810fca6ea1SDimitry Andric // ~~~~~~~~~~~~~~~~~~~:~~~~~~~~~~~~~~~~~~~~  ------------------------
4820fca6ea1SDimitry Andric //                    :
4830fca6ea1SDimitry Andric // ===================V====================
4840fca6ea1SDimitry Andric // MBB copy 3      < MI B'>
4850fca6ea1SDimitry Andric //
4860fca6ea1SDimitry Andric //
4870fca6ea1SDimitry Andric //
4880fca6ea1SDimitry Andric //
4890fca6ea1SDimitry Andric // ========================================
4900fca6ea1SDimitry Andric //              < Terminators >
4910fca6ea1SDimitry Andric // ========================================
4920fca6ea1SDimitry Andric int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
4930fca6ea1SDimitry Andric   int MaxStallCycle = 0;
494*6c4b055cSDimitry Andric   int CurrentII = MaxCycle + 1;
4950fca6ea1SDimitry Andric   auto Range = getScheduleRange(Offset, SchedInstrNum);
4960fca6ea1SDimitry Andric   for (auto &MI : Range) {
4970fca6ea1SDimitry Andric     auto *SU = TripleDAG->getSUnit(&MI);
4980fca6ea1SDimitry Andric     int DefCycle = getOriCycle(&MI);
4990fca6ea1SDimitry Andric     for (auto &Succ : SU->Succs) {
5000fca6ea1SDimitry Andric       if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU)
5010fca6ea1SDimitry Andric         continue;
502*6c4b055cSDimitry Andric       // If the expected cycle does not exceed CurrentII, no check is needed.
503*6c4b055cSDimitry Andric       if (DefCycle + (int)Succ.getLatency() <= CurrentII)
5040fca6ea1SDimitry Andric         continue;
5050fca6ea1SDimitry Andric       // If the cycle of the scheduled MI A is less than that of the scheduled
5060fca6ea1SDimitry Andric       // MI B, the scheduling will fail because the lifetime of the
5070fca6ea1SDimitry Andric       // corresponding register exceeds II.
5080fca6ea1SDimitry Andric       auto *SuccMI = Succ.getSUnit()->getInstr();
5090fca6ea1SDimitry Andric       int UseCycle = getOriCycle(SuccMI);
5100fca6ea1SDimitry Andric       if (DefCycle < UseCycle)
5110fca6ea1SDimitry Andric         return WindowIILimit;
5120fca6ea1SDimitry Andric       // Get the stall cycle introduced by the register between two trips.
513*6c4b055cSDimitry Andric       int StallCycle = DefCycle + (int)Succ.getLatency() - CurrentII - UseCycle;
5140fca6ea1SDimitry Andric       MaxStallCycle = std::max(MaxStallCycle, StallCycle);
5150fca6ea1SDimitry Andric     }
5160fca6ea1SDimitry Andric   }
5170fca6ea1SDimitry Andric   LLVM_DEBUG(dbgs() << "MaxStallCycle is " << MaxStallCycle << ".\n");
5180fca6ea1SDimitry Andric   return MaxStallCycle;
5190fca6ea1SDimitry Andric }
5200fca6ea1SDimitry Andric 
5210fca6ea1SDimitry Andric unsigned WindowScheduler::analyseII(ScheduleDAGInstrs &DAG, unsigned Offset) {
5220fca6ea1SDimitry Andric   LLVM_DEBUG(dbgs() << "Start analyzing II:\n");
5230fca6ea1SDimitry Andric   int MaxCycle = calculateMaxCycle(DAG, Offset);
5240fca6ea1SDimitry Andric   if (MaxCycle == (int)WindowIILimit)
5250fca6ea1SDimitry Andric     return MaxCycle;
5260fca6ea1SDimitry Andric   int StallCycle = calculateStallCycle(Offset, MaxCycle);
5270fca6ea1SDimitry Andric   if (StallCycle == (int)WindowIILimit)
5280fca6ea1SDimitry Andric     return StallCycle;
5290fca6ea1SDimitry Andric   // The value of II is equal to the maximum execution cycle plus 1.
5300fca6ea1SDimitry Andric   return MaxCycle + StallCycle + 1;
5310fca6ea1SDimitry Andric }
5320fca6ea1SDimitry Andric 
5330fca6ea1SDimitry Andric void WindowScheduler::schedulePhi(int Offset, unsigned &II) {
5340fca6ea1SDimitry Andric   LLVM_DEBUG(dbgs() << "Start scheduling Phis:\n");
5350fca6ea1SDimitry Andric   for (auto &Phi : MBB->phis()) {
5360fca6ea1SDimitry Andric     int LateCycle = INT_MAX;
5370fca6ea1SDimitry Andric     auto *SU = TripleDAG->getSUnit(&Phi);
5380fca6ea1SDimitry Andric     for (auto &Succ : SU->Succs) {
5390fca6ea1SDimitry Andric       // Phi doesn't have any Anti successors.
5400fca6ea1SDimitry Andric       if (Succ.getKind() != SDep::Data)
5410fca6ea1SDimitry Andric         continue;
5420fca6ea1SDimitry Andric       // Phi is scheduled before the successor of stage 0. The issue cycle of
5430fca6ea1SDimitry Andric       // phi is the latest cycle in this interval.
5440fca6ea1SDimitry Andric       auto *SuccMI = Succ.getSUnit()->getInstr();
5450fca6ea1SDimitry Andric       int Cycle = getOriCycle(SuccMI);
5460fca6ea1SDimitry Andric       if (getOriStage(getOriMI(SuccMI), Offset) == 0)
5470fca6ea1SDimitry Andric         LateCycle = std::min(LateCycle, Cycle);
5480fca6ea1SDimitry Andric     }
5490fca6ea1SDimitry Andric     // The anti-dependency of phi need to be handled separately in the same way.
5500fca6ea1SDimitry Andric     if (Register AntiReg = getAntiRegister(&Phi)) {
5510fca6ea1SDimitry Andric       auto *AntiMI = MRI->getVRegDef(AntiReg);
5520fca6ea1SDimitry Andric       // AntiReg may be defined outside the kernel MBB.
5530fca6ea1SDimitry Andric       if (AntiMI->getParent() == MBB) {
5540fca6ea1SDimitry Andric         auto AntiCycle = getOriCycle(AntiMI);
5550fca6ea1SDimitry Andric         if (getOriStage(getOriMI(AntiMI), Offset) == 0)
5560fca6ea1SDimitry Andric           LateCycle = std::min(LateCycle, AntiCycle);
5570fca6ea1SDimitry Andric       }
5580fca6ea1SDimitry Andric     }
5590fca6ea1SDimitry Andric     // If there is no limit to the late cycle, a default value is given.
5600fca6ea1SDimitry Andric     if (LateCycle == INT_MAX)
5610fca6ea1SDimitry Andric       LateCycle = (int)(II - 1);
5620fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "\tCycle range [0, " << LateCycle << "] " << Phi);
5630fca6ea1SDimitry Andric     // The issue cycle of phi is set to the latest cycle in the interval.
5640fca6ea1SDimitry Andric     auto *OriPhi = getOriMI(&Phi);
5650fca6ea1SDimitry Andric     OriToCycle[OriPhi] = LateCycle;
5660fca6ea1SDimitry Andric   }
5670fca6ea1SDimitry Andric }
5680fca6ea1SDimitry Andric 
5690fca6ea1SDimitry Andric DenseMap<MachineInstr *, int> WindowScheduler::getIssueOrder(unsigned Offset,
5700fca6ea1SDimitry Andric                                                              unsigned II) {
5710fca6ea1SDimitry Andric   // At each issue cycle, phi is placed before MIs in stage 0. So the simplest
5720fca6ea1SDimitry Andric   // way is to put phi at the beginning of the current cycle.
5730fca6ea1SDimitry Andric   DenseMap<int, SmallVector<MachineInstr *>> CycleToMIs;
5740fca6ea1SDimitry Andric   auto Range = getScheduleRange(Offset, SchedInstrNum);
5750fca6ea1SDimitry Andric   for (auto &Phi : MBB->phis())
5760fca6ea1SDimitry Andric     CycleToMIs[getOriCycle(&Phi)].push_back(getOriMI(&Phi));
5770fca6ea1SDimitry Andric   for (auto &MI : Range)
5780fca6ea1SDimitry Andric     CycleToMIs[getOriCycle(&MI)].push_back(getOriMI(&MI));
5790fca6ea1SDimitry Andric   // Each MI is assigned a separate ordered Id, which is used as a sort marker
5800fca6ea1SDimitry Andric   // in the following expand process.
5810fca6ea1SDimitry Andric   DenseMap<MachineInstr *, int> IssueOrder;
5820fca6ea1SDimitry Andric   int Id = 0;
5830fca6ea1SDimitry Andric   for (int Cycle = 0; Cycle < (int)II; ++Cycle) {
5840fca6ea1SDimitry Andric     if (!CycleToMIs.count(Cycle))
5850fca6ea1SDimitry Andric       continue;
5860fca6ea1SDimitry Andric     for (auto *MI : CycleToMIs[Cycle])
5870fca6ea1SDimitry Andric       IssueOrder[MI] = Id++;
5880fca6ea1SDimitry Andric   }
5890fca6ea1SDimitry Andric   return IssueOrder;
5900fca6ea1SDimitry Andric }
5910fca6ea1SDimitry Andric 
5920fca6ea1SDimitry Andric void WindowScheduler::updateScheduleResult(unsigned Offset, unsigned II) {
5930fca6ea1SDimitry Andric   // At the first update, Offset is equal to SchedPhiNum. At this time, only
5940fca6ea1SDimitry Andric   // BestII, BestOffset, and BaseII need to be updated.
5950fca6ea1SDimitry Andric   if (Offset == SchedPhiNum) {
5960fca6ea1SDimitry Andric     BestII = II;
5970fca6ea1SDimitry Andric     BestOffset = SchedPhiNum;
5980fca6ea1SDimitry Andric     BaseII = II;
5990fca6ea1SDimitry Andric     return;
6000fca6ea1SDimitry Andric   }
6010fca6ea1SDimitry Andric   // The update will only continue if the II is smaller than BestII and the II
6020fca6ea1SDimitry Andric   // is sufficiently small.
6030fca6ea1SDimitry Andric   if ((II >= BestII) || (II + WindowDiffLimit > BaseII))
6040fca6ea1SDimitry Andric     return;
6050fca6ea1SDimitry Andric   BestII = II;
6060fca6ea1SDimitry Andric   BestOffset = Offset;
6070fca6ea1SDimitry Andric   // Record the result of the current list scheduling, noting that each MI is
6080fca6ea1SDimitry Andric   // stored unordered in SchedResult.
6090fca6ea1SDimitry Andric   SchedResult.clear();
6100fca6ea1SDimitry Andric   auto IssueOrder = getIssueOrder(Offset, II);
6110fca6ea1SDimitry Andric   for (auto &Pair : OriToCycle) {
6120fca6ea1SDimitry Andric     assert(IssueOrder.count(Pair.first) && "Cannot find original MI!");
6130fca6ea1SDimitry Andric     SchedResult.push_back(std::make_tuple(Pair.first, Pair.second,
6140fca6ea1SDimitry Andric                                           getOriStage(Pair.first, Offset),
6150fca6ea1SDimitry Andric                                           IssueOrder[Pair.first]));
6160fca6ea1SDimitry Andric   }
6170fca6ea1SDimitry Andric }
6180fca6ea1SDimitry Andric 
6190fca6ea1SDimitry Andric void WindowScheduler::expand() {
6200fca6ea1SDimitry Andric   // The MIs in the SchedResult are sorted by the issue order ID.
6210fca6ea1SDimitry Andric   llvm::stable_sort(SchedResult,
6220fca6ea1SDimitry Andric                     [](const std::tuple<MachineInstr *, int, int, int> &A,
6230fca6ea1SDimitry Andric                        const std::tuple<MachineInstr *, int, int, int> &B) {
6240fca6ea1SDimitry Andric                       return std::get<3>(A) < std::get<3>(B);
6250fca6ea1SDimitry Andric                     });
6260fca6ea1SDimitry Andric   // Use the scheduling infrastructure for expansion, noting that InstrChanges
6270fca6ea1SDimitry Andric   // is not supported here.
6280fca6ea1SDimitry Andric   DenseMap<MachineInstr *, int> Cycles, Stages;
6290fca6ea1SDimitry Andric   std::vector<MachineInstr *> OrderedInsts;
6300fca6ea1SDimitry Andric   for (auto &Info : SchedResult) {
6310fca6ea1SDimitry Andric     auto *MI = std::get<0>(Info);
6320fca6ea1SDimitry Andric     OrderedInsts.push_back(MI);
6330fca6ea1SDimitry Andric     Cycles[MI] = std::get<1>(Info);
6340fca6ea1SDimitry Andric     Stages[MI] = std::get<2>(Info);
6350fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "\tCycle " << Cycles[MI] << " [S." << Stages[MI]
6360fca6ea1SDimitry Andric                       << "]: " << *MI);
6370fca6ea1SDimitry Andric   }
6380fca6ea1SDimitry Andric   ModuloSchedule MS(*MF, &Loop, std::move(OrderedInsts), std::move(Cycles),
6390fca6ea1SDimitry Andric                     std::move(Stages));
6400fca6ea1SDimitry Andric   ModuloScheduleExpander MSE(*MF, MS, *Context->LIS,
6410fca6ea1SDimitry Andric                              ModuloScheduleExpander::InstrChangesTy());
6420fca6ea1SDimitry Andric   MSE.expand();
6430fca6ea1SDimitry Andric   MSE.cleanup();
6440fca6ea1SDimitry Andric }
6450fca6ea1SDimitry Andric 
6460fca6ea1SDimitry Andric void WindowScheduler::updateLiveIntervals() {
6470fca6ea1SDimitry Andric   SmallVector<Register, 128> UsedRegs;
6480fca6ea1SDimitry Andric   for (MachineInstr &MI : *MBB)
6490fca6ea1SDimitry Andric     for (const MachineOperand &MO : MI.operands()) {
6500fca6ea1SDimitry Andric       if (!MO.isReg() || MO.getReg() == 0)
6510fca6ea1SDimitry Andric         continue;
6520fca6ea1SDimitry Andric       Register Reg = MO.getReg();
6530fca6ea1SDimitry Andric       if (!is_contained(UsedRegs, Reg))
6540fca6ea1SDimitry Andric         UsedRegs.push_back(Reg);
6550fca6ea1SDimitry Andric     }
6560fca6ea1SDimitry Andric   Context->LIS->repairIntervalsInRange(MBB, MBB->begin(), MBB->end(), UsedRegs);
6570fca6ea1SDimitry Andric }
6580fca6ea1SDimitry Andric 
6590fca6ea1SDimitry Andric iterator_range<MachineBasicBlock::iterator>
6600fca6ea1SDimitry Andric WindowScheduler::getScheduleRange(unsigned Offset, unsigned Num) {
6610fca6ea1SDimitry Andric   auto RegionBegin = MBB->begin();
6620fca6ea1SDimitry Andric   std::advance(RegionBegin, Offset);
6630fca6ea1SDimitry Andric   auto RegionEnd = RegionBegin;
6640fca6ea1SDimitry Andric   std::advance(RegionEnd, Num);
6650fca6ea1SDimitry Andric   return make_range(RegionBegin, RegionEnd);
6660fca6ea1SDimitry Andric }
6670fca6ea1SDimitry Andric 
6680fca6ea1SDimitry Andric int WindowScheduler::getOriCycle(MachineInstr *NewMI) {
6690fca6ea1SDimitry Andric   assert(TriToOri.count(NewMI) && "Cannot find original MI!");
6700fca6ea1SDimitry Andric   auto *OriMI = TriToOri[NewMI];
6710fca6ea1SDimitry Andric   assert(OriToCycle.count(OriMI) && "Cannot find schedule cycle!");
6720fca6ea1SDimitry Andric   return OriToCycle[OriMI];
6730fca6ea1SDimitry Andric }
6740fca6ea1SDimitry Andric 
6750fca6ea1SDimitry Andric MachineInstr *WindowScheduler::getOriMI(MachineInstr *NewMI) {
6760fca6ea1SDimitry Andric   assert(TriToOri.count(NewMI) && "Cannot find original MI!");
6770fca6ea1SDimitry Andric   return TriToOri[NewMI];
6780fca6ea1SDimitry Andric }
6790fca6ea1SDimitry Andric 
6800fca6ea1SDimitry Andric unsigned WindowScheduler::getOriStage(MachineInstr *OriMI, unsigned Offset) {
6810fca6ea1SDimitry Andric   assert(llvm::find(OriMIs, OriMI) != OriMIs.end() &&
6820fca6ea1SDimitry Andric          "Cannot find OriMI in OriMIs!");
6830fca6ea1SDimitry Andric   // If there is no instruction fold, all MI stages are 0.
6840fca6ea1SDimitry Andric   if (Offset == SchedPhiNum)
6850fca6ea1SDimitry Andric     return 0;
6860fca6ea1SDimitry Andric   // For those MIs with an ID less than the Offset, their stages are set to 0,
6870fca6ea1SDimitry Andric   // while the rest are set to 1.
6880fca6ea1SDimitry Andric   unsigned Id = 0;
6890fca6ea1SDimitry Andric   for (auto *MI : OriMIs) {
6900fca6ea1SDimitry Andric     if (MI->isMetaInstruction())
6910fca6ea1SDimitry Andric       continue;
6920fca6ea1SDimitry Andric     if (MI == OriMI)
6930fca6ea1SDimitry Andric       break;
6940fca6ea1SDimitry Andric     ++Id;
6950fca6ea1SDimitry Andric   }
6960fca6ea1SDimitry Andric   return Id >= (size_t)Offset ? 1 : 0;
6970fca6ea1SDimitry Andric }
6980fca6ea1SDimitry Andric 
6990fca6ea1SDimitry Andric Register WindowScheduler::getAntiRegister(MachineInstr *Phi) {
7000fca6ea1SDimitry Andric   assert(Phi->isPHI() && "Expecting PHI!");
7010fca6ea1SDimitry Andric   Register AntiReg;
7020fca6ea1SDimitry Andric   for (auto MO : Phi->uses()) {
7030fca6ea1SDimitry Andric     if (MO.isReg())
7040fca6ea1SDimitry Andric       AntiReg = MO.getReg();
7050fca6ea1SDimitry Andric     else if (MO.isMBB() && MO.getMBB() == MBB)
7060fca6ea1SDimitry Andric       return AntiReg;
7070fca6ea1SDimitry Andric   }
7080fca6ea1SDimitry Andric   return 0;
7090fca6ea1SDimitry Andric }
710