xref: /llvm-project/llvm/lib/CodeGen/WindowScheduler.cpp (revision d808f15828d0685fb65f140c49d6f7e7142d2503)
1b6bf4024SHua Tian //======----------- WindowScheduler.cpp - window scheduler -------------======//
2b6bf4024SHua Tian //
3b6bf4024SHua Tian // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4b6bf4024SHua Tian // See https://llvm.org/LICENSE.txt for license information.
5b6bf4024SHua Tian // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6b6bf4024SHua Tian //
7b6bf4024SHua Tian //===----------------------------------------------------------------------===//
8b6bf4024SHua Tian //
9b6bf4024SHua Tian // An implementation of the Window Scheduling software pipelining algorithm.
10b6bf4024SHua Tian //
11b6bf4024SHua Tian // The fundamental concept of the window scheduling algorithm involves folding
12b6bf4024SHua Tian // the original MBB at a specific position, followed by list scheduling on the
13b6bf4024SHua Tian // folded MIs. The optimal scheduling result is then chosen from various folding
14b6bf4024SHua Tian // positions as the final scheduling outcome.
15b6bf4024SHua Tian //
16b6bf4024SHua Tian // The primary challenge in this algorithm lies in generating the folded MIs and
17b6bf4024SHua Tian // establishing their dependencies. We have innovatively employed a new MBB,
18b6bf4024SHua Tian // created by copying the original MBB three times, known as TripleMBB. This
19b6bf4024SHua Tian // TripleMBB enables the convenient implementation of MI folding and dependency
20b6bf4024SHua Tian // establishment. To facilitate the algorithm's implementation, we have also
21b6bf4024SHua Tian // devised data structures such as OriMIs, TriMIs, TriToOri, and OriToCycle.
22b6bf4024SHua Tian //
23b6bf4024SHua Tian // Another challenge in the algorithm is the scheduling of phis. Semantically,
24b6bf4024SHua Tian // it is difficult to place the phis in the window and perform list scheduling.
25b6bf4024SHua Tian // Therefore, we schedule these phis separately after each list scheduling.
26b6bf4024SHua Tian //
27b6bf4024SHua Tian // The provided implementation is designed for use before the Register Allocator
28b6bf4024SHua Tian // (RA). If the target requires implementation after RA, it is recommended to
29b6bf4024SHua Tian // reimplement analyseII(), schedulePhi(), and expand(). Additionally,
30b6bf4024SHua Tian // target-specific logic can be added in initialize(), preProcess(), and
31b6bf4024SHua Tian // postProcess().
32b6bf4024SHua Tian //
33b6bf4024SHua Tian // Lastly, it is worth mentioning that getSearchIndexes() is an important
34b6bf4024SHua Tian // function. We have experimented with more complex heuristics on downstream
35b6bf4024SHua Tian // target and achieved favorable results.
36b6bf4024SHua Tian //
37b6bf4024SHua Tian //===----------------------------------------------------------------------===//
38b6bf4024SHua Tian #include "llvm/CodeGen/WindowScheduler.h"
39b6bf4024SHua Tian #include "llvm/ADT/Statistic.h"
40b6bf4024SHua Tian #include "llvm/CodeGen/LiveIntervals.h"
41b6bf4024SHua Tian #include "llvm/CodeGen/MachineLoopInfo.h"
42b6bf4024SHua Tian #include "llvm/CodeGen/MachinePipeliner.h"
43b6bf4024SHua Tian #include "llvm/CodeGen/ModuloSchedule.h"
44b6bf4024SHua Tian #include "llvm/CodeGen/TargetPassConfig.h"
45b6bf4024SHua Tian #include "llvm/Support/CommandLine.h"
46b6bf4024SHua Tian #include "llvm/Support/Debug.h"
47b6bf4024SHua Tian #include "llvm/Support/TimeProfiler.h"
48b6bf4024SHua Tian 
49b6bf4024SHua Tian using namespace llvm;
50b6bf4024SHua Tian 
51b6bf4024SHua Tian #define DEBUG_TYPE "pipeliner"
52b6bf4024SHua Tian 
53b6bf4024SHua Tian namespace {
54b6bf4024SHua Tian STATISTIC(NumTryWindowSchedule,
55b6bf4024SHua Tian           "Number of loops that we attempt to use window scheduling");
56b6bf4024SHua Tian STATISTIC(NumTryWindowSearch,
57b6bf4024SHua Tian           "Number of times that we run list schedule in the window scheduling");
58b6bf4024SHua Tian STATISTIC(NumWindowSchedule,
59b6bf4024SHua Tian           "Number of loops that we successfully use window scheduling");
60b6bf4024SHua Tian STATISTIC(NumFailAnalyseII,
61b6bf4024SHua Tian           "Window scheduling abort due to the failure of the II analysis");
62b6bf4024SHua Tian 
63b6bf4024SHua Tian cl::opt<unsigned>
64b6bf4024SHua Tian     WindowSearchNum("window-search-num",
65b6bf4024SHua Tian                     cl::desc("The number of searches per loop in the window "
66b6bf4024SHua Tian                              "algorithm. 0 means no search number limit."),
67b6bf4024SHua Tian                     cl::Hidden, cl::init(6));
68b6bf4024SHua Tian 
69b6bf4024SHua Tian cl::opt<unsigned> WindowSearchRatio(
70b6bf4024SHua Tian     "window-search-ratio",
71b6bf4024SHua Tian     cl::desc("The ratio of searches per loop in the window algorithm. 100 "
72b6bf4024SHua Tian              "means search all positions in the loop, while 0 means not "
73b6bf4024SHua Tian              "performing any search."),
74b6bf4024SHua Tian     cl::Hidden, cl::init(40));
75b6bf4024SHua Tian 
76b6bf4024SHua Tian cl::opt<unsigned> WindowIICoeff(
77b6bf4024SHua Tian     "window-ii-coeff",
78b6bf4024SHua Tian     cl::desc(
79b6bf4024SHua Tian         "The coefficient used when initializing II in the window algorithm."),
80b6bf4024SHua Tian     cl::Hidden, cl::init(5));
81b6bf4024SHua Tian 
82b6bf4024SHua Tian cl::opt<unsigned> WindowRegionLimit(
83b6bf4024SHua Tian     "window-region-limit",
84b6bf4024SHua Tian     cl::desc(
85b6bf4024SHua Tian         "The lower limit of the scheduling region in the window algorithm."),
86b6bf4024SHua Tian     cl::Hidden, cl::init(3));
87b6bf4024SHua Tian 
88b6bf4024SHua Tian cl::opt<unsigned> WindowDiffLimit(
89b6bf4024SHua Tian     "window-diff-limit",
90b6bf4024SHua Tian     cl::desc("The lower limit of the difference between best II and base II in "
91b6bf4024SHua Tian              "the window algorithm. If the difference is smaller than "
92b6bf4024SHua Tian              "this lower limit, window scheduling will not be performed."),
93b6bf4024SHua Tian     cl::Hidden, cl::init(2));
94b6bf4024SHua Tian } // namespace
95b6bf4024SHua Tian 
96b6bf4024SHua Tian // WindowIILimit serves as an indicator of abnormal scheduling results and could
97b6bf4024SHua Tian // potentially be referenced by the derived target window scheduler.
98b6bf4024SHua Tian cl::opt<unsigned>
99b6bf4024SHua Tian     WindowIILimit("window-ii-limit",
100b6bf4024SHua Tian                   cl::desc("The upper limit of II in the window algorithm."),
101b6bf4024SHua Tian                   cl::Hidden, cl::init(1000));
102b6bf4024SHua Tian 
103b6bf4024SHua Tian WindowScheduler::WindowScheduler(MachineSchedContext *C, MachineLoop &ML)
104b6bf4024SHua Tian     : Context(C), MF(C->MF), MBB(ML.getHeader()), Loop(ML),
105b6bf4024SHua Tian       Subtarget(&MF->getSubtarget()), TII(Subtarget->getInstrInfo()),
106b6bf4024SHua Tian       TRI(Subtarget->getRegisterInfo()), MRI(&MF->getRegInfo()) {
107b6bf4024SHua Tian   TripleDAG = std::unique_ptr<ScheduleDAGInstrs>(
108b6bf4024SHua Tian       createMachineScheduler(/*OnlyBuildGraph=*/true));
109b6bf4024SHua Tian }
110b6bf4024SHua Tian 
111b6bf4024SHua Tian bool WindowScheduler::run() {
112b6bf4024SHua Tian   if (!initialize()) {
113b6bf4024SHua Tian     LLVM_DEBUG(dbgs() << "The WindowScheduler failed to initialize!\n");
114b6bf4024SHua Tian     return false;
115b6bf4024SHua Tian   }
116b6bf4024SHua Tian   // The window algorithm is time-consuming, and its compilation time should be
117b6bf4024SHua Tian   // taken into consideration.
118b6bf4024SHua Tian   TimeTraceScope Scope("WindowSearch");
119b6bf4024SHua Tian   ++NumTryWindowSchedule;
120b6bf4024SHua Tian   // Performing the relevant processing before window scheduling.
121b6bf4024SHua Tian   preProcess();
122b6bf4024SHua Tian   // The main window scheduling begins.
123b6bf4024SHua Tian   std::unique_ptr<ScheduleDAGInstrs> SchedDAG(createMachineScheduler());
124b6bf4024SHua Tian   auto SearchIndexes = getSearchIndexes(WindowSearchNum, WindowSearchRatio);
125b6bf4024SHua Tian   for (unsigned Idx : SearchIndexes) {
126b6bf4024SHua Tian     OriToCycle.clear();
127b6bf4024SHua Tian     ++NumTryWindowSearch;
128b6bf4024SHua Tian     // The scheduling starts with non-phi instruction, so SchedPhiNum needs to
129b6bf4024SHua Tian     // be added to Idx.
130b6bf4024SHua Tian     unsigned Offset = Idx + SchedPhiNum;
131b6bf4024SHua Tian     auto Range = getScheduleRange(Offset, SchedInstrNum);
132b6bf4024SHua Tian     SchedDAG->startBlock(MBB);
133b6bf4024SHua Tian     SchedDAG->enterRegion(MBB, Range.begin(), Range.end(), SchedInstrNum);
134b6bf4024SHua Tian     SchedDAG->schedule();
135b6bf4024SHua Tian     LLVM_DEBUG(SchedDAG->dump());
136b6bf4024SHua Tian     unsigned II = analyseII(*SchedDAG, Offset);
137b6bf4024SHua Tian     if (II == WindowIILimit) {
138b6bf4024SHua Tian       restoreTripleMBB();
139b6bf4024SHua Tian       LLVM_DEBUG(dbgs() << "Can't find a valid II. Keep searching...\n");
140b6bf4024SHua Tian       ++NumFailAnalyseII;
141b6bf4024SHua Tian       continue;
142b6bf4024SHua Tian     }
143b6bf4024SHua Tian     schedulePhi(Offset, II);
144b6bf4024SHua Tian     updateScheduleResult(Offset, II);
145b6bf4024SHua Tian     restoreTripleMBB();
146b6bf4024SHua Tian     LLVM_DEBUG(dbgs() << "Current window Offset is " << Offset << " and II is "
147b6bf4024SHua Tian                       << II << ".\n");
148b6bf4024SHua Tian   }
149b6bf4024SHua Tian   // Performing the relevant processing after window scheduling.
150b6bf4024SHua Tian   postProcess();
151b6bf4024SHua Tian   // Check whether the scheduling result is valid.
152b6bf4024SHua Tian   if (!isScheduleValid()) {
153b6bf4024SHua Tian     LLVM_DEBUG(dbgs() << "Window scheduling is not needed!\n");
154b6bf4024SHua Tian     return false;
155b6bf4024SHua Tian   }
156b6bf4024SHua Tian   LLVM_DEBUG(dbgs() << "\nBest window offset is " << BestOffset
157b6bf4024SHua Tian                     << " and Best II is " << BestII << ".\n");
158b6bf4024SHua Tian   // Expand the scheduling result to prologue, kernel, and epilogue.
159b6bf4024SHua Tian   expand();
160b6bf4024SHua Tian   ++NumWindowSchedule;
161b6bf4024SHua Tian   return true;
162b6bf4024SHua Tian }
163b6bf4024SHua Tian 
164b6bf4024SHua Tian ScheduleDAGInstrs *
165b6bf4024SHua Tian WindowScheduler::createMachineScheduler(bool OnlyBuildGraph) {
166b6bf4024SHua Tian   return OnlyBuildGraph
167b6bf4024SHua Tian              ? new ScheduleDAGMI(
168b6bf4024SHua Tian                    Context, std::make_unique<PostGenericScheduler>(Context),
169b6bf4024SHua Tian                    true)
170b6bf4024SHua Tian              : Context->PassConfig->createMachineScheduler(Context);
171b6bf4024SHua Tian }
172b6bf4024SHua Tian 
173b6bf4024SHua Tian bool WindowScheduler::initialize() {
174b6bf4024SHua Tian   if (!Subtarget->enableWindowScheduler()) {
175b6bf4024SHua Tian     LLVM_DEBUG(dbgs() << "Target disables the window scheduling!\n");
176b6bf4024SHua Tian     return false;
177b6bf4024SHua Tian   }
178b6bf4024SHua Tian   // Initialized the member variables used by window algorithm.
179b6bf4024SHua Tian   OriMIs.clear();
180b6bf4024SHua Tian   TriMIs.clear();
181b6bf4024SHua Tian   TriToOri.clear();
182b6bf4024SHua Tian   OriToCycle.clear();
183b6bf4024SHua Tian   SchedResult.clear();
184b6bf4024SHua Tian   SchedPhiNum = 0;
185b6bf4024SHua Tian   SchedInstrNum = 0;
186b6bf4024SHua Tian   BestII = UINT_MAX;
187b6bf4024SHua Tian   BestOffset = 0;
188b6bf4024SHua Tian   BaseII = 0;
189b6bf4024SHua Tian   // List scheduling used in the window algorithm depends on LiveIntervals.
190b6bf4024SHua Tian   if (!Context->LIS) {
191b6bf4024SHua Tian     LLVM_DEBUG(dbgs() << "There is no LiveIntervals information!\n");
192b6bf4024SHua Tian     return false;
193b6bf4024SHua Tian   }
194b6bf4024SHua Tian   // Check each MI in MBB.
195811e505cSHua Tian   SmallSet<Register, 8> PrevDefs;
196811e505cSHua Tian   SmallSet<Register, 8> PrevUses;
197811e505cSHua Tian   auto IsLoopCarried = [&](MachineInstr &Phi) {
198811e505cSHua Tian     // Two cases are checked here: (1)The virtual register defined by the
199811e505cSHua Tian     // preceding phi is used by the succeeding phi;(2)The preceding phi uses the
200811e505cSHua Tian     // virtual register defined by the succeeding phi.
201811e505cSHua Tian     if (PrevUses.count(Phi.getOperand(0).getReg()))
202811e505cSHua Tian       return true;
203811e505cSHua Tian     PrevDefs.insert(Phi.getOperand(0).getReg());
204811e505cSHua Tian     for (unsigned I = 1, E = Phi.getNumOperands(); I != E; I += 2) {
205811e505cSHua Tian       if (PrevDefs.count(Phi.getOperand(I).getReg()))
206811e505cSHua Tian         return true;
207811e505cSHua Tian       PrevUses.insert(Phi.getOperand(I).getReg());
208811e505cSHua Tian     }
209811e505cSHua Tian     return false;
210811e505cSHua Tian   };
211b6bf4024SHua Tian   auto PLI = TII->analyzeLoopForPipelining(MBB);
212b6bf4024SHua Tian   for (auto &MI : *MBB) {
213b6bf4024SHua Tian     if (MI.isMetaInstruction() || MI.isTerminator())
214b6bf4024SHua Tian       continue;
215b6bf4024SHua Tian     if (MI.isPHI()) {
216811e505cSHua Tian       if (IsLoopCarried(MI)) {
217811e505cSHua Tian         LLVM_DEBUG(dbgs() << "Loop carried phis are not supported yet!\n");
218b6bf4024SHua Tian         return false;
219b6bf4024SHua Tian       }
220b6bf4024SHua Tian       ++SchedPhiNum;
221b6bf4024SHua Tian       ++BestOffset;
222b6bf4024SHua Tian     } else
223b6bf4024SHua Tian       ++SchedInstrNum;
224b6bf4024SHua Tian     if (TII->isSchedulingBoundary(MI, MBB, *MF)) {
225b6bf4024SHua Tian       LLVM_DEBUG(
226b6bf4024SHua Tian           dbgs() << "Boundary MI is not allowed in window scheduling!\n");
227b6bf4024SHua Tian       return false;
228b6bf4024SHua Tian     }
229b6bf4024SHua Tian     if (PLI->shouldIgnoreForPipelining(&MI)) {
230b6bf4024SHua Tian       LLVM_DEBUG(dbgs() << "Special MI defined by target is not allowed in "
231b6bf4024SHua Tian                            "window scheduling!\n");
232b6bf4024SHua Tian       return false;
233b6bf4024SHua Tian     }
234b6bf4024SHua Tian     for (auto &Def : MI.all_defs())
2352d6ff0c5SKai Yan       if (Def.isReg() && Def.getReg().isPhysical()) {
2362d6ff0c5SKai Yan         LLVM_DEBUG(dbgs() << "Physical registers are not supported in "
2372d6ff0c5SKai Yan                              "window scheduling!\n");
238b6bf4024SHua Tian         return false;
239b6bf4024SHua Tian       }
2402d6ff0c5SKai Yan   }
241b6bf4024SHua Tian   if (SchedInstrNum <= WindowRegionLimit) {
242b6bf4024SHua Tian     LLVM_DEBUG(dbgs() << "There are too few MIs in the window region!\n");
243b6bf4024SHua Tian     return false;
244b6bf4024SHua Tian   }
245b6bf4024SHua Tian   return true;
246b6bf4024SHua Tian }
247b6bf4024SHua Tian 
248b6bf4024SHua Tian void WindowScheduler::preProcess() {
249b6bf4024SHua Tian   // Prior to window scheduling, it's necessary to backup the original MBB,
250b6bf4024SHua Tian   // generate a new TripleMBB, and build a TripleDAG based on the TripleMBB.
251b6bf4024SHua Tian   backupMBB();
252b6bf4024SHua Tian   generateTripleMBB();
253b6bf4024SHua Tian   TripleDAG->startBlock(MBB);
254b6bf4024SHua Tian   TripleDAG->enterRegion(
255b6bf4024SHua Tian       MBB, MBB->begin(), MBB->getFirstTerminator(),
256b6bf4024SHua Tian       std::distance(MBB->begin(), MBB->getFirstTerminator()));
257b6bf4024SHua Tian   TripleDAG->buildSchedGraph(Context->AA);
258b6bf4024SHua Tian }
259b6bf4024SHua Tian 
260b6bf4024SHua Tian void WindowScheduler::postProcess() {
261b6bf4024SHua Tian   // After window scheduling, it's necessary to clear the TripleDAG and restore
262b6bf4024SHua Tian   // to the original MBB.
263b6bf4024SHua Tian   TripleDAG->exitRegion();
264b6bf4024SHua Tian   TripleDAG->finishBlock();
265b6bf4024SHua Tian   restoreMBB();
266b6bf4024SHua Tian }
267b6bf4024SHua Tian 
268b6bf4024SHua Tian void WindowScheduler::backupMBB() {
269b6bf4024SHua Tian   for (auto &MI : MBB->instrs())
270b6bf4024SHua Tian     OriMIs.push_back(&MI);
271b6bf4024SHua Tian   // Remove MIs and the corresponding live intervals.
272b6bf4024SHua Tian   for (auto &MI : make_early_inc_range(*MBB)) {
273b6bf4024SHua Tian     Context->LIS->getSlotIndexes()->removeMachineInstrFromMaps(MI, true);
274b6bf4024SHua Tian     MBB->remove(&MI);
275b6bf4024SHua Tian   }
276b6bf4024SHua Tian }
277b6bf4024SHua Tian 
278b6bf4024SHua Tian void WindowScheduler::restoreMBB() {
279b6bf4024SHua Tian   // Erase MIs and the corresponding live intervals.
280b6bf4024SHua Tian   for (auto &MI : make_early_inc_range(*MBB)) {
281b6bf4024SHua Tian     Context->LIS->getSlotIndexes()->removeMachineInstrFromMaps(MI, true);
282b6bf4024SHua Tian     MI.eraseFromParent();
283b6bf4024SHua Tian   }
284b6bf4024SHua Tian   // Restore MBB to the state before window scheduling.
285b6bf4024SHua Tian   for (auto *MI : OriMIs)
286b6bf4024SHua Tian     MBB->push_back(MI);
287b6bf4024SHua Tian   updateLiveIntervals();
288b6bf4024SHua Tian }
289b6bf4024SHua Tian 
290b6bf4024SHua Tian void WindowScheduler::generateTripleMBB() {
291b6bf4024SHua Tian   const unsigned DuplicateNum = 3;
292b6bf4024SHua Tian   TriMIs.clear();
293b6bf4024SHua Tian   TriToOri.clear();
294b6bf4024SHua Tian   assert(OriMIs.size() > 0 && "The Original MIs were not backed up!");
295b6bf4024SHua Tian   // Step 1: Performing the first copy of MBB instructions, excluding
296b6bf4024SHua Tian   // terminators. At the same time, we back up the anti-register of phis.
297b6bf4024SHua Tian   // DefPairs hold the old and new define register pairs.
298b6bf4024SHua Tian   DenseMap<Register, Register> DefPairs;
299b6bf4024SHua Tian   for (auto *MI : OriMIs) {
300b6bf4024SHua Tian     if (MI->isMetaInstruction() || MI->isTerminator())
301b6bf4024SHua Tian       continue;
302b6bf4024SHua Tian     if (MI->isPHI())
303b6bf4024SHua Tian       if (Register AntiReg = getAntiRegister(MI))
304b6bf4024SHua Tian         DefPairs[MI->getOperand(0).getReg()] = AntiReg;
305b6bf4024SHua Tian     auto *NewMI = MF->CloneMachineInstr(MI);
306b6bf4024SHua Tian     MBB->push_back(NewMI);
307b6bf4024SHua Tian     TriMIs.push_back(NewMI);
308b6bf4024SHua Tian     TriToOri[NewMI] = MI;
309b6bf4024SHua Tian   }
310b6bf4024SHua Tian   // Step 2: Performing the remaining two copies of MBB instructions excluding
311b6bf4024SHua Tian   // phis, and the last one contains terminators. At the same time, registers
312b6bf4024SHua Tian   // are updated accordingly.
313b6bf4024SHua Tian   for (size_t Cnt = 1; Cnt < DuplicateNum; ++Cnt) {
314b6bf4024SHua Tian     for (auto *MI : OriMIs) {
315b6bf4024SHua Tian       if (MI->isPHI() || MI->isMetaInstruction() ||
316b6bf4024SHua Tian           (MI->isTerminator() && Cnt < DuplicateNum - 1))
317b6bf4024SHua Tian         continue;
318b6bf4024SHua Tian       auto *NewMI = MF->CloneMachineInstr(MI);
319b6bf4024SHua Tian       DenseMap<Register, Register> NewDefs;
320b6bf4024SHua Tian       // New defines are updated.
321b6bf4024SHua Tian       for (auto MO : NewMI->all_defs())
322b6bf4024SHua Tian         if (MO.isReg() && MO.getReg().isVirtual()) {
323b6bf4024SHua Tian           Register NewDef =
324b6bf4024SHua Tian               MRI->createVirtualRegister(MRI->getRegClass(MO.getReg()));
325b6bf4024SHua Tian           NewMI->substituteRegister(MO.getReg(), NewDef, 0, *TRI);
326b6bf4024SHua Tian           NewDefs[MO.getReg()] = NewDef;
327b6bf4024SHua Tian         }
328b6bf4024SHua Tian       // New uses are updated.
329b6bf4024SHua Tian       for (auto DefRegPair : DefPairs)
330b6bf4024SHua Tian         if (NewMI->readsRegister(DefRegPair.first, TRI)) {
331b6bf4024SHua Tian           Register NewUse = DefRegPair.second;
332b6bf4024SHua Tian           // Note the update process for '%1 -> %9' in '%10 = sub i32 %9, %3':
333b6bf4024SHua Tian           //
334b6bf4024SHua Tian           // BB.3:                                  DefPairs
335b6bf4024SHua Tian           // ==================================
336b6bf4024SHua Tian           // %1 = phi i32 [%2, %BB.1], [%7, %BB.3]  (%1,%7)
337b6bf4024SHua Tian           // ...
338b6bf4024SHua Tian           // ==================================
339b6bf4024SHua Tian           // ...
340b6bf4024SHua Tian           // %4 = sub i32 %1, %3
341b6bf4024SHua Tian           // ...
342b6bf4024SHua Tian           // %7 = add i32 %5, %6
343b6bf4024SHua Tian           // ...
344b6bf4024SHua Tian           // ----------------------------------
345b6bf4024SHua Tian           // ...
346b6bf4024SHua Tian           // %8 = sub i32 %7, %3                    (%1,%7),(%4,%8)
347b6bf4024SHua Tian           // ...
348b6bf4024SHua Tian           // %9 = add i32 %5, %6                    (%1,%7),(%4,%8),(%7,%9)
349b6bf4024SHua Tian           // ...
350b6bf4024SHua Tian           // ----------------------------------
351b6bf4024SHua Tian           // ...
352b6bf4024SHua Tian           // %10 = sub i32 %9, %3                   (%1,%7),(%4,%10),(%7,%9)
353b6bf4024SHua Tian           // ...            ^
354b6bf4024SHua Tian           // %11 = add i32 %5, %6                   (%1,%7),(%4,%10),(%7,%11)
355b6bf4024SHua Tian           // ...
356b6bf4024SHua Tian           // ==================================
357b6bf4024SHua Tian           //          < Terminators >
358b6bf4024SHua Tian           // ==================================
359b6bf4024SHua Tian           if (DefPairs.count(NewUse))
360b6bf4024SHua Tian             NewUse = DefPairs[NewUse];
361b6bf4024SHua Tian           NewMI->substituteRegister(DefRegPair.first, NewUse, 0, *TRI);
362b6bf4024SHua Tian         }
363b6bf4024SHua Tian       // DefPairs is updated at last.
364b6bf4024SHua Tian       for (auto &NewDef : NewDefs)
365b6bf4024SHua Tian         DefPairs[NewDef.first] = NewDef.second;
366b6bf4024SHua Tian       MBB->push_back(NewMI);
367b6bf4024SHua Tian       TriMIs.push_back(NewMI);
368b6bf4024SHua Tian       TriToOri[NewMI] = MI;
369b6bf4024SHua Tian     }
370b6bf4024SHua Tian   }
371b6bf4024SHua Tian   // Step 3: The registers used by phis are updated, and they are generated in
372b6bf4024SHua Tian   // the third copy of MBB.
373b6bf4024SHua Tian   // In the privious example, the old phi is:
374b6bf4024SHua Tian   // %1 = phi i32 [%2, %BB.1], [%7, %BB.3]
375b6bf4024SHua Tian   // The new phi is:
376b6bf4024SHua Tian   // %1 = phi i32 [%2, %BB.1], [%11, %BB.3]
377b6bf4024SHua Tian   for (auto &Phi : MBB->phis()) {
378b6bf4024SHua Tian     for (auto DefRegPair : DefPairs)
379b6bf4024SHua Tian       if (Phi.readsRegister(DefRegPair.first, TRI))
380b6bf4024SHua Tian         Phi.substituteRegister(DefRegPair.first, DefRegPair.second, 0, *TRI);
381b6bf4024SHua Tian   }
382b6bf4024SHua Tian   updateLiveIntervals();
383b6bf4024SHua Tian }
384b6bf4024SHua Tian 
385b6bf4024SHua Tian void WindowScheduler::restoreTripleMBB() {
386b6bf4024SHua Tian   // After list scheduling, the MBB is restored in one traversal.
387b6bf4024SHua Tian   for (size_t I = 0; I < TriMIs.size(); ++I) {
388b6bf4024SHua Tian     auto *MI = TriMIs[I];
389b6bf4024SHua Tian     auto OldPos = MBB->begin();
390b6bf4024SHua Tian     std::advance(OldPos, I);
391b6bf4024SHua Tian     auto CurPos = MI->getIterator();
392b6bf4024SHua Tian     if (CurPos != OldPos) {
393b6bf4024SHua Tian       MBB->splice(OldPos, MBB, CurPos);
394b6bf4024SHua Tian       Context->LIS->handleMove(*MI, /*UpdateFlags=*/false);
395b6bf4024SHua Tian     }
396b6bf4024SHua Tian   }
397b6bf4024SHua Tian }
398b6bf4024SHua Tian 
399b6bf4024SHua Tian SmallVector<unsigned> WindowScheduler::getSearchIndexes(unsigned SearchNum,
400b6bf4024SHua Tian                                                         unsigned SearchRatio) {
401b6bf4024SHua Tian   // We use SearchRatio to get the index range, and then evenly get the indexes
402b6bf4024SHua Tian   // according to the SearchNum. This is a simple huristic. Depending on the
403b6bf4024SHua Tian   // characteristics of the target, more complex algorithms can be used for both
404b6bf4024SHua Tian   // performance and compilation time.
405b6bf4024SHua Tian   assert(SearchRatio <= 100 && "SearchRatio should be equal or less than 100!");
406b6bf4024SHua Tian   unsigned MaxIdx = SchedInstrNum * SearchRatio / 100;
407b6bf4024SHua Tian   unsigned Step = SearchNum > 0 && SearchNum <= MaxIdx ? MaxIdx / SearchNum : 1;
408b6bf4024SHua Tian   SmallVector<unsigned> SearchIndexes;
409b6bf4024SHua Tian   for (unsigned Idx = 0; Idx < MaxIdx; Idx += Step)
410b6bf4024SHua Tian     SearchIndexes.push_back(Idx);
411b6bf4024SHua Tian   return SearchIndexes;
412b6bf4024SHua Tian }
413b6bf4024SHua Tian 
414b6bf4024SHua Tian int WindowScheduler::getEstimatedII(ScheduleDAGInstrs &DAG) {
415b6bf4024SHua Tian   // Sometimes MaxDepth is 0, so it should be limited to the minimum of 1.
416b6bf4024SHua Tian   unsigned MaxDepth = 1;
417b6bf4024SHua Tian   for (auto &SU : DAG.SUnits)
418b6bf4024SHua Tian     MaxDepth = std::max(SU.getDepth() + SU.Latency, MaxDepth);
419b6bf4024SHua Tian   return MaxDepth * WindowIICoeff;
420b6bf4024SHua Tian }
421b6bf4024SHua Tian 
422b6bf4024SHua Tian int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG,
423b6bf4024SHua Tian                                        unsigned Offset) {
424b6bf4024SHua Tian   int InitII = getEstimatedII(DAG);
425b6bf4024SHua Tian   ResourceManager RM(Subtarget, &DAG);
426b6bf4024SHua Tian   RM.init(InitII);
427b6bf4024SHua Tian   // ResourceManager and DAG are used to calculate the maximum cycle for the
428b6bf4024SHua Tian   // scheduled MIs. Since MIs in the Region have already been scheduled, the
429b6bf4024SHua Tian   // emit cycles can be estimated in order here.
430b6bf4024SHua Tian   int CurCycle = 0;
431b6bf4024SHua Tian   auto Range = getScheduleRange(Offset, SchedInstrNum);
432b6bf4024SHua Tian   for (auto &MI : Range) {
433b6bf4024SHua Tian     auto *SU = DAG.getSUnit(&MI);
434b6bf4024SHua Tian     int ExpectCycle = CurCycle;
435b6bf4024SHua Tian     // The predecessors of current MI determine its earliest issue cycle.
436b6bf4024SHua Tian     for (auto &Pred : SU->Preds) {
437355e4a9eSHua Tian       if (Pred.isWeak())
438355e4a9eSHua Tian         continue;
439b6bf4024SHua Tian       auto *PredMI = Pred.getSUnit()->getInstr();
440b6bf4024SHua Tian       int PredCycle = getOriCycle(PredMI);
441b6bf4024SHua Tian       ExpectCycle = std::max(ExpectCycle, PredCycle + (int)Pred.getLatency());
442b6bf4024SHua Tian     }
443d27ee36cSKai Yan     // Zero cost instructions do not need to check resource.
444d27ee36cSKai Yan     if (!TII->isZeroCost(MI.getOpcode())) {
445b6bf4024SHua Tian       // ResourceManager can be used to detect resource conflicts between the
446b6bf4024SHua Tian       // current MI and the previously inserted MIs.
447b6bf4024SHua Tian       while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) {
448b6bf4024SHua Tian         ++CurCycle;
449b6bf4024SHua Tian         if (CurCycle == (int)WindowIILimit)
450b6bf4024SHua Tian           return CurCycle;
451b6bf4024SHua Tian       }
452b6bf4024SHua Tian       RM.reserveResources(*SU, CurCycle);
453d27ee36cSKai Yan     }
454b6bf4024SHua Tian     OriToCycle[getOriMI(&MI)] = CurCycle;
455b6bf4024SHua Tian     LLVM_DEBUG(dbgs() << "\tCycle " << CurCycle << " [S."
456b6bf4024SHua Tian                       << getOriStage(getOriMI(&MI), Offset) << "]: " << MI);
457b6bf4024SHua Tian   }
458b6bf4024SHua Tian   LLVM_DEBUG(dbgs() << "MaxCycle is " << CurCycle << ".\n");
459b6bf4024SHua Tian   return CurCycle;
460b6bf4024SHua Tian }
461b6bf4024SHua Tian 
462b6bf4024SHua Tian // By utilizing TripleDAG, we can easily establish dependencies between A and B.
463b6bf4024SHua Tian // Based on the MaxCycle and the issue cycle of A and B, we can determine
464b6bf4024SHua Tian // whether it is necessary to add a stall cycle. This is because, without
465b6bf4024SHua Tian // inserting the stall cycle, the latency constraint between A and B cannot be
466b6bf4024SHua Tian // satisfied. The details are as follows:
467b6bf4024SHua Tian //
468b6bf4024SHua Tian // New MBB:
469b6bf4024SHua Tian // ========================================
470b6bf4024SHua Tian //                 < Phis >
471b6bf4024SHua Tian // ========================================     (sliding direction)
472b6bf4024SHua Tian // MBB copy 1                                            |
473b6bf4024SHua Tian //                                                       V
474b6bf4024SHua Tian //
475b6bf4024SHua Tian // ~~~~~~~~~~~~~~~~~~~|~~~~~~~~~~~~~~~~~~~~  ----schedule window-----
476b6bf4024SHua Tian //                    |                                  |
477b6bf4024SHua Tian // ===================V====================              |
478b6bf4024SHua Tian // MBB copy 2      < MI B >                              |
479b6bf4024SHua Tian //                                                       |
480b6bf4024SHua Tian //                 < MI A >                              V
481b6bf4024SHua Tian // ~~~~~~~~~~~~~~~~~~~:~~~~~~~~~~~~~~~~~~~~  ------------------------
482b6bf4024SHua Tian //                    :
483b6bf4024SHua Tian // ===================V====================
484b6bf4024SHua Tian // MBB copy 3      < MI B'>
485b6bf4024SHua Tian //
486b6bf4024SHua Tian //
487b6bf4024SHua Tian //
488b6bf4024SHua Tian //
489b6bf4024SHua Tian // ========================================
490b6bf4024SHua Tian //              < Terminators >
491b6bf4024SHua Tian // ========================================
492b6bf4024SHua Tian int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
493b6bf4024SHua Tian   int MaxStallCycle = 0;
494*90a99798SKai Yan   int CurrentII = MaxCycle + 1;
495b6bf4024SHua Tian   auto Range = getScheduleRange(Offset, SchedInstrNum);
496b6bf4024SHua Tian   for (auto &MI : Range) {
497b6bf4024SHua Tian     auto *SU = TripleDAG->getSUnit(&MI);
498b6bf4024SHua Tian     int DefCycle = getOriCycle(&MI);
499b6bf4024SHua Tian     for (auto &Succ : SU->Succs) {
500355e4a9eSHua Tian       if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU)
501b6bf4024SHua Tian         continue;
502*90a99798SKai Yan       // If the expected cycle does not exceed CurrentII, no check is needed.
503*90a99798SKai Yan       if (DefCycle + (int)Succ.getLatency() <= CurrentII)
504b6bf4024SHua Tian         continue;
505b6bf4024SHua Tian       // If the cycle of the scheduled MI A is less than that of the scheduled
506b6bf4024SHua Tian       // MI B, the scheduling will fail because the lifetime of the
507b6bf4024SHua Tian       // corresponding register exceeds II.
508b6bf4024SHua Tian       auto *SuccMI = Succ.getSUnit()->getInstr();
509b6bf4024SHua Tian       int UseCycle = getOriCycle(SuccMI);
510b6bf4024SHua Tian       if (DefCycle < UseCycle)
511b6bf4024SHua Tian         return WindowIILimit;
512b6bf4024SHua Tian       // Get the stall cycle introduced by the register between two trips.
513*90a99798SKai Yan       int StallCycle = DefCycle + (int)Succ.getLatency() - CurrentII - UseCycle;
514b6bf4024SHua Tian       MaxStallCycle = std::max(MaxStallCycle, StallCycle);
515b6bf4024SHua Tian     }
516b6bf4024SHua Tian   }
517b6bf4024SHua Tian   LLVM_DEBUG(dbgs() << "MaxStallCycle is " << MaxStallCycle << ".\n");
518b6bf4024SHua Tian   return MaxStallCycle;
519b6bf4024SHua Tian }
520b6bf4024SHua Tian 
521b6bf4024SHua Tian unsigned WindowScheduler::analyseII(ScheduleDAGInstrs &DAG, unsigned Offset) {
522b6bf4024SHua Tian   LLVM_DEBUG(dbgs() << "Start analyzing II:\n");
523b6bf4024SHua Tian   int MaxCycle = calculateMaxCycle(DAG, Offset);
524b6bf4024SHua Tian   if (MaxCycle == (int)WindowIILimit)
525b6bf4024SHua Tian     return MaxCycle;
526b6bf4024SHua Tian   int StallCycle = calculateStallCycle(Offset, MaxCycle);
527b6bf4024SHua Tian   if (StallCycle == (int)WindowIILimit)
528b6bf4024SHua Tian     return StallCycle;
529b6bf4024SHua Tian   // The value of II is equal to the maximum execution cycle plus 1.
530b6bf4024SHua Tian   return MaxCycle + StallCycle + 1;
531b6bf4024SHua Tian }
532b6bf4024SHua Tian 
533b6bf4024SHua Tian void WindowScheduler::schedulePhi(int Offset, unsigned &II) {
534b6bf4024SHua Tian   LLVM_DEBUG(dbgs() << "Start scheduling Phis:\n");
535b6bf4024SHua Tian   for (auto &Phi : MBB->phis()) {
536b6bf4024SHua Tian     int LateCycle = INT_MAX;
537b6bf4024SHua Tian     auto *SU = TripleDAG->getSUnit(&Phi);
538b6bf4024SHua Tian     for (auto &Succ : SU->Succs) {
539b6bf4024SHua Tian       // Phi doesn't have any Anti successors.
540b6bf4024SHua Tian       if (Succ.getKind() != SDep::Data)
541b6bf4024SHua Tian         continue;
542b6bf4024SHua Tian       // Phi is scheduled before the successor of stage 0. The issue cycle of
543b6bf4024SHua Tian       // phi is the latest cycle in this interval.
544b6bf4024SHua Tian       auto *SuccMI = Succ.getSUnit()->getInstr();
545b6bf4024SHua Tian       int Cycle = getOriCycle(SuccMI);
546b6bf4024SHua Tian       if (getOriStage(getOriMI(SuccMI), Offset) == 0)
547b6bf4024SHua Tian         LateCycle = std::min(LateCycle, Cycle);
548b6bf4024SHua Tian     }
549b6bf4024SHua Tian     // The anti-dependency of phi need to be handled separately in the same way.
550b6bf4024SHua Tian     if (Register AntiReg = getAntiRegister(&Phi)) {
551b6bf4024SHua Tian       auto *AntiMI = MRI->getVRegDef(AntiReg);
552dd1d2b7cSHua Tian       // AntiReg may be defined outside the kernel MBB.
553dd1d2b7cSHua Tian       if (AntiMI->getParent() == MBB) {
554b6bf4024SHua Tian         auto AntiCycle = getOriCycle(AntiMI);
555b6bf4024SHua Tian         if (getOriStage(getOriMI(AntiMI), Offset) == 0)
556b6bf4024SHua Tian           LateCycle = std::min(LateCycle, AntiCycle);
557b6bf4024SHua Tian       }
558dd1d2b7cSHua Tian     }
559b6bf4024SHua Tian     // If there is no limit to the late cycle, a default value is given.
560b6bf4024SHua Tian     if (LateCycle == INT_MAX)
561b6bf4024SHua Tian       LateCycle = (int)(II - 1);
562b6bf4024SHua Tian     LLVM_DEBUG(dbgs() << "\tCycle range [0, " << LateCycle << "] " << Phi);
563b6bf4024SHua Tian     // The issue cycle of phi is set to the latest cycle in the interval.
564b6bf4024SHua Tian     auto *OriPhi = getOriMI(&Phi);
565b6bf4024SHua Tian     OriToCycle[OriPhi] = LateCycle;
566b6bf4024SHua Tian   }
567b6bf4024SHua Tian }
568b6bf4024SHua Tian 
569b6bf4024SHua Tian DenseMap<MachineInstr *, int> WindowScheduler::getIssueOrder(unsigned Offset,
570b6bf4024SHua Tian                                                              unsigned II) {
571b6bf4024SHua Tian   // At each issue cycle, phi is placed before MIs in stage 0. So the simplest
572b6bf4024SHua Tian   // way is to put phi at the beginning of the current cycle.
573b6bf4024SHua Tian   DenseMap<int, SmallVector<MachineInstr *>> CycleToMIs;
574b6bf4024SHua Tian   auto Range = getScheduleRange(Offset, SchedInstrNum);
575b6bf4024SHua Tian   for (auto &Phi : MBB->phis())
576b6bf4024SHua Tian     CycleToMIs[getOriCycle(&Phi)].push_back(getOriMI(&Phi));
577b6bf4024SHua Tian   for (auto &MI : Range)
578b6bf4024SHua Tian     CycleToMIs[getOriCycle(&MI)].push_back(getOriMI(&MI));
579b6bf4024SHua Tian   // Each MI is assigned a separate ordered Id, which is used as a sort marker
580b6bf4024SHua Tian   // in the following expand process.
581b6bf4024SHua Tian   DenseMap<MachineInstr *, int> IssueOrder;
582b6bf4024SHua Tian   int Id = 0;
583b6bf4024SHua Tian   for (int Cycle = 0; Cycle < (int)II; ++Cycle) {
584b6bf4024SHua Tian     if (!CycleToMIs.count(Cycle))
585b6bf4024SHua Tian       continue;
586b6bf4024SHua Tian     for (auto *MI : CycleToMIs[Cycle])
587b6bf4024SHua Tian       IssueOrder[MI] = Id++;
588b6bf4024SHua Tian   }
589b6bf4024SHua Tian   return IssueOrder;
590b6bf4024SHua Tian }
591b6bf4024SHua Tian 
592b6bf4024SHua Tian void WindowScheduler::updateScheduleResult(unsigned Offset, unsigned II) {
593b6bf4024SHua Tian   // At the first update, Offset is equal to SchedPhiNum. At this time, only
594b6bf4024SHua Tian   // BestII, BestOffset, and BaseII need to be updated.
595b6bf4024SHua Tian   if (Offset == SchedPhiNum) {
596b6bf4024SHua Tian     BestII = II;
597b6bf4024SHua Tian     BestOffset = SchedPhiNum;
598b6bf4024SHua Tian     BaseII = II;
599b6bf4024SHua Tian     return;
600b6bf4024SHua Tian   }
601b6bf4024SHua Tian   // The update will only continue if the II is smaller than BestII and the II
602b6bf4024SHua Tian   // is sufficiently small.
603b6bf4024SHua Tian   if ((II >= BestII) || (II + WindowDiffLimit > BaseII))
604b6bf4024SHua Tian     return;
605b6bf4024SHua Tian   BestII = II;
606b6bf4024SHua Tian   BestOffset = Offset;
607b6bf4024SHua Tian   // Record the result of the current list scheduling, noting that each MI is
608b6bf4024SHua Tian   // stored unordered in SchedResult.
609b6bf4024SHua Tian   SchedResult.clear();
610b6bf4024SHua Tian   auto IssueOrder = getIssueOrder(Offset, II);
611b6bf4024SHua Tian   for (auto &Pair : OriToCycle) {
612b6bf4024SHua Tian     assert(IssueOrder.count(Pair.first) && "Cannot find original MI!");
613b6bf4024SHua Tian     SchedResult.push_back(std::make_tuple(Pair.first, Pair.second,
614b6bf4024SHua Tian                                           getOriStage(Pair.first, Offset),
615b6bf4024SHua Tian                                           IssueOrder[Pair.first]));
616b6bf4024SHua Tian   }
617b6bf4024SHua Tian }
618b6bf4024SHua Tian 
619b6bf4024SHua Tian void WindowScheduler::expand() {
620b6bf4024SHua Tian   // The MIs in the SchedResult are sorted by the issue order ID.
621b6bf4024SHua Tian   llvm::stable_sort(SchedResult,
622b6bf4024SHua Tian                     [](const std::tuple<MachineInstr *, int, int, int> &A,
623b6bf4024SHua Tian                        const std::tuple<MachineInstr *, int, int, int> &B) {
624b6bf4024SHua Tian                       return std::get<3>(A) < std::get<3>(B);
625b6bf4024SHua Tian                     });
626b6bf4024SHua Tian   // Use the scheduling infrastructure for expansion, noting that InstrChanges
627b6bf4024SHua Tian   // is not supported here.
628b6bf4024SHua Tian   DenseMap<MachineInstr *, int> Cycles, Stages;
629b6bf4024SHua Tian   std::vector<MachineInstr *> OrderedInsts;
630b6bf4024SHua Tian   for (auto &Info : SchedResult) {
631b6bf4024SHua Tian     auto *MI = std::get<0>(Info);
632b6bf4024SHua Tian     OrderedInsts.push_back(MI);
633b6bf4024SHua Tian     Cycles[MI] = std::get<1>(Info);
634b6bf4024SHua Tian     Stages[MI] = std::get<2>(Info);
635b6bf4024SHua Tian     LLVM_DEBUG(dbgs() << "\tCycle " << Cycles[MI] << " [S." << Stages[MI]
636b6bf4024SHua Tian                       << "]: " << *MI);
637b6bf4024SHua Tian   }
638b6bf4024SHua Tian   ModuloSchedule MS(*MF, &Loop, std::move(OrderedInsts), std::move(Cycles),
639b6bf4024SHua Tian                     std::move(Stages));
640b6bf4024SHua Tian   ModuloScheduleExpander MSE(*MF, MS, *Context->LIS,
641b6bf4024SHua Tian                              ModuloScheduleExpander::InstrChangesTy());
642b6bf4024SHua Tian   MSE.expand();
643b6bf4024SHua Tian   MSE.cleanup();
644b6bf4024SHua Tian }
645b6bf4024SHua Tian 
646b6bf4024SHua Tian void WindowScheduler::updateLiveIntervals() {
647b6bf4024SHua Tian   SmallVector<Register, 128> UsedRegs;
648b6bf4024SHua Tian   for (MachineInstr &MI : *MBB)
649b6bf4024SHua Tian     for (const MachineOperand &MO : MI.operands()) {
650b6bf4024SHua Tian       if (!MO.isReg() || MO.getReg() == 0)
651b6bf4024SHua Tian         continue;
652b6bf4024SHua Tian       Register Reg = MO.getReg();
653b6bf4024SHua Tian       if (!is_contained(UsedRegs, Reg))
654b6bf4024SHua Tian         UsedRegs.push_back(Reg);
655b6bf4024SHua Tian     }
656b6bf4024SHua Tian   Context->LIS->repairIntervalsInRange(MBB, MBB->begin(), MBB->end(), UsedRegs);
657b6bf4024SHua Tian }
658b6bf4024SHua Tian 
659b6bf4024SHua Tian iterator_range<MachineBasicBlock::iterator>
660b6bf4024SHua Tian WindowScheduler::getScheduleRange(unsigned Offset, unsigned Num) {
661b6bf4024SHua Tian   auto RegionBegin = MBB->begin();
662b6bf4024SHua Tian   std::advance(RegionBegin, Offset);
663b6bf4024SHua Tian   auto RegionEnd = RegionBegin;
664b6bf4024SHua Tian   std::advance(RegionEnd, Num);
665b6bf4024SHua Tian   return make_range(RegionBegin, RegionEnd);
666b6bf4024SHua Tian }
667b6bf4024SHua Tian 
668b6bf4024SHua Tian int WindowScheduler::getOriCycle(MachineInstr *NewMI) {
669b6bf4024SHua Tian   assert(TriToOri.count(NewMI) && "Cannot find original MI!");
670b6bf4024SHua Tian   auto *OriMI = TriToOri[NewMI];
671b6bf4024SHua Tian   assert(OriToCycle.count(OriMI) && "Cannot find schedule cycle!");
672b6bf4024SHua Tian   return OriToCycle[OriMI];
673b6bf4024SHua Tian }
674b6bf4024SHua Tian 
675b6bf4024SHua Tian MachineInstr *WindowScheduler::getOriMI(MachineInstr *NewMI) {
676b6bf4024SHua Tian   assert(TriToOri.count(NewMI) && "Cannot find original MI!");
677b6bf4024SHua Tian   return TriToOri[NewMI];
678b6bf4024SHua Tian }
679b6bf4024SHua Tian 
680b6bf4024SHua Tian unsigned WindowScheduler::getOriStage(MachineInstr *OriMI, unsigned Offset) {
681b6bf4024SHua Tian   assert(llvm::find(OriMIs, OriMI) != OriMIs.end() &&
682b6bf4024SHua Tian          "Cannot find OriMI in OriMIs!");
683b6bf4024SHua Tian   // If there is no instruction fold, all MI stages are 0.
684b6bf4024SHua Tian   if (Offset == SchedPhiNum)
685b6bf4024SHua Tian     return 0;
686b6bf4024SHua Tian   // For those MIs with an ID less than the Offset, their stages are set to 0,
687b6bf4024SHua Tian   // while the rest are set to 1.
688b6bf4024SHua Tian   unsigned Id = 0;
689b6bf4024SHua Tian   for (auto *MI : OriMIs) {
690b6bf4024SHua Tian     if (MI->isMetaInstruction())
691b6bf4024SHua Tian       continue;
692b6bf4024SHua Tian     if (MI == OriMI)
693b6bf4024SHua Tian       break;
694b6bf4024SHua Tian     ++Id;
695b6bf4024SHua Tian   }
696b6bf4024SHua Tian   return Id >= (size_t)Offset ? 1 : 0;
697b6bf4024SHua Tian }
698b6bf4024SHua Tian 
699b6bf4024SHua Tian Register WindowScheduler::getAntiRegister(MachineInstr *Phi) {
700b6bf4024SHua Tian   assert(Phi->isPHI() && "Expecting PHI!");
701b6bf4024SHua Tian   Register AntiReg;
702b6bf4024SHua Tian   for (auto MO : Phi->uses()) {
703b6bf4024SHua Tian     if (MO.isReg())
704b6bf4024SHua Tian       AntiReg = MO.getReg();
705dd1d2b7cSHua Tian     else if (MO.isMBB() && MO.getMBB() == MBB)
706b6bf4024SHua Tian       return AntiReg;
707b6bf4024SHua Tian   }
708b6bf4024SHua Tian   return 0;
709b6bf4024SHua Tian }
710