1b6bf4024SHua Tian //======----------- WindowScheduler.cpp - window scheduler -------------======// 2b6bf4024SHua Tian // 3b6bf4024SHua Tian // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4b6bf4024SHua Tian // See https://llvm.org/LICENSE.txt for license information. 5b6bf4024SHua Tian // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6b6bf4024SHua Tian // 7b6bf4024SHua Tian //===----------------------------------------------------------------------===// 8b6bf4024SHua Tian // 9b6bf4024SHua Tian // An implementation of the Window Scheduling software pipelining algorithm. 10b6bf4024SHua Tian // 11b6bf4024SHua Tian // The fundamental concept of the window scheduling algorithm involves folding 12b6bf4024SHua Tian // the original MBB at a specific position, followed by list scheduling on the 13b6bf4024SHua Tian // folded MIs. The optimal scheduling result is then chosen from various folding 14b6bf4024SHua Tian // positions as the final scheduling outcome. 15b6bf4024SHua Tian // 16b6bf4024SHua Tian // The primary challenge in this algorithm lies in generating the folded MIs and 17b6bf4024SHua Tian // establishing their dependencies. We have innovatively employed a new MBB, 18b6bf4024SHua Tian // created by copying the original MBB three times, known as TripleMBB. This 19b6bf4024SHua Tian // TripleMBB enables the convenient implementation of MI folding and dependency 20b6bf4024SHua Tian // establishment. To facilitate the algorithm's implementation, we have also 21b6bf4024SHua Tian // devised data structures such as OriMIs, TriMIs, TriToOri, and OriToCycle. 22b6bf4024SHua Tian // 23b6bf4024SHua Tian // Another challenge in the algorithm is the scheduling of phis. Semantically, 24b6bf4024SHua Tian // it is difficult to place the phis in the window and perform list scheduling. 25b6bf4024SHua Tian // Therefore, we schedule these phis separately after each list scheduling. 26b6bf4024SHua Tian // 27b6bf4024SHua Tian // The provided implementation is designed for use before the Register Allocator 28b6bf4024SHua Tian // (RA). If the target requires implementation after RA, it is recommended to 29b6bf4024SHua Tian // reimplement analyseII(), schedulePhi(), and expand(). Additionally, 30b6bf4024SHua Tian // target-specific logic can be added in initialize(), preProcess(), and 31b6bf4024SHua Tian // postProcess(). 32b6bf4024SHua Tian // 33b6bf4024SHua Tian // Lastly, it is worth mentioning that getSearchIndexes() is an important 34b6bf4024SHua Tian // function. We have experimented with more complex heuristics on downstream 35b6bf4024SHua Tian // target and achieved favorable results. 36b6bf4024SHua Tian // 37b6bf4024SHua Tian //===----------------------------------------------------------------------===// 38b6bf4024SHua Tian #include "llvm/CodeGen/WindowScheduler.h" 39b6bf4024SHua Tian #include "llvm/ADT/Statistic.h" 40b6bf4024SHua Tian #include "llvm/CodeGen/LiveIntervals.h" 41b6bf4024SHua Tian #include "llvm/CodeGen/MachineLoopInfo.h" 42b6bf4024SHua Tian #include "llvm/CodeGen/MachinePipeliner.h" 43b6bf4024SHua Tian #include "llvm/CodeGen/ModuloSchedule.h" 44b6bf4024SHua Tian #include "llvm/CodeGen/TargetPassConfig.h" 45b6bf4024SHua Tian #include "llvm/Support/CommandLine.h" 46b6bf4024SHua Tian #include "llvm/Support/Debug.h" 47b6bf4024SHua Tian #include "llvm/Support/TimeProfiler.h" 48b6bf4024SHua Tian 49b6bf4024SHua Tian using namespace llvm; 50b6bf4024SHua Tian 51b6bf4024SHua Tian #define DEBUG_TYPE "pipeliner" 52b6bf4024SHua Tian 53b6bf4024SHua Tian namespace { 54b6bf4024SHua Tian STATISTIC(NumTryWindowSchedule, 55b6bf4024SHua Tian "Number of loops that we attempt to use window scheduling"); 56b6bf4024SHua Tian STATISTIC(NumTryWindowSearch, 57b6bf4024SHua Tian "Number of times that we run list schedule in the window scheduling"); 58b6bf4024SHua Tian STATISTIC(NumWindowSchedule, 59b6bf4024SHua Tian "Number of loops that we successfully use window scheduling"); 60b6bf4024SHua Tian STATISTIC(NumFailAnalyseII, 61b6bf4024SHua Tian "Window scheduling abort due to the failure of the II analysis"); 62b6bf4024SHua Tian 63b6bf4024SHua Tian cl::opt<unsigned> 64b6bf4024SHua Tian WindowSearchNum("window-search-num", 65b6bf4024SHua Tian cl::desc("The number of searches per loop in the window " 66b6bf4024SHua Tian "algorithm. 0 means no search number limit."), 67b6bf4024SHua Tian cl::Hidden, cl::init(6)); 68b6bf4024SHua Tian 69b6bf4024SHua Tian cl::opt<unsigned> WindowSearchRatio( 70b6bf4024SHua Tian "window-search-ratio", 71b6bf4024SHua Tian cl::desc("The ratio of searches per loop in the window algorithm. 100 " 72b6bf4024SHua Tian "means search all positions in the loop, while 0 means not " 73b6bf4024SHua Tian "performing any search."), 74b6bf4024SHua Tian cl::Hidden, cl::init(40)); 75b6bf4024SHua Tian 76b6bf4024SHua Tian cl::opt<unsigned> WindowIICoeff( 77b6bf4024SHua Tian "window-ii-coeff", 78b6bf4024SHua Tian cl::desc( 79b6bf4024SHua Tian "The coefficient used when initializing II in the window algorithm."), 80b6bf4024SHua Tian cl::Hidden, cl::init(5)); 81b6bf4024SHua Tian 82b6bf4024SHua Tian cl::opt<unsigned> WindowRegionLimit( 83b6bf4024SHua Tian "window-region-limit", 84b6bf4024SHua Tian cl::desc( 85b6bf4024SHua Tian "The lower limit of the scheduling region in the window algorithm."), 86b6bf4024SHua Tian cl::Hidden, cl::init(3)); 87b6bf4024SHua Tian 88b6bf4024SHua Tian cl::opt<unsigned> WindowDiffLimit( 89b6bf4024SHua Tian "window-diff-limit", 90b6bf4024SHua Tian cl::desc("The lower limit of the difference between best II and base II in " 91b6bf4024SHua Tian "the window algorithm. If the difference is smaller than " 92b6bf4024SHua Tian "this lower limit, window scheduling will not be performed."), 93b6bf4024SHua Tian cl::Hidden, cl::init(2)); 94b6bf4024SHua Tian } // namespace 95b6bf4024SHua Tian 96b6bf4024SHua Tian // WindowIILimit serves as an indicator of abnormal scheduling results and could 97b6bf4024SHua Tian // potentially be referenced by the derived target window scheduler. 98b6bf4024SHua Tian cl::opt<unsigned> 99b6bf4024SHua Tian WindowIILimit("window-ii-limit", 100b6bf4024SHua Tian cl::desc("The upper limit of II in the window algorithm."), 101b6bf4024SHua Tian cl::Hidden, cl::init(1000)); 102b6bf4024SHua Tian 103b6bf4024SHua Tian WindowScheduler::WindowScheduler(MachineSchedContext *C, MachineLoop &ML) 104b6bf4024SHua Tian : Context(C), MF(C->MF), MBB(ML.getHeader()), Loop(ML), 105b6bf4024SHua Tian Subtarget(&MF->getSubtarget()), TII(Subtarget->getInstrInfo()), 106b6bf4024SHua Tian TRI(Subtarget->getRegisterInfo()), MRI(&MF->getRegInfo()) { 107b6bf4024SHua Tian TripleDAG = std::unique_ptr<ScheduleDAGInstrs>( 108b6bf4024SHua Tian createMachineScheduler(/*OnlyBuildGraph=*/true)); 109b6bf4024SHua Tian } 110b6bf4024SHua Tian 111b6bf4024SHua Tian bool WindowScheduler::run() { 112b6bf4024SHua Tian if (!initialize()) { 113b6bf4024SHua Tian LLVM_DEBUG(dbgs() << "The WindowScheduler failed to initialize!\n"); 114b6bf4024SHua Tian return false; 115b6bf4024SHua Tian } 116b6bf4024SHua Tian // The window algorithm is time-consuming, and its compilation time should be 117b6bf4024SHua Tian // taken into consideration. 118b6bf4024SHua Tian TimeTraceScope Scope("WindowSearch"); 119b6bf4024SHua Tian ++NumTryWindowSchedule; 120b6bf4024SHua Tian // Performing the relevant processing before window scheduling. 121b6bf4024SHua Tian preProcess(); 122b6bf4024SHua Tian // The main window scheduling begins. 123b6bf4024SHua Tian std::unique_ptr<ScheduleDAGInstrs> SchedDAG(createMachineScheduler()); 124b6bf4024SHua Tian auto SearchIndexes = getSearchIndexes(WindowSearchNum, WindowSearchRatio); 125b6bf4024SHua Tian for (unsigned Idx : SearchIndexes) { 126b6bf4024SHua Tian OriToCycle.clear(); 127b6bf4024SHua Tian ++NumTryWindowSearch; 128b6bf4024SHua Tian // The scheduling starts with non-phi instruction, so SchedPhiNum needs to 129b6bf4024SHua Tian // be added to Idx. 130b6bf4024SHua Tian unsigned Offset = Idx + SchedPhiNum; 131b6bf4024SHua Tian auto Range = getScheduleRange(Offset, SchedInstrNum); 132b6bf4024SHua Tian SchedDAG->startBlock(MBB); 133b6bf4024SHua Tian SchedDAG->enterRegion(MBB, Range.begin(), Range.end(), SchedInstrNum); 134b6bf4024SHua Tian SchedDAG->schedule(); 135b6bf4024SHua Tian LLVM_DEBUG(SchedDAG->dump()); 136b6bf4024SHua Tian unsigned II = analyseII(*SchedDAG, Offset); 137b6bf4024SHua Tian if (II == WindowIILimit) { 138b6bf4024SHua Tian restoreTripleMBB(); 139b6bf4024SHua Tian LLVM_DEBUG(dbgs() << "Can't find a valid II. Keep searching...\n"); 140b6bf4024SHua Tian ++NumFailAnalyseII; 141b6bf4024SHua Tian continue; 142b6bf4024SHua Tian } 143b6bf4024SHua Tian schedulePhi(Offset, II); 144b6bf4024SHua Tian updateScheduleResult(Offset, II); 145b6bf4024SHua Tian restoreTripleMBB(); 146b6bf4024SHua Tian LLVM_DEBUG(dbgs() << "Current window Offset is " << Offset << " and II is " 147b6bf4024SHua Tian << II << ".\n"); 148b6bf4024SHua Tian } 149b6bf4024SHua Tian // Performing the relevant processing after window scheduling. 150b6bf4024SHua Tian postProcess(); 151b6bf4024SHua Tian // Check whether the scheduling result is valid. 152b6bf4024SHua Tian if (!isScheduleValid()) { 153b6bf4024SHua Tian LLVM_DEBUG(dbgs() << "Window scheduling is not needed!\n"); 154b6bf4024SHua Tian return false; 155b6bf4024SHua Tian } 156b6bf4024SHua Tian LLVM_DEBUG(dbgs() << "\nBest window offset is " << BestOffset 157b6bf4024SHua Tian << " and Best II is " << BestII << ".\n"); 158b6bf4024SHua Tian // Expand the scheduling result to prologue, kernel, and epilogue. 159b6bf4024SHua Tian expand(); 160b6bf4024SHua Tian ++NumWindowSchedule; 161b6bf4024SHua Tian return true; 162b6bf4024SHua Tian } 163b6bf4024SHua Tian 164b6bf4024SHua Tian ScheduleDAGInstrs * 165b6bf4024SHua Tian WindowScheduler::createMachineScheduler(bool OnlyBuildGraph) { 166b6bf4024SHua Tian return OnlyBuildGraph 167b6bf4024SHua Tian ? new ScheduleDAGMI( 168b6bf4024SHua Tian Context, std::make_unique<PostGenericScheduler>(Context), 169b6bf4024SHua Tian true) 170b6bf4024SHua Tian : Context->PassConfig->createMachineScheduler(Context); 171b6bf4024SHua Tian } 172b6bf4024SHua Tian 173b6bf4024SHua Tian bool WindowScheduler::initialize() { 174b6bf4024SHua Tian if (!Subtarget->enableWindowScheduler()) { 175b6bf4024SHua Tian LLVM_DEBUG(dbgs() << "Target disables the window scheduling!\n"); 176b6bf4024SHua Tian return false; 177b6bf4024SHua Tian } 178b6bf4024SHua Tian // Initialized the member variables used by window algorithm. 179b6bf4024SHua Tian OriMIs.clear(); 180b6bf4024SHua Tian TriMIs.clear(); 181b6bf4024SHua Tian TriToOri.clear(); 182b6bf4024SHua Tian OriToCycle.clear(); 183b6bf4024SHua Tian SchedResult.clear(); 184b6bf4024SHua Tian SchedPhiNum = 0; 185b6bf4024SHua Tian SchedInstrNum = 0; 186b6bf4024SHua Tian BestII = UINT_MAX; 187b6bf4024SHua Tian BestOffset = 0; 188b6bf4024SHua Tian BaseII = 0; 189b6bf4024SHua Tian // List scheduling used in the window algorithm depends on LiveIntervals. 190b6bf4024SHua Tian if (!Context->LIS) { 191b6bf4024SHua Tian LLVM_DEBUG(dbgs() << "There is no LiveIntervals information!\n"); 192b6bf4024SHua Tian return false; 193b6bf4024SHua Tian } 194b6bf4024SHua Tian // Check each MI in MBB. 195811e505cSHua Tian SmallSet<Register, 8> PrevDefs; 196811e505cSHua Tian SmallSet<Register, 8> PrevUses; 197811e505cSHua Tian auto IsLoopCarried = [&](MachineInstr &Phi) { 198811e505cSHua Tian // Two cases are checked here: (1)The virtual register defined by the 199811e505cSHua Tian // preceding phi is used by the succeeding phi;(2)The preceding phi uses the 200811e505cSHua Tian // virtual register defined by the succeeding phi. 201811e505cSHua Tian if (PrevUses.count(Phi.getOperand(0).getReg())) 202811e505cSHua Tian return true; 203811e505cSHua Tian PrevDefs.insert(Phi.getOperand(0).getReg()); 204811e505cSHua Tian for (unsigned I = 1, E = Phi.getNumOperands(); I != E; I += 2) { 205811e505cSHua Tian if (PrevDefs.count(Phi.getOperand(I).getReg())) 206811e505cSHua Tian return true; 207811e505cSHua Tian PrevUses.insert(Phi.getOperand(I).getReg()); 208811e505cSHua Tian } 209811e505cSHua Tian return false; 210811e505cSHua Tian }; 211b6bf4024SHua Tian auto PLI = TII->analyzeLoopForPipelining(MBB); 212b6bf4024SHua Tian for (auto &MI : *MBB) { 213b6bf4024SHua Tian if (MI.isMetaInstruction() || MI.isTerminator()) 214b6bf4024SHua Tian continue; 215b6bf4024SHua Tian if (MI.isPHI()) { 216811e505cSHua Tian if (IsLoopCarried(MI)) { 217811e505cSHua Tian LLVM_DEBUG(dbgs() << "Loop carried phis are not supported yet!\n"); 218b6bf4024SHua Tian return false; 219b6bf4024SHua Tian } 220b6bf4024SHua Tian ++SchedPhiNum; 221b6bf4024SHua Tian ++BestOffset; 222b6bf4024SHua Tian } else 223b6bf4024SHua Tian ++SchedInstrNum; 224b6bf4024SHua Tian if (TII->isSchedulingBoundary(MI, MBB, *MF)) { 225b6bf4024SHua Tian LLVM_DEBUG( 226b6bf4024SHua Tian dbgs() << "Boundary MI is not allowed in window scheduling!\n"); 227b6bf4024SHua Tian return false; 228b6bf4024SHua Tian } 229b6bf4024SHua Tian if (PLI->shouldIgnoreForPipelining(&MI)) { 230b6bf4024SHua Tian LLVM_DEBUG(dbgs() << "Special MI defined by target is not allowed in " 231b6bf4024SHua Tian "window scheduling!\n"); 232b6bf4024SHua Tian return false; 233b6bf4024SHua Tian } 234b6bf4024SHua Tian for (auto &Def : MI.all_defs()) 2352d6ff0c5SKai Yan if (Def.isReg() && Def.getReg().isPhysical()) { 2362d6ff0c5SKai Yan LLVM_DEBUG(dbgs() << "Physical registers are not supported in " 2372d6ff0c5SKai Yan "window scheduling!\n"); 238b6bf4024SHua Tian return false; 239b6bf4024SHua Tian } 2402d6ff0c5SKai Yan } 241b6bf4024SHua Tian if (SchedInstrNum <= WindowRegionLimit) { 242b6bf4024SHua Tian LLVM_DEBUG(dbgs() << "There are too few MIs in the window region!\n"); 243b6bf4024SHua Tian return false; 244b6bf4024SHua Tian } 245b6bf4024SHua Tian return true; 246b6bf4024SHua Tian } 247b6bf4024SHua Tian 248b6bf4024SHua Tian void WindowScheduler::preProcess() { 249b6bf4024SHua Tian // Prior to window scheduling, it's necessary to backup the original MBB, 250b6bf4024SHua Tian // generate a new TripleMBB, and build a TripleDAG based on the TripleMBB. 251b6bf4024SHua Tian backupMBB(); 252b6bf4024SHua Tian generateTripleMBB(); 253b6bf4024SHua Tian TripleDAG->startBlock(MBB); 254b6bf4024SHua Tian TripleDAG->enterRegion( 255b6bf4024SHua Tian MBB, MBB->begin(), MBB->getFirstTerminator(), 256b6bf4024SHua Tian std::distance(MBB->begin(), MBB->getFirstTerminator())); 257b6bf4024SHua Tian TripleDAG->buildSchedGraph(Context->AA); 258b6bf4024SHua Tian } 259b6bf4024SHua Tian 260b6bf4024SHua Tian void WindowScheduler::postProcess() { 261b6bf4024SHua Tian // After window scheduling, it's necessary to clear the TripleDAG and restore 262b6bf4024SHua Tian // to the original MBB. 263b6bf4024SHua Tian TripleDAG->exitRegion(); 264b6bf4024SHua Tian TripleDAG->finishBlock(); 265b6bf4024SHua Tian restoreMBB(); 266b6bf4024SHua Tian } 267b6bf4024SHua Tian 268b6bf4024SHua Tian void WindowScheduler::backupMBB() { 269b6bf4024SHua Tian for (auto &MI : MBB->instrs()) 270b6bf4024SHua Tian OriMIs.push_back(&MI); 271b6bf4024SHua Tian // Remove MIs and the corresponding live intervals. 272b6bf4024SHua Tian for (auto &MI : make_early_inc_range(*MBB)) { 273b6bf4024SHua Tian Context->LIS->getSlotIndexes()->removeMachineInstrFromMaps(MI, true); 274b6bf4024SHua Tian MBB->remove(&MI); 275b6bf4024SHua Tian } 276b6bf4024SHua Tian } 277b6bf4024SHua Tian 278b6bf4024SHua Tian void WindowScheduler::restoreMBB() { 279b6bf4024SHua Tian // Erase MIs and the corresponding live intervals. 280b6bf4024SHua Tian for (auto &MI : make_early_inc_range(*MBB)) { 281b6bf4024SHua Tian Context->LIS->getSlotIndexes()->removeMachineInstrFromMaps(MI, true); 282b6bf4024SHua Tian MI.eraseFromParent(); 283b6bf4024SHua Tian } 284b6bf4024SHua Tian // Restore MBB to the state before window scheduling. 285b6bf4024SHua Tian for (auto *MI : OriMIs) 286b6bf4024SHua Tian MBB->push_back(MI); 287b6bf4024SHua Tian updateLiveIntervals(); 288b6bf4024SHua Tian } 289b6bf4024SHua Tian 290b6bf4024SHua Tian void WindowScheduler::generateTripleMBB() { 291b6bf4024SHua Tian const unsigned DuplicateNum = 3; 292b6bf4024SHua Tian TriMIs.clear(); 293b6bf4024SHua Tian TriToOri.clear(); 294b6bf4024SHua Tian assert(OriMIs.size() > 0 && "The Original MIs were not backed up!"); 295b6bf4024SHua Tian // Step 1: Performing the first copy of MBB instructions, excluding 296b6bf4024SHua Tian // terminators. At the same time, we back up the anti-register of phis. 297b6bf4024SHua Tian // DefPairs hold the old and new define register pairs. 298b6bf4024SHua Tian DenseMap<Register, Register> DefPairs; 299b6bf4024SHua Tian for (auto *MI : OriMIs) { 300b6bf4024SHua Tian if (MI->isMetaInstruction() || MI->isTerminator()) 301b6bf4024SHua Tian continue; 302b6bf4024SHua Tian if (MI->isPHI()) 303b6bf4024SHua Tian if (Register AntiReg = getAntiRegister(MI)) 304b6bf4024SHua Tian DefPairs[MI->getOperand(0).getReg()] = AntiReg; 305b6bf4024SHua Tian auto *NewMI = MF->CloneMachineInstr(MI); 306b6bf4024SHua Tian MBB->push_back(NewMI); 307b6bf4024SHua Tian TriMIs.push_back(NewMI); 308b6bf4024SHua Tian TriToOri[NewMI] = MI; 309b6bf4024SHua Tian } 310b6bf4024SHua Tian // Step 2: Performing the remaining two copies of MBB instructions excluding 311b6bf4024SHua Tian // phis, and the last one contains terminators. At the same time, registers 312b6bf4024SHua Tian // are updated accordingly. 313b6bf4024SHua Tian for (size_t Cnt = 1; Cnt < DuplicateNum; ++Cnt) { 314b6bf4024SHua Tian for (auto *MI : OriMIs) { 315b6bf4024SHua Tian if (MI->isPHI() || MI->isMetaInstruction() || 316b6bf4024SHua Tian (MI->isTerminator() && Cnt < DuplicateNum - 1)) 317b6bf4024SHua Tian continue; 318b6bf4024SHua Tian auto *NewMI = MF->CloneMachineInstr(MI); 319b6bf4024SHua Tian DenseMap<Register, Register> NewDefs; 320b6bf4024SHua Tian // New defines are updated. 321b6bf4024SHua Tian for (auto MO : NewMI->all_defs()) 322b6bf4024SHua Tian if (MO.isReg() && MO.getReg().isVirtual()) { 323b6bf4024SHua Tian Register NewDef = 324b6bf4024SHua Tian MRI->createVirtualRegister(MRI->getRegClass(MO.getReg())); 325b6bf4024SHua Tian NewMI->substituteRegister(MO.getReg(), NewDef, 0, *TRI); 326b6bf4024SHua Tian NewDefs[MO.getReg()] = NewDef; 327b6bf4024SHua Tian } 328b6bf4024SHua Tian // New uses are updated. 329b6bf4024SHua Tian for (auto DefRegPair : DefPairs) 330b6bf4024SHua Tian if (NewMI->readsRegister(DefRegPair.first, TRI)) { 331b6bf4024SHua Tian Register NewUse = DefRegPair.second; 332b6bf4024SHua Tian // Note the update process for '%1 -> %9' in '%10 = sub i32 %9, %3': 333b6bf4024SHua Tian // 334b6bf4024SHua Tian // BB.3: DefPairs 335b6bf4024SHua Tian // ================================== 336b6bf4024SHua Tian // %1 = phi i32 [%2, %BB.1], [%7, %BB.3] (%1,%7) 337b6bf4024SHua Tian // ... 338b6bf4024SHua Tian // ================================== 339b6bf4024SHua Tian // ... 340b6bf4024SHua Tian // %4 = sub i32 %1, %3 341b6bf4024SHua Tian // ... 342b6bf4024SHua Tian // %7 = add i32 %5, %6 343b6bf4024SHua Tian // ... 344b6bf4024SHua Tian // ---------------------------------- 345b6bf4024SHua Tian // ... 346b6bf4024SHua Tian // %8 = sub i32 %7, %3 (%1,%7),(%4,%8) 347b6bf4024SHua Tian // ... 348b6bf4024SHua Tian // %9 = add i32 %5, %6 (%1,%7),(%4,%8),(%7,%9) 349b6bf4024SHua Tian // ... 350b6bf4024SHua Tian // ---------------------------------- 351b6bf4024SHua Tian // ... 352b6bf4024SHua Tian // %10 = sub i32 %9, %3 (%1,%7),(%4,%10),(%7,%9) 353b6bf4024SHua Tian // ... ^ 354b6bf4024SHua Tian // %11 = add i32 %5, %6 (%1,%7),(%4,%10),(%7,%11) 355b6bf4024SHua Tian // ... 356b6bf4024SHua Tian // ================================== 357b6bf4024SHua Tian // < Terminators > 358b6bf4024SHua Tian // ================================== 359b6bf4024SHua Tian if (DefPairs.count(NewUse)) 360b6bf4024SHua Tian NewUse = DefPairs[NewUse]; 361b6bf4024SHua Tian NewMI->substituteRegister(DefRegPair.first, NewUse, 0, *TRI); 362b6bf4024SHua Tian } 363b6bf4024SHua Tian // DefPairs is updated at last. 364b6bf4024SHua Tian for (auto &NewDef : NewDefs) 365b6bf4024SHua Tian DefPairs[NewDef.first] = NewDef.second; 366b6bf4024SHua Tian MBB->push_back(NewMI); 367b6bf4024SHua Tian TriMIs.push_back(NewMI); 368b6bf4024SHua Tian TriToOri[NewMI] = MI; 369b6bf4024SHua Tian } 370b6bf4024SHua Tian } 371b6bf4024SHua Tian // Step 3: The registers used by phis are updated, and they are generated in 372b6bf4024SHua Tian // the third copy of MBB. 373b6bf4024SHua Tian // In the privious example, the old phi is: 374b6bf4024SHua Tian // %1 = phi i32 [%2, %BB.1], [%7, %BB.3] 375b6bf4024SHua Tian // The new phi is: 376b6bf4024SHua Tian // %1 = phi i32 [%2, %BB.1], [%11, %BB.3] 377b6bf4024SHua Tian for (auto &Phi : MBB->phis()) { 378b6bf4024SHua Tian for (auto DefRegPair : DefPairs) 379b6bf4024SHua Tian if (Phi.readsRegister(DefRegPair.first, TRI)) 380b6bf4024SHua Tian Phi.substituteRegister(DefRegPair.first, DefRegPair.second, 0, *TRI); 381b6bf4024SHua Tian } 382b6bf4024SHua Tian updateLiveIntervals(); 383b6bf4024SHua Tian } 384b6bf4024SHua Tian 385b6bf4024SHua Tian void WindowScheduler::restoreTripleMBB() { 386b6bf4024SHua Tian // After list scheduling, the MBB is restored in one traversal. 387b6bf4024SHua Tian for (size_t I = 0; I < TriMIs.size(); ++I) { 388b6bf4024SHua Tian auto *MI = TriMIs[I]; 389b6bf4024SHua Tian auto OldPos = MBB->begin(); 390b6bf4024SHua Tian std::advance(OldPos, I); 391b6bf4024SHua Tian auto CurPos = MI->getIterator(); 392b6bf4024SHua Tian if (CurPos != OldPos) { 393b6bf4024SHua Tian MBB->splice(OldPos, MBB, CurPos); 394b6bf4024SHua Tian Context->LIS->handleMove(*MI, /*UpdateFlags=*/false); 395b6bf4024SHua Tian } 396b6bf4024SHua Tian } 397b6bf4024SHua Tian } 398b6bf4024SHua Tian 399b6bf4024SHua Tian SmallVector<unsigned> WindowScheduler::getSearchIndexes(unsigned SearchNum, 400b6bf4024SHua Tian unsigned SearchRatio) { 401b6bf4024SHua Tian // We use SearchRatio to get the index range, and then evenly get the indexes 402b6bf4024SHua Tian // according to the SearchNum. This is a simple huristic. Depending on the 403b6bf4024SHua Tian // characteristics of the target, more complex algorithms can be used for both 404b6bf4024SHua Tian // performance and compilation time. 405b6bf4024SHua Tian assert(SearchRatio <= 100 && "SearchRatio should be equal or less than 100!"); 406b6bf4024SHua Tian unsigned MaxIdx = SchedInstrNum * SearchRatio / 100; 407b6bf4024SHua Tian unsigned Step = SearchNum > 0 && SearchNum <= MaxIdx ? MaxIdx / SearchNum : 1; 408b6bf4024SHua Tian SmallVector<unsigned> SearchIndexes; 409b6bf4024SHua Tian for (unsigned Idx = 0; Idx < MaxIdx; Idx += Step) 410b6bf4024SHua Tian SearchIndexes.push_back(Idx); 411b6bf4024SHua Tian return SearchIndexes; 412b6bf4024SHua Tian } 413b6bf4024SHua Tian 414b6bf4024SHua Tian int WindowScheduler::getEstimatedII(ScheduleDAGInstrs &DAG) { 415b6bf4024SHua Tian // Sometimes MaxDepth is 0, so it should be limited to the minimum of 1. 416b6bf4024SHua Tian unsigned MaxDepth = 1; 417b6bf4024SHua Tian for (auto &SU : DAG.SUnits) 418b6bf4024SHua Tian MaxDepth = std::max(SU.getDepth() + SU.Latency, MaxDepth); 419b6bf4024SHua Tian return MaxDepth * WindowIICoeff; 420b6bf4024SHua Tian } 421b6bf4024SHua Tian 422b6bf4024SHua Tian int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG, 423b6bf4024SHua Tian unsigned Offset) { 424b6bf4024SHua Tian int InitII = getEstimatedII(DAG); 425b6bf4024SHua Tian ResourceManager RM(Subtarget, &DAG); 426b6bf4024SHua Tian RM.init(InitII); 427b6bf4024SHua Tian // ResourceManager and DAG are used to calculate the maximum cycle for the 428b6bf4024SHua Tian // scheduled MIs. Since MIs in the Region have already been scheduled, the 429b6bf4024SHua Tian // emit cycles can be estimated in order here. 430b6bf4024SHua Tian int CurCycle = 0; 431b6bf4024SHua Tian auto Range = getScheduleRange(Offset, SchedInstrNum); 432b6bf4024SHua Tian for (auto &MI : Range) { 433b6bf4024SHua Tian auto *SU = DAG.getSUnit(&MI); 434b6bf4024SHua Tian int ExpectCycle = CurCycle; 435b6bf4024SHua Tian // The predecessors of current MI determine its earliest issue cycle. 436b6bf4024SHua Tian for (auto &Pred : SU->Preds) { 437355e4a9eSHua Tian if (Pred.isWeak()) 438355e4a9eSHua Tian continue; 439b6bf4024SHua Tian auto *PredMI = Pred.getSUnit()->getInstr(); 440b6bf4024SHua Tian int PredCycle = getOriCycle(PredMI); 441b6bf4024SHua Tian ExpectCycle = std::max(ExpectCycle, PredCycle + (int)Pred.getLatency()); 442b6bf4024SHua Tian } 443d27ee36cSKai Yan // Zero cost instructions do not need to check resource. 444d27ee36cSKai Yan if (!TII->isZeroCost(MI.getOpcode())) { 445b6bf4024SHua Tian // ResourceManager can be used to detect resource conflicts between the 446b6bf4024SHua Tian // current MI and the previously inserted MIs. 447b6bf4024SHua Tian while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) { 448b6bf4024SHua Tian ++CurCycle; 449b6bf4024SHua Tian if (CurCycle == (int)WindowIILimit) 450b6bf4024SHua Tian return CurCycle; 451b6bf4024SHua Tian } 452b6bf4024SHua Tian RM.reserveResources(*SU, CurCycle); 453d27ee36cSKai Yan } 454b6bf4024SHua Tian OriToCycle[getOriMI(&MI)] = CurCycle; 455b6bf4024SHua Tian LLVM_DEBUG(dbgs() << "\tCycle " << CurCycle << " [S." 456b6bf4024SHua Tian << getOriStage(getOriMI(&MI), Offset) << "]: " << MI); 457b6bf4024SHua Tian } 458b6bf4024SHua Tian LLVM_DEBUG(dbgs() << "MaxCycle is " << CurCycle << ".\n"); 459b6bf4024SHua Tian return CurCycle; 460b6bf4024SHua Tian } 461b6bf4024SHua Tian 462b6bf4024SHua Tian // By utilizing TripleDAG, we can easily establish dependencies between A and B. 463b6bf4024SHua Tian // Based on the MaxCycle and the issue cycle of A and B, we can determine 464b6bf4024SHua Tian // whether it is necessary to add a stall cycle. This is because, without 465b6bf4024SHua Tian // inserting the stall cycle, the latency constraint between A and B cannot be 466b6bf4024SHua Tian // satisfied. The details are as follows: 467b6bf4024SHua Tian // 468b6bf4024SHua Tian // New MBB: 469b6bf4024SHua Tian // ======================================== 470b6bf4024SHua Tian // < Phis > 471b6bf4024SHua Tian // ======================================== (sliding direction) 472b6bf4024SHua Tian // MBB copy 1 | 473b6bf4024SHua Tian // V 474b6bf4024SHua Tian // 475b6bf4024SHua Tian // ~~~~~~~~~~~~~~~~~~~|~~~~~~~~~~~~~~~~~~~~ ----schedule window----- 476b6bf4024SHua Tian // | | 477b6bf4024SHua Tian // ===================V==================== | 478b6bf4024SHua Tian // MBB copy 2 < MI B > | 479b6bf4024SHua Tian // | 480b6bf4024SHua Tian // < MI A > V 481b6bf4024SHua Tian // ~~~~~~~~~~~~~~~~~~~:~~~~~~~~~~~~~~~~~~~~ ------------------------ 482b6bf4024SHua Tian // : 483b6bf4024SHua Tian // ===================V==================== 484b6bf4024SHua Tian // MBB copy 3 < MI B'> 485b6bf4024SHua Tian // 486b6bf4024SHua Tian // 487b6bf4024SHua Tian // 488b6bf4024SHua Tian // 489b6bf4024SHua Tian // ======================================== 490b6bf4024SHua Tian // < Terminators > 491b6bf4024SHua Tian // ======================================== 492b6bf4024SHua Tian int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) { 493b6bf4024SHua Tian int MaxStallCycle = 0; 494*90a99798SKai Yan int CurrentII = MaxCycle + 1; 495b6bf4024SHua Tian auto Range = getScheduleRange(Offset, SchedInstrNum); 496b6bf4024SHua Tian for (auto &MI : Range) { 497b6bf4024SHua Tian auto *SU = TripleDAG->getSUnit(&MI); 498b6bf4024SHua Tian int DefCycle = getOriCycle(&MI); 499b6bf4024SHua Tian for (auto &Succ : SU->Succs) { 500355e4a9eSHua Tian if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU) 501b6bf4024SHua Tian continue; 502*90a99798SKai Yan // If the expected cycle does not exceed CurrentII, no check is needed. 503*90a99798SKai Yan if (DefCycle + (int)Succ.getLatency() <= CurrentII) 504b6bf4024SHua Tian continue; 505b6bf4024SHua Tian // If the cycle of the scheduled MI A is less than that of the scheduled 506b6bf4024SHua Tian // MI B, the scheduling will fail because the lifetime of the 507b6bf4024SHua Tian // corresponding register exceeds II. 508b6bf4024SHua Tian auto *SuccMI = Succ.getSUnit()->getInstr(); 509b6bf4024SHua Tian int UseCycle = getOriCycle(SuccMI); 510b6bf4024SHua Tian if (DefCycle < UseCycle) 511b6bf4024SHua Tian return WindowIILimit; 512b6bf4024SHua Tian // Get the stall cycle introduced by the register between two trips. 513*90a99798SKai Yan int StallCycle = DefCycle + (int)Succ.getLatency() - CurrentII - UseCycle; 514b6bf4024SHua Tian MaxStallCycle = std::max(MaxStallCycle, StallCycle); 515b6bf4024SHua Tian } 516b6bf4024SHua Tian } 517b6bf4024SHua Tian LLVM_DEBUG(dbgs() << "MaxStallCycle is " << MaxStallCycle << ".\n"); 518b6bf4024SHua Tian return MaxStallCycle; 519b6bf4024SHua Tian } 520b6bf4024SHua Tian 521b6bf4024SHua Tian unsigned WindowScheduler::analyseII(ScheduleDAGInstrs &DAG, unsigned Offset) { 522b6bf4024SHua Tian LLVM_DEBUG(dbgs() << "Start analyzing II:\n"); 523b6bf4024SHua Tian int MaxCycle = calculateMaxCycle(DAG, Offset); 524b6bf4024SHua Tian if (MaxCycle == (int)WindowIILimit) 525b6bf4024SHua Tian return MaxCycle; 526b6bf4024SHua Tian int StallCycle = calculateStallCycle(Offset, MaxCycle); 527b6bf4024SHua Tian if (StallCycle == (int)WindowIILimit) 528b6bf4024SHua Tian return StallCycle; 529b6bf4024SHua Tian // The value of II is equal to the maximum execution cycle plus 1. 530b6bf4024SHua Tian return MaxCycle + StallCycle + 1; 531b6bf4024SHua Tian } 532b6bf4024SHua Tian 533b6bf4024SHua Tian void WindowScheduler::schedulePhi(int Offset, unsigned &II) { 534b6bf4024SHua Tian LLVM_DEBUG(dbgs() << "Start scheduling Phis:\n"); 535b6bf4024SHua Tian for (auto &Phi : MBB->phis()) { 536b6bf4024SHua Tian int LateCycle = INT_MAX; 537b6bf4024SHua Tian auto *SU = TripleDAG->getSUnit(&Phi); 538b6bf4024SHua Tian for (auto &Succ : SU->Succs) { 539b6bf4024SHua Tian // Phi doesn't have any Anti successors. 540b6bf4024SHua Tian if (Succ.getKind() != SDep::Data) 541b6bf4024SHua Tian continue; 542b6bf4024SHua Tian // Phi is scheduled before the successor of stage 0. The issue cycle of 543b6bf4024SHua Tian // phi is the latest cycle in this interval. 544b6bf4024SHua Tian auto *SuccMI = Succ.getSUnit()->getInstr(); 545b6bf4024SHua Tian int Cycle = getOriCycle(SuccMI); 546b6bf4024SHua Tian if (getOriStage(getOriMI(SuccMI), Offset) == 0) 547b6bf4024SHua Tian LateCycle = std::min(LateCycle, Cycle); 548b6bf4024SHua Tian } 549b6bf4024SHua Tian // The anti-dependency of phi need to be handled separately in the same way. 550b6bf4024SHua Tian if (Register AntiReg = getAntiRegister(&Phi)) { 551b6bf4024SHua Tian auto *AntiMI = MRI->getVRegDef(AntiReg); 552dd1d2b7cSHua Tian // AntiReg may be defined outside the kernel MBB. 553dd1d2b7cSHua Tian if (AntiMI->getParent() == MBB) { 554b6bf4024SHua Tian auto AntiCycle = getOriCycle(AntiMI); 555b6bf4024SHua Tian if (getOriStage(getOriMI(AntiMI), Offset) == 0) 556b6bf4024SHua Tian LateCycle = std::min(LateCycle, AntiCycle); 557b6bf4024SHua Tian } 558dd1d2b7cSHua Tian } 559b6bf4024SHua Tian // If there is no limit to the late cycle, a default value is given. 560b6bf4024SHua Tian if (LateCycle == INT_MAX) 561b6bf4024SHua Tian LateCycle = (int)(II - 1); 562b6bf4024SHua Tian LLVM_DEBUG(dbgs() << "\tCycle range [0, " << LateCycle << "] " << Phi); 563b6bf4024SHua Tian // The issue cycle of phi is set to the latest cycle in the interval. 564b6bf4024SHua Tian auto *OriPhi = getOriMI(&Phi); 565b6bf4024SHua Tian OriToCycle[OriPhi] = LateCycle; 566b6bf4024SHua Tian } 567b6bf4024SHua Tian } 568b6bf4024SHua Tian 569b6bf4024SHua Tian DenseMap<MachineInstr *, int> WindowScheduler::getIssueOrder(unsigned Offset, 570b6bf4024SHua Tian unsigned II) { 571b6bf4024SHua Tian // At each issue cycle, phi is placed before MIs in stage 0. So the simplest 572b6bf4024SHua Tian // way is to put phi at the beginning of the current cycle. 573b6bf4024SHua Tian DenseMap<int, SmallVector<MachineInstr *>> CycleToMIs; 574b6bf4024SHua Tian auto Range = getScheduleRange(Offset, SchedInstrNum); 575b6bf4024SHua Tian for (auto &Phi : MBB->phis()) 576b6bf4024SHua Tian CycleToMIs[getOriCycle(&Phi)].push_back(getOriMI(&Phi)); 577b6bf4024SHua Tian for (auto &MI : Range) 578b6bf4024SHua Tian CycleToMIs[getOriCycle(&MI)].push_back(getOriMI(&MI)); 579b6bf4024SHua Tian // Each MI is assigned a separate ordered Id, which is used as a sort marker 580b6bf4024SHua Tian // in the following expand process. 581b6bf4024SHua Tian DenseMap<MachineInstr *, int> IssueOrder; 582b6bf4024SHua Tian int Id = 0; 583b6bf4024SHua Tian for (int Cycle = 0; Cycle < (int)II; ++Cycle) { 584b6bf4024SHua Tian if (!CycleToMIs.count(Cycle)) 585b6bf4024SHua Tian continue; 586b6bf4024SHua Tian for (auto *MI : CycleToMIs[Cycle]) 587b6bf4024SHua Tian IssueOrder[MI] = Id++; 588b6bf4024SHua Tian } 589b6bf4024SHua Tian return IssueOrder; 590b6bf4024SHua Tian } 591b6bf4024SHua Tian 592b6bf4024SHua Tian void WindowScheduler::updateScheduleResult(unsigned Offset, unsigned II) { 593b6bf4024SHua Tian // At the first update, Offset is equal to SchedPhiNum. At this time, only 594b6bf4024SHua Tian // BestII, BestOffset, and BaseII need to be updated. 595b6bf4024SHua Tian if (Offset == SchedPhiNum) { 596b6bf4024SHua Tian BestII = II; 597b6bf4024SHua Tian BestOffset = SchedPhiNum; 598b6bf4024SHua Tian BaseII = II; 599b6bf4024SHua Tian return; 600b6bf4024SHua Tian } 601b6bf4024SHua Tian // The update will only continue if the II is smaller than BestII and the II 602b6bf4024SHua Tian // is sufficiently small. 603b6bf4024SHua Tian if ((II >= BestII) || (II + WindowDiffLimit > BaseII)) 604b6bf4024SHua Tian return; 605b6bf4024SHua Tian BestII = II; 606b6bf4024SHua Tian BestOffset = Offset; 607b6bf4024SHua Tian // Record the result of the current list scheduling, noting that each MI is 608b6bf4024SHua Tian // stored unordered in SchedResult. 609b6bf4024SHua Tian SchedResult.clear(); 610b6bf4024SHua Tian auto IssueOrder = getIssueOrder(Offset, II); 611b6bf4024SHua Tian for (auto &Pair : OriToCycle) { 612b6bf4024SHua Tian assert(IssueOrder.count(Pair.first) && "Cannot find original MI!"); 613b6bf4024SHua Tian SchedResult.push_back(std::make_tuple(Pair.first, Pair.second, 614b6bf4024SHua Tian getOriStage(Pair.first, Offset), 615b6bf4024SHua Tian IssueOrder[Pair.first])); 616b6bf4024SHua Tian } 617b6bf4024SHua Tian } 618b6bf4024SHua Tian 619b6bf4024SHua Tian void WindowScheduler::expand() { 620b6bf4024SHua Tian // The MIs in the SchedResult are sorted by the issue order ID. 621b6bf4024SHua Tian llvm::stable_sort(SchedResult, 622b6bf4024SHua Tian [](const std::tuple<MachineInstr *, int, int, int> &A, 623b6bf4024SHua Tian const std::tuple<MachineInstr *, int, int, int> &B) { 624b6bf4024SHua Tian return std::get<3>(A) < std::get<3>(B); 625b6bf4024SHua Tian }); 626b6bf4024SHua Tian // Use the scheduling infrastructure for expansion, noting that InstrChanges 627b6bf4024SHua Tian // is not supported here. 628b6bf4024SHua Tian DenseMap<MachineInstr *, int> Cycles, Stages; 629b6bf4024SHua Tian std::vector<MachineInstr *> OrderedInsts; 630b6bf4024SHua Tian for (auto &Info : SchedResult) { 631b6bf4024SHua Tian auto *MI = std::get<0>(Info); 632b6bf4024SHua Tian OrderedInsts.push_back(MI); 633b6bf4024SHua Tian Cycles[MI] = std::get<1>(Info); 634b6bf4024SHua Tian Stages[MI] = std::get<2>(Info); 635b6bf4024SHua Tian LLVM_DEBUG(dbgs() << "\tCycle " << Cycles[MI] << " [S." << Stages[MI] 636b6bf4024SHua Tian << "]: " << *MI); 637b6bf4024SHua Tian } 638b6bf4024SHua Tian ModuloSchedule MS(*MF, &Loop, std::move(OrderedInsts), std::move(Cycles), 639b6bf4024SHua Tian std::move(Stages)); 640b6bf4024SHua Tian ModuloScheduleExpander MSE(*MF, MS, *Context->LIS, 641b6bf4024SHua Tian ModuloScheduleExpander::InstrChangesTy()); 642b6bf4024SHua Tian MSE.expand(); 643b6bf4024SHua Tian MSE.cleanup(); 644b6bf4024SHua Tian } 645b6bf4024SHua Tian 646b6bf4024SHua Tian void WindowScheduler::updateLiveIntervals() { 647b6bf4024SHua Tian SmallVector<Register, 128> UsedRegs; 648b6bf4024SHua Tian for (MachineInstr &MI : *MBB) 649b6bf4024SHua Tian for (const MachineOperand &MO : MI.operands()) { 650b6bf4024SHua Tian if (!MO.isReg() || MO.getReg() == 0) 651b6bf4024SHua Tian continue; 652b6bf4024SHua Tian Register Reg = MO.getReg(); 653b6bf4024SHua Tian if (!is_contained(UsedRegs, Reg)) 654b6bf4024SHua Tian UsedRegs.push_back(Reg); 655b6bf4024SHua Tian } 656b6bf4024SHua Tian Context->LIS->repairIntervalsInRange(MBB, MBB->begin(), MBB->end(), UsedRegs); 657b6bf4024SHua Tian } 658b6bf4024SHua Tian 659b6bf4024SHua Tian iterator_range<MachineBasicBlock::iterator> 660b6bf4024SHua Tian WindowScheduler::getScheduleRange(unsigned Offset, unsigned Num) { 661b6bf4024SHua Tian auto RegionBegin = MBB->begin(); 662b6bf4024SHua Tian std::advance(RegionBegin, Offset); 663b6bf4024SHua Tian auto RegionEnd = RegionBegin; 664b6bf4024SHua Tian std::advance(RegionEnd, Num); 665b6bf4024SHua Tian return make_range(RegionBegin, RegionEnd); 666b6bf4024SHua Tian } 667b6bf4024SHua Tian 668b6bf4024SHua Tian int WindowScheduler::getOriCycle(MachineInstr *NewMI) { 669b6bf4024SHua Tian assert(TriToOri.count(NewMI) && "Cannot find original MI!"); 670b6bf4024SHua Tian auto *OriMI = TriToOri[NewMI]; 671b6bf4024SHua Tian assert(OriToCycle.count(OriMI) && "Cannot find schedule cycle!"); 672b6bf4024SHua Tian return OriToCycle[OriMI]; 673b6bf4024SHua Tian } 674b6bf4024SHua Tian 675b6bf4024SHua Tian MachineInstr *WindowScheduler::getOriMI(MachineInstr *NewMI) { 676b6bf4024SHua Tian assert(TriToOri.count(NewMI) && "Cannot find original MI!"); 677b6bf4024SHua Tian return TriToOri[NewMI]; 678b6bf4024SHua Tian } 679b6bf4024SHua Tian 680b6bf4024SHua Tian unsigned WindowScheduler::getOriStage(MachineInstr *OriMI, unsigned Offset) { 681b6bf4024SHua Tian assert(llvm::find(OriMIs, OriMI) != OriMIs.end() && 682b6bf4024SHua Tian "Cannot find OriMI in OriMIs!"); 683b6bf4024SHua Tian // If there is no instruction fold, all MI stages are 0. 684b6bf4024SHua Tian if (Offset == SchedPhiNum) 685b6bf4024SHua Tian return 0; 686b6bf4024SHua Tian // For those MIs with an ID less than the Offset, their stages are set to 0, 687b6bf4024SHua Tian // while the rest are set to 1. 688b6bf4024SHua Tian unsigned Id = 0; 689b6bf4024SHua Tian for (auto *MI : OriMIs) { 690b6bf4024SHua Tian if (MI->isMetaInstruction()) 691b6bf4024SHua Tian continue; 692b6bf4024SHua Tian if (MI == OriMI) 693b6bf4024SHua Tian break; 694b6bf4024SHua Tian ++Id; 695b6bf4024SHua Tian } 696b6bf4024SHua Tian return Id >= (size_t)Offset ? 1 : 0; 697b6bf4024SHua Tian } 698b6bf4024SHua Tian 699b6bf4024SHua Tian Register WindowScheduler::getAntiRegister(MachineInstr *Phi) { 700b6bf4024SHua Tian assert(Phi->isPHI() && "Expecting PHI!"); 701b6bf4024SHua Tian Register AntiReg; 702b6bf4024SHua Tian for (auto MO : Phi->uses()) { 703b6bf4024SHua Tian if (MO.isReg()) 704b6bf4024SHua Tian AntiReg = MO.getReg(); 705dd1d2b7cSHua Tian else if (MO.isMBB() && MO.getMBB() == MBB) 706b6bf4024SHua Tian return AntiReg; 707b6bf4024SHua Tian } 708b6bf4024SHua Tian return 0; 709b6bf4024SHua Tian } 710