10fca6ea1SDimitry Andric //======----------- WindowScheduler.cpp - window scheduler -------------======// 20fca6ea1SDimitry Andric // 30fca6ea1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40fca6ea1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50fca6ea1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60fca6ea1SDimitry Andric // 70fca6ea1SDimitry Andric //===----------------------------------------------------------------------===// 80fca6ea1SDimitry Andric // 90fca6ea1SDimitry Andric // An implementation of the Window Scheduling software pipelining algorithm. 100fca6ea1SDimitry Andric // 110fca6ea1SDimitry Andric // The fundamental concept of the window scheduling algorithm involves folding 120fca6ea1SDimitry Andric // the original MBB at a specific position, followed by list scheduling on the 130fca6ea1SDimitry Andric // folded MIs. The optimal scheduling result is then chosen from various folding 140fca6ea1SDimitry Andric // positions as the final scheduling outcome. 150fca6ea1SDimitry Andric // 160fca6ea1SDimitry Andric // The primary challenge in this algorithm lies in generating the folded MIs and 170fca6ea1SDimitry Andric // establishing their dependencies. We have innovatively employed a new MBB, 180fca6ea1SDimitry Andric // created by copying the original MBB three times, known as TripleMBB. This 190fca6ea1SDimitry Andric // TripleMBB enables the convenient implementation of MI folding and dependency 200fca6ea1SDimitry Andric // establishment. To facilitate the algorithm's implementation, we have also 210fca6ea1SDimitry Andric // devised data structures such as OriMIs, TriMIs, TriToOri, and OriToCycle. 220fca6ea1SDimitry Andric // 230fca6ea1SDimitry Andric // Another challenge in the algorithm is the scheduling of phis. Semantically, 240fca6ea1SDimitry Andric // it is difficult to place the phis in the window and perform list scheduling. 250fca6ea1SDimitry Andric // Therefore, we schedule these phis separately after each list scheduling. 260fca6ea1SDimitry Andric // 270fca6ea1SDimitry Andric // The provided implementation is designed for use before the Register Allocator 280fca6ea1SDimitry Andric // (RA). If the target requires implementation after RA, it is recommended to 290fca6ea1SDimitry Andric // reimplement analyseII(), schedulePhi(), and expand(). Additionally, 300fca6ea1SDimitry Andric // target-specific logic can be added in initialize(), preProcess(), and 310fca6ea1SDimitry Andric // postProcess(). 320fca6ea1SDimitry Andric // 330fca6ea1SDimitry Andric // Lastly, it is worth mentioning that getSearchIndexes() is an important 340fca6ea1SDimitry Andric // function. We have experimented with more complex heuristics on downstream 350fca6ea1SDimitry Andric // target and achieved favorable results. 360fca6ea1SDimitry Andric // 370fca6ea1SDimitry Andric //===----------------------------------------------------------------------===// 380fca6ea1SDimitry Andric #include "llvm/CodeGen/WindowScheduler.h" 390fca6ea1SDimitry Andric #include "llvm/ADT/Statistic.h" 400fca6ea1SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h" 410fca6ea1SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h" 420fca6ea1SDimitry Andric #include "llvm/CodeGen/MachinePipeliner.h" 430fca6ea1SDimitry Andric #include "llvm/CodeGen/ModuloSchedule.h" 440fca6ea1SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 450fca6ea1SDimitry Andric #include "llvm/Support/CommandLine.h" 460fca6ea1SDimitry Andric #include "llvm/Support/Debug.h" 470fca6ea1SDimitry Andric #include "llvm/Support/TimeProfiler.h" 480fca6ea1SDimitry Andric 490fca6ea1SDimitry Andric using namespace llvm; 500fca6ea1SDimitry Andric 510fca6ea1SDimitry Andric #define DEBUG_TYPE "pipeliner" 520fca6ea1SDimitry Andric 530fca6ea1SDimitry Andric namespace { 540fca6ea1SDimitry Andric STATISTIC(NumTryWindowSchedule, 550fca6ea1SDimitry Andric "Number of loops that we attempt to use window scheduling"); 560fca6ea1SDimitry Andric STATISTIC(NumTryWindowSearch, 570fca6ea1SDimitry Andric "Number of times that we run list schedule in the window scheduling"); 580fca6ea1SDimitry Andric STATISTIC(NumWindowSchedule, 590fca6ea1SDimitry Andric "Number of loops that we successfully use window scheduling"); 600fca6ea1SDimitry Andric STATISTIC(NumFailAnalyseII, 610fca6ea1SDimitry Andric "Window scheduling abort due to the failure of the II analysis"); 620fca6ea1SDimitry Andric 630fca6ea1SDimitry Andric cl::opt<unsigned> 640fca6ea1SDimitry Andric WindowSearchNum("window-search-num", 650fca6ea1SDimitry Andric cl::desc("The number of searches per loop in the window " 660fca6ea1SDimitry Andric "algorithm. 0 means no search number limit."), 670fca6ea1SDimitry Andric cl::Hidden, cl::init(6)); 680fca6ea1SDimitry Andric 690fca6ea1SDimitry Andric cl::opt<unsigned> WindowSearchRatio( 700fca6ea1SDimitry Andric "window-search-ratio", 710fca6ea1SDimitry Andric cl::desc("The ratio of searches per loop in the window algorithm. 100 " 720fca6ea1SDimitry Andric "means search all positions in the loop, while 0 means not " 730fca6ea1SDimitry Andric "performing any search."), 740fca6ea1SDimitry Andric cl::Hidden, cl::init(40)); 750fca6ea1SDimitry Andric 760fca6ea1SDimitry Andric cl::opt<unsigned> WindowIICoeff( 770fca6ea1SDimitry Andric "window-ii-coeff", 780fca6ea1SDimitry Andric cl::desc( 790fca6ea1SDimitry Andric "The coefficient used when initializing II in the window algorithm."), 800fca6ea1SDimitry Andric cl::Hidden, cl::init(5)); 810fca6ea1SDimitry Andric 820fca6ea1SDimitry Andric cl::opt<unsigned> WindowRegionLimit( 830fca6ea1SDimitry Andric "window-region-limit", 840fca6ea1SDimitry Andric cl::desc( 850fca6ea1SDimitry Andric "The lower limit of the scheduling region in the window algorithm."), 860fca6ea1SDimitry Andric cl::Hidden, cl::init(3)); 870fca6ea1SDimitry Andric 880fca6ea1SDimitry Andric cl::opt<unsigned> WindowDiffLimit( 890fca6ea1SDimitry Andric "window-diff-limit", 900fca6ea1SDimitry Andric cl::desc("The lower limit of the difference between best II and base II in " 910fca6ea1SDimitry Andric "the window algorithm. If the difference is smaller than " 920fca6ea1SDimitry Andric "this lower limit, window scheduling will not be performed."), 930fca6ea1SDimitry Andric cl::Hidden, cl::init(2)); 940fca6ea1SDimitry Andric } // namespace 950fca6ea1SDimitry Andric 960fca6ea1SDimitry Andric // WindowIILimit serves as an indicator of abnormal scheduling results and could 970fca6ea1SDimitry Andric // potentially be referenced by the derived target window scheduler. 980fca6ea1SDimitry Andric cl::opt<unsigned> 990fca6ea1SDimitry Andric WindowIILimit("window-ii-limit", 1000fca6ea1SDimitry Andric cl::desc("The upper limit of II in the window algorithm."), 1010fca6ea1SDimitry Andric cl::Hidden, cl::init(1000)); 1020fca6ea1SDimitry Andric 1030fca6ea1SDimitry Andric WindowScheduler::WindowScheduler(MachineSchedContext *C, MachineLoop &ML) 1040fca6ea1SDimitry Andric : Context(C), MF(C->MF), MBB(ML.getHeader()), Loop(ML), 1050fca6ea1SDimitry Andric Subtarget(&MF->getSubtarget()), TII(Subtarget->getInstrInfo()), 1060fca6ea1SDimitry Andric TRI(Subtarget->getRegisterInfo()), MRI(&MF->getRegInfo()) { 1070fca6ea1SDimitry Andric TripleDAG = std::unique_ptr<ScheduleDAGInstrs>( 1080fca6ea1SDimitry Andric createMachineScheduler(/*OnlyBuildGraph=*/true)); 1090fca6ea1SDimitry Andric } 1100fca6ea1SDimitry Andric 1110fca6ea1SDimitry Andric bool WindowScheduler::run() { 1120fca6ea1SDimitry Andric if (!initialize()) { 1130fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "The WindowScheduler failed to initialize!\n"); 1140fca6ea1SDimitry Andric return false; 1150fca6ea1SDimitry Andric } 1160fca6ea1SDimitry Andric // The window algorithm is time-consuming, and its compilation time should be 1170fca6ea1SDimitry Andric // taken into consideration. 1180fca6ea1SDimitry Andric TimeTraceScope Scope("WindowSearch"); 1190fca6ea1SDimitry Andric ++NumTryWindowSchedule; 1200fca6ea1SDimitry Andric // Performing the relevant processing before window scheduling. 1210fca6ea1SDimitry Andric preProcess(); 1220fca6ea1SDimitry Andric // The main window scheduling begins. 1230fca6ea1SDimitry Andric std::unique_ptr<ScheduleDAGInstrs> SchedDAG(createMachineScheduler()); 1240fca6ea1SDimitry Andric auto SearchIndexes = getSearchIndexes(WindowSearchNum, WindowSearchRatio); 1250fca6ea1SDimitry Andric for (unsigned Idx : SearchIndexes) { 1260fca6ea1SDimitry Andric OriToCycle.clear(); 1270fca6ea1SDimitry Andric ++NumTryWindowSearch; 1280fca6ea1SDimitry Andric // The scheduling starts with non-phi instruction, so SchedPhiNum needs to 1290fca6ea1SDimitry Andric // be added to Idx. 1300fca6ea1SDimitry Andric unsigned Offset = Idx + SchedPhiNum; 1310fca6ea1SDimitry Andric auto Range = getScheduleRange(Offset, SchedInstrNum); 1320fca6ea1SDimitry Andric SchedDAG->startBlock(MBB); 1330fca6ea1SDimitry Andric SchedDAG->enterRegion(MBB, Range.begin(), Range.end(), SchedInstrNum); 1340fca6ea1SDimitry Andric SchedDAG->schedule(); 1350fca6ea1SDimitry Andric LLVM_DEBUG(SchedDAG->dump()); 1360fca6ea1SDimitry Andric unsigned II = analyseII(*SchedDAG, Offset); 1370fca6ea1SDimitry Andric if (II == WindowIILimit) { 1380fca6ea1SDimitry Andric restoreTripleMBB(); 1390fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Can't find a valid II. Keep searching...\n"); 1400fca6ea1SDimitry Andric ++NumFailAnalyseII; 1410fca6ea1SDimitry Andric continue; 1420fca6ea1SDimitry Andric } 1430fca6ea1SDimitry Andric schedulePhi(Offset, II); 1440fca6ea1SDimitry Andric updateScheduleResult(Offset, II); 1450fca6ea1SDimitry Andric restoreTripleMBB(); 1460fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Current window Offset is " << Offset << " and II is " 1470fca6ea1SDimitry Andric << II << ".\n"); 1480fca6ea1SDimitry Andric } 1490fca6ea1SDimitry Andric // Performing the relevant processing after window scheduling. 1500fca6ea1SDimitry Andric postProcess(); 1510fca6ea1SDimitry Andric // Check whether the scheduling result is valid. 1520fca6ea1SDimitry Andric if (!isScheduleValid()) { 1530fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Window scheduling is not needed!\n"); 1540fca6ea1SDimitry Andric return false; 1550fca6ea1SDimitry Andric } 1560fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "\nBest window offset is " << BestOffset 1570fca6ea1SDimitry Andric << " and Best II is " << BestII << ".\n"); 1580fca6ea1SDimitry Andric // Expand the scheduling result to prologue, kernel, and epilogue. 1590fca6ea1SDimitry Andric expand(); 1600fca6ea1SDimitry Andric ++NumWindowSchedule; 1610fca6ea1SDimitry Andric return true; 1620fca6ea1SDimitry Andric } 1630fca6ea1SDimitry Andric 1640fca6ea1SDimitry Andric ScheduleDAGInstrs * 1650fca6ea1SDimitry Andric WindowScheduler::createMachineScheduler(bool OnlyBuildGraph) { 1660fca6ea1SDimitry Andric return OnlyBuildGraph 1670fca6ea1SDimitry Andric ? new ScheduleDAGMI( 1680fca6ea1SDimitry Andric Context, std::make_unique<PostGenericScheduler>(Context), 1690fca6ea1SDimitry Andric true) 1700fca6ea1SDimitry Andric : Context->PassConfig->createMachineScheduler(Context); 1710fca6ea1SDimitry Andric } 1720fca6ea1SDimitry Andric 1730fca6ea1SDimitry Andric bool WindowScheduler::initialize() { 1740fca6ea1SDimitry Andric if (!Subtarget->enableWindowScheduler()) { 1750fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Target disables the window scheduling!\n"); 1760fca6ea1SDimitry Andric return false; 1770fca6ea1SDimitry Andric } 1780fca6ea1SDimitry Andric // Initialized the member variables used by window algorithm. 1790fca6ea1SDimitry Andric OriMIs.clear(); 1800fca6ea1SDimitry Andric TriMIs.clear(); 1810fca6ea1SDimitry Andric TriToOri.clear(); 1820fca6ea1SDimitry Andric OriToCycle.clear(); 1830fca6ea1SDimitry Andric SchedResult.clear(); 1840fca6ea1SDimitry Andric SchedPhiNum = 0; 1850fca6ea1SDimitry Andric SchedInstrNum = 0; 1860fca6ea1SDimitry Andric BestII = UINT_MAX; 1870fca6ea1SDimitry Andric BestOffset = 0; 1880fca6ea1SDimitry Andric BaseII = 0; 1890fca6ea1SDimitry Andric // List scheduling used in the window algorithm depends on LiveIntervals. 1900fca6ea1SDimitry Andric if (!Context->LIS) { 1910fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "There is no LiveIntervals information!\n"); 1920fca6ea1SDimitry Andric return false; 1930fca6ea1SDimitry Andric } 1940fca6ea1SDimitry Andric // Check each MI in MBB. 1950fca6ea1SDimitry Andric SmallSet<Register, 8> PrevDefs; 1960fca6ea1SDimitry Andric SmallSet<Register, 8> PrevUses; 1970fca6ea1SDimitry Andric auto IsLoopCarried = [&](MachineInstr &Phi) { 1980fca6ea1SDimitry Andric // Two cases are checked here: (1)The virtual register defined by the 1990fca6ea1SDimitry Andric // preceding phi is used by the succeeding phi;(2)The preceding phi uses the 2000fca6ea1SDimitry Andric // virtual register defined by the succeeding phi. 2010fca6ea1SDimitry Andric if (PrevUses.count(Phi.getOperand(0).getReg())) 2020fca6ea1SDimitry Andric return true; 2030fca6ea1SDimitry Andric PrevDefs.insert(Phi.getOperand(0).getReg()); 2040fca6ea1SDimitry Andric for (unsigned I = 1, E = Phi.getNumOperands(); I != E; I += 2) { 2050fca6ea1SDimitry Andric if (PrevDefs.count(Phi.getOperand(I).getReg())) 2060fca6ea1SDimitry Andric return true; 2070fca6ea1SDimitry Andric PrevUses.insert(Phi.getOperand(I).getReg()); 2080fca6ea1SDimitry Andric } 2090fca6ea1SDimitry Andric return false; 2100fca6ea1SDimitry Andric }; 2110fca6ea1SDimitry Andric auto PLI = TII->analyzeLoopForPipelining(MBB); 2120fca6ea1SDimitry Andric for (auto &MI : *MBB) { 2130fca6ea1SDimitry Andric if (MI.isMetaInstruction() || MI.isTerminator()) 2140fca6ea1SDimitry Andric continue; 2150fca6ea1SDimitry Andric if (MI.isPHI()) { 2160fca6ea1SDimitry Andric if (IsLoopCarried(MI)) { 2170fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Loop carried phis are not supported yet!\n"); 2180fca6ea1SDimitry Andric return false; 2190fca6ea1SDimitry Andric } 2200fca6ea1SDimitry Andric ++SchedPhiNum; 2210fca6ea1SDimitry Andric ++BestOffset; 2220fca6ea1SDimitry Andric } else 2230fca6ea1SDimitry Andric ++SchedInstrNum; 2240fca6ea1SDimitry Andric if (TII->isSchedulingBoundary(MI, MBB, *MF)) { 2250fca6ea1SDimitry Andric LLVM_DEBUG( 2260fca6ea1SDimitry Andric dbgs() << "Boundary MI is not allowed in window scheduling!\n"); 2270fca6ea1SDimitry Andric return false; 2280fca6ea1SDimitry Andric } 2290fca6ea1SDimitry Andric if (PLI->shouldIgnoreForPipelining(&MI)) { 2300fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Special MI defined by target is not allowed in " 2310fca6ea1SDimitry Andric "window scheduling!\n"); 2320fca6ea1SDimitry Andric return false; 2330fca6ea1SDimitry Andric } 2340fca6ea1SDimitry Andric for (auto &Def : MI.all_defs()) 235*6c4b055cSDimitry Andric if (Def.isReg() && Def.getReg().isPhysical()) { 236*6c4b055cSDimitry Andric LLVM_DEBUG(dbgs() << "Physical registers are not supported in " 237*6c4b055cSDimitry Andric "window scheduling!\n"); 2380fca6ea1SDimitry Andric return false; 2390fca6ea1SDimitry Andric } 240*6c4b055cSDimitry Andric } 2410fca6ea1SDimitry Andric if (SchedInstrNum <= WindowRegionLimit) { 2420fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "There are too few MIs in the window region!\n"); 2430fca6ea1SDimitry Andric return false; 2440fca6ea1SDimitry Andric } 2450fca6ea1SDimitry Andric return true; 2460fca6ea1SDimitry Andric } 2470fca6ea1SDimitry Andric 2480fca6ea1SDimitry Andric void WindowScheduler::preProcess() { 2490fca6ea1SDimitry Andric // Prior to window scheduling, it's necessary to backup the original MBB, 2500fca6ea1SDimitry Andric // generate a new TripleMBB, and build a TripleDAG based on the TripleMBB. 2510fca6ea1SDimitry Andric backupMBB(); 2520fca6ea1SDimitry Andric generateTripleMBB(); 2530fca6ea1SDimitry Andric TripleDAG->startBlock(MBB); 2540fca6ea1SDimitry Andric TripleDAG->enterRegion( 2550fca6ea1SDimitry Andric MBB, MBB->begin(), MBB->getFirstTerminator(), 2560fca6ea1SDimitry Andric std::distance(MBB->begin(), MBB->getFirstTerminator())); 2570fca6ea1SDimitry Andric TripleDAG->buildSchedGraph(Context->AA); 2580fca6ea1SDimitry Andric } 2590fca6ea1SDimitry Andric 2600fca6ea1SDimitry Andric void WindowScheduler::postProcess() { 2610fca6ea1SDimitry Andric // After window scheduling, it's necessary to clear the TripleDAG and restore 2620fca6ea1SDimitry Andric // to the original MBB. 2630fca6ea1SDimitry Andric TripleDAG->exitRegion(); 2640fca6ea1SDimitry Andric TripleDAG->finishBlock(); 2650fca6ea1SDimitry Andric restoreMBB(); 2660fca6ea1SDimitry Andric } 2670fca6ea1SDimitry Andric 2680fca6ea1SDimitry Andric void WindowScheduler::backupMBB() { 2690fca6ea1SDimitry Andric for (auto &MI : MBB->instrs()) 2700fca6ea1SDimitry Andric OriMIs.push_back(&MI); 2710fca6ea1SDimitry Andric // Remove MIs and the corresponding live intervals. 2720fca6ea1SDimitry Andric for (auto &MI : make_early_inc_range(*MBB)) { 2730fca6ea1SDimitry Andric Context->LIS->getSlotIndexes()->removeMachineInstrFromMaps(MI, true); 2740fca6ea1SDimitry Andric MBB->remove(&MI); 2750fca6ea1SDimitry Andric } 2760fca6ea1SDimitry Andric } 2770fca6ea1SDimitry Andric 2780fca6ea1SDimitry Andric void WindowScheduler::restoreMBB() { 2790fca6ea1SDimitry Andric // Erase MIs and the corresponding live intervals. 2800fca6ea1SDimitry Andric for (auto &MI : make_early_inc_range(*MBB)) { 2810fca6ea1SDimitry Andric Context->LIS->getSlotIndexes()->removeMachineInstrFromMaps(MI, true); 2820fca6ea1SDimitry Andric MI.eraseFromParent(); 2830fca6ea1SDimitry Andric } 2840fca6ea1SDimitry Andric // Restore MBB to the state before window scheduling. 2850fca6ea1SDimitry Andric for (auto *MI : OriMIs) 2860fca6ea1SDimitry Andric MBB->push_back(MI); 2870fca6ea1SDimitry Andric updateLiveIntervals(); 2880fca6ea1SDimitry Andric } 2890fca6ea1SDimitry Andric 2900fca6ea1SDimitry Andric void WindowScheduler::generateTripleMBB() { 2910fca6ea1SDimitry Andric const unsigned DuplicateNum = 3; 2920fca6ea1SDimitry Andric TriMIs.clear(); 2930fca6ea1SDimitry Andric TriToOri.clear(); 2940fca6ea1SDimitry Andric assert(OriMIs.size() > 0 && "The Original MIs were not backed up!"); 2950fca6ea1SDimitry Andric // Step 1: Performing the first copy of MBB instructions, excluding 2960fca6ea1SDimitry Andric // terminators. At the same time, we back up the anti-register of phis. 2970fca6ea1SDimitry Andric // DefPairs hold the old and new define register pairs. 2980fca6ea1SDimitry Andric DenseMap<Register, Register> DefPairs; 2990fca6ea1SDimitry Andric for (auto *MI : OriMIs) { 3000fca6ea1SDimitry Andric if (MI->isMetaInstruction() || MI->isTerminator()) 3010fca6ea1SDimitry Andric continue; 3020fca6ea1SDimitry Andric if (MI->isPHI()) 3030fca6ea1SDimitry Andric if (Register AntiReg = getAntiRegister(MI)) 3040fca6ea1SDimitry Andric DefPairs[MI->getOperand(0).getReg()] = AntiReg; 3050fca6ea1SDimitry Andric auto *NewMI = MF->CloneMachineInstr(MI); 3060fca6ea1SDimitry Andric MBB->push_back(NewMI); 3070fca6ea1SDimitry Andric TriMIs.push_back(NewMI); 3080fca6ea1SDimitry Andric TriToOri[NewMI] = MI; 3090fca6ea1SDimitry Andric } 3100fca6ea1SDimitry Andric // Step 2: Performing the remaining two copies of MBB instructions excluding 3110fca6ea1SDimitry Andric // phis, and the last one contains terminators. At the same time, registers 3120fca6ea1SDimitry Andric // are updated accordingly. 3130fca6ea1SDimitry Andric for (size_t Cnt = 1; Cnt < DuplicateNum; ++Cnt) { 3140fca6ea1SDimitry Andric for (auto *MI : OriMIs) { 3150fca6ea1SDimitry Andric if (MI->isPHI() || MI->isMetaInstruction() || 3160fca6ea1SDimitry Andric (MI->isTerminator() && Cnt < DuplicateNum - 1)) 3170fca6ea1SDimitry Andric continue; 3180fca6ea1SDimitry Andric auto *NewMI = MF->CloneMachineInstr(MI); 3190fca6ea1SDimitry Andric DenseMap<Register, Register> NewDefs; 3200fca6ea1SDimitry Andric // New defines are updated. 3210fca6ea1SDimitry Andric for (auto MO : NewMI->all_defs()) 3220fca6ea1SDimitry Andric if (MO.isReg() && MO.getReg().isVirtual()) { 3230fca6ea1SDimitry Andric Register NewDef = 3240fca6ea1SDimitry Andric MRI->createVirtualRegister(MRI->getRegClass(MO.getReg())); 3250fca6ea1SDimitry Andric NewMI->substituteRegister(MO.getReg(), NewDef, 0, *TRI); 3260fca6ea1SDimitry Andric NewDefs[MO.getReg()] = NewDef; 3270fca6ea1SDimitry Andric } 3280fca6ea1SDimitry Andric // New uses are updated. 3290fca6ea1SDimitry Andric for (auto DefRegPair : DefPairs) 3300fca6ea1SDimitry Andric if (NewMI->readsRegister(DefRegPair.first, TRI)) { 3310fca6ea1SDimitry Andric Register NewUse = DefRegPair.second; 3320fca6ea1SDimitry Andric // Note the update process for '%1 -> %9' in '%10 = sub i32 %9, %3': 3330fca6ea1SDimitry Andric // 3340fca6ea1SDimitry Andric // BB.3: DefPairs 3350fca6ea1SDimitry Andric // ================================== 3360fca6ea1SDimitry Andric // %1 = phi i32 [%2, %BB.1], [%7, %BB.3] (%1,%7) 3370fca6ea1SDimitry Andric // ... 3380fca6ea1SDimitry Andric // ================================== 3390fca6ea1SDimitry Andric // ... 3400fca6ea1SDimitry Andric // %4 = sub i32 %1, %3 3410fca6ea1SDimitry Andric // ... 3420fca6ea1SDimitry Andric // %7 = add i32 %5, %6 3430fca6ea1SDimitry Andric // ... 3440fca6ea1SDimitry Andric // ---------------------------------- 3450fca6ea1SDimitry Andric // ... 3460fca6ea1SDimitry Andric // %8 = sub i32 %7, %3 (%1,%7),(%4,%8) 3470fca6ea1SDimitry Andric // ... 3480fca6ea1SDimitry Andric // %9 = add i32 %5, %6 (%1,%7),(%4,%8),(%7,%9) 3490fca6ea1SDimitry Andric // ... 3500fca6ea1SDimitry Andric // ---------------------------------- 3510fca6ea1SDimitry Andric // ... 3520fca6ea1SDimitry Andric // %10 = sub i32 %9, %3 (%1,%7),(%4,%10),(%7,%9) 3530fca6ea1SDimitry Andric // ... ^ 3540fca6ea1SDimitry Andric // %11 = add i32 %5, %6 (%1,%7),(%4,%10),(%7,%11) 3550fca6ea1SDimitry Andric // ... 3560fca6ea1SDimitry Andric // ================================== 3570fca6ea1SDimitry Andric // < Terminators > 3580fca6ea1SDimitry Andric // ================================== 3590fca6ea1SDimitry Andric if (DefPairs.count(NewUse)) 3600fca6ea1SDimitry Andric NewUse = DefPairs[NewUse]; 3610fca6ea1SDimitry Andric NewMI->substituteRegister(DefRegPair.first, NewUse, 0, *TRI); 3620fca6ea1SDimitry Andric } 3630fca6ea1SDimitry Andric // DefPairs is updated at last. 3640fca6ea1SDimitry Andric for (auto &NewDef : NewDefs) 3650fca6ea1SDimitry Andric DefPairs[NewDef.first] = NewDef.second; 3660fca6ea1SDimitry Andric MBB->push_back(NewMI); 3670fca6ea1SDimitry Andric TriMIs.push_back(NewMI); 3680fca6ea1SDimitry Andric TriToOri[NewMI] = MI; 3690fca6ea1SDimitry Andric } 3700fca6ea1SDimitry Andric } 3710fca6ea1SDimitry Andric // Step 3: The registers used by phis are updated, and they are generated in 3720fca6ea1SDimitry Andric // the third copy of MBB. 3730fca6ea1SDimitry Andric // In the privious example, the old phi is: 3740fca6ea1SDimitry Andric // %1 = phi i32 [%2, %BB.1], [%7, %BB.3] 3750fca6ea1SDimitry Andric // The new phi is: 3760fca6ea1SDimitry Andric // %1 = phi i32 [%2, %BB.1], [%11, %BB.3] 3770fca6ea1SDimitry Andric for (auto &Phi : MBB->phis()) { 3780fca6ea1SDimitry Andric for (auto DefRegPair : DefPairs) 3790fca6ea1SDimitry Andric if (Phi.readsRegister(DefRegPair.first, TRI)) 3800fca6ea1SDimitry Andric Phi.substituteRegister(DefRegPair.first, DefRegPair.second, 0, *TRI); 3810fca6ea1SDimitry Andric } 3820fca6ea1SDimitry Andric updateLiveIntervals(); 3830fca6ea1SDimitry Andric } 3840fca6ea1SDimitry Andric 3850fca6ea1SDimitry Andric void WindowScheduler::restoreTripleMBB() { 3860fca6ea1SDimitry Andric // After list scheduling, the MBB is restored in one traversal. 3870fca6ea1SDimitry Andric for (size_t I = 0; I < TriMIs.size(); ++I) { 3880fca6ea1SDimitry Andric auto *MI = TriMIs[I]; 3890fca6ea1SDimitry Andric auto OldPos = MBB->begin(); 3900fca6ea1SDimitry Andric std::advance(OldPos, I); 3910fca6ea1SDimitry Andric auto CurPos = MI->getIterator(); 3920fca6ea1SDimitry Andric if (CurPos != OldPos) { 3930fca6ea1SDimitry Andric MBB->splice(OldPos, MBB, CurPos); 3940fca6ea1SDimitry Andric Context->LIS->handleMove(*MI, /*UpdateFlags=*/false); 3950fca6ea1SDimitry Andric } 3960fca6ea1SDimitry Andric } 3970fca6ea1SDimitry Andric } 3980fca6ea1SDimitry Andric 3990fca6ea1SDimitry Andric SmallVector<unsigned> WindowScheduler::getSearchIndexes(unsigned SearchNum, 4000fca6ea1SDimitry Andric unsigned SearchRatio) { 4010fca6ea1SDimitry Andric // We use SearchRatio to get the index range, and then evenly get the indexes 4020fca6ea1SDimitry Andric // according to the SearchNum. This is a simple huristic. Depending on the 4030fca6ea1SDimitry Andric // characteristics of the target, more complex algorithms can be used for both 4040fca6ea1SDimitry Andric // performance and compilation time. 4050fca6ea1SDimitry Andric assert(SearchRatio <= 100 && "SearchRatio should be equal or less than 100!"); 4060fca6ea1SDimitry Andric unsigned MaxIdx = SchedInstrNum * SearchRatio / 100; 4070fca6ea1SDimitry Andric unsigned Step = SearchNum > 0 && SearchNum <= MaxIdx ? MaxIdx / SearchNum : 1; 4080fca6ea1SDimitry Andric SmallVector<unsigned> SearchIndexes; 4090fca6ea1SDimitry Andric for (unsigned Idx = 0; Idx < MaxIdx; Idx += Step) 4100fca6ea1SDimitry Andric SearchIndexes.push_back(Idx); 4110fca6ea1SDimitry Andric return SearchIndexes; 4120fca6ea1SDimitry Andric } 4130fca6ea1SDimitry Andric 4140fca6ea1SDimitry Andric int WindowScheduler::getEstimatedII(ScheduleDAGInstrs &DAG) { 4150fca6ea1SDimitry Andric // Sometimes MaxDepth is 0, so it should be limited to the minimum of 1. 4160fca6ea1SDimitry Andric unsigned MaxDepth = 1; 4170fca6ea1SDimitry Andric for (auto &SU : DAG.SUnits) 4180fca6ea1SDimitry Andric MaxDepth = std::max(SU.getDepth() + SU.Latency, MaxDepth); 4190fca6ea1SDimitry Andric return MaxDepth * WindowIICoeff; 4200fca6ea1SDimitry Andric } 4210fca6ea1SDimitry Andric 4220fca6ea1SDimitry Andric int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG, 4230fca6ea1SDimitry Andric unsigned Offset) { 4240fca6ea1SDimitry Andric int InitII = getEstimatedII(DAG); 4250fca6ea1SDimitry Andric ResourceManager RM(Subtarget, &DAG); 4260fca6ea1SDimitry Andric RM.init(InitII); 4270fca6ea1SDimitry Andric // ResourceManager and DAG are used to calculate the maximum cycle for the 4280fca6ea1SDimitry Andric // scheduled MIs. Since MIs in the Region have already been scheduled, the 4290fca6ea1SDimitry Andric // emit cycles can be estimated in order here. 4300fca6ea1SDimitry Andric int CurCycle = 0; 4310fca6ea1SDimitry Andric auto Range = getScheduleRange(Offset, SchedInstrNum); 4320fca6ea1SDimitry Andric for (auto &MI : Range) { 4330fca6ea1SDimitry Andric auto *SU = DAG.getSUnit(&MI); 4340fca6ea1SDimitry Andric int ExpectCycle = CurCycle; 4350fca6ea1SDimitry Andric // The predecessors of current MI determine its earliest issue cycle. 4360fca6ea1SDimitry Andric for (auto &Pred : SU->Preds) { 4370fca6ea1SDimitry Andric if (Pred.isWeak()) 4380fca6ea1SDimitry Andric continue; 4390fca6ea1SDimitry Andric auto *PredMI = Pred.getSUnit()->getInstr(); 4400fca6ea1SDimitry Andric int PredCycle = getOriCycle(PredMI); 4410fca6ea1SDimitry Andric ExpectCycle = std::max(ExpectCycle, PredCycle + (int)Pred.getLatency()); 4420fca6ea1SDimitry Andric } 443*6c4b055cSDimitry Andric // Zero cost instructions do not need to check resource. 444*6c4b055cSDimitry Andric if (!TII->isZeroCost(MI.getOpcode())) { 4450fca6ea1SDimitry Andric // ResourceManager can be used to detect resource conflicts between the 4460fca6ea1SDimitry Andric // current MI and the previously inserted MIs. 4470fca6ea1SDimitry Andric while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) { 4480fca6ea1SDimitry Andric ++CurCycle; 4490fca6ea1SDimitry Andric if (CurCycle == (int)WindowIILimit) 4500fca6ea1SDimitry Andric return CurCycle; 4510fca6ea1SDimitry Andric } 4520fca6ea1SDimitry Andric RM.reserveResources(*SU, CurCycle); 453*6c4b055cSDimitry Andric } 4540fca6ea1SDimitry Andric OriToCycle[getOriMI(&MI)] = CurCycle; 4550fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "\tCycle " << CurCycle << " [S." 4560fca6ea1SDimitry Andric << getOriStage(getOriMI(&MI), Offset) << "]: " << MI); 4570fca6ea1SDimitry Andric } 4580fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "MaxCycle is " << CurCycle << ".\n"); 4590fca6ea1SDimitry Andric return CurCycle; 4600fca6ea1SDimitry Andric } 4610fca6ea1SDimitry Andric 4620fca6ea1SDimitry Andric // By utilizing TripleDAG, we can easily establish dependencies between A and B. 4630fca6ea1SDimitry Andric // Based on the MaxCycle and the issue cycle of A and B, we can determine 4640fca6ea1SDimitry Andric // whether it is necessary to add a stall cycle. This is because, without 4650fca6ea1SDimitry Andric // inserting the stall cycle, the latency constraint between A and B cannot be 4660fca6ea1SDimitry Andric // satisfied. The details are as follows: 4670fca6ea1SDimitry Andric // 4680fca6ea1SDimitry Andric // New MBB: 4690fca6ea1SDimitry Andric // ======================================== 4700fca6ea1SDimitry Andric // < Phis > 4710fca6ea1SDimitry Andric // ======================================== (sliding direction) 4720fca6ea1SDimitry Andric // MBB copy 1 | 4730fca6ea1SDimitry Andric // V 4740fca6ea1SDimitry Andric // 4750fca6ea1SDimitry Andric // ~~~~~~~~~~~~~~~~~~~|~~~~~~~~~~~~~~~~~~~~ ----schedule window----- 4760fca6ea1SDimitry Andric // | | 4770fca6ea1SDimitry Andric // ===================V==================== | 4780fca6ea1SDimitry Andric // MBB copy 2 < MI B > | 4790fca6ea1SDimitry Andric // | 4800fca6ea1SDimitry Andric // < MI A > V 4810fca6ea1SDimitry Andric // ~~~~~~~~~~~~~~~~~~~:~~~~~~~~~~~~~~~~~~~~ ------------------------ 4820fca6ea1SDimitry Andric // : 4830fca6ea1SDimitry Andric // ===================V==================== 4840fca6ea1SDimitry Andric // MBB copy 3 < MI B'> 4850fca6ea1SDimitry Andric // 4860fca6ea1SDimitry Andric // 4870fca6ea1SDimitry Andric // 4880fca6ea1SDimitry Andric // 4890fca6ea1SDimitry Andric // ======================================== 4900fca6ea1SDimitry Andric // < Terminators > 4910fca6ea1SDimitry Andric // ======================================== 4920fca6ea1SDimitry Andric int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) { 4930fca6ea1SDimitry Andric int MaxStallCycle = 0; 494*6c4b055cSDimitry Andric int CurrentII = MaxCycle + 1; 4950fca6ea1SDimitry Andric auto Range = getScheduleRange(Offset, SchedInstrNum); 4960fca6ea1SDimitry Andric for (auto &MI : Range) { 4970fca6ea1SDimitry Andric auto *SU = TripleDAG->getSUnit(&MI); 4980fca6ea1SDimitry Andric int DefCycle = getOriCycle(&MI); 4990fca6ea1SDimitry Andric for (auto &Succ : SU->Succs) { 5000fca6ea1SDimitry Andric if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU) 5010fca6ea1SDimitry Andric continue; 502*6c4b055cSDimitry Andric // If the expected cycle does not exceed CurrentII, no check is needed. 503*6c4b055cSDimitry Andric if (DefCycle + (int)Succ.getLatency() <= CurrentII) 5040fca6ea1SDimitry Andric continue; 5050fca6ea1SDimitry Andric // If the cycle of the scheduled MI A is less than that of the scheduled 5060fca6ea1SDimitry Andric // MI B, the scheduling will fail because the lifetime of the 5070fca6ea1SDimitry Andric // corresponding register exceeds II. 5080fca6ea1SDimitry Andric auto *SuccMI = Succ.getSUnit()->getInstr(); 5090fca6ea1SDimitry Andric int UseCycle = getOriCycle(SuccMI); 5100fca6ea1SDimitry Andric if (DefCycle < UseCycle) 5110fca6ea1SDimitry Andric return WindowIILimit; 5120fca6ea1SDimitry Andric // Get the stall cycle introduced by the register between two trips. 513*6c4b055cSDimitry Andric int StallCycle = DefCycle + (int)Succ.getLatency() - CurrentII - UseCycle; 5140fca6ea1SDimitry Andric MaxStallCycle = std::max(MaxStallCycle, StallCycle); 5150fca6ea1SDimitry Andric } 5160fca6ea1SDimitry Andric } 5170fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "MaxStallCycle is " << MaxStallCycle << ".\n"); 5180fca6ea1SDimitry Andric return MaxStallCycle; 5190fca6ea1SDimitry Andric } 5200fca6ea1SDimitry Andric 5210fca6ea1SDimitry Andric unsigned WindowScheduler::analyseII(ScheduleDAGInstrs &DAG, unsigned Offset) { 5220fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Start analyzing II:\n"); 5230fca6ea1SDimitry Andric int MaxCycle = calculateMaxCycle(DAG, Offset); 5240fca6ea1SDimitry Andric if (MaxCycle == (int)WindowIILimit) 5250fca6ea1SDimitry Andric return MaxCycle; 5260fca6ea1SDimitry Andric int StallCycle = calculateStallCycle(Offset, MaxCycle); 5270fca6ea1SDimitry Andric if (StallCycle == (int)WindowIILimit) 5280fca6ea1SDimitry Andric return StallCycle; 5290fca6ea1SDimitry Andric // The value of II is equal to the maximum execution cycle plus 1. 5300fca6ea1SDimitry Andric return MaxCycle + StallCycle + 1; 5310fca6ea1SDimitry Andric } 5320fca6ea1SDimitry Andric 5330fca6ea1SDimitry Andric void WindowScheduler::schedulePhi(int Offset, unsigned &II) { 5340fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Start scheduling Phis:\n"); 5350fca6ea1SDimitry Andric for (auto &Phi : MBB->phis()) { 5360fca6ea1SDimitry Andric int LateCycle = INT_MAX; 5370fca6ea1SDimitry Andric auto *SU = TripleDAG->getSUnit(&Phi); 5380fca6ea1SDimitry Andric for (auto &Succ : SU->Succs) { 5390fca6ea1SDimitry Andric // Phi doesn't have any Anti successors. 5400fca6ea1SDimitry Andric if (Succ.getKind() != SDep::Data) 5410fca6ea1SDimitry Andric continue; 5420fca6ea1SDimitry Andric // Phi is scheduled before the successor of stage 0. The issue cycle of 5430fca6ea1SDimitry Andric // phi is the latest cycle in this interval. 5440fca6ea1SDimitry Andric auto *SuccMI = Succ.getSUnit()->getInstr(); 5450fca6ea1SDimitry Andric int Cycle = getOriCycle(SuccMI); 5460fca6ea1SDimitry Andric if (getOriStage(getOriMI(SuccMI), Offset) == 0) 5470fca6ea1SDimitry Andric LateCycle = std::min(LateCycle, Cycle); 5480fca6ea1SDimitry Andric } 5490fca6ea1SDimitry Andric // The anti-dependency of phi need to be handled separately in the same way. 5500fca6ea1SDimitry Andric if (Register AntiReg = getAntiRegister(&Phi)) { 5510fca6ea1SDimitry Andric auto *AntiMI = MRI->getVRegDef(AntiReg); 5520fca6ea1SDimitry Andric // AntiReg may be defined outside the kernel MBB. 5530fca6ea1SDimitry Andric if (AntiMI->getParent() == MBB) { 5540fca6ea1SDimitry Andric auto AntiCycle = getOriCycle(AntiMI); 5550fca6ea1SDimitry Andric if (getOriStage(getOriMI(AntiMI), Offset) == 0) 5560fca6ea1SDimitry Andric LateCycle = std::min(LateCycle, AntiCycle); 5570fca6ea1SDimitry Andric } 5580fca6ea1SDimitry Andric } 5590fca6ea1SDimitry Andric // If there is no limit to the late cycle, a default value is given. 5600fca6ea1SDimitry Andric if (LateCycle == INT_MAX) 5610fca6ea1SDimitry Andric LateCycle = (int)(II - 1); 5620fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "\tCycle range [0, " << LateCycle << "] " << Phi); 5630fca6ea1SDimitry Andric // The issue cycle of phi is set to the latest cycle in the interval. 5640fca6ea1SDimitry Andric auto *OriPhi = getOriMI(&Phi); 5650fca6ea1SDimitry Andric OriToCycle[OriPhi] = LateCycle; 5660fca6ea1SDimitry Andric } 5670fca6ea1SDimitry Andric } 5680fca6ea1SDimitry Andric 5690fca6ea1SDimitry Andric DenseMap<MachineInstr *, int> WindowScheduler::getIssueOrder(unsigned Offset, 5700fca6ea1SDimitry Andric unsigned II) { 5710fca6ea1SDimitry Andric // At each issue cycle, phi is placed before MIs in stage 0. So the simplest 5720fca6ea1SDimitry Andric // way is to put phi at the beginning of the current cycle. 5730fca6ea1SDimitry Andric DenseMap<int, SmallVector<MachineInstr *>> CycleToMIs; 5740fca6ea1SDimitry Andric auto Range = getScheduleRange(Offset, SchedInstrNum); 5750fca6ea1SDimitry Andric for (auto &Phi : MBB->phis()) 5760fca6ea1SDimitry Andric CycleToMIs[getOriCycle(&Phi)].push_back(getOriMI(&Phi)); 5770fca6ea1SDimitry Andric for (auto &MI : Range) 5780fca6ea1SDimitry Andric CycleToMIs[getOriCycle(&MI)].push_back(getOriMI(&MI)); 5790fca6ea1SDimitry Andric // Each MI is assigned a separate ordered Id, which is used as a sort marker 5800fca6ea1SDimitry Andric // in the following expand process. 5810fca6ea1SDimitry Andric DenseMap<MachineInstr *, int> IssueOrder; 5820fca6ea1SDimitry Andric int Id = 0; 5830fca6ea1SDimitry Andric for (int Cycle = 0; Cycle < (int)II; ++Cycle) { 5840fca6ea1SDimitry Andric if (!CycleToMIs.count(Cycle)) 5850fca6ea1SDimitry Andric continue; 5860fca6ea1SDimitry Andric for (auto *MI : CycleToMIs[Cycle]) 5870fca6ea1SDimitry Andric IssueOrder[MI] = Id++; 5880fca6ea1SDimitry Andric } 5890fca6ea1SDimitry Andric return IssueOrder; 5900fca6ea1SDimitry Andric } 5910fca6ea1SDimitry Andric 5920fca6ea1SDimitry Andric void WindowScheduler::updateScheduleResult(unsigned Offset, unsigned II) { 5930fca6ea1SDimitry Andric // At the first update, Offset is equal to SchedPhiNum. At this time, only 5940fca6ea1SDimitry Andric // BestII, BestOffset, and BaseII need to be updated. 5950fca6ea1SDimitry Andric if (Offset == SchedPhiNum) { 5960fca6ea1SDimitry Andric BestII = II; 5970fca6ea1SDimitry Andric BestOffset = SchedPhiNum; 5980fca6ea1SDimitry Andric BaseII = II; 5990fca6ea1SDimitry Andric return; 6000fca6ea1SDimitry Andric } 6010fca6ea1SDimitry Andric // The update will only continue if the II is smaller than BestII and the II 6020fca6ea1SDimitry Andric // is sufficiently small. 6030fca6ea1SDimitry Andric if ((II >= BestII) || (II + WindowDiffLimit > BaseII)) 6040fca6ea1SDimitry Andric return; 6050fca6ea1SDimitry Andric BestII = II; 6060fca6ea1SDimitry Andric BestOffset = Offset; 6070fca6ea1SDimitry Andric // Record the result of the current list scheduling, noting that each MI is 6080fca6ea1SDimitry Andric // stored unordered in SchedResult. 6090fca6ea1SDimitry Andric SchedResult.clear(); 6100fca6ea1SDimitry Andric auto IssueOrder = getIssueOrder(Offset, II); 6110fca6ea1SDimitry Andric for (auto &Pair : OriToCycle) { 6120fca6ea1SDimitry Andric assert(IssueOrder.count(Pair.first) && "Cannot find original MI!"); 6130fca6ea1SDimitry Andric SchedResult.push_back(std::make_tuple(Pair.first, Pair.second, 6140fca6ea1SDimitry Andric getOriStage(Pair.first, Offset), 6150fca6ea1SDimitry Andric IssueOrder[Pair.first])); 6160fca6ea1SDimitry Andric } 6170fca6ea1SDimitry Andric } 6180fca6ea1SDimitry Andric 6190fca6ea1SDimitry Andric void WindowScheduler::expand() { 6200fca6ea1SDimitry Andric // The MIs in the SchedResult are sorted by the issue order ID. 6210fca6ea1SDimitry Andric llvm::stable_sort(SchedResult, 6220fca6ea1SDimitry Andric [](const std::tuple<MachineInstr *, int, int, int> &A, 6230fca6ea1SDimitry Andric const std::tuple<MachineInstr *, int, int, int> &B) { 6240fca6ea1SDimitry Andric return std::get<3>(A) < std::get<3>(B); 6250fca6ea1SDimitry Andric }); 6260fca6ea1SDimitry Andric // Use the scheduling infrastructure for expansion, noting that InstrChanges 6270fca6ea1SDimitry Andric // is not supported here. 6280fca6ea1SDimitry Andric DenseMap<MachineInstr *, int> Cycles, Stages; 6290fca6ea1SDimitry Andric std::vector<MachineInstr *> OrderedInsts; 6300fca6ea1SDimitry Andric for (auto &Info : SchedResult) { 6310fca6ea1SDimitry Andric auto *MI = std::get<0>(Info); 6320fca6ea1SDimitry Andric OrderedInsts.push_back(MI); 6330fca6ea1SDimitry Andric Cycles[MI] = std::get<1>(Info); 6340fca6ea1SDimitry Andric Stages[MI] = std::get<2>(Info); 6350fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "\tCycle " << Cycles[MI] << " [S." << Stages[MI] 6360fca6ea1SDimitry Andric << "]: " << *MI); 6370fca6ea1SDimitry Andric } 6380fca6ea1SDimitry Andric ModuloSchedule MS(*MF, &Loop, std::move(OrderedInsts), std::move(Cycles), 6390fca6ea1SDimitry Andric std::move(Stages)); 6400fca6ea1SDimitry Andric ModuloScheduleExpander MSE(*MF, MS, *Context->LIS, 6410fca6ea1SDimitry Andric ModuloScheduleExpander::InstrChangesTy()); 6420fca6ea1SDimitry Andric MSE.expand(); 6430fca6ea1SDimitry Andric MSE.cleanup(); 6440fca6ea1SDimitry Andric } 6450fca6ea1SDimitry Andric 6460fca6ea1SDimitry Andric void WindowScheduler::updateLiveIntervals() { 6470fca6ea1SDimitry Andric SmallVector<Register, 128> UsedRegs; 6480fca6ea1SDimitry Andric for (MachineInstr &MI : *MBB) 6490fca6ea1SDimitry Andric for (const MachineOperand &MO : MI.operands()) { 6500fca6ea1SDimitry Andric if (!MO.isReg() || MO.getReg() == 0) 6510fca6ea1SDimitry Andric continue; 6520fca6ea1SDimitry Andric Register Reg = MO.getReg(); 6530fca6ea1SDimitry Andric if (!is_contained(UsedRegs, Reg)) 6540fca6ea1SDimitry Andric UsedRegs.push_back(Reg); 6550fca6ea1SDimitry Andric } 6560fca6ea1SDimitry Andric Context->LIS->repairIntervalsInRange(MBB, MBB->begin(), MBB->end(), UsedRegs); 6570fca6ea1SDimitry Andric } 6580fca6ea1SDimitry Andric 6590fca6ea1SDimitry Andric iterator_range<MachineBasicBlock::iterator> 6600fca6ea1SDimitry Andric WindowScheduler::getScheduleRange(unsigned Offset, unsigned Num) { 6610fca6ea1SDimitry Andric auto RegionBegin = MBB->begin(); 6620fca6ea1SDimitry Andric std::advance(RegionBegin, Offset); 6630fca6ea1SDimitry Andric auto RegionEnd = RegionBegin; 6640fca6ea1SDimitry Andric std::advance(RegionEnd, Num); 6650fca6ea1SDimitry Andric return make_range(RegionBegin, RegionEnd); 6660fca6ea1SDimitry Andric } 6670fca6ea1SDimitry Andric 6680fca6ea1SDimitry Andric int WindowScheduler::getOriCycle(MachineInstr *NewMI) { 6690fca6ea1SDimitry Andric assert(TriToOri.count(NewMI) && "Cannot find original MI!"); 6700fca6ea1SDimitry Andric auto *OriMI = TriToOri[NewMI]; 6710fca6ea1SDimitry Andric assert(OriToCycle.count(OriMI) && "Cannot find schedule cycle!"); 6720fca6ea1SDimitry Andric return OriToCycle[OriMI]; 6730fca6ea1SDimitry Andric } 6740fca6ea1SDimitry Andric 6750fca6ea1SDimitry Andric MachineInstr *WindowScheduler::getOriMI(MachineInstr *NewMI) { 6760fca6ea1SDimitry Andric assert(TriToOri.count(NewMI) && "Cannot find original MI!"); 6770fca6ea1SDimitry Andric return TriToOri[NewMI]; 6780fca6ea1SDimitry Andric } 6790fca6ea1SDimitry Andric 6800fca6ea1SDimitry Andric unsigned WindowScheduler::getOriStage(MachineInstr *OriMI, unsigned Offset) { 6810fca6ea1SDimitry Andric assert(llvm::find(OriMIs, OriMI) != OriMIs.end() && 6820fca6ea1SDimitry Andric "Cannot find OriMI in OriMIs!"); 6830fca6ea1SDimitry Andric // If there is no instruction fold, all MI stages are 0. 6840fca6ea1SDimitry Andric if (Offset == SchedPhiNum) 6850fca6ea1SDimitry Andric return 0; 6860fca6ea1SDimitry Andric // For those MIs with an ID less than the Offset, their stages are set to 0, 6870fca6ea1SDimitry Andric // while the rest are set to 1. 6880fca6ea1SDimitry Andric unsigned Id = 0; 6890fca6ea1SDimitry Andric for (auto *MI : OriMIs) { 6900fca6ea1SDimitry Andric if (MI->isMetaInstruction()) 6910fca6ea1SDimitry Andric continue; 6920fca6ea1SDimitry Andric if (MI == OriMI) 6930fca6ea1SDimitry Andric break; 6940fca6ea1SDimitry Andric ++Id; 6950fca6ea1SDimitry Andric } 6960fca6ea1SDimitry Andric return Id >= (size_t)Offset ? 1 : 0; 6970fca6ea1SDimitry Andric } 6980fca6ea1SDimitry Andric 6990fca6ea1SDimitry Andric Register WindowScheduler::getAntiRegister(MachineInstr *Phi) { 7000fca6ea1SDimitry Andric assert(Phi->isPHI() && "Expecting PHI!"); 7010fca6ea1SDimitry Andric Register AntiReg; 7020fca6ea1SDimitry Andric for (auto MO : Phi->uses()) { 7030fca6ea1SDimitry Andric if (MO.isReg()) 7040fca6ea1SDimitry Andric AntiReg = MO.getReg(); 7050fca6ea1SDimitry Andric else if (MO.isMBB() && MO.getMBB() == MBB) 7060fca6ea1SDimitry Andric return AntiReg; 7070fca6ea1SDimitry Andric } 7080fca6ea1SDimitry Andric return 0; 7090fca6ea1SDimitry Andric } 710