//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing wave
/// occupancy on GCN hardware.
///
/// This pass will apply multiple scheduling stages to the same function.
/// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual
/// entry point for the scheduling of those regions is
/// GCNScheduleDAGMILive::runSchedStages.
///
/// Generally, the reason for having multiple scheduling stages is to account
/// for the kernel-wide effect of register usage on occupancy. Usually, only a
/// few scheduling regions will have register pressure high enough to limit
/// occupancy for the kernel, so constraints can be relaxed to improve ILP in
/// other regions.
///
//===----------------------------------------------------------------------===//

#include "GCNSchedStrategy.h"
#include "AMDGPUIGroupLP.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"

#define DEBUG_TYPE "machine-scheduler"

using namespace llvm;

static cl::opt<bool> DisableUnclusterHighRP(
    "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,
    cl::desc("Disable unclustered high register pressure "
             "reduction scheduling stage."),
    cl::init(false));

static cl::opt<bool> DisableClusteredLowOccupancy(
    "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,
    cl::desc("Disable clustered low occupancy "
             "rescheduling for ILP scheduling stage."),
    cl::init(false));

static cl::opt<unsigned> ScheduleMetricBias(
    "amdgpu-schedule-metric-bias", cl::Hidden,
    cl::desc(
        "Sets the bias which adds weight to occupancy vs latency. Set it to "
        "100 to chase the occupancy only."),
    cl::init(10));

static cl::opt<bool>
    RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden,
               cl::desc("Relax occupancy targets for kernels which are memory "
                        "bound (amdgpu-membound-threshold), or "
                        "Wave Limited (amdgpu-limit-wave-threshold)."),
               cl::init(false));

const unsigned ScheduleMetrics::ScaleFactor = 100;

GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
      HasHighPressure(false) {}

void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);

  MF = &DAG->MF;

  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();

  SGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
  VGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);

  SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
  // Set the initial TargetOccupancy to the maximum occupancy that we can
  // achieve for this function. This effectively sets a lower bound on the
  // 'Critical' register limits in the scheduler.
  // Allow for lower occupancy targets if the kernel is wave limited or memory
  // bound, and the relaxed occupancy feature is enabled.
  TargetOccupancy =
      RelaxedOcc ? MFI.getMinAllowedOccupancy() : MFI.getOccupancy();
  SGPRCriticalLimit =
      std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);

  if (!KnownExcessRP) {
    VGPRCriticalLimit =
        std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);
  } else {
    // This is similar to the result of ST.getMaxNumVGPRs(TargetOccupancy),
    // except that it returns a reasonably small number for targets with lots
    // of VGPRs, such as GFX10 and GFX11.
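    //
    // As an illustration of the budget computed below (the numbers are
    // hypothetical, not tied to a specific target): with Addressable = 256,
    // Granule = 8, and TargetOccupancy = 4, the budget is
    // alignDown(256 / 4, 8) = 64 VGPRs, clamped to at least one allocation
    // granule and capped at VGPRExcessLimit.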
    LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
                         "VGPRCriticalLimit calculation method.\n");

    unsigned Granule = AMDGPU::IsaInfo::getVGPRAllocGranule(&ST);
    unsigned Addressable = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
    unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);
    VGPRBudget = std::max(VGPRBudget, Granule);
    VGPRCriticalLimit = std::min(VGPRBudget, VGPRExcessLimit);
  }

  // Subtract error margin and bias from register limits and avoid overflow.
  SGPRCriticalLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRCriticalLimit);
  VGPRCriticalLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRCriticalLimit);
  SGPRExcessLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRExcessLimit);
  VGPRExcessLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRExcessLimit);

  LLVM_DEBUG(dbgs() << "VGPRCriticalLimit = " << VGPRCriticalLimit
                    << ", VGPRExcessLimit = " << VGPRExcessLimit
                    << ", SGPRCriticalLimit = " << SGPRCriticalLimit
                    << ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
}

/// Checks whether \p SU can use the cached DAG pressure diffs to compute the
/// current register pressure.
///
/// This works for the common case, but it has a few exceptions that have been
/// observed through trial and error:
///   - Explicit physical register operands
///   - Subregister definitions
///
/// In both of those cases, PressureDiff doesn't represent the actual pressure,
/// and querying LiveIntervals through the RegPressureTracker is needed to get
/// an accurate value.
///
/// We should eventually only use PressureDiff for maximum performance, but
/// this already allows 80% of SUs to take the fast path without changing
/// scheduling at all. Further changes would either change scheduling, or
/// require a lot more logic to recover an accurate pressure estimate from the
/// PressureDiffs.
static bool canUsePressureDiffs(const SUnit &SU) {
  if (!SU.isInstr())
    return false;

  // Cannot use pressure diffs for subregister defs or with physregs, it's
  // imprecise in both cases.
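  //
  // Hypothetical MIR examples of both exceptions: a subregister def such as
  //   %0.sub1:vreg_64 = V_MOV_B32_e32 0
  // only redefines part of %0, and an explicit physical register operand as in
  //   $vgpr0 = COPY %1
  // is not modeled by the cached diffs, so both cases must fall back to the
  // slower RegPressureTracker query.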
  for (const auto &Op : SU.getInstr()->operands()) {
    if (!Op.isReg() || Op.isImplicit())
      continue;
    if (Op.getReg().isPhysical() ||
        (Op.isDef() && Op.getSubReg() != AMDGPU::NoSubRegister))
      return false;
  }
  return true;
}

static void getRegisterPressures(bool AtTop,
                                 const RegPressureTracker &RPTracker, SUnit *SU,
                                 std::vector<unsigned> &Pressure,
                                 std::vector<unsigned> &MaxPressure) {
  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those functions a non-const copy.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
  if (AtTop)
    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
  else
    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
}

void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop,
                                     const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure, bool IsBottomUp) {
  Cand.SU = SU;
  Cand.AtTop = AtTop;

  if (!DAG->isTrackingPressure())
    return;

  Pressure.clear();
  MaxPressure.clear();

  // We try to use the cached PressureDiffs in the ScheduleDAG whenever
  // possible over querying the RegPressureTracker.
  //
  // RegPressureTracker will make a lot of LIS queries which are very
  // expensive, it is considered a slow function in this context.
  //
  // PressureDiffs are precomputed and cached, and getPressureDiff is just a
  // trivial lookup into an array. It is pretty much free.
  //
  // In EXPENSIVE_CHECKS, we always query RPTracker to verify the results of
  // PressureDiffs.
  if (AtTop || !canUsePressureDiffs(*SU)) {
    getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure);
  } else {
    // Reserve 4 slots.
    Pressure.resize(4, 0);
    Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
    Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;

    for (const auto &Diff : DAG->getPressureDiff(SU)) {
      if (!Diff.isValid())
        continue;
      // PressureDiffs is always bottom-up so if we're working top-down we need
      // to invert its sign.
      Pressure[Diff.getPSet()] +=
          (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
    }

#ifdef EXPENSIVE_CHECKS
    std::vector<unsigned> CheckPressure, CheckMaxPressure;
    getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure);
    if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
            CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
        Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
            CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
      errs() << "Register Pressure is inaccurate when calculated through "
                "PressureDiff\n"
             << "SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]
             << ", expected "
             << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << "\n"
             << "VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
             << ", expected "
             << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n";
      report_fatal_error("inaccurate register pressure calculation");
    }
#endif
  }

  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];

  // If two instructions increase the pressure of different register sets
  // by the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the least amount of registers,
  // which in our case would be SGPRs. This is rarely what we want, so
  // when we report excess/critical register pressure, we do it either
  // only for VGPRs or only for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
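  //
  // Illustrative (hypothetical) numbers for the tracking decision below: with
  // VGPRExcessLimit = 240 and VGPRPressure = 230, 230 + 16 >= 240 holds, so
  // VGPR excess tracking kicks in before the limit is actually crossed; SGPR
  // excess is only tracked when VGPRs are not already being tracked.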
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;

  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits. We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.

  // We only need to update the RPDelta for instructions that increase register
  // pressure. Instructions that decrease or keep reg pressure the same will be
  // marked as RegExcess in tryCandidate() when they are compared with
  // instructions that increase the register pressure.
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    HasHighPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    HasHighPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit. When
  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
  // has the same cost, so we don't need to prefer one over the other.
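  //
  // Sketch of the selection below (deltas are hypothetical): with
  // SGPRCriticalLimit = 94, VGPRCriticalLimit = 120, NewSGPRPressure = 96 and
  // NewVGPRPressure = 118, we get SGPRDelta = 2 and VGPRDelta = -2, so the
  // SGPR set (the larger delta) is reported as the critical pressure change.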

  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    HasHighPressure = true;
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeFromQueue()
void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                         const CandPolicy &ZonePolicy,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand,
                                         bool IsBottomUp) {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
  unsigned SGPRPressure = 0;
  unsigned VGPRPressure = 0;
  if (DAG->isTrackingPressure()) {
    SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
    VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
  }
  ReadyQueue &Q = Zone.Available;
  for (SUnit *SU : Q) {

    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
                  VGPRPressure, IsBottomUp);
    // Pass SchedBoundary only when comparing nodes from the same boundary.
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    tryCandidate(Cand, TryCand, ZoneArg);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
      Cand.setBest(TryCand);
      LLVM_DEBUG(traceCandidate(Cand));
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand,
                      /*IsBottomUp=*/true);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand,
                        /*IsBottomUp=*/true);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Check if the top Q has a better candidate.
  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand,
                      /*IsBottomUp=*/false);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand,
                        /*IsBottomUp=*/false);
      assert(TCand.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick best from BotCand and TopCand.
  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
             dbgs() << "Bot Cand: "; traceCandidate(BotCand););
  SchedCandidate Cand = BotCand;
  TopCand.Reason = NoCand;
  tryCandidate(Cand, TopCand, nullptr);
  if (TopCand.Reason != NoCand) {
    Cand.setBest(TopCand);
  }
  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNode()
SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand,
                          /*IsBottomUp=*/false);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy,
                          DAG->getBotRPTracker(), BotCand,
                          /*IsBottomUp=*/true);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}

GCNSchedStageID GCNSchedStrategy::getCurrentStage() {
  assert(CurrentStage && CurrentStage != SchedStages.end());
  return *CurrentStage;
}

bool GCNSchedStrategy::advanceStage() {
  assert(CurrentStage != SchedStages.end());
  if (!CurrentStage)
    CurrentStage = SchedStages.begin();
  else
    CurrentStage++;

  return CurrentStage != SchedStages.end();
}

bool GCNSchedStrategy::hasNextStage() const {
  assert(CurrentStage);
  return std::next(CurrentStage) != SchedStages.end();
}

GCNSchedStageID GCNSchedStrategy::getNextStage() const {
  assert(CurrentStage && std::next(CurrentStage) != SchedStages.end());
  return *std::next(CurrentStage);
}

GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C)
    : GCNSchedStrategy(C) {
  SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
  SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
  SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
  SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
}

GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C)
    : GCNSchedStrategy(C) {
  SchedStages.push_back(GCNSchedStageID::ILPInitialSchedule);
}

bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
                                          SchedCandidate &TryCand,
                                          SchedBoundary *Zone) const {
  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Avoid spilling by exceeding the register limit.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                  RegExcess, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Bias PhysReg Defs and copies to their uses and definitions, respectively.
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
    return TryCand.Reason != NoCand;

  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // Prioritize instructions that read unbuffered resources by stall cycles.
    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
      return TryCand.Reason != NoCand;

    // Avoid critical resource consumption and balance the schedule.
    TryCand.initResourceDelta(DAG, SchedModel);
    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
                TryCand, Cand, ResourceReduce))
      return TryCand.Reason != NoCand;
    if (tryGreater(TryCand.ResDelta.DemandedResources,
                   Cand.ResDelta.DemandedResources, TryCand, Cand,
                   ResourceDemand))
      return TryCand.Reason != NoCand;

    // Unconditionally try to reduce latency.
    if (tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // Weak edges are for clustering and other constraints.
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
      return TryCand.Reason != NoCand;
  }

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const SUnit *CandNextClusterSU =
      Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  const SUnit *TryCandNextClusterSU =
      TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
                 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // Fall through to original instruction order.
    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
        (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
      TryCand.Reason = NodeOrder;
      return true;
    }
  }
  return false;
}

GCNScheduleDAGMILive::GCNScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
    : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
      MFI(*MF.getInfo<SIMachineFunctionInfo>()),
      StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy) {

  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
  if (RelaxedOcc) {
    MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
    if (MinOccupancy != StartingOccupancy)
      LLVM_DEBUG(dbgs() << "Allowing Occupancy drops to " << MinOccupancy
                        << ".\n");
  }
}

std::unique_ptr<GCNSchedStage>
GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
  switch (SchedStageID) {
  case GCNSchedStageID::OccInitialSchedule:
    return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
  case GCNSchedStageID::PreRARematerialize:
    return std::make_unique<PreRARematStage>(SchedStageID, *this);
  case GCNSchedStageID::ILPInitialSchedule:
    return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
  }

  llvm_unreachable("Unknown SchedStageID.");
}

void GCNScheduleDAGMILive::schedule() {
  // Collect all scheduling regions. The actual scheduling is performed in
  // GCNScheduleDAGMILive::finalizeSchedule.
  Regions.push_back(std::pair(RegionBegin, RegionEnd));
}

GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}

void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
                                                const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has only one successor, then the live-ins of that successor
  // are the live-outs of the current block. We can reuse the calculated live
  // set if the successor will be sent to scheduling past the current block.

  // However, due to a bug in LiveInterval analysis, it may happen that two
  // predecessors of the same successor block have different lane bitmasks for
  // a live-out register. Work around that by sticking to a one-to-one
  // relationship, i.e. one predecessor with one successor block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1) {
    auto *Candidate = *MBB->succ_begin();
    if (!Candidate->empty() && Candidate->pred_size() == 1) {
      SlotIndexes *Ind = LIS->getSlotIndexes();
      if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))
        OnlySucc = Candidate;
    }
  }

  // Scheduler sends regions from the end of the block upwards.
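  //
  // The scan below turns that bottom-up region order back into a top-down
  // walk. For a hypothetical block whose three regions were recorded
  // bottom-up starting at Regions[RegionIdx] = R_bottom, then R_middle, then
  // R_top, CurRegion first advances past R_top and steps back one, so the
  // downward tracker can visit R_top, R_middle, R_bottom in source order,
  // decrementing CurRegion as each region boundary is reached.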
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  auto &Rgn = Regions[CurRegion];
  auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    I = Rgn.first;
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
#ifdef EXPENSIVE_CHECKS
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
    RPTracker.reset(*I, &LRS);
  }

  for (;;) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I || NonDbgMI == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      RPTracker.clearMaxPressure();
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
    }
    RPTracker.advanceToNext();
    RPTracker.advanceBeforeNext();
  }

  if (OnlySucc) {
    if (I != MBB->end()) {
      RPTracker.advanceToNext();
      RPTracker.advance(MBB->end());
    }
    RPTracker.advanceBeforeNext();
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}

DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getBBLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> BBStarters;
  BBStarters.reserve(Regions.size());
  auto I = Regions.rbegin(), E = Regions.rend();
  auto *BB = I->first->getParent();
  do {
    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
    BBStarters.push_back(MI);
    do {
      ++I;
    } while (I != E && I->first->getParent() == BB);
  } while (I != E);
  return getLiveRegMap(BBStarters, false /*After*/, *LIS);
}
void GCNScheduleDAGMILive::finalizeSchedule() {
  // Start actual scheduling here. This function is called by the base
  // MachineScheduler after all regions have been recorded by
  // GCNScheduleDAGMILive::schedule().
  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RescheduleRegions.resize(Regions.size());
  RegionsWithHighRP.resize(Regions.size());
  RegionsWithExcessRP.resize(Regions.size());
  RegionsWithMinOcc.resize(Regions.size());
  RegionsWithIGLPInstrs.resize(Regions.size());
  RescheduleRegions.set();
  RegionsWithHighRP.reset();
  RegionsWithExcessRP.reset();
  RegionsWithMinOcc.reset();
  RegionsWithIGLPInstrs.reset();

  runSchedStages();
}

void GCNScheduleDAGMILive::runSchedStages() {
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  if (!Regions.empty())
    BBLiveInMap = getBBLiveInMap();

  GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
  while (S.advanceStage()) {
    auto Stage = createSchedStage(S.getCurrentStage());
    if (!Stage->initGCNSchedStage())
      continue;

    for (auto Region : Regions) {
      RegionBegin = Region.first;
      RegionEnd = Region.second;
      // Setup for scheduling the region and check whether it should be
      // skipped.
      if (!Stage->initGCNRegion()) {
        Stage->advanceRegion();
        exitRegion();
        continue;
      }

      ScheduleDAGMILive::schedule();
      Stage->finalizeGCNRegion();
    }

    Stage->finalizeGCNSchedStage();
  }
}

#ifndef NDEBUG
raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
  switch (StageID) {
  case GCNSchedStageID::OccInitialSchedule:
    OS << "Max Occupancy Initial Schedule";
    break;
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    OS << "Unclustered High Register Pressure Reschedule";
    break;
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    OS << "Clustered Low Occupancy Reschedule";
    break;
  case GCNSchedStageID::PreRARematerialize:
    OS << "Pre-RA Rematerialize";
    break;
  case GCNSchedStageID::ILPInitialSchedule:
    OS << "Max ILP Initial Schedule";
    break;
  }

  return OS;
}
#endif

GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
    : DAG(DAG), S(static_cast<GCNSchedStrategy &>(*DAG.SchedImpl)), MF(DAG.MF),
      MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}

bool GCNSchedStage::initGCNSchedStage() {
  if (!DAG.LIS)
    return false;

  LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
  return true;
}

bool UnclusteredHighRPStage::initGCNSchedStage() {
  if (DisableUnclusterHighRP)
    return false;

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
    return false;

  SavedMutations.swap(DAG.Mutations);
  DAG.addMutation(
      createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PreRAReentry));

  InitialOccupancy = DAG.MinOccupancy;
  // Aggressively try to reduce register pressure in the unclustered high RP
  // stage. Temporarily increase the occupancy target in the region.
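  //
  // Raising the biases makes the next initialize() subtract more from the
  // critical limits. For example (the bias value here is illustrative only):
  // with a bias of 7 and an SGPRCriticalLimit of 94, this stage schedules
  // against an effective limit of 94 - 7 - ErrorMargin, pushing the
  // heuristics to cut register pressure harder than strictly required.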
  S.SGPRLimitBias = S.HighRPSGPRBias;
  S.VGPRLimitBias = S.HighRPVGPRBias;
  if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy)
    MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);

  LLVM_DEBUG(
      dbgs()
      << "Retrying function scheduling without clustering. "
         "Aggressively try to reduce register pressure to achieve occupancy "
      << DAG.MinOccupancy << ".\n");

  return true;
}

bool ClusteredLowOccStage::initGCNSchedStage() {
  if (DisableClusteredLowOccupancy)
    return false;

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  // Don't bother trying to improve ILP in lower RP regions if occupancy has
  // not been dropped. All regions will have already been scheduled with the
  // ideal occupancy targets.
  if (DAG.StartingOccupancy <= DAG.MinOccupancy)
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with lowest recorded occupancy "
             << DAG.MinOccupancy << ".\n");
  return true;
}

bool PreRARematStage::initGCNSchedStage() {
  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
    return false;

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  // Check maximum occupancy.
  if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) ==
      DAG.MinOccupancy)
    return false;

  // FIXME: This pass will invalidate cached MBBLiveIns for regions in between
  // the defs and the region we sunk the def to. Cached pressure for regions
  // where a def is sunk from will also be invalidated. This will need to be
  // fixed if there is another pass after this pass.
  assert(!S.hasNextStage());

  collectRematerializableInstructions();
  if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with improved occupancy of "
             << DAG.MinOccupancy << " from rematerializing\n");
  return true;
}

void GCNSchedStage::finalizeGCNSchedStage() {
  DAG.finishBlock();
  LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
}

void UnclusteredHighRPStage::finalizeGCNSchedStage() {
  SavedMutations.swap(DAG.Mutations);
  S.SGPRLimitBias = S.VGPRLimitBias = 0;
  if (DAG.MinOccupancy > InitialOccupancy) {
    for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX)
      DAG.RegionsWithMinOcc[IDX] =
          DAG.Pressure[IDX].getOccupancy(DAG.ST) == DAG.MinOccupancy;

    LLVM_DEBUG(dbgs() << StageID
                      << " stage successfully increased occupancy to "
                      << DAG.MinOccupancy << '\n');
  }

  GCNSchedStage::finalizeGCNSchedStage();
}

bool GCNSchedStage::initGCNRegion() {
  // Check whether this new region is also a new block.
  if (DAG.RegionBegin->getParent() != CurrentMBB)
    setupNewBlock();

  unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
  DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);

  // Skip empty scheduling regions (0 or 1 schedulable instructions).
  if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
    return false;

  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
  LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)
                    << " " << CurrentMBB->getName()
                    << "\n  From: " << *DAG.begin() << "    To: ";
             if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
             else dbgs() << "End";
             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

  // Save original instruction order before scheduling for possible revert.
  Unsched.clear();
  Unsched.reserve(DAG.NumRegionInstrs);
  if (StageID == GCNSchedStageID::OccInitialSchedule ||
      StageID == GCNSchedStageID::ILPInitialSchedule) {
    for (auto &I : DAG) {
      Unsched.push_back(&I);
      if (I.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER ||
          I.getOpcode() == AMDGPU::IGLP_OPT)
        DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
    }
  } else {
    for (auto &I : DAG)
      Unsched.push_back(&I);
  }

  PressureBefore = DAG.Pressure[RegionIdx];

  LLVM_DEBUG(
      dbgs() << "Pressure before scheduling:\nRegion live-ins:"
             << print(DAG.LiveIns[RegionIdx], DAG.MRI)
             << "Region live-in pressure: "
             << print(llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]))
             << "Region register pressure: " << print(PressureBefore));

  S.HasHighPressure = false;
  S.KnownExcessRP = isRegionWithExcessRP();

  if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
      StageID != GCNSchedStageID::UnclusteredHighRPReschedule) {
    SavedMutations.clear();
    SavedMutations.swap(DAG.Mutations);
    bool IsInitialStage = StageID == GCNSchedStageID::OccInitialSchedule ||
                          StageID == GCNSchedStageID::ILPInitialSchedule;
    DAG.addMutation(createIGroupLPDAGMutation(
        IsInitialStage ? AMDGPU::SchedulingPhase::Initial
                       : AMDGPU::SchedulingPhase::PreRAReentry));
  }

  return true;
}

bool UnclusteredHighRPStage::initGCNRegion() {
  // Only reschedule regions with the minimum occupancy or regions that may
  // have spilling (excess register pressure).
  if ((!DAG.RegionsWithMinOcc[RegionIdx] ||
       DAG.MinOccupancy <= InitialOccupancy) &&
      !DAG.RegionsWithExcessRP[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

bool ClusteredLowOccStage::initGCNRegion() {
  // We may need to reschedule this region if it wasn't rescheduled in the last
  // stage, or if we found it was testing critical register pressure limits in
  // the unclustered reschedule stage. The latter is because we may not have
  // been able to raise the min occupancy in the previous stage, so the region
  // may be overly constrained even if it was already rescheduled.
971bdd1243dSDimitry Andric if (!DAG.RegionsWithHighRP[RegionIdx]) 972972a253aSDimitry Andric return false; 973972a253aSDimitry Andric 974972a253aSDimitry Andric return GCNSchedStage::initGCNRegion(); 975972a253aSDimitry Andric } 976972a253aSDimitry Andric 977972a253aSDimitry Andric bool PreRARematStage::initGCNRegion() { 978972a253aSDimitry Andric if (!DAG.RescheduleRegions[RegionIdx]) 979972a253aSDimitry Andric return false; 980972a253aSDimitry Andric 981972a253aSDimitry Andric return GCNSchedStage::initGCNRegion(); 982972a253aSDimitry Andric } 983972a253aSDimitry Andric 984972a253aSDimitry Andric void GCNSchedStage::setupNewBlock() { 985972a253aSDimitry Andric if (CurrentMBB) 986972a253aSDimitry Andric DAG.finishBlock(); 987972a253aSDimitry Andric 988972a253aSDimitry Andric CurrentMBB = DAG.RegionBegin->getParent(); 989972a253aSDimitry Andric DAG.startBlock(CurrentMBB); 990972a253aSDimitry Andric // Get real RP for the region if it hasn't been calculated before. After the 991972a253aSDimitry Andric // initial schedule stage, real RP will be collected after scheduling. 99206c3fb27SDimitry Andric if (StageID == GCNSchedStageID::OccInitialSchedule || 99306c3fb27SDimitry Andric StageID == GCNSchedStageID::ILPInitialSchedule) 994972a253aSDimitry Andric DAG.computeBlockPressure(RegionIdx, CurrentMBB); 995972a253aSDimitry Andric } 996972a253aSDimitry Andric 997972a253aSDimitry Andric void GCNSchedStage::finalizeGCNRegion() { 998bdd1243dSDimitry Andric DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd); 999972a253aSDimitry Andric DAG.RescheduleRegions[RegionIdx] = false; 1000bdd1243dSDimitry Andric if (S.HasHighPressure) 1001972a253aSDimitry Andric DAG.RegionsWithHighRP[RegionIdx] = true; 1002972a253aSDimitry Andric 1003972a253aSDimitry Andric // Revert scheduling if we have dropped occupancy or there is some other 1004972a253aSDimitry Andric // reason that the original schedule is better. 1005972a253aSDimitry Andric checkScheduling(); 1006972a253aSDimitry Andric 1007bdd1243dSDimitry Andric if (DAG.RegionsWithIGLPInstrs[RegionIdx] && 1008bdd1243dSDimitry Andric StageID != GCNSchedStageID::UnclusteredHighRPReschedule) 1009bdd1243dSDimitry Andric SavedMutations.swap(DAG.Mutations); 1010bdd1243dSDimitry Andric 1011972a253aSDimitry Andric DAG.exitRegion(); 1012972a253aSDimitry Andric RegionIdx++; 1013972a253aSDimitry Andric } 1014972a253aSDimitry Andric 1015972a253aSDimitry Andric void GCNSchedStage::checkScheduling() { 1016972a253aSDimitry Andric // Check the results of scheduling. 1017972a253aSDimitry Andric PressureAfter = DAG.getRealRegPressure(RegionIdx); 1018bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter)); 1019bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n"); 1020972a253aSDimitry Andric 1021972a253aSDimitry Andric if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit && 1022972a253aSDimitry Andric PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) { 1023972a253aSDimitry Andric DAG.Pressure[RegionIdx] = PressureAfter; 1024972a253aSDimitry Andric DAG.RegionsWithMinOcc[RegionIdx] = 1025972a253aSDimitry Andric PressureAfter.getOccupancy(ST) == DAG.MinOccupancy; 1026972a253aSDimitry Andric 1027*0fca6ea1SDimitry Andric // Early out if we have achieved the occupancy target.
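// For instance (illustrative numbers only, not from any real target): with
// S.SGPRCriticalLimit == 102 and S.VGPRCriticalLimit == 64, a region whose
// post-scheduling pressure is 80 SGPRs / 60 VGPRs is within both critical
// limits, so the cached pressure is updated and no revert logic runs.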
1028972a253aSDimitry Andric LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n"); 1029972a253aSDimitry Andric return; 1030972a253aSDimitry Andric } 1031972a253aSDimitry Andric 1032bdd1243dSDimitry Andric unsigned TargetOccupancy = 1033bdd1243dSDimitry Andric std::min(S.getTargetOccupancy(), ST.getOccupancyWithLocalMemSize(MF)); 1034972a253aSDimitry Andric unsigned WavesAfter = 1035bdd1243dSDimitry Andric std::min(TargetOccupancy, PressureAfter.getOccupancy(ST)); 1036972a253aSDimitry Andric unsigned WavesBefore = 1037bdd1243dSDimitry Andric std::min(TargetOccupancy, PressureBefore.getOccupancy(ST)); 1038972a253aSDimitry Andric LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore 1039972a253aSDimitry Andric << ", after " << WavesAfter << ".\n"); 1040972a253aSDimitry Andric 1041972a253aSDimitry Andric // We may not be able to keep the current target occupancy because of the just 1042972a253aSDimitry Andric // scheduled region. We might still be able to revert scheduling if the 1043972a253aSDimitry Andric // occupancy before was higher, or if the current schedule has register 1044972a253aSDimitry Andric // pressure higher than the excess limits which could lead to more spilling. 1045972a253aSDimitry Andric unsigned NewOccupancy = std::max(WavesAfter, WavesBefore); 1046972a253aSDimitry Andric 1047972a253aSDimitry Andric // Allow memory bound functions to drop to 4 waves if not limited by an 1048972a253aSDimitry Andric // attribute. 1049972a253aSDimitry Andric if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy && 1050972a253aSDimitry Andric WavesAfter >= MFI.getMinAllowedOccupancy()) { 1051972a253aSDimitry Andric LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to " 1052972a253aSDimitry Andric << MFI.getMinAllowedOccupancy() << " waves\n"); 1053972a253aSDimitry Andric NewOccupancy = WavesAfter; 1054972a253aSDimitry Andric } 1055972a253aSDimitry Andric 1056972a253aSDimitry Andric if (NewOccupancy < DAG.MinOccupancy) { 1057972a253aSDimitry Andric DAG.MinOccupancy = NewOccupancy; 1058972a253aSDimitry Andric MFI.limitOccupancy(DAG.MinOccupancy); 1059972a253aSDimitry Andric DAG.RegionsWithMinOcc.reset(); 1060972a253aSDimitry Andric LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to " 1061972a253aSDimitry Andric << DAG.MinOccupancy << ".\n"); 1062972a253aSDimitry Andric } 1063*0fca6ea1SDimitry Andric // The maximum number of arch VGPR on non-unified register file, or the 1064*0fca6ea1SDimitry Andric // maximum VGPR + AGPR in the unified register file case. 1065972a253aSDimitry Andric unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF); 1066*0fca6ea1SDimitry Andric // The maximum number of arch VGPR for both unified and non-unified register 1067*0fca6ea1SDimitry Andric // file. 
1068*0fca6ea1SDimitry Andric unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs()); 1069972a253aSDimitry Andric unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF); 1070*0fca6ea1SDimitry Andric 1071*0fca6ea1SDimitry Andric if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) > MaxVGPRs || 1072*0fca6ea1SDimitry Andric PressureAfter.getVGPRNum(false) > MaxArchVGPRs || 1073*0fca6ea1SDimitry Andric PressureAfter.getAGPRNum() > MaxArchVGPRs || 1074972a253aSDimitry Andric PressureAfter.getSGPRNum() > MaxSGPRs) { 1075972a253aSDimitry Andric DAG.RescheduleRegions[RegionIdx] = true; 1076972a253aSDimitry Andric DAG.RegionsWithHighRP[RegionIdx] = true; 1077bdd1243dSDimitry Andric DAG.RegionsWithExcessRP[RegionIdx] = true; 1078972a253aSDimitry Andric } 1079972a253aSDimitry Andric 1080972a253aSDimitry Andric // Revert if this region's schedule would cause a drop in occupancy or 1081972a253aSDimitry Andric // spilling. 1082972a253aSDimitry Andric if (shouldRevertScheduling(WavesAfter)) { 1083972a253aSDimitry Andric revertScheduling(); 1084972a253aSDimitry Andric } else { 1085972a253aSDimitry Andric DAG.Pressure[RegionIdx] = PressureAfter; 1086972a253aSDimitry Andric DAG.RegionsWithMinOcc[RegionIdx] = 1087972a253aSDimitry Andric PressureAfter.getOccupancy(ST) == DAG.MinOccupancy; 1088972a253aSDimitry Andric } 1089972a253aSDimitry Andric } 1090972a253aSDimitry Andric 1091bdd1243dSDimitry Andric unsigned 1092bdd1243dSDimitry Andric GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle, 1093bdd1243dSDimitry Andric DenseMap<unsigned, unsigned> &ReadyCycles, 1094bdd1243dSDimitry Andric const TargetSchedModel &SM) { 1095bdd1243dSDimitry Andric unsigned ReadyCycle = CurrCycle; 1096bdd1243dSDimitry Andric for (auto &D : SU.Preds) { 1097bdd1243dSDimitry Andric if (D.isAssignedRegDep()) { 1098bdd1243dSDimitry Andric MachineInstr *DefMI = D.getSUnit()->getInstr(); 1099bdd1243dSDimitry Andric unsigned Latency = SM.computeInstrLatency(DefMI); 1100bdd1243dSDimitry Andric unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum]; 1101bdd1243dSDimitry Andric ReadyCycle = std::max(ReadyCycle, DefReady + Latency); 1102bdd1243dSDimitry Andric } 1103bdd1243dSDimitry Andric } 1104bdd1243dSDimitry Andric ReadyCycles[SU.NodeNum] = ReadyCycle; 1105bdd1243dSDimitry Andric return ReadyCycle; 1106bdd1243dSDimitry Andric } 1107bdd1243dSDimitry Andric 1108bdd1243dSDimitry Andric #ifndef NDEBUG 1109bdd1243dSDimitry Andric struct EarlierIssuingCycle { 1110bdd1243dSDimitry Andric bool operator()(std::pair<MachineInstr *, unsigned> A, 1111bdd1243dSDimitry Andric std::pair<MachineInstr *, unsigned> B) const { 1112bdd1243dSDimitry Andric return A.second < B.second; 1113bdd1243dSDimitry Andric } 1114bdd1243dSDimitry Andric }; 1115bdd1243dSDimitry Andric 1116bdd1243dSDimitry Andric static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>, 1117bdd1243dSDimitry Andric EarlierIssuingCycle> &ReadyCycles) { 1118bdd1243dSDimitry Andric if (ReadyCycles.empty()) 1119bdd1243dSDimitry Andric return; 1120bdd1243dSDimitry Andric unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber(); 1121bdd1243dSDimitry Andric dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum 1122bdd1243dSDimitry Andric << " ##################\n# Cycle #\t\t\tInstruction " 1123bdd1243dSDimitry Andric " " 1124bdd1243dSDimitry Andric " \n"; 1125bdd1243dSDimitry Andric unsigned IPrev = 1; 1126bdd1243dSDimitry Andric for (auto &I : ReadyCycles) { 1127bdd1243dSDimitry Andric if (I.second > IPrev + 1)
1128bdd1243dSDimitry Andric dbgs() << "****************************** BUBBLE OF " << I.second - IPrev 1129bdd1243dSDimitry Andric << " CYCLES DETECTED ******************************\n\n"; 1130bdd1243dSDimitry Andric dbgs() << "[ " << I.second << " ] : " << *I.first << "\n"; 1131bdd1243dSDimitry Andric IPrev = I.second; 1132bdd1243dSDimitry Andric } 1133bdd1243dSDimitry Andric } 1134bdd1243dSDimitry Andric #endif 1135bdd1243dSDimitry Andric 1136bdd1243dSDimitry Andric ScheduleMetrics 1137bdd1243dSDimitry Andric GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) { 1138bdd1243dSDimitry Andric #ifndef NDEBUG 1139bdd1243dSDimitry Andric std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle> 1140bdd1243dSDimitry Andric ReadyCyclesSorted; 1141bdd1243dSDimitry Andric #endif 1142bdd1243dSDimitry Andric const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel(); 1143bdd1243dSDimitry Andric unsigned SumBubbles = 0; 1144bdd1243dSDimitry Andric DenseMap<unsigned, unsigned> ReadyCycles; 1145bdd1243dSDimitry Andric unsigned CurrCycle = 0; 1146bdd1243dSDimitry Andric for (auto &SU : InputSchedule) { 1147bdd1243dSDimitry Andric unsigned ReadyCycle = 1148bdd1243dSDimitry Andric computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM); 1149bdd1243dSDimitry Andric SumBubbles += ReadyCycle - CurrCycle; 1150bdd1243dSDimitry Andric #ifndef NDEBUG 1151bdd1243dSDimitry Andric ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle)); 1152bdd1243dSDimitry Andric #endif 1153bdd1243dSDimitry Andric CurrCycle = ++ReadyCycle; 1154bdd1243dSDimitry Andric } 1155bdd1243dSDimitry Andric #ifndef NDEBUG 1156bdd1243dSDimitry Andric LLVM_DEBUG( 1157bdd1243dSDimitry Andric printScheduleModel(ReadyCyclesSorted); 1158bdd1243dSDimitry Andric dbgs() << "\n\t" 1159bdd1243dSDimitry Andric << "Metric: " 1160bdd1243dSDimitry Andric << (SumBubbles 1161bdd1243dSDimitry Andric ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1162bdd1243dSDimitry Andric : 1) 1163bdd1243dSDimitry Andric << "\n\n"); 1164bdd1243dSDimitry Andric #endif 1165bdd1243dSDimitry Andric 1166bdd1243dSDimitry Andric return ScheduleMetrics(CurrCycle, SumBubbles); 1167bdd1243dSDimitry Andric } 1168bdd1243dSDimitry Andric 1169bdd1243dSDimitry Andric ScheduleMetrics 1170bdd1243dSDimitry Andric GCNSchedStage::getScheduleMetrics(const GCNScheduleDAGMILive &DAG) { 1171bdd1243dSDimitry Andric #ifndef NDEBUG 1172bdd1243dSDimitry Andric std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle> 1173bdd1243dSDimitry Andric ReadyCyclesSorted; 1174bdd1243dSDimitry Andric #endif 1175bdd1243dSDimitry Andric const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel(); 1176bdd1243dSDimitry Andric unsigned SumBubbles = 0; 1177bdd1243dSDimitry Andric DenseMap<unsigned, unsigned> ReadyCycles; 1178bdd1243dSDimitry Andric unsigned CurrCycle = 0; 1179bdd1243dSDimitry Andric for (auto &MI : DAG) { 1180bdd1243dSDimitry Andric SUnit *SU = DAG.getSUnit(&MI); 1181bdd1243dSDimitry Andric if (!SU) 1182bdd1243dSDimitry Andric continue; 1183bdd1243dSDimitry Andric unsigned ReadyCycle = 1184bdd1243dSDimitry Andric computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM); 1185bdd1243dSDimitry Andric SumBubbles += ReadyCycle - CurrCycle; 1186bdd1243dSDimitry Andric #ifndef NDEBUG 1187bdd1243dSDimitry Andric ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle)); 1188bdd1243dSDimitry Andric #endif 1189bdd1243dSDimitry Andric CurrCycle = ++ReadyCycle; 1190bdd1243dSDimitry Andric } 1191bdd1243dSDimitry Andric #ifndef NDEBUG 1192bdd1243dSDimitry Andric LLVM_DEBUG( 1193bdd1243dSDimitry Andric printScheduleModel(ReadyCyclesSorted); 1194bdd1243dSDimitry Andric dbgs() << "\n\t" 1195bdd1243dSDimitry Andric << "Metric: " 1196bdd1243dSDimitry Andric << (SumBubbles 1197bdd1243dSDimitry Andric ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle 1198bdd1243dSDimitry Andric : 1) 1199bdd1243dSDimitry Andric << "\n\n"); 1200bdd1243dSDimitry Andric #endif 1201bdd1243dSDimitry Andric 1202bdd1243dSDimitry Andric return ScheduleMetrics(CurrCycle, SumBubbles); 1203bdd1243dSDimitry Andric } 1204bdd1243dSDimitry Andric 1205972a253aSDimitry Andric bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) { 1206972a253aSDimitry Andric if (WavesAfter < DAG.MinOccupancy) 1207972a253aSDimitry Andric return true; 1208972a253aSDimitry Andric 1209972a253aSDimitry Andric return false; 1210972a253aSDimitry Andric } 1211972a253aSDimitry Andric 1212bdd1243dSDimitry Andric bool OccInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) { 1213bdd1243dSDimitry Andric if (PressureAfter == PressureBefore) 1214bdd1243dSDimitry Andric return false; 1215bdd1243dSDimitry Andric 1216972a253aSDimitry Andric if (GCNSchedStage::shouldRevertScheduling(WavesAfter)) 1217972a253aSDimitry Andric return true; 1218972a253aSDimitry Andric 1219972a253aSDimitry Andric if (mayCauseSpilling(WavesAfter)) 1220972a253aSDimitry Andric return true; 1221972a253aSDimitry Andric 1222972a253aSDimitry Andric return false; 1223972a253aSDimitry Andric } 1224972a253aSDimitry Andric 1225bdd1243dSDimitry Andric bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) { 12265f757f3fSDimitry Andric // If RP is not reduced in the unclustered reschedule stage, revert to the 1227bdd1243dSDimitry Andric // old schedule.
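// The quick check below reverts outright when occupancy did not improve and
// the new schedule may spill; otherwise the decision falls to the
// bubble-metric comparison that follows it. Worked example with hypothetical
// values (and assuming the spilling check does not fire): WavesBefore ==
// WavesAfter == 4, OldMetric == 120, NewMetric == 90, ScheduleMetricBias ==
// 10, ScaleFactor == 100:
//   Profit = ((4 * 100) / 4) * ((120 + 10) * 100) / 90 / 100
//          = (100 * 13000) / 90 / 100 = 144
// Since 144 >= ScaleFactor, shouldRevertScheduling() returns false and the
// unclustered schedule is kept.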
1228bdd1243dSDimitry Andric if ((WavesAfter <= PressureBefore.getOccupancy(ST) && 1229bdd1243dSDimitry Andric mayCauseSpilling(WavesAfter)) || 1230bdd1243dSDimitry Andric GCNSchedStage::shouldRevertScheduling(WavesAfter)) { 1231972a253aSDimitry Andric LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n"); 1232972a253aSDimitry Andric return true; 1233972a253aSDimitry Andric } 1234972a253aSDimitry Andric 123506c3fb27SDimitry Andric // Do not attempt to relax schedule even more if we are already spilling. 123606c3fb27SDimitry Andric if (isRegionWithExcessRP()) 123706c3fb27SDimitry Andric return false; 123806c3fb27SDimitry Andric 1239bdd1243dSDimitry Andric LLVM_DEBUG( 1240bdd1243dSDimitry Andric dbgs() 1241bdd1243dSDimitry Andric << "\n\t *** In shouldRevertScheduling ***\n" 1242bdd1243dSDimitry Andric << " *********** BEFORE UnclusteredHighRPStage ***********\n"); 1243bdd1243dSDimitry Andric ScheduleMetrics MBefore = 1244bdd1243dSDimitry Andric getScheduleMetrics(DAG.SUnits); 1245bdd1243dSDimitry Andric LLVM_DEBUG( 1246bdd1243dSDimitry Andric dbgs() 1247bdd1243dSDimitry Andric << "\n *********** AFTER UnclusteredHighRPStage ***********\n"); 1248bdd1243dSDimitry Andric ScheduleMetrics MAfter = getScheduleMetrics(DAG); 1249bdd1243dSDimitry Andric unsigned OldMetric = MBefore.getMetric(); 1250bdd1243dSDimitry Andric unsigned NewMetric = MAfter.getMetric(); 1251bdd1243dSDimitry Andric unsigned WavesBefore = 1252bdd1243dSDimitry Andric std::min(S.getTargetOccupancy(), PressureBefore.getOccupancy(ST)); 1253bdd1243dSDimitry Andric unsigned Profit = 1254bdd1243dSDimitry Andric ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore * 1255bdd1243dSDimitry Andric ((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) / 1256bdd1243dSDimitry Andric NewMetric) / 1257bdd1243dSDimitry Andric ScheduleMetrics::ScaleFactor; 1258bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after " 1259bdd1243dSDimitry Andric << MAfter << "Profit: " << Profit << "\n"); 1260bdd1243dSDimitry Andric return Profit < ScheduleMetrics::ScaleFactor; 1261972a253aSDimitry Andric } 1262972a253aSDimitry Andric 1263972a253aSDimitry Andric bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) { 1264bdd1243dSDimitry Andric if (PressureAfter == PressureBefore) 1265bdd1243dSDimitry Andric return false; 1266bdd1243dSDimitry Andric 1267972a253aSDimitry Andric if (GCNSchedStage::shouldRevertScheduling(WavesAfter)) 1268972a253aSDimitry Andric return true; 1269972a253aSDimitry Andric 1270972a253aSDimitry Andric if (mayCauseSpilling(WavesAfter)) 1271972a253aSDimitry Andric return true; 1272972a253aSDimitry Andric 1273972a253aSDimitry Andric return false; 1274972a253aSDimitry Andric } 1275972a253aSDimitry Andric 1276972a253aSDimitry Andric bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) { 1277972a253aSDimitry Andric if (GCNSchedStage::shouldRevertScheduling(WavesAfter)) 1278972a253aSDimitry Andric return true; 1279972a253aSDimitry Andric 1280972a253aSDimitry Andric if (mayCauseSpilling(WavesAfter)) 1281972a253aSDimitry Andric return true; 1282972a253aSDimitry Andric 1283972a253aSDimitry Andric return false; 1284972a253aSDimitry Andric } 1285972a253aSDimitry Andric 1286bdd1243dSDimitry Andric bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) { 1287bdd1243dSDimitry Andric if (mayCauseSpilling(WavesAfter)) 1288bdd1243dSDimitry Andric return true; 1289bdd1243dSDimitry Andric 1290bdd1243dSDimitry Andric return false; 
1291bdd1243dSDimitry Andric } 1292bdd1243dSDimitry Andric 1293972a253aSDimitry Andric bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) { 1294*0fca6ea1SDimitry Andric if (WavesAfter <= MFI.getMinWavesPerEU() && isRegionWithExcessRP() && 1295*0fca6ea1SDimitry Andric !PressureAfter.less(MF, PressureBefore)) { 1296972a253aSDimitry Andric LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n"); 1297972a253aSDimitry Andric return true; 1298972a253aSDimitry Andric } 1299972a253aSDimitry Andric 1300972a253aSDimitry Andric return false; 1301972a253aSDimitry Andric } 1302972a253aSDimitry Andric 1303972a253aSDimitry Andric void GCNSchedStage::revertScheduling() { 1304972a253aSDimitry Andric DAG.RegionsWithMinOcc[RegionIdx] = 1305972a253aSDimitry Andric PressureBefore.getOccupancy(ST) == DAG.MinOccupancy; 1306972a253aSDimitry Andric LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n"); 1307972a253aSDimitry Andric DAG.RescheduleRegions[RegionIdx] = 1308bdd1243dSDimitry Andric S.hasNextStage() && 1309bdd1243dSDimitry Andric S.getNextStage() != GCNSchedStageID::UnclusteredHighRPReschedule; 1310972a253aSDimitry Andric DAG.RegionEnd = DAG.RegionBegin; 1311972a253aSDimitry Andric int SkippedDebugInstr = 0; 1312972a253aSDimitry Andric for (MachineInstr *MI : Unsched) { 1313972a253aSDimitry Andric if (MI->isDebugInstr()) { 1314972a253aSDimitry Andric ++SkippedDebugInstr; 1315972a253aSDimitry Andric continue; 1316972a253aSDimitry Andric } 1317972a253aSDimitry Andric 1318972a253aSDimitry Andric if (MI->getIterator() != DAG.RegionEnd) { 1319972a253aSDimitry Andric DAG.BB->remove(MI); 1320972a253aSDimitry Andric DAG.BB->insert(DAG.RegionEnd, MI); 1321972a253aSDimitry Andric if (!MI->isDebugInstr()) 1322972a253aSDimitry Andric DAG.LIS->handleMove(*MI, true); 1323972a253aSDimitry Andric } 1324972a253aSDimitry Andric 1325972a253aSDimitry Andric // Reset read-undef flags and update them later. 132606c3fb27SDimitry Andric for (auto &Op : MI->all_defs()) 1327972a253aSDimitry Andric Op.setIsUndef(false); 1328972a253aSDimitry Andric RegisterOperands RegOpers; 1329972a253aSDimitry Andric RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false); 1330972a253aSDimitry Andric if (!MI->isDebugInstr()) { 1331972a253aSDimitry Andric if (DAG.ShouldTrackLaneMasks) { 1332972a253aSDimitry Andric // Adjust liveness and add missing dead+read-undef flags. 1333972a253aSDimitry Andric SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot(); 1334972a253aSDimitry Andric RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI); 1335972a253aSDimitry Andric } else { 1336972a253aSDimitry Andric // Adjust for missing dead-def flags. 1337972a253aSDimitry Andric RegOpers.detectDeadDefs(*MI, *DAG.LIS); 1338972a253aSDimitry Andric } 1339972a253aSDimitry Andric } 1340972a253aSDimitry Andric DAG.RegionEnd = MI->getIterator(); 1341972a253aSDimitry Andric ++DAG.RegionEnd; 1342972a253aSDimitry Andric LLVM_DEBUG(dbgs() << "Scheduling " << *MI); 1343972a253aSDimitry Andric } 1344972a253aSDimitry Andric 1345972a253aSDimitry Andric // After reverting the schedule, debug instrs will now be at the end of the 1346972a253aSDimitry Andric // block and RegionEnd will point to the first debug instr. Increment RegionEnd 1347972a253aSDimitry Andric // past debug instrs to the actual end of the scheduling region.
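// Illustrative block layout at this point (hypothetical instructions):
//   I0  I1  I2  DBG_VALUE  DBG_VALUE   <- debug instrs collected at block end
//   ^RegionBegin           ^RegionEnd currently points at the first DBG_VALUE,
// so with SkippedDebugInstr == 2 the loop below advances it two steps to the
// true end of the region.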
1348972a253aSDimitry Andric while (SkippedDebugInstr-- > 0) 1349972a253aSDimitry Andric ++DAG.RegionEnd; 1350972a253aSDimitry Andric 1351972a253aSDimitry Andric // If Unsched.front() instruction is a debug instruction, this will actually 1352972a253aSDimitry Andric // shrink the region since we moved all debug instructions to the end of the 1353972a253aSDimitry Andric // block. Find the first instruction that is not a debug instruction. 1354972a253aSDimitry Andric DAG.RegionBegin = Unsched.front()->getIterator(); 1355972a253aSDimitry Andric if (DAG.RegionBegin->isDebugInstr()) { 1356972a253aSDimitry Andric for (MachineInstr *MI : Unsched) { 1357972a253aSDimitry Andric if (MI->isDebugInstr()) 1358972a253aSDimitry Andric continue; 1359972a253aSDimitry Andric DAG.RegionBegin = MI->getIterator(); 1360972a253aSDimitry Andric break; 1361972a253aSDimitry Andric } 1362972a253aSDimitry Andric } 1363972a253aSDimitry Andric 1364972a253aSDimitry Andric // Then move the debug instructions back into their correct place and set 1365972a253aSDimitry Andric // RegionBegin and RegionEnd if needed. 1366972a253aSDimitry Andric DAG.placeDebugValues(); 1367972a253aSDimitry Andric 1368bdd1243dSDimitry Andric DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd); 1369972a253aSDimitry Andric } 1370972a253aSDimitry Andric 1371972a253aSDimitry Andric void PreRARematStage::collectRematerializableInstructions() { 1372972a253aSDimitry Andric const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI); 1373972a253aSDimitry Andric for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) { 137481ad6265SDimitry Andric Register Reg = Register::index2VirtReg(I); 1375972a253aSDimitry Andric if (!DAG.LIS->hasInterval(Reg)) 137681ad6265SDimitry Andric continue; 137781ad6265SDimitry Andric 137881ad6265SDimitry Andric // TODO: Handle AGPR and SGPR rematerialization 1379972a253aSDimitry Andric if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) || 1380972a253aSDimitry Andric !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg)) 138181ad6265SDimitry Andric continue; 138281ad6265SDimitry Andric 1383972a253aSDimitry Andric MachineOperand *Op = DAG.MRI.getOneDef(Reg); 138481ad6265SDimitry Andric MachineInstr *Def = Op->getParent(); 1385fcaf7f86SDimitry Andric if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def)) 138681ad6265SDimitry Andric continue; 138781ad6265SDimitry Andric 1388972a253aSDimitry Andric MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg); 138981ad6265SDimitry Andric if (Def->getParent() == UseI->getParent()) 139081ad6265SDimitry Andric continue; 139181ad6265SDimitry Andric 139281ad6265SDimitry Andric // We are only collecting defs that are defined in another block and are 139381ad6265SDimitry Andric // live-through or used inside regions at MinOccupancy. This means that the 139481ad6265SDimitry Andric // register must be in the live-in set for the region. 
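// A typical candidate (illustrative MIR, register numbers hypothetical): a
// constant materialized in the entry block with a single use in a later,
// pressure-limited region,
//   bb.0:  %42:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
//   ...
//   bb.3:  %45:vgpr_32 = V_ADD_F32_e32 %42, %44, implicit $mode, implicit $exec
// %42 has one def and one non-debug use, reads no virtual registers, and is
// live through every region between its def and its use, so it is recorded
// for each region whose live-in set contains it.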
139581ad6265SDimitry Andric bool AddedToRematList = false; 1396972a253aSDimitry Andric for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) { 1397972a253aSDimitry Andric auto It = DAG.LiveIns[I].find(Reg); 1398972a253aSDimitry Andric if (It != DAG.LiveIns[I].end() && !It->second.none()) { 1399972a253aSDimitry Andric if (DAG.RegionsWithMinOcc[I]) { 140081ad6265SDimitry Andric RematerializableInsts[I][Def] = UseI; 140181ad6265SDimitry Andric AddedToRematList = true; 140281ad6265SDimitry Andric } 140381ad6265SDimitry Andric 140481ad6265SDimitry Andric // Collect regions with rematerializable reg as live-in to avoid 140581ad6265SDimitry Andric // searching later when updating RP. 140681ad6265SDimitry Andric RematDefToLiveInRegions[Def].push_back(I); 140781ad6265SDimitry Andric } 140881ad6265SDimitry Andric } 140981ad6265SDimitry Andric if (!AddedToRematList) 141081ad6265SDimitry Andric RematDefToLiveInRegions.erase(Def); 141181ad6265SDimitry Andric } 141281ad6265SDimitry Andric } 141381ad6265SDimitry Andric 1414972a253aSDimitry Andric bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST, 141581ad6265SDimitry Andric const TargetInstrInfo *TII) { 141681ad6265SDimitry Andric // Temporary copies of cached variables we will be modifying and replacing if 141781ad6265SDimitry Andric // sinking succeeds. 141881ad6265SDimitry Andric SmallVector< 141981ad6265SDimitry Andric std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32> 142081ad6265SDimitry Andric NewRegions; 142181ad6265SDimitry Andric DenseMap<unsigned, GCNRPTracker::LiveRegSet> NewLiveIns; 142281ad6265SDimitry Andric DenseMap<unsigned, GCNRegPressure> NewPressure; 142381ad6265SDimitry Andric BitVector NewRescheduleRegions; 1424972a253aSDimitry Andric LiveIntervals *LIS = DAG.LIS; 142581ad6265SDimitry Andric 1426972a253aSDimitry Andric NewRegions.resize(DAG.Regions.size()); 1427972a253aSDimitry Andric NewRescheduleRegions.resize(DAG.Regions.size()); 142881ad6265SDimitry Andric 142981ad6265SDimitry Andric // Collect only regions that have a rematerializable def as a live-in. 143081ad6265SDimitry Andric SmallSet<unsigned, 16> ImpactedRegions; 143181ad6265SDimitry Andric for (const auto &It : RematDefToLiveInRegions) 143281ad6265SDimitry Andric ImpactedRegions.insert(It.second.begin(), It.second.end()); 143381ad6265SDimitry Andric 143481ad6265SDimitry Andric // Make copies of register pressure and live-ins cache that will be updated 143581ad6265SDimitry Andric // as we rematerialize. 143681ad6265SDimitry Andric for (auto Idx : ImpactedRegions) { 1437972a253aSDimitry Andric NewPressure[Idx] = DAG.Pressure[Idx]; 1438972a253aSDimitry Andric NewLiveIns[Idx] = DAG.LiveIns[Idx]; 143981ad6265SDimitry Andric } 1440972a253aSDimitry Andric NewRegions = DAG.Regions; 144181ad6265SDimitry Andric NewRescheduleRegions.reset(); 144281ad6265SDimitry Andric 144381ad6265SDimitry Andric DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef; 144481ad6265SDimitry Andric bool Improved = false; 144581ad6265SDimitry Andric for (auto I : ImpactedRegions) { 1446972a253aSDimitry Andric if (!DAG.RegionsWithMinOcc[I]) 144781ad6265SDimitry Andric continue; 144881ad6265SDimitry Andric 144981ad6265SDimitry Andric Improved = false; 145081ad6265SDimitry Andric int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts()); 145181ad6265SDimitry Andric int SGPRUsage = NewPressure[I].getSGPRNum(); 145281ad6265SDimitry Andric 145381ad6265SDimitry Andric // TODO: Handle occupancy drop due to AGPR and SGPR.
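// Illustrative bailout (hypothetical numbers): if SGPRUsage is 98 and
// ST.getOccupancyWithNumSGPRs(98) already equals DAG.MinOccupancy, the
// occupancy cap comes from SGPRs, so sinking VGPR defs cannot raise it and
// the loop gives up on this region.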
145481ad6265SDimitry Andric // Check whether the occupancy drop is due to VGPR usage and not SGPR usage. 1455972a253aSDimitry Andric if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy) 145681ad6265SDimitry Andric break; 145781ad6265SDimitry Andric 145881ad6265SDimitry Andric // The occupancy of this region could have been improved by a previous 145981ad6265SDimitry Andric // iteration's sinking of defs. 1460972a253aSDimitry Andric if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) { 146181ad6265SDimitry Andric NewRescheduleRegions[I] = true; 146281ad6265SDimitry Andric Improved = true; 146381ad6265SDimitry Andric continue; 146481ad6265SDimitry Andric } 146581ad6265SDimitry Andric 146681ad6265SDimitry Andric // First check if we have enough trivially rematerializable instructions to 146781ad6265SDimitry Andric // improve occupancy. Optimistically assume all instructions we are able to 146881ad6265SDimitry Andric // sink decrease RP. 146981ad6265SDimitry Andric int TotalSinkableRegs = 0; 147081ad6265SDimitry Andric for (const auto &It : RematerializableInsts[I]) { 147181ad6265SDimitry Andric MachineInstr *Def = It.first; 147281ad6265SDimitry Andric Register DefReg = Def->getOperand(0).getReg(); 147381ad6265SDimitry Andric TotalSinkableRegs += 147481ad6265SDimitry Andric SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]); 147581ad6265SDimitry Andric } 147681ad6265SDimitry Andric int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs; 147781ad6265SDimitry Andric unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink); 147881ad6265SDimitry Andric // If, in the most optimistic scenario, we cannot improve occupancy, do 147981ad6265SDimitry Andric // not attempt to sink any instructions. 1480972a253aSDimitry Andric if (OptimisticOccupancy <= DAG.MinOccupancy) 148181ad6265SDimitry Andric break; 148281ad6265SDimitry Andric 148381ad6265SDimitry Andric unsigned ImproveOccupancy = 0; 148481ad6265SDimitry Andric SmallVector<MachineInstr *, 4> SinkedDefs; 148581ad6265SDimitry Andric for (auto &It : RematerializableInsts[I]) { 148681ad6265SDimitry Andric MachineInstr *Def = It.first; 148781ad6265SDimitry Andric MachineBasicBlock::iterator InsertPos = 148881ad6265SDimitry Andric MachineBasicBlock::iterator(It.second); 148981ad6265SDimitry Andric Register Reg = Def->getOperand(0).getReg(); 149081ad6265SDimitry Andric // Rematerialize MI to its use block. Since we are only rematerializing 149181ad6265SDimitry Andric // instructions that do not have any virtual reg uses, we do not need to 149281ad6265SDimitry Andric // call LiveRangeEdit::allUsesAvailableAt() and 149381ad6265SDimitry Andric // LiveRangeEdit::canRematerializeAt().
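// Sketch of the transformation performed below (illustrative registers):
//   before:  bb.0: %5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
//            bb.2: %6:vgpr_32 = V_ADD_U32_e32 %5, %7, implicit $exec
//   after:   bb.2: %5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
//            bb.2: %6:vgpr_32 = V_ADD_U32_e32 %5, %7, implicit $exec
// The clone is inserted right before the use; the original def is only
// erased later, once every impacted region is known to improve.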
149481ad6265SDimitry Andric TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg, 1495972a253aSDimitry Andric Def->getOperand(0).getSubReg(), *Def, *DAG.TRI); 1496bdd1243dSDimitry Andric MachineInstr *NewMI = &*std::prev(InsertPos); 149781ad6265SDimitry Andric LIS->InsertMachineInstrInMaps(*NewMI); 149881ad6265SDimitry Andric LIS->removeInterval(Reg); 149981ad6265SDimitry Andric LIS->createAndComputeVirtRegInterval(Reg); 150081ad6265SDimitry Andric InsertedMIToOldDef[NewMI] = Def; 150181ad6265SDimitry Andric 150281ad6265SDimitry Andric // Update region boundaries in the scheduling region we sank from, since we 150381ad6265SDimitry Andric // may sink an instruction that was at the beginning or end of its region. 1504972a253aSDimitry Andric DAG.updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr, 150581ad6265SDimitry Andric /*Removing =*/true); 150681ad6265SDimitry Andric 150781ad6265SDimitry Andric // Update region boundaries in the region we sank to. 1508972a253aSDimitry Andric DAG.updateRegionBoundaries(NewRegions, InsertPos, NewMI); 150981ad6265SDimitry Andric 151081ad6265SDimitry Andric LaneBitmask PrevMask = NewLiveIns[I][Reg]; 151181ad6265SDimitry Andric // FIXME: Also update cached pressure for where the def was sunk from. 151281ad6265SDimitry Andric // Update RP for all regions that have this reg as a live-in and remove 151381ad6265SDimitry Andric // the reg from all regions as a live-in. 151481ad6265SDimitry Andric for (auto Idx : RematDefToLiveInRegions[Def]) { 151581ad6265SDimitry Andric NewLiveIns[Idx].erase(Reg); 1516972a253aSDimitry Andric if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) { 151781ad6265SDimitry Andric // Def is live-through and not used in this block. 1518972a253aSDimitry Andric NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI); 151981ad6265SDimitry Andric } else { 152081ad6265SDimitry Andric // Def is used and rematerialized into this block. 152181ad6265SDimitry Andric GCNDownwardRPTracker RPT(*LIS); 152281ad6265SDimitry Andric auto *NonDbgMI = &*skipDebugInstructionsForward( 152381ad6265SDimitry Andric NewRegions[Idx].first, NewRegions[Idx].second); 152481ad6265SDimitry Andric RPT.reset(*NonDbgMI, &NewLiveIns[Idx]); 152581ad6265SDimitry Andric RPT.advance(NewRegions[Idx].second); 152681ad6265SDimitry Andric NewPressure[Idx] = RPT.moveMaxPressure(); 152781ad6265SDimitry Andric } 152881ad6265SDimitry Andric } 152981ad6265SDimitry Andric 153081ad6265SDimitry Andric SinkedDefs.push_back(Def); 153181ad6265SDimitry Andric ImproveOccupancy = NewPressure[I].getOccupancy(ST); 1532972a253aSDimitry Andric if (ImproveOccupancy > DAG.MinOccupancy) 153381ad6265SDimitry Andric break; 153481ad6265SDimitry Andric } 153581ad6265SDimitry Andric 153681ad6265SDimitry Andric // Remove defs we just sank from all regions' lists of sinkable defs. 153781ad6265SDimitry Andric for (auto &Def : SinkedDefs) 153881ad6265SDimitry Andric for (auto TrackedIdx : RematDefToLiveInRegions[Def]) 153981ad6265SDimitry Andric RematerializableInsts[TrackedIdx].erase(Def); 154081ad6265SDimitry Andric 1541972a253aSDimitry Andric if (ImproveOccupancy <= DAG.MinOccupancy) 154281ad6265SDimitry Andric break; 154381ad6265SDimitry Andric 154481ad6265SDimitry Andric NewRescheduleRegions[I] = true; 154581ad6265SDimitry Andric Improved = true; 154681ad6265SDimitry Andric } 154781ad6265SDimitry Andric 154881ad6265SDimitry Andric if (!Improved) { 154981ad6265SDimitry Andric // Occupancy was not improved for all regions that were at MinOccupancy.
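// Kernel occupancy is the minimum over all regions, so one region left at
// MinOccupancy pins the whole function; e.g. (hypothetical) region
// occupancies {4, 8, 8} still give 4 waves, and the rematerialized copies
// would be pure overhead. Hence the wholesale rollback that follows.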
155081ad6265SDimitry Andric // Undo sinking and remove newly rematerialized instructions. 155181ad6265SDimitry Andric for (auto &Entry : InsertedMIToOldDef) { 155281ad6265SDimitry Andric MachineInstr *MI = Entry.first; 155381ad6265SDimitry Andric MachineInstr *OldMI = Entry.second; 155481ad6265SDimitry Andric Register Reg = MI->getOperand(0).getReg(); 155581ad6265SDimitry Andric LIS->RemoveMachineInstrFromMaps(*MI); 155681ad6265SDimitry Andric MI->eraseFromParent(); 155781ad6265SDimitry Andric OldMI->clearRegisterDeads(Reg); 155881ad6265SDimitry Andric LIS->removeInterval(Reg); 155981ad6265SDimitry Andric LIS->createAndComputeVirtRegInterval(Reg); 156081ad6265SDimitry Andric } 156181ad6265SDimitry Andric return false; 156281ad6265SDimitry Andric } 156381ad6265SDimitry Andric 156481ad6265SDimitry Andric // Occupancy was improved for all regions. 156581ad6265SDimitry Andric for (auto &Entry : InsertedMIToOldDef) { 156681ad6265SDimitry Andric MachineInstr *MI = Entry.first; 156781ad6265SDimitry Andric MachineInstr *OldMI = Entry.second; 156881ad6265SDimitry Andric 156981ad6265SDimitry Andric // Remove OldMI from BBLiveInMap since we are sinking it from its MBB. 1570972a253aSDimitry Andric DAG.BBLiveInMap.erase(OldMI); 157181ad6265SDimitry Andric 157281ad6265SDimitry Andric // Remove OldMI and update LIS 157381ad6265SDimitry Andric Register Reg = MI->getOperand(0).getReg(); 157481ad6265SDimitry Andric LIS->RemoveMachineInstrFromMaps(*OldMI); 157581ad6265SDimitry Andric OldMI->eraseFromParent(); 157681ad6265SDimitry Andric LIS->removeInterval(Reg); 157781ad6265SDimitry Andric LIS->createAndComputeVirtRegInterval(Reg); 157881ad6265SDimitry Andric } 157981ad6265SDimitry Andric 158081ad6265SDimitry Andric // Update live-ins, register pressure, and regions caches. 158181ad6265SDimitry Andric for (auto Idx : ImpactedRegions) { 1582972a253aSDimitry Andric DAG.LiveIns[Idx] = NewLiveIns[Idx]; 1583972a253aSDimitry Andric DAG.Pressure[Idx] = NewPressure[Idx]; 1584972a253aSDimitry Andric DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent()); 158581ad6265SDimitry Andric } 1586972a253aSDimitry Andric DAG.Regions = NewRegions; 1587972a253aSDimitry Andric DAG.RescheduleRegions = NewRescheduleRegions; 158881ad6265SDimitry Andric 158981ad6265SDimitry Andric SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 1590972a253aSDimitry Andric MFI.increaseOccupancy(MF, ++DAG.MinOccupancy); 159181ad6265SDimitry Andric 159281ad6265SDimitry Andric return true; 159381ad6265SDimitry Andric } 159481ad6265SDimitry Andric 159581ad6265SDimitry Andric // Copied from MachineLICM 1596972a253aSDimitry Andric bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) { 1597972a253aSDimitry Andric if (!DAG.TII->isTriviallyReMaterializable(MI)) 159881ad6265SDimitry Andric return false; 159981ad6265SDimitry Andric 160006c3fb27SDimitry Andric for (const MachineOperand &MO : MI.all_uses()) 160106c3fb27SDimitry Andric if (MO.getReg().isVirtual()) 160281ad6265SDimitry Andric return false; 160381ad6265SDimitry Andric 160481ad6265SDimitry Andric return true; 160581ad6265SDimitry Andric } 160681ad6265SDimitry Andric 160781ad6265SDimitry Andric // When removing, we will have to check both beginning and ending of the region. 
160881ad6265SDimitry Andric // When inserting, we will only have to check if we are inserting NewMI in front 160981ad6265SDimitry Andric // of a scheduling region and do not need to check the ending since we will only 161081ad6265SDimitry Andric // ever be inserting before an already existing MI. 161181ad6265SDimitry Andric void GCNScheduleDAGMILive::updateRegionBoundaries( 161281ad6265SDimitry Andric SmallVectorImpl<std::pair<MachineBasicBlock::iterator, 161381ad6265SDimitry Andric MachineBasicBlock::iterator>> &RegionBoundaries, 161481ad6265SDimitry Andric MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) { 161581ad6265SDimitry Andric unsigned I = 0, E = RegionBoundaries.size(); 161681ad6265SDimitry Andric // Search for the first region of the block where MI is located. 161781ad6265SDimitry Andric while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent()) 161881ad6265SDimitry Andric ++I; 161981ad6265SDimitry Andric 162081ad6265SDimitry Andric for (; I != E; ++I) { 162181ad6265SDimitry Andric if (MI->getParent() != RegionBoundaries[I].first->getParent()) 162281ad6265SDimitry Andric return; 162381ad6265SDimitry Andric 162481ad6265SDimitry Andric if (Removing && MI == RegionBoundaries[I].first && 162581ad6265SDimitry Andric MI == RegionBoundaries[I].second) { 162681ad6265SDimitry Andric // MI is in a region with size 1; after removing, the region will be 162781ad6265SDimitry Andric // size 0. Set RegionBegin and RegionEnd to the past-the-end block iterator. 162881ad6265SDimitry Andric RegionBoundaries[I] = 1629bdd1243dSDimitry Andric std::pair(MI->getParent()->end(), MI->getParent()->end()); 163081ad6265SDimitry Andric return; 163181ad6265SDimitry Andric } 163281ad6265SDimitry Andric if (MI == RegionBoundaries[I].first) { 163381ad6265SDimitry Andric if (Removing) 163481ad6265SDimitry Andric RegionBoundaries[I] = 1635bdd1243dSDimitry Andric std::pair(std::next(MI), RegionBoundaries[I].second); 163681ad6265SDimitry Andric else 163781ad6265SDimitry Andric // Inserted NewMI in front of the region; set new RegionBegin to NewMI. 1638bdd1243dSDimitry Andric RegionBoundaries[I] = std::pair(MachineBasicBlock::iterator(NewMI), 163981ad6265SDimitry Andric RegionBoundaries[I].second); 164081ad6265SDimitry Andric return; 164181ad6265SDimitry Andric } 164281ad6265SDimitry Andric if (Removing && MI == RegionBoundaries[I].second) { 1643bdd1243dSDimitry Andric RegionBoundaries[I] = std::pair(RegionBoundaries[I].first, std::prev(MI)); 164481ad6265SDimitry Andric return; 164581ad6265SDimitry Andric } 164681ad6265SDimitry Andric } 164781ad6265SDimitry Andric } 1648bdd1243dSDimitry Andric 1649bdd1243dSDimitry Andric static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) { 1650bdd1243dSDimitry Andric return std::any_of( 1651bdd1243dSDimitry Andric DAG->begin(), DAG->end(), [](MachineBasicBlock::iterator MI) { 1652bdd1243dSDimitry Andric unsigned Opc = MI->getOpcode(); 1653bdd1243dSDimitry Andric return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT; 1654bdd1243dSDimitry Andric }); 1655bdd1243dSDimitry Andric } 1656bdd1243dSDimitry Andric 1657bdd1243dSDimitry Andric GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive( 1658bdd1243dSDimitry Andric MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S, 1659bdd1243dSDimitry Andric bool RemoveKillFlags) 1660bdd1243dSDimitry Andric : ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {} 1661bdd1243dSDimitry Andric 1662bdd1243dSDimitry Andric void GCNPostScheduleDAGMILive::schedule() { 1663bdd1243dSDimitry Andric HasIGLPInstrs = hasIGLPInstrs(this);
1664bdd1243dSDimitry Andric if (HasIGLPInstrs) { 1665bdd1243dSDimitry Andric SavedMutations.clear(); 1666bdd1243dSDimitry Andric SavedMutations.swap(Mutations); 1667*0fca6ea1SDimitry Andric addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA)); 1668bdd1243dSDimitry Andric } 1669bdd1243dSDimitry Andric 1670bdd1243dSDimitry Andric ScheduleDAGMI::schedule(); 1671bdd1243dSDimitry Andric } 1672bdd1243dSDimitry Andric 1673bdd1243dSDimitry Andric void GCNPostScheduleDAGMILive::finalizeSchedule() { 1674bdd1243dSDimitry Andric if (HasIGLPInstrs) 1675bdd1243dSDimitry Andric SavedMutations.swap(Mutations); 1676bdd1243dSDimitry Andric 1677bdd1243dSDimitry Andric ScheduleDAGMI::finalizeSchedule(); 1678bdd1243dSDimitry Andric } 1679
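// Standalone sketch (illustrative only, not part of this file) of the bubble
// accounting that computeSUnitReadyCycle() and getScheduleMetrics() perform
// above: an instruction's ready cycle is the maximum over its
// register-dependence predecessors of (predecessor ready cycle + latency),
// and any gap before the current issue cycle counts as a bubble. The scale
// factor is assumed to be 100 here. Compiled separately, this prints
// bubbles=3 cycles=6 metric=50.
//
//   #include <algorithm>
//   #include <cstdio>
//   #include <utility>
//   #include <vector>
//
//   int main() {
//     // Each instruction lists (predecessor index, latency) pairs.
//     std::vector<std::vector<std::pair<int, int>>> Preds(3);
//     Preds[1] = {{0, 4}}; // I1 consumes I0's result after 4 cycles
//     Preds[2] = {{1, 1}}; // I2 consumes I1's result after 1 cycle
//     std::vector<unsigned> Ready(3, 0);
//     unsigned CurrCycle = 0, SumBubbles = 0;
//     for (int I = 0; I < 3; ++I) {
//       unsigned ReadyCycle = CurrCycle;
//       for (auto [P, Lat] : Preds[I])
//         ReadyCycle = std::max(ReadyCycle, Ready[P] + unsigned(Lat));
//       SumBubbles += ReadyCycle - CurrCycle; // stall before issuing I
//       Ready[I] = ReadyCycle;
//       CurrCycle = ReadyCycle + 1;
//     }
//     // Same scaling as ScheduleMetrics: bubbles per hundred cycles.
//     std::printf("bubbles=%u cycles=%u metric=%u\n", SumBubbles, CurrCycle,
//                 SumBubbles ? SumBubbles * 100 / CurrCycle : 1);
//   }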