10b57cec5SDimitry Andric //===-- SIMachineScheduler.cpp - SI Scheduler Interface -------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric /// \file 100b57cec5SDimitry Andric /// SI Machine Scheduler interface 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #include "SIMachineScheduler.h" 150b57cec5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 16*0fca6ea1SDimitry Andric #include "SIInstrInfo.h" 170b57cec5SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h" 180b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 190b57cec5SDimitry Andric 200b57cec5SDimitry Andric using namespace llvm; 210b57cec5SDimitry Andric 220b57cec5SDimitry Andric #define DEBUG_TYPE "machine-scheduler" 230b57cec5SDimitry Andric 240b57cec5SDimitry Andric // This scheduler implements a different scheduling algorithm than 250b57cec5SDimitry Andric // GenericScheduler. 260b57cec5SDimitry Andric // 270b57cec5SDimitry Andric // There are several specific architecture behaviours that can't be modelled 280b57cec5SDimitry Andric // for GenericScheduler: 290b57cec5SDimitry Andric // . When accessing the result of an SGPR load instruction, you have to wait 300b57cec5SDimitry Andric // for all the SGPR load instructions before your current instruction to 310b57cec5SDimitry Andric // have finished. 320b57cec5SDimitry Andric // . When accessing the result of an VGPR load instruction, you have to wait 330b57cec5SDimitry Andric // for all the VGPR load instructions previous to the VGPR load instruction 340b57cec5SDimitry Andric // you are interested in to finish. 350b57cec5SDimitry Andric // . The less the register pressure, the best load latencies are hidden 360b57cec5SDimitry Andric // 370b57cec5SDimitry Andric // Moreover some specifities (like the fact a lot of instructions in the shader 380b57cec5SDimitry Andric // have few dependencies) makes the generic scheduler have some unpredictable 390b57cec5SDimitry Andric // behaviours. For example when register pressure becomes high, it can either 400b57cec5SDimitry Andric // manage to prevent register pressure from going too high, or it can 410b57cec5SDimitry Andric // increase register pressure even more than if it hadn't taken register 420b57cec5SDimitry Andric // pressure into account. 430b57cec5SDimitry Andric // 440b57cec5SDimitry Andric // Also some other bad behaviours are generated, like loading at the beginning 450b57cec5SDimitry Andric // of the shader a constant in VGPR you won't need until the end of the shader. 460b57cec5SDimitry Andric // 470b57cec5SDimitry Andric // The scheduling problem for SI can distinguish three main parts: 480b57cec5SDimitry Andric // . Hiding high latencies (texture sampling, etc) 490b57cec5SDimitry Andric // . Hiding low latencies (SGPR constant loading, etc) 500b57cec5SDimitry Andric // . Keeping register usage low for better latency hiding and general 510b57cec5SDimitry Andric // performance 520b57cec5SDimitry Andric // 530b57cec5SDimitry Andric // Some other things can also affect performance, but are hard to predict 540b57cec5SDimitry Andric // (cache usage, the fact the HW can issue several instructions from different 550b57cec5SDimitry Andric // wavefronts if different types, etc) 560b57cec5SDimitry Andric // 570b57cec5SDimitry Andric // This scheduler tries to solve the scheduling problem by dividing it into 580b57cec5SDimitry Andric // simpler sub-problems. It divides the instructions into blocks, schedules 590b57cec5SDimitry Andric // locally inside the blocks where it takes care of low latencies, and then 600b57cec5SDimitry Andric // chooses the order of the blocks by taking care of high latencies. 610b57cec5SDimitry Andric // Dividing the instructions into blocks helps control keeping register 620b57cec5SDimitry Andric // usage low. 630b57cec5SDimitry Andric // 640b57cec5SDimitry Andric // First the instructions are put into blocks. 650b57cec5SDimitry Andric // We want the blocks help control register usage and hide high latencies 660b57cec5SDimitry Andric // later. To help control register usage, we typically want all local 6781ad6265SDimitry Andric // computations, when for example you create a result that can be consumed 680b57cec5SDimitry Andric // right away, to be contained in a block. Block inputs and outputs would 690b57cec5SDimitry Andric // typically be important results that are needed in several locations of 700b57cec5SDimitry Andric // the shader. Since we do want blocks to help hide high latencies, we want 710b57cec5SDimitry Andric // the instructions inside the block to have a minimal set of dependencies 720b57cec5SDimitry Andric // on high latencies. It will make it easy to pick blocks to hide specific 730b57cec5SDimitry Andric // high latencies. 740b57cec5SDimitry Andric // The block creation algorithm is divided into several steps, and several 750b57cec5SDimitry Andric // variants can be tried during the scheduling process. 760b57cec5SDimitry Andric // 770b57cec5SDimitry Andric // Second the order of the instructions inside the blocks is chosen. 780b57cec5SDimitry Andric // At that step we do take into account only register usage and hiding 790b57cec5SDimitry Andric // low latency instructions 800b57cec5SDimitry Andric // 810b57cec5SDimitry Andric // Third the block order is chosen, there we try to hide high latencies 820b57cec5SDimitry Andric // and keep register usage low. 830b57cec5SDimitry Andric // 840b57cec5SDimitry Andric // After the third step, a pass is done to improve the hiding of low 850b57cec5SDimitry Andric // latencies. 860b57cec5SDimitry Andric // 870b57cec5SDimitry Andric // Actually when talking about 'low latency' or 'high latency' it includes 880b57cec5SDimitry Andric // both the latency to get the cache (or global mem) data go to the register, 890b57cec5SDimitry Andric // and the bandwidth limitations. 900b57cec5SDimitry Andric // Increasing the number of active wavefronts helps hide the former, but it 910b57cec5SDimitry Andric // doesn't solve the latter, thus why even if wavefront count is high, we have 920b57cec5SDimitry Andric // to try have as many instructions hiding high latencies as possible. 9381ad6265SDimitry Andric // The OpenCL doc says for example latency of 400 cycles for a global mem 9481ad6265SDimitry Andric // access, which is hidden by 10 instructions if the wavefront count is 10. 950b57cec5SDimitry Andric 960b57cec5SDimitry Andric // Some figures taken from AMD docs: 970b57cec5SDimitry Andric // Both texture and constant L1 caches are 4-way associative with 64 bytes 980b57cec5SDimitry Andric // lines. 990b57cec5SDimitry Andric // Constant cache is shared with 4 CUs. 1000b57cec5SDimitry Andric // For texture sampling, the address generation unit receives 4 texture 1010b57cec5SDimitry Andric // addresses per cycle, thus we could expect texture sampling latency to be 1020b57cec5SDimitry Andric // equivalent to 4 instructions in the very best case (a VGPR is 64 work items, 1030b57cec5SDimitry Andric // instructions in a wavefront group are executed every 4 cycles), 1040b57cec5SDimitry Andric // or 16 instructions if the other wavefronts associated to the 3 other VALUs 1050b57cec5SDimitry Andric // of the CU do texture sampling too. (Don't take these figures too seriously, 1060b57cec5SDimitry Andric // as I'm not 100% sure of the computation) 1070b57cec5SDimitry Andric // Data exports should get similar latency. 1080b57cec5SDimitry Andric // For constant loading, the cache is shader with 4 CUs. 1090b57cec5SDimitry Andric // The doc says "a throughput of 16B/cycle for each of the 4 Compute Unit" 1100b57cec5SDimitry Andric // I guess if the other CU don't read the cache, it can go up to 64B/cycle. 1110b57cec5SDimitry Andric // It means a simple s_buffer_load should take one instruction to hide, as 1120b57cec5SDimitry Andric // well as a s_buffer_loadx2 and potentially a s_buffer_loadx8 if on the same 1130b57cec5SDimitry Andric // cache line. 1140b57cec5SDimitry Andric // 1150b57cec5SDimitry Andric // As of today the driver doesn't preload the constants in cache, thus the 1160b57cec5SDimitry Andric // first loads get extra latency. The doc says global memory access can be 1170b57cec5SDimitry Andric // 300-600 cycles. We do not specially take that into account when scheduling 1180b57cec5SDimitry Andric // As we expect the driver to be able to preload the constants soon. 1190b57cec5SDimitry Andric 1200b57cec5SDimitry Andric // common code // 1210b57cec5SDimitry Andric 1220b57cec5SDimitry Andric #ifndef NDEBUG 1230b57cec5SDimitry Andric 1240b57cec5SDimitry Andric static const char *getReasonStr(SIScheduleCandReason Reason) { 1250b57cec5SDimitry Andric switch (Reason) { 1260b57cec5SDimitry Andric case NoCand: return "NOCAND"; 1270b57cec5SDimitry Andric case RegUsage: return "REGUSAGE"; 1280b57cec5SDimitry Andric case Latency: return "LATENCY"; 1290b57cec5SDimitry Andric case Successor: return "SUCCESSOR"; 1300b57cec5SDimitry Andric case Depth: return "DEPTH"; 1310b57cec5SDimitry Andric case NodeOrder: return "ORDER"; 1320b57cec5SDimitry Andric } 1330b57cec5SDimitry Andric llvm_unreachable("Unknown reason!"); 1340b57cec5SDimitry Andric } 1350b57cec5SDimitry Andric 1360b57cec5SDimitry Andric #endif 1370b57cec5SDimitry Andric 138*0fca6ea1SDimitry Andric namespace llvm::SISched { 1390b57cec5SDimitry Andric static bool tryLess(int TryVal, int CandVal, 1400b57cec5SDimitry Andric SISchedulerCandidate &TryCand, 1410b57cec5SDimitry Andric SISchedulerCandidate &Cand, 1420b57cec5SDimitry Andric SIScheduleCandReason Reason) { 1430b57cec5SDimitry Andric if (TryVal < CandVal) { 1440b57cec5SDimitry Andric TryCand.Reason = Reason; 1450b57cec5SDimitry Andric return true; 1460b57cec5SDimitry Andric } 1470b57cec5SDimitry Andric if (TryVal > CandVal) { 1480b57cec5SDimitry Andric if (Cand.Reason > Reason) 1490b57cec5SDimitry Andric Cand.Reason = Reason; 1500b57cec5SDimitry Andric return true; 1510b57cec5SDimitry Andric } 1520b57cec5SDimitry Andric Cand.setRepeat(Reason); 1530b57cec5SDimitry Andric return false; 1540b57cec5SDimitry Andric } 1550b57cec5SDimitry Andric 1560b57cec5SDimitry Andric static bool tryGreater(int TryVal, int CandVal, 1570b57cec5SDimitry Andric SISchedulerCandidate &TryCand, 1580b57cec5SDimitry Andric SISchedulerCandidate &Cand, 1590b57cec5SDimitry Andric SIScheduleCandReason Reason) { 1600b57cec5SDimitry Andric if (TryVal > CandVal) { 1610b57cec5SDimitry Andric TryCand.Reason = Reason; 1620b57cec5SDimitry Andric return true; 1630b57cec5SDimitry Andric } 1640b57cec5SDimitry Andric if (TryVal < CandVal) { 1650b57cec5SDimitry Andric if (Cand.Reason > Reason) 1660b57cec5SDimitry Andric Cand.Reason = Reason; 1670b57cec5SDimitry Andric return true; 1680b57cec5SDimitry Andric } 1690b57cec5SDimitry Andric Cand.setRepeat(Reason); 1700b57cec5SDimitry Andric return false; 1710b57cec5SDimitry Andric } 172*0fca6ea1SDimitry Andric } // end namespace llvm::SISched 1730b57cec5SDimitry Andric 1740b57cec5SDimitry Andric // SIScheduleBlock // 1750b57cec5SDimitry Andric 1760b57cec5SDimitry Andric void SIScheduleBlock::addUnit(SUnit *SU) { 1770b57cec5SDimitry Andric NodeNum2Index[SU->NodeNum] = SUnits.size(); 1780b57cec5SDimitry Andric SUnits.push_back(SU); 1790b57cec5SDimitry Andric } 1800b57cec5SDimitry Andric 1810b57cec5SDimitry Andric #ifndef NDEBUG 1820b57cec5SDimitry Andric void SIScheduleBlock::traceCandidate(const SISchedCandidate &Cand) { 1830b57cec5SDimitry Andric 1840b57cec5SDimitry Andric dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); 1850b57cec5SDimitry Andric dbgs() << '\n'; 1860b57cec5SDimitry Andric } 1870b57cec5SDimitry Andric #endif 1880b57cec5SDimitry Andric 1890b57cec5SDimitry Andric void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand, 1900b57cec5SDimitry Andric SISchedCandidate &TryCand) { 1910b57cec5SDimitry Andric // Initialize the candidate if needed. 1920b57cec5SDimitry Andric if (!Cand.isValid()) { 1930b57cec5SDimitry Andric TryCand.Reason = NodeOrder; 1940b57cec5SDimitry Andric return; 1950b57cec5SDimitry Andric } 1960b57cec5SDimitry Andric 1970b57cec5SDimitry Andric if (Cand.SGPRUsage > 60 && 1980b57cec5SDimitry Andric SISched::tryLess(TryCand.SGPRUsage, Cand.SGPRUsage, 1990b57cec5SDimitry Andric TryCand, Cand, RegUsage)) 2000b57cec5SDimitry Andric return; 2010b57cec5SDimitry Andric 2020b57cec5SDimitry Andric // Schedule low latency instructions as top as possible. 2030b57cec5SDimitry Andric // Order of priority is: 2040b57cec5SDimitry Andric // . Low latency instructions which do not depend on other low latency 2050b57cec5SDimitry Andric // instructions we haven't waited for 2060b57cec5SDimitry Andric // . Other instructions which do not depend on low latency instructions 2070b57cec5SDimitry Andric // we haven't waited for 2080b57cec5SDimitry Andric // . Low latencies 2090b57cec5SDimitry Andric // . All other instructions 2100b57cec5SDimitry Andric // Goal is to get: low latency instructions - independent instructions 2110b57cec5SDimitry Andric // - (eventually some more low latency instructions) 2120b57cec5SDimitry Andric // - instructions that depend on the first low latency instructions. 2130b57cec5SDimitry Andric // If in the block there is a lot of constant loads, the SGPR usage 2140b57cec5SDimitry Andric // could go quite high, thus above the arbitrary limit of 60 will encourage 2150b57cec5SDimitry Andric // use the already loaded constants (in order to release some SGPRs) before 2160b57cec5SDimitry Andric // loading more. 2170b57cec5SDimitry Andric if (SISched::tryLess(TryCand.HasLowLatencyNonWaitedParent, 2180b57cec5SDimitry Andric Cand.HasLowLatencyNonWaitedParent, 2190b57cec5SDimitry Andric TryCand, Cand, SIScheduleCandReason::Depth)) 2200b57cec5SDimitry Andric return; 2210b57cec5SDimitry Andric 2220b57cec5SDimitry Andric if (SISched::tryGreater(TryCand.IsLowLatency, Cand.IsLowLatency, 2230b57cec5SDimitry Andric TryCand, Cand, SIScheduleCandReason::Depth)) 2240b57cec5SDimitry Andric return; 2250b57cec5SDimitry Andric 2260b57cec5SDimitry Andric if (TryCand.IsLowLatency && 2270b57cec5SDimitry Andric SISched::tryLess(TryCand.LowLatencyOffset, Cand.LowLatencyOffset, 2280b57cec5SDimitry Andric TryCand, Cand, SIScheduleCandReason::Depth)) 2290b57cec5SDimitry Andric return; 2300b57cec5SDimitry Andric 2310b57cec5SDimitry Andric if (SISched::tryLess(TryCand.VGPRUsage, Cand.VGPRUsage, 2320b57cec5SDimitry Andric TryCand, Cand, RegUsage)) 2330b57cec5SDimitry Andric return; 2340b57cec5SDimitry Andric 2350b57cec5SDimitry Andric // Fall through to original instruction order. 2360b57cec5SDimitry Andric if (TryCand.SU->NodeNum < Cand.SU->NodeNum) { 2370b57cec5SDimitry Andric TryCand.Reason = NodeOrder; 2380b57cec5SDimitry Andric } 2390b57cec5SDimitry Andric } 2400b57cec5SDimitry Andric 2410b57cec5SDimitry Andric SUnit* SIScheduleBlock::pickNode() { 2420b57cec5SDimitry Andric SISchedCandidate TopCand; 2430b57cec5SDimitry Andric 2440b57cec5SDimitry Andric for (SUnit* SU : TopReadySUs) { 2450b57cec5SDimitry Andric SISchedCandidate TryCand; 2460b57cec5SDimitry Andric std::vector<unsigned> pressure; 2470b57cec5SDimitry Andric std::vector<unsigned> MaxPressure; 2480b57cec5SDimitry Andric // Predict register usage after this instruction. 2490b57cec5SDimitry Andric TryCand.SU = SU; 2500b57cec5SDimitry Andric TopRPTracker.getDownwardPressure(SU->getInstr(), pressure, MaxPressure); 2515ffd83dbSDimitry Andric TryCand.SGPRUsage = pressure[AMDGPU::RegisterPressureSets::SReg_32]; 2525ffd83dbSDimitry Andric TryCand.VGPRUsage = pressure[AMDGPU::RegisterPressureSets::VGPR_32]; 2530b57cec5SDimitry Andric TryCand.IsLowLatency = DAG->IsLowLatencySU[SU->NodeNum]; 2540b57cec5SDimitry Andric TryCand.LowLatencyOffset = DAG->LowLatencyOffset[SU->NodeNum]; 2550b57cec5SDimitry Andric TryCand.HasLowLatencyNonWaitedParent = 2560b57cec5SDimitry Andric HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]]; 2570b57cec5SDimitry Andric tryCandidateTopDown(TopCand, TryCand); 2580b57cec5SDimitry Andric if (TryCand.Reason != NoCand) 2590b57cec5SDimitry Andric TopCand.setBest(TryCand); 2600b57cec5SDimitry Andric } 2610b57cec5SDimitry Andric 2620b57cec5SDimitry Andric return TopCand.SU; 2630b57cec5SDimitry Andric } 2640b57cec5SDimitry Andric 2650b57cec5SDimitry Andric 2660b57cec5SDimitry Andric // Schedule something valid. 2670b57cec5SDimitry Andric void SIScheduleBlock::fastSchedule() { 2680b57cec5SDimitry Andric TopReadySUs.clear(); 2690b57cec5SDimitry Andric if (Scheduled) 2700b57cec5SDimitry Andric undoSchedule(); 2710b57cec5SDimitry Andric 2720b57cec5SDimitry Andric for (SUnit* SU : SUnits) { 2730b57cec5SDimitry Andric if (!SU->NumPredsLeft) 2740b57cec5SDimitry Andric TopReadySUs.push_back(SU); 2750b57cec5SDimitry Andric } 2760b57cec5SDimitry Andric 2770b57cec5SDimitry Andric while (!TopReadySUs.empty()) { 2780b57cec5SDimitry Andric SUnit *SU = TopReadySUs[0]; 2790b57cec5SDimitry Andric ScheduledSUnits.push_back(SU); 2800b57cec5SDimitry Andric nodeScheduled(SU); 2810b57cec5SDimitry Andric } 2820b57cec5SDimitry Andric 2830b57cec5SDimitry Andric Scheduled = true; 2840b57cec5SDimitry Andric } 2850b57cec5SDimitry Andric 2860b57cec5SDimitry Andric // Returns if the register was set between first and last. 2870b57cec5SDimitry Andric static bool isDefBetween(unsigned Reg, 2880b57cec5SDimitry Andric SlotIndex First, SlotIndex Last, 2890b57cec5SDimitry Andric const MachineRegisterInfo *MRI, 2900b57cec5SDimitry Andric const LiveIntervals *LIS) { 2910b57cec5SDimitry Andric for (MachineRegisterInfo::def_instr_iterator 2920b57cec5SDimitry Andric UI = MRI->def_instr_begin(Reg), 2930b57cec5SDimitry Andric UE = MRI->def_instr_end(); UI != UE; ++UI) { 2940b57cec5SDimitry Andric const MachineInstr* MI = &*UI; 2950b57cec5SDimitry Andric if (MI->isDebugValue()) 2960b57cec5SDimitry Andric continue; 2970b57cec5SDimitry Andric SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot(); 2980b57cec5SDimitry Andric if (InstSlot >= First && InstSlot <= Last) 2990b57cec5SDimitry Andric return true; 3000b57cec5SDimitry Andric } 3010b57cec5SDimitry Andric return false; 3020b57cec5SDimitry Andric } 3030b57cec5SDimitry Andric 3040b57cec5SDimitry Andric void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock, 3050b57cec5SDimitry Andric MachineBasicBlock::iterator EndBlock) { 3060b57cec5SDimitry Andric IntervalPressure Pressure, BotPressure; 3070b57cec5SDimitry Andric RegPressureTracker RPTracker(Pressure), BotRPTracker(BotPressure); 3080b57cec5SDimitry Andric LiveIntervals *LIS = DAG->getLIS(); 3090b57cec5SDimitry Andric MachineRegisterInfo *MRI = DAG->getMRI(); 3100b57cec5SDimitry Andric DAG->initRPTracker(TopRPTracker); 3110b57cec5SDimitry Andric DAG->initRPTracker(BotRPTracker); 3120b57cec5SDimitry Andric DAG->initRPTracker(RPTracker); 3130b57cec5SDimitry Andric 3140b57cec5SDimitry Andric // Goes though all SU. RPTracker captures what had to be alive for the SUs 3150b57cec5SDimitry Andric // to execute, and what is still alive at the end. 3160b57cec5SDimitry Andric for (SUnit* SU : ScheduledSUnits) { 3170b57cec5SDimitry Andric RPTracker.setPos(SU->getInstr()); 3180b57cec5SDimitry Andric RPTracker.advance(); 3190b57cec5SDimitry Andric } 3200b57cec5SDimitry Andric 3210b57cec5SDimitry Andric // Close the RPTracker to finalize live ins/outs. 3220b57cec5SDimitry Andric RPTracker.closeRegion(); 3230b57cec5SDimitry Andric 3240b57cec5SDimitry Andric // Initialize the live ins and live outs. 3250b57cec5SDimitry Andric TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs); 3260b57cec5SDimitry Andric BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs); 3270b57cec5SDimitry Andric 3280b57cec5SDimitry Andric // Do not Track Physical Registers, because it messes up. 3290b57cec5SDimitry Andric for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) { 330bdd1243dSDimitry Andric if (RegMaskPair.RegUnit.isVirtual()) 3310b57cec5SDimitry Andric LiveInRegs.insert(RegMaskPair.RegUnit); 3320b57cec5SDimitry Andric } 3330b57cec5SDimitry Andric LiveOutRegs.clear(); 3340b57cec5SDimitry Andric // There is several possibilities to distinguish: 3350b57cec5SDimitry Andric // 1) Reg is not input to any instruction in the block, but is output of one 3360b57cec5SDimitry Andric // 2) 1) + read in the block and not needed after it 3370b57cec5SDimitry Andric // 3) 1) + read in the block but needed in another block 3380b57cec5SDimitry Andric // 4) Reg is input of an instruction but another block will read it too 3390b57cec5SDimitry Andric // 5) Reg is input of an instruction and then rewritten in the block. 3400b57cec5SDimitry Andric // result is not read in the block (implies used in another block) 3410b57cec5SDimitry Andric // 6) Reg is input of an instruction and then rewritten in the block. 3420b57cec5SDimitry Andric // result is read in the block and not needed in another block 3430b57cec5SDimitry Andric // 7) Reg is input of an instruction and then rewritten in the block. 3440b57cec5SDimitry Andric // result is read in the block but also needed in another block 3450b57cec5SDimitry Andric // LiveInRegs will contains all the regs in situation 4, 5, 6, 7 3460b57cec5SDimitry Andric // We want LiveOutRegs to contain only Regs whose content will be read after 3470b57cec5SDimitry Andric // in another block, and whose content was written in the current block, 3480b57cec5SDimitry Andric // that is we want it to get 1, 3, 5, 7 3490b57cec5SDimitry Andric // Since we made the MIs of a block to be packed all together before 3500b57cec5SDimitry Andric // scheduling, then the LiveIntervals were correct, and the RPTracker was 3510b57cec5SDimitry Andric // able to correctly handle 5 vs 6, 2 vs 3. 3520b57cec5SDimitry Andric // (Note: This is not sufficient for RPTracker to not do mistakes for case 4) 3530b57cec5SDimitry Andric // The RPTracker's LiveOutRegs has 1, 3, (some correct or incorrect)4, 5, 7 35481ad6265SDimitry Andric // Comparing to LiveInRegs is not sufficient to differentiate 4 vs 5, 7 3550b57cec5SDimitry Andric // The use of findDefBetween removes the case 4. 3560b57cec5SDimitry Andric for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) { 357e8d8bef9SDimitry Andric Register Reg = RegMaskPair.RegUnit; 358e8d8bef9SDimitry Andric if (Reg.isVirtual() && 3590b57cec5SDimitry Andric isDefBetween(Reg, LIS->getInstructionIndex(*BeginBlock).getRegSlot(), 3600b57cec5SDimitry Andric LIS->getInstructionIndex(*EndBlock).getRegSlot(), MRI, 3610b57cec5SDimitry Andric LIS)) { 3620b57cec5SDimitry Andric LiveOutRegs.insert(Reg); 3630b57cec5SDimitry Andric } 3640b57cec5SDimitry Andric } 3650b57cec5SDimitry Andric 3660b57cec5SDimitry Andric // Pressure = sum_alive_registers register size 3670b57cec5SDimitry Andric // Internally llvm will represent some registers as big 128 bits registers 3680b57cec5SDimitry Andric // for example, but they actually correspond to 4 actual 32 bits registers. 3690b57cec5SDimitry Andric // Thus Pressure is not equal to num_alive_registers * constant. 3700b57cec5SDimitry Andric LiveInPressure = TopPressure.MaxSetPressure; 3710b57cec5SDimitry Andric LiveOutPressure = BotPressure.MaxSetPressure; 3720b57cec5SDimitry Andric 3730b57cec5SDimitry Andric // Prepares TopRPTracker for top down scheduling. 3740b57cec5SDimitry Andric TopRPTracker.closeTop(); 3750b57cec5SDimitry Andric } 3760b57cec5SDimitry Andric 3770b57cec5SDimitry Andric void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock, 3780b57cec5SDimitry Andric MachineBasicBlock::iterator EndBlock) { 3790b57cec5SDimitry Andric if (!Scheduled) 3800b57cec5SDimitry Andric fastSchedule(); 3810b57cec5SDimitry Andric 3820b57cec5SDimitry Andric // PreScheduling phase to set LiveIn and LiveOut. 3830b57cec5SDimitry Andric initRegPressure(BeginBlock, EndBlock); 3840b57cec5SDimitry Andric undoSchedule(); 3850b57cec5SDimitry Andric 3860b57cec5SDimitry Andric // Schedule for real now. 3870b57cec5SDimitry Andric 3880b57cec5SDimitry Andric TopReadySUs.clear(); 3890b57cec5SDimitry Andric 3900b57cec5SDimitry Andric for (SUnit* SU : SUnits) { 3910b57cec5SDimitry Andric if (!SU->NumPredsLeft) 3920b57cec5SDimitry Andric TopReadySUs.push_back(SU); 3930b57cec5SDimitry Andric } 3940b57cec5SDimitry Andric 3950b57cec5SDimitry Andric while (!TopReadySUs.empty()) { 3960b57cec5SDimitry Andric SUnit *SU = pickNode(); 3970b57cec5SDimitry Andric ScheduledSUnits.push_back(SU); 3980b57cec5SDimitry Andric TopRPTracker.setPos(SU->getInstr()); 3990b57cec5SDimitry Andric TopRPTracker.advance(); 4000b57cec5SDimitry Andric nodeScheduled(SU); 4010b57cec5SDimitry Andric } 4020b57cec5SDimitry Andric 40381ad6265SDimitry Andric // TODO: compute InternalAdditionalPressure. 404349cc55cSDimitry Andric InternalAdditionalPressure.resize(TopPressure.MaxSetPressure.size()); 4050b57cec5SDimitry Andric 4060b57cec5SDimitry Andric // Check everything is right. 4070b57cec5SDimitry Andric #ifndef NDEBUG 4080b57cec5SDimitry Andric assert(SUnits.size() == ScheduledSUnits.size() && 4090b57cec5SDimitry Andric TopReadySUs.empty()); 4100b57cec5SDimitry Andric for (SUnit* SU : SUnits) { 4110b57cec5SDimitry Andric assert(SU->isScheduled && 4120b57cec5SDimitry Andric SU->NumPredsLeft == 0); 4130b57cec5SDimitry Andric } 4140b57cec5SDimitry Andric #endif 4150b57cec5SDimitry Andric 4160b57cec5SDimitry Andric Scheduled = true; 4170b57cec5SDimitry Andric } 4180b57cec5SDimitry Andric 4190b57cec5SDimitry Andric void SIScheduleBlock::undoSchedule() { 4200b57cec5SDimitry Andric for (SUnit* SU : SUnits) { 4210b57cec5SDimitry Andric SU->isScheduled = false; 4220b57cec5SDimitry Andric for (SDep& Succ : SU->Succs) { 4230b57cec5SDimitry Andric if (BC->isSUInBlock(Succ.getSUnit(), ID)) 4240b57cec5SDimitry Andric undoReleaseSucc(SU, &Succ); 4250b57cec5SDimitry Andric } 4260b57cec5SDimitry Andric } 4270b57cec5SDimitry Andric HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0); 4280b57cec5SDimitry Andric ScheduledSUnits.clear(); 4290b57cec5SDimitry Andric Scheduled = false; 4300b57cec5SDimitry Andric } 4310b57cec5SDimitry Andric 4320b57cec5SDimitry Andric void SIScheduleBlock::undoReleaseSucc(SUnit *SU, SDep *SuccEdge) { 4330b57cec5SDimitry Andric SUnit *SuccSU = SuccEdge->getSUnit(); 4340b57cec5SDimitry Andric 4350b57cec5SDimitry Andric if (SuccEdge->isWeak()) { 4360b57cec5SDimitry Andric ++SuccSU->WeakPredsLeft; 4370b57cec5SDimitry Andric return; 4380b57cec5SDimitry Andric } 4390b57cec5SDimitry Andric ++SuccSU->NumPredsLeft; 4400b57cec5SDimitry Andric } 4410b57cec5SDimitry Andric 4420b57cec5SDimitry Andric void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge) { 4430b57cec5SDimitry Andric SUnit *SuccSU = SuccEdge->getSUnit(); 4440b57cec5SDimitry Andric 4450b57cec5SDimitry Andric if (SuccEdge->isWeak()) { 4460b57cec5SDimitry Andric --SuccSU->WeakPredsLeft; 4470b57cec5SDimitry Andric return; 4480b57cec5SDimitry Andric } 4490b57cec5SDimitry Andric #ifndef NDEBUG 4500b57cec5SDimitry Andric if (SuccSU->NumPredsLeft == 0) { 4510b57cec5SDimitry Andric dbgs() << "*** Scheduling failed! ***\n"; 4520b57cec5SDimitry Andric DAG->dumpNode(*SuccSU); 4530b57cec5SDimitry Andric dbgs() << " has been released too many times!\n"; 4540b57cec5SDimitry Andric llvm_unreachable(nullptr); 4550b57cec5SDimitry Andric } 4560b57cec5SDimitry Andric #endif 4570b57cec5SDimitry Andric 4580b57cec5SDimitry Andric --SuccSU->NumPredsLeft; 4590b57cec5SDimitry Andric } 4600b57cec5SDimitry Andric 4610b57cec5SDimitry Andric /// Release Successors of the SU that are in the block or not. 4620b57cec5SDimitry Andric void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) { 4630b57cec5SDimitry Andric for (SDep& Succ : SU->Succs) { 4640b57cec5SDimitry Andric SUnit *SuccSU = Succ.getSUnit(); 4650b57cec5SDimitry Andric 4660b57cec5SDimitry Andric if (SuccSU->NodeNum >= DAG->SUnits.size()) 4670b57cec5SDimitry Andric continue; 4680b57cec5SDimitry Andric 4690b57cec5SDimitry Andric if (BC->isSUInBlock(SuccSU, ID) != InOrOutBlock) 4700b57cec5SDimitry Andric continue; 4710b57cec5SDimitry Andric 4720b57cec5SDimitry Andric releaseSucc(SU, &Succ); 4730b57cec5SDimitry Andric if (SuccSU->NumPredsLeft == 0 && InOrOutBlock) 4740b57cec5SDimitry Andric TopReadySUs.push_back(SuccSU); 4750b57cec5SDimitry Andric } 4760b57cec5SDimitry Andric } 4770b57cec5SDimitry Andric 4780b57cec5SDimitry Andric void SIScheduleBlock::nodeScheduled(SUnit *SU) { 4790b57cec5SDimitry Andric // Is in TopReadySUs 4800b57cec5SDimitry Andric assert (!SU->NumPredsLeft); 4810b57cec5SDimitry Andric std::vector<SUnit *>::iterator I = llvm::find(TopReadySUs, SU); 4820b57cec5SDimitry Andric if (I == TopReadySUs.end()) { 4830b57cec5SDimitry Andric dbgs() << "Data Structure Bug in SI Scheduler\n"; 4840b57cec5SDimitry Andric llvm_unreachable(nullptr); 4850b57cec5SDimitry Andric } 4860b57cec5SDimitry Andric TopReadySUs.erase(I); 4870b57cec5SDimitry Andric 4880b57cec5SDimitry Andric releaseSuccessors(SU, true); 4890b57cec5SDimitry Andric // Scheduling this node will trigger a wait, 4900b57cec5SDimitry Andric // thus propagate to other instructions that they do not need to wait either. 4910b57cec5SDimitry Andric if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]]) 4920b57cec5SDimitry Andric HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0); 4930b57cec5SDimitry Andric 4940b57cec5SDimitry Andric if (DAG->IsLowLatencySU[SU->NodeNum]) { 4950b57cec5SDimitry Andric for (SDep& Succ : SU->Succs) { 4960b57cec5SDimitry Andric std::map<unsigned, unsigned>::iterator I = 4970b57cec5SDimitry Andric NodeNum2Index.find(Succ.getSUnit()->NodeNum); 4980b57cec5SDimitry Andric if (I != NodeNum2Index.end()) 4990b57cec5SDimitry Andric HasLowLatencyNonWaitedParent[I->second] = 1; 5000b57cec5SDimitry Andric } 5010b57cec5SDimitry Andric } 5020b57cec5SDimitry Andric SU->isScheduled = true; 5030b57cec5SDimitry Andric } 5040b57cec5SDimitry Andric 5050b57cec5SDimitry Andric void SIScheduleBlock::finalizeUnits() { 5060b57cec5SDimitry Andric // We remove links from outside blocks to enable scheduling inside the block. 5070b57cec5SDimitry Andric for (SUnit* SU : SUnits) { 5080b57cec5SDimitry Andric releaseSuccessors(SU, false); 5090b57cec5SDimitry Andric if (DAG->IsHighLatencySU[SU->NodeNum]) 5100b57cec5SDimitry Andric HighLatencyBlock = true; 5110b57cec5SDimitry Andric } 5120b57cec5SDimitry Andric HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0); 5130b57cec5SDimitry Andric } 5140b57cec5SDimitry Andric 5150b57cec5SDimitry Andric // we maintain ascending order of IDs 5160b57cec5SDimitry Andric void SIScheduleBlock::addPred(SIScheduleBlock *Pred) { 5170b57cec5SDimitry Andric unsigned PredID = Pred->getID(); 5180b57cec5SDimitry Andric 5190b57cec5SDimitry Andric // Check if not already predecessor. 5200b57cec5SDimitry Andric for (SIScheduleBlock* P : Preds) { 5210b57cec5SDimitry Andric if (PredID == P->getID()) 5220b57cec5SDimitry Andric return; 5230b57cec5SDimitry Andric } 5240b57cec5SDimitry Andric Preds.push_back(Pred); 5250b57cec5SDimitry Andric 5260b57cec5SDimitry Andric assert(none_of(Succs, 5270b57cec5SDimitry Andric [=](std::pair<SIScheduleBlock*, 5280b57cec5SDimitry Andric SIScheduleBlockLinkKind> S) { 5290b57cec5SDimitry Andric return PredID == S.first->getID(); 5300b57cec5SDimitry Andric }) && 5310b57cec5SDimitry Andric "Loop in the Block Graph!"); 5320b57cec5SDimitry Andric } 5330b57cec5SDimitry Andric 5340b57cec5SDimitry Andric void SIScheduleBlock::addSucc(SIScheduleBlock *Succ, 5350b57cec5SDimitry Andric SIScheduleBlockLinkKind Kind) { 5360b57cec5SDimitry Andric unsigned SuccID = Succ->getID(); 5370b57cec5SDimitry Andric 5380b57cec5SDimitry Andric // Check if not already predecessor. 5390b57cec5SDimitry Andric for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) { 5400b57cec5SDimitry Andric if (SuccID == S.first->getID()) { 5410b57cec5SDimitry Andric if (S.second == SIScheduleBlockLinkKind::NoData && 5420b57cec5SDimitry Andric Kind == SIScheduleBlockLinkKind::Data) 5430b57cec5SDimitry Andric S.second = Kind; 5440b57cec5SDimitry Andric return; 5450b57cec5SDimitry Andric } 5460b57cec5SDimitry Andric } 5470b57cec5SDimitry Andric if (Succ->isHighLatencyBlock()) 5480b57cec5SDimitry Andric ++NumHighLatencySuccessors; 549*0fca6ea1SDimitry Andric Succs.emplace_back(Succ, Kind); 5500b57cec5SDimitry Andric 5510b57cec5SDimitry Andric assert(none_of(Preds, 5520b57cec5SDimitry Andric [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) && 5530b57cec5SDimitry Andric "Loop in the Block Graph!"); 5540b57cec5SDimitry Andric } 5550b57cec5SDimitry Andric 5560b57cec5SDimitry Andric #ifndef NDEBUG 5570b57cec5SDimitry Andric void SIScheduleBlock::printDebug(bool full) { 5580b57cec5SDimitry Andric dbgs() << "Block (" << ID << ")\n"; 5590b57cec5SDimitry Andric if (!full) 5600b57cec5SDimitry Andric return; 5610b57cec5SDimitry Andric 5620b57cec5SDimitry Andric dbgs() << "\nContains High Latency Instruction: " 5630b57cec5SDimitry Andric << HighLatencyBlock << '\n'; 5640b57cec5SDimitry Andric dbgs() << "\nDepends On:\n"; 5650b57cec5SDimitry Andric for (SIScheduleBlock* P : Preds) { 5660b57cec5SDimitry Andric P->printDebug(false); 5670b57cec5SDimitry Andric } 5680b57cec5SDimitry Andric 5690b57cec5SDimitry Andric dbgs() << "\nSuccessors:\n"; 5700b57cec5SDimitry Andric for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) { 5710b57cec5SDimitry Andric if (S.second == SIScheduleBlockLinkKind::Data) 5720b57cec5SDimitry Andric dbgs() << "(Data Dep) "; 5730b57cec5SDimitry Andric S.first->printDebug(false); 5740b57cec5SDimitry Andric } 5750b57cec5SDimitry Andric 5760b57cec5SDimitry Andric if (Scheduled) { 5775ffd83dbSDimitry Andric dbgs() << "LiveInPressure " 5785ffd83dbSDimitry Andric << LiveInPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' ' 5795ffd83dbSDimitry Andric << LiveInPressure[AMDGPU::RegisterPressureSets::VGPR_32] << '\n'; 5805ffd83dbSDimitry Andric dbgs() << "LiveOutPressure " 5815ffd83dbSDimitry Andric << LiveOutPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' ' 5825ffd83dbSDimitry Andric << LiveOutPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n\n"; 5830b57cec5SDimitry Andric dbgs() << "LiveIns:\n"; 5840b57cec5SDimitry Andric for (unsigned Reg : LiveInRegs) 5850b57cec5SDimitry Andric dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; 5860b57cec5SDimitry Andric 5870b57cec5SDimitry Andric dbgs() << "\nLiveOuts:\n"; 5880b57cec5SDimitry Andric for (unsigned Reg : LiveOutRegs) 5890b57cec5SDimitry Andric dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; 5900b57cec5SDimitry Andric } 5910b57cec5SDimitry Andric 5920b57cec5SDimitry Andric dbgs() << "\nInstructions:\n"; 5930b57cec5SDimitry Andric for (const SUnit* SU : SUnits) 5940b57cec5SDimitry Andric DAG->dumpNode(*SU); 5950b57cec5SDimitry Andric 5960b57cec5SDimitry Andric dbgs() << "///////////////////////\n"; 5970b57cec5SDimitry Andric } 5980b57cec5SDimitry Andric #endif 5990b57cec5SDimitry Andric 6000b57cec5SDimitry Andric // SIScheduleBlockCreator // 6010b57cec5SDimitry Andric 602480093f4SDimitry Andric SIScheduleBlockCreator::SIScheduleBlockCreator(SIScheduleDAGMI *DAG) 603480093f4SDimitry Andric : DAG(DAG) {} 6040b57cec5SDimitry Andric 6050b57cec5SDimitry Andric SIScheduleBlocks 6060b57cec5SDimitry Andric SIScheduleBlockCreator::getBlocks(SISchedulerBlockCreatorVariant BlockVariant) { 6070b57cec5SDimitry Andric std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator B = 6080b57cec5SDimitry Andric Blocks.find(BlockVariant); 6090b57cec5SDimitry Andric if (B == Blocks.end()) { 6100b57cec5SDimitry Andric SIScheduleBlocks Res; 6110b57cec5SDimitry Andric createBlocksForVariant(BlockVariant); 6120b57cec5SDimitry Andric topologicalSort(); 6130b57cec5SDimitry Andric scheduleInsideBlocks(); 6140b57cec5SDimitry Andric fillStats(); 6150b57cec5SDimitry Andric Res.Blocks = CurrentBlocks; 6160b57cec5SDimitry Andric Res.TopDownIndex2Block = TopDownIndex2Block; 6170b57cec5SDimitry Andric Res.TopDownBlock2Index = TopDownBlock2Index; 6180b57cec5SDimitry Andric Blocks[BlockVariant] = Res; 6190b57cec5SDimitry Andric return Res; 6200b57cec5SDimitry Andric } 621*0fca6ea1SDimitry Andric return B->second; 6220b57cec5SDimitry Andric } 6230b57cec5SDimitry Andric 6240b57cec5SDimitry Andric bool SIScheduleBlockCreator::isSUInBlock(SUnit *SU, unsigned ID) { 6250b57cec5SDimitry Andric if (SU->NodeNum >= DAG->SUnits.size()) 6260b57cec5SDimitry Andric return false; 6270b57cec5SDimitry Andric return CurrentBlocks[Node2CurrentBlock[SU->NodeNum]]->getID() == ID; 6280b57cec5SDimitry Andric } 6290b57cec5SDimitry Andric 6300b57cec5SDimitry Andric void SIScheduleBlockCreator::colorHighLatenciesAlone() { 6310b57cec5SDimitry Andric unsigned DAGSize = DAG->SUnits.size(); 6320b57cec5SDimitry Andric 6330b57cec5SDimitry Andric for (unsigned i = 0, e = DAGSize; i != e; ++i) { 6340b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[i]; 6350b57cec5SDimitry Andric if (DAG->IsHighLatencySU[SU->NodeNum]) { 6360b57cec5SDimitry Andric CurrentColoring[SU->NodeNum] = NextReservedID++; 6370b57cec5SDimitry Andric } 6380b57cec5SDimitry Andric } 6390b57cec5SDimitry Andric } 6400b57cec5SDimitry Andric 6410b57cec5SDimitry Andric static bool 6420b57cec5SDimitry Andric hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU) { 6430b57cec5SDimitry Andric for (const auto &PredDep : SU.Preds) { 6440b57cec5SDimitry Andric if (PredDep.getSUnit() == &FromSU && 6450b57cec5SDimitry Andric PredDep.getKind() == llvm::SDep::Data) 6460b57cec5SDimitry Andric return true; 6470b57cec5SDimitry Andric } 6480b57cec5SDimitry Andric return false; 6490b57cec5SDimitry Andric } 6500b57cec5SDimitry Andric 6510b57cec5SDimitry Andric void SIScheduleBlockCreator::colorHighLatenciesGroups() { 6520b57cec5SDimitry Andric unsigned DAGSize = DAG->SUnits.size(); 6530b57cec5SDimitry Andric unsigned NumHighLatencies = 0; 6540b57cec5SDimitry Andric unsigned GroupSize; 6550b57cec5SDimitry Andric int Color = NextReservedID; 6560b57cec5SDimitry Andric unsigned Count = 0; 6570b57cec5SDimitry Andric std::set<unsigned> FormingGroup; 6580b57cec5SDimitry Andric 6590b57cec5SDimitry Andric for (unsigned i = 0, e = DAGSize; i != e; ++i) { 6600b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[i]; 6610b57cec5SDimitry Andric if (DAG->IsHighLatencySU[SU->NodeNum]) 6620b57cec5SDimitry Andric ++NumHighLatencies; 6630b57cec5SDimitry Andric } 6640b57cec5SDimitry Andric 6650b57cec5SDimitry Andric if (NumHighLatencies == 0) 6660b57cec5SDimitry Andric return; 6670b57cec5SDimitry Andric 6680b57cec5SDimitry Andric if (NumHighLatencies <= 6) 6690b57cec5SDimitry Andric GroupSize = 2; 6700b57cec5SDimitry Andric else if (NumHighLatencies <= 12) 6710b57cec5SDimitry Andric GroupSize = 3; 6720b57cec5SDimitry Andric else 6730b57cec5SDimitry Andric GroupSize = 4; 6740b57cec5SDimitry Andric 6750b57cec5SDimitry Andric for (unsigned SUNum : DAG->TopDownIndex2SU) { 6760b57cec5SDimitry Andric const SUnit &SU = DAG->SUnits[SUNum]; 6770b57cec5SDimitry Andric if (DAG->IsHighLatencySU[SU.NodeNum]) { 6780b57cec5SDimitry Andric unsigned CompatibleGroup = true; 6790b57cec5SDimitry Andric int ProposedColor = Color; 6800b57cec5SDimitry Andric std::vector<int> AdditionalElements; 6810b57cec5SDimitry Andric 6820b57cec5SDimitry Andric // We don't want to put in the same block 6830b57cec5SDimitry Andric // two high latency instructions that depend 6840b57cec5SDimitry Andric // on each other. 6850b57cec5SDimitry Andric // One way would be to check canAddEdge 6860b57cec5SDimitry Andric // in both directions, but that currently is not 6870b57cec5SDimitry Andric // enough because there the high latency order is 6880b57cec5SDimitry Andric // enforced (via links). 6890b57cec5SDimitry Andric // Instead, look at the dependencies between the 6900b57cec5SDimitry Andric // high latency instructions and deduce if it is 6910b57cec5SDimitry Andric // a data dependency or not. 6920b57cec5SDimitry Andric for (unsigned j : FormingGroup) { 6930b57cec5SDimitry Andric bool HasSubGraph; 6940b57cec5SDimitry Andric std::vector<int> SubGraph; 6950b57cec5SDimitry Andric // By construction (topological order), if SU and 69681ad6265SDimitry Andric // DAG->SUnits[j] are linked, DAG->SUnits[j] is necessary 6970b57cec5SDimitry Andric // in the parent graph of SU. 6980b57cec5SDimitry Andric #ifndef NDEBUG 6990b57cec5SDimitry Andric SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j], 7000b57cec5SDimitry Andric HasSubGraph); 7010b57cec5SDimitry Andric assert(!HasSubGraph); 7020b57cec5SDimitry Andric #endif 7030b57cec5SDimitry Andric SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU, 7040b57cec5SDimitry Andric HasSubGraph); 7050b57cec5SDimitry Andric if (!HasSubGraph) 7060b57cec5SDimitry Andric continue; // No dependencies between each other 707*0fca6ea1SDimitry Andric if (SubGraph.size() > 5) { 7080b57cec5SDimitry Andric // Too many elements would be required to be added to the block. 7090b57cec5SDimitry Andric CompatibleGroup = false; 7100b57cec5SDimitry Andric break; 7110b57cec5SDimitry Andric } 7120b57cec5SDimitry Andric // Check the type of dependency 7130b57cec5SDimitry Andric for (unsigned k : SubGraph) { 7140b57cec5SDimitry Andric // If in the path to join the two instructions, 7150b57cec5SDimitry Andric // there is another high latency instruction, 7160b57cec5SDimitry Andric // or instructions colored for another block 7170b57cec5SDimitry Andric // abort the merge. 718*0fca6ea1SDimitry Andric if (DAG->IsHighLatencySU[k] || (CurrentColoring[k] != ProposedColor && 7190b57cec5SDimitry Andric CurrentColoring[k] != 0)) { 7200b57cec5SDimitry Andric CompatibleGroup = false; 7210b57cec5SDimitry Andric break; 7220b57cec5SDimitry Andric } 7230b57cec5SDimitry Andric // If one of the SU in the subgraph depends on the result of SU j, 7240b57cec5SDimitry Andric // there'll be a data dependency. 7250b57cec5SDimitry Andric if (hasDataDependencyPred(DAG->SUnits[k], DAG->SUnits[j])) { 7260b57cec5SDimitry Andric CompatibleGroup = false; 7270b57cec5SDimitry Andric break; 7280b57cec5SDimitry Andric } 7290b57cec5SDimitry Andric } 7300b57cec5SDimitry Andric if (!CompatibleGroup) 7310b57cec5SDimitry Andric break; 7320b57cec5SDimitry Andric // Same check for the SU 7330b57cec5SDimitry Andric if (hasDataDependencyPred(SU, DAG->SUnits[j])) { 7340b57cec5SDimitry Andric CompatibleGroup = false; 7350b57cec5SDimitry Andric break; 7360b57cec5SDimitry Andric } 7370b57cec5SDimitry Andric // Add all the required instructions to the block 7380b57cec5SDimitry Andric // These cannot live in another block (because they 7390b57cec5SDimitry Andric // depend (order dependency) on one of the 7400b57cec5SDimitry Andric // instruction in the block, and are required for the 7410b57cec5SDimitry Andric // high latency instruction we add. 742e8d8bef9SDimitry Andric llvm::append_range(AdditionalElements, SubGraph); 7430b57cec5SDimitry Andric } 7440b57cec5SDimitry Andric if (CompatibleGroup) { 7450b57cec5SDimitry Andric FormingGroup.insert(SU.NodeNum); 7460b57cec5SDimitry Andric for (unsigned j : AdditionalElements) 7470b57cec5SDimitry Andric CurrentColoring[j] = ProposedColor; 7480b57cec5SDimitry Andric CurrentColoring[SU.NodeNum] = ProposedColor; 7490b57cec5SDimitry Andric ++Count; 7500b57cec5SDimitry Andric } 7510b57cec5SDimitry Andric // Found one incompatible instruction, 7520b57cec5SDimitry Andric // or has filled a big enough group. 7530b57cec5SDimitry Andric // -> start a new one. 7540b57cec5SDimitry Andric if (!CompatibleGroup) { 7550b57cec5SDimitry Andric FormingGroup.clear(); 7560b57cec5SDimitry Andric Color = ++NextReservedID; 7570b57cec5SDimitry Andric ProposedColor = Color; 7580b57cec5SDimitry Andric FormingGroup.insert(SU.NodeNum); 7590b57cec5SDimitry Andric CurrentColoring[SU.NodeNum] = ProposedColor; 7600b57cec5SDimitry Andric Count = 0; 7610b57cec5SDimitry Andric } else if (Count == GroupSize) { 7620b57cec5SDimitry Andric FormingGroup.clear(); 7630b57cec5SDimitry Andric Color = ++NextReservedID; 7640b57cec5SDimitry Andric ProposedColor = Color; 7650b57cec5SDimitry Andric Count = 0; 7660b57cec5SDimitry Andric } 7670b57cec5SDimitry Andric } 7680b57cec5SDimitry Andric } 7690b57cec5SDimitry Andric } 7700b57cec5SDimitry Andric 7710b57cec5SDimitry Andric void SIScheduleBlockCreator::colorComputeReservedDependencies() { 7720b57cec5SDimitry Andric unsigned DAGSize = DAG->SUnits.size(); 7730b57cec5SDimitry Andric std::map<std::set<unsigned>, unsigned> ColorCombinations; 7740b57cec5SDimitry Andric 7750b57cec5SDimitry Andric CurrentTopDownReservedDependencyColoring.clear(); 7760b57cec5SDimitry Andric CurrentBottomUpReservedDependencyColoring.clear(); 7770b57cec5SDimitry Andric 7780b57cec5SDimitry Andric CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0); 7790b57cec5SDimitry Andric CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0); 7800b57cec5SDimitry Andric 7810b57cec5SDimitry Andric // Traverse TopDown, and give different colors to SUs depending 7820b57cec5SDimitry Andric // on which combination of High Latencies they depend on. 7830b57cec5SDimitry Andric 7840b57cec5SDimitry Andric for (unsigned SUNum : DAG->TopDownIndex2SU) { 7850b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[SUNum]; 7860b57cec5SDimitry Andric std::set<unsigned> SUColors; 7870b57cec5SDimitry Andric 7880b57cec5SDimitry Andric // Already given. 7890b57cec5SDimitry Andric if (CurrentColoring[SU->NodeNum]) { 7900b57cec5SDimitry Andric CurrentTopDownReservedDependencyColoring[SU->NodeNum] = 7910b57cec5SDimitry Andric CurrentColoring[SU->NodeNum]; 7920b57cec5SDimitry Andric continue; 7930b57cec5SDimitry Andric } 7940b57cec5SDimitry Andric 7950b57cec5SDimitry Andric for (SDep& PredDep : SU->Preds) { 7960b57cec5SDimitry Andric SUnit *Pred = PredDep.getSUnit(); 7970b57cec5SDimitry Andric if (PredDep.isWeak() || Pred->NodeNum >= DAGSize) 7980b57cec5SDimitry Andric continue; 7990b57cec5SDimitry Andric if (CurrentTopDownReservedDependencyColoring[Pred->NodeNum] > 0) 8000b57cec5SDimitry Andric SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->NodeNum]); 8010b57cec5SDimitry Andric } 8020b57cec5SDimitry Andric // Color 0 by default. 8030b57cec5SDimitry Andric if (SUColors.empty()) 8040b57cec5SDimitry Andric continue; 8050b57cec5SDimitry Andric // Same color than parents. 8060b57cec5SDimitry Andric if (SUColors.size() == 1 && *SUColors.begin() > DAGSize) 8070b57cec5SDimitry Andric CurrentTopDownReservedDependencyColoring[SU->NodeNum] = 8080b57cec5SDimitry Andric *SUColors.begin(); 8090b57cec5SDimitry Andric else { 8100b57cec5SDimitry Andric std::map<std::set<unsigned>, unsigned>::iterator Pos = 8110b57cec5SDimitry Andric ColorCombinations.find(SUColors); 8120b57cec5SDimitry Andric if (Pos != ColorCombinations.end()) { 8130b57cec5SDimitry Andric CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second; 8140b57cec5SDimitry Andric } else { 8150b57cec5SDimitry Andric CurrentTopDownReservedDependencyColoring[SU->NodeNum] = 8160b57cec5SDimitry Andric NextNonReservedID; 8170b57cec5SDimitry Andric ColorCombinations[SUColors] = NextNonReservedID++; 8180b57cec5SDimitry Andric } 8190b57cec5SDimitry Andric } 8200b57cec5SDimitry Andric } 8210b57cec5SDimitry Andric 8220b57cec5SDimitry Andric ColorCombinations.clear(); 8230b57cec5SDimitry Andric 8240b57cec5SDimitry Andric // Same as before, but BottomUp. 8250b57cec5SDimitry Andric 8260b57cec5SDimitry Andric for (unsigned SUNum : DAG->BottomUpIndex2SU) { 8270b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[SUNum]; 8280b57cec5SDimitry Andric std::set<unsigned> SUColors; 8290b57cec5SDimitry Andric 8300b57cec5SDimitry Andric // Already given. 8310b57cec5SDimitry Andric if (CurrentColoring[SU->NodeNum]) { 8320b57cec5SDimitry Andric CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = 8330b57cec5SDimitry Andric CurrentColoring[SU->NodeNum]; 8340b57cec5SDimitry Andric continue; 8350b57cec5SDimitry Andric } 8360b57cec5SDimitry Andric 8370b57cec5SDimitry Andric for (SDep& SuccDep : SU->Succs) { 8380b57cec5SDimitry Andric SUnit *Succ = SuccDep.getSUnit(); 8390b57cec5SDimitry Andric if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) 8400b57cec5SDimitry Andric continue; 8410b57cec5SDimitry Andric if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0) 8420b57cec5SDimitry Andric SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->NodeNum]); 8430b57cec5SDimitry Andric } 8440b57cec5SDimitry Andric // Keep color 0. 8450b57cec5SDimitry Andric if (SUColors.empty()) 8460b57cec5SDimitry Andric continue; 8470b57cec5SDimitry Andric // Same color than parents. 8480b57cec5SDimitry Andric if (SUColors.size() == 1 && *SUColors.begin() > DAGSize) 8490b57cec5SDimitry Andric CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = 8500b57cec5SDimitry Andric *SUColors.begin(); 8510b57cec5SDimitry Andric else { 8520b57cec5SDimitry Andric std::map<std::set<unsigned>, unsigned>::iterator Pos = 8530b57cec5SDimitry Andric ColorCombinations.find(SUColors); 8540b57cec5SDimitry Andric if (Pos != ColorCombinations.end()) { 8550b57cec5SDimitry Andric CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = Pos->second; 8560b57cec5SDimitry Andric } else { 8570b57cec5SDimitry Andric CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = 8580b57cec5SDimitry Andric NextNonReservedID; 8590b57cec5SDimitry Andric ColorCombinations[SUColors] = NextNonReservedID++; 8600b57cec5SDimitry Andric } 8610b57cec5SDimitry Andric } 8620b57cec5SDimitry Andric } 8630b57cec5SDimitry Andric } 8640b57cec5SDimitry Andric 8650b57cec5SDimitry Andric void SIScheduleBlockCreator::colorAccordingToReservedDependencies() { 8660b57cec5SDimitry Andric std::map<std::pair<unsigned, unsigned>, unsigned> ColorCombinations; 8670b57cec5SDimitry Andric 8680b57cec5SDimitry Andric // Every combination of colors given by the top down 8690b57cec5SDimitry Andric // and bottom up Reserved node dependency 8700b57cec5SDimitry Andric 8710eae32dcSDimitry Andric for (const SUnit &SU : DAG->SUnits) { 8720b57cec5SDimitry Andric std::pair<unsigned, unsigned> SUColors; 8730b57cec5SDimitry Andric 8740b57cec5SDimitry Andric // High latency instructions: already given. 8750eae32dcSDimitry Andric if (CurrentColoring[SU.NodeNum]) 8760b57cec5SDimitry Andric continue; 8770b57cec5SDimitry Andric 8780eae32dcSDimitry Andric SUColors.first = CurrentTopDownReservedDependencyColoring[SU.NodeNum]; 8790eae32dcSDimitry Andric SUColors.second = CurrentBottomUpReservedDependencyColoring[SU.NodeNum]; 8800b57cec5SDimitry Andric 8810b57cec5SDimitry Andric std::map<std::pair<unsigned, unsigned>, unsigned>::iterator Pos = 8820b57cec5SDimitry Andric ColorCombinations.find(SUColors); 8830b57cec5SDimitry Andric if (Pos != ColorCombinations.end()) { 8840eae32dcSDimitry Andric CurrentColoring[SU.NodeNum] = Pos->second; 8850b57cec5SDimitry Andric } else { 8860eae32dcSDimitry Andric CurrentColoring[SU.NodeNum] = NextNonReservedID; 8870b57cec5SDimitry Andric ColorCombinations[SUColors] = NextNonReservedID++; 8880b57cec5SDimitry Andric } 8890b57cec5SDimitry Andric } 8900b57cec5SDimitry Andric } 8910b57cec5SDimitry Andric 8920b57cec5SDimitry Andric void SIScheduleBlockCreator::colorEndsAccordingToDependencies() { 8930b57cec5SDimitry Andric unsigned DAGSize = DAG->SUnits.size(); 8940b57cec5SDimitry Andric std::vector<int> PendingColoring = CurrentColoring; 8950b57cec5SDimitry Andric 8960b57cec5SDimitry Andric assert(DAGSize >= 1 && 8970b57cec5SDimitry Andric CurrentBottomUpReservedDependencyColoring.size() == DAGSize && 8980b57cec5SDimitry Andric CurrentTopDownReservedDependencyColoring.size() == DAGSize); 8990b57cec5SDimitry Andric // If there is no reserved block at all, do nothing. We don't want 9000b57cec5SDimitry Andric // everything in one block. 901*0fca6ea1SDimitry Andric if (*llvm::max_element(CurrentBottomUpReservedDependencyColoring) == 0 && 902*0fca6ea1SDimitry Andric *llvm::max_element(CurrentTopDownReservedDependencyColoring) == 0) 9030b57cec5SDimitry Andric return; 9040b57cec5SDimitry Andric 9050b57cec5SDimitry Andric for (unsigned SUNum : DAG->BottomUpIndex2SU) { 9060b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[SUNum]; 9070b57cec5SDimitry Andric std::set<unsigned> SUColors; 9080b57cec5SDimitry Andric std::set<unsigned> SUColorsPending; 9090b57cec5SDimitry Andric 9100b57cec5SDimitry Andric if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) 9110b57cec5SDimitry Andric continue; 9120b57cec5SDimitry Andric 9130b57cec5SDimitry Andric if (CurrentBottomUpReservedDependencyColoring[SU->NodeNum] > 0 || 9140b57cec5SDimitry Andric CurrentTopDownReservedDependencyColoring[SU->NodeNum] > 0) 9150b57cec5SDimitry Andric continue; 9160b57cec5SDimitry Andric 9170b57cec5SDimitry Andric for (SDep& SuccDep : SU->Succs) { 9180b57cec5SDimitry Andric SUnit *Succ = SuccDep.getSUnit(); 9190b57cec5SDimitry Andric if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) 9200b57cec5SDimitry Andric continue; 9210b57cec5SDimitry Andric if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0 || 9220b57cec5SDimitry Andric CurrentTopDownReservedDependencyColoring[Succ->NodeNum] > 0) 9230b57cec5SDimitry Andric SUColors.insert(CurrentColoring[Succ->NodeNum]); 9240b57cec5SDimitry Andric SUColorsPending.insert(PendingColoring[Succ->NodeNum]); 9250b57cec5SDimitry Andric } 9260b57cec5SDimitry Andric // If there is only one child/parent block, and that block 9270b57cec5SDimitry Andric // is not among the ones we are removing in this path, then 9280b57cec5SDimitry Andric // merge the instruction to that block 9290b57cec5SDimitry Andric if (SUColors.size() == 1 && SUColorsPending.size() == 1) 9300b57cec5SDimitry Andric PendingColoring[SU->NodeNum] = *SUColors.begin(); 9310b57cec5SDimitry Andric else // TODO: Attribute new colors depending on color 9320b57cec5SDimitry Andric // combination of children. 9330b57cec5SDimitry Andric PendingColoring[SU->NodeNum] = NextNonReservedID++; 9340b57cec5SDimitry Andric } 9350b57cec5SDimitry Andric CurrentColoring = PendingColoring; 9360b57cec5SDimitry Andric } 9370b57cec5SDimitry Andric 9380b57cec5SDimitry Andric 9390b57cec5SDimitry Andric void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() { 9400b57cec5SDimitry Andric unsigned DAGSize = DAG->SUnits.size(); 9410b57cec5SDimitry Andric unsigned PreviousColor; 9420b57cec5SDimitry Andric std::set<unsigned> SeenColors; 9430b57cec5SDimitry Andric 9440b57cec5SDimitry Andric if (DAGSize <= 1) 9450b57cec5SDimitry Andric return; 9460b57cec5SDimitry Andric 9470b57cec5SDimitry Andric PreviousColor = CurrentColoring[0]; 9480b57cec5SDimitry Andric 9490b57cec5SDimitry Andric for (unsigned i = 1, e = DAGSize; i != e; ++i) { 9500b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[i]; 9510b57cec5SDimitry Andric unsigned CurrentColor = CurrentColoring[i]; 9520b57cec5SDimitry Andric unsigned PreviousColorSave = PreviousColor; 9530b57cec5SDimitry Andric assert(i == SU->NodeNum); 9540b57cec5SDimitry Andric 9550b57cec5SDimitry Andric if (CurrentColor != PreviousColor) 9560b57cec5SDimitry Andric SeenColors.insert(PreviousColor); 9570b57cec5SDimitry Andric PreviousColor = CurrentColor; 9580b57cec5SDimitry Andric 9590b57cec5SDimitry Andric if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) 9600b57cec5SDimitry Andric continue; 9610b57cec5SDimitry Andric 9620b57cec5SDimitry Andric if (SeenColors.find(CurrentColor) == SeenColors.end()) 9630b57cec5SDimitry Andric continue; 9640b57cec5SDimitry Andric 9650b57cec5SDimitry Andric if (PreviousColorSave != CurrentColor) 9660b57cec5SDimitry Andric CurrentColoring[i] = NextNonReservedID++; 9670b57cec5SDimitry Andric else 9680b57cec5SDimitry Andric CurrentColoring[i] = CurrentColoring[i-1]; 9690b57cec5SDimitry Andric } 9700b57cec5SDimitry Andric } 9710b57cec5SDimitry Andric 9720b57cec5SDimitry Andric void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() { 9730b57cec5SDimitry Andric unsigned DAGSize = DAG->SUnits.size(); 9740b57cec5SDimitry Andric 9750b57cec5SDimitry Andric for (unsigned SUNum : DAG->BottomUpIndex2SU) { 9760b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[SUNum]; 9770b57cec5SDimitry Andric std::set<unsigned> SUColors; 9780b57cec5SDimitry Andric 9790b57cec5SDimitry Andric if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) 9800b57cec5SDimitry Andric continue; 9810b57cec5SDimitry Andric 9820b57cec5SDimitry Andric // No predecessor: Vgpr constant loading. 9830b57cec5SDimitry Andric // Low latency instructions usually have a predecessor (the address) 9840b57cec5SDimitry Andric if (SU->Preds.size() > 0 && !DAG->IsLowLatencySU[SU->NodeNum]) 9850b57cec5SDimitry Andric continue; 9860b57cec5SDimitry Andric 9870b57cec5SDimitry Andric for (SDep& SuccDep : SU->Succs) { 9880b57cec5SDimitry Andric SUnit *Succ = SuccDep.getSUnit(); 9890b57cec5SDimitry Andric if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) 9900b57cec5SDimitry Andric continue; 9910b57cec5SDimitry Andric SUColors.insert(CurrentColoring[Succ->NodeNum]); 9920b57cec5SDimitry Andric } 9930b57cec5SDimitry Andric if (SUColors.size() == 1) 9940b57cec5SDimitry Andric CurrentColoring[SU->NodeNum] = *SUColors.begin(); 9950b57cec5SDimitry Andric } 9960b57cec5SDimitry Andric } 9970b57cec5SDimitry Andric 9980b57cec5SDimitry Andric void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() { 9990b57cec5SDimitry Andric unsigned DAGSize = DAG->SUnits.size(); 10000b57cec5SDimitry Andric 10010b57cec5SDimitry Andric for (unsigned SUNum : DAG->BottomUpIndex2SU) { 10020b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[SUNum]; 10030b57cec5SDimitry Andric std::set<unsigned> SUColors; 10040b57cec5SDimitry Andric 10050b57cec5SDimitry Andric if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) 10060b57cec5SDimitry Andric continue; 10070b57cec5SDimitry Andric 10080b57cec5SDimitry Andric for (SDep& SuccDep : SU->Succs) { 10090b57cec5SDimitry Andric SUnit *Succ = SuccDep.getSUnit(); 10100b57cec5SDimitry Andric if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) 10110b57cec5SDimitry Andric continue; 10120b57cec5SDimitry Andric SUColors.insert(CurrentColoring[Succ->NodeNum]); 10130b57cec5SDimitry Andric } 10140b57cec5SDimitry Andric if (SUColors.size() == 1) 10150b57cec5SDimitry Andric CurrentColoring[SU->NodeNum] = *SUColors.begin(); 10160b57cec5SDimitry Andric } 10170b57cec5SDimitry Andric } 10180b57cec5SDimitry Andric 10190b57cec5SDimitry Andric void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() { 10200b57cec5SDimitry Andric unsigned DAGSize = DAG->SUnits.size(); 10210b57cec5SDimitry Andric 10220b57cec5SDimitry Andric for (unsigned SUNum : DAG->BottomUpIndex2SU) { 10230b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[SUNum]; 10240b57cec5SDimitry Andric std::set<unsigned> SUColors; 10250b57cec5SDimitry Andric 10260b57cec5SDimitry Andric if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) 10270b57cec5SDimitry Andric continue; 10280b57cec5SDimitry Andric 10290b57cec5SDimitry Andric for (SDep& SuccDep : SU->Succs) { 10300b57cec5SDimitry Andric SUnit *Succ = SuccDep.getSUnit(); 10310b57cec5SDimitry Andric if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) 10320b57cec5SDimitry Andric continue; 10330b57cec5SDimitry Andric SUColors.insert(CurrentColoring[Succ->NodeNum]); 10340b57cec5SDimitry Andric } 10350b57cec5SDimitry Andric if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize) 10360b57cec5SDimitry Andric CurrentColoring[SU->NodeNum] = *SUColors.begin(); 10370b57cec5SDimitry Andric } 10380b57cec5SDimitry Andric } 10390b57cec5SDimitry Andric 10400b57cec5SDimitry Andric void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() { 10410b57cec5SDimitry Andric unsigned DAGSize = DAG->SUnits.size(); 10420b57cec5SDimitry Andric std::map<unsigned, unsigned> ColorCount; 10430b57cec5SDimitry Andric 10440b57cec5SDimitry Andric for (unsigned SUNum : DAG->BottomUpIndex2SU) { 10450b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[SUNum]; 10460b57cec5SDimitry Andric unsigned color = CurrentColoring[SU->NodeNum]; 10470b57cec5SDimitry Andric ++ColorCount[color]; 10480b57cec5SDimitry Andric } 10490b57cec5SDimitry Andric 10500b57cec5SDimitry Andric for (unsigned SUNum : DAG->BottomUpIndex2SU) { 10510b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[SUNum]; 10520b57cec5SDimitry Andric unsigned color = CurrentColoring[SU->NodeNum]; 10530b57cec5SDimitry Andric std::set<unsigned> SUColors; 10540b57cec5SDimitry Andric 10550b57cec5SDimitry Andric if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) 10560b57cec5SDimitry Andric continue; 10570b57cec5SDimitry Andric 10580b57cec5SDimitry Andric if (ColorCount[color] > 1) 10590b57cec5SDimitry Andric continue; 10600b57cec5SDimitry Andric 10610b57cec5SDimitry Andric for (SDep& SuccDep : SU->Succs) { 10620b57cec5SDimitry Andric SUnit *Succ = SuccDep.getSUnit(); 10630b57cec5SDimitry Andric if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) 10640b57cec5SDimitry Andric continue; 10650b57cec5SDimitry Andric SUColors.insert(CurrentColoring[Succ->NodeNum]); 10660b57cec5SDimitry Andric } 10670b57cec5SDimitry Andric if (SUColors.size() == 1 && *SUColors.begin() != color) { 10680b57cec5SDimitry Andric --ColorCount[color]; 10690b57cec5SDimitry Andric CurrentColoring[SU->NodeNum] = *SUColors.begin(); 10700b57cec5SDimitry Andric ++ColorCount[*SUColors.begin()]; 10710b57cec5SDimitry Andric } 10720b57cec5SDimitry Andric } 10730b57cec5SDimitry Andric } 10740b57cec5SDimitry Andric 10750b57cec5SDimitry Andric void SIScheduleBlockCreator::cutHugeBlocks() { 10760b57cec5SDimitry Andric // TODO 10770b57cec5SDimitry Andric } 10780b57cec5SDimitry Andric 10790b57cec5SDimitry Andric void SIScheduleBlockCreator::regroupNoUserInstructions() { 10800b57cec5SDimitry Andric unsigned DAGSize = DAG->SUnits.size(); 10810b57cec5SDimitry Andric int GroupID = NextNonReservedID++; 10820b57cec5SDimitry Andric 10830b57cec5SDimitry Andric for (unsigned SUNum : DAG->BottomUpIndex2SU) { 10840b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[SUNum]; 10850b57cec5SDimitry Andric bool hasSuccessor = false; 10860b57cec5SDimitry Andric 10870b57cec5SDimitry Andric if (CurrentColoring[SU->NodeNum] <= (int)DAGSize) 10880b57cec5SDimitry Andric continue; 10890b57cec5SDimitry Andric 10900b57cec5SDimitry Andric for (SDep& SuccDep : SU->Succs) { 10910b57cec5SDimitry Andric SUnit *Succ = SuccDep.getSUnit(); 10920b57cec5SDimitry Andric if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) 10930b57cec5SDimitry Andric continue; 10940b57cec5SDimitry Andric hasSuccessor = true; 10950b57cec5SDimitry Andric } 10960b57cec5SDimitry Andric if (!hasSuccessor) 10970b57cec5SDimitry Andric CurrentColoring[SU->NodeNum] = GroupID; 10980b57cec5SDimitry Andric } 10990b57cec5SDimitry Andric } 11000b57cec5SDimitry Andric 11010b57cec5SDimitry Andric void SIScheduleBlockCreator::colorExports() { 11020b57cec5SDimitry Andric unsigned ExportColor = NextNonReservedID++; 11030b57cec5SDimitry Andric SmallVector<unsigned, 8> ExpGroup; 11040b57cec5SDimitry Andric 11050b57cec5SDimitry Andric // Put all exports together in a block. 11060b57cec5SDimitry Andric // The block will naturally end up being scheduled last, 11070b57cec5SDimitry Andric // thus putting exports at the end of the schedule, which 11080b57cec5SDimitry Andric // is better for performance. 11090b57cec5SDimitry Andric // However we must ensure, for safety, the exports can be put 11100b57cec5SDimitry Andric // together in the same block without any other instruction. 11110b57cec5SDimitry Andric // This could happen, for example, when scheduling after regalloc 11120b57cec5SDimitry Andric // if reloading a spilled register from memory using the same 11130b57cec5SDimitry Andric // register than used in a previous export. 11140b57cec5SDimitry Andric // If that happens, do not regroup the exports. 11150b57cec5SDimitry Andric for (unsigned SUNum : DAG->TopDownIndex2SU) { 11160b57cec5SDimitry Andric const SUnit &SU = DAG->SUnits[SUNum]; 11170b57cec5SDimitry Andric if (SIInstrInfo::isEXP(*SU.getInstr())) { 111881ad6265SDimitry Andric // SU is an export instruction. Check whether one of its successor 111981ad6265SDimitry Andric // dependencies is a non-export, in which case we skip export grouping. 112081ad6265SDimitry Andric for (const SDep &SuccDep : SU.Succs) { 112181ad6265SDimitry Andric const SUnit *SuccSU = SuccDep.getSUnit(); 112281ad6265SDimitry Andric if (SuccDep.isWeak() || SuccSU->NodeNum >= DAG->SUnits.size()) { 112381ad6265SDimitry Andric // Ignore these dependencies. 112481ad6265SDimitry Andric continue; 112581ad6265SDimitry Andric } 112681ad6265SDimitry Andric assert(SuccSU->isInstr() && 112781ad6265SDimitry Andric "SUnit unexpectedly not representing an instruction!"); 11280b57cec5SDimitry Andric 112981ad6265SDimitry Andric if (!SIInstrInfo::isEXP(*SuccSU->getInstr())) { 113081ad6265SDimitry Andric // A non-export depends on us. Skip export grouping. 113181ad6265SDimitry Andric // Note that this is a bit pessimistic: We could still group all other 113281ad6265SDimitry Andric // exports that are not depended on by non-exports, directly or 113381ad6265SDimitry Andric // indirectly. Simply skipping this particular export but grouping all 113481ad6265SDimitry Andric // others would not account for indirect dependencies. 11350b57cec5SDimitry Andric return; 11360b57cec5SDimitry Andric } 11370b57cec5SDimitry Andric } 11380b57cec5SDimitry Andric ExpGroup.push_back(SUNum); 11390b57cec5SDimitry Andric } 11400b57cec5SDimitry Andric } 11410b57cec5SDimitry Andric 11420b57cec5SDimitry Andric // The group can be formed. Give the color. 11430b57cec5SDimitry Andric for (unsigned j : ExpGroup) 11440b57cec5SDimitry Andric CurrentColoring[j] = ExportColor; 11450b57cec5SDimitry Andric } 11460b57cec5SDimitry Andric 11470b57cec5SDimitry Andric void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant) { 11480b57cec5SDimitry Andric unsigned DAGSize = DAG->SUnits.size(); 11490b57cec5SDimitry Andric std::map<unsigned,unsigned> RealID; 11500b57cec5SDimitry Andric 11510b57cec5SDimitry Andric CurrentBlocks.clear(); 11520b57cec5SDimitry Andric CurrentColoring.clear(); 11530b57cec5SDimitry Andric CurrentColoring.resize(DAGSize, 0); 11540b57cec5SDimitry Andric Node2CurrentBlock.clear(); 11550b57cec5SDimitry Andric 11560b57cec5SDimitry Andric // Restore links previous scheduling variant has overridden. 11570b57cec5SDimitry Andric DAG->restoreSULinksLeft(); 11580b57cec5SDimitry Andric 11590b57cec5SDimitry Andric NextReservedID = 1; 11600b57cec5SDimitry Andric NextNonReservedID = DAGSize + 1; 11610b57cec5SDimitry Andric 11620b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Coloring the graph\n"); 11630b57cec5SDimitry Andric 11640b57cec5SDimitry Andric if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesGrouped) 11650b57cec5SDimitry Andric colorHighLatenciesGroups(); 11660b57cec5SDimitry Andric else 11670b57cec5SDimitry Andric colorHighLatenciesAlone(); 11680b57cec5SDimitry Andric colorComputeReservedDependencies(); 11690b57cec5SDimitry Andric colorAccordingToReservedDependencies(); 11700b57cec5SDimitry Andric colorEndsAccordingToDependencies(); 11710b57cec5SDimitry Andric if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesAlonePlusConsecutive) 11720b57cec5SDimitry Andric colorForceConsecutiveOrderInGroup(); 11730b57cec5SDimitry Andric regroupNoUserInstructions(); 11740b57cec5SDimitry Andric colorMergeConstantLoadsNextGroup(); 11750b57cec5SDimitry Andric colorMergeIfPossibleNextGroupOnlyForReserved(); 11760b57cec5SDimitry Andric colorExports(); 11770b57cec5SDimitry Andric 11780b57cec5SDimitry Andric // Put SUs of same color into same block 11790b57cec5SDimitry Andric Node2CurrentBlock.resize(DAGSize, -1); 11800b57cec5SDimitry Andric for (unsigned i = 0, e = DAGSize; i != e; ++i) { 11810b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[i]; 11820b57cec5SDimitry Andric unsigned Color = CurrentColoring[SU->NodeNum]; 11830b57cec5SDimitry Andric if (RealID.find(Color) == RealID.end()) { 11840b57cec5SDimitry Andric int ID = CurrentBlocks.size(); 11858bcb0991SDimitry Andric BlockPtrs.push_back(std::make_unique<SIScheduleBlock>(DAG, this, ID)); 11860b57cec5SDimitry Andric CurrentBlocks.push_back(BlockPtrs.rbegin()->get()); 11870b57cec5SDimitry Andric RealID[Color] = ID; 11880b57cec5SDimitry Andric } 11890b57cec5SDimitry Andric CurrentBlocks[RealID[Color]]->addUnit(SU); 11900b57cec5SDimitry Andric Node2CurrentBlock[SU->NodeNum] = RealID[Color]; 11910b57cec5SDimitry Andric } 11920b57cec5SDimitry Andric 11930b57cec5SDimitry Andric // Build dependencies between blocks. 11940b57cec5SDimitry Andric for (unsigned i = 0, e = DAGSize; i != e; ++i) { 11950b57cec5SDimitry Andric SUnit *SU = &DAG->SUnits[i]; 11960b57cec5SDimitry Andric int SUID = Node2CurrentBlock[i]; 11970b57cec5SDimitry Andric for (SDep& SuccDep : SU->Succs) { 11980b57cec5SDimitry Andric SUnit *Succ = SuccDep.getSUnit(); 11990b57cec5SDimitry Andric if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) 12000b57cec5SDimitry Andric continue; 12010b57cec5SDimitry Andric if (Node2CurrentBlock[Succ->NodeNum] != SUID) 12020b57cec5SDimitry Andric CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]], 12030b57cec5SDimitry Andric SuccDep.isCtrl() ? NoData : Data); 12040b57cec5SDimitry Andric } 12050b57cec5SDimitry Andric for (SDep& PredDep : SU->Preds) { 12060b57cec5SDimitry Andric SUnit *Pred = PredDep.getSUnit(); 12070b57cec5SDimitry Andric if (PredDep.isWeak() || Pred->NodeNum >= DAGSize) 12080b57cec5SDimitry Andric continue; 12090b57cec5SDimitry Andric if (Node2CurrentBlock[Pred->NodeNum] != SUID) 12100b57cec5SDimitry Andric CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->NodeNum]]); 12110b57cec5SDimitry Andric } 12120b57cec5SDimitry Andric } 12130b57cec5SDimitry Andric 12140b57cec5SDimitry Andric // Free root and leafs of all blocks to enable scheduling inside them. 12150eae32dcSDimitry Andric for (SIScheduleBlock *Block : CurrentBlocks) 12160b57cec5SDimitry Andric Block->finalizeUnits(); 12170eae32dcSDimitry Andric LLVM_DEBUG({ 12180eae32dcSDimitry Andric dbgs() << "Blocks created:\n\n"; 12190eae32dcSDimitry Andric for (SIScheduleBlock *Block : CurrentBlocks) 12200b57cec5SDimitry Andric Block->printDebug(true); 12210b57cec5SDimitry Andric }); 12220b57cec5SDimitry Andric } 12230b57cec5SDimitry Andric 12240b57cec5SDimitry Andric // Two functions taken from Codegen/MachineScheduler.cpp 12250b57cec5SDimitry Andric 12260b57cec5SDimitry Andric /// Non-const version. 12270b57cec5SDimitry Andric static MachineBasicBlock::iterator 12280b57cec5SDimitry Andric nextIfDebug(MachineBasicBlock::iterator I, 12290b57cec5SDimitry Andric MachineBasicBlock::const_iterator End) { 12300b57cec5SDimitry Andric for (; I != End; ++I) { 12310b57cec5SDimitry Andric if (!I->isDebugInstr()) 12320b57cec5SDimitry Andric break; 12330b57cec5SDimitry Andric } 12340b57cec5SDimitry Andric return I; 12350b57cec5SDimitry Andric } 12360b57cec5SDimitry Andric 12370b57cec5SDimitry Andric void SIScheduleBlockCreator::topologicalSort() { 12380b57cec5SDimitry Andric unsigned DAGSize = CurrentBlocks.size(); 12390b57cec5SDimitry Andric std::vector<int> WorkList; 12400b57cec5SDimitry Andric 12410b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Topological Sort\n"); 12420b57cec5SDimitry Andric 12430b57cec5SDimitry Andric WorkList.reserve(DAGSize); 12440b57cec5SDimitry Andric TopDownIndex2Block.resize(DAGSize); 12450b57cec5SDimitry Andric TopDownBlock2Index.resize(DAGSize); 12460b57cec5SDimitry Andric BottomUpIndex2Block.resize(DAGSize); 12470b57cec5SDimitry Andric 12480b57cec5SDimitry Andric for (unsigned i = 0, e = DAGSize; i != e; ++i) { 12490b57cec5SDimitry Andric SIScheduleBlock *Block = CurrentBlocks[i]; 12500b57cec5SDimitry Andric unsigned Degree = Block->getSuccs().size(); 12510b57cec5SDimitry Andric TopDownBlock2Index[i] = Degree; 12520b57cec5SDimitry Andric if (Degree == 0) { 12530b57cec5SDimitry Andric WorkList.push_back(i); 12540b57cec5SDimitry Andric } 12550b57cec5SDimitry Andric } 12560b57cec5SDimitry Andric 12570b57cec5SDimitry Andric int Id = DAGSize; 12580b57cec5SDimitry Andric while (!WorkList.empty()) { 12590b57cec5SDimitry Andric int i = WorkList.back(); 12600b57cec5SDimitry Andric SIScheduleBlock *Block = CurrentBlocks[i]; 12610b57cec5SDimitry Andric WorkList.pop_back(); 12620b57cec5SDimitry Andric TopDownBlock2Index[i] = --Id; 12630b57cec5SDimitry Andric TopDownIndex2Block[Id] = i; 12640b57cec5SDimitry Andric for (SIScheduleBlock* Pred : Block->getPreds()) { 12650b57cec5SDimitry Andric if (!--TopDownBlock2Index[Pred->getID()]) 12660b57cec5SDimitry Andric WorkList.push_back(Pred->getID()); 12670b57cec5SDimitry Andric } 12680b57cec5SDimitry Andric } 12690b57cec5SDimitry Andric 12700b57cec5SDimitry Andric #ifndef NDEBUG 12710b57cec5SDimitry Andric // Check correctness of the ordering. 12720b57cec5SDimitry Andric for (unsigned i = 0, e = DAGSize; i != e; ++i) { 12730b57cec5SDimitry Andric SIScheduleBlock *Block = CurrentBlocks[i]; 12740b57cec5SDimitry Andric for (SIScheduleBlock* Pred : Block->getPreds()) { 12750b57cec5SDimitry Andric assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] && 12760b57cec5SDimitry Andric "Wrong Top Down topological sorting"); 12770b57cec5SDimitry Andric } 12780b57cec5SDimitry Andric } 12790b57cec5SDimitry Andric #endif 12800b57cec5SDimitry Andric 12810b57cec5SDimitry Andric BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(), 12820b57cec5SDimitry Andric TopDownIndex2Block.rend()); 12830b57cec5SDimitry Andric } 12840b57cec5SDimitry Andric 12850b57cec5SDimitry Andric void SIScheduleBlockCreator::scheduleInsideBlocks() { 12860b57cec5SDimitry Andric unsigned DAGSize = CurrentBlocks.size(); 12870b57cec5SDimitry Andric 12880b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\nScheduling Blocks\n\n"); 12890b57cec5SDimitry Andric 12900b57cec5SDimitry Andric // We do schedule a valid scheduling such that a Block corresponds 12910b57cec5SDimitry Andric // to a range of instructions. 12920b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "First phase: Fast scheduling for Reg Liveness\n"); 12930b57cec5SDimitry Andric for (unsigned i = 0, e = DAGSize; i != e; ++i) { 12940b57cec5SDimitry Andric SIScheduleBlock *Block = CurrentBlocks[i]; 12950b57cec5SDimitry Andric Block->fastSchedule(); 12960b57cec5SDimitry Andric } 12970b57cec5SDimitry Andric 12980b57cec5SDimitry Andric // Note: the following code, and the part restoring previous position 12990b57cec5SDimitry Andric // is by far the most expensive operation of the Scheduler. 13000b57cec5SDimitry Andric 13010b57cec5SDimitry Andric // Do not update CurrentTop. 13020b57cec5SDimitry Andric MachineBasicBlock::iterator CurrentTopFastSched = DAG->getCurrentTop(); 13030b57cec5SDimitry Andric std::vector<MachineBasicBlock::iterator> PosOld; 13040b57cec5SDimitry Andric std::vector<MachineBasicBlock::iterator> PosNew; 13050b57cec5SDimitry Andric PosOld.reserve(DAG->SUnits.size()); 13060b57cec5SDimitry Andric PosNew.reserve(DAG->SUnits.size()); 13070b57cec5SDimitry Andric 13080b57cec5SDimitry Andric for (unsigned i = 0, e = DAGSize; i != e; ++i) { 13090b57cec5SDimitry Andric int BlockIndice = TopDownIndex2Block[i]; 13100b57cec5SDimitry Andric SIScheduleBlock *Block = CurrentBlocks[BlockIndice]; 13110b57cec5SDimitry Andric std::vector<SUnit*> SUs = Block->getScheduledUnits(); 13120b57cec5SDimitry Andric 13130b57cec5SDimitry Andric for (SUnit* SU : SUs) { 13140b57cec5SDimitry Andric MachineInstr *MI = SU->getInstr(); 13150b57cec5SDimitry Andric MachineBasicBlock::iterator Pos = MI; 13160b57cec5SDimitry Andric PosOld.push_back(Pos); 13170b57cec5SDimitry Andric if (&*CurrentTopFastSched == MI) { 13180b57cec5SDimitry Andric PosNew.push_back(Pos); 13190b57cec5SDimitry Andric CurrentTopFastSched = nextIfDebug(++CurrentTopFastSched, 13200b57cec5SDimitry Andric DAG->getCurrentBottom()); 13210b57cec5SDimitry Andric } else { 13220b57cec5SDimitry Andric // Update the instruction stream. 13230b57cec5SDimitry Andric DAG->getBB()->splice(CurrentTopFastSched, DAG->getBB(), MI); 13240b57cec5SDimitry Andric 13250b57cec5SDimitry Andric // Update LiveIntervals. 13260b57cec5SDimitry Andric // Note: Moving all instructions and calling handleMove every time 13270b57cec5SDimitry Andric // is the most cpu intensive operation of the scheduler. 13280b57cec5SDimitry Andric // It would gain a lot if there was a way to recompute the 13290b57cec5SDimitry Andric // LiveIntervals for the entire scheduling region. 13300b57cec5SDimitry Andric DAG->getLIS()->handleMove(*MI, /*UpdateFlags=*/true); 13310b57cec5SDimitry Andric PosNew.push_back(CurrentTopFastSched); 13320b57cec5SDimitry Andric } 13330b57cec5SDimitry Andric } 13340b57cec5SDimitry Andric } 13350b57cec5SDimitry Andric 13360b57cec5SDimitry Andric // Now we have Block of SUs == Block of MI. 13370b57cec5SDimitry Andric // We do the final schedule for the instructions inside the block. 13380b57cec5SDimitry Andric // The property that all the SUs of the Block are grouped together as MI 13390b57cec5SDimitry Andric // is used for correct reg usage tracking. 13400b57cec5SDimitry Andric for (unsigned i = 0, e = DAGSize; i != e; ++i) { 13410b57cec5SDimitry Andric SIScheduleBlock *Block = CurrentBlocks[i]; 13420b57cec5SDimitry Andric std::vector<SUnit*> SUs = Block->getScheduledUnits(); 13430b57cec5SDimitry Andric Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr()); 13440b57cec5SDimitry Andric } 13450b57cec5SDimitry Andric 13460b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Restoring MI Pos\n"); 13470b57cec5SDimitry Andric // Restore old ordering (which prevents a LIS->handleMove bug). 13480b57cec5SDimitry Andric for (unsigned i = PosOld.size(), e = 0; i != e; --i) { 13490b57cec5SDimitry Andric MachineBasicBlock::iterator POld = PosOld[i-1]; 13500b57cec5SDimitry Andric MachineBasicBlock::iterator PNew = PosNew[i-1]; 13510b57cec5SDimitry Andric if (PNew != POld) { 13520b57cec5SDimitry Andric // Update the instruction stream. 13530b57cec5SDimitry Andric DAG->getBB()->splice(POld, DAG->getBB(), PNew); 13540b57cec5SDimitry Andric 13550b57cec5SDimitry Andric // Update LiveIntervals. 13560b57cec5SDimitry Andric DAG->getLIS()->handleMove(*POld, /*UpdateFlags=*/true); 13570b57cec5SDimitry Andric } 13580b57cec5SDimitry Andric } 13590b57cec5SDimitry Andric 13600eae32dcSDimitry Andric LLVM_DEBUG({ 13610eae32dcSDimitry Andric for (SIScheduleBlock *Block : CurrentBlocks) 13620b57cec5SDimitry Andric Block->printDebug(true); 13630b57cec5SDimitry Andric }); 13640b57cec5SDimitry Andric } 13650b57cec5SDimitry Andric 13660b57cec5SDimitry Andric void SIScheduleBlockCreator::fillStats() { 13670b57cec5SDimitry Andric unsigned DAGSize = CurrentBlocks.size(); 13680b57cec5SDimitry Andric 13690b57cec5SDimitry Andric for (unsigned i = 0, e = DAGSize; i != e; ++i) { 13700b57cec5SDimitry Andric int BlockIndice = TopDownIndex2Block[i]; 13710b57cec5SDimitry Andric SIScheduleBlock *Block = CurrentBlocks[BlockIndice]; 13720b57cec5SDimitry Andric if (Block->getPreds().empty()) 13730b57cec5SDimitry Andric Block->Depth = 0; 13740b57cec5SDimitry Andric else { 13750b57cec5SDimitry Andric unsigned Depth = 0; 13760b57cec5SDimitry Andric for (SIScheduleBlock *Pred : Block->getPreds()) { 13770b57cec5SDimitry Andric if (Depth < Pred->Depth + Pred->getCost()) 13780b57cec5SDimitry Andric Depth = Pred->Depth + Pred->getCost(); 13790b57cec5SDimitry Andric } 13800b57cec5SDimitry Andric Block->Depth = Depth; 13810b57cec5SDimitry Andric } 13820b57cec5SDimitry Andric } 13830b57cec5SDimitry Andric 13840b57cec5SDimitry Andric for (unsigned i = 0, e = DAGSize; i != e; ++i) { 13850b57cec5SDimitry Andric int BlockIndice = BottomUpIndex2Block[i]; 13860b57cec5SDimitry Andric SIScheduleBlock *Block = CurrentBlocks[BlockIndice]; 13870b57cec5SDimitry Andric if (Block->getSuccs().empty()) 13880b57cec5SDimitry Andric Block->Height = 0; 13890b57cec5SDimitry Andric else { 13900b57cec5SDimitry Andric unsigned Height = 0; 13910b57cec5SDimitry Andric for (const auto &Succ : Block->getSuccs()) 13920b57cec5SDimitry Andric Height = std::max(Height, Succ.first->Height + Succ.first->getCost()); 13930b57cec5SDimitry Andric Block->Height = Height; 13940b57cec5SDimitry Andric } 13950b57cec5SDimitry Andric } 13960b57cec5SDimitry Andric } 13970b57cec5SDimitry Andric 13980b57cec5SDimitry Andric // SIScheduleBlockScheduler // 13990b57cec5SDimitry Andric 14000b57cec5SDimitry Andric SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG, 14010b57cec5SDimitry Andric SISchedulerBlockSchedulerVariant Variant, 14020b57cec5SDimitry Andric SIScheduleBlocks BlocksStruct) : 14030b57cec5SDimitry Andric DAG(DAG), Variant(Variant), Blocks(BlocksStruct.Blocks), 14040b57cec5SDimitry Andric LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0), 14050b57cec5SDimitry Andric SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) { 14060b57cec5SDimitry Andric 14070b57cec5SDimitry Andric // Fill the usage of every output 14080b57cec5SDimitry Andric // Warning: while by construction we always have a link between two blocks 14090b57cec5SDimitry Andric // when one needs a result from the other, the number of users of an output 14100b57cec5SDimitry Andric // is not the sum of child blocks having as input the same virtual register. 14110b57cec5SDimitry Andric // Here is an example. A produces x and y. B eats x and produces x'. 14120b57cec5SDimitry Andric // C eats x' and y. The register coalescer may have attributed the same 14130b57cec5SDimitry Andric // virtual register to x and x'. 14140b57cec5SDimitry Andric // To count accurately, we do a topological sort. In case the register is 14150b57cec5SDimitry Andric // found for several parents, we increment the usage of the one with the 14160b57cec5SDimitry Andric // highest topological index. 14170b57cec5SDimitry Andric LiveOutRegsNumUsages.resize(Blocks.size()); 14180eae32dcSDimitry Andric for (SIScheduleBlock *Block : Blocks) { 14190b57cec5SDimitry Andric for (unsigned Reg : Block->getInRegs()) { 14200b57cec5SDimitry Andric bool Found = false; 14210b57cec5SDimitry Andric int topoInd = -1; 14220b57cec5SDimitry Andric for (SIScheduleBlock* Pred: Block->getPreds()) { 14230b57cec5SDimitry Andric std::set<unsigned> PredOutRegs = Pred->getOutRegs(); 14240b57cec5SDimitry Andric std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg); 14250b57cec5SDimitry Andric 14260b57cec5SDimitry Andric if (RegPos != PredOutRegs.end()) { 14270b57cec5SDimitry Andric Found = true; 14280b57cec5SDimitry Andric if (topoInd < BlocksStruct.TopDownBlock2Index[Pred->getID()]) { 14290b57cec5SDimitry Andric topoInd = BlocksStruct.TopDownBlock2Index[Pred->getID()]; 14300b57cec5SDimitry Andric } 14310b57cec5SDimitry Andric } 14320b57cec5SDimitry Andric } 14330b57cec5SDimitry Andric 14340b57cec5SDimitry Andric if (!Found) 14350b57cec5SDimitry Andric continue; 14360b57cec5SDimitry Andric 14370b57cec5SDimitry Andric int PredID = BlocksStruct.TopDownIndex2Block[topoInd]; 14380b57cec5SDimitry Andric ++LiveOutRegsNumUsages[PredID][Reg]; 14390b57cec5SDimitry Andric } 14400b57cec5SDimitry Andric } 14410b57cec5SDimitry Andric 14420b57cec5SDimitry Andric LastPosHighLatencyParentScheduled.resize(Blocks.size(), 0); 14430b57cec5SDimitry Andric BlockNumPredsLeft.resize(Blocks.size()); 14440b57cec5SDimitry Andric BlockNumSuccsLeft.resize(Blocks.size()); 14450b57cec5SDimitry Andric 14460b57cec5SDimitry Andric for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 14470b57cec5SDimitry Andric SIScheduleBlock *Block = Blocks[i]; 14480b57cec5SDimitry Andric BlockNumPredsLeft[i] = Block->getPreds().size(); 14490b57cec5SDimitry Andric BlockNumSuccsLeft[i] = Block->getSuccs().size(); 14500b57cec5SDimitry Andric } 14510b57cec5SDimitry Andric 14520b57cec5SDimitry Andric #ifndef NDEBUG 14530b57cec5SDimitry Andric for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 14540b57cec5SDimitry Andric SIScheduleBlock *Block = Blocks[i]; 14550b57cec5SDimitry Andric assert(Block->getID() == i); 14560b57cec5SDimitry Andric } 14570b57cec5SDimitry Andric #endif 14580b57cec5SDimitry Andric 14590b57cec5SDimitry Andric std::set<unsigned> InRegs = DAG->getInRegs(); 14600b57cec5SDimitry Andric addLiveRegs(InRegs); 14610b57cec5SDimitry Andric 14620b57cec5SDimitry Andric // Increase LiveOutRegsNumUsages for blocks 14630b57cec5SDimitry Andric // producing registers consumed in another 14640b57cec5SDimitry Andric // scheduling region. 14650b57cec5SDimitry Andric for (unsigned Reg : DAG->getOutRegs()) { 14660b57cec5SDimitry Andric for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 14670b57cec5SDimitry Andric // Do reverse traversal 14680b57cec5SDimitry Andric int ID = BlocksStruct.TopDownIndex2Block[Blocks.size()-1-i]; 14690b57cec5SDimitry Andric SIScheduleBlock *Block = Blocks[ID]; 14700b57cec5SDimitry Andric const std::set<unsigned> &OutRegs = Block->getOutRegs(); 14710b57cec5SDimitry Andric 14720b57cec5SDimitry Andric if (OutRegs.find(Reg) == OutRegs.end()) 14730b57cec5SDimitry Andric continue; 14740b57cec5SDimitry Andric 14750b57cec5SDimitry Andric ++LiveOutRegsNumUsages[ID][Reg]; 14760b57cec5SDimitry Andric break; 14770b57cec5SDimitry Andric } 14780b57cec5SDimitry Andric } 14790b57cec5SDimitry Andric 14800b57cec5SDimitry Andric // Fill LiveRegsConsumers for regs that were already 14810b57cec5SDimitry Andric // defined before scheduling. 14820eae32dcSDimitry Andric for (SIScheduleBlock *Block : Blocks) { 14830b57cec5SDimitry Andric for (unsigned Reg : Block->getInRegs()) { 14840b57cec5SDimitry Andric bool Found = false; 14850b57cec5SDimitry Andric for (SIScheduleBlock* Pred: Block->getPreds()) { 14860b57cec5SDimitry Andric std::set<unsigned> PredOutRegs = Pred->getOutRegs(); 14870b57cec5SDimitry Andric std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg); 14880b57cec5SDimitry Andric 14890b57cec5SDimitry Andric if (RegPos != PredOutRegs.end()) { 14900b57cec5SDimitry Andric Found = true; 14910b57cec5SDimitry Andric break; 14920b57cec5SDimitry Andric } 14930b57cec5SDimitry Andric } 14940b57cec5SDimitry Andric 14950b57cec5SDimitry Andric if (!Found) 14960b57cec5SDimitry Andric ++LiveRegsConsumers[Reg]; 14970b57cec5SDimitry Andric } 14980b57cec5SDimitry Andric } 14990b57cec5SDimitry Andric 15000b57cec5SDimitry Andric for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 15010b57cec5SDimitry Andric SIScheduleBlock *Block = Blocks[i]; 15020b57cec5SDimitry Andric if (BlockNumPredsLeft[i] == 0) { 15030b57cec5SDimitry Andric ReadyBlocks.push_back(Block); 15040b57cec5SDimitry Andric } 15050b57cec5SDimitry Andric } 15060b57cec5SDimitry Andric 15070b57cec5SDimitry Andric while (SIScheduleBlock *Block = pickBlock()) { 15080b57cec5SDimitry Andric BlocksScheduled.push_back(Block); 15090b57cec5SDimitry Andric blockScheduled(Block); 15100b57cec5SDimitry Andric } 15110b57cec5SDimitry Andric 15120b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Block Order:"; for (SIScheduleBlock *Block 15130b57cec5SDimitry Andric : BlocksScheduled) { 15140b57cec5SDimitry Andric dbgs() << ' ' << Block->getID(); 15150b57cec5SDimitry Andric } dbgs() << '\n';); 15160b57cec5SDimitry Andric } 15170b57cec5SDimitry Andric 15180b57cec5SDimitry Andric bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand, 15190b57cec5SDimitry Andric SIBlockSchedCandidate &TryCand) { 15200b57cec5SDimitry Andric if (!Cand.isValid()) { 15210b57cec5SDimitry Andric TryCand.Reason = NodeOrder; 15220b57cec5SDimitry Andric return true; 15230b57cec5SDimitry Andric } 15240b57cec5SDimitry Andric 15250b57cec5SDimitry Andric // Try to hide high latencies. 15260b57cec5SDimitry Andric if (SISched::tryLess(TryCand.LastPosHighLatParentScheduled, 15270b57cec5SDimitry Andric Cand.LastPosHighLatParentScheduled, TryCand, Cand, Latency)) 15280b57cec5SDimitry Andric return true; 15290b57cec5SDimitry Andric // Schedule high latencies early so you can hide them better. 15300b57cec5SDimitry Andric if (SISched::tryGreater(TryCand.IsHighLatency, Cand.IsHighLatency, 15310b57cec5SDimitry Andric TryCand, Cand, Latency)) 15320b57cec5SDimitry Andric return true; 15330b57cec5SDimitry Andric if (TryCand.IsHighLatency && SISched::tryGreater(TryCand.Height, Cand.Height, 15340b57cec5SDimitry Andric TryCand, Cand, Depth)) 15350b57cec5SDimitry Andric return true; 15360b57cec5SDimitry Andric if (SISched::tryGreater(TryCand.NumHighLatencySuccessors, 15370b57cec5SDimitry Andric Cand.NumHighLatencySuccessors, 15380b57cec5SDimitry Andric TryCand, Cand, Successor)) 15390b57cec5SDimitry Andric return true; 15400b57cec5SDimitry Andric return false; 15410b57cec5SDimitry Andric } 15420b57cec5SDimitry Andric 15430b57cec5SDimitry Andric bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand, 15440b57cec5SDimitry Andric SIBlockSchedCandidate &TryCand) { 15450b57cec5SDimitry Andric if (!Cand.isValid()) { 15460b57cec5SDimitry Andric TryCand.Reason = NodeOrder; 15470b57cec5SDimitry Andric return true; 15480b57cec5SDimitry Andric } 15490b57cec5SDimitry Andric 15500b57cec5SDimitry Andric if (SISched::tryLess(TryCand.VGPRUsageDiff > 0, Cand.VGPRUsageDiff > 0, 15510b57cec5SDimitry Andric TryCand, Cand, RegUsage)) 15520b57cec5SDimitry Andric return true; 15530b57cec5SDimitry Andric if (SISched::tryGreater(TryCand.NumSuccessors > 0, 15540b57cec5SDimitry Andric Cand.NumSuccessors > 0, 15550b57cec5SDimitry Andric TryCand, Cand, Successor)) 15560b57cec5SDimitry Andric return true; 15570b57cec5SDimitry Andric if (SISched::tryGreater(TryCand.Height, Cand.Height, TryCand, Cand, Depth)) 15580b57cec5SDimitry Andric return true; 15590b57cec5SDimitry Andric if (SISched::tryLess(TryCand.VGPRUsageDiff, Cand.VGPRUsageDiff, 15600b57cec5SDimitry Andric TryCand, Cand, RegUsage)) 15610b57cec5SDimitry Andric return true; 15620b57cec5SDimitry Andric return false; 15630b57cec5SDimitry Andric } 15640b57cec5SDimitry Andric 15650b57cec5SDimitry Andric SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() { 15660b57cec5SDimitry Andric SIBlockSchedCandidate Cand; 15670b57cec5SDimitry Andric std::vector<SIScheduleBlock*>::iterator Best; 15680b57cec5SDimitry Andric SIScheduleBlock *Block; 15690b57cec5SDimitry Andric if (ReadyBlocks.empty()) 15700b57cec5SDimitry Andric return nullptr; 15710b57cec5SDimitry Andric 15720b57cec5SDimitry Andric DAG->fillVgprSgprCost(LiveRegs.begin(), LiveRegs.end(), 15730b57cec5SDimitry Andric VregCurrentUsage, SregCurrentUsage); 15740b57cec5SDimitry Andric if (VregCurrentUsage > maxVregUsage) 15750b57cec5SDimitry Andric maxVregUsage = VregCurrentUsage; 15760b57cec5SDimitry Andric if (SregCurrentUsage > maxSregUsage) 15770b57cec5SDimitry Andric maxSregUsage = SregCurrentUsage; 15780b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Picking New Blocks\n"; dbgs() << "Available: "; 15790b57cec5SDimitry Andric for (SIScheduleBlock *Block 15800b57cec5SDimitry Andric : ReadyBlocks) dbgs() 15810b57cec5SDimitry Andric << Block->getID() << ' '; 15820b57cec5SDimitry Andric dbgs() << "\nCurrent Live:\n"; 15830b57cec5SDimitry Andric for (unsigned Reg 15840b57cec5SDimitry Andric : LiveRegs) dbgs() 15850b57cec5SDimitry Andric << printVRegOrUnit(Reg, DAG->getTRI()) << ' '; 15860b57cec5SDimitry Andric dbgs() << '\n'; 15870b57cec5SDimitry Andric dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n'; 15880b57cec5SDimitry Andric dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';); 15890b57cec5SDimitry Andric 15900b57cec5SDimitry Andric Cand.Block = nullptr; 15910b57cec5SDimitry Andric for (std::vector<SIScheduleBlock*>::iterator I = ReadyBlocks.begin(), 15920b57cec5SDimitry Andric E = ReadyBlocks.end(); I != E; ++I) { 15930b57cec5SDimitry Andric SIBlockSchedCandidate TryCand; 15940b57cec5SDimitry Andric TryCand.Block = *I; 15950b57cec5SDimitry Andric TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock(); 15960b57cec5SDimitry Andric TryCand.VGPRUsageDiff = 15970b57cec5SDimitry Andric checkRegUsageImpact(TryCand.Block->getInRegs(), 15985ffd83dbSDimitry Andric TryCand.Block->getOutRegs())[AMDGPU::RegisterPressureSets::VGPR_32]; 15990b57cec5SDimitry Andric TryCand.NumSuccessors = TryCand.Block->getSuccs().size(); 16000b57cec5SDimitry Andric TryCand.NumHighLatencySuccessors = 16010b57cec5SDimitry Andric TryCand.Block->getNumHighLatencySuccessors(); 16020b57cec5SDimitry Andric TryCand.LastPosHighLatParentScheduled = 16030b57cec5SDimitry Andric (unsigned int) std::max<int> (0, 16040b57cec5SDimitry Andric LastPosHighLatencyParentScheduled[TryCand.Block->getID()] - 16050b57cec5SDimitry Andric LastPosWaitedHighLatency); 16060b57cec5SDimitry Andric TryCand.Height = TryCand.Block->Height; 16070b57cec5SDimitry Andric // Try not to increase VGPR usage too much, else we may spill. 16080b57cec5SDimitry Andric if (VregCurrentUsage > 120 || 16090b57cec5SDimitry Andric Variant != SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage) { 16100b57cec5SDimitry Andric if (!tryCandidateRegUsage(Cand, TryCand) && 16110b57cec5SDimitry Andric Variant != SISchedulerBlockSchedulerVariant::BlockRegUsage) 16120b57cec5SDimitry Andric tryCandidateLatency(Cand, TryCand); 16130b57cec5SDimitry Andric } else { 16140b57cec5SDimitry Andric if (!tryCandidateLatency(Cand, TryCand)) 16150b57cec5SDimitry Andric tryCandidateRegUsage(Cand, TryCand); 16160b57cec5SDimitry Andric } 16170b57cec5SDimitry Andric if (TryCand.Reason != NoCand) { 16180b57cec5SDimitry Andric Cand.setBest(TryCand); 16190b57cec5SDimitry Andric Best = I; 16200b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' ' 16210b57cec5SDimitry Andric << getReasonStr(Cand.Reason) << '\n'); 16220b57cec5SDimitry Andric } 16230b57cec5SDimitry Andric } 16240b57cec5SDimitry Andric 16250b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Picking: " << Cand.Block->getID() << '\n'; 16260b57cec5SDimitry Andric dbgs() << "Is a block with high latency instruction: " 16270b57cec5SDimitry Andric << (Cand.IsHighLatency ? "yes\n" : "no\n"); 16280b57cec5SDimitry Andric dbgs() << "Position of last high latency dependency: " 16290b57cec5SDimitry Andric << Cand.LastPosHighLatParentScheduled << '\n'; 16300b57cec5SDimitry Andric dbgs() << "VGPRUsageDiff: " << Cand.VGPRUsageDiff << '\n'; 16310b57cec5SDimitry Andric dbgs() << '\n';); 16320b57cec5SDimitry Andric 16330b57cec5SDimitry Andric Block = Cand.Block; 16340b57cec5SDimitry Andric ReadyBlocks.erase(Best); 16350b57cec5SDimitry Andric return Block; 16360b57cec5SDimitry Andric } 16370b57cec5SDimitry Andric 16380b57cec5SDimitry Andric // Tracking of currently alive registers to determine VGPR Usage. 16390b57cec5SDimitry Andric 16400b57cec5SDimitry Andric void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) { 1641e8d8bef9SDimitry Andric for (Register Reg : Regs) { 16420b57cec5SDimitry Andric // For now only track virtual registers. 1643e8d8bef9SDimitry Andric if (!Reg.isVirtual()) 16440b57cec5SDimitry Andric continue; 16450b57cec5SDimitry Andric // If not already in the live set, then add it. 16460b57cec5SDimitry Andric (void) LiveRegs.insert(Reg); 16470b57cec5SDimitry Andric } 16480b57cec5SDimitry Andric } 16490b57cec5SDimitry Andric 16500b57cec5SDimitry Andric void SIScheduleBlockScheduler::decreaseLiveRegs(SIScheduleBlock *Block, 16510b57cec5SDimitry Andric std::set<unsigned> &Regs) { 16520b57cec5SDimitry Andric for (unsigned Reg : Regs) { 16530b57cec5SDimitry Andric // For now only track virtual registers. 16540b57cec5SDimitry Andric std::set<unsigned>::iterator Pos = LiveRegs.find(Reg); 16550b57cec5SDimitry Andric assert (Pos != LiveRegs.end() && // Reg must be live. 16560b57cec5SDimitry Andric LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && 16570b57cec5SDimitry Andric LiveRegsConsumers[Reg] >= 1); 16580b57cec5SDimitry Andric --LiveRegsConsumers[Reg]; 16590b57cec5SDimitry Andric if (LiveRegsConsumers[Reg] == 0) 16600b57cec5SDimitry Andric LiveRegs.erase(Pos); 16610b57cec5SDimitry Andric } 16620b57cec5SDimitry Andric } 16630b57cec5SDimitry Andric 16640b57cec5SDimitry Andric void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) { 16650b57cec5SDimitry Andric for (const auto &Block : Parent->getSuccs()) { 16660b57cec5SDimitry Andric if (--BlockNumPredsLeft[Block.first->getID()] == 0) 16670b57cec5SDimitry Andric ReadyBlocks.push_back(Block.first); 16680b57cec5SDimitry Andric 16690b57cec5SDimitry Andric if (Parent->isHighLatencyBlock() && 16700b57cec5SDimitry Andric Block.second == SIScheduleBlockLinkKind::Data) 16710b57cec5SDimitry Andric LastPosHighLatencyParentScheduled[Block.first->getID()] = NumBlockScheduled; 16720b57cec5SDimitry Andric } 16730b57cec5SDimitry Andric } 16740b57cec5SDimitry Andric 16750b57cec5SDimitry Andric void SIScheduleBlockScheduler::blockScheduled(SIScheduleBlock *Block) { 16760b57cec5SDimitry Andric decreaseLiveRegs(Block, Block->getInRegs()); 16770b57cec5SDimitry Andric addLiveRegs(Block->getOutRegs()); 16780b57cec5SDimitry Andric releaseBlockSuccs(Block); 16790eae32dcSDimitry Andric for (const auto &RegP : LiveOutRegsNumUsages[Block->getID()]) { 16800b57cec5SDimitry Andric // We produce this register, thus it must not be previously alive. 16810b57cec5SDimitry Andric assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() || 16820b57cec5SDimitry Andric LiveRegsConsumers[RegP.first] == 0); 16830b57cec5SDimitry Andric LiveRegsConsumers[RegP.first] += RegP.second; 16840b57cec5SDimitry Andric } 16850b57cec5SDimitry Andric if (LastPosHighLatencyParentScheduled[Block->getID()] > 16860b57cec5SDimitry Andric (unsigned)LastPosWaitedHighLatency) 16870b57cec5SDimitry Andric LastPosWaitedHighLatency = 16880b57cec5SDimitry Andric LastPosHighLatencyParentScheduled[Block->getID()]; 16890b57cec5SDimitry Andric ++NumBlockScheduled; 16900b57cec5SDimitry Andric } 16910b57cec5SDimitry Andric 16920b57cec5SDimitry Andric std::vector<int> 16930b57cec5SDimitry Andric SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs, 16940b57cec5SDimitry Andric std::set<unsigned> &OutRegs) { 16950b57cec5SDimitry Andric std::vector<int> DiffSetPressure; 16960b57cec5SDimitry Andric DiffSetPressure.assign(DAG->getTRI()->getNumRegPressureSets(), 0); 16970b57cec5SDimitry Andric 1698e8d8bef9SDimitry Andric for (Register Reg : InRegs) { 16990b57cec5SDimitry Andric // For now only track virtual registers. 1700e8d8bef9SDimitry Andric if (!Reg.isVirtual()) 17010b57cec5SDimitry Andric continue; 17020b57cec5SDimitry Andric if (LiveRegsConsumers[Reg] > 1) 17030b57cec5SDimitry Andric continue; 17040b57cec5SDimitry Andric PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg); 17050b57cec5SDimitry Andric for (; PSetI.isValid(); ++PSetI) { 17060b57cec5SDimitry Andric DiffSetPressure[*PSetI] -= PSetI.getWeight(); 17070b57cec5SDimitry Andric } 17080b57cec5SDimitry Andric } 17090b57cec5SDimitry Andric 1710e8d8bef9SDimitry Andric for (Register Reg : OutRegs) { 17110b57cec5SDimitry Andric // For now only track virtual registers. 1712e8d8bef9SDimitry Andric if (!Reg.isVirtual()) 17130b57cec5SDimitry Andric continue; 17140b57cec5SDimitry Andric PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg); 17150b57cec5SDimitry Andric for (; PSetI.isValid(); ++PSetI) { 17160b57cec5SDimitry Andric DiffSetPressure[*PSetI] += PSetI.getWeight(); 17170b57cec5SDimitry Andric } 17180b57cec5SDimitry Andric } 17190b57cec5SDimitry Andric 17200b57cec5SDimitry Andric return DiffSetPressure; 17210b57cec5SDimitry Andric } 17220b57cec5SDimitry Andric 17230b57cec5SDimitry Andric // SIScheduler // 17240b57cec5SDimitry Andric 17250b57cec5SDimitry Andric struct SIScheduleBlockResult 17260b57cec5SDimitry Andric SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant, 17270b57cec5SDimitry Andric SISchedulerBlockSchedulerVariant ScheduleVariant) { 17280b57cec5SDimitry Andric SIScheduleBlocks Blocks = BlockCreator.getBlocks(BlockVariant); 17290b57cec5SDimitry Andric SIScheduleBlockScheduler Scheduler(DAG, ScheduleVariant, Blocks); 17300b57cec5SDimitry Andric std::vector<SIScheduleBlock*> ScheduledBlocks; 17310b57cec5SDimitry Andric struct SIScheduleBlockResult Res; 17320b57cec5SDimitry Andric 17330b57cec5SDimitry Andric ScheduledBlocks = Scheduler.getBlocks(); 17340b57cec5SDimitry Andric 17350eae32dcSDimitry Andric for (SIScheduleBlock *Block : ScheduledBlocks) { 17360b57cec5SDimitry Andric std::vector<SUnit*> SUs = Block->getScheduledUnits(); 17370b57cec5SDimitry Andric 17380b57cec5SDimitry Andric for (SUnit* SU : SUs) 17390b57cec5SDimitry Andric Res.SUs.push_back(SU->NodeNum); 17400b57cec5SDimitry Andric } 17410b57cec5SDimitry Andric 17420b57cec5SDimitry Andric Res.MaxSGPRUsage = Scheduler.getSGPRUsage(); 17430b57cec5SDimitry Andric Res.MaxVGPRUsage = Scheduler.getVGPRUsage(); 17440b57cec5SDimitry Andric return Res; 17450b57cec5SDimitry Andric } 17460b57cec5SDimitry Andric 17470b57cec5SDimitry Andric // SIScheduleDAGMI // 17480b57cec5SDimitry Andric 17490b57cec5SDimitry Andric SIScheduleDAGMI::SIScheduleDAGMI(MachineSchedContext *C) : 17508bcb0991SDimitry Andric ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C)) { 17510b57cec5SDimitry Andric SITII = static_cast<const SIInstrInfo*>(TII); 17520b57cec5SDimitry Andric SITRI = static_cast<const SIRegisterInfo*>(TRI); 17530b57cec5SDimitry Andric } 17540b57cec5SDimitry Andric 17550b57cec5SDimitry Andric SIScheduleDAGMI::~SIScheduleDAGMI() = default; 17560b57cec5SDimitry Andric 17570b57cec5SDimitry Andric // Code adapted from scheduleDAG.cpp 17580b57cec5SDimitry Andric // Does a topological sort over the SUs. 17590b57cec5SDimitry Andric // Both TopDown and BottomUp 17600b57cec5SDimitry Andric void SIScheduleDAGMI::topologicalSort() { 17610b57cec5SDimitry Andric Topo.InitDAGTopologicalSorting(); 17620b57cec5SDimitry Andric 17630b57cec5SDimitry Andric TopDownIndex2SU = std::vector<int>(Topo.begin(), Topo.end()); 17640b57cec5SDimitry Andric BottomUpIndex2SU = std::vector<int>(Topo.rbegin(), Topo.rend()); 17650b57cec5SDimitry Andric } 17660b57cec5SDimitry Andric 17670b57cec5SDimitry Andric // Move low latencies further from their user without 17680b57cec5SDimitry Andric // increasing SGPR usage (in general) 17690b57cec5SDimitry Andric // This is to be replaced by a better pass that would 17700b57cec5SDimitry Andric // take into account SGPR usage (based on VGPR Usage 17710b57cec5SDimitry Andric // and the corresponding wavefront count), that would 17720b57cec5SDimitry Andric // try to merge groups of loads if it make sense, etc 17730b57cec5SDimitry Andric void SIScheduleDAGMI::moveLowLatencies() { 17740b57cec5SDimitry Andric unsigned DAGSize = SUnits.size(); 17750b57cec5SDimitry Andric int LastLowLatencyUser = -1; 17760b57cec5SDimitry Andric int LastLowLatencyPos = -1; 17770b57cec5SDimitry Andric 17780b57cec5SDimitry Andric for (unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) { 17790b57cec5SDimitry Andric SUnit *SU = &SUnits[ScheduledSUnits[i]]; 17800b57cec5SDimitry Andric bool IsLowLatencyUser = false; 17810b57cec5SDimitry Andric unsigned MinPos = 0; 17820b57cec5SDimitry Andric 17830b57cec5SDimitry Andric for (SDep& PredDep : SU->Preds) { 17840b57cec5SDimitry Andric SUnit *Pred = PredDep.getSUnit(); 17850b57cec5SDimitry Andric if (SITII->isLowLatencyInstruction(*Pred->getInstr())) { 17860b57cec5SDimitry Andric IsLowLatencyUser = true; 17870b57cec5SDimitry Andric } 17880b57cec5SDimitry Andric if (Pred->NodeNum >= DAGSize) 17890b57cec5SDimitry Andric continue; 17900b57cec5SDimitry Andric unsigned PredPos = ScheduledSUnitsInv[Pred->NodeNum]; 17910b57cec5SDimitry Andric if (PredPos >= MinPos) 17920b57cec5SDimitry Andric MinPos = PredPos + 1; 17930b57cec5SDimitry Andric } 17940b57cec5SDimitry Andric 17950b57cec5SDimitry Andric if (SITII->isLowLatencyInstruction(*SU->getInstr())) { 17960b57cec5SDimitry Andric unsigned BestPos = LastLowLatencyUser + 1; 17970b57cec5SDimitry Andric if ((int)BestPos <= LastLowLatencyPos) 17980b57cec5SDimitry Andric BestPos = LastLowLatencyPos + 1; 17990b57cec5SDimitry Andric if (BestPos < MinPos) 18000b57cec5SDimitry Andric BestPos = MinPos; 18010b57cec5SDimitry Andric if (BestPos < i) { 18020b57cec5SDimitry Andric for (unsigned u = i; u > BestPos; --u) { 18030b57cec5SDimitry Andric ++ScheduledSUnitsInv[ScheduledSUnits[u-1]]; 18040b57cec5SDimitry Andric ScheduledSUnits[u] = ScheduledSUnits[u-1]; 18050b57cec5SDimitry Andric } 18060b57cec5SDimitry Andric ScheduledSUnits[BestPos] = SU->NodeNum; 18070b57cec5SDimitry Andric ScheduledSUnitsInv[SU->NodeNum] = BestPos; 18080b57cec5SDimitry Andric } 18090b57cec5SDimitry Andric LastLowLatencyPos = BestPos; 18100b57cec5SDimitry Andric if (IsLowLatencyUser) 18110b57cec5SDimitry Andric LastLowLatencyUser = BestPos; 18120b57cec5SDimitry Andric } else if (IsLowLatencyUser) { 18130b57cec5SDimitry Andric LastLowLatencyUser = i; 18140b57cec5SDimitry Andric // Moves COPY instructions on which depends 18150b57cec5SDimitry Andric // the low latency instructions too. 18160b57cec5SDimitry Andric } else if (SU->getInstr()->getOpcode() == AMDGPU::COPY) { 18170b57cec5SDimitry Andric bool CopyForLowLat = false; 18180b57cec5SDimitry Andric for (SDep& SuccDep : SU->Succs) { 18190b57cec5SDimitry Andric SUnit *Succ = SuccDep.getSUnit(); 18200b57cec5SDimitry Andric if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize) 18210b57cec5SDimitry Andric continue; 18220b57cec5SDimitry Andric if (SITII->isLowLatencyInstruction(*Succ->getInstr())) { 18230b57cec5SDimitry Andric CopyForLowLat = true; 18240b57cec5SDimitry Andric } 18250b57cec5SDimitry Andric } 18260b57cec5SDimitry Andric if (!CopyForLowLat) 18270b57cec5SDimitry Andric continue; 18280b57cec5SDimitry Andric if (MinPos < i) { 18290b57cec5SDimitry Andric for (unsigned u = i; u > MinPos; --u) { 18300b57cec5SDimitry Andric ++ScheduledSUnitsInv[ScheduledSUnits[u-1]]; 18310b57cec5SDimitry Andric ScheduledSUnits[u] = ScheduledSUnits[u-1]; 18320b57cec5SDimitry Andric } 18330b57cec5SDimitry Andric ScheduledSUnits[MinPos] = SU->NodeNum; 18340b57cec5SDimitry Andric ScheduledSUnitsInv[SU->NodeNum] = MinPos; 18350b57cec5SDimitry Andric } 18360b57cec5SDimitry Andric } 18370b57cec5SDimitry Andric } 18380b57cec5SDimitry Andric } 18390b57cec5SDimitry Andric 18400b57cec5SDimitry Andric void SIScheduleDAGMI::restoreSULinksLeft() { 18410b57cec5SDimitry Andric for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 18420b57cec5SDimitry Andric SUnits[i].isScheduled = false; 18430b57cec5SDimitry Andric SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft; 18440b57cec5SDimitry Andric SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft; 18450b57cec5SDimitry Andric SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft; 18460b57cec5SDimitry Andric SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft; 18470b57cec5SDimitry Andric } 18480b57cec5SDimitry Andric } 18490b57cec5SDimitry Andric 18500b57cec5SDimitry Andric // Return the Vgpr and Sgpr usage corresponding to some virtual registers. 18510b57cec5SDimitry Andric template<typename _Iterator> void 18520b57cec5SDimitry Andric SIScheduleDAGMI::fillVgprSgprCost(_Iterator First, _Iterator End, 18530b57cec5SDimitry Andric unsigned &VgprUsage, unsigned &SgprUsage) { 18540b57cec5SDimitry Andric VgprUsage = 0; 18550b57cec5SDimitry Andric SgprUsage = 0; 18560b57cec5SDimitry Andric for (_Iterator RegI = First; RegI != End; ++RegI) { 1857e8d8bef9SDimitry Andric Register Reg = *RegI; 18580b57cec5SDimitry Andric // For now only track virtual registers 1859e8d8bef9SDimitry Andric if (!Reg.isVirtual()) 18600b57cec5SDimitry Andric continue; 18610b57cec5SDimitry Andric PSetIterator PSetI = MRI.getPressureSets(Reg); 18620b57cec5SDimitry Andric for (; PSetI.isValid(); ++PSetI) { 18635ffd83dbSDimitry Andric if (*PSetI == AMDGPU::RegisterPressureSets::VGPR_32) 18640b57cec5SDimitry Andric VgprUsage += PSetI.getWeight(); 18655ffd83dbSDimitry Andric else if (*PSetI == AMDGPU::RegisterPressureSets::SReg_32) 18660b57cec5SDimitry Andric SgprUsage += PSetI.getWeight(); 18670b57cec5SDimitry Andric } 18680b57cec5SDimitry Andric } 18690b57cec5SDimitry Andric } 18700b57cec5SDimitry Andric 18710b57cec5SDimitry Andric void SIScheduleDAGMI::schedule() 18720b57cec5SDimitry Andric { 18730b57cec5SDimitry Andric SmallVector<SUnit*, 8> TopRoots, BotRoots; 18740b57cec5SDimitry Andric SIScheduleBlockResult Best, Temp; 18750b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Preparing Scheduling\n"); 18760b57cec5SDimitry Andric 18770b57cec5SDimitry Andric buildDAGWithRegPressure(); 187806c3fb27SDimitry Andric postProcessDAG(); 1879753f127fSDimitry Andric 18800b57cec5SDimitry Andric LLVM_DEBUG(dump()); 1881753f127fSDimitry Andric if (PrintDAGs) 1882753f127fSDimitry Andric dump(); 1883753f127fSDimitry Andric if (ViewMISchedDAGs) 1884753f127fSDimitry Andric viewGraph(); 18850b57cec5SDimitry Andric 18860b57cec5SDimitry Andric topologicalSort(); 18870b57cec5SDimitry Andric findRootsAndBiasEdges(TopRoots, BotRoots); 18880b57cec5SDimitry Andric // We reuse several ScheduleDAGMI and ScheduleDAGMILive 18890b57cec5SDimitry Andric // functions, but to make them happy we must initialize 18900b57cec5SDimitry Andric // the default Scheduler implementation (even if we do not 18910b57cec5SDimitry Andric // run it) 18920b57cec5SDimitry Andric SchedImpl->initialize(this); 18930b57cec5SDimitry Andric initQueues(TopRoots, BotRoots); 18940b57cec5SDimitry Andric 18950b57cec5SDimitry Andric // Fill some stats to help scheduling. 18960b57cec5SDimitry Andric 18970b57cec5SDimitry Andric SUnitsLinksBackup = SUnits; 18980b57cec5SDimitry Andric IsLowLatencySU.clear(); 18990b57cec5SDimitry Andric LowLatencyOffset.clear(); 19000b57cec5SDimitry Andric IsHighLatencySU.clear(); 19010b57cec5SDimitry Andric 19020b57cec5SDimitry Andric IsLowLatencySU.resize(SUnits.size(), 0); 19030b57cec5SDimitry Andric LowLatencyOffset.resize(SUnits.size(), 0); 19040b57cec5SDimitry Andric IsHighLatencySU.resize(SUnits.size(), 0); 19050b57cec5SDimitry Andric 19060b57cec5SDimitry Andric for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) { 19070b57cec5SDimitry Andric SUnit *SU = &SUnits[i]; 19080b57cec5SDimitry Andric const MachineOperand *BaseLatOp; 19090b57cec5SDimitry Andric int64_t OffLatReg; 19100b57cec5SDimitry Andric if (SITII->isLowLatencyInstruction(*SU->getInstr())) { 19110b57cec5SDimitry Andric IsLowLatencySU[i] = 1; 19125ffd83dbSDimitry Andric bool OffsetIsScalable; 19130b57cec5SDimitry Andric if (SITII->getMemOperandWithOffset(*SU->getInstr(), BaseLatOp, OffLatReg, 19145ffd83dbSDimitry Andric OffsetIsScalable, TRI)) 19150b57cec5SDimitry Andric LowLatencyOffset[i] = OffLatReg; 19165ffd83dbSDimitry Andric } else if (SITII->isHighLatencyDef(SU->getInstr()->getOpcode())) 19170b57cec5SDimitry Andric IsHighLatencySU[i] = 1; 19180b57cec5SDimitry Andric } 19190b57cec5SDimitry Andric 19200b57cec5SDimitry Andric SIScheduler Scheduler(this); 19210b57cec5SDimitry Andric Best = Scheduler.scheduleVariant(SISchedulerBlockCreatorVariant::LatenciesAlone, 19220b57cec5SDimitry Andric SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage); 19230b57cec5SDimitry Andric 19240b57cec5SDimitry Andric // if VGPR usage is extremely high, try other good performing variants 19250b57cec5SDimitry Andric // which could lead to lower VGPR usage 19260b57cec5SDimitry Andric if (Best.MaxVGPRUsage > 180) { 19270b57cec5SDimitry Andric static const std::pair<SISchedulerBlockCreatorVariant, 19280b57cec5SDimitry Andric SISchedulerBlockSchedulerVariant> 19290b57cec5SDimitry Andric Variants[] = { 19300b57cec5SDimitry Andric { LatenciesAlone, BlockRegUsageLatency }, 19310b57cec5SDimitry Andric // { LatenciesAlone, BlockRegUsage }, 19320b57cec5SDimitry Andric { LatenciesGrouped, BlockLatencyRegUsage }, 19330b57cec5SDimitry Andric // { LatenciesGrouped, BlockRegUsageLatency }, 19340b57cec5SDimitry Andric // { LatenciesGrouped, BlockRegUsage }, 19350b57cec5SDimitry Andric { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage }, 19360b57cec5SDimitry Andric // { LatenciesAlonePlusConsecutive, BlockRegUsageLatency }, 19370b57cec5SDimitry Andric // { LatenciesAlonePlusConsecutive, BlockRegUsage } 19380b57cec5SDimitry Andric }; 19390b57cec5SDimitry Andric for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) { 19400b57cec5SDimitry Andric Temp = Scheduler.scheduleVariant(v.first, v.second); 19410b57cec5SDimitry Andric if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage) 19420b57cec5SDimitry Andric Best = Temp; 19430b57cec5SDimitry Andric } 19440b57cec5SDimitry Andric } 19450b57cec5SDimitry Andric // if VGPR usage is still extremely high, we may spill. Try other variants 19460b57cec5SDimitry Andric // which are less performing, but that could lead to lower VGPR usage. 19470b57cec5SDimitry Andric if (Best.MaxVGPRUsage > 200) { 19480b57cec5SDimitry Andric static const std::pair<SISchedulerBlockCreatorVariant, 19490b57cec5SDimitry Andric SISchedulerBlockSchedulerVariant> 19500b57cec5SDimitry Andric Variants[] = { 19510b57cec5SDimitry Andric // { LatenciesAlone, BlockRegUsageLatency }, 19520b57cec5SDimitry Andric { LatenciesAlone, BlockRegUsage }, 19530b57cec5SDimitry Andric // { LatenciesGrouped, BlockLatencyRegUsage }, 19540b57cec5SDimitry Andric { LatenciesGrouped, BlockRegUsageLatency }, 19550b57cec5SDimitry Andric { LatenciesGrouped, BlockRegUsage }, 19560b57cec5SDimitry Andric // { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage }, 19570b57cec5SDimitry Andric { LatenciesAlonePlusConsecutive, BlockRegUsageLatency }, 19580b57cec5SDimitry Andric { LatenciesAlonePlusConsecutive, BlockRegUsage } 19590b57cec5SDimitry Andric }; 19600b57cec5SDimitry Andric for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) { 19610b57cec5SDimitry Andric Temp = Scheduler.scheduleVariant(v.first, v.second); 19620b57cec5SDimitry Andric if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage) 19630b57cec5SDimitry Andric Best = Temp; 19640b57cec5SDimitry Andric } 19650b57cec5SDimitry Andric } 19660b57cec5SDimitry Andric 19670b57cec5SDimitry Andric ScheduledSUnits = Best.SUs; 19680b57cec5SDimitry Andric ScheduledSUnitsInv.resize(SUnits.size()); 19690b57cec5SDimitry Andric 19700b57cec5SDimitry Andric for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) { 19710b57cec5SDimitry Andric ScheduledSUnitsInv[ScheduledSUnits[i]] = i; 19720b57cec5SDimitry Andric } 19730b57cec5SDimitry Andric 19740b57cec5SDimitry Andric moveLowLatencies(); 19750b57cec5SDimitry Andric 19760b57cec5SDimitry Andric // Tell the outside world about the result of the scheduling. 19770b57cec5SDimitry Andric 19780b57cec5SDimitry Andric assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); 19790b57cec5SDimitry Andric TopRPTracker.setPos(CurrentTop); 19800b57cec5SDimitry Andric 19810eae32dcSDimitry Andric for (unsigned I : ScheduledSUnits) { 19820eae32dcSDimitry Andric SUnit *SU = &SUnits[I]; 19830b57cec5SDimitry Andric 19840b57cec5SDimitry Andric scheduleMI(SU, true); 19850b57cec5SDimitry Andric 19860b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " 19870b57cec5SDimitry Andric << *SU->getInstr()); 19880b57cec5SDimitry Andric } 19890b57cec5SDimitry Andric 19900b57cec5SDimitry Andric assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); 19910b57cec5SDimitry Andric 19920b57cec5SDimitry Andric placeDebugValues(); 19930b57cec5SDimitry Andric 19940b57cec5SDimitry Andric LLVM_DEBUG({ 19950b57cec5SDimitry Andric dbgs() << "*** Final schedule for " 19960b57cec5SDimitry Andric << printMBBReference(*begin()->getParent()) << " ***\n"; 19970b57cec5SDimitry Andric dumpSchedule(); 19980b57cec5SDimitry Andric dbgs() << '\n'; 19990b57cec5SDimitry Andric }); 20000b57cec5SDimitry Andric } 2001