1 //===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This contains a MachineSchedStrategy implementation for maximizing wave
11 /// occupancy on GCN hardware.
12 ///
13 /// This pass will apply multiple scheduling stages to the same function.
14 /// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual
15 /// entry point for the scheduling of those regions is
16 /// GCNScheduleDAGMILive::runSchedStages.
17 ///
18 /// Generally, the reason for having multiple scheduling stages is to account
19 /// for the kernel-wide effect of register usage on occupancy. Usually, only a
20 /// few scheduling regions will have register pressure high enough to limit
21 /// occupancy for the kernel, so constraints can be relaxed to improve ILP in
22 /// other regions.
23 ///
24 //===----------------------------------------------------------------------===//
25
26 #include "GCNSchedStrategy.h"
27 #include "AMDGPUIGroupLP.h"
28 #include "SIMachineFunctionInfo.h"
29 #include "llvm/CodeGen/RegisterClassInfo.h"
30
31 #define DEBUG_TYPE "machine-scheduler"
32
33 using namespace llvm;
34
35 static cl::opt<bool>
36 DisableUnclusterHighRP("amdgpu-disable-unclustred-high-rp-reschedule",
37 cl::Hidden,
38 cl::desc("Disable unclustered high register pressure "
39 "reduction scheduling stage."),
40 cl::init(false));
41 static cl::opt<unsigned> ScheduleMetricBias(
42 "amdgpu-schedule-metric-bias", cl::Hidden,
43 cl::desc(
44 "Sets the bias which adds weight to occupancy vs latency. Set it to "
45 "100 to chase the occupancy only."),
46 cl::init(10));
47
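// Fixed-point scale for the schedule metric: getScheduleMetrics() reports
// roughly (SumBubbles * ScaleFactor) / CurrCycle, and the profit check in
// UnclusteredHighRPStage::shouldRevertScheduling() reverts when the scaled
// profit drops below ScaleFactor (i.e. below 1.0).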
48 const unsigned ScheduleMetrics::ScaleFactor = 100;
49
50 GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
51 : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
52 HasHighPressure(false) {}
53
54 void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
55 GenericScheduler::initialize(DAG);
56
57 MF = &DAG->MF;
58
59 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
60
61 SGPRExcessLimit =
62 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
63 VGPRExcessLimit =
64 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
65
66 SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
67 // Set the initial TargetOccupancy to the maximum occupancy that we can
68 // achieve for this function. This effectively sets a lower bound on the
69 // 'Critical' register limits in the scheduler.
70 TargetOccupancy = MFI.getOccupancy();
71 SGPRCriticalLimit =
72 std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);
73
74 if (!KnownExcessRP) {
75 VGPRCriticalLimit =
76 std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);
77 } else {
78 // This is similar to the ST.getMaxNumVGPRs(TargetOccupancy) result, except
79 // that it returns a reasonably small number for targets with lots of VGPRs,
80 // such as GFX10 and GFX11.
80 // as GFX10 and GFX11.
81 LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
82 "VGPRCriticalLimit calculation method.\n");
83
84 unsigned Granule = AMDGPU::IsaInfo::getVGPRAllocGranule(&ST);
85 unsigned Addressable = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
86 unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);
87 VGPRBudget = std::max(VGPRBudget, Granule);
88 VGPRCriticalLimit = std::min(VGPRBudget, VGPRExcessLimit);
89 }
90
91 // Subtract error margin and bias from register limits and avoid overflow.
92 SGPRCriticalLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRCriticalLimit);
93 VGPRCriticalLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRCriticalLimit);
94 SGPRExcessLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRExcessLimit);
95 VGPRExcessLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRExcessLimit);
96
97 LLVM_DEBUG(dbgs() << "VGPRCriticalLimit = " << VGPRCriticalLimit
98 << ", VGPRExcessLimit = " << VGPRExcessLimit
99 << ", SGPRCriticalLimit = " << SGPRCriticalLimit
100 << ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
101 }
102
103 void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
104 bool AtTop,
105 const RegPressureTracker &RPTracker,
106 const SIRegisterInfo *SRI,
107 unsigned SGPRPressure,
108 unsigned VGPRPressure) {
109 Cand.SU = SU;
110 Cand.AtTop = AtTop;
111
112 if (!DAG->isTrackingPressure())
113 return;
114
115 // getDownwardPressure() and getUpwardPressure() make temporary changes to
116 // the tracker, so we need to pass those functions a non-const reference.
117 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
118
119 Pressure.clear();
120 MaxPressure.clear();
121
122 if (AtTop)
123 TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
124 else {
125 // FIXME: I think for bottom up scheduling, the register pressure is cached
126 // and can be retrieved by DAG->getPressureDiff(SU).
127 TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
128 }
129
130 unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
131 unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
132
133 // If two instructions increase the pressure of different register sets
134 // by the same amount, the generic scheduler will prefer to schedule the
135 // instruction that increases the set with the least amount of registers,
136 // which in our case would be SGPRs. This is rarely what we want, so
137 // when we report excess/critical register pressure, we do it either
138 // only for VGPRs or only for SGPRs.
139
140 // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
141 const unsigned MaxVGPRPressureInc = 16;
142 bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
143 bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
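  // In effect, VGPRs are the tracked set whenever the current VGPR pressure is
  // within MaxVGPRPressureInc registers of the excess limit; otherwise SGPRs
  // are tracked only once they have already reached their excess limit.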
144
145
146 // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
147 // to increase the likelihood we don't go over the limits. We should improve
148 // the analysis to look through dependencies to find the path with the least
149 // register pressure.
150
151 // We only need to update the RPDelta for instructions that increase register
152 // pressure. Instructions that decrease or keep reg pressure the same will be
153 // marked as RegExcess in tryCandidate() when they are compared with
154 // instructions that increase the register pressure.
155 if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
156 HasHighPressure = true;
157 Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
158 Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
159 }
160
161 if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
162 HasHighPressure = true;
163 Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
164 Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
165 }
166
167 // Register pressure is considered 'CRITICAL' if it is approaching a value
168 // that would reduce the wave occupancy for the execution unit. When
169 // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
170 // has the same cost, so we don't need to prefer one over the other.
171
172 int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
173 int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
174
175 if (SGPRDelta >= 0 || VGPRDelta >= 0) {
176 HasHighPressure = true;
177 if (SGPRDelta > VGPRDelta) {
178 Cand.RPDelta.CriticalMax =
179 PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
180 Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
181 } else {
182 Cand.RPDelta.CriticalMax =
183 PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
184 Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
185 }
186 }
187 }
188
189 // This function is mostly cut and pasted from
190 // GenericScheduler::pickNodeFromQueue()
191 void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
192 const CandPolicy &ZonePolicy,
193 const RegPressureTracker &RPTracker,
194 SchedCandidate &Cand) {
195 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
196 ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
197 unsigned SGPRPressure = 0;
198 unsigned VGPRPressure = 0;
199 if (DAG->isTrackingPressure()) {
200 SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
201 VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
202 }
203 ReadyQueue &Q = Zone.Available;
204 for (SUnit *SU : Q) {
205
206 SchedCandidate TryCand(ZonePolicy);
207 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
208 SGPRPressure, VGPRPressure);
209 // Pass SchedBoundary only when comparing nodes from the same boundary.
210 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
211 tryCandidate(Cand, TryCand, ZoneArg);
212 if (TryCand.Reason != NoCand) {
213 // Initialize resource delta if needed in case future heuristics query it.
214 if (TryCand.ResDelta == SchedResourceDelta())
215 TryCand.initResourceDelta(Zone.DAG, SchedModel);
216 Cand.setBest(TryCand);
217 LLVM_DEBUG(traceCandidate(Cand));
218 }
219 }
220 }
221
222 // This function is mostly cut and pasted from
223 // GenericScheduler::pickNodeBidirectional()
224 SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
225 // Schedule as far as possible in the direction of no choice. This is most
226 // efficient, but also provides the best heuristics for CriticalPSets.
227 if (SUnit *SU = Bot.pickOnlyChoice()) {
228 IsTopNode = false;
229 return SU;
230 }
231 if (SUnit *SU = Top.pickOnlyChoice()) {
232 IsTopNode = true;
233 return SU;
234 }
235 // Set the bottom-up policy based on the state of the current bottom zone and
236 // the instructions outside the zone, including the top zone.
237 CandPolicy BotPolicy;
238 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
239 // Set the top-down policy based on the state of the current top zone and
240 // the instructions outside the zone, including the bottom zone.
241 CandPolicy TopPolicy;
242 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
243
244 // See if BotCand is still valid (because we previously scheduled from Top).
245 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
246 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
247 BotCand.Policy != BotPolicy) {
248 BotCand.reset(CandPolicy());
249 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
250 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
251 } else {
252 LLVM_DEBUG(traceCandidate(BotCand));
253 #ifndef NDEBUG
254 if (VerifyScheduling) {
255 SchedCandidate TCand;
256 TCand.reset(CandPolicy());
257 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
258 assert(TCand.SU == BotCand.SU &&
259 "Last pick result should correspond to re-picking right now");
260 }
261 #endif
262 }
263
264 // Check if the top Q has a better candidate.
265 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
266 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
267 TopCand.Policy != TopPolicy) {
268 TopCand.reset(CandPolicy());
269 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
270 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
271 } else {
272 LLVM_DEBUG(traceCandidate(TopCand));
273 #ifndef NDEBUG
274 if (VerifyScheduling) {
275 SchedCandidate TCand;
276 TCand.reset(CandPolicy());
277 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
278 assert(TCand.SU == TopCand.SU &&
279 "Last pick result should correspond to re-picking right now");
280 }
281 #endif
282 }
283
284 // Pick best from BotCand and TopCand.
285 LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
286 dbgs() << "Bot Cand: "; traceCandidate(BotCand););
287 SchedCandidate Cand = BotCand;
288 TopCand.Reason = NoCand;
289 tryCandidate(Cand, TopCand, nullptr);
290 if (TopCand.Reason != NoCand) {
291 Cand.setBest(TopCand);
292 }
293 LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););
294
295 IsTopNode = Cand.AtTop;
296 return Cand.SU;
297 }
298
299 // This function is mostly cut and pasted from
300 // GenericScheduler::pickNode()
301 SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
302 if (DAG->top() == DAG->bottom()) {
303 assert(Top.Available.empty() && Top.Pending.empty() &&
304 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
305 return nullptr;
306 }
307 SUnit *SU;
308 do {
309 if (RegionPolicy.OnlyTopDown) {
310 SU = Top.pickOnlyChoice();
311 if (!SU) {
312 CandPolicy NoPolicy;
313 TopCand.reset(NoPolicy);
314 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
315 assert(TopCand.Reason != NoCand && "failed to find a candidate");
316 SU = TopCand.SU;
317 }
318 IsTopNode = true;
319 } else if (RegionPolicy.OnlyBottomUp) {
320 SU = Bot.pickOnlyChoice();
321 if (!SU) {
322 CandPolicy NoPolicy;
323 BotCand.reset(NoPolicy);
324 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
325 assert(BotCand.Reason != NoCand && "failed to find a candidate");
326 SU = BotCand.SU;
327 }
328 IsTopNode = false;
329 } else {
330 SU = pickNodeBidirectional(IsTopNode);
331 }
332 } while (SU->isScheduled);
333
334 if (SU->isTopReady())
335 Top.removeReady(SU);
336 if (SU->isBottomReady())
337 Bot.removeReady(SU);
338
339 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
340 << *SU->getInstr());
341 return SU;
342 }
343
344 GCNSchedStageID GCNSchedStrategy::getCurrentStage() {
345 assert(CurrentStage && CurrentStage != SchedStages.end());
346 return *CurrentStage;
347 }
348
349 bool GCNSchedStrategy::advanceStage() {
350 assert(CurrentStage != SchedStages.end());
351 if (!CurrentStage)
352 CurrentStage = SchedStages.begin();
353 else
354 CurrentStage++;
355
356 return CurrentStage != SchedStages.end();
357 }
358
359 bool GCNSchedStrategy::hasNextStage() const {
360 assert(CurrentStage);
361 return std::next(CurrentStage) != SchedStages.end();
362 }
363
364 GCNSchedStageID GCNSchedStrategy::getNextStage() const {
365 assert(CurrentStage && std::next(CurrentStage) != SchedStages.end());
366 return *std::next(CurrentStage);
367 }
368
369 GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
370 const MachineSchedContext *C)
371 : GCNSchedStrategy(C) {
372 SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
373 SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
374 SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
375 SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
376 }
377
378 GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C)
379 : GCNSchedStrategy(C) {
380 SchedStages.push_back(GCNSchedStageID::ILPInitialSchedule);
381 }
382
383 bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
384 SchedCandidate &TryCand,
385 SchedBoundary *Zone) const {
386 // Initialize the candidate if needed.
387 if (!Cand.isValid()) {
388 TryCand.Reason = NodeOrder;
389 return true;
390 }
391
392 // Avoid spilling by exceeding the register limit.
393 if (DAG->isTrackingPressure() &&
394 tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
395 RegExcess, TRI, DAG->MF))
396 return TryCand.Reason != NoCand;
397
398 // Bias PhysReg defs and copies to their uses and definitions, respectively.
399 if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
400 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
401 return TryCand.Reason != NoCand;
402
403 bool SameBoundary = Zone != nullptr;
404 if (SameBoundary) {
405 // Prioritize instructions that read unbuffered resources by stall cycles.
406 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
407 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
408 return TryCand.Reason != NoCand;
409
410 // Avoid critical resource consumption and balance the schedule.
411 TryCand.initResourceDelta(DAG, SchedModel);
412 if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
413 TryCand, Cand, ResourceReduce))
414 return TryCand.Reason != NoCand;
415 if (tryGreater(TryCand.ResDelta.DemandedResources,
416 Cand.ResDelta.DemandedResources, TryCand, Cand,
417 ResourceDemand))
418 return TryCand.Reason != NoCand;
419
420 // Unconditionally try to reduce latency.
421 if (tryLatency(TryCand, Cand, *Zone))
422 return TryCand.Reason != NoCand;
423
424 // Weak edges are for clustering and other constraints.
425 if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
426 getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
427 return TryCand.Reason != NoCand;
428 }
429
430 // Keep clustered nodes together to encourage downstream peephole
431 // optimizations which may reduce resource requirements.
432 //
433 // This is a best effort to set things up for a post-RA pass. Optimizations
434 // like generating loads of multiple registers should ideally be done within
435 // the scheduler pass by combining the loads during DAG postprocessing.
436 const SUnit *CandNextClusterSU =
437 Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
438 const SUnit *TryCandNextClusterSU =
439 TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
440 if (tryGreater(TryCand.SU == TryCandNextClusterSU,
441 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
442 return TryCand.Reason != NoCand;
443
444 // Avoid increasing the max critical pressure in the scheduled region.
445 if (DAG->isTrackingPressure() &&
446 tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
447 TryCand, Cand, RegCritical, TRI, DAG->MF))
448 return TryCand.Reason != NoCand;
449
450 // Avoid increasing the max pressure of the entire region.
451 if (DAG->isTrackingPressure() &&
452 tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
453 Cand, RegMax, TRI, DAG->MF))
454 return TryCand.Reason != NoCand;
455
456 if (SameBoundary) {
457 // Fall through to original instruction order.
458 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
459 (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
460 TryCand.Reason = NodeOrder;
461 return true;
462 }
463 }
464 return false;
465 }
466
467 GCNScheduleDAGMILive::GCNScheduleDAGMILive(
468 MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
469 : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
470 MFI(*MF.getInfo<SIMachineFunctionInfo>()),
471 StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy) {
472
473 LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
474 }
475
476 std::unique_ptr<GCNSchedStage>
477 GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
478 switch (SchedStageID) {
479 case GCNSchedStageID::OccInitialSchedule:
480 return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
481 case GCNSchedStageID::UnclusteredHighRPReschedule:
482 return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
483 case GCNSchedStageID::ClusteredLowOccupancyReschedule:
484 return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
485 case GCNSchedStageID::PreRARematerialize:
486 return std::make_unique<PreRARematStage>(SchedStageID, *this);
487 case GCNSchedStageID::ILPInitialSchedule:
488 return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
489 }
490
491 llvm_unreachable("Unknown SchedStageID.");
492 }
493
494 void GCNScheduleDAGMILive::schedule() {
495 // Collect all scheduling regions. The actual scheduling is performed in
496 // GCNScheduleDAGMILive::finalizeSchedule.
497 Regions.push_back(std::pair(RegionBegin, RegionEnd));
498 }
499
500 GCNRegPressure
501 GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
502 GCNDownwardRPTracker RPTracker(*LIS);
503 RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
504 return RPTracker.moveMaxPressure();
505 }
506
507 void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
508 const MachineBasicBlock *MBB) {
509 GCNDownwardRPTracker RPTracker(*LIS);
510
511 // If the block has only one successor, then the live-ins of that successor
512 // are the live-outs of the current block. We can reuse the calculated live
513 // set if the successor will be scheduled after the current block.
514 const MachineBasicBlock *OnlySucc = nullptr;
515 if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) {
516 SlotIndexes *Ind = LIS->getSlotIndexes();
517 if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin()))
518 OnlySucc = *MBB->succ_begin();
519 }
520
521 // Scheduler sends regions from the end of the block upwards.
522 size_t CurRegion = RegionIdx;
523 for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
524 if (Regions[CurRegion].first->getParent() != MBB)
525 break;
526 --CurRegion;
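  // Regions within a block are recorded bottom-up, so CurRegion now indexes
  // the block's topmost region; pressure is then tracked downward from the
  // block start, stepping CurRegion back down to RegionIdx.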
527
528 auto I = MBB->begin();
529 auto LiveInIt = MBBLiveIns.find(MBB);
530 auto &Rgn = Regions[CurRegion];
531 auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
532 if (LiveInIt != MBBLiveIns.end()) {
533 auto LiveIn = std::move(LiveInIt->second);
534 RPTracker.reset(*MBB->begin(), &LiveIn);
535 MBBLiveIns.erase(LiveInIt);
536 } else {
537 I = Rgn.first;
538 auto LRS = BBLiveInMap.lookup(NonDbgMI);
539 #ifdef EXPENSIVE_CHECKS
540 assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
541 #endif
542 RPTracker.reset(*I, &LRS);
543 }
544
545 for (;;) {
546 I = RPTracker.getNext();
547
548 if (Regions[CurRegion].first == I || NonDbgMI == I) {
549 LiveIns[CurRegion] = RPTracker.getLiveRegs();
550 RPTracker.clearMaxPressure();
551 }
552
553 if (Regions[CurRegion].second == I) {
554 Pressure[CurRegion] = RPTracker.moveMaxPressure();
555 if (CurRegion-- == RegionIdx)
556 break;
557 }
558 RPTracker.advanceToNext();
559 RPTracker.advanceBeforeNext();
560 }
561
562 if (OnlySucc) {
563 if (I != MBB->end()) {
564 RPTracker.advanceToNext();
565 RPTracker.advance(MBB->end());
566 }
567 RPTracker.advanceBeforeNext();
568 MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
569 }
570 }
571
572 DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
573 GCNScheduleDAGMILive::getBBLiveInMap() const {
574 assert(!Regions.empty());
575 std::vector<MachineInstr *> BBStarters;
576 BBStarters.reserve(Regions.size());
577 auto I = Regions.rbegin(), E = Regions.rend();
578 auto *BB = I->first->getParent();
579 do {
580 auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
581 BBStarters.push_back(MI);
582 do {
583 ++I;
584 } while (I != E && I->first->getParent() == BB);
585 } while (I != E);
586 return getLiveRegMap(BBStarters, false /*After*/, *LIS);
587 }
588
589 void GCNScheduleDAGMILive::finalizeSchedule() {
590 // Start actual scheduling here. This function is called by the base
591 // MachineScheduler after all regions have been recorded by
592 // GCNScheduleDAGMILive::schedule().
593 LiveIns.resize(Regions.size());
594 Pressure.resize(Regions.size());
595 RescheduleRegions.resize(Regions.size());
596 RegionsWithHighRP.resize(Regions.size());
597 RegionsWithExcessRP.resize(Regions.size());
598 RegionsWithMinOcc.resize(Regions.size());
599 RegionsWithIGLPInstrs.resize(Regions.size());
600 RescheduleRegions.set();
601 RegionsWithHighRP.reset();
602 RegionsWithExcessRP.reset();
603 RegionsWithMinOcc.reset();
604 RegionsWithIGLPInstrs.reset();
605
606 runSchedStages();
607 }
608
609 void GCNScheduleDAGMILive::runSchedStages() {
610 LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
611
612 if (!Regions.empty())
613 BBLiveInMap = getBBLiveInMap();
614
615 GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
616 while (S.advanceStage()) {
617 auto Stage = createSchedStage(S.getCurrentStage());
618 if (!Stage->initGCNSchedStage())
619 continue;
620
621 for (auto Region : Regions) {
622 RegionBegin = Region.first;
623 RegionEnd = Region.second;
624 // Setup for scheduling the region and check whether it should be skipped.
625 if (!Stage->initGCNRegion()) {
626 Stage->advanceRegion();
627 exitRegion();
628 continue;
629 }
630
631 ScheduleDAGMILive::schedule();
632 Stage->finalizeGCNRegion();
633 }
634
635 Stage->finalizeGCNSchedStage();
636 }
637 }
638
639 #ifndef NDEBUG
640 raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
641 switch (StageID) {
642 case GCNSchedStageID::OccInitialSchedule:
643 OS << "Max Occupancy Initial Schedule";
644 break;
645 case GCNSchedStageID::UnclusteredHighRPReschedule:
646 OS << "Unclustered High Register Pressure Reschedule";
647 break;
648 case GCNSchedStageID::ClusteredLowOccupancyReschedule:
649 OS << "Clustered Low Occupancy Reschedule";
650 break;
651 case GCNSchedStageID::PreRARematerialize:
652 OS << "Pre-RA Rematerialize";
653 break;
654 case GCNSchedStageID::ILPInitialSchedule:
655 OS << "Max ILP Initial Schedule";
656 break;
657 }
658
659 return OS;
660 }
661 #endif
662
663 GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
664 : DAG(DAG), S(static_cast<GCNSchedStrategy &>(*DAG.SchedImpl)), MF(DAG.MF),
665 MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}
666
667 bool GCNSchedStage::initGCNSchedStage() {
668 if (!DAG.LIS)
669 return false;
670
671 LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
672 return true;
673 }
674
675 bool UnclusteredHighRPStage::initGCNSchedStage() {
676 if (DisableUnclusterHighRP)
677 return false;
678
679 if (!GCNSchedStage::initGCNSchedStage())
680 return false;
681
682 if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
683 return false;
684
685 SavedMutations.swap(DAG.Mutations);
686 DAG.addMutation(createIGroupLPDAGMutation());
687
688 InitialOccupancy = DAG.MinOccupancy;
689 // Aggressively try to reduce register pressure in the unclustered high RP
690 // stage. Temporarily increase occupancy target in the region.
691 S.SGPRLimitBias = S.HighRPSGPRBias;
692 S.VGPRLimitBias = S.HighRPVGPRBias;
693 if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy)
694 MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
695
696 LLVM_DEBUG(
697 dbgs()
698 << "Retrying function scheduling without clustering. "
699 "Aggressivly try to reduce register pressure to achieve occupancy "
700 << DAG.MinOccupancy << ".\n");
701
702 return true;
703 }
704
705 bool ClusteredLowOccStage::initGCNSchedStage() {
706 if (!GCNSchedStage::initGCNSchedStage())
707 return false;
708
709 // Don't bother trying to improve ILP in lower RP regions if occupancy has not
710 // been dropped. All regions will have already been scheduled with the ideal
711 // occupancy targets.
712 if (DAG.StartingOccupancy <= DAG.MinOccupancy)
713 return false;
714
715 LLVM_DEBUG(
716 dbgs() << "Retrying function scheduling with lowest recorded occupancy "
717 << DAG.MinOccupancy << ".\n");
718 return true;
719 }
720
721 bool PreRARematStage::initGCNSchedStage() {
722 if (!GCNSchedStage::initGCNSchedStage())
723 return false;
724
725 if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
726 return false;
727
728 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
729 // Check maximum occupancy
730 if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) ==
731 DAG.MinOccupancy)
732 return false;
733
734 // FIXME: This pass will invalidate cached MBBLiveIns for regions
735 // in between the defs and the region we sink the def to. Cached pressure
736 // for regions a def is sunk from will also be invalidated. Will
737 // need to be fixed if there is another pass after this pass.
738 assert(!S.hasNextStage());
739
740 collectRematerializableInstructions();
741 if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
742 return false;
743
744 LLVM_DEBUG(
745 dbgs() << "Retrying function scheduling with improved occupancy of "
746 << DAG.MinOccupancy << " from rematerializing\n");
747 return true;
748 }
749
750 void GCNSchedStage::finalizeGCNSchedStage() {
751 DAG.finishBlock();
752 LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
753 }
754
755 void UnclusteredHighRPStage::finalizeGCNSchedStage() {
756 SavedMutations.swap(DAG.Mutations);
757 S.SGPRLimitBias = S.VGPRLimitBias = 0;
758 if (DAG.MinOccupancy > InitialOccupancy) {
759 for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX)
760 DAG.RegionsWithMinOcc[IDX] =
761 DAG.Pressure[IDX].getOccupancy(DAG.ST) == DAG.MinOccupancy;
762
763 LLVM_DEBUG(dbgs() << StageID
764 << " stage successfully increased occupancy to "
765 << DAG.MinOccupancy << '\n');
766 }
767
768 GCNSchedStage::finalizeGCNSchedStage();
769 }
770
771 bool GCNSchedStage::initGCNRegion() {
772 // Check whether this new region is also a new block.
773 if (DAG.RegionBegin->getParent() != CurrentMBB)
774 setupNewBlock();
775
776 unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
777 DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);
778
779 // Skip empty scheduling regions (0 or 1 schedulable instructions).
780 if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
781 return false;
782
783 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
784 LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)
785 << " " << CurrentMBB->getName()
786 << "\n From: " << *DAG.begin() << " To: ";
787 if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
788 else dbgs() << "End";
789 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
790
791 // Save original instruction order before scheduling for possible revert.
792 Unsched.clear();
793 Unsched.reserve(DAG.NumRegionInstrs);
794 if (StageID == GCNSchedStageID::OccInitialSchedule ||
795 StageID == GCNSchedStageID::ILPInitialSchedule) {
796 for (auto &I : DAG) {
797 Unsched.push_back(&I);
798 if (I.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER ||
799 I.getOpcode() == AMDGPU::IGLP_OPT)
800 DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
801 }
802 } else {
803 for (auto &I : DAG)
804 Unsched.push_back(&I);
805 }
806
807 PressureBefore = DAG.Pressure[RegionIdx];
808
809 LLVM_DEBUG(
810 dbgs() << "Pressure before scheduling:\nRegion live-ins:"
811 << print(DAG.LiveIns[RegionIdx], DAG.MRI)
812 << "Region live-in pressure: "
813 << print(llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]))
814 << "Region register pressure: " << print(PressureBefore));
815
816 S.HasHighPressure = false;
817 S.KnownExcessRP = isRegionWithExcessRP();
818
819 if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
820 StageID != GCNSchedStageID::UnclusteredHighRPReschedule) {
821 SavedMutations.clear();
822 SavedMutations.swap(DAG.Mutations);
823 DAG.addMutation(createIGroupLPDAGMutation());
824 }
825
826 return true;
827 }
828
829 bool UnclusteredHighRPStage::initGCNRegion() {
830 // Only reschedule regions with the minimum occupancy or regions that may have
831 // spilling (excess register pressure).
832 if ((!DAG.RegionsWithMinOcc[RegionIdx] ||
833 DAG.MinOccupancy <= InitialOccupancy) &&
834 !DAG.RegionsWithExcessRP[RegionIdx])
835 return false;
836
837 return GCNSchedStage::initGCNRegion();
838 }
839
840 bool ClusteredLowOccStage::initGCNRegion() {
841 // We may need to reschedule this region if it wasn't rescheduled in the last
842 // stage, or if we found it was testing critical register pressure limits in
843 // the unclustered reschedule stage. The latter is because we may not have been
844 // able to raise the min occupancy in the previous stage so the region may be
845 // overly constrained even if it was already rescheduled.
846 if (!DAG.RegionsWithHighRP[RegionIdx])
847 return false;
848
849 return GCNSchedStage::initGCNRegion();
850 }
851
852 bool PreRARematStage::initGCNRegion() {
853 if (!DAG.RescheduleRegions[RegionIdx])
854 return false;
855
856 return GCNSchedStage::initGCNRegion();
857 }
858
859 void GCNSchedStage::setupNewBlock() {
860 if (CurrentMBB)
861 DAG.finishBlock();
862
863 CurrentMBB = DAG.RegionBegin->getParent();
864 DAG.startBlock(CurrentMBB);
865 // Get real RP for the region if it hasn't been calculated before. After the
866 // initial schedule stage real RP will be collected after scheduling.
867 if (StageID == GCNSchedStageID::OccInitialSchedule)
868 DAG.computeBlockPressure(RegionIdx, CurrentMBB);
869 }
870
871 void GCNSchedStage::finalizeGCNRegion() {
872 DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
873 DAG.RescheduleRegions[RegionIdx] = false;
874 if (S.HasHighPressure)
875 DAG.RegionsWithHighRP[RegionIdx] = true;
876
877 // Revert scheduling if we have dropped occupancy or there is some other
878 // reason that the original schedule is better.
879 checkScheduling();
880
881 if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
882 StageID != GCNSchedStageID::UnclusteredHighRPReschedule)
883 SavedMutations.swap(DAG.Mutations);
884
885 DAG.exitRegion();
886 RegionIdx++;
887 }
888
889 void GCNSchedStage::checkScheduling() {
890 // Check the results of scheduling.
891 PressureAfter = DAG.getRealRegPressure(RegionIdx);
892 LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
893 LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
894
895 if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
896 PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
897 DAG.Pressure[RegionIdx] = PressureAfter;
898 DAG.RegionsWithMinOcc[RegionIdx] =
899 PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
900
901 // Early out if we have achieved the occupancy target.
902 LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
903 return;
904 }
905
906 unsigned TargetOccupancy =
907 std::min(S.getTargetOccupancy(), ST.getOccupancyWithLocalMemSize(MF));
908 unsigned WavesAfter =
909 std::min(TargetOccupancy, PressureAfter.getOccupancy(ST));
910 unsigned WavesBefore =
911 std::min(TargetOccupancy, PressureBefore.getOccupancy(ST));
912 LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
913 << ", after " << WavesAfter << ".\n");
914
915 // We may not be able to keep the current target occupancy because of the just
916 // scheduled region. We might still be able to revert scheduling if the
917 // occupancy before was higher, or if the current schedule has register
918 // pressure higher than the excess limits which could lead to more spilling.
919 unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
920
921 // Allow memory bound functions to drop to 4 waves if not limited by an
922 // attribute.
923 if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
924 WavesAfter >= MFI.getMinAllowedOccupancy()) {
925 LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
926 << MFI.getMinAllowedOccupancy() << " waves\n");
927 NewOccupancy = WavesAfter;
928 }
929
930 if (NewOccupancy < DAG.MinOccupancy) {
931 DAG.MinOccupancy = NewOccupancy;
932 MFI.limitOccupancy(DAG.MinOccupancy);
933 DAG.RegionsWithMinOcc.reset();
934 LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
935 << DAG.MinOccupancy << ".\n");
936 }
937
938 unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
939 unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
940 if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
941 PressureAfter.getAGPRNum() > MaxVGPRs ||
942 PressureAfter.getSGPRNum() > MaxSGPRs) {
943 DAG.RescheduleRegions[RegionIdx] = true;
944 DAG.RegionsWithHighRP[RegionIdx] = true;
945 DAG.RegionsWithExcessRP[RegionIdx] = true;
946 }
947
948 // Revert if this region's schedule would cause a drop in occupancy or
949 // spilling.
950 if (shouldRevertScheduling(WavesAfter)) {
951 revertScheduling();
952 } else {
953 DAG.Pressure[RegionIdx] = PressureAfter;
954 DAG.RegionsWithMinOcc[RegionIdx] =
955 PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
956 }
957 }
958
959 unsigned
960 GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
961 DenseMap<unsigned, unsigned> &ReadyCycles,
962 const TargetSchedModel &SM) {
963 unsigned ReadyCycle = CurrCycle;
964 for (auto &D : SU.Preds) {
965 if (D.isAssignedRegDep()) {
966 MachineInstr *DefMI = D.getSUnit()->getInstr();
967 unsigned Latency = SM.computeInstrLatency(DefMI);
968 unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
969 ReadyCycle = std::max(ReadyCycle, DefReady + Latency);
970 }
971 }
972 ReadyCycles[SU.NodeNum] = ReadyCycle;
973 return ReadyCycle;
974 }
975
976 #ifndef NDEBUG
977 struct EarlierIssuingCycle {
978 bool operator()(std::pair<MachineInstr *, unsigned> A,
979 std::pair<MachineInstr *, unsigned> B) const {
980 return A.second < B.second;
981 }
982 };
983
984 static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
985 EarlierIssuingCycle> &ReadyCycles) {
986 if (ReadyCycles.empty())
987 return;
988 unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
989 dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
990 << " ##################\n# Cycle #\t\t\tInstruction "
991 " "
992 " \n";
993 unsigned IPrev = 1;
994 for (auto &I : ReadyCycles) {
995 if (I.second > IPrev + 1)
996 dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
997 << " CYCLES DETECTED ******************************\n\n";
998 dbgs() << "[ " << I.second << " ] : " << *I.first << "\n";
999 IPrev = I.second;
1000 }
1001 }
1002 #endif
1003
1004 ScheduleMetrics
1005 GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
1006 #ifndef NDEBUG
1007 std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
1008 ReadyCyclesSorted;
1009 #endif
1010 const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
1011 unsigned SumBubbles = 0;
1012 DenseMap<unsigned, unsigned> ReadyCycles;
1013 unsigned CurrCycle = 0;
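  // Model a simple in-order machine that issues one instruction per cycle in
  // schedule order; any gap between the previous issue cycle and an
  // instruction's ready cycle is counted as bubbles.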
1014 for (auto &SU : InputSchedule) {
1015 unsigned ReadyCycle =
1016 computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
1017 SumBubbles += ReadyCycle - CurrCycle;
1018 #ifndef NDEBUG
1019 ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
1020 #endif
1021 CurrCycle = ++ReadyCycle;
1022 }
1023 #ifndef NDEBUG
1024 LLVM_DEBUG(
1025 printScheduleModel(ReadyCyclesSorted);
1026 dbgs() << "\n\t"
1027 << "Metric: "
1028 << (SumBubbles
1029 ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1030 : 1)
1031 << "\n\n");
1032 #endif
1033
1034 return ScheduleMetrics(CurrCycle, SumBubbles);
1035 }
1036
1037 ScheduleMetrics
1038 GCNSchedStage::getScheduleMetrics(const GCNScheduleDAGMILive &DAG) {
1039 #ifndef NDEBUG
1040 std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
1041 ReadyCyclesSorted;
1042 #endif
1043 const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
1044 unsigned SumBubbles = 0;
1045 DenseMap<unsigned, unsigned> ReadyCycles;
1046 unsigned CurrCycle = 0;
1047 for (auto &MI : DAG) {
1048 SUnit *SU = DAG.getSUnit(&MI);
1049 if (!SU)
1050 continue;
1051 unsigned ReadyCycle =
1052 computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);
1053 SumBubbles += ReadyCycle - CurrCycle;
1054 #ifndef NDEBUG
1055 ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));
1056 #endif
1057 CurrCycle = ++ReadyCycle;
1058 }
1059 #ifndef NDEBUG
1060 LLVM_DEBUG(
1061 printScheduleModel(ReadyCyclesSorted);
1062 dbgs() << "\n\t"
1063 << "Metric: "
1064 << (SumBubbles
1065 ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1066 : 1)
1067 << "\n\n");
1068 #endif
1069
1070 return ScheduleMetrics(CurrCycle, SumBubbles);
1071 }
1072
1073 bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
1074 if (WavesAfter < DAG.MinOccupancy)
1075 return true;
1076
1077 return false;
1078 }
1079
1080 bool OccInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
1081 if (PressureAfter == PressureBefore)
1082 return false;
1083
1084 if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
1085 return true;
1086
1087 if (mayCauseSpilling(WavesAfter))
1088 return true;
1089
1090 return false;
1091 }
1092
1093 bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
1094 // If RP is not reduced in the unclustered reschedule stage, revert to the
1095 // old schedule.
1096 if ((WavesAfter <= PressureBefore.getOccupancy(ST) &&
1097 mayCauseSpilling(WavesAfter)) ||
1098 GCNSchedStage::shouldRevertScheduling(WavesAfter)) {
1099 LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
1100 return true;
1101 }
1102
1103 LLVM_DEBUG(
1104 dbgs()
1105 << "\n\t *** In shouldRevertScheduling ***\n"
1106 << " *********** BEFORE UnclusteredHighRPStage ***********\n");
1107 ScheduleMetrics MBefore =
1108 getScheduleMetrics(DAG.SUnits);
1109 LLVM_DEBUG(
1110 dbgs()
1111 << "\n *********** AFTER UnclusteredHighRPStage ***********\n");
1112 ScheduleMetrics MAfter = getScheduleMetrics(DAG);
1113 unsigned OldMetric = MBefore.getMetric();
1114 unsigned NewMetric = MAfter.getMetric();
1115 unsigned WavesBefore =
1116 std::min(S.getTargetOccupancy(), PressureBefore.getOccupancy(ST));
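  // Profit approximates (WavesAfter / WavesBefore) *
  // ((OldMetric + ScheduleMetricBias) / NewMetric) in fixed point; a result
  // below ScheduleMetrics::ScaleFactor (i.e. below 1.0) means the occupancy
  // gain does not pay for the extra latency bubbles, so the schedule reverts.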
1117 unsigned Profit =
1118 ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
1119 ((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) /
1120 NewMetric) /
1121 ScheduleMetrics::ScaleFactor;
1122 LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
1123 << MAfter << "Profit: " << Profit << "\n");
1124 return Profit < ScheduleMetrics::ScaleFactor;
1125 }
1126
1127 bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
1128 if (PressureAfter == PressureBefore)
1129 return false;
1130
1131 if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
1132 return true;
1133
1134 if (mayCauseSpilling(WavesAfter))
1135 return true;
1136
1137 return false;
1138 }
1139
1140 bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
1141 if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
1142 return true;
1143
1144 if (mayCauseSpilling(WavesAfter))
1145 return true;
1146
1147 return false;
1148 }
1149
1150 bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
1151 if (mayCauseSpilling(WavesAfter))
1152 return true;
1153
1154 return false;
1155 }
1156
1157 bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
1158 if (WavesAfter <= MFI.getMinWavesPerEU() &&
1159 !PressureAfter.less(ST, PressureBefore) &&
1160 isRegionWithExcessRP()) {
1161 LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
1162 return true;
1163 }
1164
1165 return false;
1166 }
1167
1168 void GCNSchedStage::revertScheduling() {
1169 DAG.RegionsWithMinOcc[RegionIdx] =
1170 PressureBefore.getOccupancy(ST) == DAG.MinOccupancy;
1171 LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
1172 DAG.RescheduleRegions[RegionIdx] =
1173 S.hasNextStage() &&
1174 S.getNextStage() != GCNSchedStageID::UnclusteredHighRPReschedule;
1175 DAG.RegionEnd = DAG.RegionBegin;
1176 int SkippedDebugInstr = 0;
1177 for (MachineInstr *MI : Unsched) {
1178 if (MI->isDebugInstr()) {
1179 ++SkippedDebugInstr;
1180 continue;
1181 }
1182
1183 if (MI->getIterator() != DAG.RegionEnd) {
1184 DAG.BB->remove(MI);
1185 DAG.BB->insert(DAG.RegionEnd, MI);
1186 if (!MI->isDebugInstr())
1187 DAG.LIS->handleMove(*MI, true);
1188 }
1189
1190 // Reset read-undef flags and update them later.
1191 for (auto &Op : MI->operands())
1192 if (Op.isReg() && Op.isDef())
1193 Op.setIsUndef(false);
1194 RegisterOperands RegOpers;
1195 RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
1196 if (!MI->isDebugInstr()) {
1197 if (DAG.ShouldTrackLaneMasks) {
1198 // Adjust liveness and add missing dead+read-undef flags.
1199 SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
1200 RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
1201 } else {
1202 // Adjust for missing dead-def flags.
1203 RegOpers.detectDeadDefs(*MI, *DAG.LIS);
1204 }
1205 }
1206 DAG.RegionEnd = MI->getIterator();
1207 ++DAG.RegionEnd;
1208 LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
1209 }
1210
1211 // After reverting schedule, debug instrs will now be at the end of the block
1212 // and RegionEnd will point to the first debug instr. Increment RegionEnd
1213 // past the debug instrs to the actual end of the scheduling region.
1214 while (SkippedDebugInstr-- > 0)
1215 ++DAG.RegionEnd;
1216
1217 // If Unsched.front() instruction is a debug instruction, this will actually
1218 // shrink the region since we moved all debug instructions to the end of the
1219 // block. Find the first instruction that is not a debug instruction.
1220 DAG.RegionBegin = Unsched.front()->getIterator();
1221 if (DAG.RegionBegin->isDebugInstr()) {
1222 for (MachineInstr *MI : Unsched) {
1223 if (MI->isDebugInstr())
1224 continue;
1225 DAG.RegionBegin = MI->getIterator();
1226 break;
1227 }
1228 }
1229
1230 // Then move the debug instructions back into their correct place and set
1231 // RegionBegin and RegionEnd if needed.
1232 DAG.placeDebugValues();
1233
1234 DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
1235 }
1236
1237 void PreRARematStage::collectRematerializableInstructions() {
1238 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
1239 for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
1240 Register Reg = Register::index2VirtReg(I);
1241 if (!DAG.LIS->hasInterval(Reg))
1242 continue;
1243
1244 // TODO: Handle AGPR and SGPR rematerialization
1245 if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
1246 !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
1247 continue;
1248
1249 MachineOperand *Op = DAG.MRI.getOneDef(Reg);
1250 MachineInstr *Def = Op->getParent();
1251 if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
1252 continue;
1253
1254 MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
1255 if (Def->getParent() == UseI->getParent())
1256 continue;
1257
1258 // We are only collecting defs that are defined in another block and are
1259 // live-through or used inside regions at MinOccupancy. This means that the
1260 // register must be in the live-in set for the region.
1261 bool AddedToRematList = false;
1262 for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
1263 auto It = DAG.LiveIns[I].find(Reg);
1264 if (It != DAG.LiveIns[I].end() && !It->second.none()) {
1265 if (DAG.RegionsWithMinOcc[I]) {
1266 RematerializableInsts[I][Def] = UseI;
1267 AddedToRematList = true;
1268 }
1269
1270 // Collect regions with rematerializable reg as live-in to avoid
1271 // searching later when updating RP.
1272 RematDefToLiveInRegions[Def].push_back(I);
1273 }
1274 }
1275 if (!AddedToRematList)
1276 RematDefToLiveInRegions.erase(Def);
1277 }
1278 }
1279
1280 bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
1281 const TargetInstrInfo *TII) {
1282 // Temporary copies of cached variables we will be modifying and replacing if
1283 // sinking succeeds.
1284 SmallVector<
1285 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32>
1286 NewRegions;
1287 DenseMap<unsigned, GCNRPTracker::LiveRegSet> NewLiveIns;
1288 DenseMap<unsigned, GCNRegPressure> NewPressure;
1289 BitVector NewRescheduleRegions;
1290 LiveIntervals *LIS = DAG.LIS;
1291
1292 NewRegions.resize(DAG.Regions.size());
1293 NewRescheduleRegions.resize(DAG.Regions.size());
1294
1295 // Collect only regions that have a rematerializable def as a live-in.
1296 SmallSet<unsigned, 16> ImpactedRegions;
1297 for (const auto &It : RematDefToLiveInRegions)
1298 ImpactedRegions.insert(It.second.begin(), It.second.end());
1299
1300 // Make copies of register pressure and live-ins cache that will be updated
1301 // as we rematerialize.
1302 for (auto Idx : ImpactedRegions) {
1303 NewPressure[Idx] = DAG.Pressure[Idx];
1304 NewLiveIns[Idx] = DAG.LiveIns[Idx];
1305 }
1306 NewRegions = DAG.Regions;
1307 NewRescheduleRegions.reset();
1308
1309 DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef;
1310 bool Improved = false;
1311 for (auto I : ImpactedRegions) {
1312 if (!DAG.RegionsWithMinOcc[I])
1313 continue;
1314
1315 Improved = false;
1316 int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts());
1317 int SGPRUsage = NewPressure[I].getSGPRNum();
1318
1319 // TODO: Handle occupancy drop due to AGPR and SGPR.
1320 // Check if the occupancy drop is due to VGPR usage rather than SGPR usage.
1321 if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy)
1322 break;
1323
1324 // The occupancy of this region could have been improved by a previous
1325 // iteration's sinking of defs.
1326 if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) {
1327 NewRescheduleRegions[I] = true;
1328 Improved = true;
1329 continue;
1330 }
1331
1332 // First check if we have enough trivially rematerializable instructions to
1333 // improve occupancy. Optimistically assume all instructions we are able to
1334 // sink will decrease RP.
1335 int TotalSinkableRegs = 0;
1336 for (const auto &It : RematerializableInsts[I]) {
1337 MachineInstr *Def = It.first;
1338 Register DefReg = Def->getOperand(0).getReg();
1339 TotalSinkableRegs +=
1340 SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
1341 }
1342 int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
1343 unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
1344 // If in the most optimistic scenario, we cannot improve occupancy, then do
1345 // not attempt to sink any instructions.
1346 if (OptimisticOccupancy <= DAG.MinOccupancy)
1347 break;
1348
1349 unsigned ImproveOccupancy = 0;
1350 SmallVector<MachineInstr *, 4> SinkedDefs;
1351 for (auto &It : RematerializableInsts[I]) {
1352 MachineInstr *Def = It.first;
1353 MachineBasicBlock::iterator InsertPos =
1354 MachineBasicBlock::iterator(It.second);
1355 Register Reg = Def->getOperand(0).getReg();
1356 // Rematerialize MI to its use block. Since we are only rematerializing
1357 // instructions that do not have any virtual reg uses, we do not need to
1358 // call LiveRangeEdit::allUsesAvailableAt() and
1359 // LiveRangeEdit::canRematerializeAt().
1360 TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
1361 Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
1362 MachineInstr *NewMI = &*std::prev(InsertPos);
1363 LIS->InsertMachineInstrInMaps(*NewMI);
1364 LIS->removeInterval(Reg);
1365 LIS->createAndComputeVirtRegInterval(Reg);
1366 InsertedMIToOldDef[NewMI] = Def;
1367
1368 // Update region boundaries in the scheduling region we sank from, since we
1369 // may sink an instruction that was at the beginning or end of its region.
1370 DAG.updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
1371 /*Removing =*/true);
1372
1373 // Update region boundaries in the region we sank to.
1374 DAG.updateRegionBoundaries(NewRegions, InsertPos, NewMI);
1375
1376 LaneBitmask PrevMask = NewLiveIns[I][Reg];
1377 // FIXME: Also update cached pressure for where the def was sunk from.
1378 // Update RP for all regions that have this reg as a live-in and remove
1379 // the reg from all regions as a live-in.
1380 for (auto Idx : RematDefToLiveInRegions[Def]) {
1381 NewLiveIns[Idx].erase(Reg);
1382 if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) {
1383 // Def is live-through and not used in this block.
1384 NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
1385 } else {
1386 // Def is used and rematerialized into this block.
1387 GCNDownwardRPTracker RPT(*LIS);
1388 auto *NonDbgMI = &*skipDebugInstructionsForward(
1389 NewRegions[Idx].first, NewRegions[Idx].second);
1390 RPT.reset(*NonDbgMI, &NewLiveIns[Idx]);
1391 RPT.advance(NewRegions[Idx].second);
1392 NewPressure[Idx] = RPT.moveMaxPressure();
1393 }
1394 }
1395
1396 SinkedDefs.push_back(Def);
1397 ImproveOccupancy = NewPressure[I].getOccupancy(ST);
1398 if (ImproveOccupancy > DAG.MinOccupancy)
1399 break;
1400 }
1401
1402 // Remove defs we just sank from all regions' lists of sinkable defs.
1403 for (auto &Def : SinkedDefs)
1404 for (auto TrackedIdx : RematDefToLiveInRegions[Def])
1405 RematerializableInsts[TrackedIdx].erase(Def);
1406
1407 if (ImproveOccupancy <= DAG.MinOccupancy)
1408 break;
1409
1410 NewRescheduleRegions[I] = true;
1411 Improved = true;
1412 }
1413
1414 if (!Improved) {
1415 // Occupancy was not improved for all regions that were at MinOccupancy.
1416 // Undo sinking and remove newly rematerialized instructions.
1417 for (auto &Entry : InsertedMIToOldDef) {
1418 MachineInstr *MI = Entry.first;
1419 MachineInstr *OldMI = Entry.second;
1420 Register Reg = MI->getOperand(0).getReg();
1421 LIS->RemoveMachineInstrFromMaps(*MI);
1422 MI->eraseFromParent();
1423 OldMI->clearRegisterDeads(Reg);
1424 LIS->removeInterval(Reg);
1425 LIS->createAndComputeVirtRegInterval(Reg);
1426 }
1427 return false;
1428 }
1429
1430 // Occupancy was improved for all regions.
1431 for (auto &Entry : InsertedMIToOldDef) {
1432 MachineInstr *MI = Entry.first;
1433 MachineInstr *OldMI = Entry.second;
1434
1435 // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
1436 DAG.BBLiveInMap.erase(OldMI);
1437
1438 // Remove OldMI and update LIS
1439 Register Reg = MI->getOperand(0).getReg();
1440 LIS->RemoveMachineInstrFromMaps(*OldMI);
1441 OldMI->eraseFromParent();
1442 LIS->removeInterval(Reg);
1443 LIS->createAndComputeVirtRegInterval(Reg);
1444 }
1445
1446 // Update live-ins, register pressure, and regions caches.
1447 for (auto Idx : ImpactedRegions) {
1448 DAG.LiveIns[Idx] = NewLiveIns[Idx];
1449 DAG.Pressure[Idx] = NewPressure[Idx];
1450 DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
1451 }
1452 DAG.Regions = NewRegions;
1453 DAG.RescheduleRegions = NewRescheduleRegions;
1454
1455 SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
1456 MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
1457
1458 return true;
1459 }
1460
1461 // Copied from MachineLICM
1462 bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
1463 if (!DAG.TII->isTriviallyReMaterializable(MI))
1464 return false;
1465
1466 for (const MachineOperand &MO : MI.operands())
1467 if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
1468 return false;
1469
1470 return true;
1471 }
1472
1473 // When removing, we will have to check both the beginning and the end of the region.
1474 // When inserting, we will only have to check if we are inserting NewMI in front
1475 // of a scheduling region and do not need to check the ending since we will only
1476 // ever be inserting before an already existing MI.
1477 void GCNScheduleDAGMILive::updateRegionBoundaries(
1478 SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
1479 MachineBasicBlock::iterator>> &RegionBoundaries,
1480 MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) {
1481 unsigned I = 0, E = RegionBoundaries.size();
1482 // Search for first region of the block where MI is located
1483 while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent())
1484 ++I;
1485
1486 for (; I != E; ++I) {
1487 if (MI->getParent() != RegionBoundaries[I].first->getParent())
1488 return;
1489
1490 if (Removing && MI == RegionBoundaries[I].first &&
1491 MI == RegionBoundaries[I].second) {
1492 // MI is in a region with size 1. After removing it, the region will be
1493 // size 0; set RegionBegin and RegionEnd to the past-the-end block iterator.
1494 RegionBoundaries[I] =
1495 std::pair(MI->getParent()->end(), MI->getParent()->end());
1496 return;
1497 }
1498 if (MI == RegionBoundaries[I].first) {
1499 if (Removing)
1500 RegionBoundaries[I] =
1501 std::pair(std::next(MI), RegionBoundaries[I].second);
1502 else
1503 // Inserted NewMI in front of region, set new RegionBegin to NewMI
1504 RegionBoundaries[I] = std::pair(MachineBasicBlock::iterator(NewMI),
1505 RegionBoundaries[I].second);
1506 return;
1507 }
1508 if (Removing && MI == RegionBoundaries[I].second) {
1509 RegionBoundaries[I] = std::pair(RegionBoundaries[I].first, std::prev(MI));
1510 return;
1511 }
1512 }
1513 }
1514
1515 static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
1516 return std::any_of(
1517 DAG->begin(), DAG->end(), [](MachineBasicBlock::iterator MI) {
1518 unsigned Opc = MI->getOpcode();
1519 return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
1520 });
1521 }
1522
1523 GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive(
1524 MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
1525 bool RemoveKillFlags)
1526 : ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}
1527
1528 void GCNPostScheduleDAGMILive::schedule() {
1529 HasIGLPInstrs = hasIGLPInstrs(this);
1530 if (HasIGLPInstrs) {
1531 SavedMutations.clear();
1532 SavedMutations.swap(Mutations);
1533 addMutation(createIGroupLPDAGMutation());
1534 }
1535
1536 ScheduleDAGMI::schedule();
1537 }
1538
1539 void GCNPostScheduleDAGMILive::finalizeSchedule() {
1540 if (HasIGLPInstrs)
1541 SavedMutations.swap(Mutations);
1542
1543 ScheduleDAGMI::finalizeSchedule();
1544 }
1545