15321dcd6SQingShan Zhang //===- PPCMachineScheduler.cpp - MI Scheduler for PowerPC -------------===//
25321dcd6SQingShan Zhang //
35321dcd6SQingShan Zhang // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45321dcd6SQingShan Zhang // See https://llvm.org/LICENSE.txt for license information.
55321dcd6SQingShan Zhang // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65321dcd6SQingShan Zhang //
75321dcd6SQingShan Zhang //===----------------------------------------------------------------------===//
8067a17b5SDmitri Gribenko
95321dcd6SQingShan Zhang #include "PPCMachineScheduler.h"
10067a17b5SDmitri Gribenko #include "MCTargetDesc/PPCMCTargetDesc.h"
11067a17b5SDmitri Gribenko
125321dcd6SQingShan Zhang using namespace llvm;
135321dcd6SQingShan Zhang
14449bfdd1SQingShan Zhang static cl::opt<bool>
15449bfdd1SQingShan Zhang DisableAddiLoadHeuristic("disable-ppc-sched-addi-load",
16449bfdd1SQingShan Zhang cl::desc("Disable scheduling addi instruction before"
17449bfdd1SQingShan Zhang "load for ppc"), cl::Hidden);
18f8eabd6dSQingShan Zhang static cl::opt<bool>
19f8eabd6dSQingShan Zhang EnableAddiHeuristic("ppc-postra-bias-addi",
20f8eabd6dSQingShan Zhang cl::desc("Enable scheduling addi instruction as early"
21f8eabd6dSQingShan Zhang "as possible post ra"),
22f8eabd6dSQingShan Zhang cl::Hidden, cl::init(true));
23f8eabd6dSQingShan Zhang
isADDIInstr(const GenericScheduler::SchedCandidate & Cand)24f8eabd6dSQingShan Zhang static bool isADDIInstr(const GenericScheduler::SchedCandidate &Cand) {
25f8eabd6dSQingShan Zhang return Cand.SU->getInstr()->getOpcode() == PPC::ADDI ||
26f8eabd6dSQingShan Zhang Cand.SU->getInstr()->getOpcode() == PPC::ADDI8;
273f0cc7acSQingShan Zhang }
28449bfdd1SQingShan Zhang
biasAddiLoadCandidate(SchedCandidate & Cand,SchedCandidate & TryCand,SchedBoundary & Zone) const29449bfdd1SQingShan Zhang bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand,
30449bfdd1SQingShan Zhang SchedCandidate &TryCand,
31449bfdd1SQingShan Zhang SchedBoundary &Zone) const {
32449bfdd1SQingShan Zhang if (DisableAddiLoadHeuristic)
33449bfdd1SQingShan Zhang return false;
34449bfdd1SQingShan Zhang
35449bfdd1SQingShan Zhang SchedCandidate &FirstCand = Zone.isTop() ? TryCand : Cand;
36449bfdd1SQingShan Zhang SchedCandidate &SecondCand = Zone.isTop() ? Cand : TryCand;
37f8eabd6dSQingShan Zhang if (isADDIInstr(FirstCand) && SecondCand.SU->getInstr()->mayLoad()) {
38449bfdd1SQingShan Zhang TryCand.Reason = Stall;
39449bfdd1SQingShan Zhang return true;
40449bfdd1SQingShan Zhang }
41f8eabd6dSQingShan Zhang if (FirstCand.SU->getInstr()->mayLoad() && isADDIInstr(SecondCand)) {
42449bfdd1SQingShan Zhang TryCand.Reason = NoCand;
43449bfdd1SQingShan Zhang return true;
44449bfdd1SQingShan Zhang }
45449bfdd1SQingShan Zhang
46449bfdd1SQingShan Zhang return false;
47449bfdd1SQingShan Zhang }
48449bfdd1SQingShan Zhang
// Compare TryCand against Cand for the pre-RA scheduler. Returns true iff
// TryCand should replace Cand; TryCand.Reason records which heuristic
// decided. The first section intentionally mirrors
// GenericScheduler::tryCandidate statement-for-statement so that the
// PPC-specific addi/load bias can run only after every generic heuristic
// has declined to pick a clear winner; keep it in sync with the generic
// version when updating.
bool PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
                                         SchedCandidate &TryCand,
                                         SchedBoundary *Zone) const {
  // From GenericScheduler::tryCandidate

  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Bias PhysReg Defs and copies to their uses and defined respectively.
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
    return TryCand.Reason != NoCand;

  // Avoid exceeding the target's limit.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                  RegExcess, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // We only compare a subset of features when comparing nodes between
  // Top and Bottom boundary. Some properties are simply incomparable, in many
  // other instances we should only override the other boundary if something
  // is a clear good pick on one boundary. Skip heuristics that are more
  // "tie-breaking" in nature.
  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // For loops that are acyclic path limited, aggressively schedule for
    // latency. Within an single cycle, whenever CurrMOps > 0, allow normal
    // heuristics to take precedence.
    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
        tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // Prioritize instructions that read unbuffered resources by stall cycles.
    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
      return TryCand.Reason != NoCand;
  }

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const SUnit *CandNextClusterSU =
      Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  const SUnit *TryCandNextClusterSU =
      TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
                 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // Weak edges are for clustering and other constraints.
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
      return TryCand.Reason != NoCand;
  }

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // Avoid critical resource consumption and balance the schedule.
    TryCand.initResourceDelta(DAG, SchedModel);
    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
                TryCand, Cand, ResourceReduce))
      return TryCand.Reason != NoCand;
    if (tryGreater(TryCand.ResDelta.DemandedResources,
                   Cand.ResDelta.DemandedResources, TryCand, Cand,
                   ResourceDemand))
      return TryCand.Reason != NoCand;

    // Avoid serializing long latency dependence chains.
    // For acyclic path limited loops, latency was already checked above.
    if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
        !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // Fall through to original instruction order.
    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
        (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
      TryCand.Reason = NodeOrder;
    }
  }

  // GenericScheduler::tryCandidate end

  // Add powerpc specific heuristic only when TryCand isn't selected or
  // selected as node order.
  if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)
    return true;

  // There are some benefits to schedule the ADDI before the load to hide the
  // latency, as RA may create a true dependency between the load and addi.
  if (SameBoundary) {
    if (biasAddiLoadCandidate(Cand, TryCand, *Zone))
      return TryCand.Reason != NoCand;
  }

  // No heuristic fired decisively; the NoCand check preserves Cand when the
  // generic code explicitly rejected TryCand.
  return TryCand.Reason != NoCand;
}
164449bfdd1SQingShan Zhang
biasAddiCandidate(SchedCandidate & Cand,SchedCandidate & TryCand) const165f8eabd6dSQingShan Zhang bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand,
166f8eabd6dSQingShan Zhang SchedCandidate &TryCand) const {
167f8eabd6dSQingShan Zhang if (!EnableAddiHeuristic)
168f8eabd6dSQingShan Zhang return false;
169f8eabd6dSQingShan Zhang
170f8eabd6dSQingShan Zhang if (isADDIInstr(TryCand) && !isADDIInstr(Cand)) {
171f8eabd6dSQingShan Zhang TryCand.Reason = Stall;
172f8eabd6dSQingShan Zhang return true;
173f8eabd6dSQingShan Zhang }
174f8eabd6dSQingShan Zhang return false;
175f8eabd6dSQingShan Zhang }
176f8eabd6dSQingShan Zhang
// Compare TryCand against Cand for the post-RA (top-only) scheduler.
// Returns true iff TryCand should replace Cand; TryCand.Reason records the
// deciding heuristic. The first section intentionally mirrors
// PostGenericScheduler::tryCandidate so the PPC addi bias runs only after
// all generic tie-breakers; keep it in sync with the generic version.
bool PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
                                          SchedCandidate &TryCand) {
  // From PostGenericScheduler::tryCandidate

  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Prioritize instructions that read unbuffered resources by stall cycles.
  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
              Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
    return TryCand.Reason != NoCand;

  // Keep clustered nodes together.
  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
                 Cand.SU == DAG->getNextClusterSucc(), TryCand, Cand, Cluster))
    return TryCand.Reason != NoCand;

  // Avoid critical resource consumption and balance the schedule.
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
              TryCand, Cand, ResourceReduce))
    return TryCand.Reason != NoCand;
  if (tryGreater(TryCand.ResDelta.DemandedResources,
                 Cand.ResDelta.DemandedResources, TryCand, Cand,
                 ResourceDemand))
    return TryCand.Reason != NoCand;

  // Avoid serializing long latency dependence chains.
  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
    return TryCand.Reason != NoCand;
  }

  // Fall through to original instruction order.
  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
    TryCand.Reason = NodeOrder;

  // PostGenericScheduler::tryCandidate end

  // Add powerpc post ra specific heuristic only when TryCand isn't selected or
  // selected as node order.
  if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)
    return true;

  // There are some benefits to schedule the ADDI as early as possible post ra
  // to avoid stalled by vector instructions which take up all the hw units.
  // And ADDI is usually used to post inc the loop indvar, which matters the
  // performance.
  if (biasAddiCandidate(Cand, TryCand))
    return TryCand.Reason != NoCand;

  // No heuristic fired decisively; preserve Cand when TryCand was rejected.
  return TryCand.Reason != NoCand;
}
231f8eabd6dSQingShan Zhang
// Called when the post-RA scheduler enters a basic block. Currently a plain
// pass-through to the generic implementation, kept as an extension point for
// PPC-specific per-block setup.
void PPCPostRASchedStrategy::enterMBB(MachineBasicBlock *MBB) {
  // Custom PPC PostRA specific behavior here.
  PostGenericScheduler::enterMBB(MBB);
}
2365321dcd6SQingShan Zhang
// Called when the post-RA scheduler leaves a basic block. Currently a plain
// pass-through to the generic implementation, kept as an extension point for
// PPC-specific per-block teardown.
void PPCPostRASchedStrategy::leaveMBB() {
  // Custom PPC PostRA specific behavior here.
  PostGenericScheduler::leaveMBB();
}
2415321dcd6SQingShan Zhang
// One-time strategy initialization for the scheduling DAG. Currently a plain
// pass-through to the generic implementation, kept as an extension point for
// PPC-specific initialization.
void PPCPostRASchedStrategy::initialize(ScheduleDAGMI *Dag) {
  // Custom PPC PostRA specific initialization here.
  PostGenericScheduler::initialize(Dag);
}
2465321dcd6SQingShan Zhang
// Pick the next SUnit to schedule; IsTopNode reports which boundary it came
// from. Currently a plain pass-through to the generic implementation, kept
// as an extension point for PPC-specific node selection.
SUnit *PPCPostRASchedStrategy::pickNode(bool &IsTopNode) {
  // Custom PPC PostRA specific scheduling here.
  return PostGenericScheduler::pickNode(IsTopNode);
}
2515321dcd6SQingShan Zhang
252