xref: /llvm-project/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp (revision ceb1f12d9a5a6bb9135796cfaccc84baf23a1a0f)
1 //===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements hazard recognizers for scheduling on PowerPC processors.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #define DEBUG_TYPE "pre-RA-sched"
15 #include "PPCHazardRecognizers.h"
16 #include "PPC.h"
17 #include "PPCInstrInfo.h"
18 #include "PPCTargetMachine.h"
19 #include "llvm/CodeGen/ScheduleDAG.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/raw_ostream.h"
23 using namespace llvm;
24 
25 bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) {
26   // FIXME: Move this.
27   if (isBCTRAfterSet(SU))
28     return true;
29 
30   const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
31   if (!MCID)
32     return false;
33 
34   if (!MCID->mayLoad())
35     return false;
36 
37   // SU is a load; for any predecessors in this dispatch group, that are stores,
38   // and with which we have an ordering dependency, return true.
39   for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {
40     const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit());
41     if (!PredMCID || !PredMCID->mayStore())
42       continue;
43 
44     if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier())
45       continue;
46 
47     for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)
48       if (SU->Preds[i].getSUnit() == CurGroup[j])
49         return true;
50   }
51 
52   return false;
53 }
54 
55 bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) {
56   const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
57   if (!MCID)
58     return false;
59 
60   if (!MCID->isBranch())
61     return false;
62 
63   // SU is a branch; for any predecessors in this dispatch group, with which we
64   // have a data dependence and set the counter register, return true.
65   for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {
66     const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit());
67     if (!PredMCID || PredMCID->getSchedClass() != PPC::Sched::IIC_SprMTSPR)
68       continue;
69 
70     if (SU->Preds[i].isCtrl())
71       continue;
72 
73     for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)
74       if (SU->Preds[i].getSUnit() == CurGroup[j])
75         return true;
76   }
77 
78   return false;
79 }
80 
// FIXME: Remove this when we don't need this:
// Local forward declaration of llvm::PPC::getNonRecordFormOpcode (presumably
// defined elsewhere in the PowerPC backend).  mustComeFirst() treats any
// result other than -1 as meaning that the opcode is a record-form
// instruction.
namespace llvm {
namespace PPC {
extern int getNonRecordFormOpcode(uint16_t);
} // end namespace PPC
} // end namespace llvm
83 
84 // FIXME: A lot of code in PPCDispatchGroupSBHazardRecognizer is P7 specific.
85 
86 bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID,
87                                                        unsigned &NSlots) {
88   // FIXME: Indirectly, this information is contained in the itinerary, and
89   // we should derive it from there instead of separately specifying it
90   // here.
91   unsigned IIC = MCID->getSchedClass();
92   switch (IIC) {
93   default:
94     NSlots = 1;
95     break;
96   case PPC::Sched::IIC_IntDivW:
97   case PPC::Sched::IIC_IntDivD:
98   case PPC::Sched::IIC_LdStLoadUpd:
99   case PPC::Sched::IIC_LdStLDU:
100   case PPC::Sched::IIC_LdStLFDU:
101   case PPC::Sched::IIC_LdStLFDUX:
102   case PPC::Sched::IIC_LdStLHA:
103   case PPC::Sched::IIC_LdStLHAU:
104   case PPC::Sched::IIC_LdStLWA:
105   case PPC::Sched::IIC_LdStSTDU:
106   case PPC::Sched::IIC_LdStSTFDU:
107     NSlots = 2;
108     break;
109   case PPC::Sched::IIC_LdStLoadUpdX:
110   case PPC::Sched::IIC_LdStLDUX:
111   case PPC::Sched::IIC_LdStLHAUX:
112   case PPC::Sched::IIC_LdStLWARX:
113   case PPC::Sched::IIC_LdStLDARX:
114   case PPC::Sched::IIC_LdStSTDUX:
115   case PPC::Sched::IIC_LdStSTDCX:
116   case PPC::Sched::IIC_LdStSTWCX:
117   case PPC::Sched::IIC_BrMCRX: // mtcr
118   // FIXME: Add sync/isync (here and in the itinerary).
119     NSlots = 4;
120     break;
121   }
122 
123   // FIXME: record-form instructions need a different itinerary class.
124   if (NSlots == 1 && PPC::getNonRecordFormOpcode(MCID->getOpcode()) != -1)
125     NSlots = 2;
126 
127   switch (IIC) {
128   default:
129     // All multi-slot instructions must come first.
130     return NSlots > 1;
131   case PPC::Sched::IIC_SprMFCR:
132   case PPC::Sched::IIC_SprMFCRF:
133   case PPC::Sched::IIC_SprMTSPR:
134     return true;
135   }
136 }
137 
138 ScheduleHazardRecognizer::HazardType
139 PPCDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
140   if (Stalls == 0 && isLoadAfterStore(SU))
141     return NoopHazard;
142 
143   return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
144 }
145 
146 bool PPCDispatchGroupSBHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
147   const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
148   unsigned NSlots;
149   if (MCID && mustComeFirst(MCID, NSlots) && CurSlots)
150     return true;
151 
152   return ScoreboardHazardRecognizer::ShouldPreferAnother(SU);
153 }
154 
155 unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) {
156   // We only need to fill out a maximum of 5 slots here: The 6th slot could
157   // only be a second branch, and otherwise the next instruction will start a
158   // new group.
159   if (isLoadAfterStore(SU) && CurSlots < 6) {
160     unsigned Directive =
161       DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
162     // If we're using a special group-terminating nop, then we need only one.
163     if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7)
164       return 1;
165 
166     return 5 - CurSlots;
167   }
168 
169   return ScoreboardHazardRecognizer::PreEmitNoops(SU);
170 }
171 
172 void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) {
173   const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
174   if (MCID) {
175     if (CurSlots == 5 || (MCID->isBranch() && CurBranches == 1)) {
176       CurGroup.clear();
177       CurSlots = CurBranches = 0;
178     } else {
179       DEBUG(dbgs() << "**** Adding to dispatch group: SU(" <<
180                       SU->NodeNum << "): ");
181       DEBUG(DAG->dumpNode(SU));
182 
183       unsigned NSlots;
184       bool MustBeFirst = mustComeFirst(MCID, NSlots);
185 
186       // If this instruction must come first, but does not, then it starts a
187       // new group.
188       if (MustBeFirst && CurSlots) {
189         CurSlots = CurBranches = 0;
190         CurGroup.clear();
191       }
192 
193       CurSlots += NSlots;
194       CurGroup.push_back(SU);
195 
196       if (MCID->isBranch())
197         ++CurBranches;
198     }
199   }
200 
201   return ScoreboardHazardRecognizer::EmitInstruction(SU);
202 }
203 
204 void PPCDispatchGroupSBHazardRecognizer::AdvanceCycle() {
205   return ScoreboardHazardRecognizer::AdvanceCycle();
206 }
207 
208 void PPCDispatchGroupSBHazardRecognizer::RecedeCycle() {
209   llvm_unreachable("Bottom-up scheduling not supported");
210 }
211 
212 void PPCDispatchGroupSBHazardRecognizer::Reset() {
213   CurGroup.clear();
214   CurSlots = CurBranches = 0;
215   return ScoreboardHazardRecognizer::Reset();
216 }
217 
218 void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
219   unsigned Directive =
220     DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
221   // If the group has now filled all of its slots, or if we're using a special
222   // group-terminating nop, the group is complete.
223   if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
224       CurSlots == 6)  {
225     CurGroup.clear();
226     CurSlots = CurBranches = 0;
227   } else {
228     CurGroup.push_back(0);
229     ++CurSlots;
230   }
231 }
232 
233 //===----------------------------------------------------------------------===//
234 // PowerPC 970 Hazard Recognizer
235 //
236 // This models the dispatch group formation of the PPC970 processor.  Dispatch
237 // groups are bundles of up to five instructions that can contain various mixes
238 // of instructions.  The PPC970 can dispatch a peak of 4 non-branch and one
239 // branch instruction per-cycle.
240 //
241 // There are a number of restrictions to dispatch group formation: some
242 // instructions can only be issued in the first slot of a dispatch group, & some
243 // instructions fill an entire dispatch group.  Additionally, only branches can
244 // issue in the 5th (last) slot.
245 //
246 // Finally, there are a number of "structural" hazards on the PPC970.  These
247 // conditions cause large performance penalties due to misprediction, recovery,
248 // and replay logic that has to happen.  These cases include setting a CTR and
249 // branching through it in the same dispatch group, and storing to an address,
250 // then loading from the same address within a dispatch group.  To avoid these
251 // conditions, we insert no-op instructions when appropriate.
252 //
253 // FIXME: This is missing some significant cases:
254 //   1. Modeling of microcoded instructions.
255 //   2. Handling of serialized operations.
256 //   3. Handling of the esoteric cases in "Resource-based Instruction Grouping".
257 //
258 
259 PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetMachine &TM)
260   : TM(TM) {
261   EndDispatchGroup();
262 }
263 
264 void PPCHazardRecognizer970::EndDispatchGroup() {
265   DEBUG(errs() << "=== Start of dispatch group\n");
266   NumIssued = 0;
267 
268   // Structural hazard info.
269   HasCTRSet = false;
270   NumStores = 0;
271 }
272 
273 
274 PPCII::PPC970_Unit
275 PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
276                                      bool &isFirst, bool &isSingle,
277                                      bool &isCracked,
278                                      bool &isLoad, bool &isStore) {
279   const MCInstrDesc &MCID = TM.getInstrInfo()->get(Opcode);
280 
281   isLoad  = MCID.mayLoad();
282   isStore = MCID.mayStore();
283 
284   uint64_t TSFlags = MCID.TSFlags;
285 
286   isFirst   = TSFlags & PPCII::PPC970_First;
287   isSingle  = TSFlags & PPCII::PPC970_Single;
288   isCracked = TSFlags & PPCII::PPC970_Cracked;
289   return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask);
290 }
291 
292 /// isLoadOfStoredAddress - If we have a load from the previously stored pointer
293 /// as indicated by StorePtr1/StorePtr2/StoreSize, return true.
294 bool PPCHazardRecognizer970::
295 isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,
296   const Value *LoadValue) const {
297   for (unsigned i = 0, e = NumStores; i != e; ++i) {
298     // Handle exact and commuted addresses.
299     if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i])
300       return true;
301 
302     // Okay, we don't have an exact match, if this is an indexed offset, see if
303     // we have overlap (which happens during fp->int conversion for example).
304     if (StoreValue[i] == LoadValue) {
305       // Okay the base pointers match, so we have [c1+r] vs [c2+r].  Check
306       // to see if the load and store actually overlap.
307       if (StoreOffset[i] < LoadOffset) {
308         if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true;
309       } else {
310         if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true;
311       }
312     }
313   }
314   return false;
315 }
316 
317 /// getHazardType - We return hazard for any non-branch instruction that would
318 /// terminate the dispatch group.  We turn NoopHazard for any
319 /// instructions that wouldn't terminate the dispatch group that would cause a
320 /// pipeline flush.
321 ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::
322 getHazardType(SUnit *SU, int Stalls) {
323   assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead");
324 
325   MachineInstr *MI = SU->getInstr();
326 
327   if (MI->isDebugValue())
328     return NoHazard;
329 
330   unsigned Opcode = MI->getOpcode();
331   bool isFirst, isSingle, isCracked, isLoad, isStore;
332   PPCII::PPC970_Unit InstrType =
333     GetInstrType(Opcode, isFirst, isSingle, isCracked,
334                  isLoad, isStore);
335   if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
336 
337   // We can only issue a PPC970_First/PPC970_Single instruction (such as
338   // crand/mtspr/etc) if this is the first cycle of the dispatch group.
339   if (NumIssued != 0 && (isFirst || isSingle))
340     return Hazard;
341 
342   // If this instruction is cracked into two ops by the decoder, we know that
343   // it is not a branch and that it cannot issue if 3 other instructions are
344   // already in the dispatch group.
345   if (isCracked && NumIssued > 2)
346     return Hazard;
347 
348   switch (InstrType) {
349   default: llvm_unreachable("Unknown instruction type!");
350   case PPCII::PPC970_FXU:
351   case PPCII::PPC970_LSU:
352   case PPCII::PPC970_FPU:
353   case PPCII::PPC970_VALU:
354   case PPCII::PPC970_VPERM:
355     // We can only issue a branch as the last instruction in a group.
356     if (NumIssued == 4) return Hazard;
357     break;
358   case PPCII::PPC970_CRU:
359     // We can only issue a CR instruction in the first two slots.
360     if (NumIssued >= 2) return Hazard;
361     break;
362   case PPCII::PPC970_BRU:
363     break;
364   }
365 
366   // Do not allow MTCTR and BCTRL to be in the same dispatch group.
367   if (HasCTRSet && Opcode == PPC::BCTRL)
368     return NoopHazard;
369 
370   // If this is a load following a store, make sure it's not to the same or
371   // overlapping address.
372   if (isLoad && NumStores && !MI->memoperands_empty()) {
373     MachineMemOperand *MO = *MI->memoperands_begin();
374     if (isLoadOfStoredAddress(MO->getSize(),
375                               MO->getOffset(), MO->getValue()))
376       return NoopHazard;
377   }
378 
379   return NoHazard;
380 }
381 
382 void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
383   MachineInstr *MI = SU->getInstr();
384 
385   if (MI->isDebugValue())
386     return;
387 
388   unsigned Opcode = MI->getOpcode();
389   bool isFirst, isSingle, isCracked, isLoad, isStore;
390   PPCII::PPC970_Unit InstrType =
391     GetInstrType(Opcode, isFirst, isSingle, isCracked,
392                  isLoad, isStore);
393   if (InstrType == PPCII::PPC970_Pseudo) return;
394 
395   // Update structural hazard information.
396   if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true;
397 
398   // Track the address stored to.
399   if (isStore && NumStores < 4 && !MI->memoperands_empty()) {
400     MachineMemOperand *MO = *MI->memoperands_begin();
401     StoreSize[NumStores] = MO->getSize();
402     StoreOffset[NumStores] = MO->getOffset();
403     StoreValue[NumStores] = MO->getValue();
404     ++NumStores;
405   }
406 
407   if (InstrType == PPCII::PPC970_BRU || isSingle)
408     NumIssued = 4;  // Terminate a d-group.
409   ++NumIssued;
410 
411   // If this instruction is cracked into two ops by the decoder, remember that
412   // we issued two pieces.
413   if (isCracked)
414     ++NumIssued;
415 
416   if (NumIssued == 5)
417     EndDispatchGroup();
418 }
419 
420 void PPCHazardRecognizer970::AdvanceCycle() {
421   assert(NumIssued < 5 && "Illegal dispatch group!");
422   ++NumIssued;
423   if (NumIssued == 5)
424     EndDispatchGroup();
425 }
426 
427 void PPCHazardRecognizer970::Reset() {
428   EndDispatchGroup();
429 }
430 
431