1 //===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements hazard recognizers for scheduling on PowerPC processors. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #define DEBUG_TYPE "pre-RA-sched" 15 #include "PPCHazardRecognizers.h" 16 #include "PPC.h" 17 #include "PPCInstrInfo.h" 18 #include "PPCTargetMachine.h" 19 #include "llvm/CodeGen/ScheduleDAG.h" 20 #include "llvm/Support/Debug.h" 21 #include "llvm/Support/ErrorHandling.h" 22 #include "llvm/Support/raw_ostream.h" 23 using namespace llvm; 24 25 bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) { 26 // FIXME: Move this. 27 if (isBCTRAfterSet(SU)) 28 return true; 29 30 const MCInstrDesc *MCID = DAG->getInstrDesc(SU); 31 if (!MCID) 32 return false; 33 34 if (!MCID->mayLoad()) 35 return false; 36 37 // SU is a load; for any predecessors in this dispatch group, that are stores, 38 // and with which we have an ordering dependency, return true. 39 for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { 40 const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit()); 41 if (!PredMCID || !PredMCID->mayStore()) 42 continue; 43 44 if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier()) 45 continue; 46 47 for (unsigned j = 0, je = CurGroup.size(); j != je; ++j) 48 if (SU->Preds[i].getSUnit() == CurGroup[j]) 49 return true; 50 } 51 52 return false; 53 } 54 55 bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) { 56 const MCInstrDesc *MCID = DAG->getInstrDesc(SU); 57 if (!MCID) 58 return false; 59 60 if (!MCID->isBranch()) 61 return false; 62 63 // SU is a branch; for any predecessors in this dispatch group, with which we 64 // have a data dependence and set the counter register, return true. 65 for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { 66 const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit()); 67 if (!PredMCID || PredMCID->getSchedClass() != PPC::Sched::IIC_SprMTSPR) 68 continue; 69 70 if (SU->Preds[i].isCtrl()) 71 continue; 72 73 for (unsigned j = 0, je = CurGroup.size(); j != je; ++j) 74 if (SU->Preds[i].getSUnit() == CurGroup[j]) 75 return true; 76 } 77 78 return false; 79 } 80 81 // FIXME: Remove this when we don't need this: 82 namespace llvm { namespace PPC { extern int getNonRecordFormOpcode(uint16_t); } } 83 84 // FIXME: A lot of code in PPCDispatchGroupSBHazardRecognizer is P7 specific. 85 86 bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID, 87 unsigned &NSlots) { 88 // FIXME: Indirectly, this information is contained in the itinerary, and 89 // we should derive it from there instead of separately specifying it 90 // here. 91 unsigned IIC = MCID->getSchedClass(); 92 switch (IIC) { 93 default: 94 NSlots = 1; 95 break; 96 case PPC::Sched::IIC_IntDivW: 97 case PPC::Sched::IIC_IntDivD: 98 case PPC::Sched::IIC_LdStLoadUpd: 99 case PPC::Sched::IIC_LdStLDU: 100 case PPC::Sched::IIC_LdStLFDU: 101 case PPC::Sched::IIC_LdStLFDUX: 102 case PPC::Sched::IIC_LdStLHA: 103 case PPC::Sched::IIC_LdStLHAU: 104 case PPC::Sched::IIC_LdStLWA: 105 case PPC::Sched::IIC_LdStSTDU: 106 case PPC::Sched::IIC_LdStSTFDU: 107 NSlots = 2; 108 break; 109 case PPC::Sched::IIC_LdStLoadUpdX: 110 case PPC::Sched::IIC_LdStLDUX: 111 case PPC::Sched::IIC_LdStLHAUX: 112 case PPC::Sched::IIC_LdStLWARX: 113 case PPC::Sched::IIC_LdStLDARX: 114 case PPC::Sched::IIC_LdStSTDUX: 115 case PPC::Sched::IIC_LdStSTDCX: 116 case PPC::Sched::IIC_LdStSTWCX: 117 case PPC::Sched::IIC_BrMCRX: // mtcr 118 // FIXME: Add sync/isync (here and in the itinerary). 119 NSlots = 4; 120 break; 121 } 122 123 // FIXME: record-form instructions need a different itinerary class. 124 if (NSlots == 1 && PPC::getNonRecordFormOpcode(MCID->getOpcode()) != -1) 125 NSlots = 2; 126 127 switch (IIC) { 128 default: 129 // All multi-slot instructions must come first. 130 return NSlots > 1; 131 case PPC::Sched::IIC_SprMFCR: 132 case PPC::Sched::IIC_SprMFCRF: 133 case PPC::Sched::IIC_SprMTSPR: 134 return true; 135 } 136 } 137 138 ScheduleHazardRecognizer::HazardType 139 PPCDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { 140 if (Stalls == 0 && isLoadAfterStore(SU)) 141 return NoopHazard; 142 143 return ScoreboardHazardRecognizer::getHazardType(SU, Stalls); 144 } 145 146 bool PPCDispatchGroupSBHazardRecognizer::ShouldPreferAnother(SUnit *SU) { 147 const MCInstrDesc *MCID = DAG->getInstrDesc(SU); 148 unsigned NSlots; 149 if (MCID && mustComeFirst(MCID, NSlots) && CurSlots) 150 return true; 151 152 return ScoreboardHazardRecognizer::ShouldPreferAnother(SU); 153 } 154 155 unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) { 156 // We only need to fill out a maximum of 5 slots here: The 6th slot could 157 // only be a second branch, and otherwise the next instruction will start a 158 // new group. 159 if (isLoadAfterStore(SU) && CurSlots < 6) { 160 unsigned Directive = 161 DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective(); 162 // If we're using a special group-terminating nop, then we need only one. 163 if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7) 164 return 1; 165 166 return 5 - CurSlots; 167 } 168 169 return ScoreboardHazardRecognizer::PreEmitNoops(SU); 170 } 171 172 void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) { 173 const MCInstrDesc *MCID = DAG->getInstrDesc(SU); 174 if (MCID) { 175 if (CurSlots == 5 || (MCID->isBranch() && CurBranches == 1)) { 176 CurGroup.clear(); 177 CurSlots = CurBranches = 0; 178 } else { 179 DEBUG(dbgs() << "**** Adding to dispatch group: SU(" << 180 SU->NodeNum << "): "); 181 DEBUG(DAG->dumpNode(SU)); 182 183 unsigned NSlots; 184 bool MustBeFirst = mustComeFirst(MCID, NSlots); 185 186 // If this instruction must come first, but does not, then it starts a 187 // new group. 188 if (MustBeFirst && CurSlots) { 189 CurSlots = CurBranches = 0; 190 CurGroup.clear(); 191 } 192 193 CurSlots += NSlots; 194 CurGroup.push_back(SU); 195 196 if (MCID->isBranch()) 197 ++CurBranches; 198 } 199 } 200 201 return ScoreboardHazardRecognizer::EmitInstruction(SU); 202 } 203 204 void PPCDispatchGroupSBHazardRecognizer::AdvanceCycle() { 205 return ScoreboardHazardRecognizer::AdvanceCycle(); 206 } 207 208 void PPCDispatchGroupSBHazardRecognizer::RecedeCycle() { 209 llvm_unreachable("Bottom-up scheduling not supported"); 210 } 211 212 void PPCDispatchGroupSBHazardRecognizer::Reset() { 213 CurGroup.clear(); 214 CurSlots = CurBranches = 0; 215 return ScoreboardHazardRecognizer::Reset(); 216 } 217 218 void PPCDispatchGroupSBHazardRecognizer::EmitNoop() { 219 unsigned Directive = 220 DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective(); 221 // If the group has now filled all of its slots, or if we're using a special 222 // group-terminating nop, the group is complete. 223 if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 || 224 CurSlots == 6) { 225 CurGroup.clear(); 226 CurSlots = CurBranches = 0; 227 } else { 228 CurGroup.push_back(0); 229 ++CurSlots; 230 } 231 } 232 233 //===----------------------------------------------------------------------===// 234 // PowerPC 970 Hazard Recognizer 235 // 236 // This models the dispatch group formation of the PPC970 processor. Dispatch 237 // groups are bundles of up to five instructions that can contain various mixes 238 // of instructions. The PPC970 can dispatch a peak of 4 non-branch and one 239 // branch instruction per-cycle. 240 // 241 // There are a number of restrictions to dispatch group formation: some 242 // instructions can only be issued in the first slot of a dispatch group, & some 243 // instructions fill an entire dispatch group. Additionally, only branches can 244 // issue in the 5th (last) slot. 245 // 246 // Finally, there are a number of "structural" hazards on the PPC970. These 247 // conditions cause large performance penalties due to misprediction, recovery, 248 // and replay logic that has to happen. These cases include setting a CTR and 249 // branching through it in the same dispatch group, and storing to an address, 250 // then loading from the same address within a dispatch group. To avoid these 251 // conditions, we insert no-op instructions when appropriate. 252 // 253 // FIXME: This is missing some significant cases: 254 // 1. Modeling of microcoded instructions. 255 // 2. Handling of serialized operations. 256 // 3. Handling of the esoteric cases in "Resource-based Instruction Grouping". 257 // 258 259 PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetMachine &TM) 260 : TM(TM) { 261 EndDispatchGroup(); 262 } 263 264 void PPCHazardRecognizer970::EndDispatchGroup() { 265 DEBUG(errs() << "=== Start of dispatch group\n"); 266 NumIssued = 0; 267 268 // Structural hazard info. 269 HasCTRSet = false; 270 NumStores = 0; 271 } 272 273 274 PPCII::PPC970_Unit 275 PPCHazardRecognizer970::GetInstrType(unsigned Opcode, 276 bool &isFirst, bool &isSingle, 277 bool &isCracked, 278 bool &isLoad, bool &isStore) { 279 const MCInstrDesc &MCID = TM.getInstrInfo()->get(Opcode); 280 281 isLoad = MCID.mayLoad(); 282 isStore = MCID.mayStore(); 283 284 uint64_t TSFlags = MCID.TSFlags; 285 286 isFirst = TSFlags & PPCII::PPC970_First; 287 isSingle = TSFlags & PPCII::PPC970_Single; 288 isCracked = TSFlags & PPCII::PPC970_Cracked; 289 return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask); 290 } 291 292 /// isLoadOfStoredAddress - If we have a load from the previously stored pointer 293 /// as indicated by StorePtr1/StorePtr2/StoreSize, return true. 294 bool PPCHazardRecognizer970:: 295 isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset, 296 const Value *LoadValue) const { 297 for (unsigned i = 0, e = NumStores; i != e; ++i) { 298 // Handle exact and commuted addresses. 299 if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i]) 300 return true; 301 302 // Okay, we don't have an exact match, if this is an indexed offset, see if 303 // we have overlap (which happens during fp->int conversion for example). 304 if (StoreValue[i] == LoadValue) { 305 // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check 306 // to see if the load and store actually overlap. 307 if (StoreOffset[i] < LoadOffset) { 308 if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true; 309 } else { 310 if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true; 311 } 312 } 313 } 314 return false; 315 } 316 317 /// getHazardType - We return hazard for any non-branch instruction that would 318 /// terminate the dispatch group. We turn NoopHazard for any 319 /// instructions that wouldn't terminate the dispatch group that would cause a 320 /// pipeline flush. 321 ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970:: 322 getHazardType(SUnit *SU, int Stalls) { 323 assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead"); 324 325 MachineInstr *MI = SU->getInstr(); 326 327 if (MI->isDebugValue()) 328 return NoHazard; 329 330 unsigned Opcode = MI->getOpcode(); 331 bool isFirst, isSingle, isCracked, isLoad, isStore; 332 PPCII::PPC970_Unit InstrType = 333 GetInstrType(Opcode, isFirst, isSingle, isCracked, 334 isLoad, isStore); 335 if (InstrType == PPCII::PPC970_Pseudo) return NoHazard; 336 337 // We can only issue a PPC970_First/PPC970_Single instruction (such as 338 // crand/mtspr/etc) if this is the first cycle of the dispatch group. 339 if (NumIssued != 0 && (isFirst || isSingle)) 340 return Hazard; 341 342 // If this instruction is cracked into two ops by the decoder, we know that 343 // it is not a branch and that it cannot issue if 3 other instructions are 344 // already in the dispatch group. 345 if (isCracked && NumIssued > 2) 346 return Hazard; 347 348 switch (InstrType) { 349 default: llvm_unreachable("Unknown instruction type!"); 350 case PPCII::PPC970_FXU: 351 case PPCII::PPC970_LSU: 352 case PPCII::PPC970_FPU: 353 case PPCII::PPC970_VALU: 354 case PPCII::PPC970_VPERM: 355 // We can only issue a branch as the last instruction in a group. 356 if (NumIssued == 4) return Hazard; 357 break; 358 case PPCII::PPC970_CRU: 359 // We can only issue a CR instruction in the first two slots. 360 if (NumIssued >= 2) return Hazard; 361 break; 362 case PPCII::PPC970_BRU: 363 break; 364 } 365 366 // Do not allow MTCTR and BCTRL to be in the same dispatch group. 367 if (HasCTRSet && Opcode == PPC::BCTRL) 368 return NoopHazard; 369 370 // If this is a load following a store, make sure it's not to the same or 371 // overlapping address. 372 if (isLoad && NumStores && !MI->memoperands_empty()) { 373 MachineMemOperand *MO = *MI->memoperands_begin(); 374 if (isLoadOfStoredAddress(MO->getSize(), 375 MO->getOffset(), MO->getValue())) 376 return NoopHazard; 377 } 378 379 return NoHazard; 380 } 381 382 void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) { 383 MachineInstr *MI = SU->getInstr(); 384 385 if (MI->isDebugValue()) 386 return; 387 388 unsigned Opcode = MI->getOpcode(); 389 bool isFirst, isSingle, isCracked, isLoad, isStore; 390 PPCII::PPC970_Unit InstrType = 391 GetInstrType(Opcode, isFirst, isSingle, isCracked, 392 isLoad, isStore); 393 if (InstrType == PPCII::PPC970_Pseudo) return; 394 395 // Update structural hazard information. 396 if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true; 397 398 // Track the address stored to. 399 if (isStore && NumStores < 4 && !MI->memoperands_empty()) { 400 MachineMemOperand *MO = *MI->memoperands_begin(); 401 StoreSize[NumStores] = MO->getSize(); 402 StoreOffset[NumStores] = MO->getOffset(); 403 StoreValue[NumStores] = MO->getValue(); 404 ++NumStores; 405 } 406 407 if (InstrType == PPCII::PPC970_BRU || isSingle) 408 NumIssued = 4; // Terminate a d-group. 409 ++NumIssued; 410 411 // If this instruction is cracked into two ops by the decoder, remember that 412 // we issued two pieces. 413 if (isCracked) 414 ++NumIssued; 415 416 if (NumIssued == 5) 417 EndDispatchGroup(); 418 } 419 420 void PPCHazardRecognizer970::AdvanceCycle() { 421 assert(NumIssued < 5 && "Illegal dispatch group!"); 422 ++NumIssued; 423 if (NumIssued == 5) 424 EndDispatchGroup(); 425 } 426 427 void PPCHazardRecognizer970::Reset() { 428 EndDispatchGroup(); 429 } 430 431