//===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "machine-trace-metrics"

char MachineTraceMetrics::ID = 0;

char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;

INITIALIZE_PASS_BEGIN(MachineTraceMetrics, DEBUG_TYPE,
                      "Machine Trace Metrics", false, true)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_END(MachineTraceMetrics, DEBUG_TYPE,
                    "Machine Trace Metrics", false, true)

// Start with all Ensemble slots empty; they are allocated lazily by
// getEnsemble() and owned by this pass (freed in releaseMemory()).
MachineTraceMetrics::MachineTraceMetrics() : MachineFunctionPass(ID) {
  std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
}

// This is an analysis pass: it preserves everything and only requires the
// analyses it reads (branch probabilities and loop info).
void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
  AU.addRequired<MachineLoopInfoWrapperPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

// Cache target hooks and size the per-block tables for the current function.
// Returns false: the function is never modified.
bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
  MF = &Func;
  const TargetSubtargetInfo &ST = MF->getSubtarget();
  TII = ST.getInstrInfo();
  TRI = ST.getRegisterInfo();
  MRI = &MF->getRegInfo();
  Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  SchedModel.init(&ST);
  BlockInfo.resize(MF->getNumBlockIDs());
  // One row of resource counters per block, indexed [MBBNum * PRKinds + K].
  ProcReleaseAtCycles.resize(MF->getNumBlockIDs() *
                             SchedModel.getNumProcResourceKinds());
  return false;
}

// Drop all cached per-function state, including the lazily created Ensembles.
void MachineTraceMetrics::releaseMemory() {
  MF = nullptr;
  BlockInfo.clear();
  for (Ensemble *&E : Ensembles) {
    delete E;
    E = nullptr;
  }
}

//===----------------------------------------------------------------------===//
//                          Fixed block information
//===----------------------------------------------------------------------===//
//
// The number of instructions in a basic block and the CPU resources used by
// those instructions don't depend on any given trace strategy.

/// Compute the resource usage in basic block MBB.
const MachineTraceMetrics::FixedBlockInfo*
MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
  assert(MBB && "No basic block");
  FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
  // Results are cached; the per-block numbers never change for a strategy.
  if (FBI->hasResources())
    return FBI;

  // Compute resource usage in the block.
  FBI->HasCalls = false;
  unsigned InstrCount = 0;

  // Add up per-processor resource cycles as well.
  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
  SmallVector<unsigned, 32> PRCycles(PRKinds);

  for (const auto &MI : *MBB) {
    // Transient instructions (copies, implicit defs, ...) issue for free.
    if (MI.isTransient())
      continue;
    ++InstrCount;
    if (MI.isCall())
      FBI->HasCalls = true;

    // Count processor resources used.
    if (!SchedModel.hasInstrSchedModel())
      continue;
    const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
    if (!SC->isValid())
      continue;

    for (TargetSchedModel::ProcResIter
         PI = SchedModel.getWriteProcResBegin(SC),
         PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
      assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
      PRCycles[PI->ProcResourceIdx] += PI->ReleaseAtCycle;
    }
  }
  FBI->InstrCount = InstrCount;

  // Scale the resource cycles so they are comparable across resource kinds
  // with different numbers of units.
  unsigned PROffset = MBB->getNumber() * PRKinds;
  for (unsigned K = 0; K < PRKinds; ++K)
    ProcReleaseAtCycles[PROffset + K] =
        PRCycles[K] * SchedModel.getResourceFactor(K);

  return FBI;
}

// Return the row of scaled per-resource cycle counts for block MBBNum.
// getResources() must have populated the row first.
ArrayRef<unsigned>
MachineTraceMetrics::getProcReleaseAtCycles(unsigned MBBNum) const {
  assert(BlockInfo[MBBNum].hasResources() &&
         "getResources() must be called before getProcReleaseAtCycles()");
  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
  assert((MBBNum+1) * PRKinds <= ProcReleaseAtCycles.size());
  return ArrayRef{ProcReleaseAtCycles.data() + MBBNum * PRKinds, PRKinds};
}

//===----------------------------------------------------------------------===//
//                         Ensemble utility functions
//===----------------------------------------------------------------------===//

// Size the per-block trace tables to match the owning MachineTraceMetrics.
MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *CT) : MTM(*CT) {
  BlockInfo.resize(MTM.BlockInfo.size());
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
  ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
}

// Virtual destructor serves as an anchor.
MachineTraceMetrics::Ensemble::~Ensemble() = default;

const MachineLoop*
MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
  return MTM.Loops->getLoopFor(MBB);
}

// Update resource-related information in the TraceBlockInfo for MBB.
// Only update resources related to the trace above MBB.
void MachineTraceMetrics::Ensemble::
computeDepthResources(const MachineBasicBlock *MBB) {
  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  unsigned PROffset = MBB->getNumber() * PRKinds;

  // Compute resources from trace above. The top block is simple.
  if (!TBI->Pred) {
    TBI->InstrDepth = 0;
    TBI->Head = MBB->getNumber();
    std::fill(ProcResourceDepths.begin() + PROffset,
              ProcResourceDepths.begin() + PROffset + PRKinds, 0);
    return;
  }

  // Compute from the block above. A post-order traversal ensures the
  // predecessor is always computed first.
  unsigned PredNum = TBI->Pred->getNumber();
  TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
  assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
  const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
  TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
  TBI->Head = PredTBI->Head;

  // Compute per-resource depths. Depth excludes MBB's own instructions: it is
  // the predecessor's depth plus the predecessor's own resource cycles.
  ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
  ArrayRef<unsigned> PredPRCycles = MTM.getProcReleaseAtCycles(PredNum);
  for (unsigned K = 0; K < PRKinds; ++K)
    ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
}

// Update resource-related information in the TraceBlockInfo for MBB.
// Only update resources related to the trace below MBB.
void MachineTraceMetrics::Ensemble::
computeHeightResources(const MachineBasicBlock *MBB) {
  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  unsigned PROffset = MBB->getNumber() * PRKinds;

  // Compute resources for the current block. Unlike depth, height includes
  // MBB's own instructions and resource cycles.
  TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
  ArrayRef<unsigned> PRCycles = MTM.getProcReleaseAtCycles(MBB->getNumber());

  // The trace tail is done.
  if (!TBI->Succ) {
    TBI->Tail = MBB->getNumber();
    llvm::copy(PRCycles, ProcResourceHeights.begin() + PROffset);
    return;
  }

  // Compute from the block below. A post-order traversal ensures the
  // predecessor is always computed first.
  unsigned SuccNum = TBI->Succ->getNumber();
  TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
  assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
  TBI->InstrHeight += SuccTBI->InstrHeight;
  TBI->Tail = SuccTBI->Tail;

  // Compute per-resource heights: this block's cycles plus everything below.
  ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
  for (unsigned K = 0; K < PRKinds; ++K)
    ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
}

// Check if depth resources for MBB are valid and return the TBI.
// Return NULL if the resources have been invalidated.
const MachineTraceMetrics::TraceBlockInfo*
MachineTraceMetrics::Ensemble::
getDepthResources(const MachineBasicBlock *MBB) const {
  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  return TBI->hasValidDepth() ? TBI : nullptr;
}

// Check if height resources for MBB are valid and return the TBI.
// Return NULL if the resources have been invalidated.
242 const MachineTraceMetrics::TraceBlockInfo* 243 MachineTraceMetrics::Ensemble:: 244 getHeightResources(const MachineBasicBlock *MBB) const { 245 const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; 246 return TBI->hasValidHeight() ? TBI : nullptr; 247 } 248 249 /// Get an array of processor resource depths for MBB. Indexed by processor 250 /// resource kind, this array contains the scaled processor resources consumed 251 /// by all blocks preceding MBB in its trace. It does not include instructions 252 /// in MBB. 253 /// 254 /// Compare TraceBlockInfo::InstrDepth. 255 ArrayRef<unsigned> 256 MachineTraceMetrics::Ensemble:: 257 getProcResourceDepths(unsigned MBBNum) const { 258 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); 259 assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size()); 260 return ArrayRef{ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds}; 261 } 262 263 /// Get an array of processor resource heights for MBB. Indexed by processor 264 /// resource kind, this array contains the scaled processor resources consumed 265 /// by this block and all blocks following it in its trace. 266 /// 267 /// Compare TraceBlockInfo::InstrHeight. 268 ArrayRef<unsigned> 269 MachineTraceMetrics::Ensemble:: 270 getProcResourceHeights(unsigned MBBNum) const { 271 unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); 272 assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size()); 273 return ArrayRef{ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds}; 274 } 275 276 //===----------------------------------------------------------------------===// 277 // Trace Selection Strategies 278 //===----------------------------------------------------------------------===// 279 // 280 // A trace selection strategy is implemented as a sub-class of Ensemble. The 281 // trace through a block B is computed by two DFS traversals of the CFG 282 // starting from B. One upwards, and one downwards. 
During the upwards DFS, 283 // pickTracePred() is called on the post-ordered blocks. During the downwards 284 // DFS, pickTraceSucc() is called in a post-order. 285 // 286 287 // We never allow traces that leave loops, but we do allow traces to enter 288 // nested loops. We also never allow traces to contain back-edges. 289 // 290 // This means that a loop header can never appear above the center block of a 291 // trace, except as the trace head. Below the center block, loop exiting edges 292 // are banned. 293 // 294 // Return true if an edge from the From loop to the To loop is leaving a loop. 295 // Either of To and From can be null. 296 static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) { 297 return From && !From->contains(To); 298 } 299 300 // MinInstrCountEnsemble - Pick the trace that executes the least number of 301 // instructions. 302 namespace { 303 304 class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble { 305 const char *getName() const override { return "MinInstr"; } 306 const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override; 307 const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override; 308 309 public: 310 MinInstrCountEnsemble(MachineTraceMetrics *MTM) 311 : MachineTraceMetrics::Ensemble(MTM) {} 312 }; 313 314 /// Pick only the current basic block for the trace and do not choose any 315 /// predecessors/successors. 316 class LocalEnsemble : public MachineTraceMetrics::Ensemble { 317 const char *getName() const override { return "Local"; } 318 const MachineBasicBlock *pickTracePred(const MachineBasicBlock *) override { 319 return nullptr; 320 }; 321 const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock *) override { 322 return nullptr; 323 }; 324 325 public: 326 LocalEnsemble(MachineTraceMetrics *MTM) 327 : MachineTraceMetrics::Ensemble(MTM) {} 328 }; 329 } // end anonymous namespace 330 331 // Select the preferred predecessor for MBB. 
332 const MachineBasicBlock* 333 MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) { 334 if (MBB->pred_empty()) 335 return nullptr; 336 const MachineLoop *CurLoop = getLoopFor(MBB); 337 // Don't leave loops, and never follow back-edges. 338 if (CurLoop && MBB == CurLoop->getHeader()) 339 return nullptr; 340 unsigned CurCount = MTM.getResources(MBB)->InstrCount; 341 const MachineBasicBlock *Best = nullptr; 342 unsigned BestDepth = 0; 343 for (const MachineBasicBlock *Pred : MBB->predecessors()) { 344 const MachineTraceMetrics::TraceBlockInfo *PredTBI = 345 getDepthResources(Pred); 346 // Ignore cycles that aren't natural loops. 347 if (!PredTBI) 348 continue; 349 // Pick the predecessor that would give this block the smallest InstrDepth. 350 unsigned Depth = PredTBI->InstrDepth + CurCount; 351 if (!Best || Depth < BestDepth) { 352 Best = Pred; 353 BestDepth = Depth; 354 } 355 } 356 return Best; 357 } 358 359 // Select the preferred successor for MBB. 360 const MachineBasicBlock* 361 MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) { 362 if (MBB->succ_empty()) 363 return nullptr; 364 const MachineLoop *CurLoop = getLoopFor(MBB); 365 const MachineBasicBlock *Best = nullptr; 366 unsigned BestHeight = 0; 367 for (const MachineBasicBlock *Succ : MBB->successors()) { 368 // Don't consider back-edges. 369 if (CurLoop && Succ == CurLoop->getHeader()) 370 continue; 371 // Don't consider successors exiting CurLoop. 372 if (isExitingLoop(CurLoop, getLoopFor(Succ))) 373 continue; 374 const MachineTraceMetrics::TraceBlockInfo *SuccTBI = 375 getHeightResources(Succ); 376 // Ignore cycles that aren't natural loops. 377 if (!SuccTBI) 378 continue; 379 // Pick the successor that would give this block the smallest InstrHeight. 
380 unsigned Height = SuccTBI->InstrHeight; 381 if (!Best || Height < BestHeight) { 382 Best = Succ; 383 BestHeight = Height; 384 } 385 } 386 return Best; 387 } 388 389 // Get an Ensemble sub-class for the requested trace strategy. 390 MachineTraceMetrics::Ensemble * 391 MachineTraceMetrics::getEnsemble(MachineTraceStrategy Strategy) { 392 assert(Strategy < MachineTraceStrategy::TS_NumStrategies && 393 "Invalid trace strategy enum"); 394 Ensemble *&E = Ensembles[static_cast<size_t>(Strategy)]; 395 if (E) 396 return E; 397 398 // Allocate new Ensemble on demand. 399 switch (Strategy) { 400 case MachineTraceStrategy::TS_MinInstrCount: 401 return (E = new MinInstrCountEnsemble(this)); 402 case MachineTraceStrategy::TS_Local: 403 return (E = new LocalEnsemble(this)); 404 default: llvm_unreachable("Invalid trace strategy enum"); 405 } 406 } 407 408 void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) { 409 LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB) 410 << '\n'); 411 BlockInfo[MBB->getNumber()].invalidate(); 412 for (Ensemble *E : Ensembles) 413 if (E) 414 E->invalidate(MBB); 415 } 416 417 void MachineTraceMetrics::verifyAnalysis() const { 418 if (!MF) 419 return; 420 #ifndef NDEBUG 421 assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size"); 422 for (Ensemble *E : Ensembles) 423 if (E) 424 E->verify(); 425 #endif 426 } 427 428 //===----------------------------------------------------------------------===// 429 // Trace building 430 //===----------------------------------------------------------------------===// 431 // 432 // Traces are built by two CFG traversals. To avoid recomputing too much, use a 433 // set abstraction that confines the search to the current loop, and doesn't 434 // revisit blocks. 
namespace {

// Shared state for the two pruned post-order traversals in computeTrace().
struct LoopBounds {
  // Per-block trace info; consulted to skip already-computed blocks.
  MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
  // Blocks already visited by the current traversal.
  SmallPtrSet<const MachineBasicBlock*, 8> Visited;
  const MachineLoopInfo *Loops;
  // False for the upwards (predecessor) search, true for downwards.
  bool Downward = false;

  LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks,
             const MachineLoopInfo *Loops)
      : Blocks(Blocks), Loops(Loops) {}
};

} // end anonymous namespace

// Specialize po_iterator_storage in order to prune the post-order traversal so
// it is limited to the current loop and doesn't traverse the loop back edges.
namespace llvm {

template<>
class po_iterator_storage<LoopBounds, true> {
  LoopBounds &LB;

public:
  po_iterator_storage(LoopBounds &LB) : LB(LB) {}

  void finishPostorder(const MachineBasicBlock*) {}

  // Return true if the traversal should follow the edge From -> To.
  bool insertEdge(std::optional<const MachineBasicBlock *> From,
                  const MachineBasicBlock *To) {
    // Skip already visited To blocks.
    MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
    if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
      return false;
    // From is null once when To is the trace center block.
    if (From) {
      if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) {
        // Don't follow backedges, don't leave FromLoop when going upwards.
        if ((LB.Downward ? To : *From) == FromLoop->getHeader())
          return false;
        // Don't leave FromLoop.
        if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
          return false;
      }
    }
    // To is a new block. Mark the block as visited in case the CFG has cycles
    // that MachineLoopInfo didn't recognize as a natural loop.
    return LB.Visited.insert(To).second;
  }
};

} // end namespace llvm

/// Compute the trace through MBB.
void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through "
                    << printMBBReference(*MBB) << '\n');
  // Set up loop bounds for the backwards post-order traversal.
  LoopBounds Bounds(BlockInfo, MTM.Loops);

  // Run an upwards post-order search for the trace start.
  Bounds.Downward = false;
  Bounds.Visited.clear();
  for (const auto *I : inverse_post_order_ext(MBB, Bounds)) {
    LLVM_DEBUG(dbgs() << "  pred for " << printMBBReference(*I) << ": ");
    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
    // All the predecessors have been visited, pick the preferred one.
    TBI.Pred = pickTracePred(I);
    LLVM_DEBUG({
      if (TBI.Pred)
        dbgs() << printMBBReference(*TBI.Pred) << '\n';
      else
        dbgs() << "null\n";
    });
    // The trace leading to I is now known, compute the depth resources.
    computeDepthResources(I);
  }

  // Run a downwards post-order search for the trace end.
  Bounds.Downward = true;
  Bounds.Visited.clear();
  for (const auto *I : post_order_ext(MBB, Bounds)) {
    LLVM_DEBUG(dbgs() << "  succ for " << printMBBReference(*I) << ": ");
    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
    // All the successors have been visited, pick the preferred one.
    TBI.Succ = pickTraceSucc(I);
    LLVM_DEBUG({
      if (TBI.Succ)
        dbgs() << printMBBReference(*TBI.Succ) << '\n';
      else
        dbgs() << "null\n";
    });
    // The trace leaving I is now known, compute the height resources.
    computeHeightResources(I);
  }
}

/// Invalidate traces through BadMBB.
void
MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
  SmallVector<const MachineBasicBlock*, 16> WorkList;
  TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];

  // Invalidate height resources of blocks above MBB. Worklist algorithm:
  // follow only the blocks whose preferred successor chain reaches BadMBB.
  if (BadTBI.hasValidHeight()) {
    BadTBI.invalidateHeight();
    WorkList.push_back(BadMBB);
    while (!WorkList.empty()) {
      const MachineBasicBlock *MBB = WorkList.pop_back_val();
      LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
                        << getName() << " height.\n");
      // Find any MBB predecessors that have MBB as their preferred successor.
      // They are the only ones that need to be invalidated.
      for (const MachineBasicBlock *Pred : MBB->predecessors()) {
        TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
        if (!TBI.hasValidHeight())
          continue;
        if (TBI.Succ == MBB) {
          TBI.invalidateHeight();
          WorkList.push_back(Pred);
          continue;
        }
        // Verify that TBI.Succ is actually a *I successor.
        assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
      }
    }
  }

  // Invalidate depth resources of blocks below MBB, symmetrically.
  if (BadTBI.hasValidDepth()) {
    BadTBI.invalidateDepth();
    WorkList.push_back(BadMBB);
    while (!WorkList.empty()) {
      const MachineBasicBlock *MBB = WorkList.pop_back_val();
      LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
                        << getName() << " depth.\n");
      // Find any MBB successors that have MBB as their preferred predecessor.
      // They are the only ones that need to be invalidated.
      for (const MachineBasicBlock *Succ : MBB->successors()) {
        TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
        if (!TBI.hasValidDepth())
          continue;
        if (TBI.Pred == MBB) {
          TBI.invalidateDepth();
          WorkList.push_back(Succ);
          continue;
        }
        // Verify that TBI.Pred is actually a *I predecessor.
        assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
      }
    }
  }

  // Clear any per-instruction data. We only have to do this for BadMBB itself
  // because the instructions in that block may change. Other blocks may be
  // invalidated, but their instructions will stay the same, so there is no
  // need to erase the Cycle entries. They will be overwritten when we
  // recompute.
  for (const auto &I : *BadMBB)
    Cycles.erase(&I);
}

// Check internal consistency of the per-block trace links (debug builds only).
void MachineTraceMetrics::Ensemble::verify() const {
#ifndef NDEBUG
  assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
         "Outdated BlockInfo size");
  for (unsigned Num = 0; Num < BlockInfo.size(); ++Num) {
    const TraceBlockInfo &TBI = BlockInfo[Num];
    if (TBI.hasValidDepth() && TBI.Pred) {
      const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
      assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
      assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
             "Trace is broken, depth should have been invalidated.");
      const MachineLoop *Loop = getLoopFor(MBB);
      assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
    }
    if (TBI.hasValidHeight() && TBI.Succ) {
      const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
      assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
      assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
             "Trace is broken, height should have been invalidated.");
      const MachineLoop *Loop = getLoopFor(MBB);
      const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
      assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
             "Trace contains backedge");
    }
  }
#endif
}

//===----------------------------------------------------------------------===//
//                             Data Dependencies
//===----------------------------------------------------------------------===//
//
// Compute the depth and height of each instruction based on data dependencies
// and instruction latencies. These cycle numbers assume that the CPU can issue
// an infinite number of instructions per cycle as long as their dependencies
// are ready.
634 635 // A data dependency is represented as a defining MI and operand numbers on the 636 // defining and using MI. 637 namespace { 638 639 struct DataDep { 640 const MachineInstr *DefMI; 641 unsigned DefOp; 642 unsigned UseOp; 643 644 DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp) 645 : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {} 646 647 /// Create a DataDep from an SSA form virtual register. 648 DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp) 649 : UseOp(UseOp) { 650 assert(Register::isVirtualRegister(VirtReg)); 651 MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg); 652 assert(!DefI.atEnd() && "Register has no defs"); 653 DefMI = DefI->getParent(); 654 DefOp = DefI.getOperandNo(); 655 assert((++DefI).atEnd() && "Register has multiple defs"); 656 } 657 }; 658 659 } // end anonymous namespace 660 661 // Get the input data dependencies that must be ready before UseMI can issue. 662 // Return true if UseMI has any physreg operands. 663 static bool getDataDeps(const MachineInstr &UseMI, 664 SmallVectorImpl<DataDep> &Deps, 665 const MachineRegisterInfo *MRI) { 666 // Debug values should not be included in any calculations. 667 if (UseMI.isDebugInstr()) 668 return false; 669 670 bool HasPhysRegs = false; 671 for (const MachineOperand &MO : UseMI.operands()) { 672 if (!MO.isReg()) 673 continue; 674 Register Reg = MO.getReg(); 675 if (!Reg) 676 continue; 677 if (Reg.isPhysical()) { 678 HasPhysRegs = true; 679 continue; 680 } 681 // Collect virtual register reads. 682 if (MO.readsReg()) 683 Deps.emplace_back(MRI, Reg, MO.getOperandNo()); 684 } 685 return HasPhysRegs; 686 } 687 688 // Get the input data dependencies of a PHI instruction, using Pred as the 689 // preferred predecessor. 690 // This will add at most one dependency to Deps. 
691 static void getPHIDeps(const MachineInstr &UseMI, 692 SmallVectorImpl<DataDep> &Deps, 693 const MachineBasicBlock *Pred, 694 const MachineRegisterInfo *MRI) { 695 // No predecessor at the beginning of a trace. Ignore dependencies. 696 if (!Pred) 697 return; 698 assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI"); 699 for (unsigned Idx = 1; Idx < UseMI.getNumOperands(); Idx += 2) { 700 if (UseMI.getOperand(Idx + 1).getMBB() == Pred) { 701 Register Reg = UseMI.getOperand(Idx).getReg(); 702 Deps.emplace_back(MRI, Reg, Idx); 703 return; 704 } 705 } 706 } 707 708 // Identify physreg dependencies for UseMI, and update the live regunit 709 // tracking set when scanning instructions downwards. 710 static void updatePhysDepsDownwards(const MachineInstr *UseMI, 711 SmallVectorImpl<DataDep> &Deps, 712 SparseSet<LiveRegUnit> &RegUnits, 713 const TargetRegisterInfo *TRI) { 714 SmallVector<MCRegister, 8> Kills; 715 SmallVector<unsigned, 8> LiveDefOps; 716 717 for (const MachineOperand &MO : UseMI->operands()) { 718 if (!MO.isReg() || !MO.getReg().isPhysical()) 719 continue; 720 MCRegister Reg = MO.getReg().asMCReg(); 721 // Track live defs and kills for updating RegUnits. 722 if (MO.isDef()) { 723 if (MO.isDead()) 724 Kills.push_back(Reg); 725 else 726 LiveDefOps.push_back(MO.getOperandNo()); 727 } else if (MO.isKill()) 728 Kills.push_back(Reg); 729 // Identify dependencies. 730 if (!MO.readsReg()) 731 continue; 732 for (MCRegUnit Unit : TRI->regunits(Reg)) { 733 SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit); 734 if (I == RegUnits.end()) 735 continue; 736 Deps.emplace_back(I->MI, I->Op, MO.getOperandNo()); 737 break; 738 } 739 } 740 741 // Update RegUnits to reflect live registers after UseMI. 742 // First kills. 743 for (MCRegister Kill : Kills) 744 for (MCRegUnit Unit : TRI->regunits(Kill)) 745 RegUnits.erase(Unit); 746 747 // Second, live defs. 
748 for (unsigned DefOp : LiveDefOps) { 749 for (MCRegUnit Unit : 750 TRI->regunits(UseMI->getOperand(DefOp).getReg().asMCReg())) { 751 LiveRegUnit &LRU = RegUnits[Unit]; 752 LRU.MI = UseMI; 753 LRU.Op = DefOp; 754 } 755 } 756 } 757 758 /// The length of the critical path through a trace is the maximum of two path 759 /// lengths: 760 /// 761 /// 1. The maximum height+depth over all instructions in the trace center block. 762 /// 763 /// 2. The longest cross-block dependency chain. For small blocks, it is 764 /// possible that the critical path through the trace doesn't include any 765 /// instructions in the block. 766 /// 767 /// This function computes the second number from the live-in list of the 768 /// center block. 769 unsigned MachineTraceMetrics::Ensemble:: 770 computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { 771 assert(TBI.HasValidInstrDepths && "Missing depth info"); 772 assert(TBI.HasValidInstrHeights && "Missing height info"); 773 unsigned MaxLen = 0; 774 for (const LiveInReg &LIR : TBI.LiveIns) { 775 if (!LIR.Reg.isVirtual()) 776 continue; 777 const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); 778 // Ignore dependencies outside the current trace. 779 const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()]; 780 if (!DefTBI.isUsefulDominator(TBI)) 781 continue; 782 unsigned Len = LIR.Height + Cycles[DefMI].Depth; 783 MaxLen = std::max(MaxLen, Len); 784 } 785 return MaxLen; 786 } 787 788 void MachineTraceMetrics::Ensemble:: 789 updateDepth(MachineTraceMetrics::TraceBlockInfo &TBI, const MachineInstr &UseMI, 790 SparseSet<LiveRegUnit> &RegUnits) { 791 SmallVector<DataDep, 8> Deps; 792 // Collect all data dependencies. 793 if (UseMI.isPHI()) 794 getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI); 795 else if (getDataDeps(UseMI, Deps, MTM.MRI)) 796 updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI); 797 798 // Filter and process dependencies, computing the earliest issue cycle. 
799 unsigned Cycle = 0; 800 for (const DataDep &Dep : Deps) { 801 const TraceBlockInfo&DepTBI = 802 BlockInfo[Dep.DefMI->getParent()->getNumber()]; 803 // Ignore dependencies from outside the current trace. 804 if (!DepTBI.isUsefulDominator(TBI)) 805 continue; 806 assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency"); 807 unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth; 808 // Add latency if DefMI is a real instruction. Transients get latency 0. 809 if (!Dep.DefMI->isTransient()) 810 DepCycle += MTM.SchedModel 811 .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp); 812 Cycle = std::max(Cycle, DepCycle); 813 } 814 // Remember the instruction depth. 815 InstrCycles &MICycles = Cycles[&UseMI]; 816 MICycles.Depth = Cycle; 817 818 if (TBI.HasValidInstrHeights) { 819 // Update critical path length. 820 TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height); 821 LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI); 822 } else { 823 LLVM_DEBUG(dbgs() << Cycle << '\t' << UseMI); 824 } 825 } 826 827 void MachineTraceMetrics::Ensemble:: 828 updateDepth(const MachineBasicBlock *MBB, const MachineInstr &UseMI, 829 SparseSet<LiveRegUnit> &RegUnits) { 830 updateDepth(BlockInfo[MBB->getNumber()], UseMI, RegUnits); 831 } 832 833 void MachineTraceMetrics::Ensemble:: 834 updateDepths(MachineBasicBlock::iterator Start, 835 MachineBasicBlock::iterator End, 836 SparseSet<LiveRegUnit> &RegUnits) { 837 for (; Start != End; Start++) 838 updateDepth(Start->getParent(), *Start, RegUnits); 839 } 840 841 /// Compute instruction depths for all instructions above or in MBB in its 842 /// trace. This assumes that the trace through MBB has already been computed. 
void MachineTraceMetrics::Ensemble::
computeInstrDepths(const MachineBasicBlock *MBB) {
  // The top of the trace may already be computed, and HasValidInstrDepths
  // implies Head->HasValidInstrDepths, so we only need to start from the first
  // block in the trace that needs to be recomputed.
  SmallVector<const MachineBasicBlock*, 8> Stack;
  while (MBB) {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidDepth() && "Incomplete trace");
    if (TBI.HasValidInstrDepths)
      break;
    Stack.push_back(MBB);
    MBB = TBI.Pred;
  }

  // FIXME: If MBB is non-null at this point, it is the last pre-computed block
  // in the trace. We should track any live-out physregs that were defined in
  // the trace. This is quite rare in SSA form, typically created by CSE
  // hoisting a compare.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());

  // Go through trace blocks in top-down order, stopping after the center block.
  while (!Stack.empty()) {
    MBB = Stack.pop_back_val();
    LLVM_DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    TBI.HasValidInstrDepths = true;
    TBI.CriticalPath = 0;

    // Print out resource depths here as well.
    LLVM_DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
      ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
      for (unsigned K = 0; K < PRDepths.size(); ++K)
        if (PRDepths[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
        }
    });

    // Also compute the critical path length through MBB when possible.
    if (TBI.HasValidInstrHeights)
      TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);

    for (const auto &UseMI : *MBB) {
      updateDepth(TBI, UseMI, RegUnits);
    }
  }
}

// Identify physreg dependencies for MI when scanning instructions upwards.
// Return the issue height of MI after considering any live regunits.
// Height is the issue height computed from virtual register dependencies alone.
static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
                                      SparseSet<LiveRegUnit> &RegUnits,
                                      const TargetSchedModel &SchedModel,
                                      const TargetInstrInfo *TII,
                                      const TargetRegisterInfo *TRI) {
  SmallVector<unsigned, 8> ReadOps;

  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg.isPhysical())
      continue;
    // Remember reads; they are processed after all defs below.
    if (MO.readsReg())
      ReadOps.push_back(MO.getOperandNo());
    if (!MO.isDef())
      continue;
    // This is a def of Reg. Remove corresponding entries from RegUnits, and
    // update MI Height to consider the physreg dependencies.
    for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) {
      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
      if (I == RegUnits.end())
        continue;
      unsigned DepHeight = I->Cycle;
      if (!MI.isTransient()) {
        // We may not know the UseMI of this dependency, if it came from the
        // live-in list. SchedModel can handle a NULL UseMI.
        DepHeight += SchedModel.computeOperandLatency(&MI, MO.getOperandNo(),
                                                      I->MI, I->Op);
      }
      Height = std::max(Height, DepHeight);
      // This regunit is dead above MI.
      RegUnits.erase(I);
    }
  }

  // Now we know the height of MI. Update any regunits read.
  for (unsigned Op : ReadOps) {
    MCRegister Reg = MI.getOperand(Op).getReg().asMCReg();
    for (MCRegUnit Unit : TRI->regunits(Reg)) {
      LiveRegUnit &LRU = RegUnits[Unit];
      // Set the height to the highest reader of the unit.
      if (LRU.Cycle <= Height && LRU.MI != &MI) {
        LRU.Cycle = Height;
        LRU.MI = &MI;
        LRU.Op = Op;
      }
    }
  }

  return Height;
}

using MIHeightMap = DenseMap<const MachineInstr *, unsigned>;

// Push the height of DefMI upwards if required to match UseMI.
// Return true if this is the first time DefMI was seen.
static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI,
                          unsigned UseHeight, MIHeightMap &Heights,
                          const TargetSchedModel &SchedModel,
                          const TargetInstrInfo *TII) {
  // Adjust height by Dep.DefMI latency.
  if (!Dep.DefMI->isTransient())
    UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI,
                                                  Dep.UseOp);

  // Update Heights[DefMI] to be the maximum height seen.
  const auto &[I, Inserted] = Heights.insert({Dep.DefMI, UseHeight});
  if (Inserted)
    return true;

  // DefMI has been pushed before. Give it the max height.
  if (I->second < UseHeight)
    I->second = UseHeight;
  return false;
}

/// Assuming that the virtual register defined by DefMI:DefOp was used by
/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
/// when reaching the block that contains DefMI.
void MachineTraceMetrics::Ensemble::
addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
           ArrayRef<const MachineBasicBlock*> Trace) {
  assert(!Trace.empty() && "Trace should contain at least one block");
  Register Reg = DefMI->getOperand(DefOp).getReg();
  assert(Reg.isVirtual());
  const MachineBasicBlock *DefMBB = DefMI->getParent();

  // Reg is live-in to all blocks in Trace that follow DefMBB.
  for (const MachineBasicBlock *MBB : llvm::reverse(Trace)) {
    if (MBB == DefMBB)
      return;
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    // Just add the register. The height will be updated later.
993 TBI.LiveIns.push_back(Reg); 994 } 995 } 996 997 /// Compute instruction heights in the trace through MBB. This updates MBB and 998 /// the blocks below it in the trace. It is assumed that the trace has already 999 /// been computed. 1000 void MachineTraceMetrics::Ensemble:: 1001 computeInstrHeights(const MachineBasicBlock *MBB) { 1002 // The bottom of the trace may already be computed. 1003 // Find the blocks that need updating. 1004 SmallVector<const MachineBasicBlock*, 8> Stack; 1005 while (MBB) { 1006 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; 1007 assert(TBI.hasValidHeight() && "Incomplete trace"); 1008 if (TBI.HasValidInstrHeights) 1009 break; 1010 Stack.push_back(MBB); 1011 TBI.LiveIns.clear(); 1012 MBB = TBI.Succ; 1013 } 1014 1015 // As we move upwards in the trace, keep track of instructions that are 1016 // required by deeper trace instructions. Map MI -> height required so far. 1017 MIHeightMap Heights; 1018 1019 // For physregs, the def isn't known when we see the use. 1020 // Instead, keep track of the highest use of each regunit. 1021 SparseSet<LiveRegUnit> RegUnits; 1022 RegUnits.setUniverse(MTM.TRI->getNumRegUnits()); 1023 1024 // If the bottom of the trace was already precomputed, initialize heights 1025 // from its live-in list. 1026 // MBB is the highest precomputed block in the trace. 1027 if (MBB) { 1028 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; 1029 for (LiveInReg &LI : TBI.LiveIns) { 1030 if (LI.Reg.isVirtual()) { 1031 // For virtual registers, the def latency is included. 1032 unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)]; 1033 if (Height < LI.Height) 1034 Height = LI.Height; 1035 } else { 1036 // For register units, the def latency is not included because we don't 1037 // know the def yet. 1038 RegUnits[LI.Reg].Cycle = LI.Height; 1039 } 1040 } 1041 } 1042 1043 // Go through the trace blocks in bottom-up order. 
1044 SmallVector<DataDep, 8> Deps; 1045 for (;!Stack.empty(); Stack.pop_back()) { 1046 MBB = Stack.back(); 1047 LLVM_DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n"); 1048 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; 1049 TBI.HasValidInstrHeights = true; 1050 TBI.CriticalPath = 0; 1051 1052 LLVM_DEBUG({ 1053 dbgs() << format("%7u Instructions\n", TBI.InstrHeight); 1054 ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber()); 1055 for (unsigned K = 0; K < PRHeights.size(); ++K) 1056 if (PRHeights[K]) { 1057 unsigned Factor = MTM.SchedModel.getResourceFactor(K); 1058 dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K])) 1059 << MTM.SchedModel.getProcResource(K)->Name << " (" 1060 << PRHeights[K]/Factor << " ops x" << Factor << ")\n"; 1061 } 1062 }); 1063 1064 // Get dependencies from PHIs in the trace successor. 1065 const MachineBasicBlock *Succ = TBI.Succ; 1066 // If MBB is the last block in the trace, and it has a back-edge to the 1067 // loop header, get loop-carried dependencies from PHIs in the header. For 1068 // that purpose, pretend that all the loop header PHIs have height 0. 1069 if (!Succ) 1070 if (const MachineLoop *Loop = getLoopFor(MBB)) 1071 if (MBB->isSuccessor(Loop->getHeader())) 1072 Succ = Loop->getHeader(); 1073 1074 if (Succ) { 1075 for (const auto &PHI : *Succ) { 1076 if (!PHI.isPHI()) 1077 break; 1078 Deps.clear(); 1079 getPHIDeps(PHI, Deps, MBB, MTM.MRI); 1080 if (!Deps.empty()) { 1081 // Loop header PHI heights are all 0. 1082 unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0; 1083 LLVM_DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI); 1084 if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel, 1085 MTM.TII)) 1086 addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack); 1087 } 1088 } 1089 } 1090 1091 // Go through the block backwards. 
1092 for (const MachineInstr &MI : reverse(*MBB)) { 1093 // Find the MI height as determined by virtual register uses in the 1094 // trace below. 1095 unsigned Cycle = 0; 1096 MIHeightMap::iterator HeightI = Heights.find(&MI); 1097 if (HeightI != Heights.end()) { 1098 Cycle = HeightI->second; 1099 // We won't be seeing any more MI uses. 1100 Heights.erase(HeightI); 1101 } 1102 1103 // Don't process PHI deps. They depend on the specific predecessor, and 1104 // we'll get them when visiting the predecessor. 1105 Deps.clear(); 1106 bool HasPhysRegs = !MI.isPHI() && getDataDeps(MI, Deps, MTM.MRI); 1107 1108 // There may also be regunit dependencies to include in the height. 1109 if (HasPhysRegs) 1110 Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, MTM.SchedModel, 1111 MTM.TII, MTM.TRI); 1112 1113 // Update the required height of any virtual registers read by MI. 1114 for (const DataDep &Dep : Deps) 1115 if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII)) 1116 addLiveIns(Dep.DefMI, Dep.DefOp, Stack); 1117 1118 InstrCycles &MICycles = Cycles[&MI]; 1119 MICycles.Height = Cycle; 1120 if (!TBI.HasValidInstrDepths) { 1121 LLVM_DEBUG(dbgs() << Cycle << '\t' << MI); 1122 continue; 1123 } 1124 // Update critical path length. 1125 TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth); 1126 LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI); 1127 } 1128 1129 // Update virtual live-in heights. They were added by addLiveIns() with a 0 1130 // height because the final height isn't known until now. 1131 LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:"); 1132 for (LiveInReg &LIR : TBI.LiveIns) { 1133 const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); 1134 LIR.Height = Heights.lookup(DefMI); 1135 LLVM_DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height); 1136 } 1137 1138 // Transfer the live regunits to the live-in list. 
1139 for (const LiveRegUnit &RU : RegUnits) { 1140 TBI.LiveIns.emplace_back(RU.RegUnit, RU.Cycle); 1141 LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RU.RegUnit, MTM.TRI) << '@' 1142 << RU.Cycle); 1143 } 1144 LLVM_DEBUG(dbgs() << '\n'); 1145 1146 if (!TBI.HasValidInstrDepths) 1147 continue; 1148 // Add live-ins to the critical path length. 1149 TBI.CriticalPath = std::max(TBI.CriticalPath, 1150 computeCrossBlockCriticalPath(TBI)); 1151 LLVM_DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n'); 1152 } 1153 } 1154 1155 MachineTraceMetrics::Trace 1156 MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) { 1157 TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; 1158 1159 if (!TBI.hasValidDepth() || !TBI.hasValidHeight()) 1160 computeTrace(MBB); 1161 if (!TBI.HasValidInstrDepths) 1162 computeInstrDepths(MBB); 1163 if (!TBI.HasValidInstrHeights) 1164 computeInstrHeights(MBB); 1165 1166 return Trace(*this, TBI); 1167 } 1168 1169 unsigned 1170 MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr &MI) const { 1171 assert(getBlockNum() == unsigned(MI.getParent()->getNumber()) && 1172 "MI must be in the trace center block"); 1173 InstrCycles Cyc = getInstrCycles(MI); 1174 return getCriticalPath() - (Cyc.Depth + Cyc.Height); 1175 } 1176 1177 unsigned 1178 MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr &PHI) const { 1179 const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum()); 1180 SmallVector<DataDep, 1> Deps; 1181 getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI); 1182 assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor"); 1183 DataDep &Dep = Deps.front(); 1184 unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth; 1185 // Add latency if DefMI is a real instruction. Transients get latency 0. 
1186 if (!Dep.DefMI->isTransient()) 1187 DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, 1188 &PHI, Dep.UseOp); 1189 return DepCycle; 1190 } 1191 1192 /// When bottom is set include instructions in current block in estimate. 1193 unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { 1194 // Find the limiting processor resource. 1195 // Numbers have been pre-scaled to be comparable. 1196 unsigned PRMax = 0; 1197 ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum()); 1198 if (Bottom) { 1199 ArrayRef<unsigned> PRCycles = TE.MTM.getProcReleaseAtCycles(getBlockNum()); 1200 for (unsigned K = 0; K < PRDepths.size(); ++K) 1201 PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]); 1202 } else { 1203 for (unsigned PRD : PRDepths) 1204 PRMax = std::max(PRMax, PRD); 1205 } 1206 // Convert to cycle count. 1207 PRMax = TE.MTM.getCycles(PRMax); 1208 1209 /// All instructions before current block 1210 unsigned Instrs = TBI.InstrDepth; 1211 // plus instructions in current block 1212 if (Bottom) 1213 Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount; 1214 if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) 1215 Instrs /= IW; 1216 // Assume issue width 1 without a schedule model. 1217 return std::max(Instrs, PRMax); 1218 } 1219 1220 unsigned MachineTraceMetrics::Trace::getResourceLength( 1221 ArrayRef<const MachineBasicBlock *> Extrablocks, 1222 ArrayRef<const MCSchedClassDesc *> ExtraInstrs, 1223 ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const { 1224 // Add up resources above and below the center block. 
1225 ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum()); 1226 ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum()); 1227 unsigned PRMax = 0; 1228 1229 // Capture computing cycles from extra instructions 1230 auto ExtraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs, 1231 unsigned ResourceIdx) -> unsigned { 1232 unsigned Cycles = 0; 1233 for (const MCSchedClassDesc *SC : Instrs) { 1234 if (!SC->isValid()) 1235 continue; 1236 for (TargetSchedModel::ProcResIter 1237 PI = TE.MTM.SchedModel.getWriteProcResBegin(SC), 1238 PE = TE.MTM.SchedModel.getWriteProcResEnd(SC); 1239 PI != PE; ++PI) { 1240 if (PI->ProcResourceIdx != ResourceIdx) 1241 continue; 1242 Cycles += (PI->ReleaseAtCycle * 1243 TE.MTM.SchedModel.getResourceFactor(ResourceIdx)); 1244 } 1245 } 1246 return Cycles; 1247 }; 1248 1249 for (unsigned K = 0; K < PRDepths.size(); ++K) { 1250 unsigned PRCycles = PRDepths[K] + PRHeights[K]; 1251 for (const MachineBasicBlock *MBB : Extrablocks) 1252 PRCycles += TE.MTM.getProcReleaseAtCycles(MBB->getNumber())[K]; 1253 PRCycles += ExtraCycles(ExtraInstrs, K); 1254 PRCycles -= ExtraCycles(RemoveInstrs, K); 1255 PRMax = std::max(PRMax, PRCycles); 1256 } 1257 // Convert to cycle count. 1258 PRMax = TE.MTM.getCycles(PRMax); 1259 1260 // Instrs: #instructions in current trace outside current block. 1261 unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight; 1262 // Add instruction count from the extra blocks. 1263 for (const MachineBasicBlock *MBB : Extrablocks) 1264 Instrs += TE.MTM.getResources(MBB)->InstrCount; 1265 Instrs += ExtraInstrs.size(); 1266 Instrs -= RemoveInstrs.size(); 1267 if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) 1268 Instrs /= IW; 1269 // Assume issue width 1 without a schedule model. 
1270 return std::max(Instrs, PRMax); 1271 } 1272 1273 bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr &DefMI, 1274 const MachineInstr &UseMI) const { 1275 if (DefMI.getParent() == UseMI.getParent()) 1276 return true; 1277 1278 const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI.getParent()->getNumber()]; 1279 const TraceBlockInfo &TBI = TE.BlockInfo[UseMI.getParent()->getNumber()]; 1280 1281 return DepTBI.isUsefulDominator(TBI); 1282 } 1283 1284 void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const { 1285 OS << getName() << " ensemble:\n"; 1286 for (unsigned Idx = 0; Idx < BlockInfo.size(); ++Idx) { 1287 OS << " %bb." << Idx << '\t'; 1288 BlockInfo[Idx].print(OS); 1289 OS << '\n'; 1290 } 1291 } 1292 1293 void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const { 1294 if (hasValidDepth()) { 1295 OS << "depth=" << InstrDepth; 1296 if (Pred) 1297 OS << " pred=" << printMBBReference(*Pred); 1298 else 1299 OS << " pred=null"; 1300 OS << " head=%bb." << Head; 1301 if (HasValidInstrDepths) 1302 OS << " +instrs"; 1303 } else 1304 OS << "depth invalid"; 1305 OS << ", "; 1306 if (hasValidHeight()) { 1307 OS << "height=" << InstrHeight; 1308 if (Succ) 1309 OS << " succ=" << printMBBReference(*Succ); 1310 else 1311 OS << " succ=null"; 1312 OS << " tail=%bb." << Tail; 1313 if (HasValidInstrHeights) 1314 OS << " +instrs"; 1315 } else 1316 OS << "height invalid"; 1317 if (HasValidInstrDepths && HasValidInstrHeights) 1318 OS << ", crit=" << CriticalPath; 1319 } 1320 1321 void MachineTraceMetrics::Trace::print(raw_ostream &OS) const { 1322 unsigned MBBNum = &TBI - &TE.BlockInfo[0]; 1323 1324 OS << TE.getName() << " trace %bb." << TBI.Head << " --> %bb." << MBBNum 1325 << " --> %bb." 
<< TBI.Tail << ':'; 1326 if (TBI.hasValidHeight() && TBI.hasValidDepth()) 1327 OS << ' ' << getInstrCount() << " instrs."; 1328 if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights) 1329 OS << ' ' << TBI.CriticalPath << " cycles."; 1330 1331 const MachineTraceMetrics::TraceBlockInfo *Block = &TBI; 1332 OS << "\n%bb." << MBBNum; 1333 while (Block->hasValidDepth() && Block->Pred) { 1334 unsigned Num = Block->Pred->getNumber(); 1335 OS << " <- " << printMBBReference(*Block->Pred); 1336 Block = &TE.BlockInfo[Num]; 1337 } 1338 1339 Block = &TBI; 1340 OS << "\n "; 1341 while (Block->hasValidHeight() && Block->Succ) { 1342 unsigned Num = Block->Succ->getNumber(); 1343 OS << " -> " << printMBBReference(*Block->Succ); 1344 Block = &TE.BlockInfo[Num]; 1345 } 1346 OS << '\n'; 1347 } 1348