//===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "machine-trace-metrics"

AnalysisKey MachineTraceMetricsAnalysis::Key;

// New pass manager entry point: construct the analysis result from this
// function's machine loop info.
MachineTraceMetricsAnalysis::Result
MachineTraceMetricsAnalysis::run(MachineFunction &MF,
                                 MachineFunctionAnalysisManager &MFAM) {
  return Result(MF, MFAM.getResult<MachineLoopAnalysis>(MF));
}

// Utility pass that only runs the analysis' internal consistency checks.
// It modifies nothing, so all analyses are preserved.
PreservedAnalyses
MachineTraceMetricsVerifierPass::run(MachineFunction &MF,
                                     MachineFunctionAnalysisManager &MFAM) {
  MFAM.getResult<MachineTraceMetricsAnalysis>(MF).verifyAnalysis();
  return PreservedAnalyses::all();
}

char MachineTraceMetricsWrapperPass::ID = 0;

char &llvm::MachineTraceMetricsID = MachineTraceMetricsWrapperPass::ID;

INITIALIZE_PASS_BEGIN(MachineTraceMetricsWrapperPass, DEBUG_TYPE,
                      "Machine Trace Metrics", false, true)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_END(MachineTraceMetricsWrapperPass, DEBUG_TYPE,
                    "Machine Trace Metrics", false, true)

MachineTraceMetricsWrapperPass::MachineTraceMetricsWrapperPass()
    : MachineFunctionPass(ID) {}

// This is a pure analysis: it preserves everything and only requires machine
// loop info.
void MachineTraceMetricsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<MachineLoopInfoWrapperPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

// Cache the target hooks and size the per-block tables. BlockInfo is indexed
// by block number; ProcReleaseAtCycles is a flattened
// NumBlocks x NumProcResourceKinds matrix.
void MachineTraceMetrics::init(MachineFunction &Func,
                               const MachineLoopInfo &LI) {
  MF = &Func;
  const TargetSubtargetInfo &ST = MF->getSubtarget();
  TII = ST.getInstrInfo();
  TRI = ST.getRegisterInfo();
  MRI = &MF->getRegInfo();
  Loops = &LI;
  SchedModel.init(&ST);
  BlockInfo.resize(MF->getNumBlockIDs());
  ProcReleaseAtCycles.resize(MF->getNumBlockIDs() *
                             SchedModel.getNumProcResourceKinds());
}

bool MachineTraceMetricsWrapperPass::runOnMachineFunction(MachineFunction &MF) {
  MTM.init(MF, getAnalysis<MachineLoopInfoWrapperPass>().getLI());
  return false;
}

MachineTraceMetrics::~MachineTraceMetrics() { clear(); }

// Drop all cached per-block data and destroy any allocated ensembles.
void MachineTraceMetrics::clear() {
  MF = nullptr;
  BlockInfo.clear();
  for (auto &E : Ensembles)
    E.reset();
}

//===----------------------------------------------------------------------===//
// Fixed block information
//===----------------------------------------------------------------------===//
//
// The number of instructions in a basic block and the CPU resources used by
// those instructions don't depend on any given trace strategy.

/// Compute the resource usage in basic block MBB.
const MachineTraceMetrics::FixedBlockInfo*
MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
  assert(MBB && "No basic block");
  FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
  // Results are cached per block; only compute them on the first query.
  if (FBI->hasResources())
    return FBI;

  // Compute resource usage in the block.
  FBI->HasCalls = false;
  unsigned InstrCount = 0;

  // Add up per-processor resource cycles as well.
  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
  SmallVector<unsigned, 32> PRCycles(PRKinds);

  for (const auto &MI : *MBB) {
    // Transient instructions (COPY, PHI, ...) never issue; don't count them.
    if (MI.isTransient())
      continue;
    ++InstrCount;
    if (MI.isCall())
      FBI->HasCalls = true;

    // Count processor resources used.
    if (!SchedModel.hasInstrSchedModel())
      continue;
    const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
    if (!SC->isValid())
      continue;

    for (TargetSchedModel::ProcResIter
         PI = SchedModel.getWriteProcResBegin(SC),
         PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
      assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
      PRCycles[PI->ProcResourceIdx] += PI->ReleaseAtCycle;
    }
  }
  FBI->InstrCount = InstrCount;

  // Scale the resource cycles so they are comparable.
  unsigned PROffset = MBB->getNumber() * PRKinds;
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcReleaseAtCycles[PROffset + K] =
      PRCycles[K] * SchedModel.getResourceFactor(K);

  return FBI;
}

// Return the scaled release-at cycles for block MBBNum, one entry per
// processor resource kind. Valid only after getResources(MBB) has run.
ArrayRef<unsigned>
MachineTraceMetrics::getProcReleaseAtCycles(unsigned MBBNum) const {
  assert(BlockInfo[MBBNum].hasResources() &&
         "getResources() must be called before getProcReleaseAtCycles()");
  unsigned PRKinds = SchedModel.getNumProcResourceKinds();
  assert((MBBNum+1) * PRKinds <= ProcReleaseAtCycles.size());
  return ArrayRef(ProcReleaseAtCycles.data() + MBBNum * PRKinds, PRKinds);
}

//===----------------------------------------------------------------------===//
// Ensemble utility functions
//===----------------------------------------------------------------------===//

MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
  : MTM(*ct) {
  BlockInfo.resize(MTM.BlockInfo.size());
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  // Flattened NumBlocks x PRKinds matrices, same layout as
  // ProcReleaseAtCycles.
  ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
  ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
}

// Virtual destructor serves as an anchor.
MachineTraceMetrics::Ensemble::~Ensemble() = default;

const MachineLoop*
MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
  return MTM.Loops->getLoopFor(MBB);
}

// Update resource-related information in the TraceBlockInfo for MBB.
// Only update resources related to the trace above MBB.
void MachineTraceMetrics::Ensemble::
computeDepthResources(const MachineBasicBlock *MBB) {
  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  unsigned PROffset = MBB->getNumber() * PRKinds;

  // Compute resources from trace above. The top block is simple.
  if (!TBI->Pred) {
    TBI->InstrDepth = 0;
    TBI->Head = MBB->getNumber();
    std::fill(ProcResourceDepths.begin() + PROffset,
              ProcResourceDepths.begin() + PROffset + PRKinds, 0);
    return;
  }

  // Compute from the block above. A post-order traversal ensures the
  // predecessor is always computed first.
  unsigned PredNum = TBI->Pred->getNumber();
  TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
  assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
  const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
  TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
  TBI->Head = PredTBI->Head;

  // Compute per-resource depths.
  ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
  ArrayRef<unsigned> PredPRCycles = MTM.getProcReleaseAtCycles(PredNum);
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
}

// Update resource-related information in the TraceBlockInfo for MBB.
// Only update resources related to the trace below MBB.
void MachineTraceMetrics::Ensemble::
computeHeightResources(const MachineBasicBlock *MBB) {
  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  unsigned PROffset = MBB->getNumber() * PRKinds;

  // Compute resources for the current block.
  TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
  ArrayRef<unsigned> PRCycles = MTM.getProcReleaseAtCycles(MBB->getNumber());

  // The trace tail is done.
  if (!TBI->Succ) {
    TBI->Tail = MBB->getNumber();
    llvm::copy(PRCycles, ProcResourceHeights.begin() + PROffset);
    return;
  }

  // Compute from the block below. A post-order traversal ensures the
  // predecessor is always computed first.
  unsigned SuccNum = TBI->Succ->getNumber();
  TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
  assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
  TBI->InstrHeight += SuccTBI->InstrHeight;
  TBI->Tail = SuccTBI->Tail;

  // Compute per-resource heights.
  ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
  for (unsigned K = 0; K != PRKinds; ++K)
    ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
}

// Check if depth resources for MBB are valid and return the TBI.
// Return NULL if the resources have been invalidated.
const MachineTraceMetrics::TraceBlockInfo*
MachineTraceMetrics::Ensemble::
getDepthResources(const MachineBasicBlock *MBB) const {
  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  return TBI->hasValidDepth() ? TBI : nullptr;
}

// Check if height resources for MBB are valid and return the TBI.
// Return NULL if the resources have been invalidated.
const MachineTraceMetrics::TraceBlockInfo*
MachineTraceMetrics::Ensemble::
getHeightResources(const MachineBasicBlock *MBB) const {
  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
  return TBI->hasValidHeight() ? TBI : nullptr;
}

/// Get an array of processor resource depths for MBB. Indexed by processor
/// resource kind, this array contains the scaled processor resources consumed
/// by all blocks preceding MBB in its trace. It does not include instructions
/// in MBB.
///
/// Compare TraceBlockInfo::InstrDepth.
ArrayRef<unsigned>
MachineTraceMetrics::Ensemble::
getProcResourceDepths(unsigned MBBNum) const {
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
  return ArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
}

/// Get an array of processor resource heights for MBB. Indexed by processor
/// resource kind, this array contains the scaled processor resources consumed
/// by this block and all blocks following it in its trace.
///
/// Compare TraceBlockInfo::InstrHeight.
ArrayRef<unsigned>
MachineTraceMetrics::Ensemble::
getProcResourceHeights(unsigned MBBNum) const {
  unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
  assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
  return ArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
}

//===----------------------------------------------------------------------===//
// Trace Selection Strategies
//===----------------------------------------------------------------------===//
//
// A trace selection strategy is implemented as a sub-class of Ensemble. The
// trace through a block B is computed by two DFS traversals of the CFG
// starting from B. One upwards, and one downwards. During the upwards DFS,
// pickTracePred() is called on the post-ordered blocks. During the downwards
// DFS, pickTraceSucc() is called in a post-order.
//

// We never allow traces that leave loops, but we do allow traces to enter
// nested loops. We also never allow traces to contain back-edges.
//
// This means that a loop header can never appear above the center block of a
// trace, except as the trace head. Below the center block, loop exiting edges
// are banned.
//
// Return true if an edge from the From loop to the To loop is leaving a loop.
// Either of To and From can be null.
static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
  return From && !From->contains(To);
}

// MinInstrCountEnsemble - Pick the trace that executes the least number of
// instructions.
323 namespace { 324 325 class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble { 326 const char *getName() const override { return "MinInstr"; } 327 const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override; 328 const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override; 329 330 public: 331 MinInstrCountEnsemble(MachineTraceMetrics *mtm) 332 : MachineTraceMetrics::Ensemble(mtm) {} 333 }; 334 335 /// Pick only the current basic block for the trace and do not choose any 336 /// predecessors/successors. 337 class LocalEnsemble : public MachineTraceMetrics::Ensemble { 338 const char *getName() const override { return "Local"; } 339 const MachineBasicBlock *pickTracePred(const MachineBasicBlock *) override { 340 return nullptr; 341 }; 342 const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock *) override { 343 return nullptr; 344 }; 345 346 public: 347 LocalEnsemble(MachineTraceMetrics *MTM) 348 : MachineTraceMetrics::Ensemble(MTM) {} 349 }; 350 } // end anonymous namespace 351 352 // Select the preferred predecessor for MBB. 353 const MachineBasicBlock* 354 MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) { 355 if (MBB->pred_empty()) 356 return nullptr; 357 const MachineLoop *CurLoop = getLoopFor(MBB); 358 // Don't leave loops, and never follow back-edges. 359 if (CurLoop && MBB == CurLoop->getHeader()) 360 return nullptr; 361 unsigned CurCount = MTM.getResources(MBB)->InstrCount; 362 const MachineBasicBlock *Best = nullptr; 363 unsigned BestDepth = 0; 364 for (const MachineBasicBlock *Pred : MBB->predecessors()) { 365 const MachineTraceMetrics::TraceBlockInfo *PredTBI = 366 getDepthResources(Pred); 367 // Ignore cycles that aren't natural loops. 368 if (!PredTBI) 369 continue; 370 // Pick the predecessor that would give this block the smallest InstrDepth. 
371 unsigned Depth = PredTBI->InstrDepth + CurCount; 372 if (!Best || Depth < BestDepth) { 373 Best = Pred; 374 BestDepth = Depth; 375 } 376 } 377 return Best; 378 } 379 380 // Select the preferred successor for MBB. 381 const MachineBasicBlock* 382 MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) { 383 if (MBB->succ_empty()) 384 return nullptr; 385 const MachineLoop *CurLoop = getLoopFor(MBB); 386 const MachineBasicBlock *Best = nullptr; 387 unsigned BestHeight = 0; 388 for (const MachineBasicBlock *Succ : MBB->successors()) { 389 // Don't consider back-edges. 390 if (CurLoop && Succ == CurLoop->getHeader()) 391 continue; 392 // Don't consider successors exiting CurLoop. 393 if (isExitingLoop(CurLoop, getLoopFor(Succ))) 394 continue; 395 const MachineTraceMetrics::TraceBlockInfo *SuccTBI = 396 getHeightResources(Succ); 397 // Ignore cycles that aren't natural loops. 398 if (!SuccTBI) 399 continue; 400 // Pick the successor that would give this block the smallest InstrHeight. 401 unsigned Height = SuccTBI->InstrHeight; 402 if (!Best || Height < BestHeight) { 403 Best = Succ; 404 BestHeight = Height; 405 } 406 } 407 return Best; 408 } 409 410 // Get an Ensemble sub-class for the requested trace strategy. 411 MachineTraceMetrics::Ensemble * 412 MachineTraceMetrics::getEnsemble(MachineTraceStrategy strategy) { 413 assert(strategy < MachineTraceStrategy::TS_NumStrategies && 414 "Invalid trace strategy enum"); 415 std::unique_ptr<MachineTraceMetrics::Ensemble> &E = 416 Ensembles[static_cast<size_t>(strategy)]; 417 if (E) 418 return E.get(); 419 420 // Allocate new Ensemble on demand. 
421 switch (strategy) { 422 case MachineTraceStrategy::TS_MinInstrCount: 423 E = std::make_unique<MinInstrCountEnsemble>(MinInstrCountEnsemble(this)); 424 break; 425 case MachineTraceStrategy::TS_Local: 426 E = std::make_unique<LocalEnsemble>(LocalEnsemble(this)); 427 break; 428 default: llvm_unreachable("Invalid trace strategy enum"); 429 } 430 return E.get(); 431 } 432 433 void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) { 434 LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB) 435 << '\n'); 436 BlockInfo[MBB->getNumber()].invalidate(); 437 for (auto &E : Ensembles) 438 if (E) 439 E->invalidate(MBB); 440 } 441 442 bool MachineTraceMetrics::invalidate( 443 MachineFunction &, const PreservedAnalyses &PA, 444 MachineFunctionAnalysisManager::Invalidator &) { 445 // Check whether the analysis, all analyses on machine functions, or the 446 // machine function's CFG have been preserved. 447 auto PAC = PA.getChecker<MachineTraceMetricsAnalysis>(); 448 return !PAC.preserved() && 449 !PAC.preservedSet<AllAnalysesOn<MachineFunction>>() && 450 !PAC.preservedSet<CFGAnalyses>(); 451 } 452 453 void MachineTraceMetrics::verifyAnalysis() const { 454 if (!MF) 455 return; 456 #ifndef NDEBUG 457 assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size"); 458 for (auto &E : Ensembles) 459 if (E) 460 E->verify(); 461 #endif 462 } 463 464 //===----------------------------------------------------------------------===// 465 // Trace building 466 //===----------------------------------------------------------------------===// 467 // 468 // Traces are built by two CFG traversals. To avoid recomputing too much, use a 469 // set abstraction that confines the search to the current loop, and doesn't 470 // revisit blocks. 
471 472 namespace { 473 474 struct LoopBounds { 475 MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks; 476 SmallPtrSet<const MachineBasicBlock*, 8> Visited; 477 const MachineLoopInfo *Loops; 478 bool Downward = false; 479 480 LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks, 481 const MachineLoopInfo *loops) : Blocks(blocks), Loops(loops) {} 482 }; 483 484 } // end anonymous namespace 485 486 // Specialize po_iterator_storage in order to prune the post-order traversal so 487 // it is limited to the current loop and doesn't traverse the loop back edges. 488 namespace llvm { 489 490 template<> 491 class po_iterator_storage<LoopBounds, true> { 492 LoopBounds &LB; 493 494 public: 495 po_iterator_storage(LoopBounds &lb) : LB(lb) {} 496 497 void finishPostorder(const MachineBasicBlock*) {} 498 499 bool insertEdge(std::optional<const MachineBasicBlock *> From, 500 const MachineBasicBlock *To) { 501 // Skip already visited To blocks. 502 MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()]; 503 if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth()) 504 return false; 505 // From is null once when To is the trace center block. 506 if (From) { 507 if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) { 508 // Don't follow backedges, don't leave FromLoop when going upwards. 509 if ((LB.Downward ? To : *From) == FromLoop->getHeader()) 510 return false; 511 // Don't leave FromLoop. 512 if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To))) 513 return false; 514 } 515 } 516 // To is a new block. Mark the block as visited in case the CFG has cycles 517 // that MachineLoopInfo didn't recognize as a natural loop. 518 return LB.Visited.insert(To).second; 519 } 520 }; 521 522 } // end namespace llvm 523 524 /// Compute the trace through MBB. 
void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through "
                    << printMBBReference(*MBB) << '\n');
  // Set up loop bounds for the backwards post-order traversal.
  LoopBounds Bounds(BlockInfo, MTM.Loops);

  // Run an upwards post-order search for the trace start.
  Bounds.Downward = false;
  Bounds.Visited.clear();
  for (const auto *I : inverse_post_order_ext(MBB, Bounds)) {
    LLVM_DEBUG(dbgs() << "  pred for " << printMBBReference(*I) << ": ");
    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
    // All the predecessors have been visited, pick the preferred one.
    TBI.Pred = pickTracePred(I);
    LLVM_DEBUG({
      if (TBI.Pred)
        dbgs() << printMBBReference(*TBI.Pred) << '\n';
      else
        dbgs() << "null\n";
    });
    // The trace leading to I is now known, compute the depth resources.
    computeDepthResources(I);
  }

  // Run a downwards post-order search for the trace end.
  Bounds.Downward = true;
  Bounds.Visited.clear();
  for (const auto *I : post_order_ext(MBB, Bounds)) {
    LLVM_DEBUG(dbgs() << "  succ for " << printMBBReference(*I) << ": ");
    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
    // All the successors have been visited, pick the preferred one.
    TBI.Succ = pickTraceSucc(I);
    LLVM_DEBUG({
      if (TBI.Succ)
        dbgs() << printMBBReference(*TBI.Succ) << '\n';
      else
        dbgs() << "null\n";
    });
    // The trace leaving I is now known, compute the height resources.
    computeHeightResources(I);
  }
}

/// Invalidate traces through BadMBB.
void
MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
  SmallVector<const MachineBasicBlock*, 16> WorkList;
  TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];

  // Invalidate height resources of blocks above MBB.
  // Propagation only follows Pred/Succ links: a predecessor whose preferred
  // successor is MBB inherits the invalid height, and so on transitively.
  if (BadTBI.hasValidHeight()) {
    BadTBI.invalidateHeight();
    WorkList.push_back(BadMBB);
    do {
      const MachineBasicBlock *MBB = WorkList.pop_back_val();
      LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
                        << getName() << " height.\n");
      // Find any MBB predecessors that have MBB as their preferred successor.
      // They are the only ones that need to be invalidated.
      for (const MachineBasicBlock *Pred : MBB->predecessors()) {
        TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
        if (!TBI.hasValidHeight())
          continue;
        if (TBI.Succ == MBB) {
          TBI.invalidateHeight();
          WorkList.push_back(Pred);
          continue;
        }
        // Verify that TBI.Succ is actually a *I successor.
        assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
      }
    } while (!WorkList.empty());
  }

  // Invalidate depth resources of blocks below MBB.
  // Mirror of the height propagation: follow Pred links downwards.
  if (BadTBI.hasValidDepth()) {
    BadTBI.invalidateDepth();
    WorkList.push_back(BadMBB);
    do {
      const MachineBasicBlock *MBB = WorkList.pop_back_val();
      LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
                        << getName() << " depth.\n");
      // Find any MBB successors that have MBB as their preferred predecessor.
      // They are the only ones that need to be invalidated.
      for (const MachineBasicBlock *Succ : MBB->successors()) {
        TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
        if (!TBI.hasValidDepth())
          continue;
        if (TBI.Pred == MBB) {
          TBI.invalidateDepth();
          WorkList.push_back(Succ);
          continue;
        }
        // Verify that TBI.Pred is actually a *I predecessor.
        assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
      }
    } while (!WorkList.empty());
  }

  // Clear any per-instruction data. We only have to do this for BadMBB itself
  // because the instructions in that block may change. Other blocks may be
  // invalidated, but their instructions will stay the same, so there is no
  // need to erase the Cycle entries. They will be overwritten when we
  // recompute.
  for (const auto &I : *BadMBB)
    Cycles.erase(&I);
}

void MachineTraceMetrics::Ensemble::verify() const {
#ifndef NDEBUG
  assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
         "Outdated BlockInfo size");
  for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
    const TraceBlockInfo &TBI = BlockInfo[Num];
    if (TBI.hasValidDepth() && TBI.Pred) {
      const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
      assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
      assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
             "Trace is broken, depth should have been invalidated.");
      const MachineLoop *Loop = getLoopFor(MBB);
      assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
    }
    if (TBI.hasValidHeight() && TBI.Succ) {
      const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
      assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
      assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
             "Trace is broken, height should have been invalidated.");
      const MachineLoop *Loop = getLoopFor(MBB);
      const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
      assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
             "Trace contains backedge");
    }
  }
#endif
}

//===----------------------------------------------------------------------===//
// Data Dependencies
//===----------------------------------------------------------------------===//
//
// Compute the depth and height of each instruction based on data dependencies
// and instruction latencies. These cycle numbers assume that the CPU can issue
// an infinite number of instructions per cycle as long as their dependencies
// are ready.
669 670 // A data dependency is represented as a defining MI and operand numbers on the 671 // defining and using MI. 672 namespace { 673 674 struct DataDep { 675 const MachineInstr *DefMI; 676 unsigned DefOp; 677 unsigned UseOp; 678 679 DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp) 680 : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {} 681 682 /// Create a DataDep from an SSA form virtual register. 683 DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp) 684 : UseOp(UseOp) { 685 assert(Register::isVirtualRegister(VirtReg)); 686 MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg); 687 assert(!DefI.atEnd() && "Register has no defs"); 688 DefMI = DefI->getParent(); 689 DefOp = DefI.getOperandNo(); 690 assert((++DefI).atEnd() && "Register has multiple defs"); 691 } 692 }; 693 694 } // end anonymous namespace 695 696 // Get the input data dependencies that must be ready before UseMI can issue. 697 // Return true if UseMI has any physreg operands. 698 static bool getDataDeps(const MachineInstr &UseMI, 699 SmallVectorImpl<DataDep> &Deps, 700 const MachineRegisterInfo *MRI) { 701 // Debug values should not be included in any calculations. 702 if (UseMI.isDebugInstr()) 703 return false; 704 705 bool HasPhysRegs = false; 706 for (const MachineOperand &MO : UseMI.operands()) { 707 if (!MO.isReg()) 708 continue; 709 Register Reg = MO.getReg(); 710 if (!Reg) 711 continue; 712 if (Reg.isPhysical()) { 713 HasPhysRegs = true; 714 continue; 715 } 716 // Collect virtual register reads. 717 if (MO.readsReg()) 718 Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo())); 719 } 720 return HasPhysRegs; 721 } 722 723 // Get the input data dependencies of a PHI instruction, using Pred as the 724 // preferred predecessor. 725 // This will add at most one dependency to Deps. 
726 static void getPHIDeps(const MachineInstr &UseMI, 727 SmallVectorImpl<DataDep> &Deps, 728 const MachineBasicBlock *Pred, 729 const MachineRegisterInfo *MRI) { 730 // No predecessor at the beginning of a trace. Ignore dependencies. 731 if (!Pred) 732 return; 733 assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI"); 734 for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) { 735 if (UseMI.getOperand(i + 1).getMBB() == Pred) { 736 Register Reg = UseMI.getOperand(i).getReg(); 737 Deps.push_back(DataDep(MRI, Reg, i)); 738 return; 739 } 740 } 741 } 742 743 // Identify physreg dependencies for UseMI, and update the live regunit 744 // tracking set when scanning instructions downwards. 745 static void updatePhysDepsDownwards(const MachineInstr *UseMI, 746 SmallVectorImpl<DataDep> &Deps, 747 SparseSet<LiveRegUnit> &RegUnits, 748 const TargetRegisterInfo *TRI) { 749 SmallVector<MCRegister, 8> Kills; 750 SmallVector<unsigned, 8> LiveDefOps; 751 752 for (const MachineOperand &MO : UseMI->operands()) { 753 if (!MO.isReg() || !MO.getReg().isPhysical()) 754 continue; 755 MCRegister Reg = MO.getReg().asMCReg(); 756 // Track live defs and kills for updating RegUnits. 757 if (MO.isDef()) { 758 if (MO.isDead()) 759 Kills.push_back(Reg); 760 else 761 LiveDefOps.push_back(MO.getOperandNo()); 762 } else if (MO.isKill()) 763 Kills.push_back(Reg); 764 // Identify dependencies. 765 if (!MO.readsReg()) 766 continue; 767 for (MCRegUnit Unit : TRI->regunits(Reg)) { 768 SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit); 769 if (I == RegUnits.end()) 770 continue; 771 Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo())); 772 break; 773 } 774 } 775 776 // Update RegUnits to reflect live registers after UseMI. 777 // First kills. 778 for (MCRegister Kill : Kills) 779 for (MCRegUnit Unit : TRI->regunits(Kill)) 780 RegUnits.erase(Unit); 781 782 // Second, live defs. 
783 for (unsigned DefOp : LiveDefOps) { 784 for (MCRegUnit Unit : 785 TRI->regunits(UseMI->getOperand(DefOp).getReg().asMCReg())) { 786 LiveRegUnit &LRU = RegUnits[Unit]; 787 LRU.MI = UseMI; 788 LRU.Op = DefOp; 789 } 790 } 791 } 792 793 /// The length of the critical path through a trace is the maximum of two path 794 /// lengths: 795 /// 796 /// 1. The maximum height+depth over all instructions in the trace center block. 797 /// 798 /// 2. The longest cross-block dependency chain. For small blocks, it is 799 /// possible that the critical path through the trace doesn't include any 800 /// instructions in the block. 801 /// 802 /// This function computes the second number from the live-in list of the 803 /// center block. 804 unsigned MachineTraceMetrics::Ensemble:: 805 computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { 806 assert(TBI.HasValidInstrDepths && "Missing depth info"); 807 assert(TBI.HasValidInstrHeights && "Missing height info"); 808 unsigned MaxLen = 0; 809 for (const LiveInReg &LIR : TBI.LiveIns) { 810 if (!LIR.Reg.isVirtual()) 811 continue; 812 const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); 813 // Ignore dependencies outside the current trace. 814 const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()]; 815 if (!DefTBI.isUsefulDominator(TBI)) 816 continue; 817 unsigned Len = LIR.Height + Cycles[DefMI].Depth; 818 MaxLen = std::max(MaxLen, Len); 819 } 820 return MaxLen; 821 } 822 823 void MachineTraceMetrics::Ensemble:: 824 updateDepth(MachineTraceMetrics::TraceBlockInfo &TBI, const MachineInstr &UseMI, 825 SparseSet<LiveRegUnit> &RegUnits) { 826 SmallVector<DataDep, 8> Deps; 827 // Collect all data dependencies. 828 if (UseMI.isPHI()) 829 getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI); 830 else if (getDataDeps(UseMI, Deps, MTM.MRI)) 831 updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI); 832 833 // Filter and process dependencies, computing the earliest issue cycle. 
834 unsigned Cycle = 0; 835 for (const DataDep &Dep : Deps) { 836 const TraceBlockInfo&DepTBI = 837 BlockInfo[Dep.DefMI->getParent()->getNumber()]; 838 // Ignore dependencies from outside the current trace. 839 if (!DepTBI.isUsefulDominator(TBI)) 840 continue; 841 assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency"); 842 unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth; 843 // Add latency if DefMI is a real instruction. Transients get latency 0. 844 if (!Dep.DefMI->isTransient()) 845 DepCycle += MTM.SchedModel 846 .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp); 847 Cycle = std::max(Cycle, DepCycle); 848 } 849 // Remember the instruction depth. 850 InstrCycles &MICycles = Cycles[&UseMI]; 851 MICycles.Depth = Cycle; 852 853 if (TBI.HasValidInstrHeights) { 854 // Update critical path length. 855 TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height); 856 LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI); 857 } else { 858 LLVM_DEBUG(dbgs() << Cycle << '\t' << UseMI); 859 } 860 } 861 862 void MachineTraceMetrics::Ensemble:: 863 updateDepth(const MachineBasicBlock *MBB, const MachineInstr &UseMI, 864 SparseSet<LiveRegUnit> &RegUnits) { 865 updateDepth(BlockInfo[MBB->getNumber()], UseMI, RegUnits); 866 } 867 868 void MachineTraceMetrics::Ensemble:: 869 updateDepths(MachineBasicBlock::iterator Start, 870 MachineBasicBlock::iterator End, 871 SparseSet<LiveRegUnit> &RegUnits) { 872 for (; Start != End; Start++) 873 updateDepth(Start->getParent(), *Start, RegUnits); 874 } 875 876 /// Compute instruction depths for all instructions above or in MBB in its 877 /// trace. This assumes that the trace through MBB has already been computed. 
void MachineTraceMetrics::Ensemble::
computeInstrDepths(const MachineBasicBlock *MBB) {
  // The top of the trace may already be computed, and HasValidInstrDepths
  // implies Head->HasValidInstrDepths, so we only need to start from the first
  // block in the trace that needs to be recomputed.
  SmallVector<const MachineBasicBlock*, 8> Stack;
  do {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidDepth() && "Incomplete trace");
    if (TBI.HasValidInstrDepths)
      break;
    Stack.push_back(MBB);
    MBB = TBI.Pred;
  } while (MBB);

  // FIXME: If MBB is non-null at this point, it is the last pre-computed block
  // in the trace. We should track any live-out physregs that were defined in
  // the trace. This is quite rare in SSA form, typically created by CSE
  // hoisting a compare.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());

  // Go through trace blocks in top-down order, stopping after the center block.
  while (!Stack.empty()) {
    MBB = Stack.pop_back_val();
    LLVM_DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    TBI.HasValidInstrDepths = true;
    TBI.CriticalPath = 0;

    // Print out resource depths here as well.
    LLVM_DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
      ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
      for (unsigned K = 0; K != PRDepths.size(); ++K)
        if (PRDepths[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
        }
    });

    // Also compute the critical path length through MBB when possible.
    if (TBI.HasValidInstrHeights)
      TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);

    for (const auto &UseMI : *MBB) {
      updateDepth(TBI, UseMI, RegUnits);
    }
  }
}

// Identify physreg dependencies for MI when scanning instructions upwards.
// Return the issue height of MI after considering any live regunits.
// Height is the issue height computed from virtual register dependencies alone.
static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
                                      SparseSet<LiveRegUnit> &RegUnits,
                                      const TargetSchedModel &SchedModel,
                                      const TargetInstrInfo *TII,
                                      const TargetRegisterInfo *TRI) {
  SmallVector<unsigned, 8> ReadOps;

  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg.isPhysical())
      continue;
    // Remember read operands; their regunits are updated once MI's final
    // height is known (second loop below).
    if (MO.readsReg())
      ReadOps.push_back(MO.getOperandNo());
    if (!MO.isDef())
      continue;
    // This is a def of Reg. Remove corresponding entries from RegUnits, and
    // update MI Height to consider the physreg dependencies.
    for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) {
      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
      if (I == RegUnits.end())
        continue;
      unsigned DepHeight = I->Cycle;
      if (!MI.isTransient()) {
        // We may not know the UseMI of this dependency, if it came from the
        // live-in list. SchedModel can handle a NULL UseMI.
        DepHeight += SchedModel.computeOperandLatency(&MI, MO.getOperandNo(),
                                                      I->MI, I->Op);
      }
      Height = std::max(Height, DepHeight);
      // This regunit is dead above MI.
      RegUnits.erase(I);
    }
  }

  // Now we know the height of MI. Update any regunits read.
  for (unsigned Op : ReadOps) {
    MCRegister Reg = MI.getOperand(Op).getReg().asMCReg();
    for (MCRegUnit Unit : TRI->regunits(Reg)) {
      LiveRegUnit &LRU = RegUnits[Unit];
      // Set the height to the highest reader of the unit.
      if (LRU.Cycle <= Height && LRU.MI != &MI) {
        LRU.Cycle = Height;
        LRU.MI = &MI;
        LRU.Op = Op;
      }
    }
  }

  return Height;
}

using MIHeightMap = DenseMap<const MachineInstr *, unsigned>;

// Push the height of DefMI upwards if required to match UseMI.
// Return true if this is the first time DefMI was seen.
static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI,
                          unsigned UseHeight, MIHeightMap &Heights,
                          const TargetSchedModel &SchedModel,
                          const TargetInstrInfo *TII) {
  // Adjust height by Dep.DefMI latency.
  if (!Dep.DefMI->isTransient())
    UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI,
                                                  Dep.UseOp);

  // Update Heights[DefMI] to be the maximum height seen.
  MIHeightMap::iterator I;
  bool New;
  std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
  if (New)
    return true;

  // DefMI has been pushed before. Give it the max height.
  if (I->second < UseHeight)
    I->second = UseHeight;
  return false;
}

/// Assuming that the virtual register defined by DefMI:DefOp was used by
/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
/// when reaching the block that contains DefMI.
void MachineTraceMetrics::Ensemble::
addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
           ArrayRef<const MachineBasicBlock*> Trace) {
  assert(!Trace.empty() && "Trace should contain at least one block");
  Register Reg = DefMI->getOperand(DefOp).getReg();
  assert(Reg.isVirtual());
  const MachineBasicBlock *DefMBB = DefMI->getParent();

  // Reg is live-in to all blocks in Trace that follow DefMBB.
  for (const MachineBasicBlock *MBB : llvm::reverse(Trace)) {
    if (MBB == DefMBB)
      return;
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    // Just add the register. The height will be updated later.
    TBI.LiveIns.push_back(Reg);
  }
}

/// Compute instruction heights in the trace through MBB. This updates MBB and
/// the blocks below it in the trace. It is assumed that the trace has already
/// been computed.
void MachineTraceMetrics::Ensemble::
computeInstrHeights(const MachineBasicBlock *MBB) {
  // The bottom of the trace may already be computed.
  // Find the blocks that need updating.
  SmallVector<const MachineBasicBlock*, 8> Stack;
  do {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidHeight() && "Incomplete trace");
    if (TBI.HasValidInstrHeights)
      break;
    Stack.push_back(MBB);
    // Live-in lists are rebuilt from scratch for recomputed blocks.
    TBI.LiveIns.clear();
    MBB = TBI.Succ;
  } while (MBB);

  // As we move upwards in the trace, keep track of instructions that are
  // required by deeper trace instructions. Map MI -> height required so far.
  MIHeightMap Heights;

  // For physregs, the def isn't known when we see the use.
  // Instead, keep track of the highest use of each regunit.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());

  // If the bottom of the trace was already precomputed, initialize heights
  // from its live-in list.
  // MBB is the highest precomputed block in the trace.
  if (MBB) {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    for (LiveInReg &LI : TBI.LiveIns) {
      if (LI.Reg.isVirtual()) {
        // For virtual registers, the def latency is included.
        unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
        if (Height < LI.Height)
          Height = LI.Height;
      } else {
        // For register units, the def latency is not included because we don't
        // know the def yet.
        RegUnits[LI.Reg].Cycle = LI.Height;
      }
    }
  }

  // Go through the trace blocks in bottom-up order.
  SmallVector<DataDep, 8> Deps;
  for (;!Stack.empty(); Stack.pop_back()) {
    MBB = Stack.back();
    LLVM_DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    TBI.HasValidInstrHeights = true;
    TBI.CriticalPath = 0;

    LLVM_DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
      ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
      for (unsigned K = 0; K != PRHeights.size(); ++K)
        if (PRHeights[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
        }
    });

    // Get dependencies from PHIs in the trace successor.
    const MachineBasicBlock *Succ = TBI.Succ;
    // If MBB is the last block in the trace, and it has a back-edge to the
    // loop header, get loop-carried dependencies from PHIs in the header. For
    // that purpose, pretend that all the loop header PHIs have height 0.
    if (!Succ)
      if (const MachineLoop *Loop = getLoopFor(MBB))
        if (MBB->isSuccessor(Loop->getHeader()))
          Succ = Loop->getHeader();

    if (Succ) {
      for (const auto &PHI : *Succ) {
        // PHIs are grouped at the top of the block; stop at the first non-PHI.
        if (!PHI.isPHI())
          break;
        Deps.clear();
        getPHIDeps(PHI, Deps, MBB, MTM.MRI);
        if (!Deps.empty()) {
          // Loop header PHI heights are all 0.
          unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
          LLVM_DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
          if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,
                            MTM.TII))
            addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
        }
      }
    }

    // Go through the block backwards.
    for (const MachineInstr &MI : reverse(*MBB)) {
      // Find the MI height as determined by virtual register uses in the
      // trace below.
      unsigned Cycle = 0;
      MIHeightMap::iterator HeightI = Heights.find(&MI);
      if (HeightI != Heights.end()) {
        Cycle = HeightI->second;
        // We won't be seeing any more MI uses.
        Heights.erase(HeightI);
      }

      // Don't process PHI deps. They depend on the specific predecessor, and
      // we'll get them when visiting the predecessor.
      Deps.clear();
      bool HasPhysRegs = !MI.isPHI() && getDataDeps(MI, Deps, MTM.MRI);

      // There may also be regunit dependencies to include in the height.
      if (HasPhysRegs)
        Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, MTM.SchedModel,
                                      MTM.TII, MTM.TRI);

      // Update the required height of any virtual registers read by MI.
      for (const DataDep &Dep : Deps)
        if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
          addLiveIns(Dep.DefMI, Dep.DefOp, Stack);

      InstrCycles &MICycles = Cycles[&MI];
      MICycles.Height = Cycle;
      if (!TBI.HasValidInstrDepths) {
        LLVM_DEBUG(dbgs() << Cycle << '\t' << MI);
        continue;
      }
      // Update critical path length.
      TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
      LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
    }

    // Update virtual live-in heights. They were added by addLiveIns() with a 0
    // height because the final height isn't known until now.
    LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:");
    for (LiveInReg &LIR : TBI.LiveIns) {
      const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
      LIR.Height = Heights.lookup(DefMI);
      LLVM_DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height);
    }

    // Transfer the live regunits to the live-in list.
    for (const LiveRegUnit &RU : RegUnits) {
      TBI.LiveIns.push_back(LiveInReg(RU.RegUnit, RU.Cycle));
      LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RU.RegUnit, MTM.TRI) << '@'
                        << RU.Cycle);
    }
    LLVM_DEBUG(dbgs() << '\n');

    if (!TBI.HasValidInstrDepths)
      continue;
    // Add live-ins to the critical path length.
    TBI.CriticalPath = std::max(TBI.CriticalPath,
                                computeCrossBlockCriticalPath(TBI));
    LLVM_DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
  }
}

/// Return the trace through MBB, lazily computing the trace itself and the
/// per-instruction depths/heights if any of them are not valid yet.
MachineTraceMetrics::Trace
MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
  TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];

  if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
    computeTrace(MBB);
  if (!TBI.HasValidInstrDepths)
    computeInstrDepths(MBB);
  if (!TBI.HasValidInstrHeights)
    computeInstrHeights(MBB);

  return Trace(*this, TBI);
}

/// Return MI's slack: the number of cycles its issue could slip without
/// lengthening the trace critical path, i.e. CriticalPath - (Depth + Height).
unsigned
MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr &MI) const {
  assert(getBlockNum() == unsigned(MI.getParent()->getNumber()) &&
         "MI must be in the trace center block");
  InstrCycles Cyc = getInstrCycles(MI);
  return getCriticalPath() - (Cyc.Depth + Cyc.Height);
}

/// Return the depth of the value PHI reads from the trace center block:
/// the defining instruction's depth plus its operand latency (transient
/// defs contribute latency 0).
unsigned
MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr &PHI) const {
  const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
  SmallVector<DataDep, 1> Deps;
  getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
  assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
  DataDep &Dep = Deps.front();
  unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth;
  // Add latency if DefMI is a real instruction. Transients get latency 0.
  if (!Dep.DefMI->isTransient())
    DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
                                                        &PHI, Dep.UseOp);
  return DepCycle;
}

/// When bottom is set include instructions in current block in estimate.
unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
  // Find the limiting processor resource.
  // Numbers have been pre-scaled to be comparable.
  unsigned PRMax = 0;
  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
  if (Bottom) {
    ArrayRef<unsigned> PRCycles = TE.MTM.getProcReleaseAtCycles(getBlockNum());
    for (unsigned K = 0; K != PRDepths.size(); ++K)
      PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
  } else {
    for (unsigned PRD : PRDepths)
      PRMax = std::max(PRMax, PRD);
  }
  // Convert to cycle count.
  PRMax = TE.MTM.getCycles(PRMax);

  /// All instructions before current block
  unsigned Instrs = TBI.InstrDepth;
  // plus instructions in current block
  if (Bottom)
    Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
    Instrs /= IW;
  // Assume issue width 1 without a schedule model.
  return std::max(Instrs, PRMax);
}

/// Estimate the trace length in cycles, limited by either issue width or the
/// most contended processor resource, optionally including Extrablocks and
/// hypothetical extra/removed instructions (given by scheduling class).
unsigned MachineTraceMetrics::Trace::getResourceLength(
    ArrayRef<const MachineBasicBlock *> Extrablocks,
    ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
    ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
  // Add up resources above and below the center block.
  ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
  ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
  unsigned PRMax = 0;

  // Capture computing cycles from extra instructions: sum the (pre-scaled)
  // release cycles that Instrs' scheduling classes spend on ResourceIdx.
  auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
                            unsigned ResourceIdx)
      -> unsigned {
    unsigned Cycles = 0;
    for (const MCSchedClassDesc *SC : Instrs) {
      if (!SC->isValid())
        continue;
      for (TargetSchedModel::ProcResIter
               PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
               PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
           PI != PE; ++PI) {
        if (PI->ProcResourceIdx != ResourceIdx)
          continue;
        Cycles += (PI->ReleaseAtCycle *
                   TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
      }
    }
    return Cycles;
  };

  for (unsigned K = 0; K != PRDepths.size(); ++K) {
    unsigned PRCycles = PRDepths[K] + PRHeights[K];
    for (const MachineBasicBlock *MBB : Extrablocks)
      PRCycles += TE.MTM.getProcReleaseAtCycles(MBB->getNumber())[K];
    PRCycles += extraCycles(ExtraInstrs, K);
    PRCycles -= extraCycles(RemoveInstrs, K);
    PRMax = std::max(PRMax, PRCycles);
  }
  // Convert to cycle count.
  PRMax = TE.MTM.getCycles(PRMax);

  // Instrs: #instructions in current trace outside current block.
  unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
  // Add instruction count from the extra blocks.
  for (const MachineBasicBlock *MBB : Extrablocks)
    Instrs += TE.MTM.getResources(MBB)->InstrCount;
  Instrs += ExtraInstrs.size();
  Instrs -= RemoveInstrs.size();
  if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
    Instrs /= IW;
  // Assume issue width 1 without a schedule model.
  return std::max(Instrs, PRMax);
}

/// Return true if the DefMI -> UseMI dependency stays inside this trace:
/// either both are in the same block, or DefMI's block is a useful dominator
/// of UseMI's block within the ensemble.
bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr &DefMI,
                                              const MachineInstr &UseMI) const {
  if (DefMI.getParent() == UseMI.getParent())
    return true;

  const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI.getParent()->getNumber()];
  const TraceBlockInfo &TBI = TE.BlockInfo[UseMI.getParent()->getNumber()];

  return DepTBI.isUsefulDominator(TBI);
}

/// Dump the per-block trace info for every block in the function.
void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
  OS << getName() << " ensemble:\n";
  for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
    OS << "  %bb." << i << '\t';
    BlockInfo[i].print(OS);
    OS << '\n';
  }
}

/// Print this block's depth/height state, flagging whichever halves are
/// invalid or missing per-instruction data.
void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
  if (hasValidDepth()) {
    OS << "depth=" << InstrDepth;
    if (Pred)
      OS << " pred=" << printMBBReference(*Pred);
    else
      OS << " pred=null";
    OS << " head=%bb." << Head;
    if (HasValidInstrDepths)
      OS << " +instrs";
  } else
    OS << "depth invalid";
  OS << ", ";
  if (hasValidHeight()) {
    OS << "height=" << InstrHeight;
    if (Succ)
      OS << " succ=" << printMBBReference(*Succ);
    else
      OS << " succ=null";
    OS << " tail=%bb." << Tail;
    if (HasValidInstrHeights)
      OS << " +instrs";
  } else
    OS << "height invalid";
  if (HasValidInstrDepths && HasValidInstrHeights)
    OS << ", crit=" << CriticalPath;
}

/// Print a summary of the trace (head -> center -> tail) followed by the
/// chain of predecessor and successor blocks along the trace.
void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
  unsigned MBBNum = &TBI - &TE.BlockInfo[0];

  OS << TE.getName() << " trace %bb." << TBI.Head << " --> %bb." << MBBNum
     << " --> %bb." << TBI.Tail << ':';
  if (TBI.hasValidHeight() && TBI.hasValidDepth())
    OS << ' ' << getInstrCount() << " instrs.";
  if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
    OS << ' ' << TBI.CriticalPath << " cycles.";

  const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
  OS << "\n%bb." << MBBNum;
  while (Block->hasValidDepth() && Block->Pred) {
    unsigned Num = Block->Pred->getNumber();
    OS << " <- " << printMBBReference(*Block->Pred);
    Block = &TE.BlockInfo[Num];
  }

  Block = &TBI;
  OS << "\n    ";
  while (Block->hasValidHeight() && Block->Succ) {
    unsigned Num = Block->Succ->getNumber();
    OS << " -> " << printMBBReference(*Block->Succ);
    Block = &TE.BlockInfo[Num];
  }
  OS << '\n';
}