xref: /llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp (revision eccf4d44d346eee498b0ff709e625e3104448751)
1 //===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/CodeGen/MachineTraceMetrics.h"
10 #include "llvm/ADT/ArrayRef.h"
11 #include "llvm/ADT/DenseMap.h"
12 #include "llvm/ADT/PostOrderIterator.h"
13 #include "llvm/ADT/SmallPtrSet.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/SparseSet.h"
16 #include "llvm/CodeGen/MachineBasicBlock.h"
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/MachineInstr.h"
19 #include "llvm/CodeGen/MachineLoopInfo.h"
20 #include "llvm/CodeGen/MachineOperand.h"
21 #include "llvm/CodeGen/MachineRegisterInfo.h"
22 #include "llvm/CodeGen/TargetRegisterInfo.h"
23 #include "llvm/CodeGen/TargetSchedule.h"
24 #include "llvm/CodeGen/TargetSubtargetInfo.h"
25 #include "llvm/InitializePasses.h"
26 #include "llvm/MC/MCRegisterInfo.h"
27 #include "llvm/Pass.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/Format.h"
31 #include "llvm/Support/raw_ostream.h"
32 #include <algorithm>
33 #include <cassert>
34 #include <iterator>
35 #include <tuple>
36 #include <utility>
37 
38 using namespace llvm;
39 
40 #define DEBUG_TYPE "machine-trace-metrics"
41 
// Static pass identification token.
char MachineTraceMetrics::ID = 0;

// Externally visible handle other passes use to require this analysis.
char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;

// Register the pass. The trailing 'true' marks it as an analysis; it depends
// on machine loop info to bound trace traversals to natural loops.
INITIALIZE_PASS_BEGIN(MachineTraceMetrics, DEBUG_TYPE, "Machine Trace Metrics",
                      false, true)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_END(MachineTraceMetrics, DEBUG_TYPE,
                    "Machine Trace Metrics", false, true)
51 
52 MachineTraceMetrics::MachineTraceMetrics() : MachineFunctionPass(ID) {
53   std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
54 }
55 
void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
  // Pure analysis: modifies nothing, so all other analyses survive.
  AU.setPreservesAll();
  // Loop info is needed to keep traces inside natural loops.
  AU.addRequired<MachineLoopInfoWrapperPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}
61 
bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
  // Cache per-function pointers used by the lazy computations below.
  MF = &Func;
  const TargetSubtargetInfo &ST = MF->getSubtarget();
  TII = ST.getInstrInfo();
  TRI = ST.getRegisterInfo();
  MRI = &MF->getRegInfo();
  Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  SchedModel.init(&ST);
  // One FixedBlockInfo slot per block, plus a flat array holding one
  // resource-cycle entry per (block, processor-resource-kind) pair.
  BlockInfo.resize(MF->getNumBlockIDs());
  ProcReleaseAtCycles.resize(MF->getNumBlockIDs() *
                            SchedModel.getNumProcResourceKinds());
  // Analysis pass: the function is never modified.
  return false;
}
75 
76 void MachineTraceMetrics::releaseMemory() {
77   MF = nullptr;
78   BlockInfo.clear();
79   for (Ensemble *&E : Ensembles) {
80     delete E;
81     E = nullptr;
82   }
83 }
84 
85 //===----------------------------------------------------------------------===//
86 //                          Fixed block information
87 //===----------------------------------------------------------------------===//
88 //
89 // The number of instructions in a basic block and the CPU resources used by
90 // those instructions don't depend on any given trace strategy.
91 
92 /// Compute the resource usage in basic block MBB.
93 const MachineTraceMetrics::FixedBlockInfo*
94 MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
95   assert(MBB && "No basic block");
96   FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
97   if (FBI->hasResources())
98     return FBI;
99 
100   // Compute resource usage in the block.
101   FBI->HasCalls = false;
102   unsigned InstrCount = 0;
103 
104   // Add up per-processor resource cycles as well.
105   unsigned PRKinds = SchedModel.getNumProcResourceKinds();
106   SmallVector<unsigned, 32> PRCycles(PRKinds);
107 
108   for (const auto &MI : *MBB) {
109     if (MI.isTransient())
110       continue;
111     ++InstrCount;
112     if (MI.isCall())
113       FBI->HasCalls = true;
114 
115     // Count processor resources used.
116     if (!SchedModel.hasInstrSchedModel())
117       continue;
118     const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
119     if (!SC->isValid())
120       continue;
121 
122     for (TargetSchedModel::ProcResIter
123          PI = SchedModel.getWriteProcResBegin(SC),
124          PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
125       assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
126       PRCycles[PI->ProcResourceIdx] += PI->ReleaseAtCycle;
127     }
128   }
129   FBI->InstrCount = InstrCount;
130 
131   // Scale the resource cycles so they are comparable.
132   unsigned PROffset = MBB->getNumber() * PRKinds;
133   for (unsigned K = 0; K != PRKinds; ++K)
134     ProcReleaseAtCycles[PROffset + K] =
135       PRCycles[K] * SchedModel.getResourceFactor(K);
136 
137   return FBI;
138 }
139 
140 ArrayRef<unsigned>
141 MachineTraceMetrics::getProcReleaseAtCycles(unsigned MBBNum) const {
142   assert(BlockInfo[MBBNum].hasResources() &&
143          "getResources() must be called before getProcReleaseAtCycles()");
144   unsigned PRKinds = SchedModel.getNumProcResourceKinds();
145   assert((MBBNum+1) * PRKinds <= ProcReleaseAtCycles.size());
146   return ArrayRef(ProcReleaseAtCycles.data() + MBBNum * PRKinds, PRKinds);
147 }
148 
149 //===----------------------------------------------------------------------===//
150 //                         Ensemble utility functions
151 //===----------------------------------------------------------------------===//
152 
153 MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
154   : MTM(*ct) {
155   BlockInfo.resize(MTM.BlockInfo.size());
156   unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
157   ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
158   ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
159 }
160 
// Out-of-line virtual destructor serves as the vtable anchor for Ensemble.
MachineTraceMetrics::Ensemble::~Ensemble() = default;
163 
164 const MachineLoop*
165 MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
166   return MTM.Loops->getLoopFor(MBB);
167 }
168 
169 // Update resource-related information in the TraceBlockInfo for MBB.
170 // Only update resources related to the trace above MBB.
171 void MachineTraceMetrics::Ensemble::
172 computeDepthResources(const MachineBasicBlock *MBB) {
173   TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
174   unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
175   unsigned PROffset = MBB->getNumber() * PRKinds;
176 
177   // Compute resources from trace above. The top block is simple.
178   if (!TBI->Pred) {
179     TBI->InstrDepth = 0;
180     TBI->Head = MBB->getNumber();
181     std::fill(ProcResourceDepths.begin() + PROffset,
182               ProcResourceDepths.begin() + PROffset + PRKinds, 0);
183     return;
184   }
185 
186   // Compute from the block above. A post-order traversal ensures the
187   // predecessor is always computed first.
188   unsigned PredNum = TBI->Pred->getNumber();
189   TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
190   assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
191   const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
192   TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
193   TBI->Head = PredTBI->Head;
194 
195   // Compute per-resource depths.
196   ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
197   ArrayRef<unsigned> PredPRCycles = MTM.getProcReleaseAtCycles(PredNum);
198   for (unsigned K = 0; K != PRKinds; ++K)
199     ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
200 }
201 
202 // Update resource-related information in the TraceBlockInfo for MBB.
203 // Only update resources related to the trace below MBB.
204 void MachineTraceMetrics::Ensemble::
205 computeHeightResources(const MachineBasicBlock *MBB) {
206   TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
207   unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
208   unsigned PROffset = MBB->getNumber() * PRKinds;
209 
210   // Compute resources for the current block.
211   TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
212   ArrayRef<unsigned> PRCycles = MTM.getProcReleaseAtCycles(MBB->getNumber());
213 
214   // The trace tail is done.
215   if (!TBI->Succ) {
216     TBI->Tail = MBB->getNumber();
217     llvm::copy(PRCycles, ProcResourceHeights.begin() + PROffset);
218     return;
219   }
220 
221   // Compute from the block below. A post-order traversal ensures the
222   // predecessor is always computed first.
223   unsigned SuccNum = TBI->Succ->getNumber();
224   TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
225   assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
226   TBI->InstrHeight += SuccTBI->InstrHeight;
227   TBI->Tail = SuccTBI->Tail;
228 
229   // Compute per-resource heights.
230   ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
231   for (unsigned K = 0; K != PRKinds; ++K)
232     ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
233 }
234 
235 // Check if depth resources for MBB are valid and return the TBI.
236 // Return NULL if the resources have been invalidated.
237 const MachineTraceMetrics::TraceBlockInfo*
238 MachineTraceMetrics::Ensemble::
239 getDepthResources(const MachineBasicBlock *MBB) const {
240   const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
241   return TBI->hasValidDepth() ? TBI : nullptr;
242 }
243 
244 // Check if height resources for MBB are valid and return the TBI.
245 // Return NULL if the resources have been invalidated.
246 const MachineTraceMetrics::TraceBlockInfo*
247 MachineTraceMetrics::Ensemble::
248 getHeightResources(const MachineBasicBlock *MBB) const {
249   const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
250   return TBI->hasValidHeight() ? TBI : nullptr;
251 }
252 
253 /// Get an array of processor resource depths for MBB. Indexed by processor
254 /// resource kind, this array contains the scaled processor resources consumed
255 /// by all blocks preceding MBB in its trace. It does not include instructions
256 /// in MBB.
257 ///
258 /// Compare TraceBlockInfo::InstrDepth.
259 ArrayRef<unsigned>
260 MachineTraceMetrics::Ensemble::
261 getProcResourceDepths(unsigned MBBNum) const {
262   unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
263   assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
264   return ArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
265 }
266 
267 /// Get an array of processor resource heights for MBB. Indexed by processor
268 /// resource kind, this array contains the scaled processor resources consumed
269 /// by this block and all blocks following it in its trace.
270 ///
271 /// Compare TraceBlockInfo::InstrHeight.
272 ArrayRef<unsigned>
273 MachineTraceMetrics::Ensemble::
274 getProcResourceHeights(unsigned MBBNum) const {
275   unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
276   assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
277   return ArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
278 }
279 
280 //===----------------------------------------------------------------------===//
281 //                         Trace Selection Strategies
282 //===----------------------------------------------------------------------===//
283 //
284 // A trace selection strategy is implemented as a sub-class of Ensemble. The
285 // trace through a block B is computed by two DFS traversals of the CFG
286 // starting from B. One upwards, and one downwards. During the upwards DFS,
287 // pickTracePred() is called on the post-ordered blocks. During the downwards
288 // DFS, pickTraceSucc() is called in a post-order.
289 //
290 
291 // We never allow traces that leave loops, but we do allow traces to enter
292 // nested loops. We also never allow traces to contain back-edges.
293 //
294 // This means that a loop header can never appear above the center block of a
295 // trace, except as the trace head. Below the center block, loop exiting edges
296 // are banned.
297 //
298 // Return true if an edge from the From loop to the To loop is leaving a loop.
299 // Either of To and From can be null.
300 static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
301   return From && !From->contains(To);
302 }
303 
304 // MinInstrCountEnsemble - Pick the trace that executes the least number of
305 // instructions.
306 namespace {
307 
// Trace strategy that extends each trace toward the neighbor giving the
// smallest instruction depth/height (definitions follow below).
class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
  const char *getName() const override { return "MinInstr"; }
  const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
  const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;

public:
  MinInstrCountEnsemble(MachineTraceMetrics *mtm)
    : MachineTraceMetrics::Ensemble(mtm) {}
};
317 
318 /// Pick only the current basic block for the trace and do not choose any
319 /// predecessors/successors.
320 class LocalEnsemble : public MachineTraceMetrics::Ensemble {
321   const char *getName() const override { return "Local"; }
322   const MachineBasicBlock *pickTracePred(const MachineBasicBlock *) override {
323     return nullptr;
324   };
325   const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock *) override {
326     return nullptr;
327   };
328 
329 public:
330   LocalEnsemble(MachineTraceMetrics *MTM)
331       : MachineTraceMetrics::Ensemble(MTM) {}
332 };
333 } // end anonymous namespace
334 
335 // Select the preferred predecessor for MBB.
336 const MachineBasicBlock*
337 MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
338   if (MBB->pred_empty())
339     return nullptr;
340   const MachineLoop *CurLoop = getLoopFor(MBB);
341   // Don't leave loops, and never follow back-edges.
342   if (CurLoop && MBB == CurLoop->getHeader())
343     return nullptr;
344   unsigned CurCount = MTM.getResources(MBB)->InstrCount;
345   const MachineBasicBlock *Best = nullptr;
346   unsigned BestDepth = 0;
347   for (const MachineBasicBlock *Pred : MBB->predecessors()) {
348     const MachineTraceMetrics::TraceBlockInfo *PredTBI =
349       getDepthResources(Pred);
350     // Ignore cycles that aren't natural loops.
351     if (!PredTBI)
352       continue;
353     // Pick the predecessor that would give this block the smallest InstrDepth.
354     unsigned Depth = PredTBI->InstrDepth + CurCount;
355     if (!Best || Depth < BestDepth) {
356       Best = Pred;
357       BestDepth = Depth;
358     }
359   }
360   return Best;
361 }
362 
363 // Select the preferred successor for MBB.
364 const MachineBasicBlock*
365 MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
366   if (MBB->succ_empty())
367     return nullptr;
368   const MachineLoop *CurLoop = getLoopFor(MBB);
369   const MachineBasicBlock *Best = nullptr;
370   unsigned BestHeight = 0;
371   for (const MachineBasicBlock *Succ : MBB->successors()) {
372     // Don't consider back-edges.
373     if (CurLoop && Succ == CurLoop->getHeader())
374       continue;
375     // Don't consider successors exiting CurLoop.
376     if (isExitingLoop(CurLoop, getLoopFor(Succ)))
377       continue;
378     const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
379       getHeightResources(Succ);
380     // Ignore cycles that aren't natural loops.
381     if (!SuccTBI)
382       continue;
383     // Pick the successor that would give this block the smallest InstrHeight.
384     unsigned Height = SuccTBI->InstrHeight;
385     if (!Best || Height < BestHeight) {
386       Best = Succ;
387       BestHeight = Height;
388     }
389   }
390   return Best;
391 }
392 
393 // Get an Ensemble sub-class for the requested trace strategy.
394 MachineTraceMetrics::Ensemble *
395 MachineTraceMetrics::getEnsemble(MachineTraceStrategy strategy) {
396   assert(strategy < MachineTraceStrategy::TS_NumStrategies &&
397          "Invalid trace strategy enum");
398   Ensemble *&E = Ensembles[static_cast<size_t>(strategy)];
399   if (E)
400     return E;
401 
402   // Allocate new Ensemble on demand.
403   switch (strategy) {
404   case MachineTraceStrategy::TS_MinInstrCount:
405     return (E = new MinInstrCountEnsemble(this));
406   case MachineTraceStrategy::TS_Local:
407     return (E = new LocalEnsemble(this));
408   default: llvm_unreachable("Invalid trace strategy enum");
409   }
410 }
411 
412 void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
413   LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB)
414                     << '\n');
415   BlockInfo[MBB->getNumber()].invalidate();
416   for (Ensemble *E : Ensembles)
417     if (E)
418       E->invalidate(MBB);
419 }
420 
void MachineTraceMetrics::verifyAnalysis() const {
  // Nothing to verify before the pass has run on a function.
  if (!MF)
    return;
#ifndef NDEBUG
  assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
  // Each allocated ensemble checks its own trace invariants.
  for (Ensemble *E : Ensembles)
    if (E)
      E->verify();
#endif
}
431 
432 //===----------------------------------------------------------------------===//
433 //                               Trace building
434 //===----------------------------------------------------------------------===//
435 //
436 // Traces are built by two CFG traversals. To avoid recomputing too much, use a
437 // set abstraction that confines the search to the current loop, and doesn't
438 // revisit blocks.
439 
440 namespace {
441 
// Shared state for the pruned post-order traversals in computeTrace().
struct LoopBounds {
  // Trace data for every block in the function, indexed by block number.
  MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
  // Blocks accepted by insertEdge() during the current traversal.
  SmallPtrSet<const MachineBasicBlock*, 8> Visited;
  const MachineLoopInfo *Loops;
  // False while searching upwards for the trace head, true while searching
  // downwards for the trace tail.
  bool Downward = false;

  LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
             const MachineLoopInfo *loops) : Blocks(blocks), Loops(loops) {}
};
451 
452 } // end anonymous namespace
453 
454 // Specialize po_iterator_storage in order to prune the post-order traversal so
455 // it is limited to the current loop and doesn't traverse the loop back edges.
456 namespace llvm {
457 
template<>
class po_iterator_storage<LoopBounds, true> {
  LoopBounds &LB;

public:
  po_iterator_storage(LoopBounds &lb) : LB(lb) {}

  // No bookkeeping is needed when a block's post-order visit completes.
  void finishPostorder(const MachineBasicBlock*) {}

  // Decide whether the traversal may follow the edge From -> To.
  // Returning false prunes the edge from the post-order walk.
  bool insertEdge(std::optional<const MachineBasicBlock *> From,
                  const MachineBasicBlock *To) {
    // Skip already visited To blocks.
    MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
    if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
      return false;
    // From is null once when To is the trace center block.
    if (From) {
      if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) {
        // Don't follow backedges, don't leave FromLoop when going upwards.
        if ((LB.Downward ? To : *From) == FromLoop->getHeader())
          return false;
        // Don't leave FromLoop.
        if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
          return false;
      }
    }
    // To is a new block. Mark the block as visited in case the CFG has cycles
    // that MachineLoopInfo didn't recognize as a natural loop.
    return LB.Visited.insert(To).second;
  }
};
489 
490 } // end namespace llvm
491 
/// Compute the trace through MBB: an upwards pruned post-order walk picks the
/// preferred predecessor of every block above MBB, then a downwards walk picks
/// the preferred successor of every block below it. Post-order guarantees a
/// block's chosen neighbor is processed before the block itself.
void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through "
                    << printMBBReference(*MBB) << '\n');
  // Set up loop bounds for the backwards post-order traversal.
  LoopBounds Bounds(BlockInfo, MTM.Loops);

  // Run an upwards post-order search for the trace start.
  Bounds.Downward = false;
  Bounds.Visited.clear();
  for (const auto *I : inverse_post_order_ext(MBB, Bounds)) {
    LLVM_DEBUG(dbgs() << "  pred for " << printMBBReference(*I) << ": ");
    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
    // All the predecessors have been visited, pick the preferred one.
    TBI.Pred = pickTracePred(I);
    LLVM_DEBUG({
      if (TBI.Pred)
        dbgs() << printMBBReference(*TBI.Pred) << '\n';
      else
        dbgs() << "null\n";
    });
    // The trace leading to I is now known, compute the depth resources.
    computeDepthResources(I);
  }

  // Run a downwards post-order search for the trace end.
  Bounds.Downward = true;
  Bounds.Visited.clear();
  for (const auto *I : post_order_ext(MBB, Bounds)) {
    LLVM_DEBUG(dbgs() << "  succ for " << printMBBReference(*I) << ": ");
    TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
    // All the successors have been visited, pick the preferred one.
    TBI.Succ = pickTraceSucc(I);
    LLVM_DEBUG({
      if (TBI.Succ)
        dbgs() << printMBBReference(*TBI.Succ) << '\n';
      else
        dbgs() << "null\n";
    });
    // The trace leaving I is now known, compute the height resources.
    computeHeightResources(I);
  }
}
535 
/// Invalidate traces through BadMBB. Height data is stale in every block whose
/// trace runs down into BadMBB, and depth data in every block whose trace runs
/// up into it; both sets are found by transitive worklist walks over the
/// preferred-neighbor links.
void
MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
  SmallVector<const MachineBasicBlock*, 16> WorkList;
  TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];

  // Invalidate height resources of blocks above MBB.
  if (BadTBI.hasValidHeight()) {
    BadTBI.invalidateHeight();
    WorkList.push_back(BadMBB);
    do {
      const MachineBasicBlock *MBB = WorkList.pop_back_val();
      LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
                        << getName() << " height.\n");
      // Find any MBB predecessors that have MBB as their preferred successor.
      // They are the only ones that need to be invalidated.
      for (const MachineBasicBlock *Pred : MBB->predecessors()) {
        TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
        if (!TBI.hasValidHeight())
          continue;
        if (TBI.Succ == MBB) {
          TBI.invalidateHeight();
          WorkList.push_back(Pred);
          continue;
        }
        // Verify that TBI.Succ is actually a Pred successor.
        assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
      }
    } while (!WorkList.empty());
  }

  // Invalidate depth resources of blocks below MBB.
  if (BadTBI.hasValidDepth()) {
    BadTBI.invalidateDepth();
    WorkList.push_back(BadMBB);
    do {
      const MachineBasicBlock *MBB = WorkList.pop_back_val();
      LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' '
                        << getName() << " depth.\n");
      // Find any MBB successors that have MBB as their preferred predecessor.
      // They are the only ones that need to be invalidated.
      for (const MachineBasicBlock *Succ : MBB->successors()) {
        TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
        if (!TBI.hasValidDepth())
          continue;
        if (TBI.Pred == MBB) {
          TBI.invalidateDepth();
          WorkList.push_back(Succ);
          continue;
        }
        // Verify that TBI.Pred is actually a Succ predecessor.
        assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
      }
    } while (!WorkList.empty());
  }

  // Clear any per-instruction data. We only have to do this for BadMBB itself
  // because the instructions in that block may change. Other blocks may be
  // invalidated, but their instructions will stay the same, so there is no
  // need to erase the Cycle entries. They will be overwritten when we
  // recompute.
  for (const auto &I : *BadMBB)
    Cycles.erase(&I);
}
600 
// Debug-build consistency check: every valid Pred/Succ link must still exist
// in the CFG, point at a block whose own data is valid, and never form a
// back-edge into a loop header.
void MachineTraceMetrics::Ensemble::verify() const {
#ifndef NDEBUG
  assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
         "Outdated BlockInfo size");
  for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
    const TraceBlockInfo &TBI = BlockInfo[Num];
    if (TBI.hasValidDepth() && TBI.Pred) {
      const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
      assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
      assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
             "Trace is broken, depth should have been invalidated.");
      // A loop header with a Pred would have been entered via a back-edge.
      const MachineLoop *Loop = getLoopFor(MBB);
      assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
    }
    if (TBI.hasValidHeight() && TBI.Succ) {
      const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
      assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
      assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
             "Trace is broken, height should have been invalidated.");
      // Going to the header of the same loop is a back-edge.
      const MachineLoop *Loop = getLoopFor(MBB);
      const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
      assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
             "Trace contains backedge");
    }
  }
#endif
}
628 
629 //===----------------------------------------------------------------------===//
630 //                             Data Dependencies
631 //===----------------------------------------------------------------------===//
632 //
633 // Compute the depth and height of each instruction based on data dependencies
634 // and instruction latencies. These cycle numbers assume that the CPU can issue
635 // an infinite number of instructions per cycle as long as their dependencies
636 // are ready.
637 
638 // A data dependency is represented as a defining MI and operand numbers on the
639 // defining and using MI.
640 namespace {
641 
// A single data dependency: operand UseOp of some user instruction reads the
// value defined by operand DefOp of DefMI.
struct DataDep {
  const MachineInstr *DefMI;
  unsigned DefOp;
  unsigned UseOp;

  DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
    : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}

  /// Create a DataDep from an SSA form virtual register.
  DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
    : UseOp(UseOp) {
    assert(Register::isVirtualRegister(VirtReg));
    MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
    assert(!DefI.atEnd() && "Register has no defs");
    DefMI = DefI->getParent();
    DefOp = DefI.getOperandNo();
    // SSA form guarantees a single def, so the iterator must now be at end.
    assert((++DefI).atEnd() && "Register has multiple defs");
  }
};
661 
662 } // end anonymous namespace
663 
664 // Get the input data dependencies that must be ready before UseMI can issue.
665 // Return true if UseMI has any physreg operands.
666 static bool getDataDeps(const MachineInstr &UseMI,
667                         SmallVectorImpl<DataDep> &Deps,
668                         const MachineRegisterInfo *MRI) {
669   // Debug values should not be included in any calculations.
670   if (UseMI.isDebugInstr())
671     return false;
672 
673   bool HasPhysRegs = false;
674   for (const MachineOperand &MO : UseMI.operands()) {
675     if (!MO.isReg())
676       continue;
677     Register Reg = MO.getReg();
678     if (!Reg)
679       continue;
680     if (Reg.isPhysical()) {
681       HasPhysRegs = true;
682       continue;
683     }
684     // Collect virtual register reads.
685     if (MO.readsReg())
686       Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo()));
687   }
688   return HasPhysRegs;
689 }
690 
691 // Get the input data dependencies of a PHI instruction, using Pred as the
692 // preferred predecessor.
693 // This will add at most one dependency to Deps.
694 static void getPHIDeps(const MachineInstr &UseMI,
695                        SmallVectorImpl<DataDep> &Deps,
696                        const MachineBasicBlock *Pred,
697                        const MachineRegisterInfo *MRI) {
698   // No predecessor at the beginning of a trace. Ignore dependencies.
699   if (!Pred)
700     return;
701   assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI");
702   for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) {
703     if (UseMI.getOperand(i + 1).getMBB() == Pred) {
704       Register Reg = UseMI.getOperand(i).getReg();
705       Deps.push_back(DataDep(MRI, Reg, i));
706       return;
707     }
708   }
709 }
710 
// Identify physreg dependencies for UseMI, and update the live regunit
// tracking set when scanning instructions downwards.
static void updatePhysDepsDownwards(const MachineInstr *UseMI,
                                    SmallVectorImpl<DataDep> &Deps,
                                    SparseSet<LiveRegUnit> &RegUnits,
                                    const TargetRegisterInfo *TRI) {
  // Registers whose units die at UseMI, and operand indices of live defs;
  // both are applied to RegUnits after the scan so the updates don't
  // interfere with dependency identification.
  SmallVector<MCRegister, 8> Kills;
  SmallVector<unsigned, 8> LiveDefOps;

  for (const MachineOperand &MO : UseMI->operands()) {
    if (!MO.isReg() || !MO.getReg().isPhysical())
      continue;
    MCRegister Reg = MO.getReg().asMCReg();
    // Track live defs and kills for updating RegUnits.
    if (MO.isDef()) {
      if (MO.isDead())
        Kills.push_back(Reg);
      else
        LiveDefOps.push_back(MO.getOperandNo());
    } else if (MO.isKill())
      Kills.push_back(Reg);
    // Identify dependencies.
    if (!MO.readsReg())
      continue;
    // One dependency per register is enough: stop at the first tracked unit.
    for (MCRegUnit Unit : TRI->regunits(Reg)) {
      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
      if (I == RegUnits.end())
        continue;
      Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo()));
      break;
    }
  }

  // Update RegUnits to reflect live registers after UseMI.
  // First kills.
  for (MCRegister Kill : Kills)
    for (MCRegUnit Unit : TRI->regunits(Kill))
      RegUnits.erase(Unit);

  // Second, live defs.
  for (unsigned DefOp : LiveDefOps) {
    for (MCRegUnit Unit :
         TRI->regunits(UseMI->getOperand(DefOp).getReg().asMCReg())) {
      LiveRegUnit &LRU = RegUnits[Unit];
      LRU.MI = UseMI;
      LRU.Op = DefOp;
    }
  }
}
760 
761 /// The length of the critical path through a trace is the maximum of two path
762 /// lengths:
763 ///
764 /// 1. The maximum height+depth over all instructions in the trace center block.
765 ///
766 /// 2. The longest cross-block dependency chain. For small blocks, it is
767 ///    possible that the critical path through the trace doesn't include any
768 ///    instructions in the block.
769 ///
770 /// This function computes the second number from the live-in list of the
771 /// center block.
772 unsigned MachineTraceMetrics::Ensemble::
773 computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
774   assert(TBI.HasValidInstrDepths && "Missing depth info");
775   assert(TBI.HasValidInstrHeights && "Missing height info");
776   unsigned MaxLen = 0;
777   for (const LiveInReg &LIR : TBI.LiveIns) {
778     if (!LIR.Reg.isVirtual())
779       continue;
780     const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
781     // Ignore dependencies outside the current trace.
782     const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
783     if (!DefTBI.isUsefulDominator(TBI))
784       continue;
785     unsigned Len = LIR.Height + Cycles[DefMI].Depth;
786     MaxLen = std::max(MaxLen, Len);
787   }
788   return MaxLen;
789 }
790 
791 void MachineTraceMetrics::Ensemble::
792 updateDepth(MachineTraceMetrics::TraceBlockInfo &TBI, const MachineInstr &UseMI,
793             SparseSet<LiveRegUnit> &RegUnits) {
794   SmallVector<DataDep, 8> Deps;
795   // Collect all data dependencies.
796   if (UseMI.isPHI())
797     getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
798   else if (getDataDeps(UseMI, Deps, MTM.MRI))
799     updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);
800 
801   // Filter and process dependencies, computing the earliest issue cycle.
802   unsigned Cycle = 0;
803   for (const DataDep &Dep : Deps) {
804     const TraceBlockInfo&DepTBI =
805       BlockInfo[Dep.DefMI->getParent()->getNumber()];
806     // Ignore dependencies from outside the current trace.
807     if (!DepTBI.isUsefulDominator(TBI))
808       continue;
809     assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
810     unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
811     // Add latency if DefMI is a real instruction. Transients get latency 0.
812     if (!Dep.DefMI->isTransient())
813       DepCycle += MTM.SchedModel
814         .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
815     Cycle = std::max(Cycle, DepCycle);
816   }
817   // Remember the instruction depth.
818   InstrCycles &MICycles = Cycles[&UseMI];
819   MICycles.Depth = Cycle;
820 
821   if (TBI.HasValidInstrHeights) {
822     // Update critical path length.
823     TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
824     LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
825   } else {
826     LLVM_DEBUG(dbgs() << Cycle << '\t' << UseMI);
827   }
828 }
829 
830 void MachineTraceMetrics::Ensemble::
831 updateDepth(const MachineBasicBlock *MBB, const MachineInstr &UseMI,
832             SparseSet<LiveRegUnit> &RegUnits) {
833   updateDepth(BlockInfo[MBB->getNumber()], UseMI, RegUnits);
834 }
835 
836 void MachineTraceMetrics::Ensemble::
837 updateDepths(MachineBasicBlock::iterator Start,
838              MachineBasicBlock::iterator End,
839              SparseSet<LiveRegUnit> &RegUnits) {
840     for (; Start != End; Start++)
841       updateDepth(Start->getParent(), *Start, RegUnits);
842 }
843 
/// Compute instruction depths for all instructions above or in MBB in its
/// trace. This assumes that the trace through MBB has already been computed.
void MachineTraceMetrics::Ensemble::
computeInstrDepths(const MachineBasicBlock *MBB) {
  // The top of the trace may already be computed, and HasValidInstrDepths
  // implies Head->HasValidInstrDepths, so we only need to start from the first
  // block in the trace that needs to be recomputed.
  // Walk up the trace (via Pred links), collecting the stale blocks on Stack.
  SmallVector<const MachineBasicBlock*, 8> Stack;
  do {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidDepth() && "Incomplete trace");
    if (TBI.HasValidInstrDepths)
      break;
    Stack.push_back(MBB);
    MBB = TBI.Pred;
  } while (MBB);

  // FIXME: If MBB is non-null at this point, it is the last pre-computed block
  // in the trace. We should track any live-out physregs that were defined in
  // the trace. This is quite rare in SSA form, typically created by CSE
  // hoisting a compare.
  // RegUnits tracks the last physreg def seen while scanning downwards.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());

  // Go through trace blocks in top-down order, stopping after the center block.
  while (!Stack.empty()) {
    MBB = Stack.pop_back_val();
    LLVM_DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    // Mark the block valid before filling in the per-instruction depths.
    TBI.HasValidInstrDepths = true;
    TBI.CriticalPath = 0;

    // Print out resource depths here as well.
    LLVM_DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
      ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
      for (unsigned K = 0; K != PRDepths.size(); ++K)
        if (PRDepths[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
        }
    });

    // Also compute the critical path length through MBB when possible.
    // This only covers the cross-block component; updateDepth() below extends
    // it with the per-instruction height+depth sums.
    if (TBI.HasValidInstrHeights)
      TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);

    // Compute the depth of each instruction in program order.
    for (const auto &UseMI : *MBB) {
      updateDepth(TBI, UseMI, RegUnits);
    }
  }
}
898 
// Identify physreg dependencies for MI when scanning instructions upwards.
// Return the issue height of MI after considering any live regunits.
// Height is the issue height computed from virtual register dependencies alone.
// RegUnits maps each live regunit to its highest reader below MI; entries for
// units defined by MI are consumed (erased) here. TII is currently unused.
static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
                                      SparseSet<LiveRegUnit> &RegUnits,
                                      const TargetSchedModel &SchedModel,
                                      const TargetInstrInfo *TII,
                                      const TargetRegisterInfo *TRI) {
  // Operand indices of physreg reads; they are recorded into RegUnits only
  // after MI's final height is known.
  SmallVector<unsigned, 8> ReadOps;

  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg.isPhysical())
      continue;
    if (MO.readsReg())
      ReadOps.push_back(MO.getOperandNo());
    if (!MO.isDef())
      continue;
    // This is a def of Reg. Remove corresponding entries from RegUnits, and
    // update MI Height to consider the physreg dependencies.
    for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) {
      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
      if (I == RegUnits.end())
        continue;
      unsigned DepHeight = I->Cycle;
      if (!MI.isTransient()) {
        // We may not know the UseMI of this dependency, if it came from the
        // live-in list. SchedModel can handle a NULL UseMI.
        DepHeight += SchedModel.computeOperandLatency(&MI, MO.getOperandNo(),
                                                      I->MI, I->Op);
      }
      Height = std::max(Height, DepHeight);
      // This regunit is dead above MI.
      RegUnits.erase(I);
    }
  }

  // Now we know the height of MI. Update any regunits read.
  for (unsigned Op : ReadOps) {
    MCRegister Reg = MI.getOperand(Op).getReg().asMCReg();
    for (MCRegUnit Unit : TRI->regunits(Reg)) {
      LiveRegUnit &LRU = RegUnits[Unit];
      // Set the height to the highest reader of the unit.
      if (LRU.Cycle <= Height && LRU.MI != &MI) {
        LRU.Cycle = Height;
        LRU.MI = &MI;
        LRU.Op = Op;
      }
    }
  }

  return Height;
}
954 
955 using MIHeightMap = DenseMap<const MachineInstr *, unsigned>;
956 
957 // Push the height of DefMI upwards if required to match UseMI.
958 // Return true if this is the first time DefMI was seen.
959 static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI,
960                           unsigned UseHeight, MIHeightMap &Heights,
961                           const TargetSchedModel &SchedModel,
962                           const TargetInstrInfo *TII) {
963   // Adjust height by Dep.DefMI latency.
964   if (!Dep.DefMI->isTransient())
965     UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI,
966                                                   Dep.UseOp);
967 
968   // Update Heights[DefMI] to be the maximum height seen.
969   MIHeightMap::iterator I;
970   bool New;
971   std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
972   if (New)
973     return true;
974 
975   // DefMI has been pushed before. Give it the max height.
976   if (I->second < UseHeight)
977     I->second = UseHeight;
978   return false;
979 }
980 
981 /// Assuming that the virtual register defined by DefMI:DefOp was used by
982 /// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
983 /// when reaching the block that contains DefMI.
984 void MachineTraceMetrics::Ensemble::
985 addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
986            ArrayRef<const MachineBasicBlock*> Trace) {
987   assert(!Trace.empty() && "Trace should contain at least one block");
988   Register Reg = DefMI->getOperand(DefOp).getReg();
989   assert(Reg.isVirtual());
990   const MachineBasicBlock *DefMBB = DefMI->getParent();
991 
992   // Reg is live-in to all blocks in Trace that follow DefMBB.
993   for (const MachineBasicBlock *MBB : llvm::reverse(Trace)) {
994     if (MBB == DefMBB)
995       return;
996     TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
997     // Just add the register. The height will be updated later.
998     TBI.LiveIns.push_back(Reg);
999   }
1000 }
1001 
/// Compute instruction heights in the trace through MBB. This updates MBB and
/// the blocks below it in the trace. It is assumed that the trace has already
/// been computed.
void MachineTraceMetrics::Ensemble::
computeInstrHeights(const MachineBasicBlock *MBB) {
  // The bottom of the trace may already be computed.
  // Find the blocks that need updating.
  // Walk down the trace (via Succ links), collecting stale blocks on Stack.
  SmallVector<const MachineBasicBlock*, 8> Stack;
  do {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    assert(TBI.hasValidHeight() && "Incomplete trace");
    if (TBI.HasValidInstrHeights)
      break;
    Stack.push_back(MBB);
    // Live-ins are recomputed from scratch below.
    TBI.LiveIns.clear();
    MBB = TBI.Succ;
  } while (MBB);

  // As we move upwards in the trace, keep track of instructions that are
  // required by deeper trace instructions. Map MI -> height required so far.
  MIHeightMap Heights;

  // For physregs, the def isn't known when we see the use.
  // Instead, keep track of the highest use of each regunit.
  SparseSet<LiveRegUnit> RegUnits;
  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());

  // If the bottom of the trace was already precomputed, initialize heights
  // from its live-in list.
  // MBB is the highest precomputed block in the trace.
  if (MBB) {
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    for (LiveInReg &LI : TBI.LiveIns) {
      if (LI.Reg.isVirtual()) {
        // For virtual registers, the def latency is included.
        unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
        if (Height < LI.Height)
          Height = LI.Height;
      } else {
        // For register units, the def latency is not included because we don't
        // know the def yet.
        RegUnits[LI.Reg].Cycle = LI.Height;
      }
    }
  }

  // Go through the trace blocks in bottom-up order.
  SmallVector<DataDep, 8> Deps;
  for (;!Stack.empty(); Stack.pop_back()) {
    MBB = Stack.back();
    LLVM_DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n");
    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
    // Mark the block valid before filling in the per-instruction heights.
    TBI.HasValidInstrHeights = true;
    TBI.CriticalPath = 0;

    LLVM_DEBUG({
      dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
      ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
      for (unsigned K = 0; K != PRHeights.size(); ++K)
        if (PRHeights[K]) {
          unsigned Factor = MTM.SchedModel.getResourceFactor(K);
          dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
                 << MTM.SchedModel.getProcResource(K)->Name << " ("
                 << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
        }
    });

    // Get dependencies from PHIs in the trace successor.
    const MachineBasicBlock *Succ = TBI.Succ;
    // If MBB is the last block in the trace, and it has a back-edge to the
    // loop header, get loop-carried dependencies from PHIs in the header. For
    // that purpose, pretend that all the loop header PHIs have height 0.
    if (!Succ)
      if (const MachineLoop *Loop = getLoopFor(MBB))
        if (MBB->isSuccessor(Loop->getHeader()))
          Succ = Loop->getHeader();

    if (Succ) {
      // PHIs appear first in a block, so stop at the first non-PHI.
      for (const auto &PHI : *Succ) {
        if (!PHI.isPHI())
          break;
        Deps.clear();
        getPHIDeps(PHI, Deps, MBB, MTM.MRI);
        if (!Deps.empty()) {
          // Loop header PHI heights are all 0.
          unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
          LLVM_DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
          // First time this def is seen: record it as a live-in of every
          // block on the stack below its defining block.
          if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,
                            MTM.TII))
            addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
        }
      }
    }

    // Go through the block backwards.
    for (const MachineInstr &MI : reverse(*MBB)) {
      // Find the MI height as determined by virtual register uses in the
      // trace below.
      unsigned Cycle = 0;
      MIHeightMap::iterator HeightI = Heights.find(&MI);
      if (HeightI != Heights.end()) {
        Cycle = HeightI->second;
        // We won't be seeing any more MI uses.
        Heights.erase(HeightI);
      }

      // Don't process PHI deps. They depend on the specific predecessor, and
      // we'll get them when visiting the predecessor.
      Deps.clear();
      bool HasPhysRegs = !MI.isPHI() && getDataDeps(MI, Deps, MTM.MRI);

      // There may also be regunit dependencies to include in the height.
      if (HasPhysRegs)
        Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, MTM.SchedModel,
                                      MTM.TII, MTM.TRI);

      // Update the required height of any virtual registers read by MI.
      for (const DataDep &Dep : Deps)
        if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
          addLiveIns(Dep.DefMI, Dep.DefOp, Stack);

      InstrCycles &MICycles = Cycles[&MI];
      MICycles.Height = Cycle;
      if (!TBI.HasValidInstrDepths) {
        LLVM_DEBUG(dbgs() << Cycle << '\t' << MI);
        continue;
      }
      // Update critical path length.
      TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
      LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
    }

    // Update virtual live-in heights. They were added by addLiveIns() with a 0
    // height because the final height isn't known until now.
    LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:");
    for (LiveInReg &LIR : TBI.LiveIns) {
      const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
      LIR.Height = Heights.lookup(DefMI);
      LLVM_DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height);
    }

    // Transfer the live regunits to the live-in list.
    for (const LiveRegUnit &RU : RegUnits) {
      TBI.LiveIns.push_back(LiveInReg(RU.RegUnit, RU.Cycle));
      LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RU.RegUnit, MTM.TRI) << '@'
                        << RU.Cycle);
    }
    LLVM_DEBUG(dbgs() << '\n');

    if (!TBI.HasValidInstrDepths)
      continue;
    // Add live-ins to the critical path length.
    TBI.CriticalPath = std::max(TBI.CriticalPath,
                                computeCrossBlockCriticalPath(TBI));
    LLVM_DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
  }
}
1159 
1160 MachineTraceMetrics::Trace
1161 MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
1162   TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
1163 
1164   if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
1165     computeTrace(MBB);
1166   if (!TBI.HasValidInstrDepths)
1167     computeInstrDepths(MBB);
1168   if (!TBI.HasValidInstrHeights)
1169     computeInstrHeights(MBB);
1170 
1171   return Trace(*this, TBI);
1172 }
1173 
1174 unsigned
1175 MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr &MI) const {
1176   assert(getBlockNum() == unsigned(MI.getParent()->getNumber()) &&
1177          "MI must be in the trace center block");
1178   InstrCycles Cyc = getInstrCycles(MI);
1179   return getCriticalPath() - (Cyc.Depth + Cyc.Height);
1180 }
1181 
1182 unsigned
1183 MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr &PHI) const {
1184   const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
1185   SmallVector<DataDep, 1> Deps;
1186   getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
1187   assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
1188   DataDep &Dep = Deps.front();
1189   unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth;
1190   // Add latency if DefMI is a real instruction. Transients get latency 0.
1191   if (!Dep.DefMI->isTransient())
1192     DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
1193                                                         &PHI, Dep.UseOp);
1194   return DepCycle;
1195 }
1196 
1197 /// When bottom is set include instructions in current block in estimate.
1198 unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
1199   // Find the limiting processor resource.
1200   // Numbers have been pre-scaled to be comparable.
1201   unsigned PRMax = 0;
1202   ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1203   if (Bottom) {
1204     ArrayRef<unsigned> PRCycles = TE.MTM.getProcReleaseAtCycles(getBlockNum());
1205     for (unsigned K = 0; K != PRDepths.size(); ++K)
1206       PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
1207   } else {
1208     for (unsigned PRD : PRDepths)
1209       PRMax = std::max(PRMax, PRD);
1210   }
1211   // Convert to cycle count.
1212   PRMax = TE.MTM.getCycles(PRMax);
1213 
1214   /// All instructions before current block
1215   unsigned Instrs = TBI.InstrDepth;
1216   // plus instructions in current block
1217   if (Bottom)
1218     Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
1219   if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
1220     Instrs /= IW;
1221   // Assume issue width 1 without a schedule model.
1222   return std::max(Instrs, PRMax);
1223 }
1224 
1225 unsigned MachineTraceMetrics::Trace::getResourceLength(
1226     ArrayRef<const MachineBasicBlock *> Extrablocks,
1227     ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
1228     ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
1229   // Add up resources above and below the center block.
1230   ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
1231   ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
1232   unsigned PRMax = 0;
1233 
1234   // Capture computing cycles from extra instructions
1235   auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
1236                             unsigned ResourceIdx)
1237                          ->unsigned {
1238     unsigned Cycles = 0;
1239     for (const MCSchedClassDesc *SC : Instrs) {
1240       if (!SC->isValid())
1241         continue;
1242       for (TargetSchedModel::ProcResIter
1243                PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
1244                PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
1245            PI != PE; ++PI) {
1246         if (PI->ProcResourceIdx != ResourceIdx)
1247           continue;
1248         Cycles += (PI->ReleaseAtCycle *
1249                    TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
1250       }
1251     }
1252     return Cycles;
1253   };
1254 
1255   for (unsigned K = 0; K != PRDepths.size(); ++K) {
1256     unsigned PRCycles = PRDepths[K] + PRHeights[K];
1257     for (const MachineBasicBlock *MBB : Extrablocks)
1258       PRCycles += TE.MTM.getProcReleaseAtCycles(MBB->getNumber())[K];
1259     PRCycles += extraCycles(ExtraInstrs, K);
1260     PRCycles -= extraCycles(RemoveInstrs, K);
1261     PRMax = std::max(PRMax, PRCycles);
1262   }
1263   // Convert to cycle count.
1264   PRMax = TE.MTM.getCycles(PRMax);
1265 
1266   // Instrs: #instructions in current trace outside current block.
1267   unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
1268   // Add instruction count from the extra blocks.
1269   for (const MachineBasicBlock *MBB : Extrablocks)
1270     Instrs += TE.MTM.getResources(MBB)->InstrCount;
1271   Instrs += ExtraInstrs.size();
1272   Instrs -= RemoveInstrs.size();
1273   if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
1274     Instrs /= IW;
1275   // Assume issue width 1 without a schedule model.
1276   return std::max(Instrs, PRMax);
1277 }
1278 
1279 bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr &DefMI,
1280                                               const MachineInstr &UseMI) const {
1281   if (DefMI.getParent() == UseMI.getParent())
1282     return true;
1283 
1284   const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI.getParent()->getNumber()];
1285   const TraceBlockInfo &TBI = TE.BlockInfo[UseMI.getParent()->getNumber()];
1286 
1287   return DepTBI.isUsefulDominator(TBI);
1288 }
1289 
1290 void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
1291   OS << getName() << " ensemble:\n";
1292   for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
1293     OS << "  %bb." << i << '\t';
1294     BlockInfo[i].print(OS);
1295     OS << '\n';
1296   }
1297 }
1298 
1299 void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
1300   if (hasValidDepth()) {
1301     OS << "depth=" << InstrDepth;
1302     if (Pred)
1303       OS << " pred=" << printMBBReference(*Pred);
1304     else
1305       OS << " pred=null";
1306     OS << " head=%bb." << Head;
1307     if (HasValidInstrDepths)
1308       OS << " +instrs";
1309   } else
1310     OS << "depth invalid";
1311   OS << ", ";
1312   if (hasValidHeight()) {
1313     OS << "height=" << InstrHeight;
1314     if (Succ)
1315       OS << " succ=" << printMBBReference(*Succ);
1316     else
1317       OS << " succ=null";
1318     OS << " tail=%bb." << Tail;
1319     if (HasValidInstrHeights)
1320       OS << " +instrs";
1321   } else
1322     OS << "height invalid";
1323   if (HasValidInstrDepths && HasValidInstrHeights)
1324     OS << ", crit=" << CriticalPath;
1325 }
1326 
1327 void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
1328   unsigned MBBNum = &TBI - &TE.BlockInfo[0];
1329 
1330   OS << TE.getName() << " trace %bb." << TBI.Head << " --> %bb." << MBBNum
1331      << " --> %bb." << TBI.Tail << ':';
1332   if (TBI.hasValidHeight() && TBI.hasValidDepth())
1333     OS << ' ' << getInstrCount() << " instrs.";
1334   if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
1335     OS << ' ' << TBI.CriticalPath << " cycles.";
1336 
1337   const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
1338   OS << "\n%bb." << MBBNum;
1339   while (Block->hasValidDepth() && Block->Pred) {
1340     unsigned Num = Block->Pred->getNumber();
1341     OS << " <- " << printMBBReference(*Block->Pred);
1342     Block = &TE.BlockInfo[Num];
1343   }
1344 
1345   Block = &TBI;
1346   OS << "\n    ";
1347   while (Block->hasValidHeight() && Block->Succ) {
1348     unsigned Num = Block->Succ->getNumber();
1349     OS << " -> " << printMBBReference(*Block->Succ);
1350     Block = &TE.BlockInfo[Num];
1351   }
1352   OS << '\n';
1353 }
1354