xref: /llvm-project/llvm/tools/llvm-exegesis/lib/Analysis.cpp (revision 49fad1cbf2bba88a5c0d596e3aa46d142f9d73ad)
1 //===-- Analysis.cpp --------------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "Analysis.h"
11 #include "BenchmarkResult.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/Support/FormatVariadic.h"
14 #include <unordered_set>
15 #include <vector>
16 
17 namespace exegesis {
18 
19 static const char kCsvSep = ',';
20 
21 namespace {
22 
23 enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString };
24 
25 template <EscapeTag Tag>
26 void writeEscaped(llvm::raw_ostream &OS, const llvm::StringRef S);
27 
28 template <>
29 void writeEscaped<kEscapeCsv>(llvm::raw_ostream &OS, const llvm::StringRef S) {
30   if (std::find(S.begin(), S.end(), kCsvSep) == S.end()) {
31     OS << S;
32   } else {
33     // Needs escaping.
34     OS << '"';
35     for (const char C : S) {
36       if (C == '"')
37         OS << "\"\"";
38       else
39         OS << C;
40     }
41     OS << '"';
42   }
43 }
44 
45 template <>
46 void writeEscaped<kEscapeHtml>(llvm::raw_ostream &OS, const llvm::StringRef S) {
47   for (const char C : S) {
48     if (C == '<')
49       OS << "&lt;";
50     else if (C == '>')
51       OS << "&gt;";
52     else if (C == '&')
53       OS << "&amp;";
54     else
55       OS << C;
56   }
57 }
58 
59 template <>
60 void writeEscaped<kEscapeHtmlString>(llvm::raw_ostream &OS, const llvm::StringRef S) {
61   for (const char C : S) {
62     if (C == '"')
63       OS << "\\\"";
64     else
65       OS << C;
66   }
67 }
68 
69 } // namespace
70 
71 template <EscapeTag Tag>
72 static void
73 writeClusterId(llvm::raw_ostream &OS,
74                const InstructionBenchmarkClustering::ClusterId &CID) {
75   if (CID.isNoise())
76     writeEscaped<Tag>(OS, "[noise]");
77   else if (CID.isError())
78     writeEscaped<Tag>(OS, "[error]");
79   else
80     OS << CID.getId();
81 }
82 
83 template <EscapeTag Tag>
84 static void writeMeasurementValue(llvm::raw_ostream &OS, const double Value) {
85   writeEscaped<Tag>(OS, llvm::formatv("{0:F}", Value).str());
86 }
87 
88 template <EscapeTag Tag>
89 static void writeSnippet(llvm::raw_ostream &OS,
90                          const std::vector<llvm::MCInst> &Instructions,
91                          const llvm::MCInstrInfo &InstrInfo,
92                          const char* Separator) {
93   // FIXME: Print operands.
94   llvm::SmallVector<llvm::StringRef, 3> Opcodes;
95   for (const llvm::MCInst &Instr : Instructions) {
96     Opcodes.push_back(InstrInfo.getName(Instr.getOpcode()));
97   }
98   writeEscaped<Tag>(OS, llvm::join(Opcodes, Separator));
99 }
100 
101 // Prints a row representing an instruction, along with scheduling info and
102 // point coordinates (measurements).
103 void Analysis::printInstructionRowCsv(const size_t PointId,
104                                       llvm::raw_ostream &OS) const {
105   const InstructionBenchmark &Point = Clustering_.getPoints()[PointId];
106   writeClusterId<kEscapeCsv>(OS, Clustering_.getClusterIdForPoint(PointId));
107   OS << kCsvSep;
108   writeSnippet<kEscapeCsv>(OS, Point.Key.Instructions, *InstrInfo_, "; ");
109   OS << kCsvSep;
110   writeEscaped<kEscapeCsv>(OS, Point.Key.Config);
111   OS << kCsvSep;
112   assert(!Point.Key.Instructions.empty());
113   // FIXME: Resolve variant classes.
114   const unsigned SchedClassId =
115       InstrInfo_->get(Point.Key.Instructions[0].getOpcode()).getSchedClass();
116 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
117   const auto &SchedModel = SubtargetInfo_->getSchedModel();
118   const llvm::MCSchedClassDesc *const SCDesc =
119       SchedModel.getSchedClassDesc(SchedClassId);
120   writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
121 #else
122   OS << SchedClassId;
123 #endif
124   for (const auto &Measurement : Point.Measurements) {
125     OS << kCsvSep;
126     writeMeasurementValue<kEscapeCsv>(OS, Measurement.Value);
127   }
128   OS << "\n";
129 }
130 
131 Analysis::Analysis(const llvm::Target &Target,
132                    const InstructionBenchmarkClustering &Clustering)
133     : Clustering_(Clustering) {
134   if (Clustering.getPoints().empty())
135     return;
136 
137   InstrInfo_.reset(Target.createMCInstrInfo());
138   const InstructionBenchmark &FirstPoint = Clustering.getPoints().front();
139   SubtargetInfo_.reset(Target.createMCSubtargetInfo(FirstPoint.LLVMTriple,
140                                                     FirstPoint.CpuName, ""));
141 }
142 
143 template <>
144 llvm::Error
145 Analysis::run<Analysis::PrintClusters>(llvm::raw_ostream &OS) const {
146   if (Clustering_.getPoints().empty())
147     return llvm::Error::success();
148 
149   // Write the header.
150   OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config"
151      << kCsvSep << "sched_class";
152   for (const auto &Measurement : Clustering_.getPoints().front().Measurements) {
153     OS << kCsvSep;
154     writeEscaped<kEscapeCsv>(OS, Measurement.Key);
155   }
156   OS << "\n";
157 
158   // Write the points.
159   const auto &Clusters = Clustering_.getValidClusters();
160   for (size_t I = 0, E = Clusters.size(); I < E; ++I) {
161     for (const size_t PointId : Clusters[I].PointIndices) {
162       printInstructionRowCsv(PointId, OS);
163     }
164     OS << "\n\n";
165   }
166   return llvm::Error::success();
167 }
168 
169 std::unordered_map<unsigned, std::vector<size_t>>
170 Analysis::makePointsPerSchedClass() const {
171   std::unordered_map<unsigned, std::vector<size_t>> PointsPerSchedClass;
172   const auto &Points = Clustering_.getPoints();
173   for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) {
174     const InstructionBenchmark &Point = Points[PointId];
175     if (!Point.Error.empty())
176       continue;
177     assert(!Point.Key.Instructions.empty());
178     const auto Opcode = Point.Key.Instructions[0].getOpcode();
179     // FIXME: Resolve variant classes.
180     PointsPerSchedClass[InstrInfo_->get(Opcode).getSchedClass()].push_back(
181         PointId);
182   }
183   return PointsPerSchedClass;
184 }
185 
186 // Uops repeat the same opcode over again. Just show this opcode and show the
187 // whole snippet only on hover.
188 static void writeUopsSnippetHtml(llvm::raw_ostream &OS,
189                                  const std::vector<llvm::MCInst> &Instructions,
190                                  const llvm::MCInstrInfo &InstrInfo) {
191   if (Instructions.empty())
192     return;
193   writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instructions[0].getOpcode()));
194   if (Instructions.size() > 1)
195     OS << " (x" << Instructions.size() << ")";
196 }
197 
198 // Latency tries to find a serial path. Just show the opcode path and show the
199 // whole snippet only on hover.
200 static void writeLatencySnippetHtml(llvm::raw_ostream &OS,
201                                  const std::vector<llvm::MCInst> &Instructions,
202                                  const llvm::MCInstrInfo &InstrInfo) {
203   bool First = true;
204   for (const llvm::MCInst &Instr : Instructions) {
205     if (First)
206       First = false;
207     else
208       OS << " &rarr; ";
209     writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instr.getOpcode()));
210   }
211 }
212 
213 void Analysis::printSchedClassClustersHtml(
214     const std::vector<SchedClassCluster> &Clusters, const SchedClass &SC,
215     llvm::raw_ostream &OS) const {
216   const auto &Points = Clustering_.getPoints();
217   OS << "<table class=\"sched-class-clusters\">";
218   OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
219   assert(!Clusters.empty());
220   for (const auto &Measurement :
221        Points[Clusters[0].getPointIds()[0]].Measurements) {
222     OS << "<th>";
223     if (Measurement.DebugString.empty())
224       writeEscaped<kEscapeHtml>(OS, Measurement.Key);
225     else
226       writeEscaped<kEscapeHtml>(OS, Measurement.DebugString);
227     OS << "</th>";
228   }
229   OS << "</tr>";
230   for (const SchedClassCluster &Cluster : Clusters) {
231     OS << "<tr class=\""
232        << (Cluster.measurementsMatch(*SubtargetInfo_, SC, Clustering_)
233                ? "good-cluster"
234                : "bad-cluster")
235        << "\"><td>";
236     writeClusterId<kEscapeHtml>(OS, Cluster.id());
237     OS << "</td><td><ul>";
238     for (const size_t PointId : Cluster.getPointIds()) {
239       const auto &Point = Points[PointId];
240       OS << "<li><span class=\"mono\" title=\"";
241       writeSnippet<kEscapeHtmlString>(OS, Point.Key.Instructions, *InstrInfo_, "\n");
242       OS << "\">";
243       switch (Point.Mode) {
244         case InstructionBenchmark::Latency:
245           writeLatencySnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
246           break;
247         case InstructionBenchmark::Uops:
248           writeUopsSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
249           break;
250         default:
251           llvm_unreachable("invalid mode");
252       }
253       OS << "</span> <span class=\"mono\">";
254       writeEscaped<kEscapeHtml>(OS, Point.Key.Config);
255       OS << "</span></li>";
256     }
257     OS << "</ul></td>";
258     for (const auto &Stats : Cluster.getRepresentative()) {
259       OS << "<td class=\"measurement\">";
260       writeMeasurementValue<kEscapeHtml>(OS, Stats.avg());
261       OS << "<br><span class=\"minmax\">[";
262       writeMeasurementValue<kEscapeHtml>(OS, Stats.min());
263       OS << ";";
264       writeMeasurementValue<kEscapeHtml>(OS, Stats.max());
265       OS << "]</span></td>";
266     }
267     OS << "</tr>";
268   }
269   OS << "</table>";
270 }
271 
272 // Return the non-redundant list of WriteProcRes used by the given sched class.
273 // The scheduling model for LLVM is such that each instruction has a certain
274 // number of uops which consume resources which are described by WriteProcRes
275 // entries. Each entry describe how many cycles are spent on a specific ProcRes
276 // kind.
277 // For example, an instruction might have 3 uOps, one dispatching on P0
278 // (ProcResIdx=1) and two on P06 (ProcResIdx = 7).
279 // Note that LLVM additionally denormalizes resource consumption to include
280 // usage of super resources by subresources. So in practice if there exists a
281 // P016 (ProcResIdx=10), then the cycles consumed by P0 are also consumed by
282 // P06 (ProcResIdx = 7) and P016 (ProcResIdx = 10), and the resources consumed
283 // by P06 are also consumed by P016. In the figure below, parenthesized cycles
284 // denote implied usage of superresources by subresources:
285 //            P0      P06    P016
286 //     uOp1    1      (1)     (1)
287 //     uOp2            1      (1)
288 //     uOp3            1      (1)
289 //     =============================
290 //             1       3       3
291 // Eventually we end up with three entries for the WriteProcRes of the
292 // instruction:
293 //    {ProcResIdx=1,  Cycles=1}  // P0
294 //    {ProcResIdx=7,  Cycles=3}  // P06
295 //    {ProcResIdx=10, Cycles=3}  // P016
296 //
297 // Note that in this case, P016 does not contribute any cycles, so it would
298 // be removed by this function.
299 // FIXME: Move this to MCSubtargetInfo and use it in llvm-mca.
300 static llvm::SmallVector<llvm::MCWriteProcResEntry, 8>
301 getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc,
302                             const llvm::MCSubtargetInfo &STI) {
303   llvm::SmallVector<llvm::MCWriteProcResEntry, 8> Result;
304   const auto &SM = STI.getSchedModel();
305   const unsigned NumProcRes = SM.getNumProcResourceKinds();
306 
307   // This assumes that the ProcResDescs are sorted in topological order, which
308   // is guaranteed by the tablegen backend.
309   llvm::SmallVector<float, 32> ProcResUnitUsage(NumProcRes);
310   for (const auto *WPR = STI.getWriteProcResBegin(&SCDesc),
311                   *const WPREnd = STI.getWriteProcResEnd(&SCDesc);
312        WPR != WPREnd; ++WPR) {
313     const llvm::MCProcResourceDesc *const ProcResDesc =
314         SM.getProcResource(WPR->ProcResourceIdx);
315     if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
316       // This is a ProcResUnit.
317       Result.push_back({WPR->ProcResourceIdx, WPR->Cycles});
318       ProcResUnitUsage[WPR->ProcResourceIdx] += WPR->Cycles;
319     } else {
320       // This is a ProcResGroup. First see if it contributes any cycles or if
321       // it has cycles just from subunits.
322       float RemainingCycles = WPR->Cycles;
323       for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
324            SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
325            ++SubResIdx) {
326         RemainingCycles -= ProcResUnitUsage[*SubResIdx];
327       }
328       if (RemainingCycles < 0.01f) {
329         // The ProcResGroup contributes no cycles of its own.
330         continue;
331       }
332       // The ProcResGroup contributes `RemainingCycles` cycles of its own.
333       Result.push_back({WPR->ProcResourceIdx,
334                         static_cast<uint16_t>(std::round(RemainingCycles))});
335       // Spread the remaining cycles over all subunits.
336       for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
337            SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
338            ++SubResIdx) {
339         ProcResUnitUsage[*SubResIdx] += RemainingCycles / ProcResDesc->NumUnits;
340       }
341     }
342   }
343   return Result;
344 }
345 
346 Analysis::SchedClass::SchedClass(const llvm::MCSchedClassDesc &SD,
347                                  const llvm::MCSubtargetInfo &STI)
348     : SCDesc(SD),
349       NonRedundantWriteProcRes(getNonRedundantWriteProcRes(SD, STI)),
350       IdealizedProcResPressure(computeIdealizedProcResPressure(
351           STI.getSchedModel(), NonRedundantWriteProcRes)) {}
352 
353 void Analysis::SchedClassCluster::addPoint(
354     size_t PointId, const InstructionBenchmarkClustering &Clustering) {
355   PointIds.push_back(PointId);
356   const auto &Point = Clustering.getPoints()[PointId];
357   if (ClusterId.isUndef()) {
358     ClusterId = Clustering.getClusterIdForPoint(PointId);
359     Representative.resize(Point.Measurements.size());
360   }
361   for (size_t I = 0, E = Point.Measurements.size(); I < E; ++I) {
362     Representative[I].push(Point.Measurements[I]);
363   }
364   assert(ClusterId == Clustering.getClusterIdForPoint(PointId));
365 }
366 
367 bool Analysis::SchedClassCluster::measurementsMatch(
368     const llvm::MCSubtargetInfo &STI, const SchedClass &SC,
369     const InstructionBenchmarkClustering &Clustering) const {
370   const size_t NumMeasurements = Representative.size();
371   std::vector<BenchmarkMeasure> ClusterCenterPoint(NumMeasurements);
372   std::vector<BenchmarkMeasure> SchedClassPoint(NumMeasurements);
373   // Latency case.
374   assert(!Clustering.getPoints().empty());
375   const InstructionBenchmark::ModeE Mode = Clustering.getPoints()[0].Mode;
376   if (Mode == InstructionBenchmark::Latency) {
377     if (NumMeasurements != 1) {
378       llvm::errs()
379           << "invalid number of measurements in latency mode: expected 1, got "
380           << NumMeasurements << "\n";
381       return false;
382     }
383     // Find the latency.
384     SchedClassPoint[0].Value = 0.0;
385     for (unsigned I = 0; I < SC.SCDesc.NumWriteLatencyEntries; ++I) {
386       const llvm::MCWriteLatencyEntry *const WLE =
387           STI.getWriteLatencyEntry(&SC.SCDesc, I);
388       SchedClassPoint[0].Value =
389           std::max<double>(SchedClassPoint[0].Value, WLE->Cycles);
390     }
391     ClusterCenterPoint[0].Value = Representative[0].avg();
392   } else if (Mode == InstructionBenchmark::Uops) {
393     for (int I = 0, E = Representative.size(); I < E; ++I) {
394       // Find the pressure on ProcResIdx `Key`.
395       uint16_t ProcResIdx = 0;
396       if (!llvm::to_integer(Representative[I].key(), ProcResIdx, 10)) {
397         llvm::errs() << "expected ProcResIdx key, got "
398                      << Representative[I].key() << "\n";
399         return false;
400       }
401       const auto ProcResPressureIt =
402           std::find_if(SC.IdealizedProcResPressure.begin(),
403                        SC.IdealizedProcResPressure.end(),
404                        [ProcResIdx](const std::pair<uint16_t, float> &WPR) {
405                          return WPR.first == ProcResIdx;
406                        });
407       SchedClassPoint[I].Value =
408           ProcResPressureIt == SC.IdealizedProcResPressure.end()
409               ? 0.0
410               : ProcResPressureIt->second;
411       ClusterCenterPoint[I].Value = Representative[I].avg();
412     }
413   } else {
414     llvm::errs() << "unimplemented measurement matching for mode " << Mode
415                  << "\n";
416     return false;
417   }
418   return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint);
419 }
420 
421 void Analysis::printSchedClassDescHtml(const SchedClass &SC,
422                                        llvm::raw_ostream &OS) const {
423   OS << "<table class=\"sched-class-desc\">";
424   OS << "<tr><th>Valid</th><th>Variant</th><th>uOps</th><th>Latency</"
425         "th><th>WriteProcRes</th><th title=\"This is the idealized unit "
426         "resource (port) pressure assuming ideal distribution\">Idealized "
427         "Resource Pressure</th></tr>";
428   if (SC.SCDesc.isValid()) {
429     const auto &SM = SubtargetInfo_->getSchedModel();
430     OS << "<tr><td>&#10004;</td>";
431     OS << "<td>" << (SC.SCDesc.isVariant() ? "&#10004;" : "&#10005;")
432        << "</td>";
433     OS << "<td>" << SC.SCDesc.NumMicroOps << "</td>";
434     // Latencies.
435     OS << "<td><ul>";
436     for (int I = 0, E = SC.SCDesc.NumWriteLatencyEntries; I < E; ++I) {
437       const auto *const Entry =
438           SubtargetInfo_->getWriteLatencyEntry(&SC.SCDesc, I);
439       OS << "<li>" << Entry->Cycles;
440       if (SC.SCDesc.NumWriteLatencyEntries > 1) {
441         // Dismabiguate if more than 1 latency.
442         OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
443       }
444       OS << "</li>";
445     }
446     OS << "</ul></td>";
447     // WriteProcRes.
448     OS << "<td><ul>";
449     for (const auto &WPR : SC.NonRedundantWriteProcRes) {
450       OS << "<li><span class=\"mono\">";
451       writeEscaped<kEscapeHtml>(OS,
452                                 SM.getProcResource(WPR.ProcResourceIdx)->Name);
453       OS << "</span>: " << WPR.Cycles << "</li>";
454     }
455     OS << "</ul></td>";
456     // Idealized port pressure.
457     OS << "<td><ul>";
458     for (const auto &Pressure : SC.IdealizedProcResPressure) {
459       OS << "<li><span class=\"mono\">";
460       writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel()
461                                         .getProcResource(Pressure.first)
462                                         ->Name);
463       OS << "</span>: ";
464       writeMeasurementValue<kEscapeHtml>(OS, Pressure.second);
465       OS << "</li>";
466     }
467     OS << "</ul></td>";
468     OS << "</tr>";
469   } else {
470     OS << "<tr><td>&#10005;</td><td></td><td></td></tr>";
471   }
472   OS << "</table>";
473 }
474 
475 static constexpr const char kHtmlHead[] = R"(
476 <head>
477 <title>llvm-exegesis Analysis Results</title>
478 <style>
479 body {
480   font-family: sans-serif
481 }
482 span.sched-class-name {
483   font-weight: bold;
484   font-family: monospace;
485 }
486 span.opcode {
487   font-family: monospace;
488 }
489 span.config {
490   font-family: monospace;
491 }
492 div.inconsistency {
493   margin-top: 50px;
494 }
495 table {
496   margin-left: 50px;
497   border-collapse: collapse;
498 }
499 table, table tr,td,th {
500   border: 1px solid #444;
501 }
502 table ul {
503   padding-left: 0px;
504   margin: 0px;
505   list-style-type: none;
506 }
507 table.sched-class-clusters td {
508   padding-left: 10px;
509   padding-right: 10px;
510   padding-top: 10px;
511   padding-bottom: 10px;
512 }
513 table.sched-class-desc td {
514   padding-left: 10px;
515   padding-right: 10px;
516   padding-top: 2px;
517   padding-bottom: 2px;
518 }
519 span.mono {
520   font-family: monospace;
521 }
522 td.measurement {
523   text-align: center;
524 }
525 tr.good-cluster td.measurement {
526   color: #292
527 }
528 tr.bad-cluster td.measurement {
529   color: #922
530 }
531 tr.good-cluster td.measurement span.minmax {
532   color: #888;
533 }
534 tr.bad-cluster td.measurement span.minmax {
535   color: #888;
536 }
537 </style>
538 </head>
539 )";
540 
541 template <>
542 llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
543     llvm::raw_ostream &OS) const {
544   const auto &FirstPoint = Clustering_.getPoints()[0];
545   // Print the header.
546   OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>";
547   OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>";
548   OS << "<h3>Triple: <span class=\"mono\">";
549   writeEscaped<kEscapeHtml>(OS, FirstPoint.LLVMTriple);
550   OS << "</span></h3><h3>Cpu: <span class=\"mono\">";
551   writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName);
552   OS << "</span></h3>";
553 
554   for (const auto &SchedClassAndPoints : makePointsPerSchedClass()) {
555     const auto SchedClassId = SchedClassAndPoints.first;
556     const std::vector<size_t> &SchedClassPoints = SchedClassAndPoints.second;
557     const auto &SchedModel = SubtargetInfo_->getSchedModel();
558     const llvm::MCSchedClassDesc *const SCDesc =
559         SchedModel.getSchedClassDesc(SchedClassId);
560     if (!SCDesc)
561       continue;
562     const SchedClass SC(*SCDesc, *SubtargetInfo_);
563 
564     // Bucket sched class points into sched class clusters.
565     std::vector<SchedClassCluster> SchedClassClusters;
566     for (const size_t PointId : SchedClassPoints) {
567       const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId);
568       if (!ClusterId.isValid())
569         continue; // Ignore noise and errors. FIXME: take noise into account ?
570       auto SchedClassClusterIt =
571           std::find_if(SchedClassClusters.begin(), SchedClassClusters.end(),
572                        [ClusterId](const SchedClassCluster &C) {
573                          return C.id() == ClusterId;
574                        });
575       if (SchedClassClusterIt == SchedClassClusters.end()) {
576         SchedClassClusters.emplace_back();
577         SchedClassClusterIt = std::prev(SchedClassClusters.end());
578       }
579       SchedClassClusterIt->addPoint(PointId, Clustering_);
580     }
581 
582     // Print any scheduling class that has at least one cluster that does not
583     // match the checked-in data.
584     if (std::all_of(SchedClassClusters.begin(), SchedClassClusters.end(),
585                     [this, &SC](const SchedClassCluster &C) {
586                       return C.measurementsMatch(*SubtargetInfo_, SC,
587                                                  Clustering_);
588                     }))
589       continue; // Nothing weird.
590 
591     OS << "<div class=\"inconsistency\"><p>Sched Class <span "
592           "class=\"sched-class-name\">";
593 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
594     writeEscaped<kEscapeHtml>(OS, SCDesc->Name);
595 #else
596     OS << SchedClassId;
597 #endif
598     OS << "</span> contains instructions whose performance characteristics do"
599           " not match that of LLVM:</p>";
600     printSchedClassClustersHtml(SchedClassClusters, SC, OS);
601     OS << "<p>llvm SchedModel data:</p>";
602     printSchedClassDescHtml(SC, OS);
603     OS << "</div>";
604   }
605 
606   OS << "</body></html>";
607   return llvm::Error::success();
608 }
609 
610 // Distributes a pressure budget as evenly as possible on the provided subunits
611 // given the already existing port pressure distribution.
612 //
613 // The algorithm is as follows: while there is remaining pressure to
614 // distribute, find the subunits with minimal pressure, and distribute
615 // remaining pressure equally up to the pressure of the unit with
616 // second-to-minimal pressure.
617 // For example, let's assume we want to distribute 2*P1256
618 // (Subunits = [P1,P2,P5,P6]), and the starting DensePressure is:
619 //     DensePressure =        P0   P1   P2   P3   P4   P5   P6   P7
620 //                           0.1  0.3  0.2  0.0  0.0  0.5  0.5  0.5
621 //     RemainingPressure = 2.0
622 // We sort the subunits by pressure:
623 //     Subunits = [(P2,p=0.2), (P1,p=0.3), (P5,p=0.5), (P6, p=0.5)]
624 // We'll first start by the subunits with minimal pressure, which are at
625 // the beginning of the sorted array. In this example there is one (P2).
626 // The subunit with second-to-minimal pressure is the next one in the
627 // array (P1). So we distribute 0.1 pressure to P2, and remove 0.1 cycles
628 // from the budget.
629 //     Subunits = [(P2,p=0.3), (P1,p=0.3), (P5,p=0.5), (P5,p=0.5)]
630 //     RemainingPressure = 1.9
631 // We repeat this process: distribute 0.2 pressure on each of the minimal
632 // P2 and P1, decrease budget by 2*0.2:
633 //     Subunits = [(P2,p=0.5), (P1,p=0.5), (P5,p=0.5), (P5,p=0.5)]
634 //     RemainingPressure = 1.5
635 // There are no second-to-minimal subunits so we just share the remaining
636 // budget (1.5 cycles) equally:
637 //     Subunits = [(P2,p=0.875), (P1,p=0.875), (P5,p=0.875), (P5,p=0.875)]
638 //     RemainingPressure = 0.0
639 // We stop as there is no remaining budget to distribute.
640 void distributePressure(float RemainingPressure,
641                         llvm::SmallVector<uint16_t, 32> Subunits,
642                         llvm::SmallVector<float, 32> &DensePressure) {
643   // Find the number of subunits with minimal pressure (they are at the
644   // front).
645   llvm::sort(Subunits.begin(), Subunits.end(),
646              [&DensePressure](const uint16_t A, const uint16_t B) {
647                return DensePressure[A] < DensePressure[B];
648              });
649   const auto getPressureForSubunit = [&DensePressure,
650                                       &Subunits](size_t I) -> float & {
651     return DensePressure[Subunits[I]];
652   };
653   size_t NumMinimalSU = 1;
654   while (NumMinimalSU < Subunits.size() &&
655          getPressureForSubunit(NumMinimalSU) == getPressureForSubunit(0)) {
656     ++NumMinimalSU;
657   }
658   while (RemainingPressure > 0.0f) {
659     if (NumMinimalSU == Subunits.size()) {
660       // All units are minimal, just distribute evenly and be done.
661       for (size_t I = 0; I < NumMinimalSU; ++I) {
662         getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
663       }
664       return;
665     }
666     // Distribute the remaining pressure equally.
667     const float MinimalPressure = getPressureForSubunit(NumMinimalSU - 1);
668     const float SecondToMinimalPressure = getPressureForSubunit(NumMinimalSU);
669     assert(MinimalPressure < SecondToMinimalPressure);
670     const float Increment = SecondToMinimalPressure - MinimalPressure;
671     if (RemainingPressure <= NumMinimalSU * Increment) {
672       // There is not enough remaining pressure.
673       for (size_t I = 0; I < NumMinimalSU; ++I) {
674         getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
675       }
676       return;
677     }
678     // Bump all minimal pressure subunits to `SecondToMinimalPressure`.
679     for (size_t I = 0; I < NumMinimalSU; ++I) {
680       getPressureForSubunit(I) = SecondToMinimalPressure;
681       RemainingPressure -= SecondToMinimalPressure;
682     }
683     while (NumMinimalSU < Subunits.size() &&
684            getPressureForSubunit(NumMinimalSU) == SecondToMinimalPressure) {
685       ++NumMinimalSU;
686     }
687   }
688 }
689 
690 std::vector<std::pair<uint16_t, float>> computeIdealizedProcResPressure(
691     const llvm::MCSchedModel &SM,
692     llvm::SmallVector<llvm::MCWriteProcResEntry, 8> WPRS) {
693   // DensePressure[I] is the port pressure for Proc Resource I.
694   llvm::SmallVector<float, 32> DensePressure(SM.getNumProcResourceKinds());
695   llvm::sort(WPRS.begin(), WPRS.end(),
696              [](const llvm::MCWriteProcResEntry &A,
697                 const llvm::MCWriteProcResEntry &B) {
698                return A.ProcResourceIdx < B.ProcResourceIdx;
699              });
700   for (const llvm::MCWriteProcResEntry &WPR : WPRS) {
701     // Get units for the entry.
702     const llvm::MCProcResourceDesc *const ProcResDesc =
703         SM.getProcResource(WPR.ProcResourceIdx);
704     if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
705       // This is a ProcResUnit.
706       DensePressure[WPR.ProcResourceIdx] += WPR.Cycles;
707     } else {
708       // This is a ProcResGroup.
709       llvm::SmallVector<uint16_t, 32> Subunits(ProcResDesc->SubUnitsIdxBegin,
710                                                ProcResDesc->SubUnitsIdxBegin +
711                                                    ProcResDesc->NumUnits);
712       distributePressure(WPR.Cycles, Subunits, DensePressure);
713     }
714   }
715   // Turn dense pressure into sparse pressure by removing zero entries.
716   std::vector<std::pair<uint16_t, float>> Pressure;
717   for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
718     if (DensePressure[I] > 0.0f)
719       Pressure.emplace_back(I, DensePressure[I]);
720   }
721   return Pressure;
722 }
723 
724 } // namespace exegesis
725