xref: /llvm-project/llvm/tools/llvm-exegesis/lib/Analysis.cpp (revision 0cac726a00a2be06a959a89efe6587dba28e5822)
1 //===-- Analysis.cpp --------------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "Analysis.h"
11 #include "BenchmarkResult.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/MC/MCAsmInfo.h"
14 #include "llvm/Support/FormatVariadic.h"
15 #include <unordered_set>
16 #include <vector>
17 
18 namespace exegesis {
19 
20 static const char kCsvSep = ',';
21 
22 namespace {
23 
24 enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString };
25 
26 template <EscapeTag Tag>
27 void writeEscaped(llvm::raw_ostream &OS, const llvm::StringRef S);
28 
29 template <>
30 void writeEscaped<kEscapeCsv>(llvm::raw_ostream &OS, const llvm::StringRef S) {
31   if (std::find(S.begin(), S.end(), kCsvSep) == S.end()) {
32     OS << S;
33   } else {
34     // Needs escaping.
35     OS << '"';
36     for (const char C : S) {
37       if (C == '"')
38         OS << "\"\"";
39       else
40         OS << C;
41     }
42     OS << '"';
43   }
44 }
45 
46 template <>
47 void writeEscaped<kEscapeHtml>(llvm::raw_ostream &OS, const llvm::StringRef S) {
48   for (const char C : S) {
49     if (C == '<')
50       OS << "&lt;";
51     else if (C == '>')
52       OS << "&gt;";
53     else if (C == '&')
54       OS << "&amp;";
55     else
56       OS << C;
57   }
58 }
59 
60 template <>
61 void writeEscaped<kEscapeHtmlString>(llvm::raw_ostream &OS,
62                                      const llvm::StringRef S) {
63   for (const char C : S) {
64     if (C == '"')
65       OS << "\\\"";
66     else
67       OS << C;
68   }
69 }
70 
71 } // namespace
72 
73 template <EscapeTag Tag>
74 static void
75 writeClusterId(llvm::raw_ostream &OS,
76                const InstructionBenchmarkClustering::ClusterId &CID) {
77   if (CID.isNoise())
78     writeEscaped<Tag>(OS, "[noise]");
79   else if (CID.isError())
80     writeEscaped<Tag>(OS, "[error]");
81   else
82     OS << CID.getId();
83 }
84 
85 template <EscapeTag Tag>
86 static void writeMeasurementValue(llvm::raw_ostream &OS, const double Value) {
87   writeEscaped<Tag>(OS, llvm::formatv("{0:F}", Value).str());
88 }
89 
90 template <typename EscapeTag, EscapeTag Tag>
91 void Analysis::writeSnippet(llvm::raw_ostream &OS,
92                             llvm::ArrayRef<uint8_t> Bytes,
93                             const char *Separator) const {
94   llvm::SmallVector<std::string, 3> Lines;
95   // Parse the asm snippet and print it.
96   while (!Bytes.empty()) {
97     llvm::MCInst MI;
98     uint64_t MISize = 0;
99     if (!Disasm_->getInstruction(MI, MISize, Bytes, 0, llvm::nulls(),
100                                  llvm::nulls())) {
101       writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
102       writeEscaped<Tag>(OS, Separator);
103       writeEscaped<Tag>(OS, "[error decoding asm snippet]");
104       return;
105     }
106     Lines.emplace_back();
107     std::string &Line = Lines.back();
108     llvm::raw_string_ostream OSS(Line);
109     InstPrinter_->printInst(&MI, OSS, "", *SubtargetInfo_);
110     Bytes = Bytes.drop_front(MISize);
111     OSS.flush();
112     Line = llvm::StringRef(Line).trim().str();
113   }
114   writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
115 }
116 
117 // Prints a row representing an instruction, along with scheduling info and
118 // point coordinates (measurements).
119 void Analysis::printInstructionRowCsv(const size_t PointId,
120                                       llvm::raw_ostream &OS) const {
121   const InstructionBenchmark &Point = Clustering_.getPoints()[PointId];
122   writeClusterId<kEscapeCsv>(OS, Clustering_.getClusterIdForPoint(PointId));
123   OS << kCsvSep;
124   writeSnippet<EscapeTag, kEscapeCsv>(OS, Point.AssembledSnippet, "; ");
125   OS << kCsvSep;
126   writeEscaped<kEscapeCsv>(OS, Point.Key.Config);
127   OS << kCsvSep;
128   assert(!Point.Key.Instructions.empty());
129   // FIXME: Resolve variant classes.
130   const unsigned SchedClassId =
131       InstrInfo_->get(Point.Key.Instructions[0].getOpcode()).getSchedClass();
132 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
133   const auto &SchedModel = SubtargetInfo_->getSchedModel();
134   const llvm::MCSchedClassDesc *const SCDesc =
135       SchedModel.getSchedClassDesc(SchedClassId);
136   writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
137 #else
138   OS << SchedClassId;
139 #endif
140   for (const auto &Measurement : Point.Measurements) {
141     OS << kCsvSep;
142     writeMeasurementValue<kEscapeCsv>(OS, Measurement.PerInstructionValue);
143   }
144   OS << "\n";
145 }
146 
147 Analysis::Analysis(const llvm::Target &Target,
148                    const InstructionBenchmarkClustering &Clustering)
149     : Clustering_(Clustering) {
150   if (Clustering.getPoints().empty())
151     return;
152 
153   const InstructionBenchmark &FirstPoint = Clustering.getPoints().front();
154   InstrInfo_.reset(Target.createMCInstrInfo());
155   RegInfo_.reset(Target.createMCRegInfo(FirstPoint.LLVMTriple));
156   AsmInfo_.reset(Target.createMCAsmInfo(*RegInfo_, FirstPoint.LLVMTriple));
157   SubtargetInfo_.reset(Target.createMCSubtargetInfo(FirstPoint.LLVMTriple,
158                                                     FirstPoint.CpuName, ""));
159   InstPrinter_.reset(Target.createMCInstPrinter(
160       llvm::Triple(FirstPoint.LLVMTriple), 0 /*default variant*/, *AsmInfo_,
161       *InstrInfo_, *RegInfo_));
162 
163   Context_ = llvm::make_unique<llvm::MCContext>(AsmInfo_.get(), RegInfo_.get(),
164                                                 &ObjectFileInfo_);
165   Disasm_.reset(Target.createMCDisassembler(*SubtargetInfo_, *Context_));
166   assert(Disasm_ && "cannot create MCDisassembler. missing call to "
167                     "InitializeXXXTargetDisassembler ?");
168 }
169 
170 template <>
171 llvm::Error
172 Analysis::run<Analysis::PrintClusters>(llvm::raw_ostream &OS) const {
173   if (Clustering_.getPoints().empty())
174     return llvm::Error::success();
175 
176   // Write the header.
177   OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config"
178      << kCsvSep << "sched_class";
179   for (const auto &Measurement : Clustering_.getPoints().front().Measurements) {
180     OS << kCsvSep;
181     writeEscaped<kEscapeCsv>(OS, Measurement.Key);
182   }
183   OS << "\n";
184 
185   // Write the points.
186   const auto &Clusters = Clustering_.getValidClusters();
187   for (size_t I = 0, E = Clusters.size(); I < E; ++I) {
188     for (const size_t PointId : Clusters[I].PointIndices) {
189       printInstructionRowCsv(PointId, OS);
190     }
191     OS << "\n\n";
192   }
193   return llvm::Error::success();
194 }
195 
196 std::unordered_map<unsigned, std::vector<size_t>>
197 Analysis::makePointsPerSchedClass() const {
198   std::unordered_map<unsigned, std::vector<size_t>> PointsPerSchedClass;
199   const auto &Points = Clustering_.getPoints();
200   for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) {
201     const InstructionBenchmark &Point = Points[PointId];
202     if (!Point.Error.empty())
203       continue;
204     assert(!Point.Key.Instructions.empty());
205     const auto Opcode = Point.Key.Instructions[0].getOpcode();
206     // FIXME: Resolve variant classes.
207     PointsPerSchedClass[InstrInfo_->get(Opcode).getSchedClass()].push_back(
208         PointId);
209   }
210   return PointsPerSchedClass;
211 }
212 
213 // Uops repeat the same opcode over again. Just show this opcode and show the
214 // whole snippet only on hover.
215 static void writeUopsSnippetHtml(llvm::raw_ostream &OS,
216                                  const std::vector<llvm::MCInst> &Instructions,
217                                  const llvm::MCInstrInfo &InstrInfo) {
218   if (Instructions.empty())
219     return;
220   writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instructions[0].getOpcode()));
221   if (Instructions.size() > 1)
222     OS << " (x" << Instructions.size() << ")";
223 }
224 
225 // Latency tries to find a serial path. Just show the opcode path and show the
226 // whole snippet only on hover.
227 static void
228 writeLatencySnippetHtml(llvm::raw_ostream &OS,
229                         const std::vector<llvm::MCInst> &Instructions,
230                         const llvm::MCInstrInfo &InstrInfo) {
231   bool First = true;
232   for (const llvm::MCInst &Instr : Instructions) {
233     if (First)
234       First = false;
235     else
236       OS << " &rarr; ";
237     writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instr.getOpcode()));
238   }
239 }
240 
241 void Analysis::printSchedClassClustersHtml(
242     const std::vector<SchedClassCluster> &Clusters, const SchedClass &SC,
243     llvm::raw_ostream &OS) const {
244   const auto &Points = Clustering_.getPoints();
245   OS << "<table class=\"sched-class-clusters\">";
246   OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
247   assert(!Clusters.empty());
248   for (const auto &Measurement :
249        Points[Clusters[0].getPointIds()[0]].Measurements) {
250     OS << "<th>";
251     writeEscaped<kEscapeHtml>(OS, Measurement.Key);
252     OS << "</th>";
253   }
254   OS << "</tr>";
255   for (const SchedClassCluster &Cluster : Clusters) {
256     OS << "<tr class=\""
257        << (Cluster.measurementsMatch(*SubtargetInfo_, SC, Clustering_)
258                ? "good-cluster"
259                : "bad-cluster")
260        << "\"><td>";
261     writeClusterId<kEscapeHtml>(OS, Cluster.id());
262     OS << "</td><td><ul>";
263     for (const size_t PointId : Cluster.getPointIds()) {
264       const auto &Point = Points[PointId];
265       OS << "<li><span class=\"mono\" title=\"";
266       writeSnippet<EscapeTag, kEscapeHtmlString>(OS, Point.AssembledSnippet,
267                                                  "\n");
268       OS << "\">";
269       switch (Point.Mode) {
270       case InstructionBenchmark::Latency:
271         writeLatencySnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
272         break;
273       case InstructionBenchmark::Uops:
274         writeUopsSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
275         break;
276       default:
277         llvm_unreachable("invalid mode");
278       }
279       OS << "</span> <span class=\"mono\">";
280       writeEscaped<kEscapeHtml>(OS, Point.Key.Config);
281       OS << "</span></li>";
282     }
283     OS << "</ul></td>";
284     for (const auto &Stats : Cluster.getRepresentative()) {
285       OS << "<td class=\"measurement\">";
286       writeMeasurementValue<kEscapeHtml>(OS, Stats.avg());
287       OS << "<br><span class=\"minmax\">[";
288       writeMeasurementValue<kEscapeHtml>(OS, Stats.min());
289       OS << ";";
290       writeMeasurementValue<kEscapeHtml>(OS, Stats.max());
291       OS << "]</span></td>";
292     }
293     OS << "</tr>";
294   }
295   OS << "</table>";
296 }
297 
298 // Return the non-redundant list of WriteProcRes used by the given sched class.
299 // The scheduling model for LLVM is such that each instruction has a certain
300 // number of uops which consume resources which are described by WriteProcRes
301 // entries. Each entry describe how many cycles are spent on a specific ProcRes
302 // kind.
303 // For example, an instruction might have 3 uOps, one dispatching on P0
304 // (ProcResIdx=1) and two on P06 (ProcResIdx = 7).
305 // Note that LLVM additionally denormalizes resource consumption to include
306 // usage of super resources by subresources. So in practice if there exists a
307 // P016 (ProcResIdx=10), then the cycles consumed by P0 are also consumed by
308 // P06 (ProcResIdx = 7) and P016 (ProcResIdx = 10), and the resources consumed
309 // by P06 are also consumed by P016. In the figure below, parenthesized cycles
310 // denote implied usage of superresources by subresources:
311 //            P0      P06    P016
312 //     uOp1    1      (1)     (1)
313 //     uOp2            1      (1)
314 //     uOp3            1      (1)
315 //     =============================
316 //             1       3       3
317 // Eventually we end up with three entries for the WriteProcRes of the
318 // instruction:
319 //    {ProcResIdx=1,  Cycles=1}  // P0
320 //    {ProcResIdx=7,  Cycles=3}  // P06
321 //    {ProcResIdx=10, Cycles=3}  // P016
322 //
323 // Note that in this case, P016 does not contribute any cycles, so it would
324 // be removed by this function.
325 // FIXME: Move this to MCSubtargetInfo and use it in llvm-mca.
326 static llvm::SmallVector<llvm::MCWriteProcResEntry, 8>
327 getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc,
328                             const llvm::MCSubtargetInfo &STI) {
329   llvm::SmallVector<llvm::MCWriteProcResEntry, 8> Result;
330   const auto &SM = STI.getSchedModel();
331   const unsigned NumProcRes = SM.getNumProcResourceKinds();
332 
333   // This assumes that the ProcResDescs are sorted in topological order, which
334   // is guaranteed by the tablegen backend.
335   llvm::SmallVector<float, 32> ProcResUnitUsage(NumProcRes);
336   for (const auto *WPR = STI.getWriteProcResBegin(&SCDesc),
337                   *const WPREnd = STI.getWriteProcResEnd(&SCDesc);
338        WPR != WPREnd; ++WPR) {
339     const llvm::MCProcResourceDesc *const ProcResDesc =
340         SM.getProcResource(WPR->ProcResourceIdx);
341     if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
342       // This is a ProcResUnit.
343       Result.push_back({WPR->ProcResourceIdx, WPR->Cycles});
344       ProcResUnitUsage[WPR->ProcResourceIdx] += WPR->Cycles;
345     } else {
346       // This is a ProcResGroup. First see if it contributes any cycles or if
347       // it has cycles just from subunits.
348       float RemainingCycles = WPR->Cycles;
349       for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
350            SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
351            ++SubResIdx) {
352         RemainingCycles -= ProcResUnitUsage[*SubResIdx];
353       }
354       if (RemainingCycles < 0.01f) {
355         // The ProcResGroup contributes no cycles of its own.
356         continue;
357       }
358       // The ProcResGroup contributes `RemainingCycles` cycles of its own.
359       Result.push_back({WPR->ProcResourceIdx,
360                         static_cast<uint16_t>(std::round(RemainingCycles))});
361       // Spread the remaining cycles over all subunits.
362       for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
363            SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
364            ++SubResIdx) {
365         ProcResUnitUsage[*SubResIdx] += RemainingCycles / ProcResDesc->NumUnits;
366       }
367     }
368   }
369   return Result;
370 }
371 
372 Analysis::SchedClass::SchedClass(const llvm::MCSchedClassDesc &SD,
373                                  const llvm::MCSubtargetInfo &STI)
374     : SCDesc(&SD),
375       NonRedundantWriteProcRes(getNonRedundantWriteProcRes(SD, STI)),
376       IdealizedProcResPressure(computeIdealizedProcResPressure(
377           STI.getSchedModel(), NonRedundantWriteProcRes)) {}
378 
379 void Analysis::SchedClassCluster::addPoint(
380     size_t PointId, const InstructionBenchmarkClustering &Clustering) {
381   PointIds.push_back(PointId);
382   const auto &Point = Clustering.getPoints()[PointId];
383   if (ClusterId.isUndef()) {
384     ClusterId = Clustering.getClusterIdForPoint(PointId);
385     Representative.resize(Point.Measurements.size());
386   }
387   for (size_t I = 0, E = Point.Measurements.size(); I < E; ++I) {
388     Representative[I].push(Point.Measurements[I]);
389   }
390   assert(ClusterId == Clustering.getClusterIdForPoint(PointId));
391 }
392 
393 bool Analysis::SchedClassCluster::measurementsMatch(
394     const llvm::MCSubtargetInfo &STI, const SchedClass &SC,
395     const InstructionBenchmarkClustering &Clustering) const {
396   const size_t NumMeasurements = Representative.size();
397   std::vector<BenchmarkMeasure> ClusterCenterPoint(NumMeasurements);
398   std::vector<BenchmarkMeasure> SchedClassPoint(NumMeasurements);
399   // Latency case.
400   assert(!Clustering.getPoints().empty());
401   const InstructionBenchmark::ModeE Mode = Clustering.getPoints()[0].Mode;
402   if (Mode == InstructionBenchmark::Latency) {
403     if (NumMeasurements != 1) {
404       llvm::errs()
405           << "invalid number of measurements in latency mode: expected 1, got "
406           << NumMeasurements << "\n";
407       return false;
408     }
409     // Find the latency.
410     SchedClassPoint[0].PerInstructionValue = 0.0;
411     for (unsigned I = 0; I < SC.SCDesc->NumWriteLatencyEntries; ++I) {
412       const llvm::MCWriteLatencyEntry *const WLE =
413           STI.getWriteLatencyEntry(SC.SCDesc, I);
414       SchedClassPoint[0].PerInstructionValue =
415           std::max<double>(SchedClassPoint[0].PerInstructionValue, WLE->Cycles);
416     }
417     ClusterCenterPoint[0].PerInstructionValue = Representative[0].avg();
418   } else if (Mode == InstructionBenchmark::Uops) {
419     for (int I = 0, E = Representative.size(); I < E; ++I) {
420       // Find the pressure on ProcResIdx `Key`.
421       uint16_t ProcResIdx = 0;
422       if (!llvm::to_integer(Representative[I].key(), ProcResIdx, 10)) {
423         llvm::errs() << "expected ProcResIdx key, got "
424                      << Representative[I].key() << "\n";
425         return false;
426       }
427       const auto ProcResPressureIt =
428           std::find_if(SC.IdealizedProcResPressure.begin(),
429                        SC.IdealizedProcResPressure.end(),
430                        [ProcResIdx](const std::pair<uint16_t, float> &WPR) {
431                          return WPR.first == ProcResIdx;
432                        });
433       SchedClassPoint[I].PerInstructionValue =
434           ProcResPressureIt == SC.IdealizedProcResPressure.end()
435               ? 0.0
436               : ProcResPressureIt->second;
437       ClusterCenterPoint[I].PerInstructionValue = Representative[I].avg();
438     }
439   } else {
440     llvm::errs() << "unimplemented measurement matching for mode " << Mode
441                  << "\n";
442     return false;
443   }
444   return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint);
445 }
446 
447 void Analysis::printSchedClassDescHtml(const SchedClass &SC,
448                                        llvm::raw_ostream &OS) const {
449   OS << "<table class=\"sched-class-desc\">";
450   OS << "<tr><th>Valid</th><th>Variant</th><th>uOps</th><th>Latency</"
451         "th><th>WriteProcRes</th><th title=\"This is the idealized unit "
452         "resource (port) pressure assuming ideal distribution\">Idealized "
453         "Resource Pressure</th></tr>";
454   if (SC.SCDesc->isValid()) {
455     const auto &SM = SubtargetInfo_->getSchedModel();
456     OS << "<tr><td>&#10004;</td>";
457     OS << "<td>" << (SC.SCDesc->isVariant() ? "&#10004;" : "&#10005;")
458        << "</td>";
459     OS << "<td>" << SC.SCDesc->NumMicroOps << "</td>";
460     // Latencies.
461     OS << "<td><ul>";
462     for (int I = 0, E = SC.SCDesc->NumWriteLatencyEntries; I < E; ++I) {
463       const auto *const Entry =
464           SubtargetInfo_->getWriteLatencyEntry(SC.SCDesc, I);
465       OS << "<li>" << Entry->Cycles;
466       if (SC.SCDesc->NumWriteLatencyEntries > 1) {
467         // Dismabiguate if more than 1 latency.
468         OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
469       }
470       OS << "</li>";
471     }
472     OS << "</ul></td>";
473     // WriteProcRes.
474     OS << "<td><ul>";
475     for (const auto &WPR : SC.NonRedundantWriteProcRes) {
476       OS << "<li><span class=\"mono\">";
477       writeEscaped<kEscapeHtml>(OS,
478                                 SM.getProcResource(WPR.ProcResourceIdx)->Name);
479       OS << "</span>: " << WPR.Cycles << "</li>";
480     }
481     OS << "</ul></td>";
482     // Idealized port pressure.
483     OS << "<td><ul>";
484     for (const auto &Pressure : SC.IdealizedProcResPressure) {
485       OS << "<li><span class=\"mono\">";
486       writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel()
487                                         .getProcResource(Pressure.first)
488                                         ->Name);
489       OS << "</span>: ";
490       writeMeasurementValue<kEscapeHtml>(OS, Pressure.second);
491       OS << "</li>";
492     }
493     OS << "</ul></td>";
494     OS << "</tr>";
495   } else {
496     OS << "<tr><td>&#10005;</td><td></td><td></td></tr>";
497   }
498   OS << "</table>";
499 }
500 
501 static constexpr const char kHtmlHead[] = R"(
502 <head>
503 <title>llvm-exegesis Analysis Results</title>
504 <style>
505 body {
506   font-family: sans-serif
507 }
508 span.sched-class-name {
509   font-weight: bold;
510   font-family: monospace;
511 }
512 span.opcode {
513   font-family: monospace;
514 }
515 span.config {
516   font-family: monospace;
517 }
518 div.inconsistency {
519   margin-top: 50px;
520 }
521 table {
522   margin-left: 50px;
523   border-collapse: collapse;
524 }
525 table, table tr,td,th {
526   border: 1px solid #444;
527 }
528 table ul {
529   padding-left: 0px;
530   margin: 0px;
531   list-style-type: none;
532 }
533 table.sched-class-clusters td {
534   padding-left: 10px;
535   padding-right: 10px;
536   padding-top: 10px;
537   padding-bottom: 10px;
538 }
539 table.sched-class-desc td {
540   padding-left: 10px;
541   padding-right: 10px;
542   padding-top: 2px;
543   padding-bottom: 2px;
544 }
545 span.mono {
546   font-family: monospace;
547 }
548 td.measurement {
549   text-align: center;
550 }
551 tr.good-cluster td.measurement {
552   color: #292
553 }
554 tr.bad-cluster td.measurement {
555   color: #922
556 }
557 tr.good-cluster td.measurement span.minmax {
558   color: #888;
559 }
560 tr.bad-cluster td.measurement span.minmax {
561   color: #888;
562 }
563 </style>
564 </head>
565 )";
566 
567 template <>
568 llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
569     llvm::raw_ostream &OS) const {
570   const auto &FirstPoint = Clustering_.getPoints()[0];
571   // Print the header.
572   OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>";
573   OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>";
574   OS << "<h3>Triple: <span class=\"mono\">";
575   writeEscaped<kEscapeHtml>(OS, FirstPoint.LLVMTriple);
576   OS << "</span></h3><h3>Cpu: <span class=\"mono\">";
577   writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName);
578   OS << "</span></h3>";
579 
580   for (const auto &SchedClassAndPoints : makePointsPerSchedClass()) {
581     const auto SchedClassId = SchedClassAndPoints.first;
582     const std::vector<size_t> &SchedClassPoints = SchedClassAndPoints.second;
583     const auto &SchedModel = SubtargetInfo_->getSchedModel();
584     const llvm::MCSchedClassDesc *const SCDesc =
585         SchedModel.getSchedClassDesc(SchedClassId);
586     if (!SCDesc)
587       continue;
588     const SchedClass SC(*SCDesc, *SubtargetInfo_);
589 
590     // Bucket sched class points into sched class clusters.
591     std::vector<SchedClassCluster> SchedClassClusters;
592     for (const size_t PointId : SchedClassPoints) {
593       const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId);
594       if (!ClusterId.isValid())
595         continue; // Ignore noise and errors. FIXME: take noise into account ?
596       auto SchedClassClusterIt =
597           std::find_if(SchedClassClusters.begin(), SchedClassClusters.end(),
598                        [ClusterId](const SchedClassCluster &C) {
599                          return C.id() == ClusterId;
600                        });
601       if (SchedClassClusterIt == SchedClassClusters.end()) {
602         SchedClassClusters.emplace_back();
603         SchedClassClusterIt = std::prev(SchedClassClusters.end());
604       }
605       SchedClassClusterIt->addPoint(PointId, Clustering_);
606     }
607 
608     // Print any scheduling class that has at least one cluster that does not
609     // match the checked-in data.
610     if (std::all_of(SchedClassClusters.begin(), SchedClassClusters.end(),
611                     [this, &SC](const SchedClassCluster &C) {
612                       return C.measurementsMatch(*SubtargetInfo_, SC,
613                                                  Clustering_);
614                     }))
615       continue; // Nothing weird.
616 
617     OS << "<div class=\"inconsistency\"><p>Sched Class <span "
618           "class=\"sched-class-name\">";
619 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
620     writeEscaped<kEscapeHtml>(OS, SCDesc->Name);
621 #else
622     OS << SchedClassId;
623 #endif
624     OS << "</span> contains instructions whose performance characteristics do"
625           " not match that of LLVM:</p>";
626     printSchedClassClustersHtml(SchedClassClusters, SC, OS);
627     OS << "<p>llvm SchedModel data:</p>";
628     printSchedClassDescHtml(SC, OS);
629     OS << "</div>";
630   }
631 
632   OS << "</body></html>";
633   return llvm::Error::success();
634 }
635 
636 // Distributes a pressure budget as evenly as possible on the provided subunits
637 // given the already existing port pressure distribution.
638 //
639 // The algorithm is as follows: while there is remaining pressure to
640 // distribute, find the subunits with minimal pressure, and distribute
641 // remaining pressure equally up to the pressure of the unit with
642 // second-to-minimal pressure.
643 // For example, let's assume we want to distribute 2*P1256
644 // (Subunits = [P1,P2,P5,P6]), and the starting DensePressure is:
645 //     DensePressure =        P0   P1   P2   P3   P4   P5   P6   P7
646 //                           0.1  0.3  0.2  0.0  0.0  0.5  0.5  0.5
647 //     RemainingPressure = 2.0
648 // We sort the subunits by pressure:
649 //     Subunits = [(P2,p=0.2), (P1,p=0.3), (P5,p=0.5), (P6, p=0.5)]
650 // We'll first start by the subunits with minimal pressure, which are at
651 // the beginning of the sorted array. In this example there is one (P2).
652 // The subunit with second-to-minimal pressure is the next one in the
653 // array (P1). So we distribute 0.1 pressure to P2, and remove 0.1 cycles
654 // from the budget.
655 //     Subunits = [(P2,p=0.3), (P1,p=0.3), (P5,p=0.5), (P5,p=0.5)]
656 //     RemainingPressure = 1.9
657 // We repeat this process: distribute 0.2 pressure on each of the minimal
658 // P2 and P1, decrease budget by 2*0.2:
659 //     Subunits = [(P2,p=0.5), (P1,p=0.5), (P5,p=0.5), (P5,p=0.5)]
660 //     RemainingPressure = 1.5
661 // There are no second-to-minimal subunits so we just share the remaining
662 // budget (1.5 cycles) equally:
663 //     Subunits = [(P2,p=0.875), (P1,p=0.875), (P5,p=0.875), (P5,p=0.875)]
664 //     RemainingPressure = 0.0
665 // We stop as there is no remaining budget to distribute.
666 void distributePressure(float RemainingPressure,
667                         llvm::SmallVector<uint16_t, 32> Subunits,
668                         llvm::SmallVector<float, 32> &DensePressure) {
669   // Find the number of subunits with minimal pressure (they are at the
670   // front).
671   llvm::sort(Subunits, [&DensePressure](const uint16_t A, const uint16_t B) {
672     return DensePressure[A] < DensePressure[B];
673   });
674   const auto getPressureForSubunit = [&DensePressure,
675                                       &Subunits](size_t I) -> float & {
676     return DensePressure[Subunits[I]];
677   };
678   size_t NumMinimalSU = 1;
679   while (NumMinimalSU < Subunits.size() &&
680          getPressureForSubunit(NumMinimalSU) == getPressureForSubunit(0)) {
681     ++NumMinimalSU;
682   }
683   while (RemainingPressure > 0.0f) {
684     if (NumMinimalSU == Subunits.size()) {
685       // All units are minimal, just distribute evenly and be done.
686       for (size_t I = 0; I < NumMinimalSU; ++I) {
687         getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
688       }
689       return;
690     }
691     // Distribute the remaining pressure equally.
692     const float MinimalPressure = getPressureForSubunit(NumMinimalSU - 1);
693     const float SecondToMinimalPressure = getPressureForSubunit(NumMinimalSU);
694     assert(MinimalPressure < SecondToMinimalPressure);
695     const float Increment = SecondToMinimalPressure - MinimalPressure;
696     if (RemainingPressure <= NumMinimalSU * Increment) {
697       // There is not enough remaining pressure.
698       for (size_t I = 0; I < NumMinimalSU; ++I) {
699         getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
700       }
701       return;
702     }
703     // Bump all minimal pressure subunits to `SecondToMinimalPressure`.
704     for (size_t I = 0; I < NumMinimalSU; ++I) {
705       getPressureForSubunit(I) = SecondToMinimalPressure;
706       RemainingPressure -= SecondToMinimalPressure;
707     }
708     while (NumMinimalSU < Subunits.size() &&
709            getPressureForSubunit(NumMinimalSU) == SecondToMinimalPressure) {
710       ++NumMinimalSU;
711     }
712   }
713 }
714 
715 std::vector<std::pair<uint16_t, float>> computeIdealizedProcResPressure(
716     const llvm::MCSchedModel &SM,
717     llvm::SmallVector<llvm::MCWriteProcResEntry, 8> WPRS) {
718   // DensePressure[I] is the port pressure for Proc Resource I.
719   llvm::SmallVector<float, 32> DensePressure(SM.getNumProcResourceKinds());
720   llvm::sort(WPRS, [](const llvm::MCWriteProcResEntry &A,
721                       const llvm::MCWriteProcResEntry &B) {
722     return A.ProcResourceIdx < B.ProcResourceIdx;
723   });
724   for (const llvm::MCWriteProcResEntry &WPR : WPRS) {
725     // Get units for the entry.
726     const llvm::MCProcResourceDesc *const ProcResDesc =
727         SM.getProcResource(WPR.ProcResourceIdx);
728     if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
729       // This is a ProcResUnit.
730       DensePressure[WPR.ProcResourceIdx] += WPR.Cycles;
731     } else {
732       // This is a ProcResGroup.
733       llvm::SmallVector<uint16_t, 32> Subunits(ProcResDesc->SubUnitsIdxBegin,
734                                                ProcResDesc->SubUnitsIdxBegin +
735                                                    ProcResDesc->NumUnits);
736       distributePressure(WPR.Cycles, Subunits, DensePressure);
737     }
738   }
739   // Turn dense pressure into sparse pressure by removing zero entries.
740   std::vector<std::pair<uint16_t, float>> Pressure;
741   for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
742     if (DensePressure[I] > 0.0f)
743       Pressure.emplace_back(I, DensePressure[I]);
744   }
745   return Pressure;
746 }
747 
748 } // namespace exegesis
749