xref: /llvm-project/llvm/tools/llvm-exegesis/lib/Analysis.cpp (revision c630f95f33e31fe11ec6242560d9bf5d57007673)
1 //===-- Analysis.cpp --------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Analysis.h"
10 #include "BenchmarkResult.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/MC/MCAsmInfo.h"
13 #include "llvm/MC/MCTargetOptions.h"
14 #include "llvm/Support/FormatVariadic.h"
15 #include <limits>
16 #include <vector>
17 
18 namespace llvm {
19 namespace exegesis {
20 
// Separator written between the fields of every CSV row emitted by this file.
static constexpr char kCsvSep = ',';
22 
23 namespace {
24 
25 enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString };
26 
27 template <EscapeTag Tag> void writeEscaped(raw_ostream &OS, const StringRef S);
28 
29 template <> void writeEscaped<kEscapeCsv>(raw_ostream &OS, const StringRef S) {
30   if (!S.contains(kCsvSep)) {
31     OS << S;
32   } else {
33     // Needs escaping.
34     OS << '"';
35     for (const char C : S) {
36       if (C == '"')
37         OS << "\"\"";
38       else
39         OS << C;
40     }
41     OS << '"';
42   }
43 }
44 
45 template <> void writeEscaped<kEscapeHtml>(raw_ostream &OS, const StringRef S) {
46   for (const char C : S) {
47     if (C == '<')
48       OS << "&lt;";
49     else if (C == '>')
50       OS << "&gt;";
51     else if (C == '&')
52       OS << "&amp;";
53     else
54       OS << C;
55   }
56 }
57 
58 template <>
59 void writeEscaped<kEscapeHtmlString>(raw_ostream &OS, const StringRef S) {
60   for (const char C : S) {
61     if (C == '"')
62       OS << "\\\"";
63     else
64       OS << C;
65   }
66 }
67 
68 } // namespace
69 
70 template <EscapeTag Tag>
71 static void
72 writeClusterId(raw_ostream &OS,
73                const BenchmarkClustering::ClusterId &CID) {
74   if (CID.isNoise())
75     writeEscaped<Tag>(OS, "[noise]");
76   else if (CID.isError())
77     writeEscaped<Tag>(OS, "[error]");
78   else
79     OS << CID.getId();
80 }
81 
82 template <EscapeTag Tag>
83 static void writeMeasurementValue(raw_ostream &OS, const double Value) {
84   // Given Value, if we wanted to serialize it to a string,
85   // how many base-10 digits will we need to store, max?
86   static constexpr auto MaxDigitCount =
87       std::numeric_limits<decltype(Value)>::max_digits10;
88   // Also, we will need a decimal separator.
89   static constexpr auto DecimalSeparatorLen = 1; // '.' e.g.
90   // So how long of a string will the serialization produce, max?
91   static constexpr auto SerializationLen = MaxDigitCount + DecimalSeparatorLen;
92 
93   // WARNING: when changing the format, also adjust the small-size estimate ^.
94   static constexpr StringLiteral SimpleFloatFormat = StringLiteral("{0:F}");
95 
96   writeEscaped<Tag>(
97       OS, formatv(SimpleFloatFormat.data(), Value).sstr<SerializationLen>());
98 }
99 
100 template <typename EscapeTag, EscapeTag Tag>
101 void Analysis::writeSnippet(raw_ostream &OS, ArrayRef<uint8_t> Bytes,
102                             const char *Separator) const {
103   SmallVector<std::string, 3> Lines;
104   // Parse the asm snippet and print it.
105   while (!Bytes.empty()) {
106     MCInst MI;
107     uint64_t MISize = 0;
108     if (!DisasmHelper_->decodeInst(MI, MISize, Bytes)) {
109       writeEscaped<Tag>(OS, join(Lines, Separator));
110       writeEscaped<Tag>(OS, Separator);
111       writeEscaped<Tag>(OS, "[error decoding asm snippet]");
112       return;
113     }
114     SmallString<128> InstPrinterStr; // FIXME: magic number.
115     raw_svector_ostream OSS(InstPrinterStr);
116     DisasmHelper_->printInst(&MI, OSS);
117     Bytes = Bytes.drop_front(MISize);
118     Lines.emplace_back(InstPrinterStr.str().trim());
119   }
120   writeEscaped<Tag>(OS, join(Lines, Separator));
121 }
122 
123 // Prints a row representing an instruction, along with scheduling info and
124 // point coordinates (measurements).
125 void Analysis::printInstructionRowCsv(const size_t PointId,
126                                       raw_ostream &OS) const {
127   const Benchmark &Point = Clustering_.getPoints()[PointId];
128   writeClusterId<kEscapeCsv>(OS, Clustering_.getClusterIdForPoint(PointId));
129   OS << kCsvSep;
130   writeSnippet<EscapeTag, kEscapeCsv>(OS, Point.AssembledSnippet, "; ");
131   OS << kCsvSep;
132   writeEscaped<kEscapeCsv>(OS, Point.Key.Config);
133   OS << kCsvSep;
134   assert(!Point.Key.Instructions.empty());
135   const MCInst &MCI = Point.keyInstruction();
136   unsigned SchedClassId;
137   std::tie(SchedClassId, std::ignore) = ResolvedSchedClass::resolveSchedClassId(
138       State_.getSubtargetInfo(), State_.getInstrInfo(), MCI);
139 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
140   const MCSchedClassDesc *const SCDesc =
141       State_.getSubtargetInfo().getSchedModel().getSchedClassDesc(SchedClassId);
142   writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
143 #else
144   OS << SchedClassId;
145 #endif
146   for (const auto &Measurement : Point.Measurements) {
147     OS << kCsvSep;
148     writeMeasurementValue<kEscapeCsv>(OS, Measurement.PerInstructionValue);
149   }
150   OS << "\n";
151 }
152 
153 Analysis::Analysis(const LLVMState &State,
154                    const BenchmarkClustering &Clustering,
155                    double AnalysisInconsistencyEpsilon,
156                    bool AnalysisDisplayUnstableOpcodes)
157     : Clustering_(Clustering), State_(State),
158       AnalysisInconsistencyEpsilonSquared_(AnalysisInconsistencyEpsilon *
159                                            AnalysisInconsistencyEpsilon),
160       AnalysisDisplayUnstableOpcodes_(AnalysisDisplayUnstableOpcodes) {
161   if (Clustering.getPoints().empty())
162     return;
163 
164   DisasmHelper_ = std::make_unique<DisassemblerHelper>(State);
165 }
166 
167 template <>
168 Error Analysis::run<Analysis::PrintClusters>(raw_ostream &OS) const {
169   if (Clustering_.getPoints().empty())
170     return Error::success();
171 
172   // Write the header.
173   OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config"
174      << kCsvSep << "sched_class";
175   for (const auto &Measurement : Clustering_.getPoints().front().Measurements) {
176     OS << kCsvSep;
177     writeEscaped<kEscapeCsv>(OS, Measurement.Key);
178   }
179   OS << "\n";
180 
181   // Write the points.
182   for (const auto &ClusterIt : Clustering_.getValidClusters()) {
183     for (const size_t PointId : ClusterIt.PointIndices) {
184       printInstructionRowCsv(PointId, OS);
185     }
186     OS << "\n\n";
187   }
188   return Error::success();
189 }
190 
191 Analysis::ResolvedSchedClassAndPoints::ResolvedSchedClassAndPoints(
192     ResolvedSchedClass &&RSC)
193     : RSC(std::move(RSC)) {}
194 
195 std::vector<Analysis::ResolvedSchedClassAndPoints>
196 Analysis::makePointsPerSchedClass() const {
197   std::vector<ResolvedSchedClassAndPoints> Entries;
198   // Maps SchedClassIds to index in result.
199   std::unordered_map<unsigned, size_t> SchedClassIdToIndex;
200   const auto &Points = Clustering_.getPoints();
201   for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) {
202     const Benchmark &Point = Points[PointId];
203     if (!Point.Error.empty())
204       continue;
205     assert(!Point.Key.Instructions.empty());
206     // FIXME: we should be using the tuple of classes for instructions in the
207     // snippet as key.
208     const MCInst &MCI = Point.keyInstruction();
209     unsigned SchedClassId;
210     bool WasVariant;
211     std::tie(SchedClassId, WasVariant) =
212         ResolvedSchedClass::resolveSchedClassId(State_.getSubtargetInfo(),
213                                                 State_.getInstrInfo(), MCI);
214     const auto IndexIt = SchedClassIdToIndex.find(SchedClassId);
215     if (IndexIt == SchedClassIdToIndex.end()) {
216       // Create a new entry.
217       SchedClassIdToIndex.emplace(SchedClassId, Entries.size());
218       ResolvedSchedClassAndPoints Entry(ResolvedSchedClass(
219           State_.getSubtargetInfo(), SchedClassId, WasVariant));
220       Entry.PointIds.push_back(PointId);
221       Entries.push_back(std::move(Entry));
222     } else {
223       // Append to the existing entry.
224       Entries[IndexIt->second].PointIds.push_back(PointId);
225     }
226   }
227   return Entries;
228 }
229 
230 // Parallel benchmarks repeat the same opcode multiple times. Just show this
231 // opcode and show the whole snippet only on hover.
232 static void writeParallelSnippetHtml(raw_ostream &OS,
233                                  const std::vector<MCInst> &Instructions,
234                                  const MCInstrInfo &InstrInfo) {
235   if (Instructions.empty())
236     return;
237   writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instructions[0].getOpcode()));
238   if (Instructions.size() > 1)
239     OS << " (x" << Instructions.size() << ")";
240 }
241 
242 // Latency tries to find a serial path. Just show the opcode path and show the
243 // whole snippet only on hover.
244 static void writeLatencySnippetHtml(raw_ostream &OS,
245                                     const std::vector<MCInst> &Instructions,
246                                     const MCInstrInfo &InstrInfo) {
247   bool First = true;
248   for (const MCInst &Instr : Instructions) {
249     if (First)
250       First = false;
251     else
252       OS << " &rarr; ";
253     writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instr.getOpcode()));
254   }
255 }
256 
257 void Analysis::printPointHtml(const Benchmark &Point,
258                               llvm::raw_ostream &OS) const {
259   OS << "<li><span class=\"mono\" title=\"";
260   writeSnippet<EscapeTag, kEscapeHtmlString>(OS, Point.AssembledSnippet, "\n");
261   OS << "\">";
262   switch (Point.Mode) {
263   case Benchmark::Latency:
264     writeLatencySnippetHtml(OS, Point.Key.Instructions, State_.getInstrInfo());
265     break;
266   case Benchmark::Uops:
267   case Benchmark::InverseThroughput:
268     writeParallelSnippetHtml(OS, Point.Key.Instructions, State_.getInstrInfo());
269     break;
270   default:
271     llvm_unreachable("invalid mode");
272   }
273   OS << "</span> <span class=\"mono\">";
274   writeEscaped<kEscapeHtml>(OS, Point.Key.Config);
275   OS << "</span></li>";
276 }
277 
278 void Analysis::printSchedClassClustersHtml(
279     const std::vector<SchedClassCluster> &Clusters,
280     const ResolvedSchedClass &RSC, raw_ostream &OS) const {
281   const auto &Points = Clustering_.getPoints();
282   OS << "<table class=\"sched-class-clusters\">";
283   OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
284   assert(!Clusters.empty());
285   for (const auto &Measurement :
286        Points[Clusters[0].getPointIds()[0]].Measurements) {
287     OS << "<th>";
288     writeEscaped<kEscapeHtml>(OS, Measurement.Key);
289     OS << "</th>";
290   }
291   OS << "</tr>";
292   for (const SchedClassCluster &Cluster : Clusters) {
293     OS << "<tr class=\""
294        << (Cluster.measurementsMatch(State_.getSubtargetInfo(), RSC,
295                                      Clustering_,
296                                      AnalysisInconsistencyEpsilonSquared_)
297                ? "good-cluster"
298                : "bad-cluster")
299        << "\"><td>";
300     writeClusterId<kEscapeHtml>(OS, Cluster.id());
301     OS << "</td><td><ul>";
302     for (const size_t PointId : Cluster.getPointIds()) {
303       printPointHtml(Points[PointId], OS);
304     }
305     OS << "</ul></td>";
306     for (const auto &Stats : Cluster.getCentroid().getStats()) {
307       OS << "<td class=\"measurement\">";
308       writeMeasurementValue<kEscapeHtml>(OS, Stats.avg());
309       OS << "<br><span class=\"minmax\">[";
310       writeMeasurementValue<kEscapeHtml>(OS, Stats.min());
311       OS << ";";
312       writeMeasurementValue<kEscapeHtml>(OS, Stats.max());
313       OS << "]</span></td>";
314     }
315     OS << "</tr>";
316   }
317   OS << "</table>";
318 }
319 
320 void Analysis::SchedClassCluster::addPoint(
321     size_t PointId, const BenchmarkClustering &Clustering) {
322   PointIds.push_back(PointId);
323   const auto &Point = Clustering.getPoints()[PointId];
324   if (ClusterId.isUndef())
325     ClusterId = Clustering.getClusterIdForPoint(PointId);
326   assert(ClusterId == Clustering.getClusterIdForPoint(PointId));
327 
328   Centroid.addPoint(Point.Measurements);
329 }
330 
331 bool Analysis::SchedClassCluster::measurementsMatch(
332     const MCSubtargetInfo &STI, const ResolvedSchedClass &RSC,
333     const BenchmarkClustering &Clustering,
334     const double AnalysisInconsistencyEpsilonSquared_) const {
335   assert(!Clustering.getPoints().empty());
336   const Benchmark::ModeE Mode = Clustering.getPoints()[0].Mode;
337 
338   if (!Centroid.validate(Mode))
339     return false;
340 
341   const std::vector<BenchmarkMeasure> ClusterCenterPoint =
342       Centroid.getAsPoint();
343 
344   const std::vector<BenchmarkMeasure> SchedClassPoint =
345       RSC.getAsPoint(Mode, STI, Centroid.getStats());
346   if (SchedClassPoint.empty())
347     return false; // In Uops mode validate() may not be enough.
348 
349   assert(ClusterCenterPoint.size() == SchedClassPoint.size() &&
350          "Expected measured/sched data dimensions to match.");
351 
352   return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint,
353                                 AnalysisInconsistencyEpsilonSquared_);
354 }
355 
356 void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC,
357                                        raw_ostream &OS) const {
358   OS << "<table class=\"sched-class-desc\">";
359   OS << "<tr><th>Valid</th><th>Variant</th><th>NumMicroOps</th><th>Latency</"
360         "th><th>RThroughput</th><th>WriteProcRes</th><th title=\"This is the "
361         "idealized unit resource (port) pressure assuming ideal "
362         "distribution\">Idealized Resource Pressure</th></tr>";
363   if (RSC.SCDesc->isValid()) {
364     const auto &SI = State_.getSubtargetInfo();
365     const auto &SM = SI.getSchedModel();
366     OS << "<tr><td>&#10004;</td>";
367     OS << "<td>" << (RSC.WasVariant ? "&#10004;" : "&#10005;") << "</td>";
368     OS << "<td>" << RSC.SCDesc->NumMicroOps << "</td>";
369     // Latencies.
370     OS << "<td><ul>";
371     for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) {
372       const auto *const Entry = SI.getWriteLatencyEntry(RSC.SCDesc, I);
373       OS << "<li>" << Entry->Cycles;
374       if (RSC.SCDesc->NumWriteLatencyEntries > 1) {
375         // Dismabiguate if more than 1 latency.
376         OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
377       }
378       OS << "</li>";
379     }
380     OS << "</ul></td>";
381     // inverse throughput.
382     OS << "<td>";
383     writeMeasurementValue<kEscapeHtml>(
384         OS, MCSchedModel::getReciprocalThroughput(SI, *RSC.SCDesc));
385     OS << "</td>";
386     // WriteProcRes.
387     OS << "<td><ul>";
388     for (const auto &WPR : RSC.NonRedundantWriteProcRes) {
389       OS << "<li><span class=\"mono\">";
390       writeEscaped<kEscapeHtml>(OS,
391                                 SM.getProcResource(WPR.ProcResourceIdx)->Name);
392       OS << "</span>: " << WPR.ReleaseAtCycle << "</li>";
393     }
394     OS << "</ul></td>";
395     // Idealized port pressure.
396     OS << "<td><ul>";
397     for (const auto &Pressure : RSC.IdealizedProcResPressure) {
398       OS << "<li><span class=\"mono\">";
399       writeEscaped<kEscapeHtml>(
400           OS, SI.getSchedModel().getProcResource(Pressure.first)->Name);
401       OS << "</span>: ";
402       writeMeasurementValue<kEscapeHtml>(OS, Pressure.second);
403       OS << "</li>";
404     }
405     OS << "</ul></td>";
406     OS << "</tr>";
407   } else {
408     OS << "<tr><td>&#10005;</td><td></td><td></td></tr>";
409   }
410   OS << "</table>";
411 }
412 
413 void Analysis::printClusterRawHtml(
414     const BenchmarkClustering::ClusterId &Id, StringRef display_name,
415     llvm::raw_ostream &OS) const {
416   const auto &Points = Clustering_.getPoints();
417   const auto &Cluster = Clustering_.getCluster(Id);
418   if (Cluster.PointIndices.empty())
419     return;
420 
421   OS << "<div class=\"inconsistency\"><p>" << display_name << " Cluster ("
422      << Cluster.PointIndices.size() << " points)</p>";
423   OS << "<table class=\"sched-class-clusters\">";
424   // Table Header.
425   OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
426   for (const auto &Measurement : Points[Cluster.PointIndices[0]].Measurements) {
427     OS << "<th>";
428     writeEscaped<kEscapeHtml>(OS, Measurement.Key);
429     OS << "</th>";
430   }
431   OS << "</tr>";
432 
433   // Point data.
434   for (const auto &PointId : Cluster.PointIndices) {
435     OS << "<tr class=\"bad-cluster\"><td>" << display_name << "</td><td><ul>";
436     printPointHtml(Points[PointId], OS);
437     OS << "</ul></td>";
438     for (const auto &Measurement : Points[PointId].Measurements) {
439       OS << "<td class=\"measurement\">";
440       writeMeasurementValue<kEscapeHtml>(OS, Measurement.PerInstructionValue);
441     }
442     OS << "</tr>";
443   }
444   OS << "</table>";
445 
446   OS << "</div>";
447 
448 } // namespace exegesis
449 
// The <head> element of the HTML report, including the inline stylesheet for
// the tables written by the HTML printing functions in this file.
static constexpr char kHtmlHead[] = R"(
<head>
<title>llvm-exegesis Analysis Results</title>
<style>
body {
  font-family: sans-serif
}
span.sched-class-name {
  font-weight: bold;
  font-family: monospace;
}
span.opcode {
  font-family: monospace;
}
span.config {
  font-family: monospace;
}
div.inconsistency {
  margin-top: 50px;
}
table {
  margin-left: 50px;
  border-collapse: collapse;
}
table, table tr,td,th {
  border: 1px solid #444;
}
table ul {
  padding-left: 0px;
  margin: 0px;
  list-style-type: none;
}
table.sched-class-clusters td {
  padding-left: 10px;
  padding-right: 10px;
  padding-top: 10px;
  padding-bottom: 10px;
}
table.sched-class-desc td {
  padding-left: 10px;
  padding-right: 10px;
  padding-top: 2px;
  padding-bottom: 2px;
}
span.mono {
  font-family: monospace;
}
td.measurement {
  text-align: center;
}
tr.good-cluster td.measurement {
  color: #292
}
tr.bad-cluster td.measurement {
  color: #922
}
tr.good-cluster td.measurement span.minmax {
  color: #888;
}
tr.bad-cluster td.measurement span.minmax {
  color: #888;
}
</style>
</head>
)";
515 
516 template <>
517 Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
518     raw_ostream &OS) const {
519   const auto &FirstPoint = Clustering_.getPoints()[0];
520   // Print the header.
521   OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>";
522   OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>";
523   OS << "<h3>Triple: <span class=\"mono\">";
524   writeEscaped<kEscapeHtml>(OS, FirstPoint.LLVMTriple);
525   OS << "</span></h3><h3>Cpu: <span class=\"mono\">";
526   writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName);
527   OS << "</span></h3>";
528 
529   const auto &SI = State_.getSubtargetInfo();
530   for (const auto &RSCAndPoints : makePointsPerSchedClass()) {
531     if (!RSCAndPoints.RSC.SCDesc)
532       continue;
533     // Bucket sched class points into sched class clusters.
534     std::vector<SchedClassCluster> SchedClassClusters;
535     for (const size_t PointId : RSCAndPoints.PointIds) {
536       const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId);
537       if (!ClusterId.isValid())
538         continue; // Ignore noise and errors. FIXME: take noise into account ?
539       if (ClusterId.isUnstable() ^ AnalysisDisplayUnstableOpcodes_)
540         continue; // Either display stable or unstable clusters only.
541       auto SchedClassClusterIt = llvm::find_if(
542           SchedClassClusters, [ClusterId](const SchedClassCluster &C) {
543             return C.id() == ClusterId;
544           });
545       if (SchedClassClusterIt == SchedClassClusters.end()) {
546         SchedClassClusters.emplace_back();
547         SchedClassClusterIt = std::prev(SchedClassClusters.end());
548       }
549       SchedClassClusterIt->addPoint(PointId, Clustering_);
550     }
551 
552     // Print any scheduling class that has at least one cluster that does not
553     // match the checked-in data.
554     if (all_of(SchedClassClusters, [this, &RSCAndPoints,
555                                     &SI](const SchedClassCluster &C) {
556           return C.measurementsMatch(SI, RSCAndPoints.RSC, Clustering_,
557                                      AnalysisInconsistencyEpsilonSquared_);
558         }))
559       continue; // Nothing weird.
560 
561     OS << "<div class=\"inconsistency\"><p>Sched Class <span "
562           "class=\"sched-class-name\">";
563 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
564     writeEscaped<kEscapeHtml>(OS, RSCAndPoints.RSC.SCDesc->Name);
565 #else
566     OS << RSCAndPoints.RSC.SchedClassId;
567 #endif
568     OS << "</span> contains instructions whose performance characteristics do"
569           " not match that of LLVM:</p>";
570     printSchedClassClustersHtml(SchedClassClusters, RSCAndPoints.RSC, OS);
571     OS << "<p>llvm SchedModel data:</p>";
572     printSchedClassDescHtml(RSCAndPoints.RSC, OS);
573     OS << "</div>";
574   }
575 
576   printClusterRawHtml(BenchmarkClustering::ClusterId::noise(),
577                       "[noise]", OS);
578 
579   OS << "</body></html>";
580   return Error::success();
581 }
582 
583 } // namespace exegesis
584 } // namespace llvm
585