xref: /llvm-project/llvm/tools/llvm-exegesis/lib/Analysis.cpp (revision b8fb15d4122b04d620c1d4a89449b6eba2f4b0c0)
1 //===-- Analysis.cpp --------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Analysis.h"
10 #include "BenchmarkResult.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/MC/MCAsmInfo.h"
13 #include "llvm/Support/FormatVariadic.h"
14 #include <limits>
15 #include <unordered_set>
16 #include <vector>
17 
18 namespace llvm {
19 namespace exegesis {
20 
21 static const char kCsvSep = ',';
22 
23 static unsigned resolveSchedClassId(const llvm::MCSubtargetInfo &STI,
24                                     unsigned SchedClassId,
25                                     const llvm::MCInst &MCI) {
26   const auto &SM = STI.getSchedModel();
27   while (SchedClassId && SM.getSchedClassDesc(SchedClassId)->isVariant())
28     SchedClassId =
29         STI.resolveVariantSchedClass(SchedClassId, &MCI, SM.getProcessorID());
30   return SchedClassId;
31 }
32 
33 namespace {
34 
35 enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString };
36 
37 template <EscapeTag Tag>
38 void writeEscaped(llvm::raw_ostream &OS, const llvm::StringRef S);
39 
40 template <>
41 void writeEscaped<kEscapeCsv>(llvm::raw_ostream &OS, const llvm::StringRef S) {
42   if (std::find(S.begin(), S.end(), kCsvSep) == S.end()) {
43     OS << S;
44   } else {
45     // Needs escaping.
46     OS << '"';
47     for (const char C : S) {
48       if (C == '"')
49         OS << "\"\"";
50       else
51         OS << C;
52     }
53     OS << '"';
54   }
55 }
56 
57 template <>
58 void writeEscaped<kEscapeHtml>(llvm::raw_ostream &OS, const llvm::StringRef S) {
59   for (const char C : S) {
60     if (C == '<')
61       OS << "&lt;";
62     else if (C == '>')
63       OS << "&gt;";
64     else if (C == '&')
65       OS << "&amp;";
66     else
67       OS << C;
68   }
69 }
70 
71 template <>
72 void writeEscaped<kEscapeHtmlString>(llvm::raw_ostream &OS,
73                                      const llvm::StringRef S) {
74   for (const char C : S) {
75     if (C == '"')
76       OS << "\\\"";
77     else
78       OS << C;
79   }
80 }
81 
82 } // namespace
83 
84 template <EscapeTag Tag>
85 static void
86 writeClusterId(llvm::raw_ostream &OS,
87                const InstructionBenchmarkClustering::ClusterId &CID) {
88   if (CID.isNoise())
89     writeEscaped<Tag>(OS, "[noise]");
90   else if (CID.isError())
91     writeEscaped<Tag>(OS, "[error]");
92   else
93     OS << CID.getId();
94 }
95 
96 template <EscapeTag Tag>
97 static void writeMeasurementValue(llvm::raw_ostream &OS, const double Value) {
98   // Given Value, if we wanted to serialize it to a string,
99   // how many base-10 digits will we need to store, max?
100   static constexpr auto MaxDigitCount =
101       std::numeric_limits<decltype(Value)>::max_digits10;
102   // Also, we will need a decimal separator.
103   static constexpr auto DecimalSeparatorLen = 1; // '.' e.g.
104   // So how long of a string will the serialization produce, max?
105   static constexpr auto SerializationLen = MaxDigitCount + DecimalSeparatorLen;
106 
107   // WARNING: when changing the format, also adjust the small-size estimate ^.
108   static constexpr StringLiteral SimpleFloatFormat = StringLiteral("{0:F}");
109 
110   writeEscaped<Tag>(
111       OS,
112       llvm::formatv(SimpleFloatFormat.data(), Value).sstr<SerializationLen>());
113 }
114 
115 template <typename EscapeTag, EscapeTag Tag>
116 void Analysis::writeSnippet(llvm::raw_ostream &OS,
117                             llvm::ArrayRef<uint8_t> Bytes,
118                             const char *Separator) const {
119   llvm::SmallVector<std::string, 3> Lines;
120   // Parse the asm snippet and print it.
121   while (!Bytes.empty()) {
122     llvm::MCInst MI;
123     uint64_t MISize = 0;
124     if (!Disasm_->getInstruction(MI, MISize, Bytes, 0, llvm::nulls(),
125                                  llvm::nulls())) {
126       writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
127       writeEscaped<Tag>(OS, Separator);
128       writeEscaped<Tag>(OS, "[error decoding asm snippet]");
129       return;
130     }
131     llvm::SmallString<128> InstPrinterStr; // FIXME: magic number.
132     llvm::raw_svector_ostream OSS(InstPrinterStr);
133     InstPrinter_->printInst(&MI, OSS, "", *SubtargetInfo_);
134     Bytes = Bytes.drop_front(MISize);
135     Lines.emplace_back(llvm::StringRef(InstPrinterStr).trim());
136   }
137   writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
138 }
139 
140 // Prints a row representing an instruction, along with scheduling info and
141 // point coordinates (measurements).
142 void Analysis::printInstructionRowCsv(const size_t PointId,
143                                       llvm::raw_ostream &OS) const {
144   const InstructionBenchmark &Point = Clustering_.getPoints()[PointId];
145   writeClusterId<kEscapeCsv>(OS, Clustering_.getClusterIdForPoint(PointId));
146   OS << kCsvSep;
147   writeSnippet<EscapeTag, kEscapeCsv>(OS, Point.AssembledSnippet, "; ");
148   OS << kCsvSep;
149   writeEscaped<kEscapeCsv>(OS, Point.Key.Config);
150   OS << kCsvSep;
151   assert(!Point.Key.Instructions.empty());
152   const llvm::MCInst &MCI = Point.keyInstruction();
153   const unsigned SchedClassId = resolveSchedClassId(
154       *SubtargetInfo_, InstrInfo_->get(MCI.getOpcode()).getSchedClass(), MCI);
155 
156 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
157   const llvm::MCSchedClassDesc *const SCDesc =
158       SubtargetInfo_->getSchedModel().getSchedClassDesc(SchedClassId);
159   writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
160 #else
161   OS << SchedClassId;
162 #endif
163   for (const auto &Measurement : Point.Measurements) {
164     OS << kCsvSep;
165     writeMeasurementValue<kEscapeCsv>(OS, Measurement.PerInstructionValue);
166   }
167   OS << "\n";
168 }
169 
170 Analysis::Analysis(const llvm::Target &Target,
171                    std::unique_ptr<llvm::MCInstrInfo> InstrInfo,
172                    const InstructionBenchmarkClustering &Clustering,
173                    double AnalysisInconsistencyEpsilon,
174                    bool AnalysisDisplayUnstableOpcodes)
175     : Clustering_(Clustering), InstrInfo_(std::move(InstrInfo)),
176       AnalysisInconsistencyEpsilonSquared_(AnalysisInconsistencyEpsilon *
177                                            AnalysisInconsistencyEpsilon),
178       AnalysisDisplayUnstableOpcodes_(AnalysisDisplayUnstableOpcodes) {
179   if (Clustering.getPoints().empty())
180     return;
181 
182   const InstructionBenchmark &FirstPoint = Clustering.getPoints().front();
183   RegInfo_.reset(Target.createMCRegInfo(FirstPoint.LLVMTriple));
184   AsmInfo_.reset(Target.createMCAsmInfo(*RegInfo_, FirstPoint.LLVMTriple));
185   SubtargetInfo_.reset(Target.createMCSubtargetInfo(FirstPoint.LLVMTriple,
186                                                     FirstPoint.CpuName, ""));
187   InstPrinter_.reset(Target.createMCInstPrinter(
188       llvm::Triple(FirstPoint.LLVMTriple), 0 /*default variant*/, *AsmInfo_,
189       *InstrInfo_, *RegInfo_));
190 
191   Context_ = llvm::make_unique<llvm::MCContext>(AsmInfo_.get(), RegInfo_.get(),
192                                                 &ObjectFileInfo_);
193   Disasm_.reset(Target.createMCDisassembler(*SubtargetInfo_, *Context_));
194   assert(Disasm_ && "cannot create MCDisassembler. missing call to "
195                     "InitializeXXXTargetDisassembler ?");
196 }
197 
198 template <>
199 llvm::Error
200 Analysis::run<Analysis::PrintClusters>(llvm::raw_ostream &OS) const {
201   if (Clustering_.getPoints().empty())
202     return llvm::Error::success();
203 
204   // Write the header.
205   OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config"
206      << kCsvSep << "sched_class";
207   for (const auto &Measurement : Clustering_.getPoints().front().Measurements) {
208     OS << kCsvSep;
209     writeEscaped<kEscapeCsv>(OS, Measurement.Key);
210   }
211   OS << "\n";
212 
213   // Write the points.
214   const auto &Clusters = Clustering_.getValidClusters();
215   for (size_t I = 0, E = Clusters.size(); I < E; ++I) {
216     for (const size_t PointId : Clusters[I].PointIndices) {
217       printInstructionRowCsv(PointId, OS);
218     }
219     OS << "\n\n";
220   }
221   return llvm::Error::success();
222 }
223 
224 Analysis::ResolvedSchedClassAndPoints::ResolvedSchedClassAndPoints(
225     ResolvedSchedClass &&RSC)
226     : RSC(std::move(RSC)) {}
227 
228 std::vector<Analysis::ResolvedSchedClassAndPoints>
229 Analysis::makePointsPerSchedClass() const {
230   std::vector<ResolvedSchedClassAndPoints> Entries;
231   // Maps SchedClassIds to index in result.
232   std::unordered_map<unsigned, size_t> SchedClassIdToIndex;
233   const auto &Points = Clustering_.getPoints();
234   for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) {
235     const InstructionBenchmark &Point = Points[PointId];
236     if (!Point.Error.empty())
237       continue;
238     assert(!Point.Key.Instructions.empty());
239     // FIXME: we should be using the tuple of classes for instructions in the
240     // snippet as key.
241     const llvm::MCInst &MCI = Point.keyInstruction();
242     unsigned SchedClassId = InstrInfo_->get(MCI.getOpcode()).getSchedClass();
243     const bool WasVariant = SchedClassId && SubtargetInfo_->getSchedModel()
244                                                 .getSchedClassDesc(SchedClassId)
245                                                 ->isVariant();
246     SchedClassId = resolveSchedClassId(*SubtargetInfo_, SchedClassId, MCI);
247     const auto IndexIt = SchedClassIdToIndex.find(SchedClassId);
248     if (IndexIt == SchedClassIdToIndex.end()) {
249       // Create a new entry.
250       SchedClassIdToIndex.emplace(SchedClassId, Entries.size());
251       ResolvedSchedClassAndPoints Entry(
252           ResolvedSchedClass(*SubtargetInfo_, SchedClassId, WasVariant));
253       Entry.PointIds.push_back(PointId);
254       Entries.push_back(std::move(Entry));
255     } else {
256       // Append to the existing entry.
257       Entries[IndexIt->second].PointIds.push_back(PointId);
258     }
259   }
260   return Entries;
261 }
262 
263 // Uops repeat the same opcode over again. Just show this opcode and show the
264 // whole snippet only on hover.
265 static void writeUopsSnippetHtml(llvm::raw_ostream &OS,
266                                  const std::vector<llvm::MCInst> &Instructions,
267                                  const llvm::MCInstrInfo &InstrInfo) {
268   if (Instructions.empty())
269     return;
270   writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instructions[0].getOpcode()));
271   if (Instructions.size() > 1)
272     OS << " (x" << Instructions.size() << ")";
273 }
274 
275 // Latency tries to find a serial path. Just show the opcode path and show the
276 // whole snippet only on hover.
277 static void
278 writeLatencySnippetHtml(llvm::raw_ostream &OS,
279                         const std::vector<llvm::MCInst> &Instructions,
280                         const llvm::MCInstrInfo &InstrInfo) {
281   bool First = true;
282   for (const llvm::MCInst &Instr : Instructions) {
283     if (First)
284       First = false;
285     else
286       OS << " &rarr; ";
287     writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instr.getOpcode()));
288   }
289 }
290 
291 void Analysis::printSchedClassClustersHtml(
292     const std::vector<SchedClassCluster> &Clusters,
293     const ResolvedSchedClass &RSC, llvm::raw_ostream &OS) const {
294   const auto &Points = Clustering_.getPoints();
295   OS << "<table class=\"sched-class-clusters\">";
296   OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
297   assert(!Clusters.empty());
298   for (const auto &Measurement :
299        Points[Clusters[0].getPointIds()[0]].Measurements) {
300     OS << "<th>";
301     writeEscaped<kEscapeHtml>(OS, Measurement.Key);
302     OS << "</th>";
303   }
304   OS << "</tr>";
305   for (const SchedClassCluster &Cluster : Clusters) {
306     OS << "<tr class=\""
307        << (Cluster.measurementsMatch(*SubtargetInfo_, RSC, Clustering_,
308                                      AnalysisInconsistencyEpsilonSquared_)
309                ? "good-cluster"
310                : "bad-cluster")
311        << "\"><td>";
312     writeClusterId<kEscapeHtml>(OS, Cluster.id());
313     OS << "</td><td><ul>";
314     for (const size_t PointId : Cluster.getPointIds()) {
315       const auto &Point = Points[PointId];
316       OS << "<li><span class=\"mono\" title=\"";
317       writeSnippet<EscapeTag, kEscapeHtmlString>(OS, Point.AssembledSnippet,
318                                                  "\n");
319       OS << "\">";
320       switch (Point.Mode) {
321       case InstructionBenchmark::Latency:
322         writeLatencySnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
323         break;
324       case InstructionBenchmark::Uops:
325       case InstructionBenchmark::InverseThroughput:
326         writeUopsSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
327         break;
328       default:
329         llvm_unreachable("invalid mode");
330       }
331       OS << "</span> <span class=\"mono\">";
332       writeEscaped<kEscapeHtml>(OS, Point.Key.Config);
333       OS << "</span></li>";
334     }
335     OS << "</ul></td>";
336     for (const auto &Stats : Cluster.getCentroid().getStats()) {
337       OS << "<td class=\"measurement\">";
338       writeMeasurementValue<kEscapeHtml>(OS, Stats.avg());
339       OS << "<br><span class=\"minmax\">[";
340       writeMeasurementValue<kEscapeHtml>(OS, Stats.min());
341       OS << ";";
342       writeMeasurementValue<kEscapeHtml>(OS, Stats.max());
343       OS << "]</span></td>";
344     }
345     OS << "</tr>";
346   }
347   OS << "</table>";
348 }
349 
350 // Return the non-redundant list of WriteProcRes used by the given sched class.
351 // The scheduling model for LLVM is such that each instruction has a certain
352 // number of uops which consume resources which are described by WriteProcRes
353 // entries. Each entry describe how many cycles are spent on a specific ProcRes
354 // kind.
355 // For example, an instruction might have 3 uOps, one dispatching on P0
356 // (ProcResIdx=1) and two on P06 (ProcResIdx = 7).
357 // Note that LLVM additionally denormalizes resource consumption to include
358 // usage of super resources by subresources. So in practice if there exists a
359 // P016 (ProcResIdx=10), then the cycles consumed by P0 are also consumed by
360 // P06 (ProcResIdx = 7) and P016 (ProcResIdx = 10), and the resources consumed
361 // by P06 are also consumed by P016. In the figure below, parenthesized cycles
362 // denote implied usage of superresources by subresources:
363 //            P0      P06    P016
364 //     uOp1    1      (1)     (1)
365 //     uOp2            1      (1)
366 //     uOp3            1      (1)
367 //     =============================
368 //             1       3       3
369 // Eventually we end up with three entries for the WriteProcRes of the
370 // instruction:
371 //    {ProcResIdx=1,  Cycles=1}  // P0
372 //    {ProcResIdx=7,  Cycles=3}  // P06
373 //    {ProcResIdx=10, Cycles=3}  // P016
374 //
375 // Note that in this case, P016 does not contribute any cycles, so it would
376 // be removed by this function.
377 // FIXME: Move this to MCSubtargetInfo and use it in llvm-mca.
378 static llvm::SmallVector<llvm::MCWriteProcResEntry, 8>
379 getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc,
380                             const llvm::MCSubtargetInfo &STI) {
381   llvm::SmallVector<llvm::MCWriteProcResEntry, 8> Result;
382   const auto &SM = STI.getSchedModel();
383   const unsigned NumProcRes = SM.getNumProcResourceKinds();
384 
385   // This assumes that the ProcResDescs are sorted in topological order, which
386   // is guaranteed by the tablegen backend.
387   llvm::SmallVector<float, 32> ProcResUnitUsage(NumProcRes);
388   for (const auto *WPR = STI.getWriteProcResBegin(&SCDesc),
389                   *const WPREnd = STI.getWriteProcResEnd(&SCDesc);
390        WPR != WPREnd; ++WPR) {
391     const llvm::MCProcResourceDesc *const ProcResDesc =
392         SM.getProcResource(WPR->ProcResourceIdx);
393     if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
394       // This is a ProcResUnit.
395       Result.push_back({WPR->ProcResourceIdx, WPR->Cycles});
396       ProcResUnitUsage[WPR->ProcResourceIdx] += WPR->Cycles;
397     } else {
398       // This is a ProcResGroup. First see if it contributes any cycles or if
399       // it has cycles just from subunits.
400       float RemainingCycles = WPR->Cycles;
401       for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
402            SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
403            ++SubResIdx) {
404         RemainingCycles -= ProcResUnitUsage[*SubResIdx];
405       }
406       if (RemainingCycles < 0.01f) {
407         // The ProcResGroup contributes no cycles of its own.
408         continue;
409       }
410       // The ProcResGroup contributes `RemainingCycles` cycles of its own.
411       Result.push_back({WPR->ProcResourceIdx,
412                         static_cast<uint16_t>(std::round(RemainingCycles))});
413       // Spread the remaining cycles over all subunits.
414       for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
415            SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
416            ++SubResIdx) {
417         ProcResUnitUsage[*SubResIdx] += RemainingCycles / ProcResDesc->NumUnits;
418       }
419     }
420   }
421   return Result;
422 }
423 
424 Analysis::ResolvedSchedClass::ResolvedSchedClass(
425     const llvm::MCSubtargetInfo &STI, unsigned ResolvedSchedClassId,
426     bool WasVariant)
427     : SchedClassId(ResolvedSchedClassId), SCDesc(STI.getSchedModel().getSchedClassDesc(ResolvedSchedClassId)),
428       WasVariant(WasVariant),
429       NonRedundantWriteProcRes(getNonRedundantWriteProcRes(*SCDesc, STI)),
430       IdealizedProcResPressure(computeIdealizedProcResPressure(
431           STI.getSchedModel(), NonRedundantWriteProcRes)) {
432   assert((SCDesc == nullptr || !SCDesc->isVariant()) &&
433          "ResolvedSchedClass should never be variant");
434 }
435 
436 void Analysis::SchedClassCluster::addPoint(
437     size_t PointId, const InstructionBenchmarkClustering &Clustering) {
438   PointIds.push_back(PointId);
439   const auto &Point = Clustering.getPoints()[PointId];
440   if (ClusterId.isUndef())
441     ClusterId = Clustering.getClusterIdForPoint(PointId);
442   assert(ClusterId == Clustering.getClusterIdForPoint(PointId));
443 
444   Centroid.addPoint(Point.Measurements);
445 }
446 
447 // Returns a ProxResIdx by id or name.
448 static unsigned findProcResIdx(const llvm::MCSubtargetInfo &STI,
449                                const llvm::StringRef NameOrId) {
450   // Interpret the key as an ProcResIdx.
451   unsigned ProcResIdx = 0;
452   if (llvm::to_integer(NameOrId, ProcResIdx, 10))
453     return ProcResIdx;
454   // Interpret the key as a ProcRes name.
455   const auto &SchedModel = STI.getSchedModel();
456   for (int I = 0, E = SchedModel.getNumProcResourceKinds(); I < E; ++I) {
457     if (NameOrId == SchedModel.getProcResource(I)->Name)
458       return I;
459   }
460   return 0;
461 }
462 
463 std::vector<BenchmarkMeasure> Analysis::SchedClassCluster::getSchedClassPoint(
464     InstructionBenchmark::ModeE Mode, const llvm::MCSubtargetInfo &STI,
465     const ResolvedSchedClass &RSC,
466     ArrayRef<PerInstructionStats> Representative) const {
467   const size_t NumMeasurements = Representative.size();
468 
469   std::vector<BenchmarkMeasure> SchedClassPoint(NumMeasurements);
470 
471   if (Mode == InstructionBenchmark::Latency) {
472     assert(NumMeasurements == 1 && "Latency is a single measure.");
473     BenchmarkMeasure &LatencyMeasure = SchedClassPoint[0];
474 
475     // Find the latency.
476     LatencyMeasure.PerInstructionValue = 0.0;
477 
478     for (unsigned I = 0; I < RSC.SCDesc->NumWriteLatencyEntries; ++I) {
479       const llvm::MCWriteLatencyEntry *const WLE =
480           STI.getWriteLatencyEntry(RSC.SCDesc, I);
481       LatencyMeasure.PerInstructionValue =
482           std::max<double>(LatencyMeasure.PerInstructionValue, WLE->Cycles);
483     }
484   } else if (Mode == InstructionBenchmark::Uops) {
485     for (const auto &I : llvm::zip(SchedClassPoint, Representative)) {
486       BenchmarkMeasure &Measure = std::get<0>(I);
487       const PerInstructionStats &Stats = std::get<1>(I);
488 
489       StringRef Key = Stats.key();
490       uint16_t ProcResIdx = findProcResIdx(STI, Key);
491       if (ProcResIdx > 0) {
492         // Find the pressure on ProcResIdx `Key`.
493         const auto ProcResPressureIt =
494             std::find_if(RSC.IdealizedProcResPressure.begin(),
495                          RSC.IdealizedProcResPressure.end(),
496                          [ProcResIdx](const std::pair<uint16_t, float> &WPR) {
497                            return WPR.first == ProcResIdx;
498                          });
499         Measure.PerInstructionValue =
500             ProcResPressureIt == RSC.IdealizedProcResPressure.end()
501                 ? 0.0
502                 : ProcResPressureIt->second;
503       } else if (Key == "NumMicroOps") {
504         Measure.PerInstructionValue = RSC.SCDesc->NumMicroOps;
505       } else {
506         llvm::errs() << "expected `key` to be either a ProcResIdx or a ProcRes "
507                         "name, got "
508                      << Key << "\n";
509         return {};
510       }
511     }
512   } else if (Mode == InstructionBenchmark::InverseThroughput) {
513     assert(NumMeasurements == 1 && "Inverse Throughput is a single measure.");
514     BenchmarkMeasure &RThroughputMeasure = SchedClassPoint[0];
515 
516     RThroughputMeasure.PerInstructionValue =
517         MCSchedModel::getReciprocalThroughput(STI, *RSC.SCDesc);
518   } else {
519     llvm_unreachable("unimplemented measurement matching mode");
520   }
521 
522   return SchedClassPoint;
523 }
524 
525 bool Analysis::SchedClassCluster::measurementsMatch(
526     const llvm::MCSubtargetInfo &STI, const ResolvedSchedClass &RSC,
527     const InstructionBenchmarkClustering &Clustering,
528     const double AnalysisInconsistencyEpsilonSquared_) const {
529   assert(!Clustering.getPoints().empty());
530   const InstructionBenchmark::ModeE Mode = Clustering.getPoints()[0].Mode;
531 
532   if (!Centroid.validate(Mode))
533     return false;
534 
535   const std::vector<BenchmarkMeasure> ClusterCenterPoint =
536       Centroid.getAsPoint();
537 
538   const std::vector<BenchmarkMeasure> SchedClassPoint =
539       getSchedClassPoint(Mode, STI, RSC, Centroid.getStats());
540   if (SchedClassPoint.empty())
541     return false; // In Uops mode validate() may not be enough.
542 
543   assert(ClusterCenterPoint.size() == SchedClassPoint.size() &&
544          "Expected measured/sched data dimensions to match.");
545 
546   return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint,
547                                 AnalysisInconsistencyEpsilonSquared_);
548 }
549 
550 void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC,
551                                        llvm::raw_ostream &OS) const {
552   OS << "<table class=\"sched-class-desc\">";
553   OS << "<tr><th>Valid</th><th>Variant</th><th>NumMicroOps</th><th>Latency</"
554         "th><th>RThroughput</th><th>WriteProcRes</th><th title=\"This is the "
555         "idealized unit resource (port) pressure assuming ideal "
556         "distribution\">Idealized Resource Pressure</th></tr>";
557   if (RSC.SCDesc->isValid()) {
558     const auto &SM = SubtargetInfo_->getSchedModel();
559     OS << "<tr><td>&#10004;</td>";
560     OS << "<td>" << (RSC.WasVariant ? "&#10004;" : "&#10005;") << "</td>";
561     OS << "<td>" << RSC.SCDesc->NumMicroOps << "</td>";
562     // Latencies.
563     OS << "<td><ul>";
564     for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) {
565       const auto *const Entry =
566           SubtargetInfo_->getWriteLatencyEntry(RSC.SCDesc, I);
567       OS << "<li>" << Entry->Cycles;
568       if (RSC.SCDesc->NumWriteLatencyEntries > 1) {
569         // Dismabiguate if more than 1 latency.
570         OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
571       }
572       OS << "</li>";
573     }
574     OS << "</ul></td>";
575     // inverse throughput.
576     OS << "<td>";
577     writeMeasurementValue<kEscapeHtml>(
578         OS,
579         MCSchedModel::getReciprocalThroughput(*SubtargetInfo_, *RSC.SCDesc));
580     OS << "</td>";
581     // WriteProcRes.
582     OS << "<td><ul>";
583     for (const auto &WPR : RSC.NonRedundantWriteProcRes) {
584       OS << "<li><span class=\"mono\">";
585       writeEscaped<kEscapeHtml>(OS,
586                                 SM.getProcResource(WPR.ProcResourceIdx)->Name);
587       OS << "</span>: " << WPR.Cycles << "</li>";
588     }
589     OS << "</ul></td>";
590     // Idealized port pressure.
591     OS << "<td><ul>";
592     for (const auto &Pressure : RSC.IdealizedProcResPressure) {
593       OS << "<li><span class=\"mono\">";
594       writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel()
595                                         .getProcResource(Pressure.first)
596                                         ->Name);
597       OS << "</span>: ";
598       writeMeasurementValue<kEscapeHtml>(OS, Pressure.second);
599       OS << "</li>";
600     }
601     OS << "</ul></td>";
602     OS << "</tr>";
603   } else {
604     OS << "<tr><td>&#10005;</td><td></td><td></td></tr>";
605   }
606   OS << "</table>";
607 }
608 
609 static constexpr const char kHtmlHead[] = R"(
610 <head>
611 <title>llvm-exegesis Analysis Results</title>
612 <style>
613 body {
614   font-family: sans-serif
615 }
616 span.sched-class-name {
617   font-weight: bold;
618   font-family: monospace;
619 }
620 span.opcode {
621   font-family: monospace;
622 }
623 span.config {
624   font-family: monospace;
625 }
626 div.inconsistency {
627   margin-top: 50px;
628 }
629 table {
630   margin-left: 50px;
631   border-collapse: collapse;
632 }
633 table, table tr,td,th {
634   border: 1px solid #444;
635 }
636 table ul {
637   padding-left: 0px;
638   margin: 0px;
639   list-style-type: none;
640 }
641 table.sched-class-clusters td {
642   padding-left: 10px;
643   padding-right: 10px;
644   padding-top: 10px;
645   padding-bottom: 10px;
646 }
647 table.sched-class-desc td {
648   padding-left: 10px;
649   padding-right: 10px;
650   padding-top: 2px;
651   padding-bottom: 2px;
652 }
653 span.mono {
654   font-family: monospace;
655 }
656 td.measurement {
657   text-align: center;
658 }
659 tr.good-cluster td.measurement {
660   color: #292
661 }
662 tr.bad-cluster td.measurement {
663   color: #922
664 }
665 tr.good-cluster td.measurement span.minmax {
666   color: #888;
667 }
668 tr.bad-cluster td.measurement span.minmax {
669   color: #888;
670 }
671 </style>
672 </head>
673 )";
674 
675 template <>
676 llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
677     llvm::raw_ostream &OS) const {
678   const auto &FirstPoint = Clustering_.getPoints()[0];
679   // Print the header.
680   OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>";
681   OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>";
682   OS << "<h3>Triple: <span class=\"mono\">";
683   writeEscaped<kEscapeHtml>(OS, FirstPoint.LLVMTriple);
684   OS << "</span></h3><h3>Cpu: <span class=\"mono\">";
685   writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName);
686   OS << "</span></h3>";
687 
688   for (const auto &RSCAndPoints : makePointsPerSchedClass()) {
689     if (!RSCAndPoints.RSC.SCDesc)
690       continue;
691     // Bucket sched class points into sched class clusters.
692     std::vector<SchedClassCluster> SchedClassClusters;
693     for (const size_t PointId : RSCAndPoints.PointIds) {
694       const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId);
695       if (!ClusterId.isValid())
696         continue; // Ignore noise and errors. FIXME: take noise into account ?
697       if (ClusterId.isUnstable() ^ AnalysisDisplayUnstableOpcodes_)
698         continue; // Either display stable or unstable clusters only.
699       auto SchedClassClusterIt =
700           std::find_if(SchedClassClusters.begin(), SchedClassClusters.end(),
701                        [ClusterId](const SchedClassCluster &C) {
702                          return C.id() == ClusterId;
703                        });
704       if (SchedClassClusterIt == SchedClassClusters.end()) {
705         SchedClassClusters.emplace_back();
706         SchedClassClusterIt = std::prev(SchedClassClusters.end());
707       }
708       SchedClassClusterIt->addPoint(PointId, Clustering_);
709     }
710 
711     // Print any scheduling class that has at least one cluster that does not
712     // match the checked-in data.
713     if (llvm::all_of(SchedClassClusters,
714                      [this, &RSCAndPoints](const SchedClassCluster &C) {
715                        return C.measurementsMatch(
716                            *SubtargetInfo_, RSCAndPoints.RSC, Clustering_,
717                            AnalysisInconsistencyEpsilonSquared_);
718                      }))
719       continue; // Nothing weird.
720 
721     OS << "<div class=\"inconsistency\"><p>Sched Class <span "
722           "class=\"sched-class-name\">";
723 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
724     writeEscaped<kEscapeHtml>(OS, RSCAndPoints.RSC.SCDesc->Name);
725 #else
726     OS << RSCAndPoints.RSC.SchedClassId;
727 #endif
728     OS << "</span> contains instructions whose performance characteristics do"
729           " not match that of LLVM:</p>";
730     printSchedClassClustersHtml(SchedClassClusters, RSCAndPoints.RSC, OS);
731     OS << "<p>llvm SchedModel data:</p>";
732     printSchedClassDescHtml(RSCAndPoints.RSC, OS);
733     OS << "</div>";
734   }
735 
736   OS << "</body></html>";
737   return llvm::Error::success();
738 }
739 
740 // Distributes a pressure budget as evenly as possible on the provided subunits
741 // given the already existing port pressure distribution.
742 //
743 // The algorithm is as follows: while there is remaining pressure to
744 // distribute, find the subunits with minimal pressure, and distribute
745 // remaining pressure equally up to the pressure of the unit with
746 // second-to-minimal pressure.
747 // For example, let's assume we want to distribute 2*P1256
748 // (Subunits = [P1,P2,P5,P6]), and the starting DensePressure is:
749 //     DensePressure =        P0   P1   P2   P3   P4   P5   P6   P7
750 //                           0.1  0.3  0.2  0.0  0.0  0.5  0.5  0.5
751 //     RemainingPressure = 2.0
752 // We sort the subunits by pressure:
753 //     Subunits = [(P2,p=0.2), (P1,p=0.3), (P5,p=0.5), (P6, p=0.5)]
754 // We'll first start by the subunits with minimal pressure, which are at
755 // the beginning of the sorted array. In this example there is one (P2).
756 // The subunit with second-to-minimal pressure is the next one in the
757 // array (P1). So we distribute 0.1 pressure to P2, and remove 0.1 cycles
758 // from the budget.
759 //     Subunits = [(P2,p=0.3), (P1,p=0.3), (P5,p=0.5), (P5,p=0.5)]
760 //     RemainingPressure = 1.9
761 // We repeat this process: distribute 0.2 pressure on each of the minimal
762 // P2 and P1, decrease budget by 2*0.2:
763 //     Subunits = [(P2,p=0.5), (P1,p=0.5), (P5,p=0.5), (P5,p=0.5)]
764 //     RemainingPressure = 1.5
765 // There are no second-to-minimal subunits so we just share the remaining
766 // budget (1.5 cycles) equally:
767 //     Subunits = [(P2,p=0.875), (P1,p=0.875), (P5,p=0.875), (P5,p=0.875)]
768 //     RemainingPressure = 0.0
769 // We stop as there is no remaining budget to distribute.
770 void distributePressure(float RemainingPressure,
771                         llvm::SmallVector<uint16_t, 32> Subunits,
772                         llvm::SmallVector<float, 32> &DensePressure) {
773   // Find the number of subunits with minimal pressure (they are at the
774   // front).
775   llvm::sort(Subunits, [&DensePressure](const uint16_t A, const uint16_t B) {
776     return DensePressure[A] < DensePressure[B];
777   });
778   const auto getPressureForSubunit = [&DensePressure,
779                                       &Subunits](size_t I) -> float & {
780     return DensePressure[Subunits[I]];
781   };
782   size_t NumMinimalSU = 1;
783   while (NumMinimalSU < Subunits.size() &&
784          getPressureForSubunit(NumMinimalSU) == getPressureForSubunit(0)) {
785     ++NumMinimalSU;
786   }
787   while (RemainingPressure > 0.0f) {
788     if (NumMinimalSU == Subunits.size()) {
789       // All units are minimal, just distribute evenly and be done.
790       for (size_t I = 0; I < NumMinimalSU; ++I) {
791         getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
792       }
793       return;
794     }
795     // Distribute the remaining pressure equally.
796     const float MinimalPressure = getPressureForSubunit(NumMinimalSU - 1);
797     const float SecondToMinimalPressure = getPressureForSubunit(NumMinimalSU);
798     assert(MinimalPressure < SecondToMinimalPressure);
799     const float Increment = SecondToMinimalPressure - MinimalPressure;
800     if (RemainingPressure <= NumMinimalSU * Increment) {
801       // There is not enough remaining pressure.
802       for (size_t I = 0; I < NumMinimalSU; ++I) {
803         getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
804       }
805       return;
806     }
807     // Bump all minimal pressure subunits to `SecondToMinimalPressure`.
808     for (size_t I = 0; I < NumMinimalSU; ++I) {
809       getPressureForSubunit(I) = SecondToMinimalPressure;
810       RemainingPressure -= SecondToMinimalPressure;
811     }
812     while (NumMinimalSU < Subunits.size() &&
813            getPressureForSubunit(NumMinimalSU) == SecondToMinimalPressure) {
814       ++NumMinimalSU;
815     }
816   }
817 }
818 
819 std::vector<std::pair<uint16_t, float>> computeIdealizedProcResPressure(
820     const llvm::MCSchedModel &SM,
821     llvm::SmallVector<llvm::MCWriteProcResEntry, 8> WPRS) {
822   // DensePressure[I] is the port pressure for Proc Resource I.
823   llvm::SmallVector<float, 32> DensePressure(SM.getNumProcResourceKinds());
824   llvm::sort(WPRS, [](const llvm::MCWriteProcResEntry &A,
825                       const llvm::MCWriteProcResEntry &B) {
826     return A.ProcResourceIdx < B.ProcResourceIdx;
827   });
828   for (const llvm::MCWriteProcResEntry &WPR : WPRS) {
829     // Get units for the entry.
830     const llvm::MCProcResourceDesc *const ProcResDesc =
831         SM.getProcResource(WPR.ProcResourceIdx);
832     if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
833       // This is a ProcResUnit.
834       DensePressure[WPR.ProcResourceIdx] += WPR.Cycles;
835     } else {
836       // This is a ProcResGroup.
837       llvm::SmallVector<uint16_t, 32> Subunits(ProcResDesc->SubUnitsIdxBegin,
838                                                ProcResDesc->SubUnitsIdxBegin +
839                                                    ProcResDesc->NumUnits);
840       distributePressure(WPR.Cycles, Subunits, DensePressure);
841     }
842   }
843   // Turn dense pressure into sparse pressure by removing zero entries.
844   std::vector<std::pair<uint16_t, float>> Pressure;
845   for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
846     if (DensePressure[I] > 0.0f)
847       Pressure.emplace_back(I, DensePressure[I]);
848   }
849   return Pressure;
850 }
851 
852 } // namespace exegesis
853 } // namespace llvm
854