//===-- Analysis.cpp --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "Analysis.h"
#include "BenchmarkResult.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/FormatVariadic.h"
#include <limits>
#include <unordered_map>
#include <unordered_set>
#include <vector>

namespace llvm {
namespace exegesis {

static const char kCsvSep = ',';

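// Resolves variant scheduling classes until a non-variant class is reached,
// so the returned id always designates a non-variant scheduling class for the
// current processor (descriptive summary of the loop below).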
static unsigned resolveSchedClassId(const llvm::MCSubtargetInfo &STI,
                                    unsigned SchedClassId,
                                    const llvm::MCInst &MCI) {
  const auto &SM = STI.getSchedModel();
  while (SchedClassId && SM.getSchedClassDesc(SchedClassId)->isVariant())
    SchedClassId =
        STI.resolveVariantSchedClass(SchedClassId, &MCI, SM.getProcessorID());
  return SchedClassId;
}

namespace {

enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString };

template <EscapeTag Tag>
void writeEscaped(llvm::raw_ostream &OS, const llvm::StringRef S);

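// CSV escaping. For example (illustrative): a field containing the separator,
// such as `a,b"c`, is emitted quoted with embedded quotes doubled, i.e.
// `"a,b""c"`; fields without the separator are emitted verbatim.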
template <>
void writeEscaped<kEscapeCsv>(llvm::raw_ostream &OS, const llvm::StringRef S) {
  if (std::find(S.begin(), S.end(), kCsvSep) == S.end()) {
    OS << S;
  } else {
    // Needs escaping.
    OS << '"';
    for (const char C : S) {
      if (C == '"')
        OS << "\"\"";
      else
        OS << C;
    }
    OS << '"';
  }
}

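// HTML escaping. For example (illustrative): `a<b&c` is emitted as
// `a&lt;b&amp;c`.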
template <>
void writeEscaped<kEscapeHtml>(llvm::raw_ostream &OS, const llvm::StringRef S) {
  for (const char C : S) {
    if (C == '<')
      OS << "&lt;";
    else if (C == '>')
      OS << "&gt;";
    else if (C == '&')
      OS << "&amp;";
    else
      OS << C;
  }
}

template <>
void writeEscaped<kEscapeHtmlString>(llvm::raw_ostream &OS,
                                     const llvm::StringRef S) {
  for (const char C : S) {
    if (C == '"')
      OS << "\\\"";
    else
      OS << C;
  }
}

} // namespace

template <EscapeTag Tag>
static void
writeClusterId(llvm::raw_ostream &OS,
               const InstructionBenchmarkClustering::ClusterId &CID) {
  if (CID.isNoise())
    writeEscaped<Tag>(OS, "[noise]");
  else if (CID.isError())
    writeEscaped<Tag>(OS, "[error]");
  else
    OS << CID.getId();
}

template <EscapeTag Tag>
static void writeMeasurementValue(llvm::raw_ostream &OS, const double Value) {
  // Given Value, if we wanted to serialize it to a string,
  // how many base-10 digits will we need to store, max?
  static constexpr auto MaxDigitCount =
      std::numeric_limits<decltype(Value)>::max_digits10;
  // Also, we will need a decimal separator.
  static constexpr auto DecimalSeparatorLen = 1; // e.g. '.'
  // So how long of a string will the serialization produce, max?
  static constexpr auto SerializationLen = MaxDigitCount + DecimalSeparatorLen;
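  // Sizing note (assuming the usual IEEE-754 64-bit double): max_digits10 is
  // 17, so SerializationLen is 18 and the small string returned below can
  // usually hold the formatted value inline.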

  // WARNING: when changing the format, also adjust the small-size estimate ^.
  static constexpr StringLiteral SimpleFloatFormat = StringLiteral("{0:F}");

  writeEscaped<Tag>(
      OS,
      llvm::formatv(SimpleFloatFormat.data(), Value).sstr<SerializationLen>());
}

template <typename EscapeTag, EscapeTag Tag>
void Analysis::writeSnippet(llvm::raw_ostream &OS,
                            llvm::ArrayRef<uint8_t> Bytes,
                            const char *Separator) const {
  llvm::SmallVector<std::string, 3> Lines;
  // Parse the asm snippet and print it.
  while (!Bytes.empty()) {
    llvm::MCInst MI;
    uint64_t MISize = 0;
    if (!Disasm_->getInstruction(MI, MISize, Bytes, 0, llvm::nulls(),
                                 llvm::nulls())) {
      writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
      writeEscaped<Tag>(OS, Separator);
      writeEscaped<Tag>(OS, "[error decoding asm snippet]");
      return;
    }
    llvm::SmallString<128> InstPrinterStr; // FIXME: magic number.
    llvm::raw_svector_ostream OSS(InstPrinterStr);
    InstPrinter_->printInst(&MI, OSS, "", *SubtargetInfo_);
    Bytes = Bytes.drop_front(MISize);
    Lines.emplace_back(llvm::StringRef(InstPrinterStr).trim());
  }
  writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
}

// Prints a row representing an instruction, along with scheduling info and
// point coordinates (measurements).
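// For example (illustrative): each row holds the cluster id, the disassembled
// snippet (CSV-escaped), the benchmark configuration, the sched class name
// (or its id in release builds), and then one column per measurement.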
void Analysis::printInstructionRowCsv(const size_t PointId,
                                      llvm::raw_ostream &OS) const {
  const InstructionBenchmark &Point = Clustering_.getPoints()[PointId];
  writeClusterId<kEscapeCsv>(OS, Clustering_.getClusterIdForPoint(PointId));
  OS << kCsvSep;
  writeSnippet<EscapeTag, kEscapeCsv>(OS, Point.AssembledSnippet, "; ");
  OS << kCsvSep;
  writeEscaped<kEscapeCsv>(OS, Point.Key.Config);
  OS << kCsvSep;
  assert(!Point.Key.Instructions.empty());
  const llvm::MCInst &MCI = Point.keyInstruction();
  const unsigned SchedClassId = resolveSchedClassId(
      *SubtargetInfo_, InstrInfo_->get(MCI.getOpcode()).getSchedClass(), MCI);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  const llvm::MCSchedClassDesc *const SCDesc =
      SubtargetInfo_->getSchedModel().getSchedClassDesc(SchedClassId);
  writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
#else
  OS << SchedClassId;
#endif
  for (const auto &Measurement : Point.Measurements) {
    OS << kCsvSep;
    writeMeasurementValue<kEscapeCsv>(OS, Measurement.PerInstructionValue);
  }
  OS << "\n";
}

Analysis::Analysis(const llvm::Target &Target,
                   std::unique_ptr<llvm::MCInstrInfo> InstrInfo,
                   const InstructionBenchmarkClustering &Clustering,
                   bool AnalysisDisplayUnstableOpcodes)
    : Clustering_(Clustering), InstrInfo_(std::move(InstrInfo)),
      AnalysisDisplayUnstableOpcodes_(AnalysisDisplayUnstableOpcodes) {
  if (Clustering.getPoints().empty())
    return;

  const InstructionBenchmark &FirstPoint = Clustering.getPoints().front();
  RegInfo_.reset(Target.createMCRegInfo(FirstPoint.LLVMTriple));
  AsmInfo_.reset(Target.createMCAsmInfo(*RegInfo_, FirstPoint.LLVMTriple));
  SubtargetInfo_.reset(Target.createMCSubtargetInfo(FirstPoint.LLVMTriple,
                                                    FirstPoint.CpuName, ""));
  InstPrinter_.reset(Target.createMCInstPrinter(
      llvm::Triple(FirstPoint.LLVMTriple), 0 /*default variant*/, *AsmInfo_,
      *InstrInfo_, *RegInfo_));

  Context_ = llvm::make_unique<llvm::MCContext>(AsmInfo_.get(), RegInfo_.get(),
                                                &ObjectFileInfo_);
  Disasm_.reset(Target.createMCDisassembler(*SubtargetInfo_, *Context_));
  assert(Disasm_ && "cannot create MCDisassembler. missing call to "
                    "InitializeXXXTargetDisassembler ?");
}

template <>
llvm::Error
Analysis::run<Analysis::PrintClusters>(llvm::raw_ostream &OS) const {
  if (Clustering_.getPoints().empty())
    return llvm::Error::success();

  // Write the header.
  OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config"
     << kCsvSep << "sched_class";
  for (const auto &Measurement : Clustering_.getPoints().front().Measurements) {
    OS << kCsvSep;
    writeEscaped<kEscapeCsv>(OS, Measurement.Key);
  }
  OS << "\n";

  // Write the points.
  const auto &Clusters = Clustering_.getValidClusters();
  for (size_t I = 0, E = Clusters.size(); I < E; ++I) {
    for (const size_t PointId : Clusters[I].PointIndices) {
      printInstructionRowCsv(PointId, OS);
    }
    OS << "\n\n";
  }
  return llvm::Error::success();
}

Analysis::ResolvedSchedClassAndPoints::ResolvedSchedClassAndPoints(
    ResolvedSchedClass &&RSC)
    : RSC(std::move(RSC)) {}

std::vector<Analysis::ResolvedSchedClassAndPoints>
Analysis::makePointsPerSchedClass() const {
  std::vector<ResolvedSchedClassAndPoints> Entries;
  // Maps SchedClassIds to index in result.
  std::unordered_map<unsigned, size_t> SchedClassIdToIndex;
  const auto &Points = Clustering_.getPoints();
  for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) {
    const InstructionBenchmark &Point = Points[PointId];
    if (!Point.Error.empty())
      continue;
    assert(!Point.Key.Instructions.empty());
    // FIXME: we should be using the tuple of classes for instructions in the
    // snippet as key.
    const llvm::MCInst &MCI = Point.keyInstruction();
    unsigned SchedClassId = InstrInfo_->get(MCI.getOpcode()).getSchedClass();
    const bool WasVariant = SchedClassId && SubtargetInfo_->getSchedModel()
                                                .getSchedClassDesc(SchedClassId)
                                                ->isVariant();
    SchedClassId = resolveSchedClassId(*SubtargetInfo_, SchedClassId, MCI);
    const auto IndexIt = SchedClassIdToIndex.find(SchedClassId);
    if (IndexIt == SchedClassIdToIndex.end()) {
      // Create a new entry.
      SchedClassIdToIndex.emplace(SchedClassId, Entries.size());
      ResolvedSchedClassAndPoints Entry(
          ResolvedSchedClass(*SubtargetInfo_, SchedClassId, WasVariant));
      Entry.PointIds.push_back(PointId);
      Entries.push_back(std::move(Entry));
    } else {
      // Append to the existing entry.
      Entries[IndexIt->second].PointIds.push_back(PointId);
    }
  }
  return Entries;
}

// Uops snippets repeat the same opcode over and over again. Just show this
// opcode and show the whole snippet only on hover.
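// For example (illustrative): a three-instruction uops snippet renders as
// "ADD32rr (x3)".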
static void writeUopsSnippetHtml(llvm::raw_ostream &OS,
                                 const std::vector<llvm::MCInst> &Instructions,
                                 const llvm::MCInstrInfo &InstrInfo) {
  if (Instructions.empty())
    return;
  writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instructions[0].getOpcode()));
  if (Instructions.size() > 1)
    OS << " (x" << Instructions.size() << ")";
}

// Latency tries to find a serial path. Just show the opcode path and show the
// whole snippet only on hover.
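// For example (illustrative): a two-instruction serial chain renders as
// "ADD32rr &rarr; ADD32rr".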
static void
writeLatencySnippetHtml(llvm::raw_ostream &OS,
                        const std::vector<llvm::MCInst> &Instructions,
                        const llvm::MCInstrInfo &InstrInfo) {
  bool First = true;
  for (const llvm::MCInst &Instr : Instructions) {
    if (First)
      First = false;
    else
      OS << " &rarr; ";
    writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instr.getOpcode()));
  }
}

void Analysis::printSchedClassClustersHtml(
    const std::vector<SchedClassCluster> &Clusters,
    const ResolvedSchedClass &RSC, llvm::raw_ostream &OS) const {
  const auto &Points = Clustering_.getPoints();
  OS << "<table class=\"sched-class-clusters\">";
  OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
  assert(!Clusters.empty());
  for (const auto &Measurement :
       Points[Clusters[0].getPointIds()[0]].Measurements) {
    OS << "<th>";
    writeEscaped<kEscapeHtml>(OS, Measurement.Key);
    OS << "</th>";
  }
  OS << "</tr>";
  for (const SchedClassCluster &Cluster : Clusters) {
    OS << "<tr class=\""
       << (Cluster.measurementsMatch(*SubtargetInfo_, RSC, Clustering_)
               ? "good-cluster"
               : "bad-cluster")
       << "\"><td>";
    writeClusterId<kEscapeHtml>(OS, Cluster.id());
    OS << "</td><td><ul>";
    for (const size_t PointId : Cluster.getPointIds()) {
      const auto &Point = Points[PointId];
      OS << "<li><span class=\"mono\" title=\"";
      writeSnippet<EscapeTag, kEscapeHtmlString>(OS, Point.AssembledSnippet,
                                                 "\n");
      OS << "\">";
      switch (Point.Mode) {
      case InstructionBenchmark::Latency:
        writeLatencySnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
        break;
      case InstructionBenchmark::Uops:
      case InstructionBenchmark::InverseThroughput:
        writeUopsSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
        break;
      default:
        llvm_unreachable("invalid mode");
      }
      OS << "</span> <span class=\"mono\">";
      writeEscaped<kEscapeHtml>(OS, Point.Key.Config);
      OS << "</span></li>";
    }
    OS << "</ul></td>";
    for (const auto &Stats : Cluster.getRepresentative()) {
      OS << "<td class=\"measurement\">";
      writeMeasurementValue<kEscapeHtml>(OS, Stats.avg());
      OS << "<br><span class=\"minmax\">[";
      writeMeasurementValue<kEscapeHtml>(OS, Stats.min());
      OS << ";";
      writeMeasurementValue<kEscapeHtml>(OS, Stats.max());
      OS << "]</span></td>";
    }
    OS << "</tr>";
  }
  OS << "</table>";
}

// Return the non-redundant list of WriteProcRes used by the given sched class.
// The scheduling model for LLVM is such that each instruction has a certain
// number of uops which consume resources which are described by WriteProcRes
// entries. Each entry describes how many cycles are spent on a specific ProcRes
// kind.
// For example, an instruction might have 3 uOps, one dispatching on P0
// (ProcResIdx=1) and two on P06 (ProcResIdx=7).
// Note that LLVM additionally denormalizes resource consumption to include
// usage of super resources by subresources. So in practice if there exists a
// P016 (ProcResIdx=10), then the cycles consumed by P0 are also consumed by
// P06 (ProcResIdx=7) and P016 (ProcResIdx=10), and the resources consumed
// by P06 are also consumed by P016. In the figure below, parenthesized cycles
// denote implied usage of superresources by subresources:
//            P0      P06    P016
//     uOp1    1      (1)     (1)
//     uOp2            1      (1)
//     uOp3            1      (1)
//     =============================
//             1       3       3
// Eventually we end up with three entries for the WriteProcRes of the
// instruction:
//    {ProcResIdx=1,  Cycles=1}  // P0
//    {ProcResIdx=7,  Cycles=3}  // P06
//    {ProcResIdx=10, Cycles=3}  // P016
//
// Note that in this case, P016 does not contribute any cycles of its own, so
// it would be removed by this function.
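// Continuing the example (illustrative trace of the code below), the returned
// list would be:
//    {ProcResIdx=1, Cycles=1}  // P0
//    {ProcResIdx=7, Cycles=2}  // P06: 3 cycles minus the 1 cycle already
//                              // carried by its subunit P0.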
// FIXME: Move this to MCSubtargetInfo and use it in llvm-mca.
static llvm::SmallVector<llvm::MCWriteProcResEntry, 8>
getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc,
                            const llvm::MCSubtargetInfo &STI) {
  llvm::SmallVector<llvm::MCWriteProcResEntry, 8> Result;
  const auto &SM = STI.getSchedModel();
  const unsigned NumProcRes = SM.getNumProcResourceKinds();

  // This assumes that the ProcResDescs are sorted in topological order, which
  // is guaranteed by the tablegen backend.
  llvm::SmallVector<float, 32> ProcResUnitUsage(NumProcRes);
  for (const auto *WPR = STI.getWriteProcResBegin(&SCDesc),
                  *const WPREnd = STI.getWriteProcResEnd(&SCDesc);
       WPR != WPREnd; ++WPR) {
    const llvm::MCProcResourceDesc *const ProcResDesc =
        SM.getProcResource(WPR->ProcResourceIdx);
    if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
      // This is a ProcResUnit.
      Result.push_back({WPR->ProcResourceIdx, WPR->Cycles});
      ProcResUnitUsage[WPR->ProcResourceIdx] += WPR->Cycles;
    } else {
      // This is a ProcResGroup. First see if it contributes any cycles or if
      // it has cycles just from subunits.
      float RemainingCycles = WPR->Cycles;
      for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
           SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
           ++SubResIdx) {
        RemainingCycles -= ProcResUnitUsage[*SubResIdx];
      }
      if (RemainingCycles < 0.01f) {
        // The ProcResGroup contributes no cycles of its own.
        continue;
      }
      // The ProcResGroup contributes `RemainingCycles` cycles of its own.
      Result.push_back({WPR->ProcResourceIdx,
                        static_cast<uint16_t>(std::round(RemainingCycles))});
      // Spread the remaining cycles over all subunits.
      for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
           SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
           ++SubResIdx) {
        ProcResUnitUsage[*SubResIdx] += RemainingCycles / ProcResDesc->NumUnits;
      }
    }
  }
  return Result;
}

Analysis::ResolvedSchedClass::ResolvedSchedClass(
    const llvm::MCSubtargetInfo &STI, unsigned ResolvedSchedClassId,
    bool WasVariant)
    : SchedClassId(ResolvedSchedClassId),
      SCDesc(STI.getSchedModel().getSchedClassDesc(ResolvedSchedClassId)),
      WasVariant(WasVariant),
      NonRedundantWriteProcRes(getNonRedundantWriteProcRes(*SCDesc, STI)),
      IdealizedProcResPressure(computeIdealizedProcResPressure(
          STI.getSchedModel(), NonRedundantWriteProcRes)) {
  assert((SCDesc == nullptr || !SCDesc->isVariant()) &&
         "ResolvedSchedClass should never be variant");
}

void Analysis::SchedClassCluster::addPoint(
    size_t PointId, const InstructionBenchmarkClustering &Clustering) {
  PointIds.push_back(PointId);
  const auto &Point = Clustering.getPoints()[PointId];
  if (ClusterId.isUndef()) {
    ClusterId = Clustering.getClusterIdForPoint(PointId);
    Representative.resize(Point.Measurements.size());
  }
  for (size_t I = 0, E = Point.Measurements.size(); I < E; ++I) {
    Representative[I].push(Point.Measurements[I]);
  }
  assert(ClusterId == Clustering.getClusterIdForPoint(PointId));
}

// Returns a ProcResIdx by id or name.
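// For example (illustrative): "7" is parsed as ProcResIdx 7, while a resource
// name such as "HWPort0" is looked up in the scheduling model; unknown names
// resolve to 0 (the invalid ProcResIdx).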
static unsigned findProcResIdx(const llvm::MCSubtargetInfo &STI,
                               const llvm::StringRef NameOrId) {
  // Interpret the key as a ProcResIdx.
  unsigned ProcResIdx = 0;
  if (llvm::to_integer(NameOrId, ProcResIdx, 10))
    return ProcResIdx;
  // Interpret the key as a ProcRes name.
  const auto &SchedModel = STI.getSchedModel();
  for (int I = 0, E = SchedModel.getNumProcResourceKinds(); I < E; ++I) {
    if (NameOrId == SchedModel.getProcResource(I)->Name)
      return I;
  }
  return 0;
}

bool Analysis::SchedClassCluster::measurementsMatch(
    const llvm::MCSubtargetInfo &STI, const ResolvedSchedClass &RSC,
    const InstructionBenchmarkClustering &Clustering) const {
  const size_t NumMeasurements = Representative.size();
  std::vector<BenchmarkMeasure> ClusterCenterPoint(NumMeasurements);
  std::vector<BenchmarkMeasure> SchedClassPoint(NumMeasurements);
  // Latency case.
  assert(!Clustering.getPoints().empty());
  const InstructionBenchmark::ModeE Mode = Clustering.getPoints()[0].Mode;
  if (Mode == InstructionBenchmark::Latency) {
    if (NumMeasurements != 1) {
      llvm::errs()
          << "invalid number of measurements in latency mode: expected 1, got "
          << NumMeasurements << "\n";
      return false;
    }
    // Find the latency.
    SchedClassPoint[0].PerInstructionValue = 0.0;
    for (unsigned I = 0; I < RSC.SCDesc->NumWriteLatencyEntries; ++I) {
      const llvm::MCWriteLatencyEntry *const WLE =
          STI.getWriteLatencyEntry(RSC.SCDesc, I);
      SchedClassPoint[0].PerInstructionValue =
          std::max<double>(SchedClassPoint[0].PerInstructionValue, WLE->Cycles);
    }
    ClusterCenterPoint[0].PerInstructionValue = Representative[0].avg();
  } else if (Mode == InstructionBenchmark::Uops) {
    for (int I = 0, E = Representative.size(); I < E; ++I) {
      const auto Key = Representative[I].key();
      uint16_t ProcResIdx = findProcResIdx(STI, Key);
      if (ProcResIdx > 0) {
        // Find the pressure on ProcResIdx `Key`.
        const auto ProcResPressureIt =
            std::find_if(RSC.IdealizedProcResPressure.begin(),
                         RSC.IdealizedProcResPressure.end(),
                         [ProcResIdx](const std::pair<uint16_t, float> &WPR) {
                           return WPR.first == ProcResIdx;
                         });
        SchedClassPoint[I].PerInstructionValue =
            ProcResPressureIt == RSC.IdealizedProcResPressure.end()
                ? 0.0
                : ProcResPressureIt->second;
      } else if (Key == "NumMicroOps") {
        SchedClassPoint[I].PerInstructionValue = RSC.SCDesc->NumMicroOps;
      } else {
        llvm::errs() << "expected `key` to be either a ProcResIdx or a ProcRes "
                        "name, got "
                     << Key << "\n";
        return false;
      }
      ClusterCenterPoint[I].PerInstructionValue = Representative[I].avg();
    }
  } else if (Mode == InstructionBenchmark::InverseThroughput) {
    for (int I = 0, E = Representative.size(); I < E; ++I) {
      SchedClassPoint[I].PerInstructionValue =
          MCSchedModel::getReciprocalThroughput(STI, *RSC.SCDesc);
      ClusterCenterPoint[I].PerInstructionValue = Representative[I].min();
    }
  } else {
    llvm_unreachable("unimplemented measurement matching mode");
    return false;
  }
  return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint);
}

void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC,
                                       llvm::raw_ostream &OS) const {
  OS << "<table class=\"sched-class-desc\">";
  OS << "<tr><th>Valid</th><th>Variant</th><th>NumMicroOps</th><th>Latency</"
        "th><th>RThroughput</th><th>WriteProcRes</th><th title=\"This is the "
        "idealized unit resource (port) pressure assuming ideal "
        "distribution\">Idealized Resource Pressure</th></tr>";
  if (RSC.SCDesc->isValid()) {
    const auto &SM = SubtargetInfo_->getSchedModel();
    OS << "<tr><td>&#10004;</td>";
    OS << "<td>" << (RSC.WasVariant ? "&#10004;" : "&#10005;") << "</td>";
    OS << "<td>" << RSC.SCDesc->NumMicroOps << "</td>";
    // Latencies.
    OS << "<td><ul>";
    for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) {
      const auto *const Entry =
          SubtargetInfo_->getWriteLatencyEntry(RSC.SCDesc, I);
      OS << "<li>" << Entry->Cycles;
      if (RSC.SCDesc->NumWriteLatencyEntries > 1) {
        // Disambiguate if there is more than one latency.
        OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
      }
      OS << "</li>";
    }
    OS << "</ul></td>";
    // Inverse throughput.
    OS << "<td>";
    writeMeasurementValue<kEscapeHtml>(
        OS,
        MCSchedModel::getReciprocalThroughput(*SubtargetInfo_, *RSC.SCDesc));
    OS << "</td>";
    // WriteProcRes.
    OS << "<td><ul>";
    for (const auto &WPR : RSC.NonRedundantWriteProcRes) {
      OS << "<li><span class=\"mono\">";
      writeEscaped<kEscapeHtml>(OS,
                                SM.getProcResource(WPR.ProcResourceIdx)->Name);
      OS << "</span>: " << WPR.Cycles << "</li>";
    }
    OS << "</ul></td>";
    // Idealized port pressure.
    OS << "<td><ul>";
    for (const auto &Pressure : RSC.IdealizedProcResPressure) {
      OS << "<li><span class=\"mono\">";
      writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel()
                                        .getProcResource(Pressure.first)
                                        ->Name);
      OS << "</span>: ";
      writeMeasurementValue<kEscapeHtml>(OS, Pressure.second);
      OS << "</li>";
    }
    OS << "</ul></td>";
    OS << "</tr>";
  } else {
    OS << "<tr><td>&#10005;</td><td></td><td></td></tr>";
  }
  OS << "</table>";
}

static constexpr const char kHtmlHead[] = R"(
<head>
<title>llvm-exegesis Analysis Results</title>
<style>
body {
  font-family: sans-serif
}
span.sched-class-name {
  font-weight: bold;
  font-family: monospace;
}
span.opcode {
  font-family: monospace;
}
span.config {
  font-family: monospace;
}
div.inconsistency {
  margin-top: 50px;
}
table {
  margin-left: 50px;
  border-collapse: collapse;
}
table, table tr,td,th {
  border: 1px solid #444;
}
table ul {
  padding-left: 0px;
  margin: 0px;
  list-style-type: none;
}
table.sched-class-clusters td {
  padding-left: 10px;
  padding-right: 10px;
  padding-top: 10px;
  padding-bottom: 10px;
}
table.sched-class-desc td {
  padding-left: 10px;
  padding-right: 10px;
  padding-top: 2px;
  padding-bottom: 2px;
}
span.mono {
  font-family: monospace;
}
td.measurement {
  text-align: center;
}
tr.good-cluster td.measurement {
  color: #292
}
tr.bad-cluster td.measurement {
  color: #922
}
tr.good-cluster td.measurement span.minmax {
  color: #888;
}
tr.bad-cluster td.measurement span.minmax {
  color: #888;
}
</style>
</head>
)";

template <>
llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
    llvm::raw_ostream &OS) const {
  const auto &FirstPoint = Clustering_.getPoints()[0];
  // Print the header.
  OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>";
  OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>";
  OS << "<h3>Triple: <span class=\"mono\">";
  writeEscaped<kEscapeHtml>(OS, FirstPoint.LLVMTriple);
  OS << "</span></h3><h3>Cpu: <span class=\"mono\">";
  writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName);
  OS << "</span></h3>";

  for (const auto &RSCAndPoints : makePointsPerSchedClass()) {
    if (!RSCAndPoints.RSC.SCDesc)
      continue;
    // Bucket sched class points into sched class clusters.
    std::vector<SchedClassCluster> SchedClassClusters;
    for (const size_t PointId : RSCAndPoints.PointIds) {
      const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId);
      if (!ClusterId.isValid())
        continue; // Ignore noise and errors. FIXME: take noise into account ?
      if (ClusterId.isUnstable() ^ AnalysisDisplayUnstableOpcodes_)
        continue; // Either display stable or unstable clusters only.
      auto SchedClassClusterIt =
          std::find_if(SchedClassClusters.begin(), SchedClassClusters.end(),
                       [ClusterId](const SchedClassCluster &C) {
                         return C.id() == ClusterId;
                       });
      if (SchedClassClusterIt == SchedClassClusters.end()) {
        SchedClassClusters.emplace_back();
        SchedClassClusterIt = std::prev(SchedClassClusters.end());
      }
      SchedClassClusterIt->addPoint(PointId, Clustering_);
    }

    // Print any scheduling class that has at least one cluster that does not
    // match the checked-in data.
    if (llvm::all_of(SchedClassClusters,
                     [this, &RSCAndPoints](const SchedClassCluster &C) {
                       return C.measurementsMatch(
                           *SubtargetInfo_, RSCAndPoints.RSC, Clustering_);
                     }))
      continue; // Nothing weird.

    OS << "<div class=\"inconsistency\"><p>Sched Class <span "
          "class=\"sched-class-name\">";
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    writeEscaped<kEscapeHtml>(OS, RSCAndPoints.RSC.SCDesc->Name);
#else
    OS << RSCAndPoints.RSC.SchedClassId;
#endif
    OS << "</span> contains instructions whose performance characteristics do"
          " not match that of LLVM:</p>";
    printSchedClassClustersHtml(SchedClassClusters, RSCAndPoints.RSC, OS);
    OS << "<p>llvm SchedModel data:</p>";
    printSchedClassDescHtml(RSCAndPoints.RSC, OS);
    OS << "</div>";
  }

  OS << "</body></html>";
  return llvm::Error::success();
}

// Distributes a pressure budget as evenly as possible over the provided
// subunits, given the already existing port pressure distribution.
//
// The algorithm is as follows: while there is remaining pressure to
// distribute, find the subunits with minimal pressure, and distribute
// remaining pressure equally up to the pressure of the unit with
// second-to-minimal pressure.
// For example, let's assume we want to distribute 2*P1256
// (Subunits = [P1,P2,P5,P6]), and the starting DensePressure is:
//     DensePressure =        P0   P1   P2   P3   P4   P5   P6   P7
//                           0.1  0.3  0.2  0.0  0.0  0.5  0.5  0.5
//     RemainingPressure = 2.0
// We sort the subunits by pressure:
//     Subunits = [(P2,p=0.2), (P1,p=0.3), (P5,p=0.5), (P6,p=0.5)]
// We first start with the subunits with minimal pressure, which are at
// the beginning of the sorted array. In this example there is one (P2).
// The subunit with second-to-minimal pressure is the next one in the
// array (P1). So we distribute 0.1 pressure to P2, and remove 0.1 cycles
// from the budget.
//     Subunits = [(P2,p=0.3), (P1,p=0.3), (P5,p=0.5), (P6,p=0.5)]
//     RemainingPressure = 1.9
// We repeat this process: distribute 0.2 pressure on each of the minimal
// P2 and P1, decrease budget by 2*0.2:
//     Subunits = [(P2,p=0.5), (P1,p=0.5), (P5,p=0.5), (P6,p=0.5)]
//     RemainingPressure = 1.5
// There are no second-to-minimal subunits so we just share the remaining
// budget (1.5 cycles) equally:
//     Subunits = [(P2,p=0.875), (P1,p=0.875), (P5,p=0.875), (P6,p=0.875)]
//     RemainingPressure = 0.0
// We stop as there is no remaining budget to distribute.
void distributePressure(float RemainingPressure,
                        llvm::SmallVector<uint16_t, 32> Subunits,
                        llvm::SmallVector<float, 32> &DensePressure) {
  // Find the number of subunits with minimal pressure (they are at the
  // front).
  llvm::sort(Subunits, [&DensePressure](const uint16_t A, const uint16_t B) {
    return DensePressure[A] < DensePressure[B];
  });
  const auto getPressureForSubunit = [&DensePressure,
                                      &Subunits](size_t I) -> float & {
    return DensePressure[Subunits[I]];
  };
  size_t NumMinimalSU = 1;
  while (NumMinimalSU < Subunits.size() &&
         getPressureForSubunit(NumMinimalSU) == getPressureForSubunit(0)) {
    ++NumMinimalSU;
  }
  while (RemainingPressure > 0.0f) {
    if (NumMinimalSU == Subunits.size()) {
      // All units are minimal, just distribute evenly and be done.
      for (size_t I = 0; I < NumMinimalSU; ++I) {
        getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
      }
      return;
    }
    // Distribute the remaining pressure equally.
    const float MinimalPressure = getPressureForSubunit(NumMinimalSU - 1);
    const float SecondToMinimalPressure = getPressureForSubunit(NumMinimalSU);
    assert(MinimalPressure < SecondToMinimalPressure);
    const float Increment = SecondToMinimalPressure - MinimalPressure;
    if (RemainingPressure <= NumMinimalSU * Increment) {
      // There is not enough remaining pressure.
      for (size_t I = 0; I < NumMinimalSU; ++I) {
        getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
      }
      return;
    }
    // Bump all minimal pressure subunits to `SecondToMinimalPressure`.
    for (size_t I = 0; I < NumMinimalSU; ++I) {
      getPressureForSubunit(I) = SecondToMinimalPressure;
      RemainingPressure -= SecondToMinimalPressure;
    }
    while (NumMinimalSU < Subunits.size() &&
           getPressureForSubunit(NumMinimalSU) == SecondToMinimalPressure) {
      ++NumMinimalSU;
    }
  }
}

std::vector<std::pair<uint16_t, float>> computeIdealizedProcResPressure(
    const llvm::MCSchedModel &SM,
    llvm::SmallVector<llvm::MCWriteProcResEntry, 8> WPRS) {
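  // Illustrative trace, continuing the P0/P06 example above: given the
  // non-redundant entries {P0: 1 cycle, P06: 2 cycles}, the unit cycle lands
  // on P0 and the group's 2 cycles are spread to equalize pressure, yielding
  // {P0: 1.5, P6: 1.5}.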
  // DensePressure[I] is the port pressure for Proc Resource I.
  llvm::SmallVector<float, 32> DensePressure(SM.getNumProcResourceKinds());
  llvm::sort(WPRS, [](const llvm::MCWriteProcResEntry &A,
                      const llvm::MCWriteProcResEntry &B) {
    return A.ProcResourceIdx < B.ProcResourceIdx;
  });
  for (const llvm::MCWriteProcResEntry &WPR : WPRS) {
    // Get units for the entry.
    const llvm::MCProcResourceDesc *const ProcResDesc =
        SM.getProcResource(WPR.ProcResourceIdx);
    if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
      // This is a ProcResUnit.
      DensePressure[WPR.ProcResourceIdx] += WPR.Cycles;
    } else {
      // This is a ProcResGroup.
      llvm::SmallVector<uint16_t, 32> Subunits(ProcResDesc->SubUnitsIdxBegin,
                                               ProcResDesc->SubUnitsIdxBegin +
                                                   ProcResDesc->NumUnits);
      distributePressure(WPR.Cycles, Subunits, DensePressure);
    }
  }
  // Turn dense pressure into sparse pressure by removing zero entries.
  std::vector<std::pair<uint16_t, float>> Pressure;
  for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
    if (DensePressure[I] > 0.0f)
      Pressure.emplace_back(I, DensePressure[I]);
  }
  return Pressure;
}

} // namespace exegesis
} // namespace llvm