xref: /llvm-project/llvm/tools/llvm-exegesis/lib/Analysis.cpp (revision 2946cd701067404b99c39fb29dc9c74bd7193eb3)
1 //===-- Analysis.cpp --------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Analysis.h"
10 #include "BenchmarkResult.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/MC/MCAsmInfo.h"
13 #include "llvm/Support/FormatVariadic.h"
14 #include <limits>
15 #include <unordered_set>
16 #include <vector>
17 
18 namespace llvm {
19 namespace exegesis {
20 
21 static const char kCsvSep = ',';
22 
23 static unsigned resolveSchedClassId(const llvm::MCSubtargetInfo &STI,
24                                     unsigned SchedClassId,
25                                     const llvm::MCInst &MCI) {
26   const auto &SM = STI.getSchedModel();
27   while (SchedClassId && SM.getSchedClassDesc(SchedClassId)->isVariant())
28     SchedClassId =
29         STI.resolveVariantSchedClass(SchedClassId, &MCI, SM.getProcessorID());
30   return SchedClassId;
31 }
32 
33 namespace {
34 
35 enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString };
36 
37 template <EscapeTag Tag>
38 void writeEscaped(llvm::raw_ostream &OS, const llvm::StringRef S);
39 
40 template <>
41 void writeEscaped<kEscapeCsv>(llvm::raw_ostream &OS, const llvm::StringRef S) {
42   if (std::find(S.begin(), S.end(), kCsvSep) == S.end()) {
43     OS << S;
44   } else {
45     // Needs escaping.
46     OS << '"';
47     for (const char C : S) {
48       if (C == '"')
49         OS << "\"\"";
50       else
51         OS << C;
52     }
53     OS << '"';
54   }
55 }
56 
57 template <>
58 void writeEscaped<kEscapeHtml>(llvm::raw_ostream &OS, const llvm::StringRef S) {
59   for (const char C : S) {
60     if (C == '<')
61       OS << "&lt;";
62     else if (C == '>')
63       OS << "&gt;";
64     else if (C == '&')
65       OS << "&amp;";
66     else
67       OS << C;
68   }
69 }
70 
71 template <>
72 void writeEscaped<kEscapeHtmlString>(llvm::raw_ostream &OS,
73                                      const llvm::StringRef S) {
74   for (const char C : S) {
75     if (C == '"')
76       OS << "\\\"";
77     else
78       OS << C;
79   }
80 }
81 
82 } // namespace
83 
84 template <EscapeTag Tag>
85 static void
86 writeClusterId(llvm::raw_ostream &OS,
87                const InstructionBenchmarkClustering::ClusterId &CID) {
88   if (CID.isNoise())
89     writeEscaped<Tag>(OS, "[noise]");
90   else if (CID.isError())
91     writeEscaped<Tag>(OS, "[error]");
92   else
93     OS << CID.getId();
94 }
95 
96 template <EscapeTag Tag>
97 static void writeMeasurementValue(llvm::raw_ostream &OS, const double Value) {
98   // Given Value, if we wanted to serialize it to a string,
99   // how many base-10 digits will we need to store, max?
100   static constexpr auto MaxDigitCount =
101       std::numeric_limits<decltype(Value)>::max_digits10;
102   // Also, we will need a decimal separator.
103   static constexpr auto DecimalSeparatorLen = 1; // '.' e.g.
104   // So how long of a string will the serialization produce, max?
105   static constexpr auto SerializationLen = MaxDigitCount + DecimalSeparatorLen;
106 
107   // WARNING: when changing the format, also adjust the small-size estimate ^.
108   static constexpr StringLiteral SimpleFloatFormat = StringLiteral("{0:F}");
109 
110   writeEscaped<Tag>(
111       OS,
112       llvm::formatv(SimpleFloatFormat.data(), Value).sstr<SerializationLen>());
113 }
114 
115 template <typename EscapeTag, EscapeTag Tag>
116 void Analysis::writeSnippet(llvm::raw_ostream &OS,
117                             llvm::ArrayRef<uint8_t> Bytes,
118                             const char *Separator) const {
119   llvm::SmallVector<std::string, 3> Lines;
120   // Parse the asm snippet and print it.
121   while (!Bytes.empty()) {
122     llvm::MCInst MI;
123     uint64_t MISize = 0;
124     if (!Disasm_->getInstruction(MI, MISize, Bytes, 0, llvm::nulls(),
125                                  llvm::nulls())) {
126       writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
127       writeEscaped<Tag>(OS, Separator);
128       writeEscaped<Tag>(OS, "[error decoding asm snippet]");
129       return;
130     }
131     llvm::SmallString<128> InstPrinterStr; // FIXME: magic number.
132     llvm::raw_svector_ostream OSS(InstPrinterStr);
133     InstPrinter_->printInst(&MI, OSS, "", *SubtargetInfo_);
134     Bytes = Bytes.drop_front(MISize);
135     Lines.emplace_back(llvm::StringRef(InstPrinterStr).trim());
136   }
137   writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
138 }
139 
140 // Prints a row representing an instruction, along with scheduling info and
141 // point coordinates (measurements).
142 void Analysis::printInstructionRowCsv(const size_t PointId,
143                                       llvm::raw_ostream &OS) const {
144   const InstructionBenchmark &Point = Clustering_.getPoints()[PointId];
145   writeClusterId<kEscapeCsv>(OS, Clustering_.getClusterIdForPoint(PointId));
146   OS << kCsvSep;
147   writeSnippet<EscapeTag, kEscapeCsv>(OS, Point.AssembledSnippet, "; ");
148   OS << kCsvSep;
149   writeEscaped<kEscapeCsv>(OS, Point.Key.Config);
150   OS << kCsvSep;
151   assert(!Point.Key.Instructions.empty());
152   const llvm::MCInst &MCI = Point.Key.Instructions[0];
153   const unsigned SchedClassId = resolveSchedClassId(
154       *SubtargetInfo_, InstrInfo_->get(MCI.getOpcode()).getSchedClass(), MCI);
155 
156 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
157   const llvm::MCSchedClassDesc *const SCDesc =
158       SubtargetInfo_->getSchedModel().getSchedClassDesc(SchedClassId);
159   writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
160 #else
161   OS << SchedClassId;
162 #endif
163   for (const auto &Measurement : Point.Measurements) {
164     OS << kCsvSep;
165     writeMeasurementValue<kEscapeCsv>(OS, Measurement.PerInstructionValue);
166   }
167   OS << "\n";
168 }
169 
170 Analysis::Analysis(const llvm::Target &Target,
171                    const InstructionBenchmarkClustering &Clustering)
172     : Clustering_(Clustering) {
173   if (Clustering.getPoints().empty())
174     return;
175 
176   const InstructionBenchmark &FirstPoint = Clustering.getPoints().front();
177   InstrInfo_.reset(Target.createMCInstrInfo());
178   RegInfo_.reset(Target.createMCRegInfo(FirstPoint.LLVMTriple));
179   AsmInfo_.reset(Target.createMCAsmInfo(*RegInfo_, FirstPoint.LLVMTriple));
180   SubtargetInfo_.reset(Target.createMCSubtargetInfo(FirstPoint.LLVMTriple,
181                                                     FirstPoint.CpuName, ""));
182   InstPrinter_.reset(Target.createMCInstPrinter(
183       llvm::Triple(FirstPoint.LLVMTriple), 0 /*default variant*/, *AsmInfo_,
184       *InstrInfo_, *RegInfo_));
185 
186   Context_ = llvm::make_unique<llvm::MCContext>(AsmInfo_.get(), RegInfo_.get(),
187                                                 &ObjectFileInfo_);
188   Disasm_.reset(Target.createMCDisassembler(*SubtargetInfo_, *Context_));
189   assert(Disasm_ && "cannot create MCDisassembler. missing call to "
190                     "InitializeXXXTargetDisassembler ?");
191 }
192 
193 template <>
194 llvm::Error
195 Analysis::run<Analysis::PrintClusters>(llvm::raw_ostream &OS) const {
196   if (Clustering_.getPoints().empty())
197     return llvm::Error::success();
198 
199   // Write the header.
200   OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config"
201      << kCsvSep << "sched_class";
202   for (const auto &Measurement : Clustering_.getPoints().front().Measurements) {
203     OS << kCsvSep;
204     writeEscaped<kEscapeCsv>(OS, Measurement.Key);
205   }
206   OS << "\n";
207 
208   // Write the points.
209   const auto &Clusters = Clustering_.getValidClusters();
210   for (size_t I = 0, E = Clusters.size(); I < E; ++I) {
211     for (const size_t PointId : Clusters[I].PointIndices) {
212       printInstructionRowCsv(PointId, OS);
213     }
214     OS << "\n\n";
215   }
216   return llvm::Error::success();
217 }
218 
219 Analysis::ResolvedSchedClassAndPoints::ResolvedSchedClassAndPoints(
220     ResolvedSchedClass &&RSC)
221     : RSC(std::move(RSC)) {}
222 
223 std::vector<Analysis::ResolvedSchedClassAndPoints>
224 Analysis::makePointsPerSchedClass() const {
225   std::vector<ResolvedSchedClassAndPoints> Entries;
226   // Maps SchedClassIds to index in result.
227   std::unordered_map<unsigned, size_t> SchedClassIdToIndex;
228   const auto &Points = Clustering_.getPoints();
229   for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) {
230     const InstructionBenchmark &Point = Points[PointId];
231     if (!Point.Error.empty())
232       continue;
233     assert(!Point.Key.Instructions.empty());
234     // FIXME: we should be using the tuple of classes for instructions in the
235     // snippet as key.
236     const llvm::MCInst &MCI = Point.Key.Instructions[0];
237     unsigned SchedClassId = InstrInfo_->get(MCI.getOpcode()).getSchedClass();
238     const bool WasVariant = SchedClassId && SubtargetInfo_->getSchedModel()
239                                                 .getSchedClassDesc(SchedClassId)
240                                                 ->isVariant();
241     SchedClassId = resolveSchedClassId(*SubtargetInfo_, SchedClassId, MCI);
242     const auto IndexIt = SchedClassIdToIndex.find(SchedClassId);
243     if (IndexIt == SchedClassIdToIndex.end()) {
244       // Create a new entry.
245       SchedClassIdToIndex.emplace(SchedClassId, Entries.size());
246       ResolvedSchedClassAndPoints Entry(
247           ResolvedSchedClass(*SubtargetInfo_, SchedClassId, WasVariant));
248       Entry.PointIds.push_back(PointId);
249       Entries.push_back(std::move(Entry));
250     } else {
251       // Append to the existing entry.
252       Entries[IndexIt->second].PointIds.push_back(PointId);
253     }
254   }
255   return Entries;
256 }
257 
258 // Uops repeat the same opcode over again. Just show this opcode and show the
259 // whole snippet only on hover.
260 static void writeUopsSnippetHtml(llvm::raw_ostream &OS,
261                                  const std::vector<llvm::MCInst> &Instructions,
262                                  const llvm::MCInstrInfo &InstrInfo) {
263   if (Instructions.empty())
264     return;
265   writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instructions[0].getOpcode()));
266   if (Instructions.size() > 1)
267     OS << " (x" << Instructions.size() << ")";
268 }
269 
270 // Latency tries to find a serial path. Just show the opcode path and show the
271 // whole snippet only on hover.
272 static void
273 writeLatencySnippetHtml(llvm::raw_ostream &OS,
274                         const std::vector<llvm::MCInst> &Instructions,
275                         const llvm::MCInstrInfo &InstrInfo) {
276   bool First = true;
277   for (const llvm::MCInst &Instr : Instructions) {
278     if (First)
279       First = false;
280     else
281       OS << " &rarr; ";
282     writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instr.getOpcode()));
283   }
284 }
285 
286 void Analysis::printSchedClassClustersHtml(
287     const std::vector<SchedClassCluster> &Clusters,
288     const ResolvedSchedClass &RSC, llvm::raw_ostream &OS) const {
289   const auto &Points = Clustering_.getPoints();
290   OS << "<table class=\"sched-class-clusters\">";
291   OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
292   assert(!Clusters.empty());
293   for (const auto &Measurement :
294        Points[Clusters[0].getPointIds()[0]].Measurements) {
295     OS << "<th>";
296     writeEscaped<kEscapeHtml>(OS, Measurement.Key);
297     OS << "</th>";
298   }
299   OS << "</tr>";
300   for (const SchedClassCluster &Cluster : Clusters) {
301     OS << "<tr class=\""
302        << (Cluster.measurementsMatch(*SubtargetInfo_, RSC, Clustering_)
303                ? "good-cluster"
304                : "bad-cluster")
305        << "\"><td>";
306     writeClusterId<kEscapeHtml>(OS, Cluster.id());
307     OS << "</td><td><ul>";
308     for (const size_t PointId : Cluster.getPointIds()) {
309       const auto &Point = Points[PointId];
310       OS << "<li><span class=\"mono\" title=\"";
311       writeSnippet<EscapeTag, kEscapeHtmlString>(OS, Point.AssembledSnippet,
312                                                  "\n");
313       OS << "\">";
314       switch (Point.Mode) {
315       case InstructionBenchmark::Latency:
316         writeLatencySnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
317         break;
318       case InstructionBenchmark::Uops:
319         writeUopsSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
320         break;
321       default:
322         llvm_unreachable("invalid mode");
323       }
324       OS << "</span> <span class=\"mono\">";
325       writeEscaped<kEscapeHtml>(OS, Point.Key.Config);
326       OS << "</span></li>";
327     }
328     OS << "</ul></td>";
329     for (const auto &Stats : Cluster.getRepresentative()) {
330       OS << "<td class=\"measurement\">";
331       writeMeasurementValue<kEscapeHtml>(OS, Stats.avg());
332       OS << "<br><span class=\"minmax\">[";
333       writeMeasurementValue<kEscapeHtml>(OS, Stats.min());
334       OS << ";";
335       writeMeasurementValue<kEscapeHtml>(OS, Stats.max());
336       OS << "]</span></td>";
337     }
338     OS << "</tr>";
339   }
340   OS << "</table>";
341 }
342 
343 // Return the non-redundant list of WriteProcRes used by the given sched class.
344 // The scheduling model for LLVM is such that each instruction has a certain
345 // number of uops which consume resources which are described by WriteProcRes
346 // entries. Each entry describe how many cycles are spent on a specific ProcRes
347 // kind.
348 // For example, an instruction might have 3 uOps, one dispatching on P0
349 // (ProcResIdx=1) and two on P06 (ProcResIdx = 7).
350 // Note that LLVM additionally denormalizes resource consumption to include
351 // usage of super resources by subresources. So in practice if there exists a
352 // P016 (ProcResIdx=10), then the cycles consumed by P0 are also consumed by
353 // P06 (ProcResIdx = 7) and P016 (ProcResIdx = 10), and the resources consumed
354 // by P06 are also consumed by P016. In the figure below, parenthesized cycles
355 // denote implied usage of superresources by subresources:
356 //            P0      P06    P016
357 //     uOp1    1      (1)     (1)
358 //     uOp2            1      (1)
359 //     uOp3            1      (1)
360 //     =============================
361 //             1       3       3
362 // Eventually we end up with three entries for the WriteProcRes of the
363 // instruction:
364 //    {ProcResIdx=1,  Cycles=1}  // P0
365 //    {ProcResIdx=7,  Cycles=3}  // P06
366 //    {ProcResIdx=10, Cycles=3}  // P016
367 //
368 // Note that in this case, P016 does not contribute any cycles, so it would
369 // be removed by this function.
370 // FIXME: Move this to MCSubtargetInfo and use it in llvm-mca.
371 static llvm::SmallVector<llvm::MCWriteProcResEntry, 8>
372 getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc,
373                             const llvm::MCSubtargetInfo &STI) {
374   llvm::SmallVector<llvm::MCWriteProcResEntry, 8> Result;
375   const auto &SM = STI.getSchedModel();
376   const unsigned NumProcRes = SM.getNumProcResourceKinds();
377 
378   // This assumes that the ProcResDescs are sorted in topological order, which
379   // is guaranteed by the tablegen backend.
380   llvm::SmallVector<float, 32> ProcResUnitUsage(NumProcRes);
381   for (const auto *WPR = STI.getWriteProcResBegin(&SCDesc),
382                   *const WPREnd = STI.getWriteProcResEnd(&SCDesc);
383        WPR != WPREnd; ++WPR) {
384     const llvm::MCProcResourceDesc *const ProcResDesc =
385         SM.getProcResource(WPR->ProcResourceIdx);
386     if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
387       // This is a ProcResUnit.
388       Result.push_back({WPR->ProcResourceIdx, WPR->Cycles});
389       ProcResUnitUsage[WPR->ProcResourceIdx] += WPR->Cycles;
390     } else {
391       // This is a ProcResGroup. First see if it contributes any cycles or if
392       // it has cycles just from subunits.
393       float RemainingCycles = WPR->Cycles;
394       for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
395            SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
396            ++SubResIdx) {
397         RemainingCycles -= ProcResUnitUsage[*SubResIdx];
398       }
399       if (RemainingCycles < 0.01f) {
400         // The ProcResGroup contributes no cycles of its own.
401         continue;
402       }
403       // The ProcResGroup contributes `RemainingCycles` cycles of its own.
404       Result.push_back({WPR->ProcResourceIdx,
405                         static_cast<uint16_t>(std::round(RemainingCycles))});
406       // Spread the remaining cycles over all subunits.
407       for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
408            SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
409            ++SubResIdx) {
410         ProcResUnitUsage[*SubResIdx] += RemainingCycles / ProcResDesc->NumUnits;
411       }
412     }
413   }
414   return Result;
415 }
416 
417 Analysis::ResolvedSchedClass::ResolvedSchedClass(
418     const llvm::MCSubtargetInfo &STI, unsigned ResolvedSchedClassId,
419     bool WasVariant)
420     : SchedClassId(ResolvedSchedClassId), SCDesc(STI.getSchedModel().getSchedClassDesc(ResolvedSchedClassId)),
421       WasVariant(WasVariant),
422       NonRedundantWriteProcRes(getNonRedundantWriteProcRes(*SCDesc, STI)),
423       IdealizedProcResPressure(computeIdealizedProcResPressure(
424           STI.getSchedModel(), NonRedundantWriteProcRes)) {
425   assert((SCDesc == nullptr || !SCDesc->isVariant()) &&
426          "ResolvedSchedClass should never be variant");
427 }
428 
429 void Analysis::SchedClassCluster::addPoint(
430     size_t PointId, const InstructionBenchmarkClustering &Clustering) {
431   PointIds.push_back(PointId);
432   const auto &Point = Clustering.getPoints()[PointId];
433   if (ClusterId.isUndef()) {
434     ClusterId = Clustering.getClusterIdForPoint(PointId);
435     Representative.resize(Point.Measurements.size());
436   }
437   for (size_t I = 0, E = Point.Measurements.size(); I < E; ++I) {
438     Representative[I].push(Point.Measurements[I]);
439   }
440   assert(ClusterId == Clustering.getClusterIdForPoint(PointId));
441 }
442 
443 // Returns a ProxResIdx by id or name.
444 static unsigned findProcResIdx(const llvm::MCSubtargetInfo &STI,
445                                const llvm::StringRef NameOrId) {
446   // Interpret the key as an ProcResIdx.
447   unsigned ProcResIdx = 0;
448   if (llvm::to_integer(NameOrId, ProcResIdx, 10))
449     return ProcResIdx;
450   // Interpret the key as a ProcRes name.
451   const auto &SchedModel = STI.getSchedModel();
452   for (int I = 0, E = SchedModel.getNumProcResourceKinds(); I < E; ++I) {
453     if (NameOrId == SchedModel.getProcResource(I)->Name)
454       return I;
455   }
456   return 0;
457 }
458 
459 bool Analysis::SchedClassCluster::measurementsMatch(
460     const llvm::MCSubtargetInfo &STI, const ResolvedSchedClass &RSC,
461     const InstructionBenchmarkClustering &Clustering) const {
462   const size_t NumMeasurements = Representative.size();
463   std::vector<BenchmarkMeasure> ClusterCenterPoint(NumMeasurements);
464   std::vector<BenchmarkMeasure> SchedClassPoint(NumMeasurements);
465   // Latency case.
466   assert(!Clustering.getPoints().empty());
467   const InstructionBenchmark::ModeE Mode = Clustering.getPoints()[0].Mode;
468   if (Mode == InstructionBenchmark::Latency) {
469     if (NumMeasurements != 1) {
470       llvm::errs()
471           << "invalid number of measurements in latency mode: expected 1, got "
472           << NumMeasurements << "\n";
473       return false;
474     }
475     // Find the latency.
476     SchedClassPoint[0].PerInstructionValue = 0.0;
477     for (unsigned I = 0; I < RSC.SCDesc->NumWriteLatencyEntries; ++I) {
478       const llvm::MCWriteLatencyEntry *const WLE =
479           STI.getWriteLatencyEntry(RSC.SCDesc, I);
480       SchedClassPoint[0].PerInstructionValue =
481           std::max<double>(SchedClassPoint[0].PerInstructionValue, WLE->Cycles);
482     }
483     ClusterCenterPoint[0].PerInstructionValue = Representative[0].avg();
484   } else if (Mode == InstructionBenchmark::Uops) {
485     for (int I = 0, E = Representative.size(); I < E; ++I) {
486       const auto Key = Representative[I].key();
487       uint16_t ProcResIdx = findProcResIdx(STI, Key);
488       if (ProcResIdx > 0) {
489         // Find the pressure on ProcResIdx `Key`.
490         const auto ProcResPressureIt =
491             std::find_if(RSC.IdealizedProcResPressure.begin(),
492                          RSC.IdealizedProcResPressure.end(),
493                          [ProcResIdx](const std::pair<uint16_t, float> &WPR) {
494                            return WPR.first == ProcResIdx;
495                          });
496         SchedClassPoint[I].PerInstructionValue =
497             ProcResPressureIt == RSC.IdealizedProcResPressure.end()
498                 ? 0.0
499                 : ProcResPressureIt->second;
500       } else if (Key == "NumMicroOps") {
501         SchedClassPoint[I].PerInstructionValue = RSC.SCDesc->NumMicroOps;
502       } else {
503         llvm::errs() << "expected `key` to be either a ProcResIdx or a ProcRes "
504                         "name, got "
505                      << Key << "\n";
506         return false;
507       }
508       ClusterCenterPoint[I].PerInstructionValue = Representative[I].avg();
509     }
510   } else {
511     llvm::errs() << "unimplemented measurement matching for mode " << Mode
512                  << "\n";
513     return false;
514   }
515   return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint);
516 }
517 
518 void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC,
519                                        llvm::raw_ostream &OS) const {
520   OS << "<table class=\"sched-class-desc\">";
521   OS << "<tr><th>Valid</th><th>Variant</th><th>NumMicroOps</th><th>Latency</"
522         "th><th>WriteProcRes</th><th title=\"This is the idealized unit "
523         "resource (port) pressure assuming ideal distribution\">Idealized "
524         "Resource Pressure</th></tr>";
525   if (RSC.SCDesc->isValid()) {
526     const auto &SM = SubtargetInfo_->getSchedModel();
527     OS << "<tr><td>&#10004;</td>";
528     OS << "<td>" << (RSC.WasVariant ? "&#10004;" : "&#10005;") << "</td>";
529     OS << "<td>" << RSC.SCDesc->NumMicroOps << "</td>";
530     // Latencies.
531     OS << "<td><ul>";
532     for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) {
533       const auto *const Entry =
534           SubtargetInfo_->getWriteLatencyEntry(RSC.SCDesc, I);
535       OS << "<li>" << Entry->Cycles;
536       if (RSC.SCDesc->NumWriteLatencyEntries > 1) {
537         // Dismabiguate if more than 1 latency.
538         OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
539       }
540       OS << "</li>";
541     }
542     OS << "</ul></td>";
543     // WriteProcRes.
544     OS << "<td><ul>";
545     for (const auto &WPR : RSC.NonRedundantWriteProcRes) {
546       OS << "<li><span class=\"mono\">";
547       writeEscaped<kEscapeHtml>(OS,
548                                 SM.getProcResource(WPR.ProcResourceIdx)->Name);
549       OS << "</span>: " << WPR.Cycles << "</li>";
550     }
551     OS << "</ul></td>";
552     // Idealized port pressure.
553     OS << "<td><ul>";
554     for (const auto &Pressure : RSC.IdealizedProcResPressure) {
555       OS << "<li><span class=\"mono\">";
556       writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel()
557                                         .getProcResource(Pressure.first)
558                                         ->Name);
559       OS << "</span>: ";
560       writeMeasurementValue<kEscapeHtml>(OS, Pressure.second);
561       OS << "</li>";
562     }
563     OS << "</ul></td>";
564     OS << "</tr>";
565   } else {
566     OS << "<tr><td>&#10005;</td><td></td><td></td></tr>";
567   }
568   OS << "</table>";
569 }
570 
571 static constexpr const char kHtmlHead[] = R"(
572 <head>
573 <title>llvm-exegesis Analysis Results</title>
574 <style>
575 body {
576   font-family: sans-serif
577 }
578 span.sched-class-name {
579   font-weight: bold;
580   font-family: monospace;
581 }
582 span.opcode {
583   font-family: monospace;
584 }
585 span.config {
586   font-family: monospace;
587 }
588 div.inconsistency {
589   margin-top: 50px;
590 }
591 table {
592   margin-left: 50px;
593   border-collapse: collapse;
594 }
595 table, table tr,td,th {
596   border: 1px solid #444;
597 }
598 table ul {
599   padding-left: 0px;
600   margin: 0px;
601   list-style-type: none;
602 }
603 table.sched-class-clusters td {
604   padding-left: 10px;
605   padding-right: 10px;
606   padding-top: 10px;
607   padding-bottom: 10px;
608 }
609 table.sched-class-desc td {
610   padding-left: 10px;
611   padding-right: 10px;
612   padding-top: 2px;
613   padding-bottom: 2px;
614 }
615 span.mono {
616   font-family: monospace;
617 }
618 td.measurement {
619   text-align: center;
620 }
621 tr.good-cluster td.measurement {
622   color: #292
623 }
624 tr.bad-cluster td.measurement {
625   color: #922
626 }
627 tr.good-cluster td.measurement span.minmax {
628   color: #888;
629 }
630 tr.bad-cluster td.measurement span.minmax {
631   color: #888;
632 }
633 </style>
634 </head>
635 )";
636 
637 template <>
638 llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
639     llvm::raw_ostream &OS) const {
640   const auto &FirstPoint = Clustering_.getPoints()[0];
641   // Print the header.
642   OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>";
643   OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>";
644   OS << "<h3>Triple: <span class=\"mono\">";
645   writeEscaped<kEscapeHtml>(OS, FirstPoint.LLVMTriple);
646   OS << "</span></h3><h3>Cpu: <span class=\"mono\">";
647   writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName);
648   OS << "</span></h3>";
649 
650   for (const auto &RSCAndPoints : makePointsPerSchedClass()) {
651     if (!RSCAndPoints.RSC.SCDesc)
652       continue;
653     // Bucket sched class points into sched class clusters.
654     std::vector<SchedClassCluster> SchedClassClusters;
655     for (const size_t PointId : RSCAndPoints.PointIds) {
656       const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId);
657       if (!ClusterId.isValid())
658         continue; // Ignore noise and errors. FIXME: take noise into account ?
659       auto SchedClassClusterIt =
660           std::find_if(SchedClassClusters.begin(), SchedClassClusters.end(),
661                        [ClusterId](const SchedClassCluster &C) {
662                          return C.id() == ClusterId;
663                        });
664       if (SchedClassClusterIt == SchedClassClusters.end()) {
665         SchedClassClusters.emplace_back();
666         SchedClassClusterIt = std::prev(SchedClassClusters.end());
667       }
668       SchedClassClusterIt->addPoint(PointId, Clustering_);
669     }
670 
671     // Print any scheduling class that has at least one cluster that does not
672     // match the checked-in data.
673     if (llvm::all_of(SchedClassClusters,
674                      [this, &RSCAndPoints](const SchedClassCluster &C) {
675                        return C.measurementsMatch(
676                            *SubtargetInfo_, RSCAndPoints.RSC, Clustering_);
677                      }))
678       continue; // Nothing weird.
679 
680     OS << "<div class=\"inconsistency\"><p>Sched Class <span "
681           "class=\"sched-class-name\">";
682 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
683     writeEscaped<kEscapeHtml>(OS, RSCAndPoints.RSC.SCDesc->Name);
684 #else
685     OS << RSCAndPoints.RSC.SchedClassId;
686 #endif
687     OS << "</span> contains instructions whose performance characteristics do"
688           " not match that of LLVM:</p>";
689     printSchedClassClustersHtml(SchedClassClusters, RSCAndPoints.RSC, OS);
690     OS << "<p>llvm SchedModel data:</p>";
691     printSchedClassDescHtml(RSCAndPoints.RSC, OS);
692     OS << "</div>";
693   }
694 
695   OS << "</body></html>";
696   return llvm::Error::success();
697 }
698 
699 // Distributes a pressure budget as evenly as possible on the provided subunits
700 // given the already existing port pressure distribution.
701 //
702 // The algorithm is as follows: while there is remaining pressure to
703 // distribute, find the subunits with minimal pressure, and distribute
704 // remaining pressure equally up to the pressure of the unit with
705 // second-to-minimal pressure.
706 // For example, let's assume we want to distribute 2*P1256
707 // (Subunits = [P1,P2,P5,P6]), and the starting DensePressure is:
708 //     DensePressure =        P0   P1   P2   P3   P4   P5   P6   P7
709 //                           0.1  0.3  0.2  0.0  0.0  0.5  0.5  0.5
710 //     RemainingPressure = 2.0
711 // We sort the subunits by pressure:
712 //     Subunits = [(P2,p=0.2), (P1,p=0.3), (P5,p=0.5), (P6, p=0.5)]
713 // We'll first start by the subunits with minimal pressure, which are at
714 // the beginning of the sorted array. In this example there is one (P2).
715 // The subunit with second-to-minimal pressure is the next one in the
716 // array (P1). So we distribute 0.1 pressure to P2, and remove 0.1 cycles
717 // from the budget.
718 //     Subunits = [(P2,p=0.3), (P1,p=0.3), (P5,p=0.5), (P5,p=0.5)]
719 //     RemainingPressure = 1.9
720 // We repeat this process: distribute 0.2 pressure on each of the minimal
721 // P2 and P1, decrease budget by 2*0.2:
722 //     Subunits = [(P2,p=0.5), (P1,p=0.5), (P5,p=0.5), (P5,p=0.5)]
723 //     RemainingPressure = 1.5
724 // There are no second-to-minimal subunits so we just share the remaining
725 // budget (1.5 cycles) equally:
726 //     Subunits = [(P2,p=0.875), (P1,p=0.875), (P5,p=0.875), (P5,p=0.875)]
727 //     RemainingPressure = 0.0
728 // We stop as there is no remaining budget to distribute.
729 void distributePressure(float RemainingPressure,
730                         llvm::SmallVector<uint16_t, 32> Subunits,
731                         llvm::SmallVector<float, 32> &DensePressure) {
732   // Find the number of subunits with minimal pressure (they are at the
733   // front).
734   llvm::sort(Subunits, [&DensePressure](const uint16_t A, const uint16_t B) {
735     return DensePressure[A] < DensePressure[B];
736   });
737   const auto getPressureForSubunit = [&DensePressure,
738                                       &Subunits](size_t I) -> float & {
739     return DensePressure[Subunits[I]];
740   };
741   size_t NumMinimalSU = 1;
742   while (NumMinimalSU < Subunits.size() &&
743          getPressureForSubunit(NumMinimalSU) == getPressureForSubunit(0)) {
744     ++NumMinimalSU;
745   }
746   while (RemainingPressure > 0.0f) {
747     if (NumMinimalSU == Subunits.size()) {
748       // All units are minimal, just distribute evenly and be done.
749       for (size_t I = 0; I < NumMinimalSU; ++I) {
750         getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
751       }
752       return;
753     }
754     // Distribute the remaining pressure equally.
755     const float MinimalPressure = getPressureForSubunit(NumMinimalSU - 1);
756     const float SecondToMinimalPressure = getPressureForSubunit(NumMinimalSU);
757     assert(MinimalPressure < SecondToMinimalPressure);
758     const float Increment = SecondToMinimalPressure - MinimalPressure;
759     if (RemainingPressure <= NumMinimalSU * Increment) {
760       // There is not enough remaining pressure.
761       for (size_t I = 0; I < NumMinimalSU; ++I) {
762         getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
763       }
764       return;
765     }
766     // Bump all minimal pressure subunits to `SecondToMinimalPressure`.
767     for (size_t I = 0; I < NumMinimalSU; ++I) {
768       getPressureForSubunit(I) = SecondToMinimalPressure;
769       RemainingPressure -= SecondToMinimalPressure;
770     }
771     while (NumMinimalSU < Subunits.size() &&
772            getPressureForSubunit(NumMinimalSU) == SecondToMinimalPressure) {
773       ++NumMinimalSU;
774     }
775   }
776 }
777 
778 std::vector<std::pair<uint16_t, float>> computeIdealizedProcResPressure(
779     const llvm::MCSchedModel &SM,
780     llvm::SmallVector<llvm::MCWriteProcResEntry, 8> WPRS) {
781   // DensePressure[I] is the port pressure for Proc Resource I.
782   llvm::SmallVector<float, 32> DensePressure(SM.getNumProcResourceKinds());
783   llvm::sort(WPRS, [](const llvm::MCWriteProcResEntry &A,
784                       const llvm::MCWriteProcResEntry &B) {
785     return A.ProcResourceIdx < B.ProcResourceIdx;
786   });
787   for (const llvm::MCWriteProcResEntry &WPR : WPRS) {
788     // Get units for the entry.
789     const llvm::MCProcResourceDesc *const ProcResDesc =
790         SM.getProcResource(WPR.ProcResourceIdx);
791     if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
792       // This is a ProcResUnit.
793       DensePressure[WPR.ProcResourceIdx] += WPR.Cycles;
794     } else {
795       // This is a ProcResGroup.
796       llvm::SmallVector<uint16_t, 32> Subunits(ProcResDesc->SubUnitsIdxBegin,
797                                                ProcResDesc->SubUnitsIdxBegin +
798                                                    ProcResDesc->NumUnits);
799       distributePressure(WPR.Cycles, Subunits, DensePressure);
800     }
801   }
802   // Turn dense pressure into sparse pressure by removing zero entries.
803   std::vector<std::pair<uint16_t, float>> Pressure;
804   for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
805     if (DensePressure[I] > 0.0f)
806       Pressure.emplace_back(I, DensePressure[I]);
807   }
808   return Pressure;
809 }
810 
811 } // namespace exegesis
812 } // namespace llvm
813