xref: /llvm-project/llvm/tools/llvm-exegesis/lib/Analysis.cpp (revision f8b28e9bf4720fc737d4db0985f0d173e3b4f2fa)
1 //===-- Analysis.cpp --------------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "Analysis.h"
11 #include "BenchmarkResult.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/MC/MCAsmInfo.h"
14 #include "llvm/Support/FormatVariadic.h"
15 #include <unordered_set>
16 #include <vector>
17 
18 namespace llvm {
19 namespace exegesis {
20 
21 static const char kCsvSep = ',';
22 
23 static unsigned resolveSchedClassId(const llvm::MCSubtargetInfo &STI,
24                                     unsigned SchedClassId,
25                                     const llvm::MCInst &MCI) {
26   const auto &SM = STI.getSchedModel();
27   while (SchedClassId && SM.getSchedClassDesc(SchedClassId)->isVariant())
28     SchedClassId =
29         STI.resolveVariantSchedClass(SchedClassId, &MCI, SM.getProcessorID());
30   return SchedClassId;
31 }
32 
33 namespace {
34 
35 enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString };
36 
37 template <EscapeTag Tag>
38 void writeEscaped(llvm::raw_ostream &OS, const llvm::StringRef S);
39 
40 template <>
41 void writeEscaped<kEscapeCsv>(llvm::raw_ostream &OS, const llvm::StringRef S) {
42   if (std::find(S.begin(), S.end(), kCsvSep) == S.end()) {
43     OS << S;
44   } else {
45     // Needs escaping.
46     OS << '"';
47     for (const char C : S) {
48       if (C == '"')
49         OS << "\"\"";
50       else
51         OS << C;
52     }
53     OS << '"';
54   }
55 }
56 
57 template <>
58 void writeEscaped<kEscapeHtml>(llvm::raw_ostream &OS, const llvm::StringRef S) {
59   for (const char C : S) {
60     if (C == '<')
61       OS << "&lt;";
62     else if (C == '>')
63       OS << "&gt;";
64     else if (C == '&')
65       OS << "&amp;";
66     else
67       OS << C;
68   }
69 }
70 
71 template <>
72 void writeEscaped<kEscapeHtmlString>(llvm::raw_ostream &OS,
73                                      const llvm::StringRef S) {
74   for (const char C : S) {
75     if (C == '"')
76       OS << "\\\"";
77     else
78       OS << C;
79   }
80 }
81 
82 } // namespace
83 
84 template <EscapeTag Tag>
85 static void
86 writeClusterId(llvm::raw_ostream &OS,
87                const InstructionBenchmarkClustering::ClusterId &CID) {
88   if (CID.isNoise())
89     writeEscaped<Tag>(OS, "[noise]");
90   else if (CID.isError())
91     writeEscaped<Tag>(OS, "[error]");
92   else
93     OS << CID.getId();
94 }
95 
96 template <EscapeTag Tag>
97 static void writeMeasurementValue(llvm::raw_ostream &OS, const double Value) {
98   writeEscaped<Tag>(OS, llvm::formatv("{0:F}", Value).str());
99 }
100 
101 template <typename EscapeTag, EscapeTag Tag>
102 void Analysis::writeSnippet(llvm::raw_ostream &OS,
103                             llvm::ArrayRef<uint8_t> Bytes,
104                             const char *Separator) const {
105   llvm::SmallVector<std::string, 3> Lines;
106   // Parse the asm snippet and print it.
107   while (!Bytes.empty()) {
108     llvm::MCInst MI;
109     uint64_t MISize = 0;
110     if (!Disasm_->getInstruction(MI, MISize, Bytes, 0, llvm::nulls(),
111                                  llvm::nulls())) {
112       writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
113       writeEscaped<Tag>(OS, Separator);
114       writeEscaped<Tag>(OS, "[error decoding asm snippet]");
115       return;
116     }
117     llvm::SmallString<128> InstPrinterStr; // FIXME: magic number.
118     llvm::raw_svector_ostream OSS(InstPrinterStr);
119     InstPrinter_->printInst(&MI, OSS, "", *SubtargetInfo_);
120     Bytes = Bytes.drop_front(MISize);
121     Lines.emplace_back(llvm::StringRef(InstPrinterStr).trim());
122   }
123   writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
124 }
125 
126 // Prints a row representing an instruction, along with scheduling info and
127 // point coordinates (measurements).
128 void Analysis::printInstructionRowCsv(const size_t PointId,
129                                       llvm::raw_ostream &OS) const {
130   const InstructionBenchmark &Point = Clustering_.getPoints()[PointId];
131   writeClusterId<kEscapeCsv>(OS, Clustering_.getClusterIdForPoint(PointId));
132   OS << kCsvSep;
133   writeSnippet<EscapeTag, kEscapeCsv>(OS, Point.AssembledSnippet, "; ");
134   OS << kCsvSep;
135   writeEscaped<kEscapeCsv>(OS, Point.Key.Config);
136   OS << kCsvSep;
137   assert(!Point.Key.Instructions.empty());
138   const llvm::MCInst &MCI = Point.Key.Instructions[0];
139   const unsigned SchedClassId = resolveSchedClassId(
140       *SubtargetInfo_, InstrInfo_->get(MCI.getOpcode()).getSchedClass(), MCI);
141 
142 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
143   const llvm::MCSchedClassDesc *const SCDesc =
144       SubtargetInfo_->getSchedModel().getSchedClassDesc(SchedClassId);
145   writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
146 #else
147   OS << SchedClassId;
148 #endif
149   for (const auto &Measurement : Point.Measurements) {
150     OS << kCsvSep;
151     writeMeasurementValue<kEscapeCsv>(OS, Measurement.PerInstructionValue);
152   }
153   OS << "\n";
154 }
155 
156 Analysis::Analysis(const llvm::Target &Target,
157                    const InstructionBenchmarkClustering &Clustering)
158     : Clustering_(Clustering) {
159   if (Clustering.getPoints().empty())
160     return;
161 
162   const InstructionBenchmark &FirstPoint = Clustering.getPoints().front();
163   InstrInfo_.reset(Target.createMCInstrInfo());
164   RegInfo_.reset(Target.createMCRegInfo(FirstPoint.LLVMTriple));
165   AsmInfo_.reset(Target.createMCAsmInfo(*RegInfo_, FirstPoint.LLVMTriple));
166   SubtargetInfo_.reset(Target.createMCSubtargetInfo(FirstPoint.LLVMTriple,
167                                                     FirstPoint.CpuName, ""));
168   InstPrinter_.reset(Target.createMCInstPrinter(
169       llvm::Triple(FirstPoint.LLVMTriple), 0 /*default variant*/, *AsmInfo_,
170       *InstrInfo_, *RegInfo_));
171 
172   Context_ = llvm::make_unique<llvm::MCContext>(AsmInfo_.get(), RegInfo_.get(),
173                                                 &ObjectFileInfo_);
174   Disasm_.reset(Target.createMCDisassembler(*SubtargetInfo_, *Context_));
175   assert(Disasm_ && "cannot create MCDisassembler. missing call to "
176                     "InitializeXXXTargetDisassembler ?");
177 }
178 
179 template <>
180 llvm::Error
181 Analysis::run<Analysis::PrintClusters>(llvm::raw_ostream &OS) const {
182   if (Clustering_.getPoints().empty())
183     return llvm::Error::success();
184 
185   // Write the header.
186   OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config"
187      << kCsvSep << "sched_class";
188   for (const auto &Measurement : Clustering_.getPoints().front().Measurements) {
189     OS << kCsvSep;
190     writeEscaped<kEscapeCsv>(OS, Measurement.Key);
191   }
192   OS << "\n";
193 
194   // Write the points.
195   const auto &Clusters = Clustering_.getValidClusters();
196   for (size_t I = 0, E = Clusters.size(); I < E; ++I) {
197     for (const size_t PointId : Clusters[I].PointIndices) {
198       printInstructionRowCsv(PointId, OS);
199     }
200     OS << "\n\n";
201   }
202   return llvm::Error::success();
203 }
204 
205 Analysis::ResolvedSchedClassAndPoints::ResolvedSchedClassAndPoints(
206     ResolvedSchedClass &&RSC)
207     : RSC(std::move(RSC)) {}
208 
209 std::vector<Analysis::ResolvedSchedClassAndPoints>
210 Analysis::makePointsPerSchedClass() const {
211   std::vector<ResolvedSchedClassAndPoints> Entries;
212   // Maps SchedClassIds to index in result.
213   std::unordered_map<unsigned, size_t> SchedClassIdToIndex;
214   const auto &Points = Clustering_.getPoints();
215   for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) {
216     const InstructionBenchmark &Point = Points[PointId];
217     if (!Point.Error.empty())
218       continue;
219     assert(!Point.Key.Instructions.empty());
220     // FIXME: we should be using the tuple of classes for instructions in the
221     // snippet as key.
222     const llvm::MCInst &MCI = Point.Key.Instructions[0];
223     unsigned SchedClassId = InstrInfo_->get(MCI.getOpcode()).getSchedClass();
224     const bool WasVariant = SchedClassId && SubtargetInfo_->getSchedModel()
225                                                 .getSchedClassDesc(SchedClassId)
226                                                 ->isVariant();
227     SchedClassId = resolveSchedClassId(*SubtargetInfo_, SchedClassId, MCI);
228     const auto IndexIt = SchedClassIdToIndex.find(SchedClassId);
229     if (IndexIt == SchedClassIdToIndex.end()) {
230       // Create a new entry.
231       SchedClassIdToIndex.emplace(SchedClassId, Entries.size());
232       ResolvedSchedClassAndPoints Entry(
233           ResolvedSchedClass(*SubtargetInfo_, SchedClassId, WasVariant));
234       Entry.PointIds.push_back(PointId);
235       Entries.push_back(std::move(Entry));
236     } else {
237       // Append to the existing entry.
238       Entries[IndexIt->second].PointIds.push_back(PointId);
239     }
240   }
241   return Entries;
242 }
243 
244 // Uops repeat the same opcode over again. Just show this opcode and show the
245 // whole snippet only on hover.
246 static void writeUopsSnippetHtml(llvm::raw_ostream &OS,
247                                  const std::vector<llvm::MCInst> &Instructions,
248                                  const llvm::MCInstrInfo &InstrInfo) {
249   if (Instructions.empty())
250     return;
251   writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instructions[0].getOpcode()));
252   if (Instructions.size() > 1)
253     OS << " (x" << Instructions.size() << ")";
254 }
255 
256 // Latency tries to find a serial path. Just show the opcode path and show the
257 // whole snippet only on hover.
258 static void
259 writeLatencySnippetHtml(llvm::raw_ostream &OS,
260                         const std::vector<llvm::MCInst> &Instructions,
261                         const llvm::MCInstrInfo &InstrInfo) {
262   bool First = true;
263   for (const llvm::MCInst &Instr : Instructions) {
264     if (First)
265       First = false;
266     else
267       OS << " &rarr; ";
268     writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instr.getOpcode()));
269   }
270 }
271 
272 void Analysis::printSchedClassClustersHtml(
273     const std::vector<SchedClassCluster> &Clusters,
274     const ResolvedSchedClass &RSC, llvm::raw_ostream &OS) const {
275   const auto &Points = Clustering_.getPoints();
276   OS << "<table class=\"sched-class-clusters\">";
277   OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
278   assert(!Clusters.empty());
279   for (const auto &Measurement :
280        Points[Clusters[0].getPointIds()[0]].Measurements) {
281     OS << "<th>";
282     writeEscaped<kEscapeHtml>(OS, Measurement.Key);
283     OS << "</th>";
284   }
285   OS << "</tr>";
286   for (const SchedClassCluster &Cluster : Clusters) {
287     OS << "<tr class=\""
288        << (Cluster.measurementsMatch(*SubtargetInfo_, RSC, Clustering_)
289                ? "good-cluster"
290                : "bad-cluster")
291        << "\"><td>";
292     writeClusterId<kEscapeHtml>(OS, Cluster.id());
293     OS << "</td><td><ul>";
294     for (const size_t PointId : Cluster.getPointIds()) {
295       const auto &Point = Points[PointId];
296       OS << "<li><span class=\"mono\" title=\"";
297       writeSnippet<EscapeTag, kEscapeHtmlString>(OS, Point.AssembledSnippet,
298                                                  "\n");
299       OS << "\">";
300       switch (Point.Mode) {
301       case InstructionBenchmark::Latency:
302         writeLatencySnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
303         break;
304       case InstructionBenchmark::Uops:
305         writeUopsSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
306         break;
307       default:
308         llvm_unreachable("invalid mode");
309       }
310       OS << "</span> <span class=\"mono\">";
311       writeEscaped<kEscapeHtml>(OS, Point.Key.Config);
312       OS << "</span></li>";
313     }
314     OS << "</ul></td>";
315     for (const auto &Stats : Cluster.getRepresentative()) {
316       OS << "<td class=\"measurement\">";
317       writeMeasurementValue<kEscapeHtml>(OS, Stats.avg());
318       OS << "<br><span class=\"minmax\">[";
319       writeMeasurementValue<kEscapeHtml>(OS, Stats.min());
320       OS << ";";
321       writeMeasurementValue<kEscapeHtml>(OS, Stats.max());
322       OS << "]</span></td>";
323     }
324     OS << "</tr>";
325   }
326   OS << "</table>";
327 }
328 
329 // Return the non-redundant list of WriteProcRes used by the given sched class.
330 // The scheduling model for LLVM is such that each instruction has a certain
331 // number of uops which consume resources which are described by WriteProcRes
332 // entries. Each entry describe how many cycles are spent on a specific ProcRes
333 // kind.
334 // For example, an instruction might have 3 uOps, one dispatching on P0
335 // (ProcResIdx=1) and two on P06 (ProcResIdx = 7).
336 // Note that LLVM additionally denormalizes resource consumption to include
337 // usage of super resources by subresources. So in practice if there exists a
338 // P016 (ProcResIdx=10), then the cycles consumed by P0 are also consumed by
339 // P06 (ProcResIdx = 7) and P016 (ProcResIdx = 10), and the resources consumed
340 // by P06 are also consumed by P016. In the figure below, parenthesized cycles
341 // denote implied usage of superresources by subresources:
342 //            P0      P06    P016
343 //     uOp1    1      (1)     (1)
344 //     uOp2            1      (1)
345 //     uOp3            1      (1)
346 //     =============================
347 //             1       3       3
348 // Eventually we end up with three entries for the WriteProcRes of the
349 // instruction:
350 //    {ProcResIdx=1,  Cycles=1}  // P0
351 //    {ProcResIdx=7,  Cycles=3}  // P06
352 //    {ProcResIdx=10, Cycles=3}  // P016
353 //
354 // Note that in this case, P016 does not contribute any cycles, so it would
355 // be removed by this function.
356 // FIXME: Move this to MCSubtargetInfo and use it in llvm-mca.
357 static llvm::SmallVector<llvm::MCWriteProcResEntry, 8>
358 getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc,
359                             const llvm::MCSubtargetInfo &STI) {
360   llvm::SmallVector<llvm::MCWriteProcResEntry, 8> Result;
361   const auto &SM = STI.getSchedModel();
362   const unsigned NumProcRes = SM.getNumProcResourceKinds();
363 
364   // This assumes that the ProcResDescs are sorted in topological order, which
365   // is guaranteed by the tablegen backend.
366   llvm::SmallVector<float, 32> ProcResUnitUsage(NumProcRes);
367   for (const auto *WPR = STI.getWriteProcResBegin(&SCDesc),
368                   *const WPREnd = STI.getWriteProcResEnd(&SCDesc);
369        WPR != WPREnd; ++WPR) {
370     const llvm::MCProcResourceDesc *const ProcResDesc =
371         SM.getProcResource(WPR->ProcResourceIdx);
372     if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
373       // This is a ProcResUnit.
374       Result.push_back({WPR->ProcResourceIdx, WPR->Cycles});
375       ProcResUnitUsage[WPR->ProcResourceIdx] += WPR->Cycles;
376     } else {
377       // This is a ProcResGroup. First see if it contributes any cycles or if
378       // it has cycles just from subunits.
379       float RemainingCycles = WPR->Cycles;
380       for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
381            SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
382            ++SubResIdx) {
383         RemainingCycles -= ProcResUnitUsage[*SubResIdx];
384       }
385       if (RemainingCycles < 0.01f) {
386         // The ProcResGroup contributes no cycles of its own.
387         continue;
388       }
389       // The ProcResGroup contributes `RemainingCycles` cycles of its own.
390       Result.push_back({WPR->ProcResourceIdx,
391                         static_cast<uint16_t>(std::round(RemainingCycles))});
392       // Spread the remaining cycles over all subunits.
393       for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
394            SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
395            ++SubResIdx) {
396         ProcResUnitUsage[*SubResIdx] += RemainingCycles / ProcResDesc->NumUnits;
397       }
398     }
399   }
400   return Result;
401 }
402 
403 Analysis::ResolvedSchedClass::ResolvedSchedClass(
404     const llvm::MCSubtargetInfo &STI, unsigned ResolvedSchedClassId,
405     bool WasVariant)
406     : SchedClassId(ResolvedSchedClassId), SCDesc(STI.getSchedModel().getSchedClassDesc(ResolvedSchedClassId)),
407       WasVariant(WasVariant),
408       NonRedundantWriteProcRes(getNonRedundantWriteProcRes(*SCDesc, STI)),
409       IdealizedProcResPressure(computeIdealizedProcResPressure(
410           STI.getSchedModel(), NonRedundantWriteProcRes)) {
411   assert((SCDesc == nullptr || !SCDesc->isVariant()) &&
412          "ResolvedSchedClass should never be variant");
413 }
414 
415 void Analysis::SchedClassCluster::addPoint(
416     size_t PointId, const InstructionBenchmarkClustering &Clustering) {
417   PointIds.push_back(PointId);
418   const auto &Point = Clustering.getPoints()[PointId];
419   if (ClusterId.isUndef()) {
420     ClusterId = Clustering.getClusterIdForPoint(PointId);
421     Representative.resize(Point.Measurements.size());
422   }
423   for (size_t I = 0, E = Point.Measurements.size(); I < E; ++I) {
424     Representative[I].push(Point.Measurements[I]);
425   }
426   assert(ClusterId == Clustering.getClusterIdForPoint(PointId));
427 }
428 
429 // Returns a ProxResIdx by id or name.
430 static unsigned findProcResIdx(const llvm::MCSubtargetInfo &STI,
431                                const llvm::StringRef NameOrId) {
432   // Interpret the key as an ProcResIdx.
433   unsigned ProcResIdx = 0;
434   if (llvm::to_integer(NameOrId, ProcResIdx, 10))
435     return ProcResIdx;
436   // Interpret the key as a ProcRes name.
437   const auto &SchedModel = STI.getSchedModel();
438   for (int I = 0, E = SchedModel.getNumProcResourceKinds(); I < E; ++I) {
439     if (NameOrId == SchedModel.getProcResource(I)->Name)
440       return I;
441   }
442   return 0;
443 }
444 
445 bool Analysis::SchedClassCluster::measurementsMatch(
446     const llvm::MCSubtargetInfo &STI, const ResolvedSchedClass &RSC,
447     const InstructionBenchmarkClustering &Clustering) const {
448   const size_t NumMeasurements = Representative.size();
449   std::vector<BenchmarkMeasure> ClusterCenterPoint(NumMeasurements);
450   std::vector<BenchmarkMeasure> SchedClassPoint(NumMeasurements);
451   // Latency case.
452   assert(!Clustering.getPoints().empty());
453   const InstructionBenchmark::ModeE Mode = Clustering.getPoints()[0].Mode;
454   if (Mode == InstructionBenchmark::Latency) {
455     if (NumMeasurements != 1) {
456       llvm::errs()
457           << "invalid number of measurements in latency mode: expected 1, got "
458           << NumMeasurements << "\n";
459       return false;
460     }
461     // Find the latency.
462     SchedClassPoint[0].PerInstructionValue = 0.0;
463     for (unsigned I = 0; I < RSC.SCDesc->NumWriteLatencyEntries; ++I) {
464       const llvm::MCWriteLatencyEntry *const WLE =
465           STI.getWriteLatencyEntry(RSC.SCDesc, I);
466       SchedClassPoint[0].PerInstructionValue =
467           std::max<double>(SchedClassPoint[0].PerInstructionValue, WLE->Cycles);
468     }
469     ClusterCenterPoint[0].PerInstructionValue = Representative[0].avg();
470   } else if (Mode == InstructionBenchmark::Uops) {
471     for (int I = 0, E = Representative.size(); I < E; ++I) {
472       const auto Key = Representative[I].key();
473       uint16_t ProcResIdx = findProcResIdx(STI, Key);
474       if (ProcResIdx > 0) {
475         // Find the pressure on ProcResIdx `Key`.
476         const auto ProcResPressureIt =
477             std::find_if(RSC.IdealizedProcResPressure.begin(),
478                          RSC.IdealizedProcResPressure.end(),
479                          [ProcResIdx](const std::pair<uint16_t, float> &WPR) {
480                            return WPR.first == ProcResIdx;
481                          });
482         SchedClassPoint[I].PerInstructionValue =
483             ProcResPressureIt == RSC.IdealizedProcResPressure.end()
484                 ? 0.0
485                 : ProcResPressureIt->second;
486       } else if (Key == "NumMicroOps") {
487         SchedClassPoint[I].PerInstructionValue = RSC.SCDesc->NumMicroOps;
488       } else {
489         llvm::errs() << "expected `key` to be either a ProcResIdx or a ProcRes "
490                         "name, got "
491                      << Key << "\n";
492         return false;
493       }
494       ClusterCenterPoint[I].PerInstructionValue = Representative[I].avg();
495     }
496   } else {
497     llvm::errs() << "unimplemented measurement matching for mode " << Mode
498                  << "\n";
499     return false;
500   }
501   return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint);
502 }
503 
504 void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC,
505                                        llvm::raw_ostream &OS) const {
506   OS << "<table class=\"sched-class-desc\">";
507   OS << "<tr><th>Valid</th><th>Variant</th><th>NumMicroOps</th><th>Latency</"
508         "th><th>WriteProcRes</th><th title=\"This is the idealized unit "
509         "resource (port) pressure assuming ideal distribution\">Idealized "
510         "Resource Pressure</th></tr>";
511   if (RSC.SCDesc->isValid()) {
512     const auto &SM = SubtargetInfo_->getSchedModel();
513     OS << "<tr><td>&#10004;</td>";
514     OS << "<td>" << (RSC.WasVariant ? "&#10004;" : "&#10005;") << "</td>";
515     OS << "<td>" << RSC.SCDesc->NumMicroOps << "</td>";
516     // Latencies.
517     OS << "<td><ul>";
518     for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) {
519       const auto *const Entry =
520           SubtargetInfo_->getWriteLatencyEntry(RSC.SCDesc, I);
521       OS << "<li>" << Entry->Cycles;
522       if (RSC.SCDesc->NumWriteLatencyEntries > 1) {
523         // Dismabiguate if more than 1 latency.
524         OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
525       }
526       OS << "</li>";
527     }
528     OS << "</ul></td>";
529     // WriteProcRes.
530     OS << "<td><ul>";
531     for (const auto &WPR : RSC.NonRedundantWriteProcRes) {
532       OS << "<li><span class=\"mono\">";
533       writeEscaped<kEscapeHtml>(OS,
534                                 SM.getProcResource(WPR.ProcResourceIdx)->Name);
535       OS << "</span>: " << WPR.Cycles << "</li>";
536     }
537     OS << "</ul></td>";
538     // Idealized port pressure.
539     OS << "<td><ul>";
540     for (const auto &Pressure : RSC.IdealizedProcResPressure) {
541       OS << "<li><span class=\"mono\">";
542       writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel()
543                                         .getProcResource(Pressure.first)
544                                         ->Name);
545       OS << "</span>: ";
546       writeMeasurementValue<kEscapeHtml>(OS, Pressure.second);
547       OS << "</li>";
548     }
549     OS << "</ul></td>";
550     OS << "</tr>";
551   } else {
552     OS << "<tr><td>&#10005;</td><td></td><td></td></tr>";
553   }
554   OS << "</table>";
555 }
556 
557 static constexpr const char kHtmlHead[] = R"(
558 <head>
559 <title>llvm-exegesis Analysis Results</title>
560 <style>
561 body {
562   font-family: sans-serif
563 }
564 span.sched-class-name {
565   font-weight: bold;
566   font-family: monospace;
567 }
568 span.opcode {
569   font-family: monospace;
570 }
571 span.config {
572   font-family: monospace;
573 }
574 div.inconsistency {
575   margin-top: 50px;
576 }
577 table {
578   margin-left: 50px;
579   border-collapse: collapse;
580 }
581 table, table tr,td,th {
582   border: 1px solid #444;
583 }
584 table ul {
585   padding-left: 0px;
586   margin: 0px;
587   list-style-type: none;
588 }
589 table.sched-class-clusters td {
590   padding-left: 10px;
591   padding-right: 10px;
592   padding-top: 10px;
593   padding-bottom: 10px;
594 }
595 table.sched-class-desc td {
596   padding-left: 10px;
597   padding-right: 10px;
598   padding-top: 2px;
599   padding-bottom: 2px;
600 }
601 span.mono {
602   font-family: monospace;
603 }
604 td.measurement {
605   text-align: center;
606 }
607 tr.good-cluster td.measurement {
608   color: #292
609 }
610 tr.bad-cluster td.measurement {
611   color: #922
612 }
613 tr.good-cluster td.measurement span.minmax {
614   color: #888;
615 }
616 tr.bad-cluster td.measurement span.minmax {
617   color: #888;
618 }
619 </style>
620 </head>
621 )";
622 
623 template <>
624 llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
625     llvm::raw_ostream &OS) const {
626   const auto &FirstPoint = Clustering_.getPoints()[0];
627   // Print the header.
628   OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>";
629   OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>";
630   OS << "<h3>Triple: <span class=\"mono\">";
631   writeEscaped<kEscapeHtml>(OS, FirstPoint.LLVMTriple);
632   OS << "</span></h3><h3>Cpu: <span class=\"mono\">";
633   writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName);
634   OS << "</span></h3>";
635 
636   for (const auto &RSCAndPoints : makePointsPerSchedClass()) {
637     if (!RSCAndPoints.RSC.SCDesc)
638       continue;
639     // Bucket sched class points into sched class clusters.
640     std::vector<SchedClassCluster> SchedClassClusters;
641     for (const size_t PointId : RSCAndPoints.PointIds) {
642       const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId);
643       if (!ClusterId.isValid())
644         continue; // Ignore noise and errors. FIXME: take noise into account ?
645       auto SchedClassClusterIt =
646           std::find_if(SchedClassClusters.begin(), SchedClassClusters.end(),
647                        [ClusterId](const SchedClassCluster &C) {
648                          return C.id() == ClusterId;
649                        });
650       if (SchedClassClusterIt == SchedClassClusters.end()) {
651         SchedClassClusters.emplace_back();
652         SchedClassClusterIt = std::prev(SchedClassClusters.end());
653       }
654       SchedClassClusterIt->addPoint(PointId, Clustering_);
655     }
656 
657     // Print any scheduling class that has at least one cluster that does not
658     // match the checked-in data.
659     if (llvm::all_of(SchedClassClusters,
660                      [this, &RSCAndPoints](const SchedClassCluster &C) {
661                        return C.measurementsMatch(
662                            *SubtargetInfo_, RSCAndPoints.RSC, Clustering_);
663                      }))
664       continue; // Nothing weird.
665 
666     OS << "<div class=\"inconsistency\"><p>Sched Class <span "
667           "class=\"sched-class-name\">";
668 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
669     writeEscaped<kEscapeHtml>(OS, RSCAndPoints.RSC.SCDesc->Name);
670 #else
671     OS << RSCAndPoints.RSC.SchedClassId;
672 #endif
673     OS << "</span> contains instructions whose performance characteristics do"
674           " not match that of LLVM:</p>";
675     printSchedClassClustersHtml(SchedClassClusters, RSCAndPoints.RSC, OS);
676     OS << "<p>llvm SchedModel data:</p>";
677     printSchedClassDescHtml(RSCAndPoints.RSC, OS);
678     OS << "</div>";
679   }
680 
681   OS << "</body></html>";
682   return llvm::Error::success();
683 }
684 
685 // Distributes a pressure budget as evenly as possible on the provided subunits
686 // given the already existing port pressure distribution.
687 //
688 // The algorithm is as follows: while there is remaining pressure to
689 // distribute, find the subunits with minimal pressure, and distribute
690 // remaining pressure equally up to the pressure of the unit with
691 // second-to-minimal pressure.
692 // For example, let's assume we want to distribute 2*P1256
693 // (Subunits = [P1,P2,P5,P6]), and the starting DensePressure is:
694 //     DensePressure =        P0   P1   P2   P3   P4   P5   P6   P7
695 //                           0.1  0.3  0.2  0.0  0.0  0.5  0.5  0.5
696 //     RemainingPressure = 2.0
697 // We sort the subunits by pressure:
698 //     Subunits = [(P2,p=0.2), (P1,p=0.3), (P5,p=0.5), (P6, p=0.5)]
699 // We'll first start by the subunits with minimal pressure, which are at
700 // the beginning of the sorted array. In this example there is one (P2).
701 // The subunit with second-to-minimal pressure is the next one in the
702 // array (P1). So we distribute 0.1 pressure to P2, and remove 0.1 cycles
703 // from the budget.
704 //     Subunits = [(P2,p=0.3), (P1,p=0.3), (P5,p=0.5), (P5,p=0.5)]
705 //     RemainingPressure = 1.9
706 // We repeat this process: distribute 0.2 pressure on each of the minimal
707 // P2 and P1, decrease budget by 2*0.2:
708 //     Subunits = [(P2,p=0.5), (P1,p=0.5), (P5,p=0.5), (P5,p=0.5)]
709 //     RemainingPressure = 1.5
710 // There are no second-to-minimal subunits so we just share the remaining
711 // budget (1.5 cycles) equally:
712 //     Subunits = [(P2,p=0.875), (P1,p=0.875), (P5,p=0.875), (P5,p=0.875)]
713 //     RemainingPressure = 0.0
714 // We stop as there is no remaining budget to distribute.
715 void distributePressure(float RemainingPressure,
716                         llvm::SmallVector<uint16_t, 32> Subunits,
717                         llvm::SmallVector<float, 32> &DensePressure) {
718   // Find the number of subunits with minimal pressure (they are at the
719   // front).
720   llvm::sort(Subunits, [&DensePressure](const uint16_t A, const uint16_t B) {
721     return DensePressure[A] < DensePressure[B];
722   });
723   const auto getPressureForSubunit = [&DensePressure,
724                                       &Subunits](size_t I) -> float & {
725     return DensePressure[Subunits[I]];
726   };
727   size_t NumMinimalSU = 1;
728   while (NumMinimalSU < Subunits.size() &&
729          getPressureForSubunit(NumMinimalSU) == getPressureForSubunit(0)) {
730     ++NumMinimalSU;
731   }
732   while (RemainingPressure > 0.0f) {
733     if (NumMinimalSU == Subunits.size()) {
734       // All units are minimal, just distribute evenly and be done.
735       for (size_t I = 0; I < NumMinimalSU; ++I) {
736         getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
737       }
738       return;
739     }
740     // Distribute the remaining pressure equally.
741     const float MinimalPressure = getPressureForSubunit(NumMinimalSU - 1);
742     const float SecondToMinimalPressure = getPressureForSubunit(NumMinimalSU);
743     assert(MinimalPressure < SecondToMinimalPressure);
744     const float Increment = SecondToMinimalPressure - MinimalPressure;
745     if (RemainingPressure <= NumMinimalSU * Increment) {
746       // There is not enough remaining pressure.
747       for (size_t I = 0; I < NumMinimalSU; ++I) {
748         getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
749       }
750       return;
751     }
752     // Bump all minimal pressure subunits to `SecondToMinimalPressure`.
753     for (size_t I = 0; I < NumMinimalSU; ++I) {
754       getPressureForSubunit(I) = SecondToMinimalPressure;
755       RemainingPressure -= SecondToMinimalPressure;
756     }
757     while (NumMinimalSU < Subunits.size() &&
758            getPressureForSubunit(NumMinimalSU) == SecondToMinimalPressure) {
759       ++NumMinimalSU;
760     }
761   }
762 }
763 
764 std::vector<std::pair<uint16_t, float>> computeIdealizedProcResPressure(
765     const llvm::MCSchedModel &SM,
766     llvm::SmallVector<llvm::MCWriteProcResEntry, 8> WPRS) {
767   // DensePressure[I] is the port pressure for Proc Resource I.
768   llvm::SmallVector<float, 32> DensePressure(SM.getNumProcResourceKinds());
769   llvm::sort(WPRS, [](const llvm::MCWriteProcResEntry &A,
770                       const llvm::MCWriteProcResEntry &B) {
771     return A.ProcResourceIdx < B.ProcResourceIdx;
772   });
773   for (const llvm::MCWriteProcResEntry &WPR : WPRS) {
774     // Get units for the entry.
775     const llvm::MCProcResourceDesc *const ProcResDesc =
776         SM.getProcResource(WPR.ProcResourceIdx);
777     if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
778       // This is a ProcResUnit.
779       DensePressure[WPR.ProcResourceIdx] += WPR.Cycles;
780     } else {
781       // This is a ProcResGroup.
782       llvm::SmallVector<uint16_t, 32> Subunits(ProcResDesc->SubUnitsIdxBegin,
783                                                ProcResDesc->SubUnitsIdxBegin +
784                                                    ProcResDesc->NumUnits);
785       distributePressure(WPR.Cycles, Subunits, DensePressure);
786     }
787   }
788   // Turn dense pressure into sparse pressure by removing zero entries.
789   std::vector<std::pair<uint16_t, float>> Pressure;
790   for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
791     if (DensePressure[I] > 0.0f)
792       Pressure.emplace_back(I, DensePressure[I]);
793   }
794   return Pressure;
795 }
796 
797 } // namespace exegesis
798 } // namespace llvm
799