xref: /llvm-project/llvm/tools/llvm-exegesis/lib/Analysis.cpp (revision 30183093abf2b7d563c1c233a89aa3e115e4109c)
1 //===-- Analysis.cpp --------------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "Analysis.h"
11 #include "BenchmarkResult.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/MC/MCAsmInfo.h"
14 #include "llvm/Support/FormatVariadic.h"
15 #include <unordered_set>
16 #include <vector>
17 
18 namespace exegesis {
19 
20 static const char kCsvSep = ',';
21 
22 namespace {
23 
24 enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString };
25 
26 template <EscapeTag Tag>
27 void writeEscaped(llvm::raw_ostream &OS, const llvm::StringRef S);
28 
29 template <>
30 void writeEscaped<kEscapeCsv>(llvm::raw_ostream &OS, const llvm::StringRef S) {
31   if (std::find(S.begin(), S.end(), kCsvSep) == S.end()) {
32     OS << S;
33   } else {
34     // Needs escaping.
35     OS << '"';
36     for (const char C : S) {
37       if (C == '"')
38         OS << "\"\"";
39       else
40         OS << C;
41     }
42     OS << '"';
43   }
44 }
45 
46 template <>
47 void writeEscaped<kEscapeHtml>(llvm::raw_ostream &OS, const llvm::StringRef S) {
48   for (const char C : S) {
49     if (C == '<')
50       OS << "&lt;";
51     else if (C == '>')
52       OS << "&gt;";
53     else if (C == '&')
54       OS << "&amp;";
55     else
56       OS << C;
57   }
58 }
59 
60 template <>
61 void writeEscaped<kEscapeHtmlString>(llvm::raw_ostream &OS,
62                                      const llvm::StringRef S) {
63   for (const char C : S) {
64     if (C == '"')
65       OS << "\\\"";
66     else
67       OS << C;
68   }
69 }
70 
71 } // namespace
72 
73 template <EscapeTag Tag>
74 static void
75 writeClusterId(llvm::raw_ostream &OS,
76                const InstructionBenchmarkClustering::ClusterId &CID) {
77   if (CID.isNoise())
78     writeEscaped<Tag>(OS, "[noise]");
79   else if (CID.isError())
80     writeEscaped<Tag>(OS, "[error]");
81   else
82     OS << CID.getId();
83 }
84 
85 template <EscapeTag Tag>
86 static void writeMeasurementValue(llvm::raw_ostream &OS, const double Value) {
87   writeEscaped<Tag>(OS, llvm::formatv("{0:F}", Value).str());
88 }
89 
90 template <typename EscapeTag, EscapeTag Tag>
91 void Analysis::writeSnippet(llvm::raw_ostream &OS,
92                             llvm::ArrayRef<uint8_t> Bytes,
93                             const char *Separator) const {
94   llvm::SmallVector<std::string, 3> Lines;
95   // Parse the asm snippet and print it.
96   while (!Bytes.empty()) {
97     llvm::MCInst MI;
98     uint64_t MISize = 0;
99     if (!Disasm_->getInstruction(MI, MISize, Bytes, 0, llvm::nulls(),
100                                  llvm::nulls())) {
101       writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
102       writeEscaped<Tag>(OS, Separator);
103       writeEscaped<Tag>(OS, "[error decoding asm snippet]");
104       return;
105     }
106     Lines.emplace_back();
107     std::string &Line = Lines.back();
108     llvm::raw_string_ostream OSS(Line);
109     InstPrinter_->printInst(&MI, OSS, "", *SubtargetInfo_);
110     Bytes = Bytes.drop_front(MISize);
111     OSS.flush();
112     Line = llvm::StringRef(Line).trim().str();
113   }
114   writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
115 }
116 
117 // Prints a row representing an instruction, along with scheduling info and
118 // point coordinates (measurements).
119 void Analysis::printInstructionRowCsv(const size_t PointId,
120                                       llvm::raw_ostream &OS) const {
121   const InstructionBenchmark &Point = Clustering_.getPoints()[PointId];
122   writeClusterId<kEscapeCsv>(OS, Clustering_.getClusterIdForPoint(PointId));
123   OS << kCsvSep;
124   writeSnippet<EscapeTag, kEscapeCsv>(OS, Point.AssembledSnippet, "; ");
125   OS << kCsvSep;
126   writeEscaped<kEscapeCsv>(OS, Point.Key.Config);
127   OS << kCsvSep;
128   assert(!Point.Key.Instructions.empty());
129   // FIXME: Resolve variant classes.
130   const unsigned SchedClassId =
131       InstrInfo_->get(Point.Key.Instructions[0].getOpcode()).getSchedClass();
132 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
133   const auto &SchedModel = SubtargetInfo_->getSchedModel();
134   const llvm::MCSchedClassDesc *const SCDesc =
135       SchedModel.getSchedClassDesc(SchedClassId);
136   writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
137 #else
138   OS << SchedClassId;
139 #endif
140   for (const auto &Measurement : Point.Measurements) {
141     OS << kCsvSep;
142     writeMeasurementValue<kEscapeCsv>(OS, Measurement.PerInstructionValue);
143   }
144   OS << "\n";
145 }
146 
147 Analysis::Analysis(const llvm::Target &Target,
148                    const InstructionBenchmarkClustering &Clustering)
149     : Clustering_(Clustering) {
150   if (Clustering.getPoints().empty())
151     return;
152 
153   const InstructionBenchmark &FirstPoint = Clustering.getPoints().front();
154   InstrInfo_.reset(Target.createMCInstrInfo());
155   RegInfo_.reset(Target.createMCRegInfo(FirstPoint.LLVMTriple));
156   AsmInfo_.reset(Target.createMCAsmInfo(*RegInfo_, FirstPoint.LLVMTriple));
157   SubtargetInfo_.reset(Target.createMCSubtargetInfo(FirstPoint.LLVMTriple,
158                                                     FirstPoint.CpuName, ""));
159   InstPrinter_.reset(Target.createMCInstPrinter(
160       llvm::Triple(FirstPoint.LLVMTriple), 0 /*default variant*/, *AsmInfo_,
161       *InstrInfo_, *RegInfo_));
162 
163   Context_ = llvm::make_unique<llvm::MCContext>(AsmInfo_.get(), RegInfo_.get(),
164                                                 &ObjectFileInfo_);
165   Disasm_.reset(Target.createMCDisassembler(*SubtargetInfo_, *Context_));
166   assert(Disasm_ && "cannot create MCDisassembler. missing call to "
167                     "InitializeXXXTargetDisassembler ?");
168 }
169 
170 template <>
171 llvm::Error
172 Analysis::run<Analysis::PrintClusters>(llvm::raw_ostream &OS) const {
173   if (Clustering_.getPoints().empty())
174     return llvm::Error::success();
175 
176   // Write the header.
177   OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config"
178      << kCsvSep << "sched_class";
179   for (const auto &Measurement : Clustering_.getPoints().front().Measurements) {
180     OS << kCsvSep;
181     writeEscaped<kEscapeCsv>(OS, Measurement.Key);
182   }
183   OS << "\n";
184 
185   // Write the points.
186   const auto &Clusters = Clustering_.getValidClusters();
187   for (size_t I = 0, E = Clusters.size(); I < E; ++I) {
188     for (const size_t PointId : Clusters[I].PointIndices) {
189       printInstructionRowCsv(PointId, OS);
190     }
191     OS << "\n\n";
192   }
193   return llvm::Error::success();
194 }
195 
196 std::unordered_map<unsigned, std::vector<size_t>>
197 Analysis::makePointsPerSchedClass() const {
198   std::unordered_map<unsigned, std::vector<size_t>> PointsPerSchedClass;
199   const auto &Points = Clustering_.getPoints();
200   for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) {
201     const InstructionBenchmark &Point = Points[PointId];
202     if (!Point.Error.empty())
203       continue;
204     assert(!Point.Key.Instructions.empty());
205     const auto Opcode = Point.Key.Instructions[0].getOpcode();
206     // FIXME: Resolve variant classes.
207     PointsPerSchedClass[InstrInfo_->get(Opcode).getSchedClass()].push_back(
208         PointId);
209   }
210   return PointsPerSchedClass;
211 }
212 
213 // Uops repeat the same opcode over again. Just show this opcode and show the
214 // whole snippet only on hover.
215 static void writeUopsSnippetHtml(llvm::raw_ostream &OS,
216                                  const std::vector<llvm::MCInst> &Instructions,
217                                  const llvm::MCInstrInfo &InstrInfo) {
218   if (Instructions.empty())
219     return;
220   writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instructions[0].getOpcode()));
221   if (Instructions.size() > 1)
222     OS << " (x" << Instructions.size() << ")";
223 }
224 
225 // Latency tries to find a serial path. Just show the opcode path and show the
226 // whole snippet only on hover.
227 static void
228 writeLatencySnippetHtml(llvm::raw_ostream &OS,
229                         const std::vector<llvm::MCInst> &Instructions,
230                         const llvm::MCInstrInfo &InstrInfo) {
231   bool First = true;
232   for (const llvm::MCInst &Instr : Instructions) {
233     if (First)
234       First = false;
235     else
236       OS << " &rarr; ";
237     writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instr.getOpcode()));
238   }
239 }
240 
241 void Analysis::printSchedClassClustersHtml(
242     const std::vector<SchedClassCluster> &Clusters, const SchedClass &SC,
243     llvm::raw_ostream &OS) const {
244   const auto &Points = Clustering_.getPoints();
245   OS << "<table class=\"sched-class-clusters\">";
246   OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
247   assert(!Clusters.empty());
248   for (const auto &Measurement :
249        Points[Clusters[0].getPointIds()[0]].Measurements) {
250     OS << "<th>";
251     writeEscaped<kEscapeHtml>(OS, Measurement.Key);
252     OS << "</th>";
253   }
254   OS << "</tr>";
255   for (const SchedClassCluster &Cluster : Clusters) {
256     OS << "<tr class=\""
257        << (Cluster.measurementsMatch(*SubtargetInfo_, SC, Clustering_)
258                ? "good-cluster"
259                : "bad-cluster")
260        << "\"><td>";
261     writeClusterId<kEscapeHtml>(OS, Cluster.id());
262     OS << "</td><td><ul>";
263     for (const size_t PointId : Cluster.getPointIds()) {
264       const auto &Point = Points[PointId];
265       OS << "<li><span class=\"mono\" title=\"";
266       writeSnippet<EscapeTag, kEscapeHtmlString>(OS, Point.AssembledSnippet,
267                                                  "\n");
268       OS << "\">";
269       switch (Point.Mode) {
270       case InstructionBenchmark::Latency:
271         writeLatencySnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
272         break;
273       case InstructionBenchmark::Uops:
274         writeUopsSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
275         break;
276       default:
277         llvm_unreachable("invalid mode");
278       }
279       OS << "</span> <span class=\"mono\">";
280       writeEscaped<kEscapeHtml>(OS, Point.Key.Config);
281       OS << "</span></li>";
282     }
283     OS << "</ul></td>";
284     for (const auto &Stats : Cluster.getRepresentative()) {
285       OS << "<td class=\"measurement\">";
286       writeMeasurementValue<kEscapeHtml>(OS, Stats.avg());
287       OS << "<br><span class=\"minmax\">[";
288       writeMeasurementValue<kEscapeHtml>(OS, Stats.min());
289       OS << ";";
290       writeMeasurementValue<kEscapeHtml>(OS, Stats.max());
291       OS << "]</span></td>";
292     }
293     OS << "</tr>";
294   }
295   OS << "</table>";
296 }
297 
298 // Return the non-redundant list of WriteProcRes used by the given sched class.
299 // The scheduling model for LLVM is such that each instruction has a certain
300 // number of uops which consume resources which are described by WriteProcRes
301 // entries. Each entry describe how many cycles are spent on a specific ProcRes
302 // kind.
303 // For example, an instruction might have 3 uOps, one dispatching on P0
304 // (ProcResIdx=1) and two on P06 (ProcResIdx = 7).
305 // Note that LLVM additionally denormalizes resource consumption to include
306 // usage of super resources by subresources. So in practice if there exists a
307 // P016 (ProcResIdx=10), then the cycles consumed by P0 are also consumed by
308 // P06 (ProcResIdx = 7) and P016 (ProcResIdx = 10), and the resources consumed
309 // by P06 are also consumed by P016. In the figure below, parenthesized cycles
310 // denote implied usage of superresources by subresources:
311 //            P0      P06    P016
312 //     uOp1    1      (1)     (1)
313 //     uOp2            1      (1)
314 //     uOp3            1      (1)
315 //     =============================
316 //             1       3       3
317 // Eventually we end up with three entries for the WriteProcRes of the
318 // instruction:
319 //    {ProcResIdx=1,  Cycles=1}  // P0
320 //    {ProcResIdx=7,  Cycles=3}  // P06
321 //    {ProcResIdx=10, Cycles=3}  // P016
322 //
323 // Note that in this case, P016 does not contribute any cycles, so it would
324 // be removed by this function.
325 // FIXME: Move this to MCSubtargetInfo and use it in llvm-mca.
326 static llvm::SmallVector<llvm::MCWriteProcResEntry, 8>
327 getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc,
328                             const llvm::MCSubtargetInfo &STI) {
329   llvm::SmallVector<llvm::MCWriteProcResEntry, 8> Result;
330   const auto &SM = STI.getSchedModel();
331   const unsigned NumProcRes = SM.getNumProcResourceKinds();
332 
333   // This assumes that the ProcResDescs are sorted in topological order, which
334   // is guaranteed by the tablegen backend.
335   llvm::SmallVector<float, 32> ProcResUnitUsage(NumProcRes);
336   for (const auto *WPR = STI.getWriteProcResBegin(&SCDesc),
337                   *const WPREnd = STI.getWriteProcResEnd(&SCDesc);
338        WPR != WPREnd; ++WPR) {
339     const llvm::MCProcResourceDesc *const ProcResDesc =
340         SM.getProcResource(WPR->ProcResourceIdx);
341     if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
342       // This is a ProcResUnit.
343       Result.push_back({WPR->ProcResourceIdx, WPR->Cycles});
344       ProcResUnitUsage[WPR->ProcResourceIdx] += WPR->Cycles;
345     } else {
346       // This is a ProcResGroup. First see if it contributes any cycles or if
347       // it has cycles just from subunits.
348       float RemainingCycles = WPR->Cycles;
349       for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
350            SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
351            ++SubResIdx) {
352         RemainingCycles -= ProcResUnitUsage[*SubResIdx];
353       }
354       if (RemainingCycles < 0.01f) {
355         // The ProcResGroup contributes no cycles of its own.
356         continue;
357       }
358       // The ProcResGroup contributes `RemainingCycles` cycles of its own.
359       Result.push_back({WPR->ProcResourceIdx,
360                         static_cast<uint16_t>(std::round(RemainingCycles))});
361       // Spread the remaining cycles over all subunits.
362       for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
363            SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
364            ++SubResIdx) {
365         ProcResUnitUsage[*SubResIdx] += RemainingCycles / ProcResDesc->NumUnits;
366       }
367     }
368   }
369   return Result;
370 }
371 
372 Analysis::SchedClass::SchedClass(const llvm::MCSchedClassDesc &SD,
373                                  const llvm::MCSubtargetInfo &STI)
374     : SCDesc(&SD),
375       NonRedundantWriteProcRes(getNonRedundantWriteProcRes(SD, STI)),
376       IdealizedProcResPressure(computeIdealizedProcResPressure(
377           STI.getSchedModel(), NonRedundantWriteProcRes)) {}
378 
379 void Analysis::SchedClassCluster::addPoint(
380     size_t PointId, const InstructionBenchmarkClustering &Clustering) {
381   PointIds.push_back(PointId);
382   const auto &Point = Clustering.getPoints()[PointId];
383   if (ClusterId.isUndef()) {
384     ClusterId = Clustering.getClusterIdForPoint(PointId);
385     Representative.resize(Point.Measurements.size());
386   }
387   for (size_t I = 0, E = Point.Measurements.size(); I < E; ++I) {
388     Representative[I].push(Point.Measurements[I]);
389   }
390   assert(ClusterId == Clustering.getClusterIdForPoint(PointId));
391 }
392 
393 // Returns a ProxResIdx by id or name.
394 static unsigned findProcResIdx(const llvm::MCSubtargetInfo &STI,
395                                const llvm::StringRef NameOrId) {
396   // Interpret the key as an ProcResIdx.
397   unsigned ProcResIdx = 0;
398   if (llvm::to_integer(NameOrId, ProcResIdx, 10))
399     return ProcResIdx;
400   // Interpret the key as a ProcRes name.
401   const auto &SchedModel = STI.getSchedModel();
402   for (int I = 0, E = SchedModel.getNumProcResourceKinds(); I < E; ++I) {
403     if (NameOrId == SchedModel.getProcResource(I)->Name)
404       return I;
405   }
406   return 0;
407 }
408 
409 bool Analysis::SchedClassCluster::measurementsMatch(
410     const llvm::MCSubtargetInfo &STI, const SchedClass &SC,
411     const InstructionBenchmarkClustering &Clustering) const {
412   const size_t NumMeasurements = Representative.size();
413   std::vector<BenchmarkMeasure> ClusterCenterPoint(NumMeasurements);
414   std::vector<BenchmarkMeasure> SchedClassPoint(NumMeasurements);
415   // Latency case.
416   assert(!Clustering.getPoints().empty());
417   const InstructionBenchmark::ModeE Mode = Clustering.getPoints()[0].Mode;
418   if (Mode == InstructionBenchmark::Latency) {
419     if (NumMeasurements != 1) {
420       llvm::errs()
421           << "invalid number of measurements in latency mode: expected 1, got "
422           << NumMeasurements << "\n";
423       return false;
424     }
425     // Find the latency.
426     SchedClassPoint[0].PerInstructionValue = 0.0;
427     for (unsigned I = 0; I < SC.SCDesc->NumWriteLatencyEntries; ++I) {
428       const llvm::MCWriteLatencyEntry *const WLE =
429           STI.getWriteLatencyEntry(SC.SCDesc, I);
430       SchedClassPoint[0].PerInstructionValue =
431           std::max<double>(SchedClassPoint[0].PerInstructionValue, WLE->Cycles);
432     }
433     ClusterCenterPoint[0].PerInstructionValue = Representative[0].avg();
434   } else if (Mode == InstructionBenchmark::Uops) {
435     for (int I = 0, E = Representative.size(); I < E; ++I) {
436       const auto Key = Representative[I].key();
437       uint16_t ProcResIdx = findProcResIdx(STI, Key);
438       if (ProcResIdx > 0) {
439         // Find the pressure on ProcResIdx `Key`.
440         const auto ProcResPressureIt =
441             std::find_if(SC.IdealizedProcResPressure.begin(),
442                          SC.IdealizedProcResPressure.end(),
443                          [ProcResIdx](const std::pair<uint16_t, float> &WPR) {
444                            return WPR.first == ProcResIdx;
445                          });
446         SchedClassPoint[I].PerInstructionValue =
447             ProcResPressureIt == SC.IdealizedProcResPressure.end()
448                 ? 0.0
449                 : ProcResPressureIt->second;
450       } else if (Key == "NumMicroOps") {
451         SchedClassPoint[I].PerInstructionValue = SC.SCDesc->NumMicroOps;
452       } else {
453         llvm::errs() << "expected `key` to be either a ProcResIdx or a ProcRes "
454                         "name, got "
455                      << Key << "\n";
456         return false;
457       }
458       ClusterCenterPoint[I].PerInstructionValue = Representative[I].avg();
459     }
460   } else {
461     llvm::errs() << "unimplemented measurement matching for mode " << Mode
462                  << "\n";
463     return false;
464   }
465   return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint);
466 }
467 
468 void Analysis::printSchedClassDescHtml(const SchedClass &SC,
469                                        llvm::raw_ostream &OS) const {
470   OS << "<table class=\"sched-class-desc\">";
471   OS << "<tr><th>Valid</th><th>Variant</th><th>NumMicroOps</th><th>Latency</"
472         "th><th>WriteProcRes</th><th title=\"This is the idealized unit "
473         "resource (port) pressure assuming ideal distribution\">Idealized "
474         "Resource Pressure</th></tr>";
475   if (SC.SCDesc->isValid()) {
476     const auto &SM = SubtargetInfo_->getSchedModel();
477     OS << "<tr><td>&#10004;</td>";
478     OS << "<td>" << (SC.SCDesc->isVariant() ? "&#10004;" : "&#10005;")
479        << "</td>";
480     OS << "<td>" << SC.SCDesc->NumMicroOps << "</td>";
481     // Latencies.
482     OS << "<td><ul>";
483     for (int I = 0, E = SC.SCDesc->NumWriteLatencyEntries; I < E; ++I) {
484       const auto *const Entry =
485           SubtargetInfo_->getWriteLatencyEntry(SC.SCDesc, I);
486       OS << "<li>" << Entry->Cycles;
487       if (SC.SCDesc->NumWriteLatencyEntries > 1) {
488         // Dismabiguate if more than 1 latency.
489         OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
490       }
491       OS << "</li>";
492     }
493     OS << "</ul></td>";
494     // WriteProcRes.
495     OS << "<td><ul>";
496     for (const auto &WPR : SC.NonRedundantWriteProcRes) {
497       OS << "<li><span class=\"mono\">";
498       writeEscaped<kEscapeHtml>(OS,
499                                 SM.getProcResource(WPR.ProcResourceIdx)->Name);
500       OS << "</span>: " << WPR.Cycles << "</li>";
501     }
502     OS << "</ul></td>";
503     // Idealized port pressure.
504     OS << "<td><ul>";
505     for (const auto &Pressure : SC.IdealizedProcResPressure) {
506       OS << "<li><span class=\"mono\">";
507       writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel()
508                                         .getProcResource(Pressure.first)
509                                         ->Name);
510       OS << "</span>: ";
511       writeMeasurementValue<kEscapeHtml>(OS, Pressure.second);
512       OS << "</li>";
513     }
514     OS << "</ul></td>";
515     OS << "</tr>";
516   } else {
517     OS << "<tr><td>&#10005;</td><td></td><td></td></tr>";
518   }
519   OS << "</table>";
520 }
521 
522 static constexpr const char kHtmlHead[] = R"(
523 <head>
524 <title>llvm-exegesis Analysis Results</title>
525 <style>
526 body {
527   font-family: sans-serif
528 }
529 span.sched-class-name {
530   font-weight: bold;
531   font-family: monospace;
532 }
533 span.opcode {
534   font-family: monospace;
535 }
536 span.config {
537   font-family: monospace;
538 }
539 div.inconsistency {
540   margin-top: 50px;
541 }
542 table {
543   margin-left: 50px;
544   border-collapse: collapse;
545 }
546 table, table tr,td,th {
547   border: 1px solid #444;
548 }
549 table ul {
550   padding-left: 0px;
551   margin: 0px;
552   list-style-type: none;
553 }
554 table.sched-class-clusters td {
555   padding-left: 10px;
556   padding-right: 10px;
557   padding-top: 10px;
558   padding-bottom: 10px;
559 }
560 table.sched-class-desc td {
561   padding-left: 10px;
562   padding-right: 10px;
563   padding-top: 2px;
564   padding-bottom: 2px;
565 }
566 span.mono {
567   font-family: monospace;
568 }
569 td.measurement {
570   text-align: center;
571 }
572 tr.good-cluster td.measurement {
573   color: #292
574 }
575 tr.bad-cluster td.measurement {
576   color: #922
577 }
578 tr.good-cluster td.measurement span.minmax {
579   color: #888;
580 }
581 tr.bad-cluster td.measurement span.minmax {
582   color: #888;
583 }
584 </style>
585 </head>
586 )";
587 
588 template <>
589 llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
590     llvm::raw_ostream &OS) const {
591   const auto &FirstPoint = Clustering_.getPoints()[0];
592   // Print the header.
593   OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>";
594   OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>";
595   OS << "<h3>Triple: <span class=\"mono\">";
596   writeEscaped<kEscapeHtml>(OS, FirstPoint.LLVMTriple);
597   OS << "</span></h3><h3>Cpu: <span class=\"mono\">";
598   writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName);
599   OS << "</span></h3>";
600 
601   for (const auto &SchedClassAndPoints : makePointsPerSchedClass()) {
602     const auto SchedClassId = SchedClassAndPoints.first;
603     const std::vector<size_t> &SchedClassPoints = SchedClassAndPoints.second;
604     const auto &SchedModel = SubtargetInfo_->getSchedModel();
605     const llvm::MCSchedClassDesc *const SCDesc =
606         SchedModel.getSchedClassDesc(SchedClassId);
607     if (!SCDesc)
608       continue;
609     const SchedClass SC(*SCDesc, *SubtargetInfo_);
610 
611     // Bucket sched class points into sched class clusters.
612     std::vector<SchedClassCluster> SchedClassClusters;
613     for (const size_t PointId : SchedClassPoints) {
614       const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId);
615       if (!ClusterId.isValid())
616         continue; // Ignore noise and errors. FIXME: take noise into account ?
617       auto SchedClassClusterIt =
618           std::find_if(SchedClassClusters.begin(), SchedClassClusters.end(),
619                        [ClusterId](const SchedClassCluster &C) {
620                          return C.id() == ClusterId;
621                        });
622       if (SchedClassClusterIt == SchedClassClusters.end()) {
623         SchedClassClusters.emplace_back();
624         SchedClassClusterIt = std::prev(SchedClassClusters.end());
625       }
626       SchedClassClusterIt->addPoint(PointId, Clustering_);
627     }
628 
629     // Print any scheduling class that has at least one cluster that does not
630     // match the checked-in data.
631     if (std::all_of(SchedClassClusters.begin(), SchedClassClusters.end(),
632                     [this, &SC](const SchedClassCluster &C) {
633                       return C.measurementsMatch(*SubtargetInfo_, SC,
634                                                  Clustering_);
635                     }))
636       continue; // Nothing weird.
637 
638     OS << "<div class=\"inconsistency\"><p>Sched Class <span "
639           "class=\"sched-class-name\">";
640 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
641     writeEscaped<kEscapeHtml>(OS, SCDesc->Name);
642 #else
643     OS << SchedClassId;
644 #endif
645     OS << "</span> contains instructions whose performance characteristics do"
646           " not match that of LLVM:</p>";
647     printSchedClassClustersHtml(SchedClassClusters, SC, OS);
648     OS << "<p>llvm SchedModel data:</p>";
649     printSchedClassDescHtml(SC, OS);
650     OS << "</div>";
651   }
652 
653   OS << "</body></html>";
654   return llvm::Error::success();
655 }
656 
657 // Distributes a pressure budget as evenly as possible on the provided subunits
658 // given the already existing port pressure distribution.
659 //
660 // The algorithm is as follows: while there is remaining pressure to
661 // distribute, find the subunits with minimal pressure, and distribute
662 // remaining pressure equally up to the pressure of the unit with
663 // second-to-minimal pressure.
664 // For example, let's assume we want to distribute 2*P1256
665 // (Subunits = [P1,P2,P5,P6]), and the starting DensePressure is:
666 //     DensePressure =        P0   P1   P2   P3   P4   P5   P6   P7
667 //                           0.1  0.3  0.2  0.0  0.0  0.5  0.5  0.5
668 //     RemainingPressure = 2.0
669 // We sort the subunits by pressure:
670 //     Subunits = [(P2,p=0.2), (P1,p=0.3), (P5,p=0.5), (P6, p=0.5)]
671 // We'll first start by the subunits with minimal pressure, which are at
672 // the beginning of the sorted array. In this example there is one (P2).
673 // The subunit with second-to-minimal pressure is the next one in the
674 // array (P1). So we distribute 0.1 pressure to P2, and remove 0.1 cycles
675 // from the budget.
676 //     Subunits = [(P2,p=0.3), (P1,p=0.3), (P5,p=0.5), (P5,p=0.5)]
677 //     RemainingPressure = 1.9
678 // We repeat this process: distribute 0.2 pressure on each of the minimal
679 // P2 and P1, decrease budget by 2*0.2:
680 //     Subunits = [(P2,p=0.5), (P1,p=0.5), (P5,p=0.5), (P5,p=0.5)]
681 //     RemainingPressure = 1.5
682 // There are no second-to-minimal subunits so we just share the remaining
683 // budget (1.5 cycles) equally:
684 //     Subunits = [(P2,p=0.875), (P1,p=0.875), (P5,p=0.875), (P5,p=0.875)]
685 //     RemainingPressure = 0.0
686 // We stop as there is no remaining budget to distribute.
687 void distributePressure(float RemainingPressure,
688                         llvm::SmallVector<uint16_t, 32> Subunits,
689                         llvm::SmallVector<float, 32> &DensePressure) {
690   // Find the number of subunits with minimal pressure (they are at the
691   // front).
692   llvm::sort(Subunits, [&DensePressure](const uint16_t A, const uint16_t B) {
693     return DensePressure[A] < DensePressure[B];
694   });
695   const auto getPressureForSubunit = [&DensePressure,
696                                       &Subunits](size_t I) -> float & {
697     return DensePressure[Subunits[I]];
698   };
699   size_t NumMinimalSU = 1;
700   while (NumMinimalSU < Subunits.size() &&
701          getPressureForSubunit(NumMinimalSU) == getPressureForSubunit(0)) {
702     ++NumMinimalSU;
703   }
704   while (RemainingPressure > 0.0f) {
705     if (NumMinimalSU == Subunits.size()) {
706       // All units are minimal, just distribute evenly and be done.
707       for (size_t I = 0; I < NumMinimalSU; ++I) {
708         getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
709       }
710       return;
711     }
712     // Distribute the remaining pressure equally.
713     const float MinimalPressure = getPressureForSubunit(NumMinimalSU - 1);
714     const float SecondToMinimalPressure = getPressureForSubunit(NumMinimalSU);
715     assert(MinimalPressure < SecondToMinimalPressure);
716     const float Increment = SecondToMinimalPressure - MinimalPressure;
717     if (RemainingPressure <= NumMinimalSU * Increment) {
718       // There is not enough remaining pressure.
719       for (size_t I = 0; I < NumMinimalSU; ++I) {
720         getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
721       }
722       return;
723     }
724     // Bump all minimal pressure subunits to `SecondToMinimalPressure`.
725     for (size_t I = 0; I < NumMinimalSU; ++I) {
726       getPressureForSubunit(I) = SecondToMinimalPressure;
727       RemainingPressure -= SecondToMinimalPressure;
728     }
729     while (NumMinimalSU < Subunits.size() &&
730            getPressureForSubunit(NumMinimalSU) == SecondToMinimalPressure) {
731       ++NumMinimalSU;
732     }
733   }
734 }
735 
736 std::vector<std::pair<uint16_t, float>> computeIdealizedProcResPressure(
737     const llvm::MCSchedModel &SM,
738     llvm::SmallVector<llvm::MCWriteProcResEntry, 8> WPRS) {
739   // DensePressure[I] is the port pressure for Proc Resource I.
740   llvm::SmallVector<float, 32> DensePressure(SM.getNumProcResourceKinds());
741   llvm::sort(WPRS, [](const llvm::MCWriteProcResEntry &A,
742                       const llvm::MCWriteProcResEntry &B) {
743     return A.ProcResourceIdx < B.ProcResourceIdx;
744   });
745   for (const llvm::MCWriteProcResEntry &WPR : WPRS) {
746     // Get units for the entry.
747     const llvm::MCProcResourceDesc *const ProcResDesc =
748         SM.getProcResource(WPR.ProcResourceIdx);
749     if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
750       // This is a ProcResUnit.
751       DensePressure[WPR.ProcResourceIdx] += WPR.Cycles;
752     } else {
753       // This is a ProcResGroup.
754       llvm::SmallVector<uint16_t, 32> Subunits(ProcResDesc->SubUnitsIdxBegin,
755                                                ProcResDesc->SubUnitsIdxBegin +
756                                                    ProcResDesc->NumUnits);
757       distributePressure(WPR.Cycles, Subunits, DensePressure);
758     }
759   }
760   // Turn dense pressure into sparse pressure by removing zero entries.
761   std::vector<std::pair<uint16_t, float>> Pressure;
762   for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
763     if (DensePressure[I] > 0.0f)
764       Pressure.emplace_back(I, DensePressure[I]);
765   }
766   return Pressure;
767 }
768 
769 } // namespace exegesis
770