//===-- Analysis.cpp --------------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "Analysis.h"
#include "BenchmarkResult.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/FormatVariadic.h"
#include <cmath>
#include <unordered_map>
#include <unordered_set>
#include <vector>

namespace exegesis {

static const char kCsvSep = ',';

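// Resolves a variant scheduling class to the scheduling class it eventually
// expands to for the given instruction on the modeled processor. Non-variant
// classes (and the invalid class 0) are returned unchanged.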
static unsigned resolveSchedClassId(const llvm::MCSubtargetInfo &STI,
                                    unsigned SchedClassId,
                                    const llvm::MCInst &MCI) {
  const auto &SM = STI.getSchedModel();
  while (SchedClassId && SM.getSchedClassDesc(SchedClassId)->isVariant())
    SchedClassId =
        STI.resolveVariantSchedClass(SchedClassId, &MCI, SM.getProcessorID());
  return SchedClassId;
}

namespace {

enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString };

template <EscapeTag Tag>
void writeEscaped(llvm::raw_ostream &OS, const llvm::StringRef S);

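// For illustration (the inputs below are made up): the CSV variant writes
// abc unchanged, but a value containing the separator, such as a,b"c, is
// emitted quoted with embedded quotes doubled: "a,b""c".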
template <>
void writeEscaped<kEscapeCsv>(llvm::raw_ostream &OS, const llvm::StringRef S) {
  if (std::find(S.begin(), S.end(), kCsvSep) == S.end()) {
    OS << S;
  } else {
    // Needs escaping.
    OS << '"';
    for (const char C : S) {
      if (C == '"')
        OS << "\"\"";
      else
        OS << C;
    }
    OS << '"';
  }
}

template <>
void writeEscaped<kEscapeHtml>(llvm::raw_ostream &OS, const llvm::StringRef S) {
  for (const char C : S) {
    if (C == '<')
      OS << "&lt;";
    else if (C == '>')
      OS << "&gt;";
    else if (C == '&')
      OS << "&amp;";
    else
      OS << C;
  }
}

template <>
void writeEscaped<kEscapeHtmlString>(llvm::raw_ostream &OS,
                                     const llvm::StringRef S) {
  for (const char C : S) {
    if (C == '"')
      OS << "\\\"";
    else
      OS << C;
  }
}

} // namespace

template <EscapeTag Tag>
static void
writeClusterId(llvm::raw_ostream &OS,
               const InstructionBenchmarkClustering::ClusterId &CID) {
  if (CID.isNoise())
    writeEscaped<Tag>(OS, "[noise]");
  else if (CID.isError())
    writeEscaped<Tag>(OS, "[error]");
  else
    OS << CID.getId();
}

template <EscapeTag Tag>
static void writeMeasurementValue(llvm::raw_ostream &OS, const double Value) {
  writeEscaped<Tag>(OS, llvm::formatv("{0:F}", Value).str());
}

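// Disassembles the raw snippet bytes and prints the instructions, joined by
// Separator and escaped according to Tag. If some bytes fail to decode, the
// instructions decoded so far are printed, followed by an error marker.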
template <typename EscapeTag, EscapeTag Tag>
void Analysis::writeSnippet(llvm::raw_ostream &OS,
                            llvm::ArrayRef<uint8_t> Bytes,
                            const char *Separator) const {
  llvm::SmallVector<std::string, 3> Lines;
  // Parse the asm snippet and print it.
  while (!Bytes.empty()) {
    llvm::MCInst MI;
    uint64_t MISize = 0;
    if (!Disasm_->getInstruction(MI, MISize, Bytes, 0, llvm::nulls(),
                                 llvm::nulls())) {
      writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
      writeEscaped<Tag>(OS, Separator);
      writeEscaped<Tag>(OS, "[error decoding asm snippet]");
      return;
    }
    Lines.emplace_back();
    std::string &Line = Lines.back();
    llvm::raw_string_ostream OSS(Line);
    InstPrinter_->printInst(&MI, OSS, "", *SubtargetInfo_);
    Bytes = Bytes.drop_front(MISize);
    OSS.flush();
    Line = llvm::StringRef(Line).trim().str();
  }
  writeEscaped<Tag>(OS, llvm::join(Lines, Separator));
}

// Prints a row representing an instruction, along with scheduling info and
// point coordinates (measurements).
void Analysis::printInstructionRowCsv(const size_t PointId,
                                      llvm::raw_ostream &OS) const {
  const InstructionBenchmark &Point = Clustering_.getPoints()[PointId];
  writeClusterId<kEscapeCsv>(OS, Clustering_.getClusterIdForPoint(PointId));
  OS << kCsvSep;
  writeSnippet<EscapeTag, kEscapeCsv>(OS, Point.AssembledSnippet, "; ");
  OS << kCsvSep;
  writeEscaped<kEscapeCsv>(OS, Point.Key.Config);
  OS << kCsvSep;
  assert(!Point.Key.Instructions.empty());
  const llvm::MCInst &MCI = Point.Key.Instructions[0];
  const unsigned SchedClassId = resolveSchedClassId(
      *SubtargetInfo_, InstrInfo_->get(MCI.getOpcode()).getSchedClass(), MCI);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  const llvm::MCSchedClassDesc *const SCDesc =
      SubtargetInfo_->getSchedModel().getSchedClassDesc(SchedClassId);
  writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
#else
  OS << SchedClassId;
#endif
  for (const auto &Measurement : Point.Measurements) {
    OS << kCsvSep;
    writeMeasurementValue<kEscapeCsv>(OS, Measurement.PerInstructionValue);
  }
  OS << "\n";
}

Analysis::Analysis(const llvm::Target &Target,
                   const InstructionBenchmarkClustering &Clustering)
    : Clustering_(Clustering) {
  if (Clustering.getPoints().empty())
    return;

  const InstructionBenchmark &FirstPoint = Clustering.getPoints().front();
  InstrInfo_.reset(Target.createMCInstrInfo());
  RegInfo_.reset(Target.createMCRegInfo(FirstPoint.LLVMTriple));
  AsmInfo_.reset(Target.createMCAsmInfo(*RegInfo_, FirstPoint.LLVMTriple));
  SubtargetInfo_.reset(Target.createMCSubtargetInfo(FirstPoint.LLVMTriple,
                                                    FirstPoint.CpuName, ""));
  InstPrinter_.reset(Target.createMCInstPrinter(
      llvm::Triple(FirstPoint.LLVMTriple), 0 /*default variant*/, *AsmInfo_,
      *InstrInfo_, *RegInfo_));

  Context_ = llvm::make_unique<llvm::MCContext>(AsmInfo_.get(), RegInfo_.get(),
                                                &ObjectFileInfo_);
  Disasm_.reset(Target.createMCDisassembler(*SubtargetInfo_, *Context_));
  assert(Disasm_ && "cannot create MCDisassembler. missing call to "
                    "InitializeXXXTargetDisassembler ?");
}

template <>
llvm::Error
Analysis::run<Analysis::PrintClusters>(llvm::raw_ostream &OS) const {
  if (Clustering_.getPoints().empty())
    return llvm::Error::success();

  // Write the header.
  OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config"
     << kCsvSep << "sched_class";
  for (const auto &Measurement : Clustering_.getPoints().front().Measurements) {
    OS << kCsvSep;
    writeEscaped<kEscapeCsv>(OS, Measurement.Key);
  }
  OS << "\n";

  // Write the points.
  const auto &Clusters = Clustering_.getValidClusters();
  for (size_t I = 0, E = Clusters.size(); I < E; ++I) {
    for (const size_t PointId : Clusters[I].PointIndices) {
      printInstructionRowCsv(PointId, OS);
    }
    OS << "\n\n";
  }
  return llvm::Error::success();
}

Analysis::ResolvedSchedClassAndPoints::ResolvedSchedClassAndPoints(
    ResolvedSchedClass &&RSC)
    : RSC(std::move(RSC)) {}

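// Groups the benchmark points by their resolved scheduling class, skipping
// points that carry an error. Only the first instruction of each snippet is
// used as the key (see the FIXME below).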
std::vector<Analysis::ResolvedSchedClassAndPoints>
Analysis::makePointsPerSchedClass() const {
  std::vector<ResolvedSchedClassAndPoints> Entries;
  // Maps SchedClassIds to index in result.
  std::unordered_map<unsigned, size_t> SchedClassIdToIndex;
  const auto &Points = Clustering_.getPoints();
  for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) {
    const InstructionBenchmark &Point = Points[PointId];
    if (!Point.Error.empty())
      continue;
    assert(!Point.Key.Instructions.empty());
    // FIXME: we should be using the tuple of classes for instructions in the
    // snippet as key.
    const llvm::MCInst &MCI = Point.Key.Instructions[0];
    unsigned SchedClassId = InstrInfo_->get(MCI.getOpcode()).getSchedClass();
    const bool WasVariant = SchedClassId && SubtargetInfo_->getSchedModel()
                                                .getSchedClassDesc(SchedClassId)
                                                ->isVariant();
    SchedClassId = resolveSchedClassId(*SubtargetInfo_, SchedClassId, MCI);
    const auto IndexIt = SchedClassIdToIndex.find(SchedClassId);
    if (IndexIt == SchedClassIdToIndex.end()) {
      // Create a new entry.
      SchedClassIdToIndex.emplace(SchedClassId, Entries.size());
      ResolvedSchedClassAndPoints Entry(
          ResolvedSchedClass(*SubtargetInfo_, SchedClassId, WasVariant));
      Entry.PointIds.push_back(PointId);
      Entries.push_back(std::move(Entry));
    } else {
      // Append to the existing entry.
      Entries[IndexIt->second].PointIds.push_back(PointId);
    }
  }
  return Entries;
}

// Uops benchmarks repeat the same opcode over and over. Just show this opcode
// and show the whole snippet only on hover.
static void writeUopsSnippetHtml(llvm::raw_ostream &OS,
                                 const std::vector<llvm::MCInst> &Instructions,
                                 const llvm::MCInstrInfo &InstrInfo) {
  if (Instructions.empty())
    return;
  writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instructions[0].getOpcode()));
  if (Instructions.size() > 1)
    OS << " (x" << Instructions.size() << ")";
}

// Latency tries to find a serial path. Just show the opcode path and show the
// whole snippet only on hover.
static void
writeLatencySnippetHtml(llvm::raw_ostream &OS,
                        const std::vector<llvm::MCInst> &Instructions,
                        const llvm::MCInstrInfo &InstrInfo) {
  bool First = true;
  for (const llvm::MCInst &Instr : Instructions) {
    if (First)
      First = false;
    else
      OS << " &rarr; ";
    writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instr.getOpcode()));
  }
}

void Analysis::printSchedClassClustersHtml(
    const std::vector<SchedClassCluster> &Clusters,
    const ResolvedSchedClass &RSC, llvm::raw_ostream &OS) const {
  const auto &Points = Clustering_.getPoints();
  OS << "<table class=\"sched-class-clusters\">";
  OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
  assert(!Clusters.empty());
  for (const auto &Measurement :
       Points[Clusters[0].getPointIds()[0]].Measurements) {
    OS << "<th>";
    writeEscaped<kEscapeHtml>(OS, Measurement.Key);
    OS << "</th>";
  }
  OS << "</tr>";
  for (const SchedClassCluster &Cluster : Clusters) {
    OS << "<tr class=\""
       << (Cluster.measurementsMatch(*SubtargetInfo_, RSC, Clustering_)
               ? "good-cluster"
               : "bad-cluster")
       << "\"><td>";
    writeClusterId<kEscapeHtml>(OS, Cluster.id());
    OS << "</td><td><ul>";
    for (const size_t PointId : Cluster.getPointIds()) {
      const auto &Point = Points[PointId];
      OS << "<li><span class=\"mono\" title=\"";
      writeSnippet<EscapeTag, kEscapeHtmlString>(OS, Point.AssembledSnippet,
                                                 "\n");
      OS << "\">";
      switch (Point.Mode) {
      case InstructionBenchmark::Latency:
        writeLatencySnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
        break;
      case InstructionBenchmark::Uops:
        writeUopsSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_);
        break;
      default:
        llvm_unreachable("invalid mode");
      }
      OS << "</span> <span class=\"mono\">";
      writeEscaped<kEscapeHtml>(OS, Point.Key.Config);
      OS << "</span></li>";
    }
    OS << "</ul></td>";
    for (const auto &Stats : Cluster.getRepresentative()) {
      OS << "<td class=\"measurement\">";
      writeMeasurementValue<kEscapeHtml>(OS, Stats.avg());
      OS << "<br><span class=\"minmax\">[";
      writeMeasurementValue<kEscapeHtml>(OS, Stats.min());
      OS << ";";
      writeMeasurementValue<kEscapeHtml>(OS, Stats.max());
      OS << "]</span></td>";
    }
    OS << "</tr>";
  }
  OS << "</table>";
}

// Return the non-redundant list of WriteProcRes used by the given sched class.
// In LLVM's scheduling model, each instruction has a certain number of uops
// that consume resources; those resources are described by WriteProcRes
// entries. Each entry describes how many cycles are spent on a specific
// ProcRes kind.
// For example, an instruction might have 3 uOps, one dispatching on P0
// (ProcResIdx=1) and two on P06 (ProcResIdx = 7).
// Note that LLVM additionally denormalizes resource consumption to include
// usage of super resources by subresources. So in practice if there exists a
// P016 (ProcResIdx=10), then the cycles consumed by P0 are also consumed by
// P06 (ProcResIdx = 7) and P016 (ProcResIdx = 10), and the resources consumed
// by P06 are also consumed by P016. In the figure below, parenthesized cycles
// denote implied usage of superresources by subresources:
//            P0      P06    P016
//     uOp1    1      (1)     (1)
//     uOp2            1      (1)
//     uOp3            1      (1)
//     =============================
//             1       3       3
// Eventually we end up with three entries for the WriteProcRes of the
// instruction:
//    {ProcResIdx=1,  Cycles=1}  // P0
//    {ProcResIdx=7,  Cycles=3}  // P06
//    {ProcResIdx=10, Cycles=3}  // P016
//
// Note that in this case, P016 does not contribute any cycles of its own, so
// its entry would be removed by this function.
// FIXME: Move this to MCSubtargetInfo and use it in llvm-mca.
static llvm::SmallVector<llvm::MCWriteProcResEntry, 8>
getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc,
                            const llvm::MCSubtargetInfo &STI) {
  llvm::SmallVector<llvm::MCWriteProcResEntry, 8> Result;
  const auto &SM = STI.getSchedModel();
  const unsigned NumProcRes = SM.getNumProcResourceKinds();

  // This assumes that the ProcResDescs are sorted in topological order, which
  // is guaranteed by the tablegen backend.
  llvm::SmallVector<float, 32> ProcResUnitUsage(NumProcRes);
  for (const auto *WPR = STI.getWriteProcResBegin(&SCDesc),
                  *const WPREnd = STI.getWriteProcResEnd(&SCDesc);
       WPR != WPREnd; ++WPR) {
    const llvm::MCProcResourceDesc *const ProcResDesc =
        SM.getProcResource(WPR->ProcResourceIdx);
    if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
      // This is a ProcResUnit.
      Result.push_back({WPR->ProcResourceIdx, WPR->Cycles});
      ProcResUnitUsage[WPR->ProcResourceIdx] += WPR->Cycles;
    } else {
      // This is a ProcResGroup. First see if it contributes any cycles or if
      // it has cycles just from subunits.
      float RemainingCycles = WPR->Cycles;
      for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
           SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
           ++SubResIdx) {
        RemainingCycles -= ProcResUnitUsage[*SubResIdx];
      }
      if (RemainingCycles < 0.01f) {
        // The ProcResGroup contributes no cycles of its own.
        continue;
      }
      // The ProcResGroup contributes `RemainingCycles` cycles of its own.
      Result.push_back({WPR->ProcResourceIdx,
                        static_cast<uint16_t>(std::round(RemainingCycles))});
      // Spread the remaining cycles over all subunits.
      for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
           SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
           ++SubResIdx) {
        ProcResUnitUsage[*SubResIdx] += RemainingCycles / ProcResDesc->NumUnits;
      }
    }
  }
  return Result;
}
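
// Tying the example above to the code: by the time the P016 group is visited,
// its 3 cycles are fully accounted for by the usage already recorded on its
// sub-units, so RemainingCycles is ~0 and no entry is emitted for it; a group
// only keeps the cycles it contributes beyond its sub-units.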

Analysis::ResolvedSchedClass::ResolvedSchedClass(
    const llvm::MCSubtargetInfo &STI, unsigned ResolvedSchedClassId,
    bool WasVariant)
    : SCDesc(STI.getSchedModel().getSchedClassDesc(ResolvedSchedClassId)),
      WasVariant(WasVariant),
      NonRedundantWriteProcRes(getNonRedundantWriteProcRes(*SCDesc, STI)),
      IdealizedProcResPressure(computeIdealizedProcResPressure(
          STI.getSchedModel(), NonRedundantWriteProcRes)) {
  assert((SCDesc == nullptr || !SCDesc->isVariant()) &&
         "ResolvedSchedClass should never be variant");
}

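// Folds the measurements of the given point into this cluster's
// per-measurement running statistics. All points added to a cluster must map
// to the same ClusterId.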
void Analysis::SchedClassCluster::addPoint(
    size_t PointId, const InstructionBenchmarkClustering &Clustering) {
  PointIds.push_back(PointId);
  const auto &Point = Clustering.getPoints()[PointId];
  if (ClusterId.isUndef()) {
    ClusterId = Clustering.getClusterIdForPoint(PointId);
    Representative.resize(Point.Measurements.size());
  }
  for (size_t I = 0, E = Point.Measurements.size(); I < E; ++I) {
    Representative[I].push(Point.Measurements[I]);
  }
  assert(ClusterId == Clustering.getClusterIdForPoint(PointId));
}

// Returns a ProcResIdx by id or name.
static unsigned findProcResIdx(const llvm::MCSubtargetInfo &STI,
                               const llvm::StringRef NameOrId) {
  // Interpret the key as a ProcResIdx.
  unsigned ProcResIdx = 0;
  if (llvm::to_integer(NameOrId, ProcResIdx, 10))
    return ProcResIdx;
  // Interpret the key as a ProcRes name.
  const auto &SchedModel = STI.getSchedModel();
  for (int I = 0, E = SchedModel.getNumProcResourceKinds(); I < E; ++I) {
    if (NameOrId == SchedModel.getProcResource(I)->Name)
      return I;
  }
  return 0;
}

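// Compares the cluster centroid against the point predicted by the LLVM sched
// model for this sched class: the maximum write latency in latency mode, and
// the idealized per-resource pressure (or NumMicroOps) in uops mode. The two
// are considered to match if the clustering would place them in the same
// neighbourhood.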
bool Analysis::SchedClassCluster::measurementsMatch(
    const llvm::MCSubtargetInfo &STI, const ResolvedSchedClass &RSC,
    const InstructionBenchmarkClustering &Clustering) const {
  const size_t NumMeasurements = Representative.size();
  std::vector<BenchmarkMeasure> ClusterCenterPoint(NumMeasurements);
  std::vector<BenchmarkMeasure> SchedClassPoint(NumMeasurements);
  // Latency case.
  assert(!Clustering.getPoints().empty());
  const InstructionBenchmark::ModeE Mode = Clustering.getPoints()[0].Mode;
  if (Mode == InstructionBenchmark::Latency) {
    if (NumMeasurements != 1) {
      llvm::errs()
          << "invalid number of measurements in latency mode: expected 1, got "
          << NumMeasurements << "\n";
      return false;
    }
    // Find the latency.
    SchedClassPoint[0].PerInstructionValue = 0.0;
    for (unsigned I = 0; I < RSC.SCDesc->NumWriteLatencyEntries; ++I) {
      const llvm::MCWriteLatencyEntry *const WLE =
          STI.getWriteLatencyEntry(RSC.SCDesc, I);
      SchedClassPoint[0].PerInstructionValue =
          std::max<double>(SchedClassPoint[0].PerInstructionValue, WLE->Cycles);
    }
    ClusterCenterPoint[0].PerInstructionValue = Representative[0].avg();
  } else if (Mode == InstructionBenchmark::Uops) {
    for (int I = 0, E = Representative.size(); I < E; ++I) {
      const auto Key = Representative[I].key();
      uint16_t ProcResIdx = findProcResIdx(STI, Key);
      if (ProcResIdx > 0) {
        // Find the pressure on ProcResIdx `Key`.
        const auto ProcResPressureIt =
            std::find_if(RSC.IdealizedProcResPressure.begin(),
                         RSC.IdealizedProcResPressure.end(),
                         [ProcResIdx](const std::pair<uint16_t, float> &WPR) {
                           return WPR.first == ProcResIdx;
                         });
        SchedClassPoint[I].PerInstructionValue =
            ProcResPressureIt == RSC.IdealizedProcResPressure.end()
                ? 0.0
                : ProcResPressureIt->second;
      } else if (Key == "NumMicroOps") {
        SchedClassPoint[I].PerInstructionValue = RSC.SCDesc->NumMicroOps;
      } else {
        llvm::errs() << "expected `key` to be either a ProcResIdx or a ProcRes "
                        "name, got "
                     << Key << "\n";
        return false;
      }
      ClusterCenterPoint[I].PerInstructionValue = Representative[I].avg();
    }
  } else {
    llvm::errs() << "unimplemented measurement matching for mode " << Mode
                 << "\n";
    return false;
  }
  return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint);
}

void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC,
                                       llvm::raw_ostream &OS) const {
  OS << "<table class=\"sched-class-desc\">";
  OS << "<tr><th>Valid</th><th>Variant</th><th>NumMicroOps</th><th>Latency</"
        "th><th>WriteProcRes</th><th title=\"This is the idealized unit "
        "resource (port) pressure assuming ideal distribution\">Idealized "
        "Resource Pressure</th></tr>";
  if (RSC.SCDesc->isValid()) {
    const auto &SM = SubtargetInfo_->getSchedModel();
    OS << "<tr><td>&#10004;</td>";
    OS << "<td>" << (RSC.WasVariant ? "&#10004;" : "&#10005;") << "</td>";
    OS << "<td>" << RSC.SCDesc->NumMicroOps << "</td>";
    // Latencies.
    OS << "<td><ul>";
    for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) {
      const auto *const Entry =
          SubtargetInfo_->getWriteLatencyEntry(RSC.SCDesc, I);
      OS << "<li>" << Entry->Cycles;
      if (RSC.SCDesc->NumWriteLatencyEntries > 1) {
        // Disambiguate when there is more than one latency entry.
        OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
      }
      OS << "</li>";
    }
    OS << "</ul></td>";
    // WriteProcRes.
    OS << "<td><ul>";
    for (const auto &WPR : RSC.NonRedundantWriteProcRes) {
      OS << "<li><span class=\"mono\">";
      writeEscaped<kEscapeHtml>(OS,
                                SM.getProcResource(WPR.ProcResourceIdx)->Name);
      OS << "</span>: " << WPR.Cycles << "</li>";
    }
    OS << "</ul></td>";
    // Idealized port pressure.
    OS << "<td><ul>";
    for (const auto &Pressure : RSC.IdealizedProcResPressure) {
      OS << "<li><span class=\"mono\">";
      writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel()
                                        .getProcResource(Pressure.first)
                                        ->Name);
      OS << "</span>: ";
      writeMeasurementValue<kEscapeHtml>(OS, Pressure.second);
      OS << "</li>";
    }
    OS << "</ul></td>";
    OS << "</tr>";
  } else {
    OS << "<tr><td>&#10005;</td><td></td><td></td></tr>";
  }
  OS << "</table>";
}

static constexpr const char kHtmlHead[] = R"(
<head>
<title>llvm-exegesis Analysis Results</title>
<style>
body {
  font-family: sans-serif
}
span.sched-class-name {
  font-weight: bold;
  font-family: monospace;
}
span.opcode {
  font-family: monospace;
}
span.config {
  font-family: monospace;
}
div.inconsistency {
  margin-top: 50px;
}
table {
  margin-left: 50px;
  border-collapse: collapse;
}
table, table tr,td,th {
  border: 1px solid #444;
}
table ul {
  padding-left: 0px;
  margin: 0px;
  list-style-type: none;
}
table.sched-class-clusters td {
  padding-left: 10px;
  padding-right: 10px;
  padding-top: 10px;
  padding-bottom: 10px;
}
table.sched-class-desc td {
  padding-left: 10px;
  padding-right: 10px;
  padding-top: 2px;
  padding-bottom: 2px;
}
span.mono {
  font-family: monospace;
}
td.measurement {
  text-align: center;
}
tr.good-cluster td.measurement {
  color: #292
}
tr.bad-cluster td.measurement {
  color: #922
}
tr.good-cluster td.measurement span.minmax {
  color: #888;
}
tr.bad-cluster td.measurement span.minmax {
  color: #888;
}
</style>
</head>
)";

template <>
llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
    llvm::raw_ostream &OS) const {
  const auto &FirstPoint = Clustering_.getPoints()[0];
  // Print the header.
  OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>";
  OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>";
  OS << "<h3>Triple: <span class=\"mono\">";
  writeEscaped<kEscapeHtml>(OS, FirstPoint.LLVMTriple);
  OS << "</span></h3><h3>Cpu: <span class=\"mono\">";
  writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName);
  OS << "</span></h3>";

  for (const auto &RSCAndPoints : makePointsPerSchedClass()) {
    if (!RSCAndPoints.RSC.SCDesc)
      continue;
    // Bucket sched class points into sched class clusters.
    std::vector<SchedClassCluster> SchedClassClusters;
    for (const size_t PointId : RSCAndPoints.PointIds) {
      const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId);
      if (!ClusterId.isValid())
        continue; // Ignore noise and errors. FIXME: take noise into account ?
      auto SchedClassClusterIt =
          std::find_if(SchedClassClusters.begin(), SchedClassClusters.end(),
                       [ClusterId](const SchedClassCluster &C) {
                         return C.id() == ClusterId;
                       });
      if (SchedClassClusterIt == SchedClassClusters.end()) {
        SchedClassClusters.emplace_back();
        SchedClassClusterIt = std::prev(SchedClassClusters.end());
      }
      SchedClassClusterIt->addPoint(PointId, Clustering_);
    }

    // Print any scheduling class that has at least one cluster that does not
    // match the checked-in data.
    if (std::all_of(SchedClassClusters.begin(), SchedClassClusters.end(),
                    [this, &RSCAndPoints](const SchedClassCluster &C) {
                      return C.measurementsMatch(*SubtargetInfo_,
                                                 RSCAndPoints.RSC, Clustering_);
                    }))
      continue; // Nothing weird.

    OS << "<div class=\"inconsistency\"><p>Sched Class <span "
          "class=\"sched-class-name\">";
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    writeEscaped<kEscapeHtml>(OS, RSCAndPoints.RSC.SCDesc->Name);
#else
    // Sched class names are not available in release builds without
    // LLVM_ENABLE_DUMP; fall back to the numeric sched class index, recovered
    // from the descriptor's position in the model's SchedClassTable.
    OS << (RSCAndPoints.RSC.SCDesc -
           SubtargetInfo_->getSchedModel().SchedClassTable);
#endif
    OS << "</span> contains instructions whose performance characteristics do"
          " not match that of LLVM:</p>";
    printSchedClassClustersHtml(SchedClassClusters, RSCAndPoints.RSC, OS);
    OS << "<p>llvm SchedModel data:</p>";
    printSchedClassDescHtml(RSCAndPoints.RSC, OS);
    OS << "</div>";
  }

  OS << "</body></html>";
  return llvm::Error::success();
}

// Distributes a pressure budget as evenly as possible on the provided subunits
// given the already existing port pressure distribution.
//
// The algorithm is as follows: while there is remaining pressure to
// distribute, find the subunits with minimal pressure, and distribute
// remaining pressure equally up to the pressure of the unit with
// second-to-minimal pressure.
// For example, let's assume we want to distribute 2*P1256
// (Subunits = [P1,P2,P5,P6]), and the starting DensePressure is:
//     DensePressure =        P0   P1   P2   P3   P4   P5   P6   P7
//                           0.1  0.3  0.2  0.0  0.0  0.5  0.5  0.5
//     RemainingPressure = 2.0
// We sort the subunits by pressure:
//     Subunits = [(P2,p=0.2), (P1,p=0.3), (P5,p=0.5), (P6,p=0.5)]
// We start with the subunits with minimal pressure, which are at the
// beginning of the sorted array. In this example there is one (P2).
// The subunit with second-to-minimal pressure is the next one in the
// array (P1). So we distribute 0.1 pressure to P2, and remove 0.1 cycles
// from the budget.
//     Subunits = [(P2,p=0.3), (P1,p=0.3), (P5,p=0.5), (P6,p=0.5)]
//     RemainingPressure = 1.9
// We repeat this process: distribute 0.2 pressure on each of the minimal
// P2 and P1, decrease budget by 2*0.2:
//     Subunits = [(P2,p=0.5), (P1,p=0.5), (P5,p=0.5), (P6,p=0.5)]
//     RemainingPressure = 1.5
// There are no second-to-minimal subunits so we just share the remaining
// budget (1.5 cycles) equally:
//     Subunits = [(P2,p=0.875), (P1,p=0.875), (P5,p=0.875), (P6,p=0.875)]
//     RemainingPressure = 0.0
// We stop as there is no remaining budget to distribute.
void distributePressure(float RemainingPressure,
                        llvm::SmallVector<uint16_t, 32> Subunits,
                        llvm::SmallVector<float, 32> &DensePressure) {
  // Find the number of subunits with minimal pressure (they are at the
  // front).
  llvm::sort(Subunits, [&DensePressure](const uint16_t A, const uint16_t B) {
    return DensePressure[A] < DensePressure[B];
  });
  const auto getPressureForSubunit = [&DensePressure,
                                      &Subunits](size_t I) -> float & {
    return DensePressure[Subunits[I]];
  };
  size_t NumMinimalSU = 1;
  while (NumMinimalSU < Subunits.size() &&
         getPressureForSubunit(NumMinimalSU) == getPressureForSubunit(0)) {
    ++NumMinimalSU;
  }
  while (RemainingPressure > 0.0f) {
    if (NumMinimalSU == Subunits.size()) {
      // All units are minimal, just distribute evenly and be done.
      for (size_t I = 0; I < NumMinimalSU; ++I) {
        getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
      }
      return;
    }
    // Distribute the remaining pressure equally.
    const float MinimalPressure = getPressureForSubunit(NumMinimalSU - 1);
    const float SecondToMinimalPressure = getPressureForSubunit(NumMinimalSU);
    assert(MinimalPressure < SecondToMinimalPressure);
    const float Increment = SecondToMinimalPressure - MinimalPressure;
    if (RemainingPressure <= NumMinimalSU * Increment) {
      // There is not enough remaining pressure.
      for (size_t I = 0; I < NumMinimalSU; ++I) {
        getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
      }
      return;
    }
    // Bump all minimal pressure subunits to `SecondToMinimalPressure`; each
    // such bump consumes `Increment` cycles from the budget (see the worked
    // example above).
    for (size_t I = 0; I < NumMinimalSU; ++I) {
      getPressureForSubunit(I) = SecondToMinimalPressure;
      RemainingPressure -= Increment;
    }
    while (NumMinimalSU < Subunits.size() &&
           getPressureForSubunit(NumMinimalSU) == SecondToMinimalPressure) {
      ++NumMinimalSU;
    }
  }
}

std::vector<std::pair<uint16_t, float>> computeIdealizedProcResPressure(
    const llvm::MCSchedModel &SM,
    llvm::SmallVector<llvm::MCWriteProcResEntry, 8> WPRS) {
  // DensePressure[I] is the port pressure for Proc Resource I.
  llvm::SmallVector<float, 32> DensePressure(SM.getNumProcResourceKinds());
  llvm::sort(WPRS, [](const llvm::MCWriteProcResEntry &A,
                      const llvm::MCWriteProcResEntry &B) {
    return A.ProcResourceIdx < B.ProcResourceIdx;
  });
  for (const llvm::MCWriteProcResEntry &WPR : WPRS) {
    // Get units for the entry.
    const llvm::MCProcResourceDesc *const ProcResDesc =
        SM.getProcResource(WPR.ProcResourceIdx);
    if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
      // This is a ProcResUnit.
      DensePressure[WPR.ProcResourceIdx] += WPR.Cycles;
    } else {
      // This is a ProcResGroup.
      llvm::SmallVector<uint16_t, 32> Subunits(ProcResDesc->SubUnitsIdxBegin,
                                               ProcResDesc->SubUnitsIdxBegin +
                                                   ProcResDesc->NumUnits);
      distributePressure(WPR.Cycles, Subunits, DensePressure);
    }
  }
  // Turn dense pressure into sparse pressure by removing zero entries.
  std::vector<std::pair<uint16_t, float>> Pressure;
  for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
    if (DensePressure[I] > 0.0f)
      Pressure.emplace_back(I, DensePressure[I]);
  }
  return Pressure;
}
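
// A small worked example (the resource indices and cycle counts here are
// hypothetical, not taken from any real sched model): given one unit P0 used
// for 1 cycle and a group P06 = {P0, P6} used for 2 cycles, P0 starts with a
// pressure of 1.0 and the group's 2 cycles are then spread so as to even
// things out, yielding an idealized pressure of 1.5 on both P0 and P6.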

} // namespace exegesis