xref: /llvm-project/llvm/tools/llvm-exegesis/lib/Analysis.cpp (revision 62b34fa89a9cabfebe56848e25be4413ee009520)
1 //===-- Analysis.cpp --------------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "Analysis.h"
11 #include "BenchmarkResult.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/Support/FormatVariadic.h"
14 #include <unordered_set>
15 #include <vector>
16 
17 namespace exegesis {
18 
19 static const char kCsvSep = ',';
20 
21 namespace {
22 
23 enum EscapeTag { kEscapeCsv, kEscapeHtml };
24 
25 template <EscapeTag Tag>
26 void writeEscaped(llvm::raw_ostream &OS, const llvm::StringRef S);
27 
28 template <>
29 void writeEscaped<kEscapeCsv>(llvm::raw_ostream &OS, const llvm::StringRef S) {
30   if (std::find(S.begin(), S.end(), kCsvSep) == S.end()) {
31     OS << S;
32   } else {
33     // Needs escaping.
34     OS << '"';
35     for (const char C : S) {
36       if (C == '"')
37         OS << "\"\"";
38       else
39         OS << C;
40     }
41     OS << '"';
42   }
43 }
44 
45 template <>
46 void writeEscaped<kEscapeHtml>(llvm::raw_ostream &OS, const llvm::StringRef S) {
47   for (const char C : S) {
48     if (C == '<')
49       OS << "&lt;";
50     else if (C == '>')
51       OS << "&gt;";
52     else if (C == '&')
53       OS << "&amp;";
54     else
55       OS << C;
56   }
57 }
58 
59 } // namespace
60 
61 template <EscapeTag Tag>
62 static void
63 writeClusterId(llvm::raw_ostream &OS,
64                const InstructionBenchmarkClustering::ClusterId &CID) {
65   if (CID.isNoise())
66     writeEscaped<Tag>(OS, "[noise]");
67   else if (CID.isError())
68     writeEscaped<Tag>(OS, "[error]");
69   else
70     OS << CID.getId();
71 }
72 
73 template <EscapeTag Tag>
74 static void writeMeasurementValue(llvm::raw_ostream &OS, const double Value) {
75   writeEscaped<Tag>(OS, llvm::formatv("{0:F}", Value).str());
76 }
77 
78 // Prints a row representing an instruction, along with scheduling info and
79 // point coordinates (measurements).
80 void Analysis::printInstructionRowCsv(const size_t PointId,
81                                       llvm::raw_ostream &OS) const {
82   const InstructionBenchmark &Point = Clustering_.getPoints()[PointId];
83   writeClusterId<kEscapeCsv>(OS, Clustering_.getClusterIdForPoint(PointId));
84   OS << kCsvSep;
85   writeEscaped<kEscapeCsv>(OS, Point.Key.OpcodeName);
86   OS << kCsvSep;
87   writeEscaped<kEscapeCsv>(OS, Point.Key.Config);
88   OS << kCsvSep;
89   const auto OpcodeIt = MnemonicToOpcode_.find(Point.Key.OpcodeName);
90   if (OpcodeIt != MnemonicToOpcode_.end()) {
91     const unsigned SchedClassId =
92         InstrInfo_->get(OpcodeIt->second).getSchedClass();
93 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
94     const auto &SchedModel = SubtargetInfo_->getSchedModel();
95     const llvm::MCSchedClassDesc *const SCDesc =
96         SchedModel.getSchedClassDesc(SchedClassId);
97     writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
98 #else
99     OS << SchedClassId;
100 #endif
101   }
102   // FIXME: Print the sched class once InstructionBenchmark separates key into
103   // (mnemonic, mode, opaque).
104   for (const auto &Measurement : Point.Measurements) {
105     OS << kCsvSep;
106     writeMeasurementValue<kEscapeCsv>(OS, Measurement.Value);
107   }
108   OS << "\n";
109 }
110 
111 Analysis::Analysis(const llvm::Target &Target,
112                    const InstructionBenchmarkClustering &Clustering)
113     : Clustering_(Clustering) {
114   if (Clustering.getPoints().empty())
115     return;
116 
117   InstrInfo_.reset(Target.createMCInstrInfo());
118   const InstructionBenchmark &FirstPoint = Clustering.getPoints().front();
119   SubtargetInfo_.reset(Target.createMCSubtargetInfo(FirstPoint.LLVMTriple,
120                                                     FirstPoint.CpuName, ""));
121 
122   // Build an index of mnemonic->opcode.
123   for (int I = 0, E = InstrInfo_->getNumOpcodes(); I < E; ++I)
124     MnemonicToOpcode_.emplace(InstrInfo_->getName(I), I);
125 }
126 
127 template <>
128 llvm::Error
129 Analysis::run<Analysis::PrintClusters>(llvm::raw_ostream &OS) const {
130   if (Clustering_.getPoints().empty())
131     return llvm::Error::success();
132 
133   // Write the header.
134   OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config"
135      << kCsvSep << "sched_class";
136   for (const auto &Measurement : Clustering_.getPoints().front().Measurements) {
137     OS << kCsvSep;
138     writeEscaped<kEscapeCsv>(OS, Measurement.Key);
139   }
140   OS << "\n";
141 
142   // Write the points.
143   const auto &Clusters = Clustering_.getValidClusters();
144   for (size_t I = 0, E = Clusters.size(); I < E; ++I) {
145     for (const size_t PointId : Clusters[I].PointIndices) {
146       printInstructionRowCsv(PointId, OS);
147     }
148     OS << "\n\n";
149   }
150   return llvm::Error::success();
151 }
152 
153 std::unordered_map<unsigned, std::vector<size_t>>
154 Analysis::makePointsPerSchedClass() const {
155   std::unordered_map<unsigned, std::vector<size_t>> PointsPerSchedClass;
156   const auto &Points = Clustering_.getPoints();
157   for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) {
158     const InstructionBenchmark &Point = Points[PointId];
159     if (!Point.Error.empty())
160       continue;
161     const auto OpcodeIt = MnemonicToOpcode_.find(Point.Key.OpcodeName);
162     if (OpcodeIt == MnemonicToOpcode_.end())
163       continue;
164     const unsigned SchedClassId =
165         InstrInfo_->get(OpcodeIt->second).getSchedClass();
166     PointsPerSchedClass[SchedClassId].push_back(PointId);
167   }
168   return PointsPerSchedClass;
169 }
170 
171 void Analysis::printSchedClassClustersHtml(
172     const std::vector<SchedClassCluster> &Clusters, const SchedClass &SC,
173     llvm::raw_ostream &OS) const {
174   const auto &Points = Clustering_.getPoints();
175   OS << "<table class=\"sched-class-clusters\">";
176   OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
177   assert(!Clusters.empty());
178   for (const auto &Measurement :
179        Points[Clusters[0].getPointIds()[0]].Measurements) {
180     OS << "<th>";
181     if (Measurement.DebugString.empty())
182       writeEscaped<kEscapeHtml>(OS, Measurement.Key);
183     else
184       writeEscaped<kEscapeHtml>(OS, Measurement.DebugString);
185     OS << "</th>";
186   }
187   OS << "</tr>";
188   for (const SchedClassCluster &Cluster : Clusters) {
189     OS << "<tr class=\""
190        << (Cluster.measurementsMatch(*SubtargetInfo_, SC, Clustering_)
191                ? "good-cluster"
192                : "bad-cluster")
193        << "\"><td>";
194     writeClusterId<kEscapeHtml>(OS, Cluster.id());
195     OS << "</td><td><ul>";
196     for (const size_t PointId : Cluster.getPointIds()) {
197       const auto &Point = Points[PointId];
198       OS << "<li><span class=\"mono\">";
199       writeEscaped<kEscapeHtml>(OS, Point.Key.OpcodeName);
200       OS << "</span> <span class=\"mono\">";
201       writeEscaped<kEscapeHtml>(OS, Point.Key.Config);
202       OS << "</span></li>";
203     }
204     OS << "</ul></td>";
205     for (const auto &Stats : Cluster.getRepresentative()) {
206       OS << "<td class=\"measurement\">";
207       writeMeasurementValue<kEscapeHtml>(OS, Stats.avg());
208       OS << "<br><span class=\"minmax\">[";
209       writeMeasurementValue<kEscapeHtml>(OS, Stats.min());
210       OS << ";";
211       writeMeasurementValue<kEscapeHtml>(OS, Stats.max());
212       OS << "]</span></td>";
213     }
214     OS << "</tr>";
215   }
216   OS << "</table>";
217 }
218 
219 // Return the non-redundant list of WriteProcRes used by the given sched class.
220 // The scheduling model for LLVM is such that each instruction has a certain
221 // number of uops which consume resources which are described by WriteProcRes
222 // entries. Each entry describe how many cycles are spent on a specific ProcRes
223 // kind.
224 // For example, an instruction might have 3 uOps, one dispatching on P0
225 // (ProcResIdx=1) and two on P06 (ProcResIdx = 7).
226 // Note that LLVM additionally denormalizes resource consumption to include
227 // usage of super resources by subresources. So in practice if there exists a
228 // P016 (ProcResIdx=10), then the cycles consumed by P0 are also consumed by
229 // P06 (ProcResIdx = 7) and P016 (ProcResIdx = 10), and the resources consumed
230 // by P06 are also consumed by P016. In the figure below, parenthesized cycles
231 // denote implied usage of superresources by subresources:
232 //            P0      P06    P016
233 //     uOp1    1      (1)     (1)
234 //     uOp2            1      (1)
235 //     uOp3            1      (1)
236 //     =============================
237 //             1       3       3
238 // Eventually we end up with three entries for the WriteProcRes of the
239 // instruction:
240 //    {ProcResIdx=1,  Cycles=1}  // P0
241 //    {ProcResIdx=7,  Cycles=3}  // P06
242 //    {ProcResIdx=10, Cycles=3}  // P016
243 //
244 // Note that in this case, P016 does not contribute any cycles, so it would
245 // be removed by this function.
246 // FIXME: Move this to MCSubtargetInfo and use it in llvm-mca.
247 static llvm::SmallVector<llvm::MCWriteProcResEntry, 8>
248 getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc,
249                             const llvm::MCSubtargetInfo &STI) {
250   llvm::SmallVector<llvm::MCWriteProcResEntry, 8> Result;
251   const auto &SM = STI.getSchedModel();
252   const unsigned NumProcRes = SM.getNumProcResourceKinds();
253 
254   // This assumes that the ProcResDescs are sorted in topological order, which
255   // is guaranteed by the tablegen backend.
256   llvm::SmallVector<float, 32> ProcResUnitUsage(NumProcRes);
257   for (const auto *WPR = STI.getWriteProcResBegin(&SCDesc),
258                   *const WPREnd = STI.getWriteProcResEnd(&SCDesc);
259        WPR != WPREnd; ++WPR) {
260     const llvm::MCProcResourceDesc *const ProcResDesc =
261         SM.getProcResource(WPR->ProcResourceIdx);
262     if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
263       // This is a ProcResUnit.
264       Result.push_back({WPR->ProcResourceIdx, WPR->Cycles});
265       ProcResUnitUsage[WPR->ProcResourceIdx] += WPR->Cycles;
266     } else {
267       // This is a ProcResGroup. First see if it contributes any cycles or if
268       // it has cycles just from subunits.
269       float RemainingCycles = WPR->Cycles;
270       for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
271            SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
272            ++SubResIdx) {
273         RemainingCycles -= ProcResUnitUsage[*SubResIdx];
274       }
275       if (RemainingCycles < 0.01f) {
276         // The ProcResGroup contributes no cycles of its own.
277         continue;
278       }
279       // The ProcResGroup contributes `RemainingCycles` cycles of its own.
280       Result.push_back({WPR->ProcResourceIdx,
281                         static_cast<uint16_t>(std::round(RemainingCycles))});
282       // Spread the remaining cycles over all subunits.
283       for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
284            SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
285            ++SubResIdx) {
286         ProcResUnitUsage[*SubResIdx] += RemainingCycles / ProcResDesc->NumUnits;
287       }
288     }
289   }
290   return Result;
291 }
292 
293 Analysis::SchedClass::SchedClass(const llvm::MCSchedClassDesc &SD,
294                                  const llvm::MCSubtargetInfo &STI)
295     : SCDesc(SD),
296       NonRedundantWriteProcRes(getNonRedundantWriteProcRes(SD, STI)),
297       IdealizedProcResPressure(computeIdealizedProcResPressure(
298           STI.getSchedModel(), NonRedundantWriteProcRes)) {}
299 
300 void Analysis::SchedClassCluster::addPoint(
301     size_t PointId, const InstructionBenchmarkClustering &Clustering) {
302   PointIds.push_back(PointId);
303   const auto &Point = Clustering.getPoints()[PointId];
304   if (ClusterId.isUndef()) {
305     ClusterId = Clustering.getClusterIdForPoint(PointId);
306     Representative.resize(Point.Measurements.size());
307   }
308   for (size_t I = 0, E = Point.Measurements.size(); I < E; ++I) {
309     Representative[I].push(Point.Measurements[I]);
310   }
311   assert(ClusterId == Clustering.getClusterIdForPoint(PointId));
312 }
313 
314 bool Analysis::SchedClassCluster::measurementsMatch(
315     const llvm::MCSubtargetInfo &STI, const SchedClass &SC,
316     const InstructionBenchmarkClustering &Clustering) const {
317   const size_t NumMeasurements = Representative.size();
318   std::vector<BenchmarkMeasure> ClusterCenterPoint(NumMeasurements);
319   std::vector<BenchmarkMeasure> SchedClassPoint(NumMeasurements);
320   // Latency case.
321   assert(!Clustering.getPoints().empty());
322   const InstructionBenchmark::ModeE Mode = Clustering.getPoints()[0].Mode;
323   if (Mode == InstructionBenchmark::Latency) {
324     if (NumMeasurements != 1) {
325       llvm::errs()
326           << "invalid number of measurements in latency mode: expected 1, got "
327           << NumMeasurements << "\n";
328       return false;
329     }
330     // Find the latency.
331     SchedClassPoint[0].Value = 0.0;
332     for (unsigned I = 0; I < SC.SCDesc.NumWriteLatencyEntries; ++I) {
333       const llvm::MCWriteLatencyEntry *const WLE =
334           STI.getWriteLatencyEntry(&SC.SCDesc, I);
335       SchedClassPoint[0].Value =
336           std::max<double>(SchedClassPoint[0].Value, WLE->Cycles);
337     }
338     ClusterCenterPoint[0].Value = Representative[0].avg();
339   } else if (Mode == InstructionBenchmark::Uops) {
340     for (int I = 0, E = Representative.size(); I < E; ++I) {
341       // Find the pressure on ProcResIdx `Key`.
342       uint16_t ProcResIdx = 0;
343       if (!llvm::to_integer(Representative[I].key(), ProcResIdx, 10)) {
344         llvm::errs() << "expected ProcResIdx key, got "
345                      << Representative[I].key() << "\n";
346         return false;
347       }
348       const auto ProcResPressureIt =
349           std::find_if(SC.IdealizedProcResPressure.begin(),
350                        SC.IdealizedProcResPressure.end(),
351                        [ProcResIdx](const std::pair<uint16_t, float> &WPR) {
352                          return WPR.first == ProcResIdx;
353                        });
354       SchedClassPoint[I].Value =
355           ProcResPressureIt == SC.IdealizedProcResPressure.end()
356               ? 0.0
357               : ProcResPressureIt->second;
358       ClusterCenterPoint[I].Value = Representative[I].avg();
359     }
360   } else {
361     llvm::errs() << "unimplemented measurement matching for mode " << Mode
362                  << "\n";
363     return false;
364   }
365   return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint);
366 }
367 
368 void Analysis::printSchedClassDescHtml(const SchedClass &SC,
369                                        llvm::raw_ostream &OS) const {
370   OS << "<table class=\"sched-class-desc\">";
371   OS << "<tr><th>Valid</th><th>Variant</th><th>uOps</th><th>Latency</"
372         "th><th>WriteProcRes</th><th title=\"This is the idealized unit "
373         "resource (port) pressure assuming ideal distribution\">Idealized "
374         "Resource Pressure</th></tr>";
375   if (SC.SCDesc.isValid()) {
376     const auto &SM = SubtargetInfo_->getSchedModel();
377     OS << "<tr><td>&#10004;</td>";
378     OS << "<td>" << (SC.SCDesc.isVariant() ? "&#10004;" : "&#10005;")
379        << "</td>";
380     OS << "<td>" << SC.SCDesc.NumMicroOps << "</td>";
381     // Latencies.
382     OS << "<td><ul>";
383     for (int I = 0, E = SC.SCDesc.NumWriteLatencyEntries; I < E; ++I) {
384       const auto *const Entry =
385           SubtargetInfo_->getWriteLatencyEntry(&SC.SCDesc, I);
386       OS << "<li>" << Entry->Cycles;
387       if (SC.SCDesc.NumWriteLatencyEntries > 1) {
388         // Dismabiguate if more than 1 latency.
389         OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
390       }
391       OS << "</li>";
392     }
393     OS << "</ul></td>";
394     // WriteProcRes.
395     OS << "<td><ul>";
396     for (const auto &WPR : SC.NonRedundantWriteProcRes) {
397       OS << "<li><span class=\"mono\">";
398       writeEscaped<kEscapeHtml>(OS,
399                                 SM.getProcResource(WPR.ProcResourceIdx)->Name);
400       OS << "</span>: " << WPR.Cycles << "</li>";
401     }
402     OS << "</ul></td>";
403     // Idealized port pressure.
404     OS << "<td><ul>";
405     for (const auto &Pressure : SC.IdealizedProcResPressure) {
406       OS << "<li><span class=\"mono\">";
407       writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel()
408                                         .getProcResource(Pressure.first)
409                                         ->Name);
410       OS << "</span>: ";
411       writeMeasurementValue<kEscapeHtml>(OS, Pressure.second);
412       OS << "</li>";
413     }
414     OS << "</ul></td>";
415     OS << "</tr>";
416   } else {
417     OS << "<tr><td>&#10005;</td><td></td><td></td></tr>";
418   }
419   OS << "</table>";
420 }
421 
422 static constexpr const char kHtmlHead[] = R"(
423 <head>
424 <title>llvm-exegesis Analysis Results</title>
425 <style>
426 body {
427   font-family: sans-serif
428 }
429 span.sched-class-name {
430   font-weight: bold;
431   font-family: monospace;
432 }
433 span.opcode {
434   font-family: monospace;
435 }
436 span.config {
437   font-family: monospace;
438 }
439 div.inconsistency {
440   margin-top: 50px;
441 }
442 table {
443   margin-left: 50px;
444   border-collapse: collapse;
445 }
446 table, table tr,td,th {
447   border: 1px solid #444;
448 }
449 table ul {
450   padding-left: 0px;
451   margin: 0px;
452   list-style-type: none;
453 }
454 table.sched-class-clusters td {
455   padding-left: 10px;
456   padding-right: 10px;
457   padding-top: 10px;
458   padding-bottom: 10px;
459 }
460 table.sched-class-desc td {
461   padding-left: 10px;
462   padding-right: 10px;
463   padding-top: 2px;
464   padding-bottom: 2px;
465 }
466 span.mono {
467   font-family: monospace;
468 }
469 td.measurement {
470   text-align: center;
471 }
472 tr.good-cluster td.measurement {
473   color: #292
474 }
475 tr.bad-cluster td.measurement {
476   color: #922
477 }
478 tr.good-cluster td.measurement span.minmax {
479   color: #888;
480 }
481 tr.bad-cluster td.measurement span.minmax {
482   color: #888;
483 }
484 </style>
485 </head>
486 )";
487 
488 template <>
489 llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
490     llvm::raw_ostream &OS) const {
491   const auto &FirstPoint = Clustering_.getPoints()[0];
492   // Print the header.
493   OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>";
494   OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>";
495   OS << "<h3>Triple: <span class=\"mono\">";
496   writeEscaped<kEscapeHtml>(OS, FirstPoint.LLVMTriple);
497   OS << "</span></h3><h3>Cpu: <span class=\"mono\">";
498   writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName);
499   OS << "</span></h3>";
500 
501   for (const auto &SchedClassAndPoints : makePointsPerSchedClass()) {
502     const auto SchedClassId = SchedClassAndPoints.first;
503     const std::vector<size_t> &SchedClassPoints = SchedClassAndPoints.second;
504     const auto &SchedModel = SubtargetInfo_->getSchedModel();
505     const llvm::MCSchedClassDesc *const SCDesc =
506         SchedModel.getSchedClassDesc(SchedClassId);
507     if (!SCDesc)
508       continue;
509     const SchedClass SC(*SCDesc, *SubtargetInfo_);
510 
511     // Bucket sched class points into sched class clusters.
512     std::vector<SchedClassCluster> SchedClassClusters;
513     for (const size_t PointId : SchedClassPoints) {
514       const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId);
515       if (!ClusterId.isValid())
516         continue; // Ignore noise and errors. FIXME: take noise into account ?
517       auto SchedClassClusterIt =
518           std::find_if(SchedClassClusters.begin(), SchedClassClusters.end(),
519                        [ClusterId](const SchedClassCluster &C) {
520                          return C.id() == ClusterId;
521                        });
522       if (SchedClassClusterIt == SchedClassClusters.end()) {
523         SchedClassClusters.emplace_back();
524         SchedClassClusterIt = std::prev(SchedClassClusters.end());
525       }
526       SchedClassClusterIt->addPoint(PointId, Clustering_);
527     }
528 
529     // Print any scheduling class that has at least one cluster that does not
530     // match the checked-in data.
531     if (std::all_of(SchedClassClusters.begin(), SchedClassClusters.end(),
532                     [this, &SC](const SchedClassCluster &C) {
533                       return C.measurementsMatch(*SubtargetInfo_, SC,
534                                                  Clustering_);
535                     }))
536       continue; // Nothing weird.
537 
538     OS << "<div class=\"inconsistency\"><p>Sched Class <span "
539           "class=\"sched-class-name\">";
540 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
541     writeEscaped<kEscapeHtml>(OS, SCDesc->Name);
542 #else
543     OS << SchedClassId;
544 #endif
545     OS << "</span> contains instructions whose performance characteristics do"
546           " not match that of LLVM:</p>";
547     printSchedClassClustersHtml(SchedClassClusters, SC, OS);
548     OS << "<p>llvm SchedModel data:</p>";
549     printSchedClassDescHtml(SC, OS);
550     OS << "</div>";
551   }
552 
553   OS << "</body></html>";
554   return llvm::Error::success();
555 }
556 
557 // Distributes a pressure budget as evenly as possible on the provided subunits
558 // given the already existing port pressure distribution.
559 //
560 // The algorithm is as follows: while there is remaining pressure to
561 // distribute, find the subunits with minimal pressure, and distribute
562 // remaining pressure equally up to the pressure of the unit with
563 // second-to-minimal pressure.
564 // For example, let's assume we want to distribute 2*P1256
565 // (Subunits = [P1,P2,P5,P6]), and the starting DensePressure is:
566 //     DensePressure =        P0   P1   P2   P3   P4   P5   P6   P7
567 //                           0.1  0.3  0.2  0.0  0.0  0.5  0.5  0.5
568 //     RemainingPressure = 2.0
569 // We sort the subunits by pressure:
570 //     Subunits = [(P2,p=0.2), (P1,p=0.3), (P5,p=0.5), (P6, p=0.5)]
571 // We'll first start by the subunits with minimal pressure, which are at
572 // the beginning of the sorted array. In this example there is one (P2).
573 // The subunit with second-to-minimal pressure is the next one in the
574 // array (P1). So we distribute 0.1 pressure to P2, and remove 0.1 cycles
575 // from the budget.
576 //     Subunits = [(P2,p=0.3), (P1,p=0.3), (P5,p=0.5), (P5,p=0.5)]
577 //     RemainingPressure = 1.9
578 // We repeat this process: distribute 0.2 pressure on each of the minimal
579 // P2 and P1, decrease budget by 2*0.2:
580 //     Subunits = [(P2,p=0.5), (P1,p=0.5), (P5,p=0.5), (P5,p=0.5)]
581 //     RemainingPressure = 1.5
582 // There are no second-to-minimal subunits so we just share the remaining
583 // budget (1.5 cycles) equally:
584 //     Subunits = [(P2,p=0.875), (P1,p=0.875), (P5,p=0.875), (P5,p=0.875)]
585 //     RemainingPressure = 0.0
586 // We stop as there is no remaining budget to distribute.
587 void distributePressure(float RemainingPressure,
588                         llvm::SmallVector<uint16_t, 32> Subunits,
589                         llvm::SmallVector<float, 32> &DensePressure) {
590   // Find the number of subunits with minimal pressure (they are at the
591   // front).
592   llvm::sort(Subunits.begin(), Subunits.end(),
593              [&DensePressure](const uint16_t A, const uint16_t B) {
594                return DensePressure[A] < DensePressure[B];
595              });
596   const auto getPressureForSubunit = [&DensePressure,
597                                       &Subunits](size_t I) -> float & {
598     return DensePressure[Subunits[I]];
599   };
600   size_t NumMinimalSU = 1;
601   while (NumMinimalSU < Subunits.size() &&
602          getPressureForSubunit(NumMinimalSU) == getPressureForSubunit(0)) {
603     ++NumMinimalSU;
604   }
605   while (RemainingPressure > 0.0f) {
606     if (NumMinimalSU == Subunits.size()) {
607       // All units are minimal, just distribute evenly and be done.
608       for (size_t I = 0; I < NumMinimalSU; ++I) {
609         getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
610       }
611       return;
612     }
613     // Distribute the remaining pressure equally.
614     const float MinimalPressure = getPressureForSubunit(NumMinimalSU - 1);
615     const float SecondToMinimalPressure = getPressureForSubunit(NumMinimalSU);
616     assert(MinimalPressure < SecondToMinimalPressure);
617     const float Increment = SecondToMinimalPressure - MinimalPressure;
618     if (RemainingPressure <= NumMinimalSU * Increment) {
619       // There is not enough remaining pressure.
620       for (size_t I = 0; I < NumMinimalSU; ++I) {
621         getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
622       }
623       return;
624     }
625     // Bump all minimal pressure subunits to `SecondToMinimalPressure`.
626     for (size_t I = 0; I < NumMinimalSU; ++I) {
627       getPressureForSubunit(I) = SecondToMinimalPressure;
628       RemainingPressure -= SecondToMinimalPressure;
629     }
630     while (NumMinimalSU < Subunits.size() &&
631            getPressureForSubunit(NumMinimalSU) == SecondToMinimalPressure) {
632       ++NumMinimalSU;
633     }
634   }
635 }
636 
637 std::vector<std::pair<uint16_t, float>> computeIdealizedProcResPressure(
638     const llvm::MCSchedModel &SM,
639     llvm::SmallVector<llvm::MCWriteProcResEntry, 8> WPRS) {
640   // DensePressure[I] is the port pressure for Proc Resource I.
641   llvm::SmallVector<float, 32> DensePressure(SM.getNumProcResourceKinds());
642   llvm::sort(WPRS.begin(), WPRS.end(),
643              [](const llvm::MCWriteProcResEntry &A,
644                 const llvm::MCWriteProcResEntry &B) {
645                return A.ProcResourceIdx < B.ProcResourceIdx;
646              });
647   for (const llvm::MCWriteProcResEntry &WPR : WPRS) {
648     // Get units for the entry.
649     const llvm::MCProcResourceDesc *const ProcResDesc =
650         SM.getProcResource(WPR.ProcResourceIdx);
651     if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
652       // This is a ProcResUnit.
653       DensePressure[WPR.ProcResourceIdx] += WPR.Cycles;
654     } else {
655       // This is a ProcResGroup.
656       llvm::SmallVector<uint16_t, 32> Subunits(ProcResDesc->SubUnitsIdxBegin,
657                                                ProcResDesc->SubUnitsIdxBegin +
658                                                    ProcResDesc->NumUnits);
659       distributePressure(WPR.Cycles, Subunits, DensePressure);
660     }
661   }
662   // Turn dense pressure into sparse pressure by removing zero entries.
663   std::vector<std::pair<uint16_t, float>> Pressure;
664   for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
665     if (DensePressure[I] > 0.0f)
666       Pressure.emplace_back(I, DensePressure[I]);
667   }
668   return Pressure;
669 }
670 
671 } // namespace exegesis
672