1 //===-- Analysis.cpp --------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "Analysis.h" 10 #include "BenchmarkResult.h" 11 #include "llvm/ADT/STLExtras.h" 12 #include "llvm/MC/MCAsmInfo.h" 13 #include "llvm/MC/MCTargetOptions.h" 14 #include "llvm/Support/FormatVariadic.h" 15 #include <limits> 16 #include <vector> 17 18 namespace llvm { 19 namespace exegesis { 20 21 static const char kCsvSep = ','; 22 23 namespace { 24 25 enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString }; 26 27 template <EscapeTag Tag> void writeEscaped(raw_ostream &OS, const StringRef S); 28 29 template <> void writeEscaped<kEscapeCsv>(raw_ostream &OS, const StringRef S) { 30 if (!S.contains(kCsvSep)) { 31 OS << S; 32 } else { 33 // Needs escaping. 34 OS << '"'; 35 for (const char C : S) { 36 if (C == '"') 37 OS << "\"\""; 38 else 39 OS << C; 40 } 41 OS << '"'; 42 } 43 } 44 45 template <> void writeEscaped<kEscapeHtml>(raw_ostream &OS, const StringRef S) { 46 for (const char C : S) { 47 if (C == '<') 48 OS << "<"; 49 else if (C == '>') 50 OS << ">"; 51 else if (C == '&') 52 OS << "&"; 53 else 54 OS << C; 55 } 56 } 57 58 template <> 59 void writeEscaped<kEscapeHtmlString>(raw_ostream &OS, const StringRef S) { 60 for (const char C : S) { 61 if (C == '"') 62 OS << "\\\""; 63 else 64 OS << C; 65 } 66 } 67 68 } // namespace 69 70 template <EscapeTag Tag> 71 static void 72 writeClusterId(raw_ostream &OS, 73 const BenchmarkClustering::ClusterId &CID) { 74 if (CID.isNoise()) 75 writeEscaped<Tag>(OS, "[noise]"); 76 else if (CID.isError()) 77 writeEscaped<Tag>(OS, "[error]"); 78 else 79 OS << CID.getId(); 80 } 81 82 template <EscapeTag Tag> 83 static void writeMeasurementValue(raw_ostream &OS, const double Value) { 84 // Given Value, if we wanted to serialize it to a string, 85 // how many base-10 digits will we need to store, max? 86 static constexpr auto MaxDigitCount = 87 std::numeric_limits<decltype(Value)>::max_digits10; 88 // Also, we will need a decimal separator. 89 static constexpr auto DecimalSeparatorLen = 1; // '.' e.g. 90 // So how long of a string will the serialization produce, max? 91 static constexpr auto SerializationLen = MaxDigitCount + DecimalSeparatorLen; 92 93 // WARNING: when changing the format, also adjust the small-size estimate ^. 94 static constexpr StringLiteral SimpleFloatFormat = StringLiteral("{0:F}"); 95 96 writeEscaped<Tag>( 97 OS, formatv(SimpleFloatFormat.data(), Value).sstr<SerializationLen>()); 98 } 99 100 template <typename EscapeTag, EscapeTag Tag> 101 void Analysis::writeSnippet(raw_ostream &OS, ArrayRef<uint8_t> Bytes, 102 const char *Separator) const { 103 SmallVector<std::string, 3> Lines; 104 // Parse the asm snippet and print it. 105 while (!Bytes.empty()) { 106 MCInst MI; 107 uint64_t MISize = 0; 108 if (!DisasmHelper_->decodeInst(MI, MISize, Bytes)) { 109 writeEscaped<Tag>(OS, join(Lines, Separator)); 110 writeEscaped<Tag>(OS, Separator); 111 writeEscaped<Tag>(OS, "[error decoding asm snippet]"); 112 return; 113 } 114 SmallString<128> InstPrinterStr; // FIXME: magic number. 115 raw_svector_ostream OSS(InstPrinterStr); 116 DisasmHelper_->printInst(&MI, OSS); 117 Bytes = Bytes.drop_front(MISize); 118 Lines.emplace_back(InstPrinterStr.str().trim()); 119 } 120 writeEscaped<Tag>(OS, join(Lines, Separator)); 121 } 122 123 // Prints a row representing an instruction, along with scheduling info and 124 // point coordinates (measurements). 125 void Analysis::printInstructionRowCsv(const size_t PointId, 126 raw_ostream &OS) const { 127 const Benchmark &Point = Clustering_.getPoints()[PointId]; 128 writeClusterId<kEscapeCsv>(OS, Clustering_.getClusterIdForPoint(PointId)); 129 OS << kCsvSep; 130 writeSnippet<EscapeTag, kEscapeCsv>(OS, Point.AssembledSnippet, "; "); 131 OS << kCsvSep; 132 writeEscaped<kEscapeCsv>(OS, Point.Key.Config); 133 OS << kCsvSep; 134 assert(!Point.Key.Instructions.empty()); 135 const MCInst &MCI = Point.keyInstruction(); 136 unsigned SchedClassId; 137 std::tie(SchedClassId, std::ignore) = ResolvedSchedClass::resolveSchedClassId( 138 State_.getSubtargetInfo(), State_.getInstrInfo(), MCI); 139 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 140 const MCSchedClassDesc *const SCDesc = 141 State_.getSubtargetInfo().getSchedModel().getSchedClassDesc(SchedClassId); 142 writeEscaped<kEscapeCsv>(OS, SCDesc->Name); 143 #else 144 OS << SchedClassId; 145 #endif 146 for (const auto &Measurement : Point.Measurements) { 147 OS << kCsvSep; 148 writeMeasurementValue<kEscapeCsv>(OS, Measurement.PerInstructionValue); 149 } 150 OS << "\n"; 151 } 152 153 Analysis::Analysis(const LLVMState &State, 154 const BenchmarkClustering &Clustering, 155 double AnalysisInconsistencyEpsilon, 156 bool AnalysisDisplayUnstableOpcodes) 157 : Clustering_(Clustering), State_(State), 158 AnalysisInconsistencyEpsilonSquared_(AnalysisInconsistencyEpsilon * 159 AnalysisInconsistencyEpsilon), 160 AnalysisDisplayUnstableOpcodes_(AnalysisDisplayUnstableOpcodes) { 161 if (Clustering.getPoints().empty()) 162 return; 163 164 DisasmHelper_ = std::make_unique<DisassemblerHelper>(State); 165 } 166 167 template <> 168 Error Analysis::run<Analysis::PrintClusters>(raw_ostream &OS) const { 169 if (Clustering_.getPoints().empty()) 170 return Error::success(); 171 172 // Write the header. 173 OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config" 174 << kCsvSep << "sched_class"; 175 for (const auto &Measurement : Clustering_.getPoints().front().Measurements) { 176 OS << kCsvSep; 177 writeEscaped<kEscapeCsv>(OS, Measurement.Key); 178 } 179 OS << "\n"; 180 181 // Write the points. 182 for (const auto &ClusterIt : Clustering_.getValidClusters()) { 183 for (const size_t PointId : ClusterIt.PointIndices) { 184 printInstructionRowCsv(PointId, OS); 185 } 186 OS << "\n\n"; 187 } 188 return Error::success(); 189 } 190 191 Analysis::ResolvedSchedClassAndPoints::ResolvedSchedClassAndPoints( 192 ResolvedSchedClass &&RSC) 193 : RSC(std::move(RSC)) {} 194 195 std::vector<Analysis::ResolvedSchedClassAndPoints> 196 Analysis::makePointsPerSchedClass() const { 197 std::vector<ResolvedSchedClassAndPoints> Entries; 198 // Maps SchedClassIds to index in result. 199 std::unordered_map<unsigned, size_t> SchedClassIdToIndex; 200 const auto &Points = Clustering_.getPoints(); 201 for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) { 202 const Benchmark &Point = Points[PointId]; 203 if (!Point.Error.empty()) 204 continue; 205 assert(!Point.Key.Instructions.empty()); 206 // FIXME: we should be using the tuple of classes for instructions in the 207 // snippet as key. 208 const MCInst &MCI = Point.keyInstruction(); 209 unsigned SchedClassId; 210 bool WasVariant; 211 std::tie(SchedClassId, WasVariant) = 212 ResolvedSchedClass::resolveSchedClassId(State_.getSubtargetInfo(), 213 State_.getInstrInfo(), MCI); 214 const auto IndexIt = SchedClassIdToIndex.find(SchedClassId); 215 if (IndexIt == SchedClassIdToIndex.end()) { 216 // Create a new entry. 217 SchedClassIdToIndex.emplace(SchedClassId, Entries.size()); 218 ResolvedSchedClassAndPoints Entry(ResolvedSchedClass( 219 State_.getSubtargetInfo(), SchedClassId, WasVariant)); 220 Entry.PointIds.push_back(PointId); 221 Entries.push_back(std::move(Entry)); 222 } else { 223 // Append to the existing entry. 224 Entries[IndexIt->second].PointIds.push_back(PointId); 225 } 226 } 227 return Entries; 228 } 229 230 // Parallel benchmarks repeat the same opcode multiple times. Just show this 231 // opcode and show the whole snippet only on hover. 232 static void writeParallelSnippetHtml(raw_ostream &OS, 233 const std::vector<MCInst> &Instructions, 234 const MCInstrInfo &InstrInfo) { 235 if (Instructions.empty()) 236 return; 237 writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instructions[0].getOpcode())); 238 if (Instructions.size() > 1) 239 OS << " (x" << Instructions.size() << ")"; 240 } 241 242 // Latency tries to find a serial path. Just show the opcode path and show the 243 // whole snippet only on hover. 244 static void writeLatencySnippetHtml(raw_ostream &OS, 245 const std::vector<MCInst> &Instructions, 246 const MCInstrInfo &InstrInfo) { 247 bool First = true; 248 for (const MCInst &Instr : Instructions) { 249 if (First) 250 First = false; 251 else 252 OS << " → "; 253 writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instr.getOpcode())); 254 } 255 } 256 257 void Analysis::printPointHtml(const Benchmark &Point, raw_ostream &OS) const { 258 OS << "<li><span class=\"mono\" title=\""; 259 writeSnippet<EscapeTag, kEscapeHtmlString>(OS, Point.AssembledSnippet, "\n"); 260 OS << "\">"; 261 switch (Point.Mode) { 262 case Benchmark::Latency: 263 writeLatencySnippetHtml(OS, Point.Key.Instructions, State_.getInstrInfo()); 264 break; 265 case Benchmark::Uops: 266 case Benchmark::InverseThroughput: 267 writeParallelSnippetHtml(OS, Point.Key.Instructions, State_.getInstrInfo()); 268 break; 269 default: 270 llvm_unreachable("invalid mode"); 271 } 272 OS << "</span> <span class=\"mono\">"; 273 writeEscaped<kEscapeHtml>(OS, Point.Key.Config); 274 OS << "</span></li>"; 275 } 276 277 void Analysis::printSchedClassClustersHtml( 278 const std::vector<SchedClassCluster> &Clusters, 279 const ResolvedSchedClass &RSC, raw_ostream &OS) const { 280 const auto &Points = Clustering_.getPoints(); 281 OS << "<table class=\"sched-class-clusters\">"; 282 OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>"; 283 assert(!Clusters.empty()); 284 for (const auto &Measurement : 285 Points[Clusters[0].getPointIds()[0]].Measurements) { 286 OS << "<th>"; 287 writeEscaped<kEscapeHtml>(OS, Measurement.Key); 288 OS << "</th>"; 289 } 290 OS << "</tr>"; 291 for (const SchedClassCluster &Cluster : Clusters) { 292 OS << "<tr class=\"" 293 << (Cluster.measurementsMatch(State_.getSubtargetInfo(), RSC, 294 Clustering_, 295 AnalysisInconsistencyEpsilonSquared_) 296 ? "good-cluster" 297 : "bad-cluster") 298 << "\"><td>"; 299 writeClusterId<kEscapeHtml>(OS, Cluster.id()); 300 OS << "</td><td><ul>"; 301 for (const size_t PointId : Cluster.getPointIds()) { 302 printPointHtml(Points[PointId], OS); 303 } 304 OS << "</ul></td>"; 305 for (const auto &Stats : Cluster.getCentroid().getStats()) { 306 OS << "<td class=\"measurement\">"; 307 writeMeasurementValue<kEscapeHtml>(OS, Stats.avg()); 308 OS << "<br><span class=\"minmax\">["; 309 writeMeasurementValue<kEscapeHtml>(OS, Stats.min()); 310 OS << ";"; 311 writeMeasurementValue<kEscapeHtml>(OS, Stats.max()); 312 OS << "]</span></td>"; 313 } 314 OS << "</tr>"; 315 } 316 OS << "</table>"; 317 } 318 319 void Analysis::SchedClassCluster::addPoint( 320 size_t PointId, const BenchmarkClustering &Clustering) { 321 PointIds.push_back(PointId); 322 const auto &Point = Clustering.getPoints()[PointId]; 323 if (ClusterId.isUndef()) 324 ClusterId = Clustering.getClusterIdForPoint(PointId); 325 assert(ClusterId == Clustering.getClusterIdForPoint(PointId)); 326 327 Centroid.addPoint(Point.Measurements); 328 } 329 330 bool Analysis::SchedClassCluster::measurementsMatch( 331 const MCSubtargetInfo &STI, const ResolvedSchedClass &RSC, 332 const BenchmarkClustering &Clustering, 333 const double AnalysisInconsistencyEpsilonSquared_) const { 334 assert(!Clustering.getPoints().empty()); 335 const Benchmark::ModeE Mode = Clustering.getPoints()[0].Mode; 336 337 if (!Centroid.validate(Mode)) 338 return false; 339 340 const std::vector<BenchmarkMeasure> ClusterCenterPoint = 341 Centroid.getAsPoint(); 342 343 const std::vector<BenchmarkMeasure> SchedClassPoint = 344 RSC.getAsPoint(Mode, STI, Centroid.getStats()); 345 if (SchedClassPoint.empty()) 346 return false; // In Uops mode validate() may not be enough. 347 348 assert(ClusterCenterPoint.size() == SchedClassPoint.size() && 349 "Expected measured/sched data dimensions to match."); 350 351 return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint, 352 AnalysisInconsistencyEpsilonSquared_); 353 } 354 355 void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC, 356 raw_ostream &OS) const { 357 OS << "<table class=\"sched-class-desc\">"; 358 OS << "<tr><th>Valid</th><th>Variant</th><th>NumMicroOps</th><th>Latency</" 359 "th><th>RThroughput</th><th>WriteProcRes</th><th title=\"This is the " 360 "idealized unit resource (port) pressure assuming ideal " 361 "distribution\">Idealized Resource Pressure</th></tr>"; 362 if (RSC.SCDesc->isValid()) { 363 const auto &SI = State_.getSubtargetInfo(); 364 const auto &SM = SI.getSchedModel(); 365 OS << "<tr><td>✔</td>"; 366 OS << "<td>" << (RSC.WasVariant ? "✔" : "✕") << "</td>"; 367 OS << "<td>" << RSC.SCDesc->NumMicroOps << "</td>"; 368 // Latencies. 369 OS << "<td><ul>"; 370 for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) { 371 const auto *const Entry = SI.getWriteLatencyEntry(RSC.SCDesc, I); 372 OS << "<li>" << Entry->Cycles; 373 if (RSC.SCDesc->NumWriteLatencyEntries > 1) { 374 // Dismabiguate if more than 1 latency. 375 OS << " (WriteResourceID " << Entry->WriteResourceID << ")"; 376 } 377 OS << "</li>"; 378 } 379 OS << "</ul></td>"; 380 // inverse throughput. 381 OS << "<td>"; 382 writeMeasurementValue<kEscapeHtml>( 383 OS, MCSchedModel::getReciprocalThroughput(SI, *RSC.SCDesc)); 384 OS << "</td>"; 385 // WriteProcRes. 386 OS << "<td><ul>"; 387 for (const auto &WPR : RSC.NonRedundantWriteProcRes) { 388 OS << "<li><span class=\"mono\">"; 389 writeEscaped<kEscapeHtml>(OS, 390 SM.getProcResource(WPR.ProcResourceIdx)->Name); 391 OS << "</span>: " << WPR.ReleaseAtCycle << "</li>"; 392 } 393 OS << "</ul></td>"; 394 // Idealized port pressure. 395 OS << "<td><ul>"; 396 for (const auto &Pressure : RSC.IdealizedProcResPressure) { 397 OS << "<li><span class=\"mono\">"; 398 writeEscaped<kEscapeHtml>( 399 OS, SI.getSchedModel().getProcResource(Pressure.first)->Name); 400 OS << "</span>: "; 401 writeMeasurementValue<kEscapeHtml>(OS, Pressure.second); 402 OS << "</li>"; 403 } 404 OS << "</ul></td>"; 405 OS << "</tr>"; 406 } else { 407 OS << "<tr><td>✕</td><td></td><td></td></tr>"; 408 } 409 OS << "</table>"; 410 } 411 412 void Analysis::printClusterRawHtml(const BenchmarkClustering::ClusterId &Id, 413 StringRef display_name, 414 raw_ostream &OS) const { 415 const auto &Points = Clustering_.getPoints(); 416 const auto &Cluster = Clustering_.getCluster(Id); 417 if (Cluster.PointIndices.empty()) 418 return; 419 420 OS << "<div class=\"inconsistency\"><p>" << display_name << " Cluster (" 421 << Cluster.PointIndices.size() << " points)</p>"; 422 OS << "<table class=\"sched-class-clusters\">"; 423 // Table Header. 424 OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>"; 425 for (const auto &Measurement : Points[Cluster.PointIndices[0]].Measurements) { 426 OS << "<th>"; 427 writeEscaped<kEscapeHtml>(OS, Measurement.Key); 428 OS << "</th>"; 429 } 430 OS << "</tr>"; 431 432 // Point data. 433 for (const auto &PointId : Cluster.PointIndices) { 434 OS << "<tr class=\"bad-cluster\"><td>" << display_name << "</td><td><ul>"; 435 printPointHtml(Points[PointId], OS); 436 OS << "</ul></td>"; 437 for (const auto &Measurement : Points[PointId].Measurements) { 438 OS << "<td class=\"measurement\">"; 439 writeMeasurementValue<kEscapeHtml>(OS, Measurement.PerInstructionValue); 440 } 441 OS << "</tr>"; 442 } 443 OS << "</table>"; 444 445 OS << "</div>"; 446 447 } // namespace exegesis 448 449 static constexpr const char kHtmlHead[] = R"( 450 <head> 451 <title>llvm-exegesis Analysis Results</title> 452 <style> 453 body { 454 font-family: sans-serif 455 } 456 span.sched-class-name { 457 font-weight: bold; 458 font-family: monospace; 459 } 460 span.opcode { 461 font-family: monospace; 462 } 463 span.config { 464 font-family: monospace; 465 } 466 div.inconsistency { 467 margin-top: 50px; 468 } 469 table { 470 margin-left: 50px; 471 border-collapse: collapse; 472 } 473 table, table tr,td,th { 474 border: 1px solid #444; 475 } 476 table ul { 477 padding-left: 0px; 478 margin: 0px; 479 list-style-type: none; 480 } 481 table.sched-class-clusters td { 482 padding-left: 10px; 483 padding-right: 10px; 484 padding-top: 10px; 485 padding-bottom: 10px; 486 } 487 table.sched-class-desc td { 488 padding-left: 10px; 489 padding-right: 10px; 490 padding-top: 2px; 491 padding-bottom: 2px; 492 } 493 span.mono { 494 font-family: monospace; 495 } 496 td.measurement { 497 text-align: center; 498 } 499 tr.good-cluster td.measurement { 500 color: #292 501 } 502 tr.bad-cluster td.measurement { 503 color: #922 504 } 505 tr.good-cluster td.measurement span.minmax { 506 color: #888; 507 } 508 tr.bad-cluster td.measurement span.minmax { 509 color: #888; 510 } 511 </style> 512 </head> 513 )"; 514 515 template <> 516 Error Analysis::run<Analysis::PrintSchedClassInconsistencies>( 517 raw_ostream &OS) const { 518 const auto &FirstPoint = Clustering_.getPoints()[0]; 519 // Print the header. 520 OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>"; 521 OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>"; 522 OS << "<h3>Triple: <span class=\"mono\">"; 523 writeEscaped<kEscapeHtml>(OS, FirstPoint.LLVMTriple); 524 OS << "</span></h3><h3>Cpu: <span class=\"mono\">"; 525 writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName); 526 OS << "</span></h3>"; 527 528 const auto &SI = State_.getSubtargetInfo(); 529 for (const auto &RSCAndPoints : makePointsPerSchedClass()) { 530 if (!RSCAndPoints.RSC.SCDesc) 531 continue; 532 // Bucket sched class points into sched class clusters. 533 std::vector<SchedClassCluster> SchedClassClusters; 534 for (const size_t PointId : RSCAndPoints.PointIds) { 535 const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId); 536 if (!ClusterId.isValid()) 537 continue; // Ignore noise and errors. FIXME: take noise into account ? 538 if (ClusterId.isUnstable() ^ AnalysisDisplayUnstableOpcodes_) 539 continue; // Either display stable or unstable clusters only. 540 auto SchedClassClusterIt = 541 find_if(SchedClassClusters, [ClusterId](const SchedClassCluster &C) { 542 return C.id() == ClusterId; 543 }); 544 if (SchedClassClusterIt == SchedClassClusters.end()) { 545 SchedClassClusters.emplace_back(); 546 SchedClassClusterIt = std::prev(SchedClassClusters.end()); 547 } 548 SchedClassClusterIt->addPoint(PointId, Clustering_); 549 } 550 551 // Print any scheduling class that has at least one cluster that does not 552 // match the checked-in data. 553 if (all_of(SchedClassClusters, [this, &RSCAndPoints, 554 &SI](const SchedClassCluster &C) { 555 return C.measurementsMatch(SI, RSCAndPoints.RSC, Clustering_, 556 AnalysisInconsistencyEpsilonSquared_); 557 })) 558 continue; // Nothing weird. 559 560 OS << "<div class=\"inconsistency\"><p>Sched Class <span " 561 "class=\"sched-class-name\">"; 562 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 563 writeEscaped<kEscapeHtml>(OS, RSCAndPoints.RSC.SCDesc->Name); 564 #else 565 OS << RSCAndPoints.RSC.SchedClassId; 566 #endif 567 OS << "</span> contains instructions whose performance characteristics do" 568 " not match that of LLVM:</p>"; 569 printSchedClassClustersHtml(SchedClassClusters, RSCAndPoints.RSC, OS); 570 OS << "<p>llvm SchedModel data:</p>"; 571 printSchedClassDescHtml(RSCAndPoints.RSC, OS); 572 OS << "</div>"; 573 } 574 575 printClusterRawHtml(BenchmarkClustering::ClusterId::noise(), 576 "[noise]", OS); 577 578 OS << "</body></html>"; 579 return Error::success(); 580 } 581 582 } // namespace exegesis 583 } // namespace llvm 584