1 //===-- Analysis.cpp --------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "Analysis.h" 10 #include "BenchmarkResult.h" 11 #include "llvm/ADT/STLExtras.h" 12 #include "llvm/MC/MCAsmInfo.h" 13 #include "llvm/MC/MCTargetOptions.h" 14 #include "llvm/Support/FormatVariadic.h" 15 #include <limits> 16 #include <vector> 17 18 namespace llvm { 19 namespace exegesis { 20 21 static const char kCsvSep = ','; 22 23 namespace { 24 25 enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString }; 26 27 template <EscapeTag Tag> void writeEscaped(raw_ostream &OS, const StringRef S); 28 29 template <> void writeEscaped<kEscapeCsv>(raw_ostream &OS, const StringRef S) { 30 if (!S.contains(kCsvSep)) { 31 OS << S; 32 } else { 33 // Needs escaping. 34 OS << '"'; 35 for (const char C : S) { 36 if (C == '"') 37 OS << "\"\""; 38 else 39 OS << C; 40 } 41 OS << '"'; 42 } 43 } 44 45 template <> void writeEscaped<kEscapeHtml>(raw_ostream &OS, const StringRef S) { 46 for (const char C : S) { 47 if (C == '<') 48 OS << "<"; 49 else if (C == '>') 50 OS << ">"; 51 else if (C == '&') 52 OS << "&"; 53 else 54 OS << C; 55 } 56 } 57 58 template <> 59 void writeEscaped<kEscapeHtmlString>(raw_ostream &OS, const StringRef S) { 60 for (const char C : S) { 61 if (C == '"') 62 OS << "\\\""; 63 else 64 OS << C; 65 } 66 } 67 68 } // namespace 69 70 template <EscapeTag Tag> 71 static void 72 writeClusterId(raw_ostream &OS, 73 const BenchmarkClustering::ClusterId &CID) { 74 if (CID.isNoise()) 75 writeEscaped<Tag>(OS, "[noise]"); 76 else if (CID.isError()) 77 writeEscaped<Tag>(OS, "[error]"); 78 else 79 OS << CID.getId(); 80 } 81 82 template <EscapeTag Tag> 83 static void writeMeasurementValue(raw_ostream &OS, const double Value) { 84 // Given Value, if we wanted to serialize it to a string, 85 // how many base-10 digits will we need to store, max? 86 static constexpr auto MaxDigitCount = 87 std::numeric_limits<decltype(Value)>::max_digits10; 88 // Also, we will need a decimal separator. 89 static constexpr auto DecimalSeparatorLen = 1; // '.' e.g. 90 // So how long of a string will the serialization produce, max? 91 static constexpr auto SerializationLen = MaxDigitCount + DecimalSeparatorLen; 92 93 // WARNING: when changing the format, also adjust the small-size estimate ^. 94 static constexpr StringLiteral SimpleFloatFormat = StringLiteral("{0:F}"); 95 96 writeEscaped<Tag>( 97 OS, formatv(SimpleFloatFormat.data(), Value).sstr<SerializationLen>()); 98 } 99 100 template <typename EscapeTag, EscapeTag Tag> 101 void Analysis::writeSnippet(raw_ostream &OS, ArrayRef<uint8_t> Bytes, 102 const char *Separator) const { 103 SmallVector<std::string, 3> Lines; 104 // Parse the asm snippet and print it. 105 while (!Bytes.empty()) { 106 MCInst MI; 107 uint64_t MISize = 0; 108 if (!DisasmHelper_->decodeInst(MI, MISize, Bytes)) { 109 writeEscaped<Tag>(OS, join(Lines, Separator)); 110 writeEscaped<Tag>(OS, Separator); 111 writeEscaped<Tag>(OS, "[error decoding asm snippet]"); 112 return; 113 } 114 SmallString<128> InstPrinterStr; // FIXME: magic number. 115 raw_svector_ostream OSS(InstPrinterStr); 116 DisasmHelper_->printInst(&MI, OSS); 117 Bytes = Bytes.drop_front(MISize); 118 Lines.emplace_back(InstPrinterStr.str().trim()); 119 } 120 writeEscaped<Tag>(OS, join(Lines, Separator)); 121 } 122 123 // Prints a row representing an instruction, along with scheduling info and 124 // point coordinates (measurements). 125 void Analysis::printInstructionRowCsv(const size_t PointId, 126 raw_ostream &OS) const { 127 const Benchmark &Point = Clustering_.getPoints()[PointId]; 128 writeClusterId<kEscapeCsv>(OS, Clustering_.getClusterIdForPoint(PointId)); 129 OS << kCsvSep; 130 writeSnippet<EscapeTag, kEscapeCsv>(OS, Point.AssembledSnippet, "; "); 131 OS << kCsvSep; 132 writeEscaped<kEscapeCsv>(OS, Point.Key.Config); 133 OS << kCsvSep; 134 assert(!Point.Key.Instructions.empty()); 135 const MCInst &MCI = Point.keyInstruction(); 136 unsigned SchedClassId; 137 std::tie(SchedClassId, std::ignore) = ResolvedSchedClass::resolveSchedClassId( 138 State_.getSubtargetInfo(), State_.getInstrInfo(), MCI); 139 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 140 const MCSchedClassDesc *const SCDesc = 141 State_.getSubtargetInfo().getSchedModel().getSchedClassDesc(SchedClassId); 142 writeEscaped<kEscapeCsv>(OS, SCDesc->Name); 143 #else 144 OS << SchedClassId; 145 #endif 146 for (const auto &Measurement : Point.Measurements) { 147 OS << kCsvSep; 148 writeMeasurementValue<kEscapeCsv>(OS, Measurement.PerInstructionValue); 149 } 150 OS << "\n"; 151 } 152 153 Analysis::Analysis(const LLVMState &State, 154 const BenchmarkClustering &Clustering, 155 double AnalysisInconsistencyEpsilon, 156 bool AnalysisDisplayUnstableOpcodes) 157 : Clustering_(Clustering), State_(State), 158 AnalysisInconsistencyEpsilonSquared_(AnalysisInconsistencyEpsilon * 159 AnalysisInconsistencyEpsilon), 160 AnalysisDisplayUnstableOpcodes_(AnalysisDisplayUnstableOpcodes) { 161 if (Clustering.getPoints().empty()) 162 return; 163 164 DisasmHelper_ = std::make_unique<DisassemblerHelper>(State); 165 } 166 167 template <> 168 Error Analysis::run<Analysis::PrintClusters>(raw_ostream &OS) const { 169 if (Clustering_.getPoints().empty()) 170 return Error::success(); 171 172 // Write the header. 173 OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config" 174 << kCsvSep << "sched_class"; 175 for (const auto &Measurement : Clustering_.getPoints().front().Measurements) { 176 OS << kCsvSep; 177 writeEscaped<kEscapeCsv>(OS, Measurement.Key); 178 } 179 OS << "\n"; 180 181 // Write the points. 182 for (const auto &ClusterIt : Clustering_.getValidClusters()) { 183 for (const size_t PointId : ClusterIt.PointIndices) { 184 printInstructionRowCsv(PointId, OS); 185 } 186 OS << "\n\n"; 187 } 188 return Error::success(); 189 } 190 191 Analysis::ResolvedSchedClassAndPoints::ResolvedSchedClassAndPoints( 192 ResolvedSchedClass &&RSC) 193 : RSC(std::move(RSC)) {} 194 195 std::vector<Analysis::ResolvedSchedClassAndPoints> 196 Analysis::makePointsPerSchedClass() const { 197 std::vector<ResolvedSchedClassAndPoints> Entries; 198 // Maps SchedClassIds to index in result. 199 std::unordered_map<unsigned, size_t> SchedClassIdToIndex; 200 const auto &Points = Clustering_.getPoints(); 201 for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) { 202 const Benchmark &Point = Points[PointId]; 203 if (!Point.Error.empty()) 204 continue; 205 assert(!Point.Key.Instructions.empty()); 206 // FIXME: we should be using the tuple of classes for instructions in the 207 // snippet as key. 208 const MCInst &MCI = Point.keyInstruction(); 209 unsigned SchedClassId; 210 bool WasVariant; 211 std::tie(SchedClassId, WasVariant) = 212 ResolvedSchedClass::resolveSchedClassId(State_.getSubtargetInfo(), 213 State_.getInstrInfo(), MCI); 214 const auto IndexIt = SchedClassIdToIndex.find(SchedClassId); 215 if (IndexIt == SchedClassIdToIndex.end()) { 216 // Create a new entry. 217 SchedClassIdToIndex.emplace(SchedClassId, Entries.size()); 218 ResolvedSchedClassAndPoints Entry(ResolvedSchedClass( 219 State_.getSubtargetInfo(), SchedClassId, WasVariant)); 220 Entry.PointIds.push_back(PointId); 221 Entries.push_back(std::move(Entry)); 222 } else { 223 // Append to the existing entry. 224 Entries[IndexIt->second].PointIds.push_back(PointId); 225 } 226 } 227 return Entries; 228 } 229 230 // Parallel benchmarks repeat the same opcode multiple times. Just show this 231 // opcode and show the whole snippet only on hover. 232 static void writeParallelSnippetHtml(raw_ostream &OS, 233 const std::vector<MCInst> &Instructions, 234 const MCInstrInfo &InstrInfo) { 235 if (Instructions.empty()) 236 return; 237 writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instructions[0].getOpcode())); 238 if (Instructions.size() > 1) 239 OS << " (x" << Instructions.size() << ")"; 240 } 241 242 // Latency tries to find a serial path. Just show the opcode path and show the 243 // whole snippet only on hover. 244 static void writeLatencySnippetHtml(raw_ostream &OS, 245 const std::vector<MCInst> &Instructions, 246 const MCInstrInfo &InstrInfo) { 247 bool First = true; 248 for (const MCInst &Instr : Instructions) { 249 if (First) 250 First = false; 251 else 252 OS << " → "; 253 writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instr.getOpcode())); 254 } 255 } 256 257 void Analysis::printPointHtml(const Benchmark &Point, 258 llvm::raw_ostream &OS) const { 259 OS << "<li><span class=\"mono\" title=\""; 260 writeSnippet<EscapeTag, kEscapeHtmlString>(OS, Point.AssembledSnippet, "\n"); 261 OS << "\">"; 262 switch (Point.Mode) { 263 case Benchmark::Latency: 264 writeLatencySnippetHtml(OS, Point.Key.Instructions, State_.getInstrInfo()); 265 break; 266 case Benchmark::Uops: 267 case Benchmark::InverseThroughput: 268 writeParallelSnippetHtml(OS, Point.Key.Instructions, State_.getInstrInfo()); 269 break; 270 default: 271 llvm_unreachable("invalid mode"); 272 } 273 OS << "</span> <span class=\"mono\">"; 274 writeEscaped<kEscapeHtml>(OS, Point.Key.Config); 275 OS << "</span></li>"; 276 } 277 278 void Analysis::printSchedClassClustersHtml( 279 const std::vector<SchedClassCluster> &Clusters, 280 const ResolvedSchedClass &RSC, raw_ostream &OS) const { 281 const auto &Points = Clustering_.getPoints(); 282 OS << "<table class=\"sched-class-clusters\">"; 283 OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>"; 284 assert(!Clusters.empty()); 285 for (const auto &Measurement : 286 Points[Clusters[0].getPointIds()[0]].Measurements) { 287 OS << "<th>"; 288 writeEscaped<kEscapeHtml>(OS, Measurement.Key); 289 OS << "</th>"; 290 } 291 OS << "</tr>"; 292 for (const SchedClassCluster &Cluster : Clusters) { 293 OS << "<tr class=\"" 294 << (Cluster.measurementsMatch(State_.getSubtargetInfo(), RSC, 295 Clustering_, 296 AnalysisInconsistencyEpsilonSquared_) 297 ? "good-cluster" 298 : "bad-cluster") 299 << "\"><td>"; 300 writeClusterId<kEscapeHtml>(OS, Cluster.id()); 301 OS << "</td><td><ul>"; 302 for (const size_t PointId : Cluster.getPointIds()) { 303 printPointHtml(Points[PointId], OS); 304 } 305 OS << "</ul></td>"; 306 for (const auto &Stats : Cluster.getCentroid().getStats()) { 307 OS << "<td class=\"measurement\">"; 308 writeMeasurementValue<kEscapeHtml>(OS, Stats.avg()); 309 OS << "<br><span class=\"minmax\">["; 310 writeMeasurementValue<kEscapeHtml>(OS, Stats.min()); 311 OS << ";"; 312 writeMeasurementValue<kEscapeHtml>(OS, Stats.max()); 313 OS << "]</span></td>"; 314 } 315 OS << "</tr>"; 316 } 317 OS << "</table>"; 318 } 319 320 void Analysis::SchedClassCluster::addPoint( 321 size_t PointId, const BenchmarkClustering &Clustering) { 322 PointIds.push_back(PointId); 323 const auto &Point = Clustering.getPoints()[PointId]; 324 if (ClusterId.isUndef()) 325 ClusterId = Clustering.getClusterIdForPoint(PointId); 326 assert(ClusterId == Clustering.getClusterIdForPoint(PointId)); 327 328 Centroid.addPoint(Point.Measurements); 329 } 330 331 bool Analysis::SchedClassCluster::measurementsMatch( 332 const MCSubtargetInfo &STI, const ResolvedSchedClass &RSC, 333 const BenchmarkClustering &Clustering, 334 const double AnalysisInconsistencyEpsilonSquared_) const { 335 assert(!Clustering.getPoints().empty()); 336 const Benchmark::ModeE Mode = Clustering.getPoints()[0].Mode; 337 338 if (!Centroid.validate(Mode)) 339 return false; 340 341 const std::vector<BenchmarkMeasure> ClusterCenterPoint = 342 Centroid.getAsPoint(); 343 344 const std::vector<BenchmarkMeasure> SchedClassPoint = 345 RSC.getAsPoint(Mode, STI, Centroid.getStats()); 346 if (SchedClassPoint.empty()) 347 return false; // In Uops mode validate() may not be enough. 348 349 assert(ClusterCenterPoint.size() == SchedClassPoint.size() && 350 "Expected measured/sched data dimensions to match."); 351 352 return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint, 353 AnalysisInconsistencyEpsilonSquared_); 354 } 355 356 void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC, 357 raw_ostream &OS) const { 358 OS << "<table class=\"sched-class-desc\">"; 359 OS << "<tr><th>Valid</th><th>Variant</th><th>NumMicroOps</th><th>Latency</" 360 "th><th>RThroughput</th><th>WriteProcRes</th><th title=\"This is the " 361 "idealized unit resource (port) pressure assuming ideal " 362 "distribution\">Idealized Resource Pressure</th></tr>"; 363 if (RSC.SCDesc->isValid()) { 364 const auto &SI = State_.getSubtargetInfo(); 365 const auto &SM = SI.getSchedModel(); 366 OS << "<tr><td>✔</td>"; 367 OS << "<td>" << (RSC.WasVariant ? "✔" : "✕") << "</td>"; 368 OS << "<td>" << RSC.SCDesc->NumMicroOps << "</td>"; 369 // Latencies. 370 OS << "<td><ul>"; 371 for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) { 372 const auto *const Entry = SI.getWriteLatencyEntry(RSC.SCDesc, I); 373 OS << "<li>" << Entry->Cycles; 374 if (RSC.SCDesc->NumWriteLatencyEntries > 1) { 375 // Dismabiguate if more than 1 latency. 376 OS << " (WriteResourceID " << Entry->WriteResourceID << ")"; 377 } 378 OS << "</li>"; 379 } 380 OS << "</ul></td>"; 381 // inverse throughput. 382 OS << "<td>"; 383 writeMeasurementValue<kEscapeHtml>( 384 OS, MCSchedModel::getReciprocalThroughput(SI, *RSC.SCDesc)); 385 OS << "</td>"; 386 // WriteProcRes. 387 OS << "<td><ul>"; 388 for (const auto &WPR : RSC.NonRedundantWriteProcRes) { 389 OS << "<li><span class=\"mono\">"; 390 writeEscaped<kEscapeHtml>(OS, 391 SM.getProcResource(WPR.ProcResourceIdx)->Name); 392 OS << "</span>: " << WPR.ReleaseAtCycle << "</li>"; 393 } 394 OS << "</ul></td>"; 395 // Idealized port pressure. 396 OS << "<td><ul>"; 397 for (const auto &Pressure : RSC.IdealizedProcResPressure) { 398 OS << "<li><span class=\"mono\">"; 399 writeEscaped<kEscapeHtml>( 400 OS, SI.getSchedModel().getProcResource(Pressure.first)->Name); 401 OS << "</span>: "; 402 writeMeasurementValue<kEscapeHtml>(OS, Pressure.second); 403 OS << "</li>"; 404 } 405 OS << "</ul></td>"; 406 OS << "</tr>"; 407 } else { 408 OS << "<tr><td>✕</td><td></td><td></td></tr>"; 409 } 410 OS << "</table>"; 411 } 412 413 void Analysis::printClusterRawHtml( 414 const BenchmarkClustering::ClusterId &Id, StringRef display_name, 415 llvm::raw_ostream &OS) const { 416 const auto &Points = Clustering_.getPoints(); 417 const auto &Cluster = Clustering_.getCluster(Id); 418 if (Cluster.PointIndices.empty()) 419 return; 420 421 OS << "<div class=\"inconsistency\"><p>" << display_name << " Cluster (" 422 << Cluster.PointIndices.size() << " points)</p>"; 423 OS << "<table class=\"sched-class-clusters\">"; 424 // Table Header. 425 OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>"; 426 for (const auto &Measurement : Points[Cluster.PointIndices[0]].Measurements) { 427 OS << "<th>"; 428 writeEscaped<kEscapeHtml>(OS, Measurement.Key); 429 OS << "</th>"; 430 } 431 OS << "</tr>"; 432 433 // Point data. 434 for (const auto &PointId : Cluster.PointIndices) { 435 OS << "<tr class=\"bad-cluster\"><td>" << display_name << "</td><td><ul>"; 436 printPointHtml(Points[PointId], OS); 437 OS << "</ul></td>"; 438 for (const auto &Measurement : Points[PointId].Measurements) { 439 OS << "<td class=\"measurement\">"; 440 writeMeasurementValue<kEscapeHtml>(OS, Measurement.PerInstructionValue); 441 } 442 OS << "</tr>"; 443 } 444 OS << "</table>"; 445 446 OS << "</div>"; 447 448 } // namespace exegesis 449 450 static constexpr const char kHtmlHead[] = R"( 451 <head> 452 <title>llvm-exegesis Analysis Results</title> 453 <style> 454 body { 455 font-family: sans-serif 456 } 457 span.sched-class-name { 458 font-weight: bold; 459 font-family: monospace; 460 } 461 span.opcode { 462 font-family: monospace; 463 } 464 span.config { 465 font-family: monospace; 466 } 467 div.inconsistency { 468 margin-top: 50px; 469 } 470 table { 471 margin-left: 50px; 472 border-collapse: collapse; 473 } 474 table, table tr,td,th { 475 border: 1px solid #444; 476 } 477 table ul { 478 padding-left: 0px; 479 margin: 0px; 480 list-style-type: none; 481 } 482 table.sched-class-clusters td { 483 padding-left: 10px; 484 padding-right: 10px; 485 padding-top: 10px; 486 padding-bottom: 10px; 487 } 488 table.sched-class-desc td { 489 padding-left: 10px; 490 padding-right: 10px; 491 padding-top: 2px; 492 padding-bottom: 2px; 493 } 494 span.mono { 495 font-family: monospace; 496 } 497 td.measurement { 498 text-align: center; 499 } 500 tr.good-cluster td.measurement { 501 color: #292 502 } 503 tr.bad-cluster td.measurement { 504 color: #922 505 } 506 tr.good-cluster td.measurement span.minmax { 507 color: #888; 508 } 509 tr.bad-cluster td.measurement span.minmax { 510 color: #888; 511 } 512 </style> 513 </head> 514 )"; 515 516 template <> 517 Error Analysis::run<Analysis::PrintSchedClassInconsistencies>( 518 raw_ostream &OS) const { 519 const auto &FirstPoint = Clustering_.getPoints()[0]; 520 // Print the header. 521 OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>"; 522 OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>"; 523 OS << "<h3>Triple: <span class=\"mono\">"; 524 writeEscaped<kEscapeHtml>(OS, FirstPoint.LLVMTriple); 525 OS << "</span></h3><h3>Cpu: <span class=\"mono\">"; 526 writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName); 527 OS << "</span></h3>"; 528 529 const auto &SI = State_.getSubtargetInfo(); 530 for (const auto &RSCAndPoints : makePointsPerSchedClass()) { 531 if (!RSCAndPoints.RSC.SCDesc) 532 continue; 533 // Bucket sched class points into sched class clusters. 534 std::vector<SchedClassCluster> SchedClassClusters; 535 for (const size_t PointId : RSCAndPoints.PointIds) { 536 const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId); 537 if (!ClusterId.isValid()) 538 continue; // Ignore noise and errors. FIXME: take noise into account ? 539 if (ClusterId.isUnstable() ^ AnalysisDisplayUnstableOpcodes_) 540 continue; // Either display stable or unstable clusters only. 541 auto SchedClassClusterIt = llvm::find_if( 542 SchedClassClusters, [ClusterId](const SchedClassCluster &C) { 543 return C.id() == ClusterId; 544 }); 545 if (SchedClassClusterIt == SchedClassClusters.end()) { 546 SchedClassClusters.emplace_back(); 547 SchedClassClusterIt = std::prev(SchedClassClusters.end()); 548 } 549 SchedClassClusterIt->addPoint(PointId, Clustering_); 550 } 551 552 // Print any scheduling class that has at least one cluster that does not 553 // match the checked-in data. 554 if (all_of(SchedClassClusters, [this, &RSCAndPoints, 555 &SI](const SchedClassCluster &C) { 556 return C.measurementsMatch(SI, RSCAndPoints.RSC, Clustering_, 557 AnalysisInconsistencyEpsilonSquared_); 558 })) 559 continue; // Nothing weird. 560 561 OS << "<div class=\"inconsistency\"><p>Sched Class <span " 562 "class=\"sched-class-name\">"; 563 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 564 writeEscaped<kEscapeHtml>(OS, RSCAndPoints.RSC.SCDesc->Name); 565 #else 566 OS << RSCAndPoints.RSC.SchedClassId; 567 #endif 568 OS << "</span> contains instructions whose performance characteristics do" 569 " not match that of LLVM:</p>"; 570 printSchedClassClustersHtml(SchedClassClusters, RSCAndPoints.RSC, OS); 571 OS << "<p>llvm SchedModel data:</p>"; 572 printSchedClassDescHtml(RSCAndPoints.RSC, OS); 573 OS << "</div>"; 574 } 575 576 printClusterRawHtml(BenchmarkClustering::ClusterId::noise(), 577 "[noise]", OS); 578 579 OS << "</body></html>"; 580 return Error::success(); 581 } 582 583 } // namespace exegesis 584 } // namespace llvm 585