1 //===-- Analysis.cpp --------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "Analysis.h" 10 #include "BenchmarkResult.h" 11 #include "llvm/ADT/STLExtras.h" 12 #include "llvm/MC/MCAsmInfo.h" 13 #include "llvm/MC/MCTargetOptions.h" 14 #include "llvm/Support/FormatVariadic.h" 15 #include <limits> 16 #include <unordered_set> 17 #include <vector> 18 19 namespace llvm { 20 namespace exegesis { 21 22 static const char kCsvSep = ','; 23 24 namespace { 25 26 enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString }; 27 28 template <EscapeTag Tag> void writeEscaped(raw_ostream &OS, const StringRef S); 29 30 template <> void writeEscaped<kEscapeCsv>(raw_ostream &OS, const StringRef S) { 31 if (!S.contains(kCsvSep)) { 32 OS << S; 33 } else { 34 // Needs escaping. 35 OS << '"'; 36 for (const char C : S) { 37 if (C == '"') 38 OS << "\"\""; 39 else 40 OS << C; 41 } 42 OS << '"'; 43 } 44 } 45 46 template <> void writeEscaped<kEscapeHtml>(raw_ostream &OS, const StringRef S) { 47 for (const char C : S) { 48 if (C == '<') 49 OS << "<"; 50 else if (C == '>') 51 OS << ">"; 52 else if (C == '&') 53 OS << "&"; 54 else 55 OS << C; 56 } 57 } 58 59 template <> 60 void writeEscaped<kEscapeHtmlString>(raw_ostream &OS, const StringRef S) { 61 for (const char C : S) { 62 if (C == '"') 63 OS << "\\\""; 64 else 65 OS << C; 66 } 67 } 68 69 } // namespace 70 71 template <EscapeTag Tag> 72 static void 73 writeClusterId(raw_ostream &OS, 74 const BenchmarkClustering::ClusterId &CID) { 75 if (CID.isNoise()) 76 writeEscaped<Tag>(OS, "[noise]"); 77 else if (CID.isError()) 78 writeEscaped<Tag>(OS, "[error]"); 79 else 80 OS << CID.getId(); 81 } 82 83 template <EscapeTag Tag> 84 static void writeMeasurementValue(raw_ostream &OS, const double Value) { 85 // Given Value, if we wanted to serialize it to a string, 86 // how many base-10 digits will we need to store, max? 87 static constexpr auto MaxDigitCount = 88 std::numeric_limits<decltype(Value)>::max_digits10; 89 // Also, we will need a decimal separator. 90 static constexpr auto DecimalSeparatorLen = 1; // '.' e.g. 91 // So how long of a string will the serialization produce, max? 92 static constexpr auto SerializationLen = MaxDigitCount + DecimalSeparatorLen; 93 94 // WARNING: when changing the format, also adjust the small-size estimate ^. 95 static constexpr StringLiteral SimpleFloatFormat = StringLiteral("{0:F}"); 96 97 writeEscaped<Tag>( 98 OS, formatv(SimpleFloatFormat.data(), Value).sstr<SerializationLen>()); 99 } 100 101 template <typename EscapeTag, EscapeTag Tag> 102 void Analysis::writeSnippet(raw_ostream &OS, ArrayRef<uint8_t> Bytes, 103 const char *Separator) const { 104 SmallVector<std::string, 3> Lines; 105 // Parse the asm snippet and print it. 106 while (!Bytes.empty()) { 107 MCInst MI; 108 uint64_t MISize = 0; 109 if (!DisasmHelper_->decodeInst(MI, MISize, Bytes)) { 110 writeEscaped<Tag>(OS, join(Lines, Separator)); 111 writeEscaped<Tag>(OS, Separator); 112 writeEscaped<Tag>(OS, "[error decoding asm snippet]"); 113 return; 114 } 115 SmallString<128> InstPrinterStr; // FIXME: magic number. 116 raw_svector_ostream OSS(InstPrinterStr); 117 DisasmHelper_->printInst(&MI, OSS); 118 Bytes = Bytes.drop_front(MISize); 119 Lines.emplace_back(InstPrinterStr.str().trim()); 120 } 121 writeEscaped<Tag>(OS, join(Lines, Separator)); 122 } 123 124 // Prints a row representing an instruction, along with scheduling info and 125 // point coordinates (measurements). 126 void Analysis::printInstructionRowCsv(const size_t PointId, 127 raw_ostream &OS) const { 128 const Benchmark &Point = Clustering_.getPoints()[PointId]; 129 writeClusterId<kEscapeCsv>(OS, Clustering_.getClusterIdForPoint(PointId)); 130 OS << kCsvSep; 131 writeSnippet<EscapeTag, kEscapeCsv>(OS, Point.AssembledSnippet, "; "); 132 OS << kCsvSep; 133 writeEscaped<kEscapeCsv>(OS, Point.Key.Config); 134 OS << kCsvSep; 135 assert(!Point.Key.Instructions.empty()); 136 const MCInst &MCI = Point.keyInstruction(); 137 unsigned SchedClassId; 138 std::tie(SchedClassId, std::ignore) = ResolvedSchedClass::resolveSchedClassId( 139 State_.getSubtargetInfo(), State_.getInstrInfo(), MCI); 140 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 141 const MCSchedClassDesc *const SCDesc = 142 State_.getSubtargetInfo().getSchedModel().getSchedClassDesc(SchedClassId); 143 writeEscaped<kEscapeCsv>(OS, SCDesc->Name); 144 #else 145 OS << SchedClassId; 146 #endif 147 for (const auto &Measurement : Point.Measurements) { 148 OS << kCsvSep; 149 writeMeasurementValue<kEscapeCsv>(OS, Measurement.PerInstructionValue); 150 } 151 OS << "\n"; 152 } 153 154 Analysis::Analysis(const LLVMState &State, 155 const BenchmarkClustering &Clustering, 156 double AnalysisInconsistencyEpsilon, 157 bool AnalysisDisplayUnstableOpcodes) 158 : Clustering_(Clustering), State_(State), 159 AnalysisInconsistencyEpsilonSquared_(AnalysisInconsistencyEpsilon * 160 AnalysisInconsistencyEpsilon), 161 AnalysisDisplayUnstableOpcodes_(AnalysisDisplayUnstableOpcodes) { 162 if (Clustering.getPoints().empty()) 163 return; 164 165 DisasmHelper_ = std::make_unique<DisassemblerHelper>(State); 166 } 167 168 template <> 169 Error Analysis::run<Analysis::PrintClusters>(raw_ostream &OS) const { 170 if (Clustering_.getPoints().empty()) 171 return Error::success(); 172 173 // Write the header. 174 OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config" 175 << kCsvSep << "sched_class"; 176 for (const auto &Measurement : Clustering_.getPoints().front().Measurements) { 177 OS << kCsvSep; 178 writeEscaped<kEscapeCsv>(OS, Measurement.Key); 179 } 180 OS << "\n"; 181 182 // Write the points. 183 for (const auto &ClusterIt : Clustering_.getValidClusters()) { 184 for (const size_t PointId : ClusterIt.PointIndices) { 185 printInstructionRowCsv(PointId, OS); 186 } 187 OS << "\n\n"; 188 } 189 return Error::success(); 190 } 191 192 Analysis::ResolvedSchedClassAndPoints::ResolvedSchedClassAndPoints( 193 ResolvedSchedClass &&RSC) 194 : RSC(std::move(RSC)) {} 195 196 std::vector<Analysis::ResolvedSchedClassAndPoints> 197 Analysis::makePointsPerSchedClass() const { 198 std::vector<ResolvedSchedClassAndPoints> Entries; 199 // Maps SchedClassIds to index in result. 200 std::unordered_map<unsigned, size_t> SchedClassIdToIndex; 201 const auto &Points = Clustering_.getPoints(); 202 for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) { 203 const Benchmark &Point = Points[PointId]; 204 if (!Point.Error.empty()) 205 continue; 206 assert(!Point.Key.Instructions.empty()); 207 // FIXME: we should be using the tuple of classes for instructions in the 208 // snippet as key. 209 const MCInst &MCI = Point.keyInstruction(); 210 unsigned SchedClassId; 211 bool WasVariant; 212 std::tie(SchedClassId, WasVariant) = 213 ResolvedSchedClass::resolveSchedClassId(State_.getSubtargetInfo(), 214 State_.getInstrInfo(), MCI); 215 const auto IndexIt = SchedClassIdToIndex.find(SchedClassId); 216 if (IndexIt == SchedClassIdToIndex.end()) { 217 // Create a new entry. 218 SchedClassIdToIndex.emplace(SchedClassId, Entries.size()); 219 ResolvedSchedClassAndPoints Entry(ResolvedSchedClass( 220 State_.getSubtargetInfo(), SchedClassId, WasVariant)); 221 Entry.PointIds.push_back(PointId); 222 Entries.push_back(std::move(Entry)); 223 } else { 224 // Append to the existing entry. 225 Entries[IndexIt->second].PointIds.push_back(PointId); 226 } 227 } 228 return Entries; 229 } 230 231 // Parallel benchmarks repeat the same opcode multiple times. Just show this 232 // opcode and show the whole snippet only on hover. 233 static void writeParallelSnippetHtml(raw_ostream &OS, 234 const std::vector<MCInst> &Instructions, 235 const MCInstrInfo &InstrInfo) { 236 if (Instructions.empty()) 237 return; 238 writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instructions[0].getOpcode())); 239 if (Instructions.size() > 1) 240 OS << " (x" << Instructions.size() << ")"; 241 } 242 243 // Latency tries to find a serial path. Just show the opcode path and show the 244 // whole snippet only on hover. 245 static void writeLatencySnippetHtml(raw_ostream &OS, 246 const std::vector<MCInst> &Instructions, 247 const MCInstrInfo &InstrInfo) { 248 bool First = true; 249 for (const MCInst &Instr : Instructions) { 250 if (First) 251 First = false; 252 else 253 OS << " → "; 254 writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instr.getOpcode())); 255 } 256 } 257 258 void Analysis::printPointHtml(const Benchmark &Point, 259 llvm::raw_ostream &OS) const { 260 OS << "<li><span class=\"mono\" title=\""; 261 writeSnippet<EscapeTag, kEscapeHtmlString>(OS, Point.AssembledSnippet, "\n"); 262 OS << "\">"; 263 switch (Point.Mode) { 264 case Benchmark::Latency: 265 writeLatencySnippetHtml(OS, Point.Key.Instructions, State_.getInstrInfo()); 266 break; 267 case Benchmark::Uops: 268 case Benchmark::InverseThroughput: 269 writeParallelSnippetHtml(OS, Point.Key.Instructions, State_.getInstrInfo()); 270 break; 271 default: 272 llvm_unreachable("invalid mode"); 273 } 274 OS << "</span> <span class=\"mono\">"; 275 writeEscaped<kEscapeHtml>(OS, Point.Key.Config); 276 OS << "</span></li>"; 277 } 278 279 void Analysis::printSchedClassClustersHtml( 280 const std::vector<SchedClassCluster> &Clusters, 281 const ResolvedSchedClass &RSC, raw_ostream &OS) const { 282 const auto &Points = Clustering_.getPoints(); 283 OS << "<table class=\"sched-class-clusters\">"; 284 OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>"; 285 assert(!Clusters.empty()); 286 for (const auto &Measurement : 287 Points[Clusters[0].getPointIds()[0]].Measurements) { 288 OS << "<th>"; 289 writeEscaped<kEscapeHtml>(OS, Measurement.Key); 290 OS << "</th>"; 291 } 292 OS << "</tr>"; 293 for (const SchedClassCluster &Cluster : Clusters) { 294 OS << "<tr class=\"" 295 << (Cluster.measurementsMatch(State_.getSubtargetInfo(), RSC, 296 Clustering_, 297 AnalysisInconsistencyEpsilonSquared_) 298 ? "good-cluster" 299 : "bad-cluster") 300 << "\"><td>"; 301 writeClusterId<kEscapeHtml>(OS, Cluster.id()); 302 OS << "</td><td><ul>"; 303 for (const size_t PointId : Cluster.getPointIds()) { 304 printPointHtml(Points[PointId], OS); 305 } 306 OS << "</ul></td>"; 307 for (const auto &Stats : Cluster.getCentroid().getStats()) { 308 OS << "<td class=\"measurement\">"; 309 writeMeasurementValue<kEscapeHtml>(OS, Stats.avg()); 310 OS << "<br><span class=\"minmax\">["; 311 writeMeasurementValue<kEscapeHtml>(OS, Stats.min()); 312 OS << ";"; 313 writeMeasurementValue<kEscapeHtml>(OS, Stats.max()); 314 OS << "]</span></td>"; 315 } 316 OS << "</tr>"; 317 } 318 OS << "</table>"; 319 } 320 321 void Analysis::SchedClassCluster::addPoint( 322 size_t PointId, const BenchmarkClustering &Clustering) { 323 PointIds.push_back(PointId); 324 const auto &Point = Clustering.getPoints()[PointId]; 325 if (ClusterId.isUndef()) 326 ClusterId = Clustering.getClusterIdForPoint(PointId); 327 assert(ClusterId == Clustering.getClusterIdForPoint(PointId)); 328 329 Centroid.addPoint(Point.Measurements); 330 } 331 332 bool Analysis::SchedClassCluster::measurementsMatch( 333 const MCSubtargetInfo &STI, const ResolvedSchedClass &RSC, 334 const BenchmarkClustering &Clustering, 335 const double AnalysisInconsistencyEpsilonSquared_) const { 336 assert(!Clustering.getPoints().empty()); 337 const Benchmark::ModeE Mode = Clustering.getPoints()[0].Mode; 338 339 if (!Centroid.validate(Mode)) 340 return false; 341 342 const std::vector<BenchmarkMeasure> ClusterCenterPoint = 343 Centroid.getAsPoint(); 344 345 const std::vector<BenchmarkMeasure> SchedClassPoint = 346 RSC.getAsPoint(Mode, STI, Centroid.getStats()); 347 if (SchedClassPoint.empty()) 348 return false; // In Uops mode validate() may not be enough. 349 350 assert(ClusterCenterPoint.size() == SchedClassPoint.size() && 351 "Expected measured/sched data dimensions to match."); 352 353 return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint, 354 AnalysisInconsistencyEpsilonSquared_); 355 } 356 357 void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC, 358 raw_ostream &OS) const { 359 OS << "<table class=\"sched-class-desc\">"; 360 OS << "<tr><th>Valid</th><th>Variant</th><th>NumMicroOps</th><th>Latency</" 361 "th><th>RThroughput</th><th>WriteProcRes</th><th title=\"This is the " 362 "idealized unit resource (port) pressure assuming ideal " 363 "distribution\">Idealized Resource Pressure</th></tr>"; 364 if (RSC.SCDesc->isValid()) { 365 const auto &SI = State_.getSubtargetInfo(); 366 const auto &SM = SI.getSchedModel(); 367 OS << "<tr><td>✔</td>"; 368 OS << "<td>" << (RSC.WasVariant ? "✔" : "✕") << "</td>"; 369 OS << "<td>" << RSC.SCDesc->NumMicroOps << "</td>"; 370 // Latencies. 371 OS << "<td><ul>"; 372 for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) { 373 const auto *const Entry = SI.getWriteLatencyEntry(RSC.SCDesc, I); 374 OS << "<li>" << Entry->Cycles; 375 if (RSC.SCDesc->NumWriteLatencyEntries > 1) { 376 // Dismabiguate if more than 1 latency. 377 OS << " (WriteResourceID " << Entry->WriteResourceID << ")"; 378 } 379 OS << "</li>"; 380 } 381 OS << "</ul></td>"; 382 // inverse throughput. 383 OS << "<td>"; 384 writeMeasurementValue<kEscapeHtml>( 385 OS, MCSchedModel::getReciprocalThroughput(SI, *RSC.SCDesc)); 386 OS << "</td>"; 387 // WriteProcRes. 388 OS << "<td><ul>"; 389 for (const auto &WPR : RSC.NonRedundantWriteProcRes) { 390 OS << "<li><span class=\"mono\">"; 391 writeEscaped<kEscapeHtml>(OS, 392 SM.getProcResource(WPR.ProcResourceIdx)->Name); 393 OS << "</span>: " << WPR.Cycles << "</li>"; 394 } 395 OS << "</ul></td>"; 396 // Idealized port pressure. 397 OS << "<td><ul>"; 398 for (const auto &Pressure : RSC.IdealizedProcResPressure) { 399 OS << "<li><span class=\"mono\">"; 400 writeEscaped<kEscapeHtml>( 401 OS, SI.getSchedModel().getProcResource(Pressure.first)->Name); 402 OS << "</span>: "; 403 writeMeasurementValue<kEscapeHtml>(OS, Pressure.second); 404 OS << "</li>"; 405 } 406 OS << "</ul></td>"; 407 OS << "</tr>"; 408 } else { 409 OS << "<tr><td>✕</td><td></td><td></td></tr>"; 410 } 411 OS << "</table>"; 412 } 413 414 void Analysis::printClusterRawHtml( 415 const BenchmarkClustering::ClusterId &Id, StringRef display_name, 416 llvm::raw_ostream &OS) const { 417 const auto &Points = Clustering_.getPoints(); 418 const auto &Cluster = Clustering_.getCluster(Id); 419 if (Cluster.PointIndices.empty()) 420 return; 421 422 OS << "<div class=\"inconsistency\"><p>" << display_name << " Cluster (" 423 << Cluster.PointIndices.size() << " points)</p>"; 424 OS << "<table class=\"sched-class-clusters\">"; 425 // Table Header. 426 OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>"; 427 for (const auto &Measurement : Points[Cluster.PointIndices[0]].Measurements) { 428 OS << "<th>"; 429 writeEscaped<kEscapeHtml>(OS, Measurement.Key); 430 OS << "</th>"; 431 } 432 OS << "</tr>"; 433 434 // Point data. 435 for (const auto &PointId : Cluster.PointIndices) { 436 OS << "<tr class=\"bad-cluster\"><td>" << display_name << "</td><td><ul>"; 437 printPointHtml(Points[PointId], OS); 438 OS << "</ul></td>"; 439 for (const auto &Measurement : Points[PointId].Measurements) { 440 OS << "<td class=\"measurement\">"; 441 writeMeasurementValue<kEscapeHtml>(OS, Measurement.PerInstructionValue); 442 } 443 OS << "</tr>"; 444 } 445 OS << "</table>"; 446 447 OS << "</div>"; 448 449 } // namespace exegesis 450 451 static constexpr const char kHtmlHead[] = R"( 452 <head> 453 <title>llvm-exegesis Analysis Results</title> 454 <style> 455 body { 456 font-family: sans-serif 457 } 458 span.sched-class-name { 459 font-weight: bold; 460 font-family: monospace; 461 } 462 span.opcode { 463 font-family: monospace; 464 } 465 span.config { 466 font-family: monospace; 467 } 468 div.inconsistency { 469 margin-top: 50px; 470 } 471 table { 472 margin-left: 50px; 473 border-collapse: collapse; 474 } 475 table, table tr,td,th { 476 border: 1px solid #444; 477 } 478 table ul { 479 padding-left: 0px; 480 margin: 0px; 481 list-style-type: none; 482 } 483 table.sched-class-clusters td { 484 padding-left: 10px; 485 padding-right: 10px; 486 padding-top: 10px; 487 padding-bottom: 10px; 488 } 489 table.sched-class-desc td { 490 padding-left: 10px; 491 padding-right: 10px; 492 padding-top: 2px; 493 padding-bottom: 2px; 494 } 495 span.mono { 496 font-family: monospace; 497 } 498 td.measurement { 499 text-align: center; 500 } 501 tr.good-cluster td.measurement { 502 color: #292 503 } 504 tr.bad-cluster td.measurement { 505 color: #922 506 } 507 tr.good-cluster td.measurement span.minmax { 508 color: #888; 509 } 510 tr.bad-cluster td.measurement span.minmax { 511 color: #888; 512 } 513 </style> 514 </head> 515 )"; 516 517 template <> 518 Error Analysis::run<Analysis::PrintSchedClassInconsistencies>( 519 raw_ostream &OS) const { 520 const auto &FirstPoint = Clustering_.getPoints()[0]; 521 // Print the header. 522 OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>"; 523 OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>"; 524 OS << "<h3>Triple: <span class=\"mono\">"; 525 writeEscaped<kEscapeHtml>(OS, FirstPoint.LLVMTriple); 526 OS << "</span></h3><h3>Cpu: <span class=\"mono\">"; 527 writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName); 528 OS << "</span></h3>"; 529 530 const auto &SI = State_.getSubtargetInfo(); 531 for (const auto &RSCAndPoints : makePointsPerSchedClass()) { 532 if (!RSCAndPoints.RSC.SCDesc) 533 continue; 534 // Bucket sched class points into sched class clusters. 535 std::vector<SchedClassCluster> SchedClassClusters; 536 for (const size_t PointId : RSCAndPoints.PointIds) { 537 const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId); 538 if (!ClusterId.isValid()) 539 continue; // Ignore noise and errors. FIXME: take noise into account ? 540 if (ClusterId.isUnstable() ^ AnalysisDisplayUnstableOpcodes_) 541 continue; // Either display stable or unstable clusters only. 542 auto SchedClassClusterIt = llvm::find_if( 543 SchedClassClusters, [ClusterId](const SchedClassCluster &C) { 544 return C.id() == ClusterId; 545 }); 546 if (SchedClassClusterIt == SchedClassClusters.end()) { 547 SchedClassClusters.emplace_back(); 548 SchedClassClusterIt = std::prev(SchedClassClusters.end()); 549 } 550 SchedClassClusterIt->addPoint(PointId, Clustering_); 551 } 552 553 // Print any scheduling class that has at least one cluster that does not 554 // match the checked-in data. 555 if (all_of(SchedClassClusters, [this, &RSCAndPoints, 556 &SI](const SchedClassCluster &C) { 557 return C.measurementsMatch(SI, RSCAndPoints.RSC, Clustering_, 558 AnalysisInconsistencyEpsilonSquared_); 559 })) 560 continue; // Nothing weird. 561 562 OS << "<div class=\"inconsistency\"><p>Sched Class <span " 563 "class=\"sched-class-name\">"; 564 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 565 writeEscaped<kEscapeHtml>(OS, RSCAndPoints.RSC.SCDesc->Name); 566 #else 567 OS << RSCAndPoints.RSC.SchedClassId; 568 #endif 569 OS << "</span> contains instructions whose performance characteristics do" 570 " not match that of LLVM:</p>"; 571 printSchedClassClustersHtml(SchedClassClusters, RSCAndPoints.RSC, OS); 572 OS << "<p>llvm SchedModel data:</p>"; 573 printSchedClassDescHtml(RSCAndPoints.RSC, OS); 574 OS << "</div>"; 575 } 576 577 printClusterRawHtml(BenchmarkClustering::ClusterId::noise(), 578 "[noise]", OS); 579 580 OS << "</body></html>"; 581 return Error::success(); 582 } 583 584 } // namespace exegesis 585 } // namespace llvm 586