1 //===-- Analysis.cpp --------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "Analysis.h" 10 #include "BenchmarkResult.h" 11 #include "llvm/ADT/STLExtras.h" 12 #include "llvm/MC/MCAsmInfo.h" 13 #include "llvm/Support/FormatVariadic.h" 14 #include <limits> 15 #include <unordered_set> 16 #include <vector> 17 18 namespace llvm { 19 namespace exegesis { 20 21 static const char kCsvSep = ','; 22 23 namespace { 24 25 enum EscapeTag { kEscapeCsv, kEscapeHtml, kEscapeHtmlString }; 26 27 template <EscapeTag Tag> 28 void writeEscaped(llvm::raw_ostream &OS, const llvm::StringRef S); 29 30 template <> 31 void writeEscaped<kEscapeCsv>(llvm::raw_ostream &OS, const llvm::StringRef S) { 32 if (std::find(S.begin(), S.end(), kCsvSep) == S.end()) { 33 OS << S; 34 } else { 35 // Needs escaping. 36 OS << '"'; 37 for (const char C : S) { 38 if (C == '"') 39 OS << "\"\""; 40 else 41 OS << C; 42 } 43 OS << '"'; 44 } 45 } 46 47 template <> 48 void writeEscaped<kEscapeHtml>(llvm::raw_ostream &OS, const llvm::StringRef S) { 49 for (const char C : S) { 50 if (C == '<') 51 OS << "<"; 52 else if (C == '>') 53 OS << ">"; 54 else if (C == '&') 55 OS << "&"; 56 else 57 OS << C; 58 } 59 } 60 61 template <> 62 void writeEscaped<kEscapeHtmlString>(llvm::raw_ostream &OS, 63 const llvm::StringRef S) { 64 for (const char C : S) { 65 if (C == '"') 66 OS << "\\\""; 67 else 68 OS << C; 69 } 70 } 71 72 } // namespace 73 74 template <EscapeTag Tag> 75 static void 76 writeClusterId(llvm::raw_ostream &OS, 77 const InstructionBenchmarkClustering::ClusterId &CID) { 78 if (CID.isNoise()) 79 writeEscaped<Tag>(OS, "[noise]"); 80 else if (CID.isError()) 81 writeEscaped<Tag>(OS, "[error]"); 82 else 83 OS << CID.getId(); 84 } 85 86 template <EscapeTag Tag> 87 static void writeMeasurementValue(llvm::raw_ostream &OS, const double Value) { 88 // Given Value, if we wanted to serialize it to a string, 89 // how many base-10 digits will we need to store, max? 90 static constexpr auto MaxDigitCount = 91 std::numeric_limits<decltype(Value)>::max_digits10; 92 // Also, we will need a decimal separator. 93 static constexpr auto DecimalSeparatorLen = 1; // '.' e.g. 94 // So how long of a string will the serialization produce, max? 95 static constexpr auto SerializationLen = MaxDigitCount + DecimalSeparatorLen; 96 97 // WARNING: when changing the format, also adjust the small-size estimate ^. 98 static constexpr StringLiteral SimpleFloatFormat = StringLiteral("{0:F}"); 99 100 writeEscaped<Tag>( 101 OS, 102 llvm::formatv(SimpleFloatFormat.data(), Value).sstr<SerializationLen>()); 103 } 104 105 template <typename EscapeTag, EscapeTag Tag> 106 void Analysis::writeSnippet(llvm::raw_ostream &OS, 107 llvm::ArrayRef<uint8_t> Bytes, 108 const char *Separator) const { 109 llvm::SmallVector<std::string, 3> Lines; 110 // Parse the asm snippet and print it. 111 while (!Bytes.empty()) { 112 llvm::MCInst MI; 113 uint64_t MISize = 0; 114 if (!Disasm_->getInstruction(MI, MISize, Bytes, 0, llvm::nulls(), 115 llvm::nulls())) { 116 writeEscaped<Tag>(OS, llvm::join(Lines, Separator)); 117 writeEscaped<Tag>(OS, Separator); 118 writeEscaped<Tag>(OS, "[error decoding asm snippet]"); 119 return; 120 } 121 llvm::SmallString<128> InstPrinterStr; // FIXME: magic number. 122 llvm::raw_svector_ostream OSS(InstPrinterStr); 123 InstPrinter_->printInst(&MI, OSS, "", *SubtargetInfo_); 124 Bytes = Bytes.drop_front(MISize); 125 Lines.emplace_back(llvm::StringRef(InstPrinterStr).trim()); 126 } 127 writeEscaped<Tag>(OS, llvm::join(Lines, Separator)); 128 } 129 130 // Prints a row representing an instruction, along with scheduling info and 131 // point coordinates (measurements). 132 void Analysis::printInstructionRowCsv(const size_t PointId, 133 llvm::raw_ostream &OS) const { 134 const InstructionBenchmark &Point = Clustering_.getPoints()[PointId]; 135 writeClusterId<kEscapeCsv>(OS, Clustering_.getClusterIdForPoint(PointId)); 136 OS << kCsvSep; 137 writeSnippet<EscapeTag, kEscapeCsv>(OS, Point.AssembledSnippet, "; "); 138 OS << kCsvSep; 139 writeEscaped<kEscapeCsv>(OS, Point.Key.Config); 140 OS << kCsvSep; 141 assert(!Point.Key.Instructions.empty()); 142 const llvm::MCInst &MCI = Point.keyInstruction(); 143 unsigned SchedClassId; 144 std::tie(SchedClassId, std::ignore) = ResolvedSchedClass::resolveSchedClassId( 145 *SubtargetInfo_, *InstrInfo_, MCI); 146 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 147 const llvm::MCSchedClassDesc *const SCDesc = 148 SubtargetInfo_->getSchedModel().getSchedClassDesc(SchedClassId); 149 writeEscaped<kEscapeCsv>(OS, SCDesc->Name); 150 #else 151 OS << SchedClassId; 152 #endif 153 for (const auto &Measurement : Point.Measurements) { 154 OS << kCsvSep; 155 writeMeasurementValue<kEscapeCsv>(OS, Measurement.PerInstructionValue); 156 } 157 OS << "\n"; 158 } 159 160 Analysis::Analysis(const llvm::Target &Target, 161 std::unique_ptr<llvm::MCInstrInfo> InstrInfo, 162 const InstructionBenchmarkClustering &Clustering, 163 double AnalysisInconsistencyEpsilon, 164 bool AnalysisDisplayUnstableOpcodes) 165 : Clustering_(Clustering), InstrInfo_(std::move(InstrInfo)), 166 AnalysisInconsistencyEpsilonSquared_(AnalysisInconsistencyEpsilon * 167 AnalysisInconsistencyEpsilon), 168 AnalysisDisplayUnstableOpcodes_(AnalysisDisplayUnstableOpcodes) { 169 if (Clustering.getPoints().empty()) 170 return; 171 172 const InstructionBenchmark &FirstPoint = Clustering.getPoints().front(); 173 RegInfo_.reset(Target.createMCRegInfo(FirstPoint.LLVMTriple)); 174 AsmInfo_.reset(Target.createMCAsmInfo(*RegInfo_, FirstPoint.LLVMTriple)); 175 SubtargetInfo_.reset(Target.createMCSubtargetInfo(FirstPoint.LLVMTriple, 176 FirstPoint.CpuName, "")); 177 InstPrinter_.reset(Target.createMCInstPrinter( 178 llvm::Triple(FirstPoint.LLVMTriple), 0 /*default variant*/, *AsmInfo_, 179 *InstrInfo_, *RegInfo_)); 180 181 Context_ = std::make_unique<llvm::MCContext>(AsmInfo_.get(), RegInfo_.get(), 182 &ObjectFileInfo_); 183 Disasm_.reset(Target.createMCDisassembler(*SubtargetInfo_, *Context_)); 184 assert(Disasm_ && "cannot create MCDisassembler. missing call to " 185 "InitializeXXXTargetDisassembler ?"); 186 } 187 188 template <> 189 llvm::Error 190 Analysis::run<Analysis::PrintClusters>(llvm::raw_ostream &OS) const { 191 if (Clustering_.getPoints().empty()) 192 return llvm::Error::success(); 193 194 // Write the header. 195 OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config" 196 << kCsvSep << "sched_class"; 197 for (const auto &Measurement : Clustering_.getPoints().front().Measurements) { 198 OS << kCsvSep; 199 writeEscaped<kEscapeCsv>(OS, Measurement.Key); 200 } 201 OS << "\n"; 202 203 // Write the points. 204 const auto &Clusters = Clustering_.getValidClusters(); 205 for (size_t I = 0, E = Clusters.size(); I < E; ++I) { 206 for (const size_t PointId : Clusters[I].PointIndices) { 207 printInstructionRowCsv(PointId, OS); 208 } 209 OS << "\n\n"; 210 } 211 return llvm::Error::success(); 212 } 213 214 Analysis::ResolvedSchedClassAndPoints::ResolvedSchedClassAndPoints( 215 ResolvedSchedClass &&RSC) 216 : RSC(std::move(RSC)) {} 217 218 std::vector<Analysis::ResolvedSchedClassAndPoints> 219 Analysis::makePointsPerSchedClass() const { 220 std::vector<ResolvedSchedClassAndPoints> Entries; 221 // Maps SchedClassIds to index in result. 222 std::unordered_map<unsigned, size_t> SchedClassIdToIndex; 223 const auto &Points = Clustering_.getPoints(); 224 for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) { 225 const InstructionBenchmark &Point = Points[PointId]; 226 if (!Point.Error.empty()) 227 continue; 228 assert(!Point.Key.Instructions.empty()); 229 // FIXME: we should be using the tuple of classes for instructions in the 230 // snippet as key. 231 const llvm::MCInst &MCI = Point.keyInstruction(); 232 unsigned SchedClassId; 233 bool WasVariant; 234 std::tie(SchedClassId, WasVariant) = 235 ResolvedSchedClass::resolveSchedClassId(*SubtargetInfo_, *InstrInfo_, 236 MCI); 237 const auto IndexIt = SchedClassIdToIndex.find(SchedClassId); 238 if (IndexIt == SchedClassIdToIndex.end()) { 239 // Create a new entry. 240 SchedClassIdToIndex.emplace(SchedClassId, Entries.size()); 241 ResolvedSchedClassAndPoints Entry( 242 ResolvedSchedClass(*SubtargetInfo_, SchedClassId, WasVariant)); 243 Entry.PointIds.push_back(PointId); 244 Entries.push_back(std::move(Entry)); 245 } else { 246 // Append to the existing entry. 247 Entries[IndexIt->second].PointIds.push_back(PointId); 248 } 249 } 250 return Entries; 251 } 252 253 // Uops repeat the same opcode over again. Just show this opcode and show the 254 // whole snippet only on hover. 255 static void writeUopsSnippetHtml(llvm::raw_ostream &OS, 256 const std::vector<llvm::MCInst> &Instructions, 257 const llvm::MCInstrInfo &InstrInfo) { 258 if (Instructions.empty()) 259 return; 260 writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instructions[0].getOpcode())); 261 if (Instructions.size() > 1) 262 OS << " (x" << Instructions.size() << ")"; 263 } 264 265 // Latency tries to find a serial path. Just show the opcode path and show the 266 // whole snippet only on hover. 267 static void 268 writeLatencySnippetHtml(llvm::raw_ostream &OS, 269 const std::vector<llvm::MCInst> &Instructions, 270 const llvm::MCInstrInfo &InstrInfo) { 271 bool First = true; 272 for (const llvm::MCInst &Instr : Instructions) { 273 if (First) 274 First = false; 275 else 276 OS << " → "; 277 writeEscaped<kEscapeHtml>(OS, InstrInfo.getName(Instr.getOpcode())); 278 } 279 } 280 281 void Analysis::printSchedClassClustersHtml( 282 const std::vector<SchedClassCluster> &Clusters, 283 const ResolvedSchedClass &RSC, llvm::raw_ostream &OS) const { 284 const auto &Points = Clustering_.getPoints(); 285 OS << "<table class=\"sched-class-clusters\">"; 286 OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>"; 287 assert(!Clusters.empty()); 288 for (const auto &Measurement : 289 Points[Clusters[0].getPointIds()[0]].Measurements) { 290 OS << "<th>"; 291 writeEscaped<kEscapeHtml>(OS, Measurement.Key); 292 OS << "</th>"; 293 } 294 OS << "</tr>"; 295 for (const SchedClassCluster &Cluster : Clusters) { 296 OS << "<tr class=\"" 297 << (Cluster.measurementsMatch(*SubtargetInfo_, RSC, Clustering_, 298 AnalysisInconsistencyEpsilonSquared_) 299 ? "good-cluster" 300 : "bad-cluster") 301 << "\"><td>"; 302 writeClusterId<kEscapeHtml>(OS, Cluster.id()); 303 OS << "</td><td><ul>"; 304 for (const size_t PointId : Cluster.getPointIds()) { 305 const auto &Point = Points[PointId]; 306 OS << "<li><span class=\"mono\" title=\""; 307 writeSnippet<EscapeTag, kEscapeHtmlString>(OS, Point.AssembledSnippet, 308 "\n"); 309 OS << "\">"; 310 switch (Point.Mode) { 311 case InstructionBenchmark::Latency: 312 writeLatencySnippetHtml(OS, Point.Key.Instructions, *InstrInfo_); 313 break; 314 case InstructionBenchmark::Uops: 315 case InstructionBenchmark::InverseThroughput: 316 writeUopsSnippetHtml(OS, Point.Key.Instructions, *InstrInfo_); 317 break; 318 default: 319 llvm_unreachable("invalid mode"); 320 } 321 OS << "</span> <span class=\"mono\">"; 322 writeEscaped<kEscapeHtml>(OS, Point.Key.Config); 323 OS << "</span></li>"; 324 } 325 OS << "</ul></td>"; 326 for (const auto &Stats : Cluster.getCentroid().getStats()) { 327 OS << "<td class=\"measurement\">"; 328 writeMeasurementValue<kEscapeHtml>(OS, Stats.avg()); 329 OS << "<br><span class=\"minmax\">["; 330 writeMeasurementValue<kEscapeHtml>(OS, Stats.min()); 331 OS << ";"; 332 writeMeasurementValue<kEscapeHtml>(OS, Stats.max()); 333 OS << "]</span></td>"; 334 } 335 OS << "</tr>"; 336 } 337 OS << "</table>"; 338 } 339 340 void Analysis::SchedClassCluster::addPoint( 341 size_t PointId, const InstructionBenchmarkClustering &Clustering) { 342 PointIds.push_back(PointId); 343 const auto &Point = Clustering.getPoints()[PointId]; 344 if (ClusterId.isUndef()) 345 ClusterId = Clustering.getClusterIdForPoint(PointId); 346 assert(ClusterId == Clustering.getClusterIdForPoint(PointId)); 347 348 Centroid.addPoint(Point.Measurements); 349 } 350 351 bool Analysis::SchedClassCluster::measurementsMatch( 352 const llvm::MCSubtargetInfo &STI, const ResolvedSchedClass &RSC, 353 const InstructionBenchmarkClustering &Clustering, 354 const double AnalysisInconsistencyEpsilonSquared_) const { 355 assert(!Clustering.getPoints().empty()); 356 const InstructionBenchmark::ModeE Mode = Clustering.getPoints()[0].Mode; 357 358 if (!Centroid.validate(Mode)) 359 return false; 360 361 const std::vector<BenchmarkMeasure> ClusterCenterPoint = 362 Centroid.getAsPoint(); 363 364 const std::vector<BenchmarkMeasure> SchedClassPoint = 365 RSC.getAsPoint(Mode, STI, Centroid.getStats()); 366 if (SchedClassPoint.empty()) 367 return false; // In Uops mode validate() may not be enough. 368 369 assert(ClusterCenterPoint.size() == SchedClassPoint.size() && 370 "Expected measured/sched data dimensions to match."); 371 372 return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint, 373 AnalysisInconsistencyEpsilonSquared_); 374 } 375 376 void Analysis::printSchedClassDescHtml(const ResolvedSchedClass &RSC, 377 llvm::raw_ostream &OS) const { 378 OS << "<table class=\"sched-class-desc\">"; 379 OS << "<tr><th>Valid</th><th>Variant</th><th>NumMicroOps</th><th>Latency</" 380 "th><th>RThroughput</th><th>WriteProcRes</th><th title=\"This is the " 381 "idealized unit resource (port) pressure assuming ideal " 382 "distribution\">Idealized Resource Pressure</th></tr>"; 383 if (RSC.SCDesc->isValid()) { 384 const auto &SM = SubtargetInfo_->getSchedModel(); 385 OS << "<tr><td>✔</td>"; 386 OS << "<td>" << (RSC.WasVariant ? "✔" : "✕") << "</td>"; 387 OS << "<td>" << RSC.SCDesc->NumMicroOps << "</td>"; 388 // Latencies. 389 OS << "<td><ul>"; 390 for (int I = 0, E = RSC.SCDesc->NumWriteLatencyEntries; I < E; ++I) { 391 const auto *const Entry = 392 SubtargetInfo_->getWriteLatencyEntry(RSC.SCDesc, I); 393 OS << "<li>" << Entry->Cycles; 394 if (RSC.SCDesc->NumWriteLatencyEntries > 1) { 395 // Dismabiguate if more than 1 latency. 396 OS << " (WriteResourceID " << Entry->WriteResourceID << ")"; 397 } 398 OS << "</li>"; 399 } 400 OS << "</ul></td>"; 401 // inverse throughput. 402 OS << "<td>"; 403 writeMeasurementValue<kEscapeHtml>( 404 OS, 405 MCSchedModel::getReciprocalThroughput(*SubtargetInfo_, *RSC.SCDesc)); 406 OS << "</td>"; 407 // WriteProcRes. 408 OS << "<td><ul>"; 409 for (const auto &WPR : RSC.NonRedundantWriteProcRes) { 410 OS << "<li><span class=\"mono\">"; 411 writeEscaped<kEscapeHtml>(OS, 412 SM.getProcResource(WPR.ProcResourceIdx)->Name); 413 OS << "</span>: " << WPR.Cycles << "</li>"; 414 } 415 OS << "</ul></td>"; 416 // Idealized port pressure. 417 OS << "<td><ul>"; 418 for (const auto &Pressure : RSC.IdealizedProcResPressure) { 419 OS << "<li><span class=\"mono\">"; 420 writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel() 421 .getProcResource(Pressure.first) 422 ->Name); 423 OS << "</span>: "; 424 writeMeasurementValue<kEscapeHtml>(OS, Pressure.second); 425 OS << "</li>"; 426 } 427 OS << "</ul></td>"; 428 OS << "</tr>"; 429 } else { 430 OS << "<tr><td>✕</td><td></td><td></td></tr>"; 431 } 432 OS << "</table>"; 433 } 434 435 static constexpr const char kHtmlHead[] = R"( 436 <head> 437 <title>llvm-exegesis Analysis Results</title> 438 <style> 439 body { 440 font-family: sans-serif 441 } 442 span.sched-class-name { 443 font-weight: bold; 444 font-family: monospace; 445 } 446 span.opcode { 447 font-family: monospace; 448 } 449 span.config { 450 font-family: monospace; 451 } 452 div.inconsistency { 453 margin-top: 50px; 454 } 455 table { 456 margin-left: 50px; 457 border-collapse: collapse; 458 } 459 table, table tr,td,th { 460 border: 1px solid #444; 461 } 462 table ul { 463 padding-left: 0px; 464 margin: 0px; 465 list-style-type: none; 466 } 467 table.sched-class-clusters td { 468 padding-left: 10px; 469 padding-right: 10px; 470 padding-top: 10px; 471 padding-bottom: 10px; 472 } 473 table.sched-class-desc td { 474 padding-left: 10px; 475 padding-right: 10px; 476 padding-top: 2px; 477 padding-bottom: 2px; 478 } 479 span.mono { 480 font-family: monospace; 481 } 482 td.measurement { 483 text-align: center; 484 } 485 tr.good-cluster td.measurement { 486 color: #292 487 } 488 tr.bad-cluster td.measurement { 489 color: #922 490 } 491 tr.good-cluster td.measurement span.minmax { 492 color: #888; 493 } 494 tr.bad-cluster td.measurement span.minmax { 495 color: #888; 496 } 497 </style> 498 </head> 499 )"; 500 501 template <> 502 llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>( 503 llvm::raw_ostream &OS) const { 504 const auto &FirstPoint = Clustering_.getPoints()[0]; 505 // Print the header. 506 OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>"; 507 OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>"; 508 OS << "<h3>Triple: <span class=\"mono\">"; 509 writeEscaped<kEscapeHtml>(OS, FirstPoint.LLVMTriple); 510 OS << "</span></h3><h3>Cpu: <span class=\"mono\">"; 511 writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName); 512 OS << "</span></h3>"; 513 514 for (const auto &RSCAndPoints : makePointsPerSchedClass()) { 515 if (!RSCAndPoints.RSC.SCDesc) 516 continue; 517 // Bucket sched class points into sched class clusters. 518 std::vector<SchedClassCluster> SchedClassClusters; 519 for (const size_t PointId : RSCAndPoints.PointIds) { 520 const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId); 521 if (!ClusterId.isValid()) 522 continue; // Ignore noise and errors. FIXME: take noise into account ? 523 if (ClusterId.isUnstable() ^ AnalysisDisplayUnstableOpcodes_) 524 continue; // Either display stable or unstable clusters only. 525 auto SchedClassClusterIt = 526 std::find_if(SchedClassClusters.begin(), SchedClassClusters.end(), 527 [ClusterId](const SchedClassCluster &C) { 528 return C.id() == ClusterId; 529 }); 530 if (SchedClassClusterIt == SchedClassClusters.end()) { 531 SchedClassClusters.emplace_back(); 532 SchedClassClusterIt = std::prev(SchedClassClusters.end()); 533 } 534 SchedClassClusterIt->addPoint(PointId, Clustering_); 535 } 536 537 // Print any scheduling class that has at least one cluster that does not 538 // match the checked-in data. 539 if (llvm::all_of(SchedClassClusters, 540 [this, &RSCAndPoints](const SchedClassCluster &C) { 541 return C.measurementsMatch( 542 *SubtargetInfo_, RSCAndPoints.RSC, Clustering_, 543 AnalysisInconsistencyEpsilonSquared_); 544 })) 545 continue; // Nothing weird. 546 547 OS << "<div class=\"inconsistency\"><p>Sched Class <span " 548 "class=\"sched-class-name\">"; 549 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 550 writeEscaped<kEscapeHtml>(OS, RSCAndPoints.RSC.SCDesc->Name); 551 #else 552 OS << RSCAndPoints.RSC.SchedClassId; 553 #endif 554 OS << "</span> contains instructions whose performance characteristics do" 555 " not match that of LLVM:</p>"; 556 printSchedClassClustersHtml(SchedClassClusters, RSCAndPoints.RSC, OS); 557 OS << "<p>llvm SchedModel data:</p>"; 558 printSchedClassDescHtml(RSCAndPoints.RSC, OS); 559 OS << "</div>"; 560 } 561 562 OS << "</body></html>"; 563 return llvm::Error::success(); 564 } 565 566 } // namespace exegesis 567 } // namespace llvm 568