1 //===-- Analysis.cpp --------------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "Analysis.h" 11 #include "BenchmarkResult.h" 12 #include "llvm/ADT/STLExtras.h" 13 #include "llvm/Support/FormatVariadic.h" 14 #include <unordered_set> 15 #include <vector> 16 17 namespace exegesis { 18 19 static const char kCsvSep = ','; 20 21 namespace { 22 23 enum EscapeTag { kEscapeCsv, kEscapeHtml }; 24 25 template <EscapeTag Tag> 26 void writeEscaped(llvm::raw_ostream &OS, const llvm::StringRef S); 27 28 template <> 29 void writeEscaped<kEscapeCsv>(llvm::raw_ostream &OS, const llvm::StringRef S) { 30 if (std::find(S.begin(), S.end(), kCsvSep) == S.end()) { 31 OS << S; 32 } else { 33 // Needs escaping. 34 OS << '"'; 35 for (const char C : S) { 36 if (C == '"') 37 OS << "\"\""; 38 else 39 OS << C; 40 } 41 OS << '"'; 42 } 43 } 44 45 template <> 46 void writeEscaped<kEscapeHtml>(llvm::raw_ostream &OS, const llvm::StringRef S) { 47 for (const char C : S) { 48 if (C == '<') 49 OS << "<"; 50 else if (C == '>') 51 OS << ">"; 52 else if (C == '&') 53 OS << "&"; 54 else 55 OS << C; 56 } 57 } 58 59 } // namespace 60 61 template <EscapeTag Tag> 62 static void 63 writeClusterId(llvm::raw_ostream &OS, 64 const InstructionBenchmarkClustering::ClusterId &CID) { 65 if (CID.isNoise()) 66 writeEscaped<Tag>(OS, "[noise]"); 67 else if (CID.isError()) 68 writeEscaped<Tag>(OS, "[error]"); 69 else 70 OS << CID.getId(); 71 } 72 73 template <EscapeTag Tag> 74 static void writeMeasurementValue(llvm::raw_ostream &OS, const double Value) { 75 writeEscaped<Tag>(OS, llvm::formatv("{0:F}", Value).str()); 76 } 77 78 // Prints a row representing an instruction, along with scheduling info and 79 // point coordinates (measurements). 
// Prints one CSV row for a benchmark point: cluster id, opcode name, config,
// sched class (name or numeric id), then one column per measurement.
void Analysis::printInstructionRowCsv(const size_t PointId,
                                      llvm::raw_ostream &OS) const {
  const InstructionBenchmark &Point = Clustering_.getPoints()[PointId];
  writeClusterId<kEscapeCsv>(OS, Clustering_.getClusterIdForPoint(PointId));
  OS << kCsvSep;
  writeEscaped<kEscapeCsv>(OS, Point.Key.OpcodeName);
  OS << kCsvSep;
  writeEscaped<kEscapeCsv>(OS, Point.Key.Config);
  OS << kCsvSep;
  const auto OpcodeIt = MnemonicToOpcode_.find(Point.Key.OpcodeName);
  if (OpcodeIt != MnemonicToOpcode_.end()) {
    const unsigned SchedClassId =
        InstrInfo_->get(OpcodeIt->second).getSchedClass();
    // Sched class names are only compiled into MC tables in asserts/dump
    // builds; fall back to the numeric id otherwise.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    const auto &SchedModel = SubtargetInfo_->getSchedModel();
    const llvm::MCSchedClassDesc *const SCDesc =
        SchedModel.getSchedClassDesc(SchedClassId);
    writeEscaped<kEscapeCsv>(OS, SCDesc->Name);
#else
    OS << SchedClassId;
#endif
  }
  // FIXME: Print the sched class once InstructionBenchmark separates key into
  // (mnemonic, mode, opaque).
  for (const auto &Measurement : Point.Measurements) {
    OS << kCsvSep;
    writeMeasurementValue<kEscapeCsv>(OS, Measurement.Value);
  }
  OS << "\n";
}

// Builds the per-target helpers (MCInstrInfo, MCSubtargetInfo) needed by the
// analyses. NOTE(review): only the first point's triple/cpu are consulted —
// this presumably assumes all points share them; confirm with producers.
Analysis::Analysis(const llvm::Target &Target,
                   const InstructionBenchmarkClustering &Clustering)
    : Clustering_(Clustering) {
  if (Clustering.getPoints().empty())
    return;

  InstrInfo_.reset(Target.createMCInstrInfo());
  const InstructionBenchmark &FirstPoint = Clustering.getPoints().front();
  SubtargetInfo_.reset(Target.createMCSubtargetInfo(FirstPoint.LLVMTriple,
                                                    FirstPoint.CpuName, ""));

  // Build an index of mnemonic->opcode so that benchmark points (which carry
  // mnemonics) can be mapped back to MC opcodes.
  for (int I = 0, E = InstrInfo_->getNumOpcodes(); I < E; ++I)
    MnemonicToOpcode_.emplace(InstrInfo_->getName(I), I);
}

// Emits all valid clusters as CSV: a header row, then one row per point,
// with a blank-line separator between clusters.
template <>
llvm::Error
Analysis::run<Analysis::PrintClusters>(llvm::raw_ostream &OS) const {
  if (Clustering_.getPoints().empty())
    return llvm::Error::success();

  // Write the header. Measurement columns are named after the keys of the
  // first point (all points are expected to share the same measurement set).
  OS << "cluster_id" << kCsvSep << "opcode_name" << kCsvSep << "config"
     << kCsvSep << "sched_class";
  for (const auto &Measurement : Clustering_.getPoints().front().Measurements) {
    OS << kCsvSep;
    writeEscaped<kEscapeCsv>(OS, Measurement.Key);
  }
  OS << "\n";

  // Write the points, one cluster at a time.
  const auto &Clusters = Clustering_.getValidClusters();
  for (size_t I = 0, E = Clusters.size(); I < E; ++I) {
    for (const size_t PointId : Clusters[I].PointIndices) {
      printInstructionRowCsv(PointId, OS);
    }
    OS << "\n\n";
  }
  return llvm::Error::success();
}

// Groups benchmark points by LLVM sched class id. Points with errors, or
// whose mnemonic cannot be resolved to an opcode, are dropped.
std::unordered_map<unsigned, std::vector<size_t>>
Analysis::makePointsPerSchedClass() const {
  std::unordered_map<unsigned, std::vector<size_t>> PointsPerSchedClass;
  const auto &Points = Clustering_.getPoints();
  for (size_t PointId = 0, E = Points.size(); PointId < E; ++PointId) {
    const InstructionBenchmark &Point = Points[PointId];
    if (!Point.Error.empty())
      continue;
    const auto OpcodeIt = MnemonicToOpcode_.find(Point.Key.OpcodeName);
    if (OpcodeIt == MnemonicToOpcode_.end())
      continue;
    const unsigned SchedClassId =
        InstrInfo_->get(OpcodeIt->second).getSchedClass();
    PointsPerSchedClass[SchedClassId].push_back(PointId);
  }
  return PointsPerSchedClass;
}

// Renders the clusters of a single sched class as an HTML table: one row per
// cluster, colored by whether the cluster's measurements match the LLVM
// sched model ("good-cluster" / "bad-cluster").
void Analysis::printSchedClassClustersHtml(
    const std::vector<SchedClassCluster> &Clusters, const SchedClass &SC,
    llvm::raw_ostream &OS) const {
  const auto &Points = Clustering_.getPoints();
  OS << "<table class=\"sched-class-clusters\">";
  OS << "<tr><th>ClusterId</th><th>Opcode/Config</th>";
  // Column headers come from the first point of the first cluster; prefer the
  // human-readable DebugString when available.
  assert(!Clusters.empty());
  for (const auto &Measurement :
       Points[Clusters[0].getPointIds()[0]].Measurements) {
    OS << "<th>";
    if (Measurement.DebugString.empty())
      writeEscaped<kEscapeHtml>(OS, Measurement.Key);
    else
      writeEscaped<kEscapeHtml>(OS, Measurement.DebugString);
    OS << "</th>";
  }
  OS << "</tr>";
  for (const SchedClassCluster &Cluster : Clusters) {
    OS << "<tr class=\""
       << (Cluster.measurementsMatch(*SubtargetInfo_, SC, Clustering_)
               ? "good-cluster"
               : "bad-cluster")
       << "\"><td>";
    writeClusterId<kEscapeHtml>(OS, Cluster.id());
    OS << "</td><td><ul>";
    // List every opcode/config pair in the cluster.
    for (const size_t PointId : Cluster.getPointIds()) {
      const auto &Point = Points[PointId];
      OS << "<li><span class=\"mono\">";
      writeEscaped<kEscapeHtml>(OS, Point.Key.OpcodeName);
      OS << "</span> <span class=\"mono\">";
      writeEscaped<kEscapeHtml>(OS, Point.Key.Config);
      OS << "</span></li>";
    }
    OS << "</ul></td>";
    // One cell per measurement: average plus [min;max] range.
    for (const auto &Stats : Cluster.getRepresentative()) {
      OS << "<td class=\"measurement\">";
      writeMeasurementValue<kEscapeHtml>(OS, Stats.avg());
      OS << "<br><span class=\"minmax\">[";
      writeMeasurementValue<kEscapeHtml>(OS, Stats.min());
      OS << ";";
      writeMeasurementValue<kEscapeHtml>(OS, Stats.max());
      OS << "]</span></td>";
    }
    OS << "</tr>";
  }
  OS << "</table>";
}

// Return the non-redundant list of WriteProcRes used by the given sched class.
// The scheduling model for LLVM is such that each instruction has a certain
// number of uops which consume resources which are described by WriteProcRes
// entries. Each entry describe how many cycles are spent on a specific ProcRes
// kind.
// For example, an instruction might have 3 uOps, one dispatching on P0
// (ProcResIdx=1) and two on P06 (ProcResIdx = 7).
// Note that LLVM additionally denormalizes resource consumption to include
// usage of super resources by subresources. So in practice if there exists a
// P016 (ProcResIdx=10), then the cycles consumed by P0 are also consumed by
// P06 (ProcResIdx = 7) and P016 (ProcResIdx = 10), and the resources consumed
// by P06 are also consumed by P016. In the figure below, parenthesized cycles
// denote implied usage of superresources by subresources:
//            P0  P06  P016
//     uOp1    1  (1)   (1)
//     uOp2        1    (1)
//     uOp3        1    (1)
//     =============================
//             1   3     3
// Eventually we end up with three entries for the WriteProcRes of the
// instruction:
//    {ProcResIdx=1,  Cycles=1}  // P0
//    {ProcResIdx=7,  Cycles=3}  // P06
//    {ProcResIdx=10, Cycles=3}  // P016
//
// Note that in this case, P016 does not contribute any cycles, so it would
// be removed by this function.
// FIXME: Move this to MCSubtargetInfo and use it in llvm-mca.
static llvm::SmallVector<llvm::MCWriteProcResEntry, 8>
getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc,
                            const llvm::MCSubtargetInfo &STI) {
  llvm::SmallVector<llvm::MCWriteProcResEntry, 8> Result;
  const auto &SM = STI.getSchedModel();
  const unsigned NumProcRes = SM.getNumProcResourceKinds();

  // This assumes that the ProcResDescs are sorted in topological order, which
  // is guaranteed by the tablegen backend.
  // ProcResUnitUsage[I] accumulates the cycles already attributed to proc
  // resource I (directly for units, spread evenly for groups).
  llvm::SmallVector<float, 32> ProcResUnitUsage(NumProcRes);
  for (const auto *WPR = STI.getWriteProcResBegin(&SCDesc),
                  *const WPREnd = STI.getWriteProcResEnd(&SCDesc);
       WPR != WPREnd; ++WPR) {
    const llvm::MCProcResourceDesc *const ProcResDesc =
        SM.getProcResource(WPR->ProcResourceIdx);
    if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
      // This is a ProcResUnit.
      Result.push_back({WPR->ProcResourceIdx, WPR->Cycles});
      ProcResUnitUsage[WPR->ProcResourceIdx] += WPR->Cycles;
    } else {
      // This is a ProcResGroup. First see if it contributes any cycles or if
      // it has cycles just from subunits.
      float RemainingCycles = WPR->Cycles;
      for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
           SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
           ++SubResIdx) {
        RemainingCycles -= ProcResUnitUsage[*SubResIdx];
      }
      // Small epsilon absorbs float rounding from the even spreading below.
      if (RemainingCycles < 0.01f) {
        // The ProcResGroup contributes no cycles of its own.
        continue;
      }
      // The ProcResGroup contributes `RemainingCycles` cycles of its own.
      Result.push_back({WPR->ProcResourceIdx,
                        static_cast<uint16_t>(std::round(RemainingCycles))});
      // Spread the remaining cycles over all subunits.
      for (const auto *SubResIdx = ProcResDesc->SubUnitsIdxBegin;
           SubResIdx != ProcResDesc->SubUnitsIdxBegin + ProcResDesc->NumUnits;
           ++SubResIdx) {
        ProcResUnitUsage[*SubResIdx] += RemainingCycles / ProcResDesc->NumUnits;
      }
    }
  }
  return Result;
}

// Caches the sched-class description together with its de-redundantized
// WriteProcRes entries and the idealized pressure they imply.
Analysis::SchedClass::SchedClass(const llvm::MCSchedClassDesc &SD,
                                 const llvm::MCSubtargetInfo &STI)
    : SCDesc(SD),
      NonRedundantWriteProcRes(getNonRedundantWriteProcRes(SD, STI)),
      IdealizedProcResPressure(computeIdealizedProcResPressure(
          STI.getSchedModel(), NonRedundantWriteProcRes)) {}

// Adds a point to this cluster bucket. The first point fixes the cluster id
// and the number of measurements; later points must belong to the same
// cluster (asserted below).
void Analysis::SchedClassCluster::addPoint(
    size_t PointId, const InstructionBenchmarkClustering &Clustering) {
  PointIds.push_back(PointId);
  const auto &Point = Clustering.getPoints()[PointId];
  if (ClusterId.isUndef()) {
    ClusterId = Clustering.getClusterIdForPoint(PointId);
    Representative.resize(Point.Measurements.size());
  }
  // Fold each measurement into the per-column representative statistics.
  for (size_t I = 0, E = Point.Measurements.size(); I < E; ++I) {
    Representative[I].push(Point.Measurements[I]);
  }
  assert(ClusterId == Clustering.getClusterIdForPoint(PointId));
}

// Returns true if the cluster's measured center is a neighbour (in the
// clustering sense) of the point predicted by the LLVM sched model, i.e. the
// checked-in scheduling data is consistent with the measurements.
bool Analysis::SchedClassCluster::measurementsMatch(
    const llvm::MCSubtargetInfo &STI, const SchedClass &SC,
    const InstructionBenchmarkClustering &Clustering) const {
  const size_t NumMeasurements = Representative.size();
  std::vector<BenchmarkMeasure> ClusterCenterPoint(NumMeasurements);
  std::vector<BenchmarkMeasure> SchedClassPoint(NumMeasurements);
  // The benchmark mode of the first point decides how to build the sched
  // class point.
  assert(!Clustering.getPoints().empty());
  const InstructionBenchmark::ModeE Mode = Clustering.getPoints()[0].Mode;
  if (Mode == InstructionBenchmark::Latency) {
    if (NumMeasurements != 1) {
      llvm::errs()
          << "invalid number of measurements in latency mode: expected 1, got "
          << NumMeasurements << "\n";
      return false;
    }
    // The model latency is the max over all write latency entries.
    SchedClassPoint[0].Value = 0.0;
    for (unsigned I = 0; I < SC.SCDesc.NumWriteLatencyEntries; ++I) {
      const llvm::MCWriteLatencyEntry *const WLE =
          STI.getWriteLatencyEntry(&SC.SCDesc, I);
      SchedClassPoint[0].Value =
          std::max<double>(SchedClassPoint[0].Value, WLE->Cycles);
    }
    ClusterCenterPoint[0].Value = Representative[0].avg();
  } else if (Mode == InstructionBenchmark::Uops) {
    for (int I = 0, E = Representative.size(); I < E; ++I) {
      // Each measurement key is a decimal ProcResIdx; look up the idealized
      // pressure the model predicts on that resource.
      uint16_t ProcResIdx = 0;
      if (!llvm::to_integer(Representative[I].key(), ProcResIdx, 10)) {
        llvm::errs() << "expected ProcResIdx key, got "
                     << Representative[I].key() << "\n";
        return false;
      }
      const auto ProcResPressureIt =
          std::find_if(SC.IdealizedProcResPressure.begin(),
                       SC.IdealizedProcResPressure.end(),
                       [ProcResIdx](const std::pair<uint16_t, float> &WPR) {
                         return WPR.first == ProcResIdx;
                       });
      // A resource absent from the idealized pressure contributes 0.
      SchedClassPoint[I].Value =
          ProcResPressureIt == SC.IdealizedProcResPressure.end()
              ? 0.0
              : ProcResPressureIt->second;
      ClusterCenterPoint[I].Value = Representative[I].avg();
    }
  } else {
    llvm::errs() << "unimplemented measurement matching for mode " << Mode
                 << "\n";
    return false;
  }
  return Clustering.isNeighbour(ClusterCenterPoint, SchedClassPoint);
}

// Renders the LLVM sched model data for one sched class as an HTML table:
// validity, variant-ness, uop count, latencies, WriteProcRes entries and the
// idealized resource pressure.
void Analysis::printSchedClassDescHtml(const SchedClass &SC,
                                       llvm::raw_ostream &OS) const {
  OS << "<table class=\"sched-class-desc\">";
  OS << "<tr><th>Valid</th><th>Variant</th><th>uOps</th><th>Latency</"
        "th><th>WriteProcRes</th><th title=\"This is the idealized unit "
        "resource (port) pressure assuming ideal distribution\">Idealized "
        "Resource Pressure</th></tr>";
  if (SC.SCDesc.isValid()) {
    const auto &SM = SubtargetInfo_->getSchedModel();
    OS << "<tr><td>✔</td>";
    OS << "<td>" << (SC.SCDesc.isVariant() ? "✔" : "✕")
       << "</td>";
    OS << "<td>" << SC.SCDesc.NumMicroOps << "</td>";
    // Latencies.
    OS << "<td><ul>";
    for (int I = 0, E = SC.SCDesc.NumWriteLatencyEntries; I < E; ++I) {
      const auto *const Entry =
          SubtargetInfo_->getWriteLatencyEntry(&SC.SCDesc, I);
      OS << "<li>" << Entry->Cycles;
      if (SC.SCDesc.NumWriteLatencyEntries > 1) {
        // Disambiguate if more than 1 latency.
        OS << " (WriteResourceID " << Entry->WriteResourceID << ")";
      }
      OS << "</li>";
    }
    OS << "</ul></td>";
    // WriteProcRes.
    OS << "<td><ul>";
    for (const auto &WPR : SC.NonRedundantWriteProcRes) {
      OS << "<li><span class=\"mono\">";
      writeEscaped<kEscapeHtml>(OS,
                                SM.getProcResource(WPR.ProcResourceIdx)->Name);
      OS << "</span>: " << WPR.Cycles << "</li>";
    }
    OS << "</ul></td>";
    // Idealized port pressure.
    OS << "<td><ul>";
    for (const auto &Pressure : SC.IdealizedProcResPressure) {
      OS << "<li><span class=\"mono\">";
      writeEscaped<kEscapeHtml>(OS, SubtargetInfo_->getSchedModel()
                                        .getProcResource(Pressure.first)
                                        ->Name);
      OS << "</span>: ";
      writeMeasurementValue<kEscapeHtml>(OS, Pressure.second);
      OS << "</li>";
    }
    OS << "</ul></td>";
    OS << "</tr>";
  } else {
    OS << "<tr><td>✕</td><td></td><td></td></tr>";
  }
  OS << "</table>";
}

// Shared <head> (title + CSS) for the HTML report.
static constexpr const char kHtmlHead[] = R"(
<head>
<title>llvm-exegesis Analysis Results</title>
<style>
body {
  font-family: sans-serif
}
span.sched-class-name {
  font-weight: bold;
  font-family: monospace;
}
span.opcode {
  font-family: monospace;
}
span.config {
  font-family: monospace;
}
div.inconsistency {
  margin-top: 50px;
}
table {
  margin-left: 50px;
  border-collapse: collapse;
}
table, table tr,td,th {
  border: 1px solid #444;
}
table ul {
  padding-left: 0px;
  margin: 0px;
  list-style-type: none;
}
table.sched-class-clusters td {
  padding-left: 10px;
  padding-right: 10px;
  padding-top: 10px;
  padding-bottom: 10px;
}
table.sched-class-desc td {
  padding-left: 10px;
  padding-right: 10px;
  padding-top: 2px;
  padding-bottom: 2px;
}
span.mono {
  font-family: monospace;
}
td.measurement {
  text-align: center;
}
tr.good-cluster td.measurement {
  color: #292
}
tr.bad-cluster td.measurement {
  color: #922
}
tr.good-cluster td.measurement span.minmax {
  color: #888;
}
tr.bad-cluster td.measurement span.minmax {
  color: #888;
}
</style>
</head>
)";

// Emits an HTML report listing every sched class that has at least one
// cluster whose measurements disagree with the LLVM sched model.
// NOTE(review): unlike run<PrintClusters>, this does not early-return when
// getPoints() is empty before indexing [0] — presumably callers guarantee a
// non-empty point set; confirm.
template <>
llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
    llvm::raw_ostream &OS) const {
  const auto &FirstPoint = Clustering_.getPoints()[0];
  // Print the header.
  OS << "<!DOCTYPE html><html>" << kHtmlHead << "<body>";
  OS << "<h1><span class=\"mono\">llvm-exegesis</span> Analysis Results</h1>";
  OS << "<h3>Triple: <span class=\"mono\">";
  writeEscaped<kEscapeHtml>(OS, FirstPoint.LLVMTriple);
  OS << "</span></h3><h3>Cpu: <span class=\"mono\">";
  writeEscaped<kEscapeHtml>(OS, FirstPoint.CpuName);
  OS << "</span></h3>";

  for (const auto &SchedClassAndPoints : makePointsPerSchedClass()) {
    const auto SchedClassId = SchedClassAndPoints.first;
    const std::vector<size_t> &SchedClassPoints = SchedClassAndPoints.second;
    const auto &SchedModel = SubtargetInfo_->getSchedModel();
    const llvm::MCSchedClassDesc *const SCDesc =
        SchedModel.getSchedClassDesc(SchedClassId);
    if (!SCDesc)
      continue;
    const SchedClass SC(*SCDesc, *SubtargetInfo_);

    // Bucket sched class points into sched class clusters.
    std::vector<SchedClassCluster> SchedClassClusters;
    for (const size_t PointId : SchedClassPoints) {
      const auto &ClusterId = Clustering_.getClusterIdForPoint(PointId);
      if (!ClusterId.isValid())
        continue; // Ignore noise and errors. FIXME: take noise into account ?
      auto SchedClassClusterIt =
          std::find_if(SchedClassClusters.begin(), SchedClassClusters.end(),
                       [ClusterId](const SchedClassCluster &C) {
                         return C.id() == ClusterId;
                       });
      if (SchedClassClusterIt == SchedClassClusters.end()) {
        SchedClassClusters.emplace_back();
        SchedClassClusterIt = std::prev(SchedClassClusters.end());
      }
      SchedClassClusterIt->addPoint(PointId, Clustering_);
    }

    // Print any scheduling class that has at least one cluster that does not
    // match the checked-in data.
    if (std::all_of(SchedClassClusters.begin(), SchedClassClusters.end(),
                    [this, &SC](const SchedClassCluster &C) {
                      return C.measurementsMatch(*SubtargetInfo_, SC,
                                                 Clustering_);
                    }))
      continue; // Nothing weird.

    OS << "<div class=\"inconsistency\"><p>Sched Class <span "
          "class=\"sched-class-name\">";
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    writeEscaped<kEscapeHtml>(OS, SCDesc->Name);
#else
    OS << SchedClassId;
#endif
    OS << "</span> contains instructions whose performance characteristics do"
          " not match that of LLVM:</p>";
    printSchedClassClustersHtml(SchedClassClusters, SC, OS);
    OS << "<p>llvm SchedModel data:</p>";
    printSchedClassDescHtml(SC, OS);
    OS << "</div>";
  }

  OS << "</body></html>";
  return llvm::Error::success();
}

// Distributes a pressure budget as evenly as possible on the provided subunits
// given the already existing port pressure distribution.
//
// The algorithm is as follows: while there is remaining pressure to
// distribute, find the subunits with minimal pressure, and distribute
// remaining pressure equally up to the pressure of the unit with
// second-to-minimal pressure.
// For example, let's assume we want to distribute 2*P1256
// (Subunits = [P1,P2,P5,P6]), and the starting DensePressure is:
//     DensePressure =        P0   P1   P2   P3   P4   P5   P6   P7
//                           0.1  0.3  0.2  0.0  0.0  0.5  0.5  0.5
//     RemainingPressure = 2.0
// We sort the subunits by pressure:
//     Subunits = [(P2,p=0.2), (P1,p=0.3), (P5,p=0.5), (P6, p=0.5)]
// We'll first start by the subunits with minimal pressure, which are at
// the beginning of the sorted array. In this example there is one (P2).
// The subunit with second-to-minimal pressure is the next one in the
// array (P1). So we distribute 0.1 pressure to P2, and remove 0.1 cycles
// from the budget.
//     Subunits = [(P2,p=0.3), (P1,p=0.3), (P5,p=0.5), (P6,p=0.5)]
//     RemainingPressure = 1.9
// We repeat this process: distribute 0.2 pressure on each of the minimal
// P2 and P1, decrease budget by 2*0.2:
//     Subunits = [(P2,p=0.5), (P1,p=0.5), (P5,p=0.5), (P6,p=0.5)]
//     RemainingPressure = 1.5
// There are no second-to-minimal subunits so we just share the remaining
// budget (1.5 cycles) equally:
//     Subunits = [(P2,p=0.875), (P1,p=0.875), (P5,p=0.875), (P6,p=0.875)]
//     RemainingPressure = 0.0
// We stop as there is no remaining budget to distribute.
void distributePressure(float RemainingPressure,
                        llvm::SmallVector<uint16_t, 32> Subunits,
                        llvm::SmallVector<float, 32> &DensePressure) {
  // Find the number of subunits with minimal pressure (they are at the
  // front).
  llvm::sort(Subunits.begin(), Subunits.end(),
             [&DensePressure](const uint16_t A, const uint16_t B) {
               return DensePressure[A] < DensePressure[B];
             });
  const auto getPressureForSubunit = [&DensePressure,
                                      &Subunits](size_t I) -> float & {
    return DensePressure[Subunits[I]];
  };
  // NumMinimalSU is the size of the front group of subunits that currently
  // share the minimal pressure.
  size_t NumMinimalSU = 1;
  while (NumMinimalSU < Subunits.size() &&
         getPressureForSubunit(NumMinimalSU) == getPressureForSubunit(0)) {
    ++NumMinimalSU;
  }
  while (RemainingPressure > 0.0f) {
    if (NumMinimalSU == Subunits.size()) {
      // All units are minimal, just distribute evenly and be done.
      for (size_t I = 0; I < NumMinimalSU; ++I) {
        getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
      }
      return;
    }
    // Distribute the remaining pressure equally.
    const float MinimalPressure = getPressureForSubunit(NumMinimalSU - 1);
    const float SecondToMinimalPressure = getPressureForSubunit(NumMinimalSU);
    assert(MinimalPressure < SecondToMinimalPressure);
    const float Increment = SecondToMinimalPressure - MinimalPressure;
    if (RemainingPressure <= NumMinimalSU * Increment) {
      // There is not enough remaining pressure.
      for (size_t I = 0; I < NumMinimalSU; ++I) {
        getPressureForSubunit(I) += RemainingPressure / NumMinimalSU;
      }
      return;
    }
    // Bump all minimal pressure subunits to `SecondToMinimalPressure`.
    for (size_t I = 0; I < NumMinimalSU; ++I) {
      getPressureForSubunit(I) = SecondToMinimalPressure;
      RemainingPressure -= SecondToMinimalPressure;
    }
    // Grow the minimal group to absorb any subunits already at the new level.
    while (NumMinimalSU < Subunits.size() &&
           getPressureForSubunit(NumMinimalSU) == SecondToMinimalPressure) {
      ++NumMinimalSU;
    }
  }
}

// Computes the idealized (perfectly balanced) per-unit pressure implied by a
// set of WriteProcRes entries, as a sparse (ProcResIdx, pressure) list.
std::vector<std::pair<uint16_t, float>> computeIdealizedProcResPressure(
    const llvm::MCSchedModel &SM,
    llvm::SmallVector<llvm::MCWriteProcResEntry, 8> WPRS) {
  // DensePressure[I] is the port pressure for Proc Resource I.
  llvm::SmallVector<float, 32> DensePressure(SM.getNumProcResourceKinds());
  // Process entries in ProcResourceIdx order so that units are accounted
  // before the groups that contain them.
  llvm::sort(WPRS.begin(), WPRS.end(),
             [](const llvm::MCWriteProcResEntry &A,
                const llvm::MCWriteProcResEntry &B) {
               return A.ProcResourceIdx < B.ProcResourceIdx;
             });
  for (const llvm::MCWriteProcResEntry &WPR : WPRS) {
    // Get units for the entry.
    const llvm::MCProcResourceDesc *const ProcResDesc =
        SM.getProcResource(WPR.ProcResourceIdx);
    if (ProcResDesc->SubUnitsIdxBegin == nullptr) {
      // This is a ProcResUnit.
      DensePressure[WPR.ProcResourceIdx] += WPR.Cycles;
    } else {
      // This is a ProcResGroup: spread its cycles across its subunits.
      llvm::SmallVector<uint16_t, 32> Subunits(ProcResDesc->SubUnitsIdxBegin,
                                               ProcResDesc->SubUnitsIdxBegin +
                                                   ProcResDesc->NumUnits);
      distributePressure(WPR.Cycles, Subunits, DensePressure);
    }
  }
  // Turn dense pressure into sparse pressure by removing zero entries.
  std::vector<std::pair<uint16_t, float>> Pressure;
  for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
    if (DensePressure[I] > 0.0f)
      Pressure.emplace_back(I, DensePressure[I]);
  }
  return Pressure;
}

} // namespace exegesis