1 //===-- llvm-exegesis.cpp ---------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// Measures execution properties (latencies/uops) of an instruction. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "lib/Analysis.h" 15 #include "lib/BenchmarkResult.h" 16 #include "lib/BenchmarkRunner.h" 17 #include "lib/Clustering.h" 18 #include "lib/CodeTemplate.h" 19 #include "lib/Error.h" 20 #include "lib/LlvmState.h" 21 #include "lib/PerfHelper.h" 22 #include "lib/ProgressMeter.h" 23 #include "lib/ResultAggregator.h" 24 #include "lib/SnippetFile.h" 25 #include "lib/SnippetRepetitor.h" 26 #include "lib/Target.h" 27 #include "lib/TargetSelect.h" 28 #include "lib/ValidationEvent.h" 29 #include "llvm/ADT/StringExtras.h" 30 #include "llvm/ADT/Twine.h" 31 #include "llvm/MC/MCInstBuilder.h" 32 #include "llvm/MC/MCObjectFileInfo.h" 33 #include "llvm/MC/MCParser/MCAsmParser.h" 34 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 35 #include "llvm/MC/MCRegisterInfo.h" 36 #include "llvm/MC/MCSubtargetInfo.h" 37 #include "llvm/MC/TargetRegistry.h" 38 #include "llvm/Object/ObjectFile.h" 39 #include "llvm/Support/CommandLine.h" 40 #include "llvm/Support/FileSystem.h" 41 #include "llvm/Support/Format.h" 42 #include "llvm/Support/InitLLVM.h" 43 #include "llvm/Support/Path.h" 44 #include "llvm/Support/SourceMgr.h" 45 #include "llvm/Support/TargetSelect.h" 46 #include "llvm/TargetParser/Host.h" 47 #include <algorithm> 48 #include <string> 49 50 namespace llvm { 51 namespace exegesis { 52 53 static cl::opt<int> OpcodeIndex( 54 "opcode-index", 55 cl::desc("opcode to measure, by index, or -1 to measure all opcodes"), 56 cl::cat(BenchmarkOptions), cl::init(0)); 57 58 static cl::opt<std::string> 59 OpcodeNames("opcode-name", 60 cl::desc("comma-separated list of opcodes to measure, by name"), 61 cl::cat(BenchmarkOptions), cl::init("")); 62 63 static cl::opt<std::string> SnippetsFile("snippets-file", 64 cl::desc("code snippets to measure"), 65 cl::cat(BenchmarkOptions), 66 cl::init("")); 67 68 static cl::opt<std::string> 69 BenchmarkFile("benchmarks-file", 70 cl::desc("File to read (analysis mode) or write " 71 "(latency/uops/inverse_throughput modes) benchmark " 72 "results. “-” uses stdin/stdout."), 73 cl::cat(Options), cl::init("")); 74 75 static cl::opt<Benchmark::ModeE> BenchmarkMode( 76 "mode", cl::desc("the mode to run"), cl::cat(Options), 77 cl::values(clEnumValN(Benchmark::Latency, "latency", "Instruction Latency"), 78 clEnumValN(Benchmark::InverseThroughput, "inverse_throughput", 79 "Instruction Inverse Throughput"), 80 clEnumValN(Benchmark::Uops, "uops", "Uop Decomposition"), 81 // When not asking for a specific benchmark mode, 82 // we'll analyse the results. 83 clEnumValN(Benchmark::Unknown, "analysis", "Analysis"))); 84 85 static cl::opt<Benchmark::ResultAggregationModeE> ResultAggMode( 86 "result-aggregation-mode", cl::desc("How to aggregate multi-values result"), 87 cl::cat(BenchmarkOptions), 88 cl::values(clEnumValN(Benchmark::Min, "min", "Keep min reading"), 89 clEnumValN(Benchmark::Max, "max", "Keep max reading"), 90 clEnumValN(Benchmark::Mean, "mean", 91 "Compute mean of all readings"), 92 clEnumValN(Benchmark::MinVariance, "min-variance", 93 "Keep readings set with min-variance")), 94 cl::init(Benchmark::Min)); 95 96 static cl::opt<Benchmark::RepetitionModeE> RepetitionMode( 97 "repetition-mode", cl::desc("how to repeat the instruction snippet"), 98 cl::cat(BenchmarkOptions), 99 cl::values( 100 clEnumValN(Benchmark::Duplicate, "duplicate", "Duplicate the snippet"), 101 clEnumValN(Benchmark::Loop, "loop", "Loop over the snippet"), 102 clEnumValN(Benchmark::AggregateMin, "min", 103 "All of the above and take the minimum of measurements"), 104 clEnumValN(Benchmark::MiddleHalfDuplicate, "middle-half-duplicate", 105 "Middle half duplicate mode"), 106 clEnumValN(Benchmark::MiddleHalfLoop, "middle-half-loop", 107 "Middle half loop mode")), 108 cl::init(Benchmark::Duplicate)); 109 110 static cl::opt<bool> BenchmarkMeasurementsPrintProgress( 111 "measurements-print-progress", 112 cl::desc("Produce progress indicator when performing measurements"), 113 cl::cat(BenchmarkOptions), cl::init(false)); 114 115 static cl::opt<BenchmarkPhaseSelectorE> BenchmarkPhaseSelector( 116 "benchmark-phase", 117 cl::desc( 118 "it is possible to stop the benchmarking process after some phase"), 119 cl::cat(BenchmarkOptions), 120 cl::values( 121 clEnumValN(BenchmarkPhaseSelectorE::PrepareSnippet, "prepare-snippet", 122 "Only generate the minimal instruction sequence"), 123 clEnumValN(BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet, 124 "prepare-and-assemble-snippet", 125 "Same as prepare-snippet, but also dumps an excerpt of the " 126 "sequence (hex encoded)"), 127 clEnumValN(BenchmarkPhaseSelectorE::AssembleMeasuredCode, 128 "assemble-measured-code", 129 "Same as prepare-and-assemble-snippet, but also creates the " 130 "full sequence " 131 "that can be dumped to a file using --dump-object-to-disk"), 132 clEnumValN( 133 BenchmarkPhaseSelectorE::Measure, "measure", 134 "Same as prepare-measured-code, but also runs the measurement " 135 "(default)")), 136 cl::init(BenchmarkPhaseSelectorE::Measure)); 137 138 static cl::opt<bool> 139 UseDummyPerfCounters("use-dummy-perf-counters", 140 cl::desc("Do not read real performance counters, use " 141 "dummy values (for testing)"), 142 cl::cat(BenchmarkOptions), cl::init(false)); 143 144 static cl::opt<unsigned> 145 MinInstructions("min-instructions", 146 cl::desc("The minimum number of instructions that should " 147 "be included in the snippet"), 148 cl::cat(BenchmarkOptions), cl::init(10000)); 149 150 static cl::opt<unsigned> 151 LoopBodySize("loop-body-size", 152 cl::desc("when repeating the instruction snippet by looping " 153 "over it, duplicate the snippet until the loop body " 154 "contains at least this many instruction"), 155 cl::cat(BenchmarkOptions), cl::init(0)); 156 157 static cl::opt<unsigned> MaxConfigsPerOpcode( 158 "max-configs-per-opcode", 159 cl::desc( 160 "allow to snippet generator to generate at most that many configs"), 161 cl::cat(BenchmarkOptions), cl::init(1)); 162 163 static cl::opt<bool> IgnoreInvalidSchedClass( 164 "ignore-invalid-sched-class", 165 cl::desc("ignore instructions that do not define a sched class"), 166 cl::cat(BenchmarkOptions), cl::init(false)); 167 168 static cl::opt<BenchmarkFilter> AnalysisSnippetFilter( 169 "analysis-filter", cl::desc("Filter the benchmarks before analysing them"), 170 cl::cat(BenchmarkOptions), 171 cl::values( 172 clEnumValN(BenchmarkFilter::All, "all", 173 "Keep all benchmarks (default)"), 174 clEnumValN(BenchmarkFilter::RegOnly, "reg-only", 175 "Keep only those benchmarks that do *NOT* involve memory"), 176 clEnumValN(BenchmarkFilter::WithMem, "mem-only", 177 "Keep only the benchmarks that *DO* involve memory")), 178 cl::init(BenchmarkFilter::All)); 179 180 static cl::opt<BenchmarkClustering::ModeE> AnalysisClusteringAlgorithm( 181 "analysis-clustering", cl::desc("the clustering algorithm to use"), 182 cl::cat(AnalysisOptions), 183 cl::values(clEnumValN(BenchmarkClustering::Dbscan, "dbscan", 184 "use DBSCAN/OPTICS algorithm"), 185 clEnumValN(BenchmarkClustering::Naive, "naive", 186 "one cluster per opcode")), 187 cl::init(BenchmarkClustering::Dbscan)); 188 189 static cl::opt<unsigned> AnalysisDbscanNumPoints( 190 "analysis-numpoints", 191 cl::desc("minimum number of points in an analysis cluster (dbscan only)"), 192 cl::cat(AnalysisOptions), cl::init(3)); 193 194 static cl::opt<float> AnalysisClusteringEpsilon( 195 "analysis-clustering-epsilon", 196 cl::desc("epsilon for benchmark point clustering"), 197 cl::cat(AnalysisOptions), cl::init(0.1)); 198 199 static cl::opt<float> AnalysisInconsistencyEpsilon( 200 "analysis-inconsistency-epsilon", 201 cl::desc("epsilon for detection of when the cluster is different from the " 202 "LLVM schedule profile values"), 203 cl::cat(AnalysisOptions), cl::init(0.1)); 204 205 static cl::opt<std::string> 206 AnalysisClustersOutputFile("analysis-clusters-output-file", cl::desc(""), 207 cl::cat(AnalysisOptions), cl::init("")); 208 static cl::opt<std::string> 209 AnalysisInconsistenciesOutputFile("analysis-inconsistencies-output-file", 210 cl::desc(""), cl::cat(AnalysisOptions), 211 cl::init("")); 212 213 static cl::opt<bool> AnalysisDisplayUnstableOpcodes( 214 "analysis-display-unstable-clusters", 215 cl::desc("if there is more than one benchmark for an opcode, said " 216 "benchmarks may end up not being clustered into the same cluster " 217 "if the measured performance characteristics are different. by " 218 "default all such opcodes are filtered out. this flag will " 219 "instead show only such unstable opcodes"), 220 cl::cat(AnalysisOptions), cl::init(false)); 221 222 static cl::opt<bool> AnalysisOverrideBenchmarksTripleAndCpu( 223 "analysis-override-benchmark-triple-and-cpu", 224 cl::desc("By default, we analyze the benchmarks for the triple/CPU they " 225 "were measured for, but if you want to analyze them for some " 226 "other combination (specified via -mtriple/-mcpu), you can " 227 "pass this flag."), 228 cl::cat(AnalysisOptions), cl::init(false)); 229 230 static cl::opt<std::string> 231 TripleName("mtriple", 232 cl::desc("Target triple. See -version for available targets"), 233 cl::cat(Options)); 234 235 static cl::opt<std::string> 236 MCPU("mcpu", 237 cl::desc("Target a specific cpu type (-mcpu=help for details)"), 238 cl::value_desc("cpu-name"), cl::cat(Options), cl::init("native")); 239 240 static cl::opt<std::string> 241 DumpObjectToDisk("dump-object-to-disk", 242 cl::desc("dumps the generated benchmark object to disk " 243 "and prints a message to access it"), 244 cl::ValueOptional, cl::cat(BenchmarkOptions)); 245 246 static cl::opt<BenchmarkRunner::ExecutionModeE> ExecutionMode( 247 "execution-mode", 248 cl::desc("Selects the execution mode to use for running snippets"), 249 cl::cat(BenchmarkOptions), 250 cl::values(clEnumValN(BenchmarkRunner::ExecutionModeE::InProcess, 251 "inprocess", 252 "Executes the snippets within the same process"), 253 clEnumValN(BenchmarkRunner::ExecutionModeE::SubProcess, 254 "subprocess", 255 "Spawns a subprocess for each snippet execution, " 256 "allows for the use of memory annotations")), 257 cl::init(BenchmarkRunner::ExecutionModeE::InProcess)); 258 259 static cl::opt<unsigned> BenchmarkRepeatCount( 260 "benchmark-repeat-count", 261 cl::desc("The number of times to repeat measurements on the benchmark k " 262 "before aggregating the results"), 263 cl::cat(BenchmarkOptions), cl::init(30)); 264 265 static cl::list<ValidationEvent> ValidationCounters( 266 "validation-counter", 267 cl::desc( 268 "The name of a validation counter to run concurrently with the main " 269 "counter to validate benchmarking assumptions"), 270 cl::CommaSeparated, cl::cat(BenchmarkOptions), ValidationEventOptions()); 271 272 static cl::opt<int> BenchmarkProcessCPU( 273 "benchmark-process-cpu", 274 cl::desc("The CPU number that the benchmarking process should executon on"), 275 cl::cat(BenchmarkOptions), cl::init(-1)); 276 277 static cl::opt<std::string> MAttr( 278 "mattr", cl::desc("comma-separated list of target architecture features"), 279 cl::value_desc("+feature1,-feature2,..."), cl::cat(Options), cl::init("")); 280 281 static ExitOnError ExitOnErr("llvm-exegesis error: "); 282 283 // Helper function that logs the error(s) and exits. 284 template <typename... ArgTs> static void ExitWithError(ArgTs &&... Args) { 285 ExitOnErr(make_error<Failure>(std::forward<ArgTs>(Args)...)); 286 } 287 288 // Check Err. If it's in a failure state log the file error(s) and exit. 289 static void ExitOnFileError(const Twine &FileName, Error Err) { 290 if (Err) { 291 ExitOnErr(createFileError(FileName, std::move(Err))); 292 } 293 } 294 295 // Check E. If it's in a success state then return the contained value. 296 // If it's in a failure state log the file error(s) and exit. 297 template <typename T> 298 T ExitOnFileError(const Twine &FileName, Expected<T> &&E) { 299 ExitOnFileError(FileName, E.takeError()); 300 return std::move(*E); 301 } 302 303 static const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State, 304 unsigned Opcode) { 305 const MCInstrDesc &InstrDesc = State.getIC().getInstr(Opcode).Description; 306 if (InstrDesc.isPseudo() || InstrDesc.usesCustomInsertionHook()) 307 return "Unsupported opcode: isPseudo/usesCustomInserter"; 308 if (InstrDesc.isBranch() || InstrDesc.isIndirectBranch()) 309 return "Unsupported opcode: isBranch/isIndirectBranch"; 310 if (InstrDesc.isCall() || InstrDesc.isReturn()) 311 return "Unsupported opcode: isCall/isReturn"; 312 return nullptr; 313 } 314 315 // Checks that only one of OpcodeNames, OpcodeIndex or SnippetsFile is provided, 316 // and returns the opcode indices or {} if snippets should be read from 317 // `SnippetsFile`. 318 static std::vector<unsigned> getOpcodesOrDie(const LLVMState &State) { 319 const size_t NumSetFlags = (OpcodeNames.empty() ? 0 : 1) + 320 (OpcodeIndex == 0 ? 0 : 1) + 321 (SnippetsFile.empty() ? 0 : 1); 322 const auto &ET = State.getExegesisTarget(); 323 const auto AvailableFeatures = State.getSubtargetInfo().getFeatureBits(); 324 325 if (NumSetFlags != 1) { 326 ExitOnErr.setBanner("llvm-exegesis: "); 327 ExitWithError("please provide one and only one of 'opcode-index', " 328 "'opcode-name' or 'snippets-file'"); 329 } 330 if (!SnippetsFile.empty()) 331 return {}; 332 if (OpcodeIndex > 0) 333 return {static_cast<unsigned>(OpcodeIndex)}; 334 if (OpcodeIndex < 0) { 335 std::vector<unsigned> Result; 336 unsigned NumOpcodes = State.getInstrInfo().getNumOpcodes(); 337 Result.reserve(NumOpcodes); 338 for (unsigned I = 0, E = NumOpcodes; I < E; ++I) { 339 if (!ET.isOpcodeAvailable(I, AvailableFeatures)) 340 continue; 341 Result.push_back(I); 342 } 343 return Result; 344 } 345 // Resolve opcode name -> opcode. 346 const auto ResolveName = [&State](StringRef OpcodeName) -> unsigned { 347 const auto &Map = State.getOpcodeNameToOpcodeIdxMapping(); 348 auto I = Map.find(OpcodeName); 349 if (I != Map.end()) 350 return I->getSecond(); 351 return 0u; 352 }; 353 354 SmallVector<StringRef, 2> Pieces; 355 StringRef(OpcodeNames.getValue()) 356 .split(Pieces, ",", /* MaxSplit */ -1, /* KeepEmpty */ false); 357 std::vector<unsigned> Result; 358 Result.reserve(Pieces.size()); 359 for (const StringRef &OpcodeName : Pieces) { 360 if (unsigned Opcode = ResolveName(OpcodeName)) 361 Result.push_back(Opcode); 362 else 363 ExitWithError(Twine("unknown opcode ").concat(OpcodeName)); 364 } 365 return Result; 366 } 367 368 // Generates code snippets for opcode `Opcode`. 369 static Expected<std::vector<BenchmarkCode>> 370 generateSnippets(const LLVMState &State, unsigned Opcode, 371 const BitVector &ForbiddenRegs) { 372 // Ignore instructions that we cannot run. 373 if (const char *Reason = getIgnoredOpcodeReasonOrNull(State, Opcode)) 374 return make_error<Failure>(Reason); 375 376 const Instruction &Instr = State.getIC().getInstr(Opcode); 377 const std::vector<InstructionTemplate> InstructionVariants = 378 State.getExegesisTarget().generateInstructionVariants( 379 Instr, MaxConfigsPerOpcode); 380 381 SnippetGenerator::Options SnippetOptions; 382 SnippetOptions.MaxConfigsPerOpcode = MaxConfigsPerOpcode; 383 const std::unique_ptr<SnippetGenerator> Generator = 384 State.getExegesisTarget().createSnippetGenerator(BenchmarkMode, State, 385 SnippetOptions); 386 if (!Generator) 387 ExitWithError("cannot create snippet generator"); 388 389 std::vector<BenchmarkCode> Benchmarks; 390 for (const InstructionTemplate &Variant : InstructionVariants) { 391 if (Benchmarks.size() >= MaxConfigsPerOpcode) 392 break; 393 if (auto Err = Generator->generateConfigurations(Variant, Benchmarks, 394 ForbiddenRegs)) 395 return std::move(Err); 396 } 397 return Benchmarks; 398 } 399 400 static void runBenchmarkConfigurations( 401 const LLVMState &State, ArrayRef<BenchmarkCode> Configurations, 402 ArrayRef<std::unique_ptr<const SnippetRepetitor>> Repetitors, 403 const BenchmarkRunner &Runner) { 404 assert(!Configurations.empty() && "Don't have any configurations to run."); 405 std::optional<raw_fd_ostream> FileOstr; 406 if (BenchmarkFile != "-") { 407 int ResultFD = 0; 408 // Create output file or open existing file and truncate it, once. 409 ExitOnErr(errorCodeToError(openFileForWrite(BenchmarkFile, ResultFD, 410 sys::fs::CD_CreateAlways, 411 sys::fs::OF_TextWithCRLF))); 412 FileOstr.emplace(ResultFD, true /*shouldClose*/); 413 } 414 raw_ostream &Ostr = FileOstr ? *FileOstr : outs(); 415 416 std::optional<ProgressMeter<>> Meter; 417 if (BenchmarkMeasurementsPrintProgress) 418 Meter.emplace(Configurations.size()); 419 420 SmallVector<unsigned, 2> MinInstructionCounts = {MinInstructions}; 421 if (RepetitionMode == Benchmark::MiddleHalfDuplicate || 422 RepetitionMode == Benchmark::MiddleHalfLoop) 423 MinInstructionCounts.push_back(MinInstructions * 2); 424 425 for (const BenchmarkCode &Conf : Configurations) { 426 ProgressMeter<>::ProgressMeterStep MeterStep(Meter ? &*Meter : nullptr); 427 SmallVector<Benchmark, 2> AllResults; 428 429 for (const std::unique_ptr<const SnippetRepetitor> &Repetitor : 430 Repetitors) { 431 for (unsigned IterationRepetitions : MinInstructionCounts) { 432 auto RC = ExitOnErr(Runner.getRunnableConfiguration( 433 Conf, IterationRepetitions, LoopBodySize, *Repetitor)); 434 std::optional<StringRef> DumpFile; 435 if (DumpObjectToDisk.getNumOccurrences()) 436 DumpFile = DumpObjectToDisk; 437 const std::optional<int> BenchmarkCPU = 438 BenchmarkProcessCPU == -1 439 ? std::nullopt 440 : std::optional(BenchmarkProcessCPU.getValue()); 441 auto [Err, BenchmarkResult] = 442 Runner.runConfiguration(std::move(RC), DumpFile, BenchmarkCPU); 443 if (Err) { 444 // Errors from executing the snippets are fine. 445 // All other errors are a framework issue and should fail. 446 if (!Err.isA<SnippetExecutionFailure>()) 447 ExitOnErr(std::move(Err)); 448 449 BenchmarkResult.Error = toString(std::move(Err)); 450 } 451 AllResults.push_back(std::move(BenchmarkResult)); 452 } 453 } 454 455 Benchmark &Result = AllResults.front(); 456 457 // If any of our measurements failed, pretend they all have failed. 458 if (AllResults.size() > 1 && 459 any_of(AllResults, [](const Benchmark &R) { 460 return R.Measurements.empty(); 461 })) 462 Result.Measurements.clear(); 463 464 std::unique_ptr<ResultAggregator> ResultAgg = 465 ResultAggregator::CreateAggregator(RepetitionMode); 466 ResultAgg->AggregateResults(Result, 467 ArrayRef<Benchmark>(AllResults).drop_front()); 468 469 // With dummy counters, measurements are rather meaningless, 470 // so drop them altogether. 471 if (UseDummyPerfCounters) 472 Result.Measurements.clear(); 473 474 ExitOnFileError(BenchmarkFile, Result.writeYamlTo(State, Ostr)); 475 } 476 } 477 478 void benchmarkMain() { 479 if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure && 480 !UseDummyPerfCounters) { 481 #ifndef HAVE_LIBPFM 482 ExitWithError( 483 "benchmarking unavailable, LLVM was built without libpfm. You can " 484 "pass --benchmark-phase=... to skip the actual benchmarking or " 485 "--use-dummy-perf-counters to not query the kernel for real event " 486 "counts."); 487 #else 488 if (pfm::pfmInitialize()) 489 ExitWithError("cannot initialize libpfm"); 490 #endif 491 } 492 493 InitializeAllExegesisTargets(); 494 #define LLVM_EXEGESIS(TargetName) \ 495 LLVMInitialize##TargetName##AsmPrinter(); \ 496 LLVMInitialize##TargetName##AsmParser(); 497 #include "llvm/Config/TargetExegesis.def" 498 499 const LLVMState State = ExitOnErr( 500 LLVMState::Create(TripleName, MCPU, MAttr, UseDummyPerfCounters)); 501 502 // Preliminary check to ensure features needed for requested 503 // benchmark mode are present on target CPU and/or OS. 504 if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure) 505 ExitOnErr(State.getExegesisTarget().checkFeatureSupport()); 506 507 if (ExecutionMode == BenchmarkRunner::ExecutionModeE::SubProcess && 508 UseDummyPerfCounters) 509 ExitWithError("Dummy perf counters are not supported in the subprocess " 510 "execution mode."); 511 512 const std::unique_ptr<BenchmarkRunner> Runner = 513 ExitOnErr(State.getExegesisTarget().createBenchmarkRunner( 514 BenchmarkMode, State, BenchmarkPhaseSelector, ExecutionMode, 515 BenchmarkRepeatCount, ValidationCounters, ResultAggMode)); 516 if (!Runner) { 517 ExitWithError("cannot create benchmark runner"); 518 } 519 520 const auto Opcodes = getOpcodesOrDie(State); 521 std::vector<BenchmarkCode> Configurations; 522 523 MCRegister LoopRegister = 524 State.getExegesisTarget().getDefaultLoopCounterRegister( 525 State.getTargetMachine().getTargetTriple()); 526 527 if (Opcodes.empty()) { 528 Configurations = ExitOnErr(readSnippets(State, SnippetsFile)); 529 for (const auto &Configuration : Configurations) { 530 if (ExecutionMode != BenchmarkRunner::ExecutionModeE::SubProcess && 531 (Configuration.Key.MemoryMappings.size() != 0 || 532 Configuration.Key.MemoryValues.size() != 0 || 533 Configuration.Key.SnippetAddress != 0)) 534 ExitWithError("Memory and snippet address annotations are only " 535 "supported in subprocess " 536 "execution mode"); 537 } 538 LoopRegister = Configurations[0].Key.LoopRegister; 539 } 540 541 SmallVector<std::unique_ptr<const SnippetRepetitor>, 2> Repetitors; 542 if (RepetitionMode != Benchmark::RepetitionModeE::AggregateMin) 543 Repetitors.emplace_back( 544 SnippetRepetitor::Create(RepetitionMode, State, LoopRegister)); 545 else { 546 for (Benchmark::RepetitionModeE RepMode : 547 {Benchmark::RepetitionModeE::Duplicate, 548 Benchmark::RepetitionModeE::Loop}) 549 Repetitors.emplace_back( 550 SnippetRepetitor::Create(RepMode, State, LoopRegister)); 551 } 552 553 BitVector AllReservedRegs; 554 for (const std::unique_ptr<const SnippetRepetitor> &Repetitor : Repetitors) 555 AllReservedRegs |= Repetitor->getReservedRegs(); 556 557 if (!Opcodes.empty()) { 558 for (const unsigned Opcode : Opcodes) { 559 // Ignore instructions without a sched class if 560 // -ignore-invalid-sched-class is passed. 561 if (IgnoreInvalidSchedClass && 562 State.getInstrInfo().get(Opcode).getSchedClass() == 0) { 563 errs() << State.getInstrInfo().getName(Opcode) 564 << ": ignoring instruction without sched class\n"; 565 continue; 566 } 567 568 auto ConfigsForInstr = generateSnippets(State, Opcode, AllReservedRegs); 569 if (!ConfigsForInstr) { 570 logAllUnhandledErrors( 571 ConfigsForInstr.takeError(), errs(), 572 Twine(State.getInstrInfo().getName(Opcode)).concat(": ")); 573 continue; 574 } 575 std::move(ConfigsForInstr->begin(), ConfigsForInstr->end(), 576 std::back_inserter(Configurations)); 577 } 578 } 579 580 if (MinInstructions == 0) { 581 ExitOnErr.setBanner("llvm-exegesis: "); 582 ExitWithError("--min-instructions must be greater than zero"); 583 } 584 585 // Write to standard output if file is not set. 586 if (BenchmarkFile.empty()) 587 BenchmarkFile = "-"; 588 589 if (!Configurations.empty()) 590 runBenchmarkConfigurations(State, Configurations, Repetitors, *Runner); 591 592 pfm::pfmTerminate(); 593 } 594 595 // Prints the results of running analysis pass `Pass` to file `OutputFilename` 596 // if OutputFilename is non-empty. 597 template <typename Pass> 598 static void maybeRunAnalysis(const Analysis &Analyzer, const std::string &Name, 599 const std::string &OutputFilename) { 600 if (OutputFilename.empty()) 601 return; 602 if (OutputFilename != "-") { 603 errs() << "Printing " << Name << " results to file '" << OutputFilename 604 << "'\n"; 605 } 606 std::error_code ErrorCode; 607 raw_fd_ostream ClustersOS(OutputFilename, ErrorCode, 608 sys::fs::FA_Read | sys::fs::FA_Write); 609 if (ErrorCode) 610 ExitOnFileError(OutputFilename, errorCodeToError(ErrorCode)); 611 if (auto Err = Analyzer.run<Pass>(ClustersOS)) 612 ExitOnFileError(OutputFilename, std::move(Err)); 613 } 614 615 static void filterPoints(MutableArrayRef<Benchmark> Points, 616 const MCInstrInfo &MCII) { 617 if (AnalysisSnippetFilter == BenchmarkFilter::All) 618 return; 619 620 bool WantPointsWithMemOps = AnalysisSnippetFilter == BenchmarkFilter::WithMem; 621 for (Benchmark &Point : Points) { 622 if (!Point.Error.empty()) 623 continue; 624 if (WantPointsWithMemOps == 625 any_of(Point.Key.Instructions, [&MCII](const MCInst &Inst) { 626 const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode()); 627 return MCDesc.mayLoad() || MCDesc.mayStore(); 628 })) 629 continue; 630 Point.Error = "filtered out by user"; 631 } 632 } 633 634 static void analysisMain() { 635 ExitOnErr.setBanner("llvm-exegesis: "); 636 if (BenchmarkFile.empty()) 637 ExitWithError("--benchmarks-file must be set"); 638 639 if (AnalysisClustersOutputFile.empty() && 640 AnalysisInconsistenciesOutputFile.empty()) { 641 ExitWithError( 642 "for --mode=analysis: At least one of --analysis-clusters-output-file " 643 "and --analysis-inconsistencies-output-file must be specified"); 644 } 645 646 InitializeAllExegesisTargets(); 647 #define LLVM_EXEGESIS(TargetName) \ 648 LLVMInitialize##TargetName##AsmPrinter(); \ 649 LLVMInitialize##TargetName##Disassembler(); 650 #include "llvm/Config/TargetExegesis.def" 651 652 auto MemoryBuffer = ExitOnFileError( 653 BenchmarkFile, 654 errorOrToExpected(MemoryBuffer::getFile(BenchmarkFile, /*IsText=*/true))); 655 656 const auto TriplesAndCpus = ExitOnFileError( 657 BenchmarkFile, 658 Benchmark::readTriplesAndCpusFromYamls(*MemoryBuffer)); 659 if (TriplesAndCpus.empty()) { 660 errs() << "no benchmarks to analyze\n"; 661 return; 662 } 663 if (TriplesAndCpus.size() > 1) { 664 ExitWithError("analysis file contains benchmarks from several CPUs. This " 665 "is unsupported."); 666 } 667 auto TripleAndCpu = *TriplesAndCpus.begin(); 668 if (AnalysisOverrideBenchmarksTripleAndCpu) { 669 errs() << "overridding file CPU name (" << TripleAndCpu.CpuName 670 << ") with provided tripled (" << TripleName << ") and CPU name (" 671 << MCPU << ")\n"; 672 TripleAndCpu.LLVMTriple = TripleName; 673 TripleAndCpu.CpuName = MCPU; 674 } 675 errs() << "using Triple '" << TripleAndCpu.LLVMTriple << "' and CPU '" 676 << TripleAndCpu.CpuName << "'\n"; 677 678 // Read benchmarks. 679 const LLVMState State = ExitOnErr( 680 LLVMState::Create(TripleAndCpu.LLVMTriple, TripleAndCpu.CpuName)); 681 std::vector<Benchmark> Points = ExitOnFileError( 682 BenchmarkFile, Benchmark::readYamls(State, *MemoryBuffer)); 683 684 outs() << "Parsed " << Points.size() << " benchmark points\n"; 685 if (Points.empty()) { 686 errs() << "no benchmarks to analyze\n"; 687 return; 688 } 689 // FIXME: Merge points from several runs (latency and uops). 690 691 filterPoints(Points, State.getInstrInfo()); 692 693 const auto Clustering = ExitOnErr(BenchmarkClustering::create( 694 Points, AnalysisClusteringAlgorithm, AnalysisDbscanNumPoints, 695 AnalysisClusteringEpsilon, &State.getSubtargetInfo(), 696 &State.getInstrInfo())); 697 698 const Analysis Analyzer(State, Clustering, AnalysisInconsistencyEpsilon, 699 AnalysisDisplayUnstableOpcodes); 700 701 maybeRunAnalysis<Analysis::PrintClusters>(Analyzer, "analysis clusters", 702 AnalysisClustersOutputFile); 703 maybeRunAnalysis<Analysis::PrintSchedClassInconsistencies>( 704 Analyzer, "sched class consistency analysis", 705 AnalysisInconsistenciesOutputFile); 706 } 707 708 } // namespace exegesis 709 } // namespace llvm 710 711 int main(int Argc, char **Argv) { 712 using namespace llvm; 713 714 InitLLVM X(Argc, Argv); 715 716 // Initialize targets so we can print them when flag --version is specified. 717 #define LLVM_EXEGESIS(TargetName) \ 718 LLVMInitialize##TargetName##Target(); \ 719 LLVMInitialize##TargetName##TargetInfo(); \ 720 LLVMInitialize##TargetName##TargetMC(); 721 #include "llvm/Config/TargetExegesis.def" 722 723 // Register the Target and CPU printer for --version. 724 cl::AddExtraVersionPrinter(sys::printDefaultTargetAndDetectedCPU); 725 726 // Enable printing of available targets when flag --version is specified. 727 cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion); 728 729 cl::HideUnrelatedOptions({&exegesis::Options, &exegesis::BenchmarkOptions, 730 &exegesis::AnalysisOptions}); 731 732 cl::ParseCommandLineOptions(Argc, Argv, 733 "llvm host machine instruction characteristics " 734 "measurment and analysis.\n"); 735 736 exegesis::ExitOnErr.setExitCodeMapper([](const Error &Err) { 737 if (Err.isA<exegesis::ClusteringError>()) 738 return EXIT_SUCCESS; 739 return EXIT_FAILURE; 740 }); 741 742 if (exegesis::BenchmarkMode == exegesis::Benchmark::Unknown) { 743 exegesis::analysisMain(); 744 } else { 745 exegesis::benchmarkMain(); 746 } 747 return EXIT_SUCCESS; 748 } 749