xref: /openbsd-src/gnu/llvm/llvm/tools/llvm-exegesis/llvm-exegesis.cpp (revision d415bd752c734aee168c4ee86ff32e8cc249eb16)
//===-- llvm-exegesis.cpp ---------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Measures execution properties (latencies/uops) of an instruction.
///
//===----------------------------------------------------------------------===//
13 
#include "lib/Analysis.h"
#include "lib/BenchmarkResult.h"
#include "lib/BenchmarkRunner.h"
#include "lib/Clustering.h"
#include "lib/Error.h"
#include "lib/LlvmState.h"
#include "lib/PerfHelper.h"
#include "lib/ProgressMeter.h"
#include "lib/SnippetFile.h"
#include "lib/SnippetRepetitor.h"
#include "lib/Target.h"
#include "lib/TargetSelect.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
#include <algorithm>
#include <string>
46 
47 namespace llvm {
48 namespace exegesis {
49 
50 static cl::opt<int> OpcodeIndex(
51     "opcode-index",
52     cl::desc("opcode to measure, by index, or -1 to measure all opcodes"),
53     cl::cat(BenchmarkOptions), cl::init(0));
54 
55 static cl::opt<std::string>
56     OpcodeNames("opcode-name",
57                 cl::desc("comma-separated list of opcodes to measure, by name"),
58                 cl::cat(BenchmarkOptions), cl::init(""));
59 
60 static cl::opt<std::string> SnippetsFile("snippets-file",
61                                          cl::desc("code snippets to measure"),
62                                          cl::cat(BenchmarkOptions),
63                                          cl::init(""));
64 
65 static cl::opt<std::string>
66     BenchmarkFile("benchmarks-file",
67                   cl::desc("File to read (analysis mode) or write "
68                            "(latency/uops/inverse_throughput modes) benchmark "
69                            "results. “-” uses stdin/stdout."),
70                   cl::cat(Options), cl::init(""));
71 
72 static cl::opt<exegesis::InstructionBenchmark::ModeE> BenchmarkMode(
73     "mode", cl::desc("the mode to run"), cl::cat(Options),
74     cl::values(clEnumValN(exegesis::InstructionBenchmark::Latency, "latency",
75                           "Instruction Latency"),
76                clEnumValN(exegesis::InstructionBenchmark::InverseThroughput,
77                           "inverse_throughput",
78                           "Instruction Inverse Throughput"),
79                clEnumValN(exegesis::InstructionBenchmark::Uops, "uops",
80                           "Uop Decomposition"),
81                // When not asking for a specific benchmark mode,
82                // we'll analyse the results.
83                clEnumValN(exegesis::InstructionBenchmark::Unknown, "analysis",
84                           "Analysis")));
85 
86 static cl::opt<exegesis::InstructionBenchmark::ResultAggregationModeE>
87     ResultAggMode(
88         "result-aggregation-mode",
89         cl::desc("How to aggregate multi-values result"),
90         cl::cat(BenchmarkOptions),
91         cl::values(clEnumValN(exegesis::InstructionBenchmark::Min, "min",
92                               "Keep min reading"),
93                    clEnumValN(exegesis::InstructionBenchmark::Max, "max",
94                               "Keep max reading"),
95                    clEnumValN(exegesis::InstructionBenchmark::Mean, "mean",
96                               "Compute mean of all readings"),
97                    clEnumValN(exegesis::InstructionBenchmark::MinVariance,
98                               "min-variance",
99                               "Keep readings set with min-variance")),
100         cl::init(exegesis::InstructionBenchmark::Min));
101 
102 static cl::opt<exegesis::InstructionBenchmark::RepetitionModeE> RepetitionMode(
103     "repetition-mode", cl::desc("how to repeat the instruction snippet"),
104     cl::cat(BenchmarkOptions),
105     cl::values(
106         clEnumValN(exegesis::InstructionBenchmark::Duplicate, "duplicate",
107                    "Duplicate the snippet"),
108         clEnumValN(exegesis::InstructionBenchmark::Loop, "loop",
109                    "Loop over the snippet"),
110         clEnumValN(exegesis::InstructionBenchmark::AggregateMin, "min",
111                    "All of the above and take the minimum of measurements")),
112     cl::init(exegesis::InstructionBenchmark::Duplicate));
113 
114 static cl::opt<bool> BenchmarkMeasurementsPrintProgress(
115     "measurements-print-progress",
116     cl::desc("Produce progress indicator when performing measurements"),
117     cl::cat(BenchmarkOptions), cl::init(false));
118 
119 static cl::opt<exegesis::BenchmarkPhaseSelectorE> BenchmarkPhaseSelector(
120     "benchmark-phase",
121     cl::desc(
122         "it is possible to stop the benchmarking process after some phase"),
123     cl::cat(BenchmarkOptions),
124     cl::values(
125         clEnumValN(exegesis::BenchmarkPhaseSelectorE::PrepareSnippet,
126                    "prepare-snippet",
127                    "Only generate the minimal instruction sequence"),
128         clEnumValN(exegesis::BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet,
129                    "prepare-and-assemble-snippet",
130                    "Same as prepare-snippet, but also dumps an excerpt of the "
131                    "sequence (hex encoded)"),
132         clEnumValN(exegesis::BenchmarkPhaseSelectorE::AssembleMeasuredCode,
133                    "assemble-measured-code",
134                    "Same as prepare-and-assemble-snippet, but also creates the "
135                    "full sequence "
136                    "that can be dumped to a file using --dump-object-to-disk"),
137         clEnumValN(
138             exegesis::BenchmarkPhaseSelectorE::Measure, "measure",
139             "Same as prepare-measured-code, but also runs the measurement "
140             "(default)")),
141     cl::init(exegesis::BenchmarkPhaseSelectorE::Measure));
142 
143 static cl::opt<unsigned>
144     NumRepetitions("num-repetitions",
145                    cl::desc("number of time to repeat the asm snippet"),
146                    cl::cat(BenchmarkOptions), cl::init(10000));
147 
148 static cl::opt<unsigned>
149     LoopBodySize("loop-body-size",
150                  cl::desc("when repeating the instruction snippet by looping "
151                           "over it, duplicate the snippet until the loop body "
152                           "contains at least this many instruction"),
153                  cl::cat(BenchmarkOptions), cl::init(0));
154 
155 static cl::opt<unsigned> MaxConfigsPerOpcode(
156     "max-configs-per-opcode",
157     cl::desc(
158         "allow to snippet generator to generate at most that many configs"),
159     cl::cat(BenchmarkOptions), cl::init(1));
160 
161 static cl::opt<bool> IgnoreInvalidSchedClass(
162     "ignore-invalid-sched-class",
163     cl::desc("ignore instructions that do not define a sched class"),
164     cl::cat(BenchmarkOptions), cl::init(false));
165 
166 static cl::opt<exegesis::InstructionBenchmarkFilter> AnalysisSnippetFilter(
167     "analysis-filter", cl::desc("Filter the benchmarks before analysing them"),
168     cl::cat(BenchmarkOptions),
169     cl::values(
170         clEnumValN(exegesis::InstructionBenchmarkFilter::All, "all",
171                    "Keep all benchmarks (default)"),
172         clEnumValN(exegesis::InstructionBenchmarkFilter::RegOnly, "reg-only",
173                    "Keep only those benchmarks that do *NOT* involve memory"),
174         clEnumValN(exegesis::InstructionBenchmarkFilter::WithMem, "mem-only",
175                    "Keep only the benchmarks that *DO* involve memory")),
176     cl::init(exegesis::InstructionBenchmarkFilter::All));
177 
178 static cl::opt<exegesis::InstructionBenchmarkClustering::ModeE>
179     AnalysisClusteringAlgorithm(
180         "analysis-clustering", cl::desc("the clustering algorithm to use"),
181         cl::cat(AnalysisOptions),
182         cl::values(clEnumValN(exegesis::InstructionBenchmarkClustering::Dbscan,
183                               "dbscan", "use DBSCAN/OPTICS algorithm"),
184                    clEnumValN(exegesis::InstructionBenchmarkClustering::Naive,
185                               "naive", "one cluster per opcode")),
186         cl::init(exegesis::InstructionBenchmarkClustering::Dbscan));
187 
188 static cl::opt<unsigned> AnalysisDbscanNumPoints(
189     "analysis-numpoints",
190     cl::desc("minimum number of points in an analysis cluster (dbscan only)"),
191     cl::cat(AnalysisOptions), cl::init(3));
192 
193 static cl::opt<float> AnalysisClusteringEpsilon(
194     "analysis-clustering-epsilon",
195     cl::desc("epsilon for benchmark point clustering"),
196     cl::cat(AnalysisOptions), cl::init(0.1));
197 
198 static cl::opt<float> AnalysisInconsistencyEpsilon(
199     "analysis-inconsistency-epsilon",
200     cl::desc("epsilon for detection of when the cluster is different from the "
201              "LLVM schedule profile values"),
202     cl::cat(AnalysisOptions), cl::init(0.1));
203 
204 static cl::opt<std::string>
205     AnalysisClustersOutputFile("analysis-clusters-output-file", cl::desc(""),
206                                cl::cat(AnalysisOptions), cl::init(""));
207 static cl::opt<std::string>
208     AnalysisInconsistenciesOutputFile("analysis-inconsistencies-output-file",
209                                       cl::desc(""), cl::cat(AnalysisOptions),
210                                       cl::init(""));
211 
212 static cl::opt<bool> AnalysisDisplayUnstableOpcodes(
213     "analysis-display-unstable-clusters",
214     cl::desc("if there is more than one benchmark for an opcode, said "
215              "benchmarks may end up not being clustered into the same cluster "
216              "if the measured performance characteristics are different. by "
217              "default all such opcodes are filtered out. this flag will "
218              "instead show only such unstable opcodes"),
219     cl::cat(AnalysisOptions), cl::init(false));
220 
221 static cl::opt<bool> AnalysisOverrideBenchmarksTripleAndCpu(
222     "analysis-override-benchmark-triple-and-cpu",
223     cl::desc("By default, we analyze the benchmarks for the triple/CPU they "
224              "were measured for, but if you want to analyze them for some "
225              "other combination (specified via -mtriple/-mcpu), you can "
226              "pass this flag."),
227     cl::cat(AnalysisOptions), cl::init(false));
228 
229 static cl::opt<std::string>
230     TripleName("mtriple",
231                cl::desc("Target triple. See -version for available targets"),
232                cl::cat(Options));
233 
234 static cl::opt<std::string>
235     MCPU("mcpu",
236          cl::desc("Target a specific cpu type (-mcpu=help for details)"),
237          cl::value_desc("cpu-name"), cl::cat(Options), cl::init("native"));
238 
239 static cl::opt<bool> DumpObjectToDisk(
240     "dump-object-to-disk",
241     cl::desc("dumps the generated benchmark object to disk "
242              "and prints a message to access it (default = false)"),
243     cl::cat(BenchmarkOptions), cl::init(false));
244 
245 static ExitOnError ExitOnErr("llvm-exegesis error: ");
246 
247 // Helper function that logs the error(s) and exits.
ExitWithError(ArgTs &&...Args)248 template <typename... ArgTs> static void ExitWithError(ArgTs &&... Args) {
249   ExitOnErr(make_error<Failure>(std::forward<ArgTs>(Args)...));
250 }
251 
252 // Check Err. If it's in a failure state log the file error(s) and exit.
ExitOnFileError(const Twine & FileName,Error Err)253 static void ExitOnFileError(const Twine &FileName, Error Err) {
254   if (Err) {
255     ExitOnErr(createFileError(FileName, std::move(Err)));
256   }
257 }
258 
259 // Check E. If it's in a success state then return the contained value.
260 // If it's in a failure state log the file error(s) and exit.
261 template <typename T>
ExitOnFileError(const Twine & FileName,Expected<T> && E)262 T ExitOnFileError(const Twine &FileName, Expected<T> &&E) {
263   ExitOnFileError(FileName, E.takeError());
264   return std::move(*E);
265 }
266 
267 // Checks that only one of OpcodeNames, OpcodeIndex or SnippetsFile is provided,
268 // and returns the opcode indices or {} if snippets should be read from
269 // `SnippetsFile`.
getOpcodesOrDie(const LLVMState & State)270 static std::vector<unsigned> getOpcodesOrDie(const LLVMState &State) {
271   const size_t NumSetFlags = (OpcodeNames.empty() ? 0 : 1) +
272                              (OpcodeIndex == 0 ? 0 : 1) +
273                              (SnippetsFile.empty() ? 0 : 1);
274   if (NumSetFlags != 1) {
275     ExitOnErr.setBanner("llvm-exegesis: ");
276     ExitWithError("please provide one and only one of 'opcode-index', "
277                   "'opcode-name' or 'snippets-file'");
278   }
279   if (!SnippetsFile.empty())
280     return {};
281   if (OpcodeIndex > 0)
282     return {static_cast<unsigned>(OpcodeIndex)};
283   if (OpcodeIndex < 0) {
284     std::vector<unsigned> Result;
285     unsigned NumOpcodes = State.getInstrInfo().getNumOpcodes();
286     Result.reserve(NumOpcodes);
287     for (unsigned I = 0, E = NumOpcodes; I < E; ++I)
288       Result.push_back(I);
289     return Result;
290   }
291   // Resolve opcode name -> opcode.
292   const auto ResolveName = [&State](StringRef OpcodeName) -> unsigned {
293     const auto &Map = State.getOpcodeNameToOpcodeIdxMapping();
294     auto I = Map.find(OpcodeName);
295     if (I != Map.end())
296       return I->getSecond();
297     return 0u;
298   };
299   SmallVector<StringRef, 2> Pieces;
300   StringRef(OpcodeNames.getValue())
301       .split(Pieces, ",", /* MaxSplit */ -1, /* KeepEmpty */ false);
302   std::vector<unsigned> Result;
303   Result.reserve(Pieces.size());
304   for (const StringRef &OpcodeName : Pieces) {
305     if (unsigned Opcode = ResolveName(OpcodeName))
306       Result.push_back(Opcode);
307     else
308       ExitWithError(Twine("unknown opcode ").concat(OpcodeName));
309   }
310   return Result;
311 }
312 
313 // Generates code snippets for opcode `Opcode`.
314 static Expected<std::vector<BenchmarkCode>>
generateSnippets(const LLVMState & State,unsigned Opcode,const BitVector & ForbiddenRegs)315 generateSnippets(const LLVMState &State, unsigned Opcode,
316                  const BitVector &ForbiddenRegs) {
317   const Instruction &Instr = State.getIC().getInstr(Opcode);
318   const MCInstrDesc &InstrDesc = Instr.Description;
319   // Ignore instructions that we cannot run.
320   if (InstrDesc.isPseudo() || InstrDesc.usesCustomInsertionHook())
321     return make_error<Failure>(
322         "Unsupported opcode: isPseudo/usesCustomInserter");
323   if (InstrDesc.isBranch() || InstrDesc.isIndirectBranch())
324     return make_error<Failure>("Unsupported opcode: isBranch/isIndirectBranch");
325   if (InstrDesc.isCall() || InstrDesc.isReturn())
326     return make_error<Failure>("Unsupported opcode: isCall/isReturn");
327 
328   const std::vector<InstructionTemplate> InstructionVariants =
329       State.getExegesisTarget().generateInstructionVariants(
330           Instr, MaxConfigsPerOpcode);
331 
332   SnippetGenerator::Options SnippetOptions;
333   SnippetOptions.MaxConfigsPerOpcode = MaxConfigsPerOpcode;
334   const std::unique_ptr<SnippetGenerator> Generator =
335       State.getExegesisTarget().createSnippetGenerator(BenchmarkMode, State,
336                                                        SnippetOptions);
337   if (!Generator)
338     ExitWithError("cannot create snippet generator");
339 
340   std::vector<BenchmarkCode> Benchmarks;
341   for (const InstructionTemplate &Variant : InstructionVariants) {
342     if (Benchmarks.size() >= MaxConfigsPerOpcode)
343       break;
344     if (auto Err = Generator->generateConfigurations(Variant, Benchmarks,
345                                                      ForbiddenRegs))
346       return std::move(Err);
347   }
348   return Benchmarks;
349 }
350 
runBenchmarkConfigurations(const LLVMState & State,ArrayRef<BenchmarkCode> Configurations,ArrayRef<std::unique_ptr<const SnippetRepetitor>> Repetitors,const BenchmarkRunner & Runner)351 static void runBenchmarkConfigurations(
352     const LLVMState &State, ArrayRef<BenchmarkCode> Configurations,
353     ArrayRef<std::unique_ptr<const SnippetRepetitor>> Repetitors,
354     const BenchmarkRunner &Runner) {
355   assert(!Configurations.empty() && "Don't have any configurations to run.");
356   std::optional<raw_fd_ostream> FileOstr;
357   if (BenchmarkFile != "-") {
358     int ResultFD = 0;
359     // Create output file or open existing file and truncate it, once.
360     ExitOnErr(errorCodeToError(openFileForWrite(BenchmarkFile, ResultFD,
361                                                 sys::fs::CD_CreateAlways,
362                                                 sys::fs::OF_TextWithCRLF)));
363     FileOstr.emplace(ResultFD, true /*shouldClose*/);
364   }
365   raw_ostream &Ostr = FileOstr ? *FileOstr : outs();
366 
367   std::optional<ProgressMeter<>> Meter;
368   if (BenchmarkMeasurementsPrintProgress)
369     Meter.emplace(Configurations.size());
370   for (const BenchmarkCode &Conf : Configurations) {
371     ProgressMeter<>::ProgressMeterStep MeterStep(Meter ? &*Meter : nullptr);
372     SmallVector<InstructionBenchmark, 2> AllResults;
373 
374     for (const std::unique_ptr<const SnippetRepetitor> &Repetitor :
375          Repetitors) {
376       auto RC = ExitOnErr(Runner.getRunnableConfiguration(
377           Conf, NumRepetitions, LoopBodySize, *Repetitor));
378       AllResults.emplace_back(
379           ExitOnErr(Runner.runConfiguration(std::move(RC), DumpObjectToDisk)));
380     }
381     InstructionBenchmark &Result = AllResults.front();
382 
383     // If any of our measurements failed, pretend they all have failed.
384     if (AllResults.size() > 1 &&
385         any_of(AllResults, [](const InstructionBenchmark &R) {
386           return R.Measurements.empty();
387         }))
388       Result.Measurements.clear();
389 
390     if (RepetitionMode == InstructionBenchmark::RepetitionModeE::AggregateMin) {
391       for (const InstructionBenchmark &OtherResult :
392            ArrayRef<InstructionBenchmark>(AllResults).drop_front()) {
393         llvm::append_range(Result.AssembledSnippet,
394                            OtherResult.AssembledSnippet);
395         // Aggregate measurements, but only iff all measurements succeeded.
396         if (Result.Measurements.empty())
397           continue;
398         assert(OtherResult.Measurements.size() == Result.Measurements.size() &&
399                "Expected to have identical number of measurements.");
400         for (auto I : zip(Result.Measurements, OtherResult.Measurements)) {
401           BenchmarkMeasure &Measurement = std::get<0>(I);
402           const BenchmarkMeasure &NewMeasurement = std::get<1>(I);
403           assert(Measurement.Key == NewMeasurement.Key &&
404                  "Expected measurements to be symmetric");
405 
406           Measurement.PerInstructionValue =
407               std::min(Measurement.PerInstructionValue,
408                        NewMeasurement.PerInstructionValue);
409           Measurement.PerSnippetValue = std::min(
410               Measurement.PerSnippetValue, NewMeasurement.PerSnippetValue);
411         }
412       }
413     }
414 
415     ExitOnFileError(BenchmarkFile, Result.writeYamlTo(State, Ostr));
416   }
417 }
418 
benchmarkMain()419 void benchmarkMain() {
420   if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure) {
421 #ifndef HAVE_LIBPFM
422     ExitWithError(
423         "benchmarking unavailable, LLVM was built without libpfm. You can pass "
424         "--skip-measurements to skip the actual benchmarking.");
425 #else
426     if (exegesis::pfm::pfmInitialize())
427       ExitWithError("cannot initialize libpfm");
428 #endif
429   }
430 
431   InitializeAllAsmPrinters();
432   InitializeAllAsmParsers();
433   InitializeAllExegesisTargets();
434 
435   const LLVMState State = ExitOnErr(LLVMState::Create(TripleName, MCPU));
436 
437   // Preliminary check to ensure features needed for requested
438   // benchmark mode are present on target CPU and/or OS.
439   if (BenchmarkPhaseSelector == BenchmarkPhaseSelectorE::Measure)
440     ExitOnErr(State.getExegesisTarget().checkFeatureSupport());
441 
442   const std::unique_ptr<BenchmarkRunner> Runner =
443       ExitOnErr(State.getExegesisTarget().createBenchmarkRunner(
444           BenchmarkMode, State, BenchmarkPhaseSelector, ResultAggMode));
445   if (!Runner) {
446     ExitWithError("cannot create benchmark runner");
447   }
448 
449   const auto Opcodes = getOpcodesOrDie(State);
450 
451   SmallVector<std::unique_ptr<const SnippetRepetitor>, 2> Repetitors;
452   if (RepetitionMode != InstructionBenchmark::RepetitionModeE::AggregateMin)
453     Repetitors.emplace_back(SnippetRepetitor::Create(RepetitionMode, State));
454   else {
455     for (InstructionBenchmark::RepetitionModeE RepMode :
456          {InstructionBenchmark::RepetitionModeE::Duplicate,
457           InstructionBenchmark::RepetitionModeE::Loop})
458       Repetitors.emplace_back(SnippetRepetitor::Create(RepMode, State));
459   }
460 
461   BitVector AllReservedRegs;
462   llvm::for_each(Repetitors,
463                  [&AllReservedRegs](
464                      const std::unique_ptr<const SnippetRepetitor> &Repetitor) {
465                    AllReservedRegs |= Repetitor->getReservedRegs();
466                  });
467 
468   std::vector<BenchmarkCode> Configurations;
469   if (!Opcodes.empty()) {
470     for (const unsigned Opcode : Opcodes) {
471       // Ignore instructions without a sched class if
472       // -ignore-invalid-sched-class is passed.
473       if (IgnoreInvalidSchedClass &&
474           State.getInstrInfo().get(Opcode).getSchedClass() == 0) {
475         errs() << State.getInstrInfo().getName(Opcode)
476                << ": ignoring instruction without sched class\n";
477         continue;
478       }
479 
480       auto ConfigsForInstr = generateSnippets(State, Opcode, AllReservedRegs);
481       if (!ConfigsForInstr) {
482         logAllUnhandledErrors(
483             ConfigsForInstr.takeError(), errs(),
484             Twine(State.getInstrInfo().getName(Opcode)).concat(": "));
485         continue;
486       }
487       std::move(ConfigsForInstr->begin(), ConfigsForInstr->end(),
488                 std::back_inserter(Configurations));
489     }
490   } else {
491     Configurations = ExitOnErr(readSnippets(State, SnippetsFile));
492   }
493 
494   if (NumRepetitions == 0) {
495     ExitOnErr.setBanner("llvm-exegesis: ");
496     ExitWithError("--num-repetitions must be greater than zero");
497   }
498 
499   // Write to standard output if file is not set.
500   if (BenchmarkFile.empty())
501     BenchmarkFile = "-";
502 
503   if (!Configurations.empty())
504     runBenchmarkConfigurations(State, Configurations, Repetitors, *Runner);
505 
506   exegesis::pfm::pfmTerminate();
507 }
508 
509 // Prints the results of running analysis pass `Pass` to file `OutputFilename`
510 // if OutputFilename is non-empty.
511 template <typename Pass>
maybeRunAnalysis(const Analysis & Analyzer,const std::string & Name,const std::string & OutputFilename)512 static void maybeRunAnalysis(const Analysis &Analyzer, const std::string &Name,
513                              const std::string &OutputFilename) {
514   if (OutputFilename.empty())
515     return;
516   if (OutputFilename != "-") {
517     errs() << "Printing " << Name << " results to file '" << OutputFilename
518            << "'\n";
519   }
520   std::error_code ErrorCode;
521   raw_fd_ostream ClustersOS(OutputFilename, ErrorCode,
522                             sys::fs::FA_Read | sys::fs::FA_Write);
523   if (ErrorCode)
524     ExitOnFileError(OutputFilename, errorCodeToError(ErrorCode));
525   if (auto Err = Analyzer.run<Pass>(ClustersOS))
526     ExitOnFileError(OutputFilename, std::move(Err));
527 }
528 
filterPoints(MutableArrayRef<InstructionBenchmark> Points,const MCInstrInfo & MCII)529 static void filterPoints(MutableArrayRef<InstructionBenchmark> Points,
530                          const MCInstrInfo &MCII) {
531   if (AnalysisSnippetFilter == exegesis::InstructionBenchmarkFilter::All)
532     return;
533 
534   bool WantPointsWithMemOps =
535       AnalysisSnippetFilter == exegesis::InstructionBenchmarkFilter::WithMem;
536   for (InstructionBenchmark &Point : Points) {
537     if (!Point.Error.empty())
538       continue;
539     if (WantPointsWithMemOps ==
540         any_of(Point.Key.Instructions, [&MCII](const MCInst &Inst) {
541           const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode());
542           return MCDesc.mayLoad() || MCDesc.mayStore();
543         }))
544       continue;
545     Point.Error = "filtered out by user";
546   }
547 }
548 
analysisMain()549 static void analysisMain() {
550   ExitOnErr.setBanner("llvm-exegesis: ");
551   if (BenchmarkFile.empty())
552     ExitWithError("--benchmarks-file must be set");
553 
554   if (AnalysisClustersOutputFile.empty() &&
555       AnalysisInconsistenciesOutputFile.empty()) {
556     ExitWithError(
557         "for --mode=analysis: At least one of --analysis-clusters-output-file "
558         "and --analysis-inconsistencies-output-file must be specified");
559   }
560 
561   InitializeAllAsmPrinters();
562   InitializeAllDisassemblers();
563   InitializeAllExegesisTargets();
564 
565   auto MemoryBuffer = ExitOnFileError(
566       BenchmarkFile,
567       errorOrToExpected(MemoryBuffer::getFile(BenchmarkFile, /*IsText=*/true)));
568 
569   const auto TriplesAndCpus = ExitOnFileError(
570       BenchmarkFile,
571       InstructionBenchmark::readTriplesAndCpusFromYamls(*MemoryBuffer));
572   if (TriplesAndCpus.empty()) {
573     errs() << "no benchmarks to analyze\n";
574     return;
575   }
576   if (TriplesAndCpus.size() > 1) {
577     ExitWithError("analysis file contains benchmarks from several CPUs. This "
578                   "is unsupported.");
579   }
580   auto TripleAndCpu = *TriplesAndCpus.begin();
581   if (AnalysisOverrideBenchmarksTripleAndCpu) {
582     llvm::errs() << "overridding file CPU name (" << TripleAndCpu.CpuName
583                  << ") with provided tripled (" << TripleName
584                  << ") and CPU name (" << MCPU << ")\n";
585     TripleAndCpu.LLVMTriple = TripleName;
586     TripleAndCpu.CpuName = MCPU;
587   }
588   llvm::errs() << "using Triple '" << TripleAndCpu.LLVMTriple << "' and CPU '"
589                << TripleAndCpu.CpuName << "'\n";
590 
591   // Read benchmarks.
592   const LLVMState State = ExitOnErr(
593       LLVMState::Create(TripleAndCpu.LLVMTriple, TripleAndCpu.CpuName));
594   std::vector<InstructionBenchmark> Points = ExitOnFileError(
595       BenchmarkFile, InstructionBenchmark::readYamls(State, *MemoryBuffer));
596 
597   outs() << "Parsed " << Points.size() << " benchmark points\n";
598   if (Points.empty()) {
599     errs() << "no benchmarks to analyze\n";
600     return;
601   }
602   // FIXME: Merge points from several runs (latency and uops).
603 
604   filterPoints(Points, State.getInstrInfo());
605 
606   const auto Clustering = ExitOnErr(InstructionBenchmarkClustering::create(
607       Points, AnalysisClusteringAlgorithm, AnalysisDbscanNumPoints,
608       AnalysisClusteringEpsilon, &State.getSubtargetInfo(),
609       &State.getInstrInfo()));
610 
611   const Analysis Analyzer(State, Clustering, AnalysisInconsistencyEpsilon,
612                           AnalysisDisplayUnstableOpcodes);
613 
614   maybeRunAnalysis<Analysis::PrintClusters>(Analyzer, "analysis clusters",
615                                             AnalysisClustersOutputFile);
616   maybeRunAnalysis<Analysis::PrintSchedClassInconsistencies>(
617       Analyzer, "sched class consistency analysis",
618       AnalysisInconsistenciesOutputFile);
619 }
620 
621 } // namespace exegesis
622 } // namespace llvm
623 
main(int Argc,char ** Argv)624 int main(int Argc, char **Argv) {
625   using namespace llvm;
626 
627   InitLLVM X(Argc, Argv);
628 
629   // Initialize targets so we can print them when flag --version is specified.
630   InitializeAllTargetInfos();
631   InitializeAllTargets();
632   InitializeAllTargetMCs();
633 
634   // Register the Target and CPU printer for --version.
635   cl::AddExtraVersionPrinter(sys::printDefaultTargetAndDetectedCPU);
636 
637   // Enable printing of available targets when flag --version is specified.
638   cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
639 
640   cl::HideUnrelatedOptions({&llvm::exegesis::Options,
641                             &llvm::exegesis::BenchmarkOptions,
642                             &llvm::exegesis::AnalysisOptions});
643 
644   cl::ParseCommandLineOptions(Argc, Argv,
645                               "llvm host machine instruction characteristics "
646                               "measurment and analysis.\n");
647 
648   exegesis::ExitOnErr.setExitCodeMapper([](const Error &Err) {
649     if (Err.isA<exegesis::ClusteringError>())
650       return EXIT_SUCCESS;
651     return EXIT_FAILURE;
652   });
653 
654   if (exegesis::BenchmarkMode == exegesis::InstructionBenchmark::Unknown) {
655     exegesis::analysisMain();
656   } else {
657     exegesis::benchmarkMain();
658   }
659   return EXIT_SUCCESS;
660 }
661