xref: /llvm-project/llvm/tools/llvm-exegesis/lib/Target.h (revision ff1b01bb7897bf2401540096af775d35b12eb247)
1 //===-- Target.h ------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 ///
11 /// Classes that handle the creation of target-specific objects. This is
12 /// similar to Target/TargetRegistry.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H
17 #define LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H
18 
19 #include "BenchmarkResult.h"
20 #include "BenchmarkRunner.h"
21 #include "Error.h"
22 #include "LlvmState.h"
23 #include "PerfHelper.h"
24 #include "SnippetGenerator.h"
25 #include "ValidationEvent.h"
26 #include "llvm/CodeGen/TargetPassConfig.h"
27 #include "llvm/IR/CallingConv.h"
28 #include "llvm/IR/LegacyPassManager.h"
29 #include "llvm/MC/MCInst.h"
30 #include "llvm/MC/MCRegisterInfo.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Error.h"
33 #include "llvm/TargetParser/SubtargetFeature.h"
34 #include "llvm/TargetParser/Triple.h"
35 
36 namespace llvm {
37 namespace exegesis {
38 
39 extern cl::OptionCategory Options;
40 extern cl::OptionCategory BenchmarkOptions;
41 extern cl::OptionCategory AnalysisOptions;
42 
43 struct PfmCountersInfo {
44   // An optional name of a performance counter that can be used to measure
45   // cycles.
46   const char *CycleCounter;
47 
48   // An optional name of a performance counter that can be used to measure
49   // uops.
50   const char *UopsCounter;
51 
52   // An IssueCounter specifies how to measure uops issued to specific proc
53   // resources.
54   struct IssueCounter {
55     const char *Counter;
56     // The name of the ProcResource that this counter measures.
57     const char *ProcResName;
58   };
59   // An optional list of IssueCounters.
60   const IssueCounter *IssueCounters;
61   unsigned NumIssueCounters;
62 
63   const std::pair<ValidationEvent, const char *> *ValidationEvents;
64   unsigned NumValidationEvents;
65 
66   static const PfmCountersInfo Default;
67   static const PfmCountersInfo Dummy;
68 };
69 
70 struct CpuAndPfmCounters {
71   const char *CpuName;
72   const PfmCountersInfo *PCI;
73   bool operator<(StringRef S) const { return StringRef(CpuName) < S; }
74 };
75 
76 class ExegesisTarget {
77 public:
78   typedef bool (*OpcodeAvailabilityChecker)(unsigned, const FeatureBitset &);
79   ExegesisTarget(ArrayRef<CpuAndPfmCounters> CpuPfmCounters,
80                  OpcodeAvailabilityChecker IsOpcodeAvailable)
81       : CpuPfmCounters(CpuPfmCounters), IsOpcodeAvailable(IsOpcodeAvailable) {}
82 
83   // Targets can use this to create target-specific perf counters.
84   virtual Expected<std::unique_ptr<pfm::CounterGroup>>
85   createCounter(StringRef CounterName, const LLVMState &State,
86                 ArrayRef<const char *> ValidationCounters,
87                 const pid_t ProcessID = 0) const;
88 
89   // Targets can use this to add target-specific passes in assembleToStream();
90   virtual void addTargetSpecificPasses(PassManagerBase &PM) const {}
91 
92   // Generates code to move a constant into a the given register.
93   // Precondition: Value must fit into Reg.
94   virtual std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI,
95                                        MCRegister Reg,
96                                        const APInt &Value) const = 0;
97 
98   // Generates the code for the lower munmap call. The code generated by this
99   // function may clobber registers.
100   virtual void generateLowerMunmap(std::vector<MCInst> &GeneratedCode) const {
101     report_fatal_error(
102         "generateLowerMunmap is not implemented on the current architecture");
103   }
104 
105   // Generates the upper munmap call. The code generated by this function may
106   // clobber registers.
107   virtual void generateUpperMunmap(std::vector<MCInst> &GeneratedCode) const {
108     report_fatal_error(
109         "generateUpperMunmap is not implemented on the current architecture");
110   }
111 
112   // Generates the code for an exit syscall. The code generated by this function
113   // may clobber registers.
114   virtual std::vector<MCInst> generateExitSyscall(unsigned ExitCode) const {
115     report_fatal_error(
116         "generateExitSyscall is not implemented on the current architecture");
117   }
118 
119   // Generates the code to mmap a region of code. The code generated by this
120   // function may clobber registers.
121   virtual std::vector<MCInst>
122   generateMmap(uintptr_t Address, size_t Length,
123                uintptr_t FileDescriptorAddress) const {
124     report_fatal_error(
125         "generateMmap is not implemented on the current architecture");
126   }
127 
128   // Generates the mmap code for the aux memory. The code generated by this
129   // function may clobber registers.
130   virtual void generateMmapAuxMem(std::vector<MCInst> &GeneratedCode) const {
131     report_fatal_error(
132         "generateMmapAuxMem is not implemented on the current architecture\n");
133   }
134 
135   // Moves argument registers into other registers that won't get clobbered
136   // while making syscalls. The code generated by this function may clobber
137   // registers.
138   virtual void moveArgumentRegisters(std::vector<MCInst> &GeneratedCode) const {
139     report_fatal_error("moveArgumentRegisters is not implemented on the "
140                        "current architecture\n");
141   }
142 
143   // Generates code to move argument registers, unmap memory above and below the
144   // snippet, and map the auxiliary memory into the subprocess. The code
145   // generated by this function may clobber registers.
146   virtual std::vector<MCInst> generateMemoryInitialSetup() const {
147     report_fatal_error("generateMemoryInitialSetup is not supported on the "
148                        "current architecture\n");
149   }
150 
151   // Returns true if all features are available that are required by Opcode.
152   virtual bool isOpcodeAvailable(unsigned Opcode,
153                                  const FeatureBitset &Features) const {
154     return IsOpcodeAvailable(Opcode, Features);
155   }
156 
157   // Sets the stack register to the auxiliary memory so that operations
158   // requiring the stack can be formed (e.g., setting large registers). The code
159   // generated by this function may clobber registers.
160   virtual std::vector<MCInst> setStackRegisterToAuxMem() const {
161     report_fatal_error("setStackRegisterToAuxMem is not implemented on the "
162                        "current architectures");
163   }
164 
165   virtual uintptr_t getAuxiliaryMemoryStartAddress() const {
166     report_fatal_error("getAuxiliaryMemoryStartAddress is not implemented on "
167                        "the current architecture");
168   }
169 
170   // Generates the necessary ioctl system calls to configure the perf counters.
171   // The code generated by this function preserves all registers if the
172   // parameter SaveRegisters is set to true.
173   virtual std::vector<MCInst> configurePerfCounter(long Request,
174                                                    bool SaveRegisters) const {
175     report_fatal_error(
176         "configurePerfCounter is not implemented on the current architecture");
177   }
178 
179   // Gets the ABI dependent registers that are used to pass arguments in a
180   // function call.
181   virtual std::vector<MCRegister> getArgumentRegisters() const {
182     report_fatal_error(
183         "getArgumentRegisters is not implemented on the current architecture");
184   };
185 
186   // Gets the registers that might potentially need to be saved by while
187   // the setup in the test harness executes.
188   virtual std::vector<MCRegister> getRegistersNeedSaving() const {
189     report_fatal_error("getRegistersNeedSaving is not implemented on the "
190                        "current architecture");
191   };
192 
193   // Returns the register pointing to scratch memory, or 0 if this target
194   // does not support memory operands. The benchmark function uses the
195   // default calling convention.
196   virtual MCRegister getScratchMemoryRegister(const Triple &) const {
197     return MCRegister();
198   }
199 
200   // Fills memory operands with references to the address at [Reg] + Offset.
201   virtual void fillMemoryOperands(InstructionTemplate &IT, MCRegister Reg,
202                                   unsigned Offset) const {
203     llvm_unreachable(
204         "fillMemoryOperands() requires getScratchMemoryRegister() > 0");
205   }
206 
207   // Returns a counter usable as a loop counter.
208   virtual MCRegister getDefaultLoopCounterRegister(const Triple &) const {
209     return MCRegister();
210   }
211 
212   // Adds the code to decrement the loop counter and
213   virtual void decrementLoopCounterAndJump(MachineBasicBlock &MBB,
214                                            MachineBasicBlock &TargetMBB,
215                                            const MCInstrInfo &MII,
216                                            MCRegister LoopRegister) const {
217     llvm_unreachable("decrementLoopCounterAndBranch() requires "
218                      "getLoopCounterRegister() > 0");
219   }
220 
221   // Returns a list of unavailable registers.
222   // Targets can use this to prevent some registers to be automatically selected
223   // for use in snippets.
224   virtual ArrayRef<MCPhysReg> getUnavailableRegisters() const { return {}; }
225 
226   // Returns the maximum number of bytes a load/store instruction can access at
227   // once. This is typically the size of the largest register available on the
228   // processor. Note that this only used as a hint to generate independant
229   // load/stores to/from memory, so the exact returned value does not really
230   // matter as long as it's large enough.
231   virtual unsigned getMaxMemoryAccessSize() const { return 0; }
232 
233   // Assigns a random operand of the right type to variable Var.
234   // The target is responsible for handling any operand starting from
235   // OPERAND_FIRST_TARGET.
236   virtual Error randomizeTargetMCOperand(const Instruction &Instr,
237                                          const Variable &Var,
238                                          MCOperand &AssignedValue,
239                                          const BitVector &ForbiddenRegs) const {
240     return make_error<Failure>(
241         "targets with target-specific operands should implement this");
242   }
243 
244   // Returns true if this instruction is supported as a back-to-back
245   // instructions.
246   // FIXME: Eventually we should discover this dynamically.
247   virtual bool allowAsBackToBack(const Instruction &Instr) const {
248     return true;
249   }
250 
251   // For some instructions, it is interesting to measure how it's performance
252   // characteristics differ depending on it's operands.
253   // This allows us to produce all the interesting variants.
254   virtual std::vector<InstructionTemplate>
255   generateInstructionVariants(const Instruction &Instr,
256                               unsigned MaxConfigsPerOpcode) const {
257     // By default, we're happy with whatever randomizer will give us.
258     return {&Instr};
259   }
260 
261   // Checks hardware and software support for current benchmark mode.
262   // Returns an error if the target host does not have support to run the
263   // benchmark.
264   virtual Error checkFeatureSupport() const { return Error::success(); }
265 
266   // Creates a snippet generator for the given mode.
267   std::unique_ptr<SnippetGenerator>
268   createSnippetGenerator(Benchmark::ModeE Mode,
269                          const LLVMState &State,
270                          const SnippetGenerator::Options &Opts) const;
271   // Creates a benchmark runner for the given mode.
272   Expected<std::unique_ptr<BenchmarkRunner>> createBenchmarkRunner(
273       Benchmark::ModeE Mode, const LLVMState &State,
274       BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
275       BenchmarkRunner::ExecutionModeE ExecutionMode,
276       unsigned BenchmarkRepeatCount,
277       ArrayRef<ValidationEvent> ValidationCounters,
278       Benchmark::ResultAggregationModeE ResultAggMode = Benchmark::Min) const;
279 
280   // Returns the ExegesisTarget for the given triple or nullptr if the target
281   // does not exist.
282   static const ExegesisTarget *lookup(Triple TT);
283   // Returns the default (unspecialized) ExegesisTarget.
284   static const ExegesisTarget &getDefault();
285   // Registers a target. Not thread safe.
286   static void registerTarget(ExegesisTarget *T);
287 
288   virtual ~ExegesisTarget();
289 
290   // Returns the Pfm counters for the given CPU (or the default if no pfm
291   // counters are defined for this CPU).
292   const PfmCountersInfo &getPfmCounters(StringRef CpuName) const;
293 
294   // Returns dummy Pfm counters which can be used to execute generated snippet
295   // without access to performance counters.
296   const PfmCountersInfo &getDummyPfmCounters() const;
297 
298   // Saves the CPU state that needs to be preserved when running a benchmark,
299   // and returns and RAII object that restores the state on destruction.
300   // By default no state is preserved.
301   struct SavedState {
302     virtual ~SavedState();
303   };
304   virtual std::unique_ptr<SavedState> withSavedState() const {
305     return std::make_unique<SavedState>();
306   }
307 
308 private:
309   virtual bool matchesArch(Triple::ArchType Arch) const = 0;
310 
311   // Targets can implement their own snippet generators/benchmarks runners by
312   // implementing these.
313   std::unique_ptr<SnippetGenerator> virtual createSerialSnippetGenerator(
314       const LLVMState &State, const SnippetGenerator::Options &Opts) const;
315   std::unique_ptr<SnippetGenerator> virtual createParallelSnippetGenerator(
316       const LLVMState &State, const SnippetGenerator::Options &Opts) const;
317   std::unique_ptr<BenchmarkRunner> virtual createLatencyBenchmarkRunner(
318       const LLVMState &State, Benchmark::ModeE Mode,
319       BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
320       Benchmark::ResultAggregationModeE ResultAggMode,
321       BenchmarkRunner::ExecutionModeE ExecutionMode,
322       ArrayRef<ValidationEvent> ValidationCounters,
323       unsigned BenchmarkRepeatCount) const;
324   std::unique_ptr<BenchmarkRunner> virtual createUopsBenchmarkRunner(
325       const LLVMState &State, BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
326       Benchmark::ResultAggregationModeE ResultAggMode,
327       BenchmarkRunner::ExecutionModeE ExecutionMode,
328       ArrayRef<ValidationEvent> ValidationCounters) const;
329 
330   const ExegesisTarget *Next = nullptr;
331   const ArrayRef<CpuAndPfmCounters> CpuPfmCounters;
332   const OpcodeAvailabilityChecker IsOpcodeAvailable;
333 };
334 
335 } // namespace exegesis
336 } // namespace llvm
337 
338 #endif // LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H
339