1 //===-- Target.h ------------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// 11 /// Classes that handle the creation of target-specific objects. This is 12 /// similar to Target/TargetRegistry. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #ifndef LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H 17 #define LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H 18 19 #include "BenchmarkResult.h" 20 #include "BenchmarkRunner.h" 21 #include "Error.h" 22 #include "LlvmState.h" 23 #include "PerfHelper.h" 24 #include "SnippetGenerator.h" 25 #include "llvm/CodeGen/TargetPassConfig.h" 26 #include "llvm/IR/CallingConv.h" 27 #include "llvm/IR/LegacyPassManager.h" 28 #include "llvm/MC/MCInst.h" 29 #include "llvm/MC/MCRegisterInfo.h" 30 #include "llvm/Support/CommandLine.h" 31 #include "llvm/Support/Error.h" 32 #include "llvm/TargetParser/Triple.h" 33 34 namespace llvm { 35 namespace exegesis { 36 37 extern cl::OptionCategory Options; 38 extern cl::OptionCategory BenchmarkOptions; 39 extern cl::OptionCategory AnalysisOptions; 40 41 struct PfmCountersInfo { 42 // An optional name of a performance counter that can be used to measure 43 // cycles. 44 const char *CycleCounter; 45 46 // An optional name of a performance counter that can be used to measure 47 // uops. 48 const char *UopsCounter; 49 50 // An IssueCounter specifies how to measure uops issued to specific proc 51 // resources. 52 struct IssueCounter { 53 const char *Counter; 54 // The name of the ProcResource that this counter measures. 55 const char *ProcResName; 56 }; 57 // An optional list of IssueCounters. 58 const IssueCounter *IssueCounters; 59 unsigned NumIssueCounters; 60 61 static const PfmCountersInfo Default; 62 static const PfmCountersInfo Dummy; 63 }; 64 65 struct CpuAndPfmCounters { 66 const char *CpuName; 67 const PfmCountersInfo *PCI; 68 bool operator<(StringRef S) const { return StringRef(CpuName) < S; } 69 }; 70 71 class ExegesisTarget { 72 public: 73 explicit ExegesisTarget(ArrayRef<CpuAndPfmCounters> CpuPfmCounters) 74 : CpuPfmCounters(CpuPfmCounters) {} 75 76 // Targets can use this to create target-specific perf counters. 77 virtual Expected<std::unique_ptr<pfm::Counter>> 78 createCounter(StringRef CounterName, const LLVMState &State, 79 const pid_t ProcessID = 0) const; 80 81 // Targets can use this to add target-specific passes in assembleToStream(); 82 virtual void addTargetSpecificPasses(PassManagerBase &PM) const {} 83 84 // Generates code to move a constant into a the given register. 85 // Precondition: Value must fit into Reg. 86 virtual std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, unsigned Reg, 87 const APInt &Value) const = 0; 88 89 // Generates the code for the lower munmap call. The code generated by this 90 // function may clobber registers. 91 virtual void generateLowerMunmap(std::vector<MCInst> &GeneratedCode) const { 92 report_fatal_error( 93 "generateLowerMunmap is not implemented on the current architecture"); 94 } 95 96 // Generates the upper munmap call. The code generated by this function may 97 // clobber registers. 98 virtual void generateUpperMunmap(std::vector<MCInst> &GeneratedCode) const { 99 report_fatal_error( 100 "generateUpperMunmap is not implemented on the current architecture"); 101 } 102 103 // Generates the code for an exit syscall. The code generated by this function 104 // may clobber registers. 105 virtual std::vector<MCInst> generateExitSyscall(unsigned ExitCode) const { 106 report_fatal_error( 107 "generateExitSyscall is not implemented on the current architecture"); 108 } 109 110 // Generates the code to mmap a region of code. The code generated by this 111 // function may clobber registers. 112 virtual std::vector<MCInst> 113 generateMmap(intptr_t Address, size_t Length, 114 intptr_t FileDescriptorAddress) const { 115 report_fatal_error( 116 "generateMmap is not implemented on the current architecture"); 117 } 118 119 // Generates the mmap code for the aux memory. The code generated by this 120 // function may clobber registers. 121 virtual void generateMmapAuxMem(std::vector<MCInst> &GeneratedCode) const { 122 report_fatal_error( 123 "generateMmapAuxMem is not implemented on the current architecture\n"); 124 } 125 126 // Moves argument registers into other registers that won't get clobbered 127 // while making syscalls. The code generated by this function may clobber 128 // registers. 129 virtual void moveArgumentRegisters(std::vector<MCInst> &GeneratedCode) const { 130 report_fatal_error("moveArgumentRegisters is not implemented on the " 131 "current architecture\n"); 132 } 133 134 // Generates code to move argument registers, unmap memory above and below the 135 // snippet, and map the auxiliary memory into the subprocess. The code 136 // generated by this function may clobber registers. 137 virtual std::vector<MCInst> generateMemoryInitialSetup() const { 138 report_fatal_error("generateMemoryInitialSetup is not supported on the " 139 "current architecture\n"); 140 } 141 142 // Sets the stack register to the auxiliary memory so that operations 143 // requiring the stack can be formed (e.g., setting large registers). The code 144 // generated by this function may clobber registers. 145 virtual std::vector<MCInst> setStackRegisterToAuxMem() const { 146 report_fatal_error("setStackRegisterToAuxMem is not implemented on the " 147 "current architectures"); 148 } 149 150 virtual intptr_t getAuxiliaryMemoryStartAddress() const { 151 report_fatal_error("getAuxiliaryMemoryStartAddress is not implemented on " 152 "the current architecture"); 153 } 154 155 // Generates the necessary ioctl system calls to configure the perf counters. 156 // The code generated by this function preserves all registers if the 157 // parameter SaveRegisters is set to true. 158 virtual std::vector<MCInst> configurePerfCounter(long Request, 159 bool SaveRegisters) const { 160 report_fatal_error( 161 "configurePerfCounter is not implemented on the current architecture"); 162 } 163 164 // Gets the ABI dependent registers that are used to pass arguments in a 165 // function call. 166 virtual std::vector<unsigned> getArgumentRegisters() const { 167 report_fatal_error( 168 "getArgumentRegisters is not implemented on the current architecture"); 169 }; 170 171 // Gets the registers that might potentially need to be saved by while 172 // the setup in the test harness executes. 173 virtual std::vector<unsigned> getRegistersNeedSaving() const { 174 report_fatal_error("getRegistersNeedSaving is not implemented on the " 175 "current architecture"); 176 }; 177 178 // Returns the register pointing to scratch memory, or 0 if this target 179 // does not support memory operands. The benchmark function uses the 180 // default calling convention. 181 virtual unsigned getScratchMemoryRegister(const Triple &) const { return 0; } 182 183 // Fills memory operands with references to the address at [Reg] + Offset. 184 virtual void fillMemoryOperands(InstructionTemplate &IT, unsigned Reg, 185 unsigned Offset) const { 186 llvm_unreachable( 187 "fillMemoryOperands() requires getScratchMemoryRegister() > 0"); 188 } 189 190 // Returns a counter usable as a loop counter. 191 virtual unsigned getLoopCounterRegister(const Triple &) const { return 0; } 192 193 // Adds the code to decrement the loop counter and 194 virtual void decrementLoopCounterAndJump(MachineBasicBlock &MBB, 195 MachineBasicBlock &TargetMBB, 196 const MCInstrInfo &MII) const { 197 llvm_unreachable("decrementLoopCounterAndBranch() requires " 198 "getLoopCounterRegister() > 0"); 199 } 200 201 // Returns a list of unavailable registers. 202 // Targets can use this to prevent some registers to be automatically selected 203 // for use in snippets. 204 virtual ArrayRef<unsigned> getUnavailableRegisters() const { return {}; } 205 206 // Returns the maximum number of bytes a load/store instruction can access at 207 // once. This is typically the size of the largest register available on the 208 // processor. Note that this only used as a hint to generate independant 209 // load/stores to/from memory, so the exact returned value does not really 210 // matter as long as it's large enough. 211 virtual unsigned getMaxMemoryAccessSize() const { return 0; } 212 213 // Assigns a random operand of the right type to variable Var. 214 // The target is responsible for handling any operand starting from 215 // OPERAND_FIRST_TARGET. 216 virtual Error randomizeTargetMCOperand(const Instruction &Instr, 217 const Variable &Var, 218 MCOperand &AssignedValue, 219 const BitVector &ForbiddenRegs) const { 220 return make_error<Failure>( 221 "targets with target-specific operands should implement this"); 222 } 223 224 // Returns true if this instruction is supported as a back-to-back 225 // instructions. 226 // FIXME: Eventually we should discover this dynamically. 227 virtual bool allowAsBackToBack(const Instruction &Instr) const { 228 return true; 229 } 230 231 // For some instructions, it is interesting to measure how it's performance 232 // characteristics differ depending on it's operands. 233 // This allows us to produce all the interesting variants. 234 virtual std::vector<InstructionTemplate> 235 generateInstructionVariants(const Instruction &Instr, 236 unsigned MaxConfigsPerOpcode) const { 237 // By default, we're happy with whatever randomizer will give us. 238 return {&Instr}; 239 } 240 241 // Checks hardware and software support for current benchmark mode. 242 // Returns an error if the target host does not have support to run the 243 // benchmark. 244 virtual Error checkFeatureSupport() const { return Error::success(); } 245 246 // Creates a snippet generator for the given mode. 247 std::unique_ptr<SnippetGenerator> 248 createSnippetGenerator(Benchmark::ModeE Mode, 249 const LLVMState &State, 250 const SnippetGenerator::Options &Opts) const; 251 // Creates a benchmark runner for the given mode. 252 Expected<std::unique_ptr<BenchmarkRunner>> createBenchmarkRunner( 253 Benchmark::ModeE Mode, const LLVMState &State, 254 BenchmarkPhaseSelectorE BenchmarkPhaseSelector, 255 BenchmarkRunner::ExecutionModeE ExecutionMode, 256 Benchmark::ResultAggregationModeE ResultAggMode = Benchmark::Min) const; 257 258 // Returns the ExegesisTarget for the given triple or nullptr if the target 259 // does not exist. 260 static const ExegesisTarget *lookup(Triple TT); 261 // Returns the default (unspecialized) ExegesisTarget. 262 static const ExegesisTarget &getDefault(); 263 // Registers a target. Not thread safe. 264 static void registerTarget(ExegesisTarget *T); 265 266 virtual ~ExegesisTarget(); 267 268 // Returns the Pfm counters for the given CPU (or the default if no pfm 269 // counters are defined for this CPU). 270 const PfmCountersInfo &getPfmCounters(StringRef CpuName) const; 271 272 // Returns dummy Pfm counters which can be used to execute generated snippet 273 // without access to performance counters. 274 const PfmCountersInfo &getDummyPfmCounters() const; 275 276 // Saves the CPU state that needs to be preserved when running a benchmark, 277 // and returns and RAII object that restores the state on destruction. 278 // By default no state is preserved. 279 struct SavedState { 280 virtual ~SavedState(); 281 }; 282 virtual std::unique_ptr<SavedState> withSavedState() const { 283 return std::make_unique<SavedState>(); 284 } 285 286 private: 287 virtual bool matchesArch(Triple::ArchType Arch) const = 0; 288 289 // Targets can implement their own snippet generators/benchmarks runners by 290 // implementing these. 291 std::unique_ptr<SnippetGenerator> virtual createSerialSnippetGenerator( 292 const LLVMState &State, const SnippetGenerator::Options &Opts) const; 293 std::unique_ptr<SnippetGenerator> virtual createParallelSnippetGenerator( 294 const LLVMState &State, const SnippetGenerator::Options &Opts) const; 295 std::unique_ptr<BenchmarkRunner> virtual createLatencyBenchmarkRunner( 296 const LLVMState &State, Benchmark::ModeE Mode, 297 BenchmarkPhaseSelectorE BenchmarkPhaseSelector, 298 Benchmark::ResultAggregationModeE ResultAggMode, 299 BenchmarkRunner::ExecutionModeE ExecutionMode) const; 300 std::unique_ptr<BenchmarkRunner> virtual createUopsBenchmarkRunner( 301 const LLVMState &State, BenchmarkPhaseSelectorE BenchmarkPhaseSelector, 302 Benchmark::ResultAggregationModeE ResultAggMode, 303 BenchmarkRunner::ExecutionModeE ExecutionMode) const; 304 305 const ExegesisTarget *Next = nullptr; 306 const ArrayRef<CpuAndPfmCounters> CpuPfmCounters; 307 }; 308 309 } // namespace exegesis 310 } // namespace llvm 311 312 #endif // LLVM_TOOLS_LLVM_EXEGESIS_TARGET_H 313