//===-- BenchmarkRunner.cpp -------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "BenchmarkRunner.h" #include "Assembler.h" #include "Error.h" #include "MCInstrDescView.h" #include "MmapUtils.h" #include "PerfHelper.h" #include "SubprocessMemory.h" #include "Target.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX #include "llvm/Support/CrashRecoveryContext.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Program.h" #include "llvm/Support/Signals.h" #include "llvm/Support/SystemZ/zOSSupport.h" #include #include #include #ifdef __linux__ #ifdef HAVE_LIBPFM #include #endif #include #include #include #include #include #include #include #if defined(__GLIBC__) && __has_include() && defined(HAVE_BUILTIN_THREAD_POINTER) #include #if defined(RSEQ_SIG) && defined(SYS_rseq) #define GLIBC_INITS_RSEQ #endif #endif #endif // __linux__ namespace llvm { namespace exegesis { BenchmarkRunner::BenchmarkRunner(const LLVMState &State, Benchmark::ModeE Mode, BenchmarkPhaseSelectorE BenchmarkPhaseSelector, ExecutionModeE ExecutionMode, ArrayRef ValCounters) : State(State), Mode(Mode), BenchmarkPhaseSelector(BenchmarkPhaseSelector), ExecutionMode(ExecutionMode), ValidationCounters(ValCounters), Scratch(std::make_unique()) {} BenchmarkRunner::~BenchmarkRunner() = default; void BenchmarkRunner::FunctionExecutor::accumulateCounterValues( const SmallVectorImpl &NewValues, SmallVectorImpl *Result) { const size_t NumValues = std::max(NewValues.size(), Result->size()); if (NumValues > Result->size()) Result->resize(NumValues, 0); for (size_t I = 0, End = NewValues.size(); I < End; ++I) (*Result)[I] += NewValues[I]; } Expected> BenchmarkRunner::FunctionExecutor::runAndSample( const char *Counters, ArrayRef ValidationCounters, SmallVectorImpl &ValidationCounterValues) const { // We sum counts when there are several counters for a single ProcRes // (e.g. P23 on SandyBridge). SmallVector CounterValues; SmallVector CounterNames; StringRef(Counters).split(CounterNames, '+'); for (auto &CounterName : CounterNames) { CounterName = CounterName.trim(); Expected> ValueOrError = runWithCounter( CounterName, ValidationCounters, ValidationCounterValues); if (!ValueOrError) return ValueOrError.takeError(); accumulateCounterValues(ValueOrError.get(), &CounterValues); } return CounterValues; } namespace { class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { public: static Expected> create(const LLVMState &State, object::OwningBinary Obj, BenchmarkRunner::ScratchSpace *Scratch, std::optional BenchmarkProcessCPU) { Expected EF = ExecutableFunction::create(State.createTargetMachine(), std::move(Obj)); if (!EF) return EF.takeError(); return std::unique_ptr( new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch)); } private: InProcessFunctionExecutorImpl(const LLVMState &State, ExecutableFunction Function, BenchmarkRunner::ScratchSpace *Scratch) : State(State), Function(std::move(Function)), Scratch(Scratch) {} static void accumulateCounterValues(const SmallVector &NewValues, SmallVector *Result) { const size_t NumValues = std::max(NewValues.size(), Result->size()); if (NumValues > Result->size()) Result->resize(NumValues, 0); for (size_t I = 0, End = NewValues.size(); I < End; ++I) (*Result)[I] += NewValues[I]; } Expected> runWithCounter( StringRef CounterName, ArrayRef ValidationCounters, SmallVectorImpl &ValidationCounterValues) const override { const ExegesisTarget &ET = State.getExegesisTarget(); char *const ScratchPtr = Scratch->ptr(); auto CounterOrError = ET.createCounter(CounterName, State, ValidationCounters); if (!CounterOrError) return CounterOrError.takeError(); pfm::CounterGroup *Counter = CounterOrError.get().get(); Scratch->clear(); { auto PS = ET.withSavedState(); CrashRecoveryContext CRC; CrashRecoveryContext::Enable(); const bool Crashed = !CRC.RunSafely([this, Counter, ScratchPtr]() { Counter->start(); this->Function(ScratchPtr); Counter->stop(); }); CrashRecoveryContext::Disable(); PS.reset(); if (Crashed) { #ifdef LLVM_ON_UNIX // See "Exit Status for Commands": // https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html constexpr const int kSigOffset = 128; return make_error(CRC.RetCode - kSigOffset); #else // The exit code of the process on windows is not meaningful as a // signal, so simply pass in -1 as the signal into the error. return make_error(-1); #endif // LLVM_ON_UNIX } } auto ValidationValuesOrErr = Counter->readValidationCountersOrError(); if (!ValidationValuesOrErr) return ValidationValuesOrErr.takeError(); ArrayRef RealValidationValues = *ValidationValuesOrErr; for (size_t I = 0; I < RealValidationValues.size(); ++I) ValidationCounterValues[I] = RealValidationValues[I]; return Counter->readOrError(Function.getFunctionBytes()); } const LLVMState &State; const ExecutableFunction Function; BenchmarkRunner::ScratchSpace *const Scratch; }; #ifdef __linux__ // The following class implements a function executor that executes the // benchmark code within a subprocess rather than within the main llvm-exegesis // process. This allows for much more control over the execution context of the // snippet, particularly with regard to memory. This class performs all the // necessary functions to create the subprocess, execute the snippet in the // subprocess, and report results/handle errors. class SubProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { public: static Expected> create(const LLVMState &State, object::OwningBinary Obj, const BenchmarkKey &Key, std::optional BenchmarkProcessCPU) { Expected EF = ExecutableFunction::create(State.createTargetMachine(), std::move(Obj)); if (!EF) return EF.takeError(); return std::unique_ptr( new SubProcessFunctionExecutorImpl(State, std::move(*EF), Key, BenchmarkProcessCPU)); } private: SubProcessFunctionExecutorImpl(const LLVMState &State, ExecutableFunction Function, const BenchmarkKey &Key, std::optional BenchmarkCPU) : State(State), Function(std::move(Function)), Key(Key), BenchmarkProcessCPU(BenchmarkCPU) {} enum ChildProcessExitCodeE { CounterFDReadFailed = 1, RSeqDisableFailed, FunctionDataMappingFailed, AuxiliaryMemorySetupFailed, SetCPUAffinityFailed }; StringRef childProcessExitCodeToString(int ExitCode) const { switch (ExitCode) { case ChildProcessExitCodeE::CounterFDReadFailed: return "Counter file descriptor read failed"; case ChildProcessExitCodeE::RSeqDisableFailed: return "Disabling restartable sequences failed"; case ChildProcessExitCodeE::FunctionDataMappingFailed: return "Failed to map memory for assembled snippet"; case ChildProcessExitCodeE::AuxiliaryMemorySetupFailed: return "Failed to setup auxiliary memory"; case ChildProcessExitCodeE::SetCPUAffinityFailed: return "Failed to set CPU affinity of the benchmarking process"; default: return "Child process returned with unknown exit code"; } } Error sendFileDescriptorThroughSocket(int SocketFD, int FD) const { struct msghdr Message = {}; char Buffer[CMSG_SPACE(sizeof(FD))]; memset(Buffer, 0, sizeof(Buffer)); Message.msg_control = Buffer; Message.msg_controllen = sizeof(Buffer); struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message); ControlMessage->cmsg_level = SOL_SOCKET; ControlMessage->cmsg_type = SCM_RIGHTS; ControlMessage->cmsg_len = CMSG_LEN(sizeof(FD)); memcpy(CMSG_DATA(ControlMessage), &FD, sizeof(FD)); Message.msg_controllen = CMSG_SPACE(sizeof(FD)); ssize_t BytesWritten = sendmsg(SocketFD, &Message, 0); if (BytesWritten < 0) return make_error("Failed to write FD to socket: " + Twine(strerror(errno))); return Error::success(); } Expected getFileDescriptorFromSocket(int SocketFD) const { struct msghdr Message = {}; char ControlBuffer[256]; Message.msg_control = ControlBuffer; Message.msg_controllen = sizeof(ControlBuffer); ssize_t BytesRead = recvmsg(SocketFD, &Message, 0); if (BytesRead < 0) return make_error("Failed to read FD from socket: " + Twine(strerror(errno))); struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message); int FD; if (ControlMessage->cmsg_len != CMSG_LEN(sizeof(FD))) return make_error("Failed to get correct number of bytes for " "file descriptor from socket."); memcpy(&FD, CMSG_DATA(ControlMessage), sizeof(FD)); return FD; } Error runParentProcess(pid_t ChildPID, int WriteFD, StringRef CounterName, SmallVectorImpl &CounterValues, ArrayRef ValidationCounters, SmallVectorImpl &ValidationCounterValues) const { auto WriteFDClose = make_scope_exit([WriteFD]() { close(WriteFD); }); const ExegesisTarget &ET = State.getExegesisTarget(); auto CounterOrError = ET.createCounter(CounterName, State, ValidationCounters, ChildPID); if (!CounterOrError) return CounterOrError.takeError(); pfm::CounterGroup *Counter = CounterOrError.get().get(); // Make sure to attach to the process (and wait for the sigstop to be // delivered and for the process to continue) before we write to the counter // file descriptor. Attaching to the process before writing to the socket // ensures that the subprocess at most has blocked on the read call. If we // attach afterwards, the subprocess might exit before we get to the attach // call due to effects like scheduler contention, introducing transient // failures. if (ptrace(PTRACE_ATTACH, ChildPID, NULL, NULL) != 0) return make_error("Failed to attach to the child process: " + Twine(strerror(errno))); if (waitpid(ChildPID, NULL, 0) == -1) { return make_error( "Failed to wait for child process to stop after attaching: " + Twine(strerror(errno))); } if (ptrace(PTRACE_CONT, ChildPID, NULL, NULL) != 0) return make_error( "Failed to continue execution of the child process: " + Twine(strerror(errno))); int CounterFileDescriptor = Counter->getFileDescriptor(); Error SendError = sendFileDescriptorThroughSocket(WriteFD, CounterFileDescriptor); if (SendError) return SendError; int ChildStatus; if (waitpid(ChildPID, &ChildStatus, 0) == -1) { return make_error( "Waiting for the child process to complete failed: " + Twine(strerror(errno))); } if (WIFEXITED(ChildStatus)) { int ChildExitCode = WEXITSTATUS(ChildStatus); if (ChildExitCode == 0) { // The child exited succesfully, read counter values and return // success. auto CounterValueOrErr = Counter->readOrError(); if (!CounterValueOrErr) return CounterValueOrErr.takeError(); CounterValues = std::move(*CounterValueOrErr); auto ValidationValuesOrErr = Counter->readValidationCountersOrError(); if (!ValidationValuesOrErr) return ValidationValuesOrErr.takeError(); ArrayRef RealValidationValues = *ValidationValuesOrErr; for (size_t I = 0; I < RealValidationValues.size(); ++I) ValidationCounterValues[I] = RealValidationValues[I]; return Error::success(); } // The child exited, but not successfully. return make_error( "Child benchmarking process exited with non-zero exit code: " + childProcessExitCodeToString(ChildExitCode)); } // An error was encountered running the snippet, process it siginfo_t ChildSignalInfo; if (ptrace(PTRACE_GETSIGINFO, ChildPID, NULL, &ChildSignalInfo) == -1) { return make_error("Getting signal info from the child failed: " + Twine(strerror(errno))); } // Send SIGKILL rather than SIGTERM as the child process has no SIGTERM // handlers to run, and calling SIGTERM would mean that ptrace will force // it to block in the signal-delivery-stop for the SIGSEGV/other signals, // and upon exit. if (kill(ChildPID, SIGKILL) == -1) return make_error("Failed to kill child benchmarking proces: " + Twine(strerror(errno))); // Wait for the process to exit so that there are no zombie processes left // around. if (waitpid(ChildPID, NULL, 0) == -1) return make_error("Failed to wait for process to die: " + Twine(strerror(errno))); if (ChildSignalInfo.si_signo == SIGSEGV) return make_error( reinterpret_cast(ChildSignalInfo.si_addr)); return make_error(ChildSignalInfo.si_signo); } static void setCPUAffinityIfRequested(int CPUToUse) { // Special case this function for x86_64 for now as certain more esoteric // platforms have different definitions for some of the libc functions that // cause buildtime failures. Additionally, the subprocess executor mode (the // sole mode where this is supported) currently only supports x86_64. // Also check that we have the SYS_getcpu macro defined, meaning the syscall // actually exists within the build environment. We manually use the syscall // rather than the libc wrapper given the wrapper for getcpu is only available // in glibc 2.29 and later. #if defined(__x86_64__) && defined(SYS_getcpu) // Set the CPU affinity for the child process, so that we ensure that if // the user specified a CPU the process should run on, the benchmarking // process is running on that CPU. cpu_set_t CPUMask; CPU_ZERO(&CPUMask); CPU_SET(CPUToUse, &CPUMask); // TODO(boomanaiden154): Rewrite this to use LLVM primitives once they // are available. int SetAffinityReturn = sched_setaffinity(0, sizeof(CPUMask), &CPUMask); if (SetAffinityReturn == -1) { exit(ChildProcessExitCodeE::SetCPUAffinityFailed); } // Check (if assertions are enabled) that we are actually running on the // CPU that was specified by the user. [[maybe_unused]] unsigned int CurrentCPU; assert(syscall(SYS_getcpu, &CurrentCPU, nullptr) == 0 && "Expected getcpu call to succeed."); assert(static_cast(CurrentCPU) == CPUToUse && "Expected current CPU to equal the CPU requested by the user"); #else exit(ChildProcessExitCodeE::SetCPUAffinityFailed); #endif // defined(__x86_64__) && defined(SYS_getcpu) } Error createSubProcessAndRunBenchmark( StringRef CounterName, SmallVectorImpl &CounterValues, ArrayRef ValidationCounters, SmallVectorImpl &ValidationCounterValues) const { int PipeFiles[2]; int PipeSuccessOrErr = socketpair(AF_UNIX, SOCK_DGRAM, 0, PipeFiles); if (PipeSuccessOrErr != 0) { return make_error( "Failed to create a pipe for interprocess communication between " "llvm-exegesis and the benchmarking subprocess: " + Twine(strerror(errno))); } SubprocessMemory SPMemory; Error MemoryInitError = SPMemory.initializeSubprocessMemory(getpid()); if (MemoryInitError) return MemoryInitError; Error AddMemDefError = SPMemory.addMemoryDefinition(Key.MemoryValues, getpid()); if (AddMemDefError) return AddMemDefError; long ParentTID = SubprocessMemory::getCurrentTID(); pid_t ParentOrChildPID = fork(); if (ParentOrChildPID == -1) { return make_error("Failed to create child process: " + Twine(strerror(errno))); } if (ParentOrChildPID == 0) { if (BenchmarkProcessCPU.has_value()) { setCPUAffinityIfRequested(*BenchmarkProcessCPU); } // We are in the child process, close the write end of the pipe. close(PipeFiles[1]); // Unregister handlers, signal handling is now handled through ptrace in // the host process. sys::unregisterHandlers(); runChildSubprocess(PipeFiles[0], Key, ParentTID); // The child process terminates in the above function, so we should never // get to this point. llvm_unreachable("Child process didn't exit when expected."); } // Close the read end of the pipe as we only need to write to the subprocess // from the parent process. close(PipeFiles[0]); return runParentProcess(ParentOrChildPID, PipeFiles[1], CounterName, CounterValues, ValidationCounters, ValidationCounterValues); } void disableCoreDumps() const { struct rlimit rlim; rlim.rlim_cur = 0; setrlimit(RLIMIT_CORE, &rlim); } [[noreturn]] void runChildSubprocess(int Pipe, const BenchmarkKey &Key, long ParentTID) const { // Disable core dumps in the child process as otherwise everytime we // encounter an execution failure like a segmentation fault, we will create // a core dump. We report the information directly rather than require the // user inspect a core dump. disableCoreDumps(); // The following occurs within the benchmarking subprocess. pid_t ParentPID = getppid(); Expected CounterFileDescriptorOrError = getFileDescriptorFromSocket(Pipe); if (!CounterFileDescriptorOrError) exit(ChildProcessExitCodeE::CounterFDReadFailed); int CounterFileDescriptor = *CounterFileDescriptorOrError; // Glibc versions greater than 2.35 automatically call rseq during // initialization. Unmapping the region that glibc sets up for this causes // segfaults in the program. Unregister the rseq region so that we can safely // unmap it later #ifdef GLIBC_INITS_RSEQ unsigned int RseqStructSize = __rseq_size; // Glibc v2.40 (the change is also expected to be backported to v2.35) // changes the definition of __rseq_size to be the usable area of the struct // rather than the actual size of the struct. v2.35 uses only 20 bytes of // the 32 byte struct. For now, it should be safe to assume that if the // usable size is less than 32, the actual size of the struct will be 32 // bytes given alignment requirements. if (__rseq_size < 32) RseqStructSize = 32; long RseqDisableOutput = syscall( SYS_rseq, reinterpret_cast(__builtin_thread_pointer()) + __rseq_offset, RseqStructSize, RSEQ_FLAG_UNREGISTER, RSEQ_SIG); if (RseqDisableOutput != 0) exit(ChildProcessExitCodeE::RSeqDisableFailed); #endif // GLIBC_INITS_RSEQ // The frontend that generates the memory annotation structures should // validate that the address to map the snippet in at is a multiple of // the page size. Assert that this is true here. assert(Key.SnippetAddress % getpagesize() == 0 && "The snippet address needs to be aligned to a page boundary."); size_t FunctionDataCopySize = this->Function.FunctionBytes.size(); void *MapAddress = NULL; int MapFlags = MAP_PRIVATE | MAP_ANONYMOUS; if (Key.SnippetAddress != 0) { MapAddress = reinterpret_cast(Key.SnippetAddress); MapFlags |= MAP_FIXED_NOREPLACE; } char *FunctionDataCopy = (char *)mmap(MapAddress, FunctionDataCopySize, PROT_READ | PROT_WRITE, MapFlags, 0, 0); if (reinterpret_cast(FunctionDataCopy) == -1) exit(ChildProcessExitCodeE::FunctionDataMappingFailed); memcpy(FunctionDataCopy, this->Function.FunctionBytes.data(), this->Function.FunctionBytes.size()); mprotect(FunctionDataCopy, FunctionDataCopySize, PROT_READ | PROT_EXEC); Expected AuxMemFDOrError = SubprocessMemory::setupAuxiliaryMemoryInSubprocess( Key.MemoryValues, ParentPID, ParentTID, CounterFileDescriptor); if (!AuxMemFDOrError) exit(ChildProcessExitCodeE::AuxiliaryMemorySetupFailed); ((void (*)(size_t, int))(uintptr_t)FunctionDataCopy)(FunctionDataCopySize, *AuxMemFDOrError); exit(0); } Expected> runWithCounter( StringRef CounterName, ArrayRef ValidationCounters, SmallVectorImpl &ValidationCounterValues) const override { SmallVector Value(1, 0); Error PossibleBenchmarkError = createSubProcessAndRunBenchmark( CounterName, Value, ValidationCounters, ValidationCounterValues); if (PossibleBenchmarkError) return std::move(PossibleBenchmarkError); return Value; } const LLVMState &State; const ExecutableFunction Function; const BenchmarkKey &Key; const std::optional BenchmarkProcessCPU; }; #endif // __linux__ } // namespace Expected> BenchmarkRunner::assembleSnippet( const BenchmarkCode &BC, const SnippetRepetitor &Repetitor, unsigned MinInstructions, unsigned LoopBodySize, bool GenerateMemoryInstructions) const { const std::vector &Instructions = BC.Key.Instructions; SmallString<0> Buffer; raw_svector_ostream OS(Buffer); if (Error E = assembleToStream( State.getExegesisTarget(), State.createTargetMachine(), BC.LiveIns, Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize, GenerateMemoryInstructions), OS, BC.Key, GenerateMemoryInstructions)) { return std::move(E); } return Buffer; } Expected BenchmarkRunner::getRunnableConfiguration( const BenchmarkCode &BC, unsigned MinInstructions, unsigned LoopBodySize, const SnippetRepetitor &Repetitor) const { RunnableConfiguration RC; Benchmark &BenchmarkResult = RC.BenchmarkResult; BenchmarkResult.Mode = Mode; BenchmarkResult.CpuName = std::string(State.getTargetMachine().getTargetCPU()); BenchmarkResult.LLVMTriple = State.getTargetMachine().getTargetTriple().normalize(); BenchmarkResult.MinInstructions = MinInstructions; BenchmarkResult.Info = BC.Info; const std::vector &Instructions = BC.Key.Instructions; bool GenerateMemoryInstructions = ExecutionMode == ExecutionModeE::SubProcess; BenchmarkResult.Key = BC.Key; // Assemble at least kMinInstructionsForSnippet instructions by repeating // the snippet for debug/analysis. This is so that the user clearly // understands that the inside instructions are repeated. if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareSnippet) { const int MinInstructionsForSnippet = 4 * Instructions.size(); const int LoopBodySizeForSnippet = 2 * Instructions.size(); auto Snippet = assembleSnippet(BC, Repetitor, MinInstructionsForSnippet, LoopBodySizeForSnippet, GenerateMemoryInstructions); if (Error E = Snippet.takeError()) return std::move(E); if (auto Err = getBenchmarkFunctionBytes(*Snippet, BenchmarkResult.AssembledSnippet)) return std::move(Err); } // Assemble enough repetitions of the snippet so we have at least // MinInstructions instructions. if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) { auto Snippet = assembleSnippet(BC, Repetitor, BenchmarkResult.MinInstructions, LoopBodySize, GenerateMemoryInstructions); if (Error E = Snippet.takeError()) return std::move(E); RC.ObjectFile = getObjectFromBuffer(*Snippet); } return std::move(RC); } Expected> BenchmarkRunner::createFunctionExecutor( object::OwningBinary ObjectFile, const BenchmarkKey &Key, std::optional BenchmarkProcessCPU) const { switch (ExecutionMode) { case ExecutionModeE::InProcess: { if (BenchmarkProcessCPU.has_value()) return make_error("The inprocess execution mode does not " "support benchmark core pinning."); auto InProcessExecutorOrErr = InProcessFunctionExecutorImpl::create( State, std::move(ObjectFile), Scratch.get(), BenchmarkProcessCPU); if (!InProcessExecutorOrErr) return InProcessExecutorOrErr.takeError(); return std::move(*InProcessExecutorOrErr); } case ExecutionModeE::SubProcess: { #ifdef __linux__ auto SubProcessExecutorOrErr = SubProcessFunctionExecutorImpl::create( State, std::move(ObjectFile), Key, BenchmarkProcessCPU); if (!SubProcessExecutorOrErr) return SubProcessExecutorOrErr.takeError(); return std::move(*SubProcessExecutorOrErr); #else return make_error( "The subprocess execution mode is only supported on Linux"); #endif } } llvm_unreachable("ExecutionMode is outside expected range"); } std::pair BenchmarkRunner::runConfiguration( RunnableConfiguration &&RC, const std::optional &DumpFile, std::optional BenchmarkProcessCPU) const { Benchmark &BenchmarkResult = RC.BenchmarkResult; object::OwningBinary &ObjectFile = RC.ObjectFile; if (DumpFile && BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) { auto ObjectFilePath = writeObjectFile(ObjectFile.getBinary()->getData(), *DumpFile); if (Error E = ObjectFilePath.takeError()) { return {std::move(E), std::move(BenchmarkResult)}; } outs() << "Check generated assembly with: /usr/bin/objdump -d " << *ObjectFilePath << "\n"; } if (BenchmarkPhaseSelector < BenchmarkPhaseSelectorE::Measure) { BenchmarkResult.Error = "actual measurements skipped."; return {Error::success(), std::move(BenchmarkResult)}; } Expected> Executor = createFunctionExecutor(std::move(ObjectFile), RC.BenchmarkResult.Key, BenchmarkProcessCPU); if (!Executor) return {Executor.takeError(), std::move(BenchmarkResult)}; auto NewMeasurements = runMeasurements(**Executor); if (Error E = NewMeasurements.takeError()) { return {std::move(E), std::move(BenchmarkResult)}; } assert(BenchmarkResult.MinInstructions > 0 && "invalid MinInstructions"); for (BenchmarkMeasure &BM : *NewMeasurements) { // Scale the measurements by the number of instructions. BM.PerInstructionValue /= BenchmarkResult.MinInstructions; // Scale the measurements by the number of times the entire snippet is // repeated. BM.PerSnippetValue /= std::ceil(BenchmarkResult.MinInstructions / static_cast(BenchmarkResult.Key.Instructions.size())); } BenchmarkResult.Measurements = std::move(*NewMeasurements); return {Error::success(), std::move(BenchmarkResult)}; } Expected BenchmarkRunner::writeObjectFile(StringRef Buffer, StringRef FileName) const { int ResultFD = 0; SmallString<256> ResultPath = FileName; if (Error E = errorCodeToError( FileName.empty() ? sys::fs::createTemporaryFile("snippet", "o", ResultFD, ResultPath) : sys::fs::openFileForReadWrite( FileName, ResultFD, sys::fs::CD_CreateAlways, sys::fs::OF_None))) return std::move(E); raw_fd_ostream OFS(ResultFD, true /*ShouldClose*/); OFS.write(Buffer.data(), Buffer.size()); OFS.flush(); return std::string(ResultPath); } static bool EventLessThan(const std::pair LHS, const ValidationEvent RHS) { return static_cast(LHS.first) < static_cast(RHS); } Error BenchmarkRunner::getValidationCountersToRun( SmallVector &ValCountersToRun) const { const PfmCountersInfo &PCI = State.getPfmCounters(); ValCountersToRun.reserve(ValidationCounters.size()); ValCountersToRun.reserve(ValidationCounters.size()); ArrayRef TargetValidationEvents(PCI.ValidationEvents, PCI.NumValidationEvents); for (const ValidationEvent RequestedValEvent : ValidationCounters) { auto ValCounterIt = lower_bound(TargetValidationEvents, RequestedValEvent, EventLessThan); if (ValCounterIt == TargetValidationEvents.end() || ValCounterIt->first != RequestedValEvent) return make_error("Cannot create validation counter"); assert(ValCounterIt->first == RequestedValEvent && "The array of validation events from the target should be sorted"); ValCountersToRun.push_back(ValCounterIt->second); } return Error::success(); } BenchmarkRunner::FunctionExecutor::~FunctionExecutor() {} } // namespace exegesis } // namespace llvm