xref: /llvm-project/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp (revision 72225ca27f561b74da292433400f250592d73b13)
1 //===-- BenchmarkRunner.cpp -------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "BenchmarkRunner.h"
10 #include "Assembler.h"
11 #include "Error.h"
12 #include "MCInstrDescView.h"
13 #include "MmapUtils.h"
14 #include "PerfHelper.h"
15 #include "SubprocessMemory.h"
16 #include "Target.h"
17 #include "llvm/ADT/ScopeExit.h"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
22 #include "llvm/Support/CrashRecoveryContext.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/FileSystem.h"
25 #include "llvm/Support/MemoryBuffer.h"
26 #include "llvm/Support/Program.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SystemZ/zOSSupport.h"
29 #include <cmath>
30 #include <memory>
31 #include <string>
32 
33 #ifdef __linux__
34 #ifdef HAVE_LIBPFM
35 #include <perfmon/perf_event.h>
36 #endif
37 #include <sys/mman.h>
38 #include <sys/ptrace.h>
39 #include <sys/resource.h>
40 #include <sys/socket.h>
41 #include <sys/syscall.h>
42 #include <sys/wait.h>
43 #include <unistd.h>
44 
45 #if defined(__GLIBC__) && __has_include(<sys/rseq.h>) && defined(HAVE_BUILTIN_THREAD_POINTER)
46 #include <sys/rseq.h>
47 #if defined(RSEQ_SIG) && defined(SYS_rseq)
48 #define GLIBC_INITS_RSEQ
49 #endif
50 #endif
51 #endif // __linux__
52 
53 namespace llvm {
54 namespace exegesis {
55 
56 BenchmarkRunner::BenchmarkRunner(const LLVMState &State, Benchmark::ModeE Mode,
57                                  BenchmarkPhaseSelectorE BenchmarkPhaseSelector,
58                                  ExecutionModeE ExecutionMode,
59                                  ArrayRef<ValidationEvent> ValCounters)
60     : State(State), Mode(Mode), BenchmarkPhaseSelector(BenchmarkPhaseSelector),
61       ExecutionMode(ExecutionMode), ValidationCounters(ValCounters),
62       Scratch(std::make_unique<ScratchSpace>()) {}
63 
64 BenchmarkRunner::~BenchmarkRunner() = default;
65 
66 void BenchmarkRunner::FunctionExecutor::accumulateCounterValues(
67     const SmallVectorImpl<int64_t> &NewValues,
68     SmallVectorImpl<int64_t> *Result) {
69   const size_t NumValues = std::max(NewValues.size(), Result->size());
70   if (NumValues > Result->size())
71     Result->resize(NumValues, 0);
72   for (size_t I = 0, End = NewValues.size(); I < End; ++I)
73     (*Result)[I] += NewValues[I];
74 }
75 
76 Expected<SmallVector<int64_t, 4>>
77 BenchmarkRunner::FunctionExecutor::runAndSample(
78     const char *Counters, ArrayRef<const char *> ValidationCounters,
79     SmallVectorImpl<int64_t> &ValidationCounterValues) const {
80   // We sum counts when there are several counters for a single ProcRes
81   // (e.g. P23 on SandyBridge).
82   SmallVector<int64_t, 4> CounterValues;
83   SmallVector<StringRef, 2> CounterNames;
84   StringRef(Counters).split(CounterNames, '+');
85   for (auto &CounterName : CounterNames) {
86     CounterName = CounterName.trim();
87     Expected<SmallVector<int64_t, 4>> ValueOrError = runWithCounter(
88         CounterName, ValidationCounters, ValidationCounterValues);
89     if (!ValueOrError)
90       return ValueOrError.takeError();
91     accumulateCounterValues(ValueOrError.get(), &CounterValues);
92   }
93   return CounterValues;
94 }
95 
96 namespace {
97 class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor {
98 public:
99   static Expected<std::unique_ptr<InProcessFunctionExecutorImpl>>
100   create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
101          BenchmarkRunner::ScratchSpace *Scratch,
102          std::optional<int> BenchmarkProcessCPU) {
103     Expected<ExecutableFunction> EF =
104         ExecutableFunction::create(State.createTargetMachine(), std::move(Obj));
105 
106     if (!EF)
107       return EF.takeError();
108 
109     return std::unique_ptr<InProcessFunctionExecutorImpl>(
110         new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch));
111   }
112 
113 private:
114   InProcessFunctionExecutorImpl(const LLVMState &State,
115                                 ExecutableFunction Function,
116                                 BenchmarkRunner::ScratchSpace *Scratch)
117       : State(State), Function(std::move(Function)), Scratch(Scratch) {}
118 
119   static void accumulateCounterValues(const SmallVector<int64_t, 4> &NewValues,
120                                       SmallVector<int64_t, 4> *Result) {
121     const size_t NumValues = std::max(NewValues.size(), Result->size());
122     if (NumValues > Result->size())
123       Result->resize(NumValues, 0);
124     for (size_t I = 0, End = NewValues.size(); I < End; ++I)
125       (*Result)[I] += NewValues[I];
126   }
127 
128   Expected<SmallVector<int64_t, 4>> runWithCounter(
129       StringRef CounterName, ArrayRef<const char *> ValidationCounters,
130       SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
131     const ExegesisTarget &ET = State.getExegesisTarget();
132     char *const ScratchPtr = Scratch->ptr();
133     auto CounterOrError =
134         ET.createCounter(CounterName, State, ValidationCounters);
135 
136     if (!CounterOrError)
137       return CounterOrError.takeError();
138 
139     pfm::CounterGroup *Counter = CounterOrError.get().get();
140     Scratch->clear();
141     {
142       auto PS = ET.withSavedState();
143       CrashRecoveryContext CRC;
144       CrashRecoveryContext::Enable();
145       const bool Crashed = !CRC.RunSafely([this, Counter, ScratchPtr]() {
146         Counter->start();
147         this->Function(ScratchPtr);
148         Counter->stop();
149       });
150       CrashRecoveryContext::Disable();
151       PS.reset();
152       if (Crashed) {
153 #ifdef LLVM_ON_UNIX
154         // See "Exit Status for Commands":
155         // https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html
156         constexpr const int kSigOffset = 128;
157         return make_error<SnippetSignal>(CRC.RetCode - kSigOffset);
158 #else
159         // The exit code of the process on windows is not meaningful as a
160         // signal, so simply pass in -1 as the signal into the error.
161         return make_error<SnippetSignal>(-1);
162 #endif // LLVM_ON_UNIX
163       }
164     }
165 
166     auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
167     if (!ValidationValuesOrErr)
168       return ValidationValuesOrErr.takeError();
169 
170     ArrayRef RealValidationValues = *ValidationValuesOrErr;
171     for (size_t I = 0; I < RealValidationValues.size(); ++I)
172       ValidationCounterValues[I] = RealValidationValues[I];
173 
174     return Counter->readOrError(Function.getFunctionBytes());
175   }
176 
177   const LLVMState &State;
178   const ExecutableFunction Function;
179   BenchmarkRunner::ScratchSpace *const Scratch;
180 };
181 
182 #ifdef __linux__
183 // The following class implements a function executor that executes the
184 // benchmark code within a subprocess rather than within the main llvm-exegesis
185 // process. This allows for much more control over the execution context of the
186 // snippet, particularly with regard to memory. This class performs all the
187 // necessary functions to create the subprocess, execute the snippet in the
188 // subprocess, and report results/handle errors.
189 class SubProcessFunctionExecutorImpl
190     : public BenchmarkRunner::FunctionExecutor {
191 public:
192   static Expected<std::unique_ptr<SubProcessFunctionExecutorImpl>>
193   create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj,
194          const BenchmarkKey &Key, std::optional<int> BenchmarkProcessCPU) {
195     Expected<ExecutableFunction> EF =
196         ExecutableFunction::create(State.createTargetMachine(), std::move(Obj));
197     if (!EF)
198       return EF.takeError();
199 
200     return std::unique_ptr<SubProcessFunctionExecutorImpl>(
201         new SubProcessFunctionExecutorImpl(State, std::move(*EF), Key,
202                                            BenchmarkProcessCPU));
203   }
204 
205 private:
206   SubProcessFunctionExecutorImpl(const LLVMState &State,
207                                  ExecutableFunction Function,
208                                  const BenchmarkKey &Key,
209                                  std::optional<int> BenchmarkCPU)
210       : State(State), Function(std::move(Function)), Key(Key),
211         BenchmarkProcessCPU(BenchmarkCPU) {}
212 
213   enum ChildProcessExitCodeE {
214     CounterFDReadFailed = 1,
215     RSeqDisableFailed,
216     FunctionDataMappingFailed,
217     AuxiliaryMemorySetupFailed,
218     SetCPUAffinityFailed
219   };
220 
221   StringRef childProcessExitCodeToString(int ExitCode) const {
222     switch (ExitCode) {
223     case ChildProcessExitCodeE::CounterFDReadFailed:
224       return "Counter file descriptor read failed";
225     case ChildProcessExitCodeE::RSeqDisableFailed:
226       return "Disabling restartable sequences failed";
227     case ChildProcessExitCodeE::FunctionDataMappingFailed:
228       return "Failed to map memory for assembled snippet";
229     case ChildProcessExitCodeE::AuxiliaryMemorySetupFailed:
230       return "Failed to setup auxiliary memory";
231     case ChildProcessExitCodeE::SetCPUAffinityFailed:
232       return "Failed to set CPU affinity of the benchmarking process";
233     default:
234       return "Child process returned with unknown exit code";
235     }
236   }
237 
238   Error sendFileDescriptorThroughSocket(int SocketFD, int FD) const {
239     struct msghdr Message = {};
240     char Buffer[CMSG_SPACE(sizeof(FD))];
241     memset(Buffer, 0, sizeof(Buffer));
242     Message.msg_control = Buffer;
243     Message.msg_controllen = sizeof(Buffer);
244 
245     struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
246     ControlMessage->cmsg_level = SOL_SOCKET;
247     ControlMessage->cmsg_type = SCM_RIGHTS;
248     ControlMessage->cmsg_len = CMSG_LEN(sizeof(FD));
249 
250     memcpy(CMSG_DATA(ControlMessage), &FD, sizeof(FD));
251 
252     Message.msg_controllen = CMSG_SPACE(sizeof(FD));
253 
254     ssize_t BytesWritten = sendmsg(SocketFD, &Message, 0);
255 
256     if (BytesWritten < 0)
257       return make_error<Failure>("Failed to write FD to socket: " +
258                                  Twine(strerror(errno)));
259 
260     return Error::success();
261   }
262 
263   Expected<int> getFileDescriptorFromSocket(int SocketFD) const {
264     struct msghdr Message = {};
265 
266     char ControlBuffer[256];
267     Message.msg_control = ControlBuffer;
268     Message.msg_controllen = sizeof(ControlBuffer);
269 
270     ssize_t BytesRead = recvmsg(SocketFD, &Message, 0);
271 
272     if (BytesRead < 0)
273       return make_error<Failure>("Failed to read FD from socket: " +
274                                  Twine(strerror(errno)));
275 
276     struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message);
277 
278     int FD;
279 
280     if (ControlMessage->cmsg_len != CMSG_LEN(sizeof(FD)))
281       return make_error<Failure>("Failed to get correct number of bytes for "
282                                  "file descriptor from socket.");
283 
284     memcpy(&FD, CMSG_DATA(ControlMessage), sizeof(FD));
285 
286     return FD;
287   }
288 
289   Error
290   runParentProcess(pid_t ChildPID, int WriteFD, StringRef CounterName,
291                    SmallVectorImpl<int64_t> &CounterValues,
292                    ArrayRef<const char *> ValidationCounters,
293                    SmallVectorImpl<int64_t> &ValidationCounterValues) const {
294     auto WriteFDClose = make_scope_exit([WriteFD]() { close(WriteFD); });
295     const ExegesisTarget &ET = State.getExegesisTarget();
296     auto CounterOrError =
297         ET.createCounter(CounterName, State, ValidationCounters, ChildPID);
298 
299     if (!CounterOrError)
300       return CounterOrError.takeError();
301 
302     pfm::CounterGroup *Counter = CounterOrError.get().get();
303 
304     // Make sure to attach to the process (and wait for the sigstop to be
305     // delivered and for the process to continue) before we write to the counter
306     // file descriptor. Attaching to the process before writing to the socket
307     // ensures that the subprocess at most has blocked on the read call. If we
308     // attach afterwards, the subprocess might exit before we get to the attach
309     // call due to effects like scheduler contention, introducing transient
310     // failures.
311     if (ptrace(PTRACE_ATTACH, ChildPID, NULL, NULL) != 0)
312       return make_error<Failure>("Failed to attach to the child process: " +
313                                  Twine(strerror(errno)));
314 
315     if (waitpid(ChildPID, NULL, 0) == -1) {
316       return make_error<Failure>(
317           "Failed to wait for child process to stop after attaching: " +
318           Twine(strerror(errno)));
319     }
320 
321     if (ptrace(PTRACE_CONT, ChildPID, NULL, NULL) != 0)
322       return make_error<Failure>(
323           "Failed to continue execution of the child process: " +
324           Twine(strerror(errno)));
325 
326     int CounterFileDescriptor = Counter->getFileDescriptor();
327     Error SendError =
328         sendFileDescriptorThroughSocket(WriteFD, CounterFileDescriptor);
329 
330     if (SendError)
331       return SendError;
332 
333     int ChildStatus;
334     if (waitpid(ChildPID, &ChildStatus, 0) == -1) {
335       return make_error<Failure>(
336           "Waiting for the child process to complete failed: " +
337           Twine(strerror(errno)));
338     }
339 
340     if (WIFEXITED(ChildStatus)) {
341       int ChildExitCode = WEXITSTATUS(ChildStatus);
342       if (ChildExitCode == 0) {
343         // The child exited succesfully, read counter values and return
344         // success.
345         auto CounterValueOrErr = Counter->readOrError();
346         if (!CounterValueOrErr)
347           return CounterValueOrErr.takeError();
348         CounterValues = std::move(*CounterValueOrErr);
349 
350         auto ValidationValuesOrErr = Counter->readValidationCountersOrError();
351         if (!ValidationValuesOrErr)
352           return ValidationValuesOrErr.takeError();
353 
354         ArrayRef RealValidationValues = *ValidationValuesOrErr;
355         for (size_t I = 0; I < RealValidationValues.size(); ++I)
356           ValidationCounterValues[I] = RealValidationValues[I];
357 
358         return Error::success();
359       }
360       // The child exited, but not successfully.
361       return make_error<Failure>(
362           "Child benchmarking process exited with non-zero exit code: " +
363           childProcessExitCodeToString(ChildExitCode));
364     }
365 
366     // An error was encountered running the snippet, process it
367     siginfo_t ChildSignalInfo;
368     if (ptrace(PTRACE_GETSIGINFO, ChildPID, NULL, &ChildSignalInfo) == -1) {
369       return make_error<Failure>("Getting signal info from the child failed: " +
370                                  Twine(strerror(errno)));
371     }
372 
373     // Send SIGKILL rather than SIGTERM as the child process has no SIGTERM
374     // handlers to run, and calling SIGTERM would mean that ptrace will force
375     // it to block in the signal-delivery-stop for the SIGSEGV/other signals,
376     // and upon exit.
377     if (kill(ChildPID, SIGKILL) == -1)
378       return make_error<Failure>("Failed to kill child benchmarking proces: " +
379                                  Twine(strerror(errno)));
380 
381     // Wait for the process to exit so that there are no zombie processes left
382     // around.
383     if (waitpid(ChildPID, NULL, 0) == -1)
384       return make_error<Failure>("Failed to wait for process to die: " +
385                                  Twine(strerror(errno)));
386 
387     if (ChildSignalInfo.si_signo == SIGSEGV)
388       return make_error<SnippetSegmentationFault>(
389           reinterpret_cast<uintptr_t>(ChildSignalInfo.si_addr));
390 
391     return make_error<SnippetSignal>(ChildSignalInfo.si_signo);
392   }
393 
394   static void setCPUAffinityIfRequested(int CPUToUse) {
395 // Special case this function for x86_64 for now as certain more esoteric
396 // platforms have different definitions for some of the libc functions that
397 // cause buildtime failures. Additionally, the subprocess executor mode (the
398 // sole mode where this is supported) currently only supports x86_64.
399 
400 // Also check that we have the SYS_getcpu macro defined, meaning the syscall
401 // actually exists within the build environment. We manually use the syscall
402 // rather than the libc wrapper given the wrapper for getcpu is only available
403 // in glibc 2.29 and later.
404 #if defined(__x86_64__) && defined(SYS_getcpu)
405     // Set the CPU affinity for the child process, so that we ensure that if
406     // the user specified a CPU the process should run on, the benchmarking
407     // process is running on that CPU.
408     cpu_set_t CPUMask;
409     CPU_ZERO(&CPUMask);
410     CPU_SET(CPUToUse, &CPUMask);
411     // TODO(boomanaiden154): Rewrite this to use LLVM primitives once they
412     // are available.
413     int SetAffinityReturn = sched_setaffinity(0, sizeof(CPUMask), &CPUMask);
414     if (SetAffinityReturn == -1) {
415       exit(ChildProcessExitCodeE::SetCPUAffinityFailed);
416     }
417 
418     // Check (if assertions are enabled) that we are actually running on the
419     // CPU that was specified by the user.
420     [[maybe_unused]] unsigned int CurrentCPU;
421     assert(syscall(SYS_getcpu, &CurrentCPU, nullptr) == 0 &&
422            "Expected getcpu call to succeed.");
423     assert(static_cast<int>(CurrentCPU) == CPUToUse &&
424            "Expected current CPU to equal the CPU requested by the user");
425 #else
426     exit(ChildProcessExitCodeE::SetCPUAffinityFailed);
427 #endif // defined(__x86_64__) && defined(SYS_getcpu)
428   }
429 
430   Error createSubProcessAndRunBenchmark(
431       StringRef CounterName, SmallVectorImpl<int64_t> &CounterValues,
432       ArrayRef<const char *> ValidationCounters,
433       SmallVectorImpl<int64_t> &ValidationCounterValues) const {
434     int PipeFiles[2];
435     int PipeSuccessOrErr = socketpair(AF_UNIX, SOCK_DGRAM, 0, PipeFiles);
436     if (PipeSuccessOrErr != 0) {
437       return make_error<Failure>(
438           "Failed to create a pipe for interprocess communication between "
439           "llvm-exegesis and the benchmarking subprocess: " +
440           Twine(strerror(errno)));
441     }
442 
443     SubprocessMemory SPMemory;
444     Error MemoryInitError = SPMemory.initializeSubprocessMemory(getpid());
445     if (MemoryInitError)
446       return MemoryInitError;
447 
448     Error AddMemDefError =
449         SPMemory.addMemoryDefinition(Key.MemoryValues, getpid());
450     if (AddMemDefError)
451       return AddMemDefError;
452 
453     long ParentTID = SubprocessMemory::getCurrentTID();
454     pid_t ParentOrChildPID = fork();
455 
456     if (ParentOrChildPID == -1) {
457       return make_error<Failure>("Failed to create child process: " +
458                                  Twine(strerror(errno)));
459     }
460 
461     if (ParentOrChildPID == 0) {
462       if (BenchmarkProcessCPU.has_value()) {
463         setCPUAffinityIfRequested(*BenchmarkProcessCPU);
464       }
465 
466       // We are in the child process, close the write end of the pipe.
467       close(PipeFiles[1]);
468       // Unregister handlers, signal handling is now handled through ptrace in
469       // the host process.
470       sys::unregisterHandlers();
471       runChildSubprocess(PipeFiles[0], Key, ParentTID);
472       // The child process terminates in the above function, so we should never
473       // get to this point.
474       llvm_unreachable("Child process didn't exit when expected.");
475     }
476 
477     // Close the read end of the pipe as we only need to write to the subprocess
478     // from the parent process.
479     close(PipeFiles[0]);
480     return runParentProcess(ParentOrChildPID, PipeFiles[1], CounterName,
481                             CounterValues, ValidationCounters,
482                             ValidationCounterValues);
483   }
484 
485   void disableCoreDumps() const {
486     struct rlimit rlim;
487 
488     rlim.rlim_cur = 0;
489     setrlimit(RLIMIT_CORE, &rlim);
490   }
491 
492   [[noreturn]] void runChildSubprocess(int Pipe, const BenchmarkKey &Key,
493                                        long ParentTID) const {
494     // Disable core dumps in the child process as otherwise everytime we
495     // encounter an execution failure like a segmentation fault, we will create
496     // a core dump. We report the information directly rather than require the
497     // user inspect a core dump.
498     disableCoreDumps();
499 
500     // The following occurs within the benchmarking subprocess.
501     pid_t ParentPID = getppid();
502 
503     Expected<int> CounterFileDescriptorOrError =
504         getFileDescriptorFromSocket(Pipe);
505 
506     if (!CounterFileDescriptorOrError)
507       exit(ChildProcessExitCodeE::CounterFDReadFailed);
508 
509     int CounterFileDescriptor = *CounterFileDescriptorOrError;
510 
511 // Glibc versions greater than 2.35 automatically call rseq during
512 // initialization. Unmapping the region that glibc sets up for this causes
513 // segfaults in the program. Unregister the rseq region so that we can safely
514 // unmap it later
515 #ifdef GLIBC_INITS_RSEQ
516     unsigned int RseqStructSize = __rseq_size;
517 
518     // Glibc v2.40 (the change is also expected to be backported to v2.35)
519     // changes the definition of __rseq_size to be the usable area of the struct
520     // rather than the actual size of the struct. v2.35 uses only 20 bytes of
521     // the 32 byte struct. For now, it should be safe to assume that if the
522     // usable size is less than 32, the actual size of the struct will be 32
523     // bytes given alignment requirements.
524     if (__rseq_size < 32)
525       RseqStructSize = 32;
526 
527     long RseqDisableOutput = syscall(
528         SYS_rseq,
529         reinterpret_cast<uintptr_t>(__builtin_thread_pointer()) + __rseq_offset,
530         RseqStructSize, RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
531     if (RseqDisableOutput != 0)
532       exit(ChildProcessExitCodeE::RSeqDisableFailed);
533 #endif // GLIBC_INITS_RSEQ
534 
535     // The frontend that generates the memory annotation structures should
536     // validate that the address to map the snippet in at is a multiple of
537     // the page size. Assert that this is true here.
538     assert(Key.SnippetAddress % getpagesize() == 0 &&
539            "The snippet address needs to be aligned to a page boundary.");
540 
541     size_t FunctionDataCopySize = this->Function.FunctionBytes.size();
542     void *MapAddress = NULL;
543     int MapFlags = MAP_PRIVATE | MAP_ANONYMOUS;
544 
545     if (Key.SnippetAddress != 0) {
546       MapAddress = reinterpret_cast<void *>(Key.SnippetAddress);
547       MapFlags |= MAP_FIXED_NOREPLACE;
548     }
549 
550     char *FunctionDataCopy =
551         (char *)mmap(MapAddress, FunctionDataCopySize, PROT_READ | PROT_WRITE,
552                      MapFlags, 0, 0);
553     if (reinterpret_cast<intptr_t>(FunctionDataCopy) == -1)
554       exit(ChildProcessExitCodeE::FunctionDataMappingFailed);
555 
556     memcpy(FunctionDataCopy, this->Function.FunctionBytes.data(),
557            this->Function.FunctionBytes.size());
558     mprotect(FunctionDataCopy, FunctionDataCopySize, PROT_READ | PROT_EXEC);
559 
560     Expected<int> AuxMemFDOrError =
561         SubprocessMemory::setupAuxiliaryMemoryInSubprocess(
562             Key.MemoryValues, ParentPID, ParentTID, CounterFileDescriptor);
563     if (!AuxMemFDOrError)
564       exit(ChildProcessExitCodeE::AuxiliaryMemorySetupFailed);
565 
566     ((void (*)(size_t, int))(uintptr_t)FunctionDataCopy)(FunctionDataCopySize,
567                                                          *AuxMemFDOrError);
568 
569     exit(0);
570   }
571 
572   Expected<SmallVector<int64_t, 4>> runWithCounter(
573       StringRef CounterName, ArrayRef<const char *> ValidationCounters,
574       SmallVectorImpl<int64_t> &ValidationCounterValues) const override {
575     SmallVector<int64_t, 4> Value(1, 0);
576     Error PossibleBenchmarkError = createSubProcessAndRunBenchmark(
577         CounterName, Value, ValidationCounters, ValidationCounterValues);
578 
579     if (PossibleBenchmarkError)
580       return std::move(PossibleBenchmarkError);
581 
582     return Value;
583   }
584 
585   const LLVMState &State;
586   const ExecutableFunction Function;
587   const BenchmarkKey &Key;
588   const std::optional<int> BenchmarkProcessCPU;
589 };
590 #endif // __linux__
591 } // namespace
592 
593 Expected<SmallString<0>> BenchmarkRunner::assembleSnippet(
594     const BenchmarkCode &BC, const SnippetRepetitor &Repetitor,
595     unsigned MinInstructions, unsigned LoopBodySize,
596     bool GenerateMemoryInstructions) const {
597   const std::vector<MCInst> &Instructions = BC.Key.Instructions;
598   SmallString<0> Buffer;
599   raw_svector_ostream OS(Buffer);
600   if (Error E = assembleToStream(
601           State.getExegesisTarget(), State.createTargetMachine(), BC.LiveIns,
602           Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize,
603                            GenerateMemoryInstructions),
604           OS, BC.Key, GenerateMemoryInstructions)) {
605     return std::move(E);
606   }
607   return Buffer;
608 }
609 
610 Expected<BenchmarkRunner::RunnableConfiguration>
611 BenchmarkRunner::getRunnableConfiguration(
612     const BenchmarkCode &BC, unsigned MinInstructions, unsigned LoopBodySize,
613     const SnippetRepetitor &Repetitor) const {
614   RunnableConfiguration RC;
615 
616   Benchmark &BenchmarkResult = RC.BenchmarkResult;
617   BenchmarkResult.Mode = Mode;
618   BenchmarkResult.CpuName =
619       std::string(State.getTargetMachine().getTargetCPU());
620   BenchmarkResult.LLVMTriple =
621       State.getTargetMachine().getTargetTriple().normalize();
622   BenchmarkResult.MinInstructions = MinInstructions;
623   BenchmarkResult.Info = BC.Info;
624 
625   const std::vector<MCInst> &Instructions = BC.Key.Instructions;
626 
627   bool GenerateMemoryInstructions = ExecutionMode == ExecutionModeE::SubProcess;
628 
629   BenchmarkResult.Key = BC.Key;
630 
631   // Assemble at least kMinInstructionsForSnippet instructions by repeating
632   // the snippet for debug/analysis. This is so that the user clearly
633   // understands that the inside instructions are repeated.
634   if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareSnippet) {
635     const int MinInstructionsForSnippet = 4 * Instructions.size();
636     const int LoopBodySizeForSnippet = 2 * Instructions.size();
637     auto Snippet =
638         assembleSnippet(BC, Repetitor, MinInstructionsForSnippet,
639                         LoopBodySizeForSnippet, GenerateMemoryInstructions);
640     if (Error E = Snippet.takeError())
641       return std::move(E);
642 
643     if (auto Err = getBenchmarkFunctionBytes(*Snippet,
644                                              BenchmarkResult.AssembledSnippet))
645       return std::move(Err);
646   }
647 
648   // Assemble enough repetitions of the snippet so we have at least
649   // MinInstructions instructions.
650   if (BenchmarkPhaseSelector >
651       BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
652     auto Snippet =
653         assembleSnippet(BC, Repetitor, BenchmarkResult.MinInstructions,
654                         LoopBodySize, GenerateMemoryInstructions);
655     if (Error E = Snippet.takeError())
656       return std::move(E);
657     RC.ObjectFile = getObjectFromBuffer(*Snippet);
658   }
659 
660   return std::move(RC);
661 }
662 
663 Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>>
664 BenchmarkRunner::createFunctionExecutor(
665     object::OwningBinary<object::ObjectFile> ObjectFile,
666     const BenchmarkKey &Key, std::optional<int> BenchmarkProcessCPU) const {
667   switch (ExecutionMode) {
668   case ExecutionModeE::InProcess: {
669     if (BenchmarkProcessCPU.has_value())
670       return make_error<Failure>("The inprocess execution mode does not "
671                                  "support benchmark core pinning.");
672 
673     auto InProcessExecutorOrErr = InProcessFunctionExecutorImpl::create(
674         State, std::move(ObjectFile), Scratch.get(), BenchmarkProcessCPU);
675     if (!InProcessExecutorOrErr)
676       return InProcessExecutorOrErr.takeError();
677 
678     return std::move(*InProcessExecutorOrErr);
679   }
680   case ExecutionModeE::SubProcess: {
681 #ifdef __linux__
682     auto SubProcessExecutorOrErr = SubProcessFunctionExecutorImpl::create(
683         State, std::move(ObjectFile), Key, BenchmarkProcessCPU);
684     if (!SubProcessExecutorOrErr)
685       return SubProcessExecutorOrErr.takeError();
686 
687     return std::move(*SubProcessExecutorOrErr);
688 #else
689     return make_error<Failure>(
690         "The subprocess execution mode is only supported on Linux");
691 #endif
692   }
693   }
694   llvm_unreachable("ExecutionMode is outside expected range");
695 }
696 
697 std::pair<Error, Benchmark> BenchmarkRunner::runConfiguration(
698     RunnableConfiguration &&RC, const std::optional<StringRef> &DumpFile,
699     std::optional<int> BenchmarkProcessCPU) const {
700   Benchmark &BenchmarkResult = RC.BenchmarkResult;
701   object::OwningBinary<object::ObjectFile> &ObjectFile = RC.ObjectFile;
702 
703   if (DumpFile && BenchmarkPhaseSelector >
704                       BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
705     auto ObjectFilePath =
706         writeObjectFile(ObjectFile.getBinary()->getData(), *DumpFile);
707     if (Error E = ObjectFilePath.takeError()) {
708       return {std::move(E), std::move(BenchmarkResult)};
709     }
710     outs() << "Check generated assembly with: /usr/bin/objdump -d "
711            << *ObjectFilePath << "\n";
712   }
713 
714   if (BenchmarkPhaseSelector < BenchmarkPhaseSelectorE::Measure) {
715     BenchmarkResult.Error = "actual measurements skipped.";
716     return {Error::success(), std::move(BenchmarkResult)};
717   }
718 
719   Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>> Executor =
720       createFunctionExecutor(std::move(ObjectFile), RC.BenchmarkResult.Key,
721                              BenchmarkProcessCPU);
722   if (!Executor)
723     return {Executor.takeError(), std::move(BenchmarkResult)};
724   auto NewMeasurements = runMeasurements(**Executor);
725 
726   if (Error E = NewMeasurements.takeError()) {
727     return {std::move(E), std::move(BenchmarkResult)};
728   }
729   assert(BenchmarkResult.MinInstructions > 0 && "invalid MinInstructions");
730   for (BenchmarkMeasure &BM : *NewMeasurements) {
731     // Scale the measurements by the number of instructions.
732     BM.PerInstructionValue /= BenchmarkResult.MinInstructions;
733     // Scale the measurements by the number of times the entire snippet is
734     // repeated.
735     BM.PerSnippetValue /=
736         std::ceil(BenchmarkResult.MinInstructions /
737                   static_cast<double>(BenchmarkResult.Key.Instructions.size()));
738   }
739   BenchmarkResult.Measurements = std::move(*NewMeasurements);
740 
741   return {Error::success(), std::move(BenchmarkResult)};
742 }
743 
744 Expected<std::string>
745 BenchmarkRunner::writeObjectFile(StringRef Buffer, StringRef FileName) const {
746   int ResultFD = 0;
747   SmallString<256> ResultPath = FileName;
748   if (Error E = errorCodeToError(
749           FileName.empty() ? sys::fs::createTemporaryFile("snippet", "o",
750                                                           ResultFD, ResultPath)
751                            : sys::fs::openFileForReadWrite(
752                                  FileName, ResultFD, sys::fs::CD_CreateAlways,
753                                  sys::fs::OF_None)))
754     return std::move(E);
755   raw_fd_ostream OFS(ResultFD, true /*ShouldClose*/);
756   OFS.write(Buffer.data(), Buffer.size());
757   OFS.flush();
758   return std::string(ResultPath);
759 }
760 
761 static bool EventLessThan(const std::pair<ValidationEvent, const char *> LHS,
762                           const ValidationEvent RHS) {
763   return static_cast<int>(LHS.first) < static_cast<int>(RHS);
764 }
765 
766 Error BenchmarkRunner::getValidationCountersToRun(
767     SmallVector<const char *> &ValCountersToRun) const {
768   const PfmCountersInfo &PCI = State.getPfmCounters();
769   ValCountersToRun.reserve(ValidationCounters.size());
770 
771   ValCountersToRun.reserve(ValidationCounters.size());
772   ArrayRef TargetValidationEvents(PCI.ValidationEvents,
773                                   PCI.NumValidationEvents);
774   for (const ValidationEvent RequestedValEvent : ValidationCounters) {
775     auto ValCounterIt =
776         lower_bound(TargetValidationEvents, RequestedValEvent, EventLessThan);
777     if (ValCounterIt == TargetValidationEvents.end() ||
778         ValCounterIt->first != RequestedValEvent)
779       return make_error<Failure>("Cannot create validation counter");
780 
781     assert(ValCounterIt->first == RequestedValEvent &&
782            "The array of validation events from the target should be sorted");
783     ValCountersToRun.push_back(ValCounterIt->second);
784   }
785 
786   return Error::success();
787 }
788 
789 BenchmarkRunner::FunctionExecutor::~FunctionExecutor() {}
790 
791 } // namespace exegesis
792 } // namespace llvm
793