1 //===-- BenchmarkRunner.cpp -------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "BenchmarkRunner.h" 10 #include "Assembler.h" 11 #include "Error.h" 12 #include "MCInstrDescView.h" 13 #include "MmapUtils.h" 14 #include "PerfHelper.h" 15 #include "SubprocessMemory.h" 16 #include "Target.h" 17 #include "llvm/ADT/ScopeExit.h" 18 #include "llvm/ADT/StringExtras.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/ADT/Twine.h" 21 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX 22 #include "llvm/Support/CrashRecoveryContext.h" 23 #include "llvm/Support/Error.h" 24 #include "llvm/Support/FileSystem.h" 25 #include "llvm/Support/MemoryBuffer.h" 26 #include "llvm/Support/Program.h" 27 #include "llvm/Support/Signals.h" 28 #include "llvm/Support/SystemZ/zOSSupport.h" 29 #include <cmath> 30 #include <memory> 31 #include <string> 32 33 #ifdef __linux__ 34 #ifdef HAVE_LIBPFM 35 #include <perfmon/perf_event.h> 36 #endif 37 #include <sys/mman.h> 38 #include <sys/ptrace.h> 39 #include <sys/resource.h> 40 #include <sys/socket.h> 41 #include <sys/syscall.h> 42 #include <sys/wait.h> 43 #include <unistd.h> 44 45 #if defined(__GLIBC__) && __has_include(<sys/rseq.h>) && defined(HAVE_BUILTIN_THREAD_POINTER) 46 #include <sys/rseq.h> 47 #if defined(RSEQ_SIG) && defined(SYS_rseq) 48 #define GLIBC_INITS_RSEQ 49 #endif 50 #endif 51 #endif // __linux__ 52 53 namespace llvm { 54 namespace exegesis { 55 56 BenchmarkRunner::BenchmarkRunner(const LLVMState &State, Benchmark::ModeE Mode, 57 BenchmarkPhaseSelectorE BenchmarkPhaseSelector, 58 ExecutionModeE ExecutionMode, 59 ArrayRef<ValidationEvent> ValCounters) 60 : State(State), Mode(Mode), BenchmarkPhaseSelector(BenchmarkPhaseSelector), 61 ExecutionMode(ExecutionMode), ValidationCounters(ValCounters), 62 Scratch(std::make_unique<ScratchSpace>()) {} 63 64 BenchmarkRunner::~BenchmarkRunner() = default; 65 66 void BenchmarkRunner::FunctionExecutor::accumulateCounterValues( 67 const SmallVectorImpl<int64_t> &NewValues, 68 SmallVectorImpl<int64_t> *Result) { 69 const size_t NumValues = std::max(NewValues.size(), Result->size()); 70 if (NumValues > Result->size()) 71 Result->resize(NumValues, 0); 72 for (size_t I = 0, End = NewValues.size(); I < End; ++I) 73 (*Result)[I] += NewValues[I]; 74 } 75 76 Expected<SmallVector<int64_t, 4>> 77 BenchmarkRunner::FunctionExecutor::runAndSample( 78 const char *Counters, ArrayRef<const char *> ValidationCounters, 79 SmallVectorImpl<int64_t> &ValidationCounterValues) const { 80 // We sum counts when there are several counters for a single ProcRes 81 // (e.g. P23 on SandyBridge). 82 SmallVector<int64_t, 4> CounterValues; 83 SmallVector<StringRef, 2> CounterNames; 84 StringRef(Counters).split(CounterNames, '+'); 85 for (auto &CounterName : CounterNames) { 86 CounterName = CounterName.trim(); 87 Expected<SmallVector<int64_t, 4>> ValueOrError = runWithCounter( 88 CounterName, ValidationCounters, ValidationCounterValues); 89 if (!ValueOrError) 90 return ValueOrError.takeError(); 91 accumulateCounterValues(ValueOrError.get(), &CounterValues); 92 } 93 return CounterValues; 94 } 95 96 namespace { 97 class InProcessFunctionExecutorImpl : public BenchmarkRunner::FunctionExecutor { 98 public: 99 static Expected<std::unique_ptr<InProcessFunctionExecutorImpl>> 100 create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj, 101 BenchmarkRunner::ScratchSpace *Scratch, 102 std::optional<int> BenchmarkProcessCPU) { 103 Expected<ExecutableFunction> EF = 104 ExecutableFunction::create(State.createTargetMachine(), std::move(Obj)); 105 106 if (!EF) 107 return EF.takeError(); 108 109 return std::unique_ptr<InProcessFunctionExecutorImpl>( 110 new InProcessFunctionExecutorImpl(State, std::move(*EF), Scratch)); 111 } 112 113 private: 114 InProcessFunctionExecutorImpl(const LLVMState &State, 115 ExecutableFunction Function, 116 BenchmarkRunner::ScratchSpace *Scratch) 117 : State(State), Function(std::move(Function)), Scratch(Scratch) {} 118 119 static void accumulateCounterValues(const SmallVector<int64_t, 4> &NewValues, 120 SmallVector<int64_t, 4> *Result) { 121 const size_t NumValues = std::max(NewValues.size(), Result->size()); 122 if (NumValues > Result->size()) 123 Result->resize(NumValues, 0); 124 for (size_t I = 0, End = NewValues.size(); I < End; ++I) 125 (*Result)[I] += NewValues[I]; 126 } 127 128 Expected<SmallVector<int64_t, 4>> runWithCounter( 129 StringRef CounterName, ArrayRef<const char *> ValidationCounters, 130 SmallVectorImpl<int64_t> &ValidationCounterValues) const override { 131 const ExegesisTarget &ET = State.getExegesisTarget(); 132 char *const ScratchPtr = Scratch->ptr(); 133 auto CounterOrError = 134 ET.createCounter(CounterName, State, ValidationCounters); 135 136 if (!CounterOrError) 137 return CounterOrError.takeError(); 138 139 pfm::CounterGroup *Counter = CounterOrError.get().get(); 140 Scratch->clear(); 141 { 142 auto PS = ET.withSavedState(); 143 CrashRecoveryContext CRC; 144 CrashRecoveryContext::Enable(); 145 const bool Crashed = !CRC.RunSafely([this, Counter, ScratchPtr]() { 146 Counter->start(); 147 this->Function(ScratchPtr); 148 Counter->stop(); 149 }); 150 CrashRecoveryContext::Disable(); 151 PS.reset(); 152 if (Crashed) { 153 #ifdef LLVM_ON_UNIX 154 // See "Exit Status for Commands": 155 // https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html 156 constexpr const int kSigOffset = 128; 157 return make_error<SnippetSignal>(CRC.RetCode - kSigOffset); 158 #else 159 // The exit code of the process on windows is not meaningful as a 160 // signal, so simply pass in -1 as the signal into the error. 161 return make_error<SnippetSignal>(-1); 162 #endif // LLVM_ON_UNIX 163 } 164 } 165 166 auto ValidationValuesOrErr = Counter->readValidationCountersOrError(); 167 if (!ValidationValuesOrErr) 168 return ValidationValuesOrErr.takeError(); 169 170 ArrayRef RealValidationValues = *ValidationValuesOrErr; 171 for (size_t I = 0; I < RealValidationValues.size(); ++I) 172 ValidationCounterValues[I] = RealValidationValues[I]; 173 174 return Counter->readOrError(Function.getFunctionBytes()); 175 } 176 177 const LLVMState &State; 178 const ExecutableFunction Function; 179 BenchmarkRunner::ScratchSpace *const Scratch; 180 }; 181 182 #ifdef __linux__ 183 // The following class implements a function executor that executes the 184 // benchmark code within a subprocess rather than within the main llvm-exegesis 185 // process. This allows for much more control over the execution context of the 186 // snippet, particularly with regard to memory. This class performs all the 187 // necessary functions to create the subprocess, execute the snippet in the 188 // subprocess, and report results/handle errors. 189 class SubProcessFunctionExecutorImpl 190 : public BenchmarkRunner::FunctionExecutor { 191 public: 192 static Expected<std::unique_ptr<SubProcessFunctionExecutorImpl>> 193 create(const LLVMState &State, object::OwningBinary<object::ObjectFile> Obj, 194 const BenchmarkKey &Key, std::optional<int> BenchmarkProcessCPU) { 195 Expected<ExecutableFunction> EF = 196 ExecutableFunction::create(State.createTargetMachine(), std::move(Obj)); 197 if (!EF) 198 return EF.takeError(); 199 200 return std::unique_ptr<SubProcessFunctionExecutorImpl>( 201 new SubProcessFunctionExecutorImpl(State, std::move(*EF), Key, 202 BenchmarkProcessCPU)); 203 } 204 205 private: 206 SubProcessFunctionExecutorImpl(const LLVMState &State, 207 ExecutableFunction Function, 208 const BenchmarkKey &Key, 209 std::optional<int> BenchmarkCPU) 210 : State(State), Function(std::move(Function)), Key(Key), 211 BenchmarkProcessCPU(BenchmarkCPU) {} 212 213 enum ChildProcessExitCodeE { 214 CounterFDReadFailed = 1, 215 RSeqDisableFailed, 216 FunctionDataMappingFailed, 217 AuxiliaryMemorySetupFailed, 218 SetCPUAffinityFailed 219 }; 220 221 StringRef childProcessExitCodeToString(int ExitCode) const { 222 switch (ExitCode) { 223 case ChildProcessExitCodeE::CounterFDReadFailed: 224 return "Counter file descriptor read failed"; 225 case ChildProcessExitCodeE::RSeqDisableFailed: 226 return "Disabling restartable sequences failed"; 227 case ChildProcessExitCodeE::FunctionDataMappingFailed: 228 return "Failed to map memory for assembled snippet"; 229 case ChildProcessExitCodeE::AuxiliaryMemorySetupFailed: 230 return "Failed to setup auxiliary memory"; 231 case ChildProcessExitCodeE::SetCPUAffinityFailed: 232 return "Failed to set CPU affinity of the benchmarking process"; 233 default: 234 return "Child process returned with unknown exit code"; 235 } 236 } 237 238 Error sendFileDescriptorThroughSocket(int SocketFD, int FD) const { 239 struct msghdr Message = {}; 240 char Buffer[CMSG_SPACE(sizeof(FD))]; 241 memset(Buffer, 0, sizeof(Buffer)); 242 Message.msg_control = Buffer; 243 Message.msg_controllen = sizeof(Buffer); 244 245 struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message); 246 ControlMessage->cmsg_level = SOL_SOCKET; 247 ControlMessage->cmsg_type = SCM_RIGHTS; 248 ControlMessage->cmsg_len = CMSG_LEN(sizeof(FD)); 249 250 memcpy(CMSG_DATA(ControlMessage), &FD, sizeof(FD)); 251 252 Message.msg_controllen = CMSG_SPACE(sizeof(FD)); 253 254 ssize_t BytesWritten = sendmsg(SocketFD, &Message, 0); 255 256 if (BytesWritten < 0) 257 return make_error<Failure>("Failed to write FD to socket: " + 258 Twine(strerror(errno))); 259 260 return Error::success(); 261 } 262 263 Expected<int> getFileDescriptorFromSocket(int SocketFD) const { 264 struct msghdr Message = {}; 265 266 char ControlBuffer[256]; 267 Message.msg_control = ControlBuffer; 268 Message.msg_controllen = sizeof(ControlBuffer); 269 270 ssize_t BytesRead = recvmsg(SocketFD, &Message, 0); 271 272 if (BytesRead < 0) 273 return make_error<Failure>("Failed to read FD from socket: " + 274 Twine(strerror(errno))); 275 276 struct cmsghdr *ControlMessage = CMSG_FIRSTHDR(&Message); 277 278 int FD; 279 280 if (ControlMessage->cmsg_len != CMSG_LEN(sizeof(FD))) 281 return make_error<Failure>("Failed to get correct number of bytes for " 282 "file descriptor from socket."); 283 284 memcpy(&FD, CMSG_DATA(ControlMessage), sizeof(FD)); 285 286 return FD; 287 } 288 289 Error 290 runParentProcess(pid_t ChildPID, int WriteFD, StringRef CounterName, 291 SmallVectorImpl<int64_t> &CounterValues, 292 ArrayRef<const char *> ValidationCounters, 293 SmallVectorImpl<int64_t> &ValidationCounterValues) const { 294 auto WriteFDClose = make_scope_exit([WriteFD]() { close(WriteFD); }); 295 const ExegesisTarget &ET = State.getExegesisTarget(); 296 auto CounterOrError = 297 ET.createCounter(CounterName, State, ValidationCounters, ChildPID); 298 299 if (!CounterOrError) 300 return CounterOrError.takeError(); 301 302 pfm::CounterGroup *Counter = CounterOrError.get().get(); 303 304 // Make sure to attach to the process (and wait for the sigstop to be 305 // delivered and for the process to continue) before we write to the counter 306 // file descriptor. Attaching to the process before writing to the socket 307 // ensures that the subprocess at most has blocked on the read call. If we 308 // attach afterwards, the subprocess might exit before we get to the attach 309 // call due to effects like scheduler contention, introducing transient 310 // failures. 311 if (ptrace(PTRACE_ATTACH, ChildPID, NULL, NULL) != 0) 312 return make_error<Failure>("Failed to attach to the child process: " + 313 Twine(strerror(errno))); 314 315 if (waitpid(ChildPID, NULL, 0) == -1) { 316 return make_error<Failure>( 317 "Failed to wait for child process to stop after attaching: " + 318 Twine(strerror(errno))); 319 } 320 321 if (ptrace(PTRACE_CONT, ChildPID, NULL, NULL) != 0) 322 return make_error<Failure>( 323 "Failed to continue execution of the child process: " + 324 Twine(strerror(errno))); 325 326 int CounterFileDescriptor = Counter->getFileDescriptor(); 327 Error SendError = 328 sendFileDescriptorThroughSocket(WriteFD, CounterFileDescriptor); 329 330 if (SendError) 331 return SendError; 332 333 int ChildStatus; 334 if (waitpid(ChildPID, &ChildStatus, 0) == -1) { 335 return make_error<Failure>( 336 "Waiting for the child process to complete failed: " + 337 Twine(strerror(errno))); 338 } 339 340 if (WIFEXITED(ChildStatus)) { 341 int ChildExitCode = WEXITSTATUS(ChildStatus); 342 if (ChildExitCode == 0) { 343 // The child exited succesfully, read counter values and return 344 // success. 345 auto CounterValueOrErr = Counter->readOrError(); 346 if (!CounterValueOrErr) 347 return CounterValueOrErr.takeError(); 348 CounterValues = std::move(*CounterValueOrErr); 349 350 auto ValidationValuesOrErr = Counter->readValidationCountersOrError(); 351 if (!ValidationValuesOrErr) 352 return ValidationValuesOrErr.takeError(); 353 354 ArrayRef RealValidationValues = *ValidationValuesOrErr; 355 for (size_t I = 0; I < RealValidationValues.size(); ++I) 356 ValidationCounterValues[I] = RealValidationValues[I]; 357 358 return Error::success(); 359 } 360 // The child exited, but not successfully. 361 return make_error<Failure>( 362 "Child benchmarking process exited with non-zero exit code: " + 363 childProcessExitCodeToString(ChildExitCode)); 364 } 365 366 // An error was encountered running the snippet, process it 367 siginfo_t ChildSignalInfo; 368 if (ptrace(PTRACE_GETSIGINFO, ChildPID, NULL, &ChildSignalInfo) == -1) { 369 return make_error<Failure>("Getting signal info from the child failed: " + 370 Twine(strerror(errno))); 371 } 372 373 // Send SIGKILL rather than SIGTERM as the child process has no SIGTERM 374 // handlers to run, and calling SIGTERM would mean that ptrace will force 375 // it to block in the signal-delivery-stop for the SIGSEGV/other signals, 376 // and upon exit. 377 if (kill(ChildPID, SIGKILL) == -1) 378 return make_error<Failure>("Failed to kill child benchmarking proces: " + 379 Twine(strerror(errno))); 380 381 // Wait for the process to exit so that there are no zombie processes left 382 // around. 383 if (waitpid(ChildPID, NULL, 0) == -1) 384 return make_error<Failure>("Failed to wait for process to die: " + 385 Twine(strerror(errno))); 386 387 if (ChildSignalInfo.si_signo == SIGSEGV) 388 return make_error<SnippetSegmentationFault>( 389 reinterpret_cast<uintptr_t>(ChildSignalInfo.si_addr)); 390 391 return make_error<SnippetSignal>(ChildSignalInfo.si_signo); 392 } 393 394 static void setCPUAffinityIfRequested(int CPUToUse) { 395 // Special case this function for x86_64 for now as certain more esoteric 396 // platforms have different definitions for some of the libc functions that 397 // cause buildtime failures. Additionally, the subprocess executor mode (the 398 // sole mode where this is supported) currently only supports x86_64. 399 400 // Also check that we have the SYS_getcpu macro defined, meaning the syscall 401 // actually exists within the build environment. We manually use the syscall 402 // rather than the libc wrapper given the wrapper for getcpu is only available 403 // in glibc 2.29 and later. 404 #if defined(__x86_64__) && defined(SYS_getcpu) 405 // Set the CPU affinity for the child process, so that we ensure that if 406 // the user specified a CPU the process should run on, the benchmarking 407 // process is running on that CPU. 408 cpu_set_t CPUMask; 409 CPU_ZERO(&CPUMask); 410 CPU_SET(CPUToUse, &CPUMask); 411 // TODO(boomanaiden154): Rewrite this to use LLVM primitives once they 412 // are available. 413 int SetAffinityReturn = sched_setaffinity(0, sizeof(CPUMask), &CPUMask); 414 if (SetAffinityReturn == -1) { 415 exit(ChildProcessExitCodeE::SetCPUAffinityFailed); 416 } 417 418 // Check (if assertions are enabled) that we are actually running on the 419 // CPU that was specified by the user. 420 [[maybe_unused]] unsigned int CurrentCPU; 421 assert(syscall(SYS_getcpu, &CurrentCPU, nullptr) == 0 && 422 "Expected getcpu call to succeed."); 423 assert(static_cast<int>(CurrentCPU) == CPUToUse && 424 "Expected current CPU to equal the CPU requested by the user"); 425 #else 426 exit(ChildProcessExitCodeE::SetCPUAffinityFailed); 427 #endif // defined(__x86_64__) && defined(SYS_getcpu) 428 } 429 430 Error createSubProcessAndRunBenchmark( 431 StringRef CounterName, SmallVectorImpl<int64_t> &CounterValues, 432 ArrayRef<const char *> ValidationCounters, 433 SmallVectorImpl<int64_t> &ValidationCounterValues) const { 434 int PipeFiles[2]; 435 int PipeSuccessOrErr = socketpair(AF_UNIX, SOCK_DGRAM, 0, PipeFiles); 436 if (PipeSuccessOrErr != 0) { 437 return make_error<Failure>( 438 "Failed to create a pipe for interprocess communication between " 439 "llvm-exegesis and the benchmarking subprocess: " + 440 Twine(strerror(errno))); 441 } 442 443 SubprocessMemory SPMemory; 444 Error MemoryInitError = SPMemory.initializeSubprocessMemory(getpid()); 445 if (MemoryInitError) 446 return MemoryInitError; 447 448 Error AddMemDefError = 449 SPMemory.addMemoryDefinition(Key.MemoryValues, getpid()); 450 if (AddMemDefError) 451 return AddMemDefError; 452 453 long ParentTID = SubprocessMemory::getCurrentTID(); 454 pid_t ParentOrChildPID = fork(); 455 456 if (ParentOrChildPID == -1) { 457 return make_error<Failure>("Failed to create child process: " + 458 Twine(strerror(errno))); 459 } 460 461 if (ParentOrChildPID == 0) { 462 if (BenchmarkProcessCPU.has_value()) { 463 setCPUAffinityIfRequested(*BenchmarkProcessCPU); 464 } 465 466 // We are in the child process, close the write end of the pipe. 467 close(PipeFiles[1]); 468 // Unregister handlers, signal handling is now handled through ptrace in 469 // the host process. 470 sys::unregisterHandlers(); 471 runChildSubprocess(PipeFiles[0], Key, ParentTID); 472 // The child process terminates in the above function, so we should never 473 // get to this point. 474 llvm_unreachable("Child process didn't exit when expected."); 475 } 476 477 // Close the read end of the pipe as we only need to write to the subprocess 478 // from the parent process. 479 close(PipeFiles[0]); 480 return runParentProcess(ParentOrChildPID, PipeFiles[1], CounterName, 481 CounterValues, ValidationCounters, 482 ValidationCounterValues); 483 } 484 485 void disableCoreDumps() const { 486 struct rlimit rlim; 487 488 rlim.rlim_cur = 0; 489 setrlimit(RLIMIT_CORE, &rlim); 490 } 491 492 [[noreturn]] void runChildSubprocess(int Pipe, const BenchmarkKey &Key, 493 long ParentTID) const { 494 // Disable core dumps in the child process as otherwise everytime we 495 // encounter an execution failure like a segmentation fault, we will create 496 // a core dump. We report the information directly rather than require the 497 // user inspect a core dump. 498 disableCoreDumps(); 499 500 // The following occurs within the benchmarking subprocess. 501 pid_t ParentPID = getppid(); 502 503 Expected<int> CounterFileDescriptorOrError = 504 getFileDescriptorFromSocket(Pipe); 505 506 if (!CounterFileDescriptorOrError) 507 exit(ChildProcessExitCodeE::CounterFDReadFailed); 508 509 int CounterFileDescriptor = *CounterFileDescriptorOrError; 510 511 // Glibc versions greater than 2.35 automatically call rseq during 512 // initialization. Unmapping the region that glibc sets up for this causes 513 // segfaults in the program. Unregister the rseq region so that we can safely 514 // unmap it later 515 #ifdef GLIBC_INITS_RSEQ 516 unsigned int RseqStructSize = __rseq_size; 517 518 // Glibc v2.40 (the change is also expected to be backported to v2.35) 519 // changes the definition of __rseq_size to be the usable area of the struct 520 // rather than the actual size of the struct. v2.35 uses only 20 bytes of 521 // the 32 byte struct. For now, it should be safe to assume that if the 522 // usable size is less than 32, the actual size of the struct will be 32 523 // bytes given alignment requirements. 524 if (__rseq_size < 32) 525 RseqStructSize = 32; 526 527 long RseqDisableOutput = syscall( 528 SYS_rseq, 529 reinterpret_cast<uintptr_t>(__builtin_thread_pointer()) + __rseq_offset, 530 RseqStructSize, RSEQ_FLAG_UNREGISTER, RSEQ_SIG); 531 if (RseqDisableOutput != 0) 532 exit(ChildProcessExitCodeE::RSeqDisableFailed); 533 #endif // GLIBC_INITS_RSEQ 534 535 // The frontend that generates the memory annotation structures should 536 // validate that the address to map the snippet in at is a multiple of 537 // the page size. Assert that this is true here. 538 assert(Key.SnippetAddress % getpagesize() == 0 && 539 "The snippet address needs to be aligned to a page boundary."); 540 541 size_t FunctionDataCopySize = this->Function.FunctionBytes.size(); 542 void *MapAddress = NULL; 543 int MapFlags = MAP_PRIVATE | MAP_ANONYMOUS; 544 545 if (Key.SnippetAddress != 0) { 546 MapAddress = reinterpret_cast<void *>(Key.SnippetAddress); 547 MapFlags |= MAP_FIXED_NOREPLACE; 548 } 549 550 char *FunctionDataCopy = 551 (char *)mmap(MapAddress, FunctionDataCopySize, PROT_READ | PROT_WRITE, 552 MapFlags, 0, 0); 553 if (reinterpret_cast<intptr_t>(FunctionDataCopy) == -1) 554 exit(ChildProcessExitCodeE::FunctionDataMappingFailed); 555 556 memcpy(FunctionDataCopy, this->Function.FunctionBytes.data(), 557 this->Function.FunctionBytes.size()); 558 mprotect(FunctionDataCopy, FunctionDataCopySize, PROT_READ | PROT_EXEC); 559 560 Expected<int> AuxMemFDOrError = 561 SubprocessMemory::setupAuxiliaryMemoryInSubprocess( 562 Key.MemoryValues, ParentPID, ParentTID, CounterFileDescriptor); 563 if (!AuxMemFDOrError) 564 exit(ChildProcessExitCodeE::AuxiliaryMemorySetupFailed); 565 566 ((void (*)(size_t, int))(uintptr_t)FunctionDataCopy)(FunctionDataCopySize, 567 *AuxMemFDOrError); 568 569 exit(0); 570 } 571 572 Expected<SmallVector<int64_t, 4>> runWithCounter( 573 StringRef CounterName, ArrayRef<const char *> ValidationCounters, 574 SmallVectorImpl<int64_t> &ValidationCounterValues) const override { 575 SmallVector<int64_t, 4> Value(1, 0); 576 Error PossibleBenchmarkError = createSubProcessAndRunBenchmark( 577 CounterName, Value, ValidationCounters, ValidationCounterValues); 578 579 if (PossibleBenchmarkError) 580 return std::move(PossibleBenchmarkError); 581 582 return Value; 583 } 584 585 const LLVMState &State; 586 const ExecutableFunction Function; 587 const BenchmarkKey &Key; 588 const std::optional<int> BenchmarkProcessCPU; 589 }; 590 #endif // __linux__ 591 } // namespace 592 593 Expected<SmallString<0>> BenchmarkRunner::assembleSnippet( 594 const BenchmarkCode &BC, const SnippetRepetitor &Repetitor, 595 unsigned MinInstructions, unsigned LoopBodySize, 596 bool GenerateMemoryInstructions) const { 597 const std::vector<MCInst> &Instructions = BC.Key.Instructions; 598 SmallString<0> Buffer; 599 raw_svector_ostream OS(Buffer); 600 if (Error E = assembleToStream( 601 State.getExegesisTarget(), State.createTargetMachine(), BC.LiveIns, 602 Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize, 603 GenerateMemoryInstructions), 604 OS, BC.Key, GenerateMemoryInstructions)) { 605 return std::move(E); 606 } 607 return Buffer; 608 } 609 610 Expected<BenchmarkRunner::RunnableConfiguration> 611 BenchmarkRunner::getRunnableConfiguration( 612 const BenchmarkCode &BC, unsigned MinInstructions, unsigned LoopBodySize, 613 const SnippetRepetitor &Repetitor) const { 614 RunnableConfiguration RC; 615 616 Benchmark &BenchmarkResult = RC.BenchmarkResult; 617 BenchmarkResult.Mode = Mode; 618 BenchmarkResult.CpuName = 619 std::string(State.getTargetMachine().getTargetCPU()); 620 BenchmarkResult.LLVMTriple = 621 State.getTargetMachine().getTargetTriple().normalize(); 622 BenchmarkResult.MinInstructions = MinInstructions; 623 BenchmarkResult.Info = BC.Info; 624 625 const std::vector<MCInst> &Instructions = BC.Key.Instructions; 626 627 bool GenerateMemoryInstructions = ExecutionMode == ExecutionModeE::SubProcess; 628 629 BenchmarkResult.Key = BC.Key; 630 631 // Assemble at least kMinInstructionsForSnippet instructions by repeating 632 // the snippet for debug/analysis. This is so that the user clearly 633 // understands that the inside instructions are repeated. 634 if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareSnippet) { 635 const int MinInstructionsForSnippet = 4 * Instructions.size(); 636 const int LoopBodySizeForSnippet = 2 * Instructions.size(); 637 auto Snippet = 638 assembleSnippet(BC, Repetitor, MinInstructionsForSnippet, 639 LoopBodySizeForSnippet, GenerateMemoryInstructions); 640 if (Error E = Snippet.takeError()) 641 return std::move(E); 642 643 if (auto Err = getBenchmarkFunctionBytes(*Snippet, 644 BenchmarkResult.AssembledSnippet)) 645 return std::move(Err); 646 } 647 648 // Assemble enough repetitions of the snippet so we have at least 649 // MinInstructions instructions. 650 if (BenchmarkPhaseSelector > 651 BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) { 652 auto Snippet = 653 assembleSnippet(BC, Repetitor, BenchmarkResult.MinInstructions, 654 LoopBodySize, GenerateMemoryInstructions); 655 if (Error E = Snippet.takeError()) 656 return std::move(E); 657 RC.ObjectFile = getObjectFromBuffer(*Snippet); 658 } 659 660 return std::move(RC); 661 } 662 663 Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>> 664 BenchmarkRunner::createFunctionExecutor( 665 object::OwningBinary<object::ObjectFile> ObjectFile, 666 const BenchmarkKey &Key, std::optional<int> BenchmarkProcessCPU) const { 667 switch (ExecutionMode) { 668 case ExecutionModeE::InProcess: { 669 if (BenchmarkProcessCPU.has_value()) 670 return make_error<Failure>("The inprocess execution mode does not " 671 "support benchmark core pinning."); 672 673 auto InProcessExecutorOrErr = InProcessFunctionExecutorImpl::create( 674 State, std::move(ObjectFile), Scratch.get(), BenchmarkProcessCPU); 675 if (!InProcessExecutorOrErr) 676 return InProcessExecutorOrErr.takeError(); 677 678 return std::move(*InProcessExecutorOrErr); 679 } 680 case ExecutionModeE::SubProcess: { 681 #ifdef __linux__ 682 auto SubProcessExecutorOrErr = SubProcessFunctionExecutorImpl::create( 683 State, std::move(ObjectFile), Key, BenchmarkProcessCPU); 684 if (!SubProcessExecutorOrErr) 685 return SubProcessExecutorOrErr.takeError(); 686 687 return std::move(*SubProcessExecutorOrErr); 688 #else 689 return make_error<Failure>( 690 "The subprocess execution mode is only supported on Linux"); 691 #endif 692 } 693 } 694 llvm_unreachable("ExecutionMode is outside expected range"); 695 } 696 697 std::pair<Error, Benchmark> BenchmarkRunner::runConfiguration( 698 RunnableConfiguration &&RC, const std::optional<StringRef> &DumpFile, 699 std::optional<int> BenchmarkProcessCPU) const { 700 Benchmark &BenchmarkResult = RC.BenchmarkResult; 701 object::OwningBinary<object::ObjectFile> &ObjectFile = RC.ObjectFile; 702 703 if (DumpFile && BenchmarkPhaseSelector > 704 BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) { 705 auto ObjectFilePath = 706 writeObjectFile(ObjectFile.getBinary()->getData(), *DumpFile); 707 if (Error E = ObjectFilePath.takeError()) { 708 return {std::move(E), std::move(BenchmarkResult)}; 709 } 710 outs() << "Check generated assembly with: /usr/bin/objdump -d " 711 << *ObjectFilePath << "\n"; 712 } 713 714 if (BenchmarkPhaseSelector < BenchmarkPhaseSelectorE::Measure) { 715 BenchmarkResult.Error = "actual measurements skipped."; 716 return {Error::success(), std::move(BenchmarkResult)}; 717 } 718 719 Expected<std::unique_ptr<BenchmarkRunner::FunctionExecutor>> Executor = 720 createFunctionExecutor(std::move(ObjectFile), RC.BenchmarkResult.Key, 721 BenchmarkProcessCPU); 722 if (!Executor) 723 return {Executor.takeError(), std::move(BenchmarkResult)}; 724 auto NewMeasurements = runMeasurements(**Executor); 725 726 if (Error E = NewMeasurements.takeError()) { 727 return {std::move(E), std::move(BenchmarkResult)}; 728 } 729 assert(BenchmarkResult.MinInstructions > 0 && "invalid MinInstructions"); 730 for (BenchmarkMeasure &BM : *NewMeasurements) { 731 // Scale the measurements by the number of instructions. 732 BM.PerInstructionValue /= BenchmarkResult.MinInstructions; 733 // Scale the measurements by the number of times the entire snippet is 734 // repeated. 735 BM.PerSnippetValue /= 736 std::ceil(BenchmarkResult.MinInstructions / 737 static_cast<double>(BenchmarkResult.Key.Instructions.size())); 738 } 739 BenchmarkResult.Measurements = std::move(*NewMeasurements); 740 741 return {Error::success(), std::move(BenchmarkResult)}; 742 } 743 744 Expected<std::string> 745 BenchmarkRunner::writeObjectFile(StringRef Buffer, StringRef FileName) const { 746 int ResultFD = 0; 747 SmallString<256> ResultPath = FileName; 748 if (Error E = errorCodeToError( 749 FileName.empty() ? sys::fs::createTemporaryFile("snippet", "o", 750 ResultFD, ResultPath) 751 : sys::fs::openFileForReadWrite( 752 FileName, ResultFD, sys::fs::CD_CreateAlways, 753 sys::fs::OF_None))) 754 return std::move(E); 755 raw_fd_ostream OFS(ResultFD, true /*ShouldClose*/); 756 OFS.write(Buffer.data(), Buffer.size()); 757 OFS.flush(); 758 return std::string(ResultPath); 759 } 760 761 static bool EventLessThan(const std::pair<ValidationEvent, const char *> LHS, 762 const ValidationEvent RHS) { 763 return static_cast<int>(LHS.first) < static_cast<int>(RHS); 764 } 765 766 Error BenchmarkRunner::getValidationCountersToRun( 767 SmallVector<const char *> &ValCountersToRun) const { 768 const PfmCountersInfo &PCI = State.getPfmCounters(); 769 ValCountersToRun.reserve(ValidationCounters.size()); 770 771 ValCountersToRun.reserve(ValidationCounters.size()); 772 ArrayRef TargetValidationEvents(PCI.ValidationEvents, 773 PCI.NumValidationEvents); 774 for (const ValidationEvent RequestedValEvent : ValidationCounters) { 775 auto ValCounterIt = 776 lower_bound(TargetValidationEvents, RequestedValEvent, EventLessThan); 777 if (ValCounterIt == TargetValidationEvents.end() || 778 ValCounterIt->first != RequestedValEvent) 779 return make_error<Failure>("Cannot create validation counter"); 780 781 assert(ValCounterIt->first == RequestedValEvent && 782 "The array of validation events from the target should be sorted"); 783 ValCountersToRun.push_back(ValCounterIt->second); 784 } 785 786 return Error::success(); 787 } 788 789 BenchmarkRunner::FunctionExecutor::~FunctionExecutor() {} 790 791 } // namespace exegesis 792 } // namespace llvm 793