xref: /freebsd-src/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
168d75effSDimitry Andric #include "cpuid.h"
268d75effSDimitry Andric #include "sanitizer_common/sanitizer_common.h"
368d75effSDimitry Andric #if !SANITIZER_FUCHSIA
468d75effSDimitry Andric #include "sanitizer_common/sanitizer_posix.h"
568d75effSDimitry Andric #endif
668d75effSDimitry Andric #include "xray_defs.h"
768d75effSDimitry Andric #include "xray_interface_internal.h"
868d75effSDimitry Andric 
981ad6265SDimitry Andric #if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE
1068d75effSDimitry Andric #include <sys/types.h>
1168d75effSDimitry Andric #include <sys/sysctl.h>
1268d75effSDimitry Andric #elif SANITIZER_FUCHSIA
1368d75effSDimitry Andric #include <zircon/syscalls.h>
1468d75effSDimitry Andric #endif
1568d75effSDimitry Andric 
1668d75effSDimitry Andric #include <atomic>
1768d75effSDimitry Andric #include <cstdint>
1868d75effSDimitry Andric #include <errno.h>
1968d75effSDimitry Andric #include <fcntl.h>
2068d75effSDimitry Andric #include <iterator>
2168d75effSDimitry Andric #include <limits>
2268d75effSDimitry Andric #include <tuple>
2368d75effSDimitry Andric #include <unistd.h>
2468d75effSDimitry Andric 
2568d75effSDimitry Andric namespace __xray {
2668d75effSDimitry Andric 
2768d75effSDimitry Andric #if SANITIZER_LINUX
2868d75effSDimitry Andric static std::pair<ssize_t, bool>
retryingReadSome(int Fd,char * Begin,char * End)2968d75effSDimitry Andric retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
3068d75effSDimitry Andric   auto BytesToRead = std::distance(Begin, End);
3168d75effSDimitry Andric   ssize_t BytesRead;
3268d75effSDimitry Andric   ssize_t TotalBytesRead = 0;
3368d75effSDimitry Andric   while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
3468d75effSDimitry Andric     if (BytesRead == -1) {
3568d75effSDimitry Andric       if (errno == EINTR)
3668d75effSDimitry Andric         continue;
3768d75effSDimitry Andric       Report("Read error; errno = %d\n", errno);
3868d75effSDimitry Andric       return std::make_pair(TotalBytesRead, false);
3968d75effSDimitry Andric     }
4068d75effSDimitry Andric 
4168d75effSDimitry Andric     TotalBytesRead += BytesRead;
4268d75effSDimitry Andric     BytesToRead -= BytesRead;
4368d75effSDimitry Andric     Begin += BytesRead;
4468d75effSDimitry Andric   }
4568d75effSDimitry Andric   return std::make_pair(TotalBytesRead, true);
4668d75effSDimitry Andric }
4768d75effSDimitry Andric 
readValueFromFile(const char * Filename,long long * Value)4868d75effSDimitry Andric static bool readValueFromFile(const char *Filename,
4968d75effSDimitry Andric                               long long *Value) XRAY_NEVER_INSTRUMENT {
5068d75effSDimitry Andric   int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
5168d75effSDimitry Andric   if (Fd == -1)
5268d75effSDimitry Andric     return false;
5368d75effSDimitry Andric   static constexpr size_t BufSize = 256;
5468d75effSDimitry Andric   char Line[BufSize] = {};
5568d75effSDimitry Andric   ssize_t BytesRead;
5668d75effSDimitry Andric   bool Success;
5768d75effSDimitry Andric   std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
5868d75effSDimitry Andric   close(Fd);
5968d75effSDimitry Andric   if (!Success)
6068d75effSDimitry Andric     return false;
6168d75effSDimitry Andric   const char *End = nullptr;
6268d75effSDimitry Andric   long long Tmp = internal_simple_strtoll(Line, &End, 10);
6368d75effSDimitry Andric   bool Result = false;
6468d75effSDimitry Andric   if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
6568d75effSDimitry Andric     *Value = Tmp;
6668d75effSDimitry Andric     Result = true;
6768d75effSDimitry Andric   }
6868d75effSDimitry Andric   return Result;
6968d75effSDimitry Andric }
7068d75effSDimitry Andric 
getTSCFrequency()7168d75effSDimitry Andric uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
7268d75effSDimitry Andric   long long TSCFrequency = -1;
7368d75effSDimitry Andric   if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
7468d75effSDimitry Andric                         &TSCFrequency)) {
7568d75effSDimitry Andric     TSCFrequency *= 1000;
7668d75effSDimitry Andric   } else if (readValueFromFile(
7768d75effSDimitry Andric                  "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
7868d75effSDimitry Andric                  &TSCFrequency)) {
7968d75effSDimitry Andric     TSCFrequency *= 1000;
8068d75effSDimitry Andric   } else {
8168d75effSDimitry Andric     Report("Unable to determine CPU frequency for TSC accounting.\n");
8268d75effSDimitry Andric   }
8368d75effSDimitry Andric   return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency);
8468d75effSDimitry Andric }
8581ad6265SDimitry Andric #elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE
8668d75effSDimitry Andric uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
8768d75effSDimitry Andric     long long TSCFrequency = -1;
8868d75effSDimitry Andric     size_t tscfreqsz = sizeof(TSCFrequency);
8981ad6265SDimitry Andric #if SANITIZER_APPLE
9068d75effSDimitry Andric     if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency,
9168d75effSDimitry Andric                               &tscfreqsz, NULL, 0) != -1) {
9268d75effSDimitry Andric 
9368d75effSDimitry Andric #else
9468d75effSDimitry Andric     if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz,
9568d75effSDimitry Andric                               NULL, 0) != -1) {
9668d75effSDimitry Andric #endif
9768d75effSDimitry Andric         return static_cast<uint64_t>(TSCFrequency);
9868d75effSDimitry Andric     } else {
9968d75effSDimitry Andric       Report("Unable to determine CPU frequency for TSC accounting.\n");
10068d75effSDimitry Andric     }
10168d75effSDimitry Andric 
10268d75effSDimitry Andric     return 0;
10368d75effSDimitry Andric }
10468d75effSDimitry Andric #elif !SANITIZER_FUCHSIA
10568d75effSDimitry Andric uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
10668d75effSDimitry Andric     /* Not supported */
10768d75effSDimitry Andric     return 0;
10868d75effSDimitry Andric }
10968d75effSDimitry Andric #endif
11068d75effSDimitry Andric 
// Single-byte opcodes written into patched sleds.
static constexpr uint8_t CallOpCode{0xe8}; // call <rel32>
static constexpr uint8_t JmpOpCode{0xe9};  // jmp <rel32>
static constexpr uint8_t RetOpCode{0xc3};  // ret

// Two-byte sequences stored with a single 16-bit write. x86_64 is
// little-endian, so the low byte of each constant is the first byte in memory.
static constexpr uint16_t MovR10Seq{0xba41}; // 41 ba: start of mov r10d, <imm32>
static constexpr uint16_t Jmp9Seq{0x09eb};   // eb 09: jmp +9
static constexpr uint16_t Jmp15Seq{0x0feb};  // eb 0f: jmp +15
static constexpr uint16_t Jmp20Seq{0x14eb};  // eb 14: jmp +20
static constexpr uint16_t NopwSeq{0x9066};   // 66 90: nopw

// Range of displacements that fit the signed 32-bit operand of call/jmp.
static constexpr int64_t MinOffset = std::numeric_limits<int32_t>::min();
static constexpr int64_t MaxOffset = std::numeric_limits<int32_t>::max();
12268d75effSDimitry Andric 
patchFunctionEntry(const bool Enable,const uint32_t FuncId,const XRaySledEntry & Sled,void (* Trampoline)())12368d75effSDimitry Andric bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
12468d75effSDimitry Andric                         const XRaySledEntry &Sled,
12568d75effSDimitry Andric                         void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
12668d75effSDimitry Andric   // Here we do the dance of replacing the following sled:
12768d75effSDimitry Andric   //
12868d75effSDimitry Andric   // xray_sled_n:
12968d75effSDimitry Andric   //   jmp +9
13068d75effSDimitry Andric   //   <9 byte nop>
13168d75effSDimitry Andric   //
13268d75effSDimitry Andric   // With the following:
13368d75effSDimitry Andric   //
13468d75effSDimitry Andric   //   mov r10d, <function id>
13568d75effSDimitry Andric   //   call <relative 32bit offset to entry trampoline>
13668d75effSDimitry Andric   //
13768d75effSDimitry Andric   // We need to do this in the following order:
13868d75effSDimitry Andric   //
13968d75effSDimitry Andric   // 1. Put the function id first, 2 bytes from the start of the sled (just
14068d75effSDimitry Andric   // after the 2-byte jmp instruction).
14168d75effSDimitry Andric   // 2. Put the call opcode 6 bytes from the start of the sled.
14268d75effSDimitry Andric   // 3. Put the relative offset 7 bytes from the start of the sled.
14368d75effSDimitry Andric   // 4. Do an atomic write over the jmp instruction for the "mov r10d"
14468d75effSDimitry Andric   // opcode and first operand.
14568d75effSDimitry Andric   //
14668d75effSDimitry Andric   // Prerequisite is to compute the relative offset to the trampoline's address.
1475ffd83dbSDimitry Andric   const uint64_t Address = Sled.address();
14868d75effSDimitry Andric   int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
1495ffd83dbSDimitry Andric                              (static_cast<int64_t>(Address) + 11);
15068d75effSDimitry Andric   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
151349cc55cSDimitry Andric     Report("XRay Entry trampoline (%p) too far from sled (%p)\n",
152349cc55cSDimitry Andric            reinterpret_cast<void *>(Trampoline),
1535ffd83dbSDimitry Andric            reinterpret_cast<void *>(Address));
15468d75effSDimitry Andric     return false;
15568d75effSDimitry Andric   }
15668d75effSDimitry Andric   if (Enable) {
1575ffd83dbSDimitry Andric     *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
1585ffd83dbSDimitry Andric     *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode;
1595ffd83dbSDimitry Andric     *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
16068d75effSDimitry Andric     std::atomic_store_explicit(
1615ffd83dbSDimitry Andric         reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
16268d75effSDimitry Andric         std::memory_order_release);
16368d75effSDimitry Andric   } else {
16468d75effSDimitry Andric     std::atomic_store_explicit(
1655ffd83dbSDimitry Andric         reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq,
16668d75effSDimitry Andric         std::memory_order_release);
16768d75effSDimitry Andric     // FIXME: Write out the nops still?
16868d75effSDimitry Andric   }
16968d75effSDimitry Andric   return true;
17068d75effSDimitry Andric }
17168d75effSDimitry Andric 
patchFunctionExit(const bool Enable,const uint32_t FuncId,const XRaySledEntry & Sled)17268d75effSDimitry Andric bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
17368d75effSDimitry Andric                        const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
17468d75effSDimitry Andric   // Here we do the dance of replacing the following sled:
17568d75effSDimitry Andric   //
17668d75effSDimitry Andric   // xray_sled_n:
17768d75effSDimitry Andric   //   ret
17868d75effSDimitry Andric   //   <10 byte nop>
17968d75effSDimitry Andric   //
18068d75effSDimitry Andric   // With the following:
18168d75effSDimitry Andric   //
18268d75effSDimitry Andric   //   mov r10d, <function id>
18368d75effSDimitry Andric   //   jmp <relative 32bit offset to exit trampoline>
18468d75effSDimitry Andric   //
18568d75effSDimitry Andric   // 1. Put the function id first, 2 bytes from the start of the sled (just
18668d75effSDimitry Andric   // after the 1-byte ret instruction).
18768d75effSDimitry Andric   // 2. Put the jmp opcode 6 bytes from the start of the sled.
18868d75effSDimitry Andric   // 3. Put the relative offset 7 bytes from the start of the sled.
18968d75effSDimitry Andric   // 4. Do an atomic write over the jmp instruction for the "mov r10d"
19068d75effSDimitry Andric   // opcode and first operand.
19168d75effSDimitry Andric   //
19268d75effSDimitry Andric   // Prerequisite is to compute the relative offset fo the
19368d75effSDimitry Andric   // __xray_FunctionExit function's address.
1945ffd83dbSDimitry Andric   const uint64_t Address = Sled.address();
19568d75effSDimitry Andric   int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
1965ffd83dbSDimitry Andric                              (static_cast<int64_t>(Address) + 11);
19768d75effSDimitry Andric   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
19868d75effSDimitry Andric     Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
199349cc55cSDimitry Andric            reinterpret_cast<void *>(__xray_FunctionExit),
200349cc55cSDimitry Andric            reinterpret_cast<void *>(Address));
20168d75effSDimitry Andric     return false;
20268d75effSDimitry Andric   }
20368d75effSDimitry Andric   if (Enable) {
2045ffd83dbSDimitry Andric     *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
2055ffd83dbSDimitry Andric     *reinterpret_cast<uint8_t *>(Address + 6) = JmpOpCode;
2065ffd83dbSDimitry Andric     *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
20768d75effSDimitry Andric     std::atomic_store_explicit(
2085ffd83dbSDimitry Andric         reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
20968d75effSDimitry Andric         std::memory_order_release);
21068d75effSDimitry Andric   } else {
21168d75effSDimitry Andric     std::atomic_store_explicit(
2125ffd83dbSDimitry Andric         reinterpret_cast<std::atomic<uint8_t> *>(Address), RetOpCode,
21368d75effSDimitry Andric         std::memory_order_release);
21468d75effSDimitry Andric     // FIXME: Write out the nops still?
21568d75effSDimitry Andric   }
21668d75effSDimitry Andric   return true;
21768d75effSDimitry Andric }
21868d75effSDimitry Andric 
patchFunctionTailExit(const bool Enable,const uint32_t FuncId,const XRaySledEntry & Sled)21968d75effSDimitry Andric bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
22068d75effSDimitry Andric                            const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
22168d75effSDimitry Andric   // Here we do the dance of replacing the tail call sled with a similar
22268d75effSDimitry Andric   // sequence as the entry sled, but calls the tail exit sled instead.
2235ffd83dbSDimitry Andric   const uint64_t Address = Sled.address();
22468d75effSDimitry Andric   int64_t TrampolineOffset =
22568d75effSDimitry Andric       reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
2265ffd83dbSDimitry Andric       (static_cast<int64_t>(Address) + 11);
22768d75effSDimitry Andric   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
22868d75effSDimitry Andric     Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
229349cc55cSDimitry Andric            reinterpret_cast<void *>(__xray_FunctionTailExit),
230349cc55cSDimitry Andric            reinterpret_cast<void *>(Address));
23168d75effSDimitry Andric     return false;
23268d75effSDimitry Andric   }
23368d75effSDimitry Andric   if (Enable) {
2345ffd83dbSDimitry Andric     *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
2355ffd83dbSDimitry Andric     *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode;
2365ffd83dbSDimitry Andric     *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
23768d75effSDimitry Andric     std::atomic_store_explicit(
2385ffd83dbSDimitry Andric         reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
23968d75effSDimitry Andric         std::memory_order_release);
24068d75effSDimitry Andric   } else {
24168d75effSDimitry Andric     std::atomic_store_explicit(
2425ffd83dbSDimitry Andric         reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq,
24368d75effSDimitry Andric         std::memory_order_release);
24468d75effSDimitry Andric     // FIXME: Write out the nops still?
24568d75effSDimitry Andric   }
24668d75effSDimitry Andric   return true;
24768d75effSDimitry Andric }
24868d75effSDimitry Andric 
patchCustomEvent(const bool Enable,const uint32_t FuncId,const XRaySledEntry & Sled)24968d75effSDimitry Andric bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
25068d75effSDimitry Andric                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
25168d75effSDimitry Andric   // Here we do the dance of replacing the following sled:
25268d75effSDimitry Andric   //
25368d75effSDimitry Andric   // xray_sled_n:
254*06c3fb27SDimitry Andric   //   jmp +15          // 2 bytes
25568d75effSDimitry Andric   //   ...
25668d75effSDimitry Andric   //
25768d75effSDimitry Andric   // With the following:
25868d75effSDimitry Andric   //
25968d75effSDimitry Andric   //   nopw             // 2 bytes*
26068d75effSDimitry Andric   //   ...
26168d75effSDimitry Andric   //
26268d75effSDimitry Andric   //
263*06c3fb27SDimitry Andric   // The "unpatch" should just turn the 'nopw' back to a 'jmp +15'.
2645ffd83dbSDimitry Andric   const uint64_t Address = Sled.address();
26568d75effSDimitry Andric   if (Enable) {
26668d75effSDimitry Andric     std::atomic_store_explicit(
2675ffd83dbSDimitry Andric         reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,
26868d75effSDimitry Andric         std::memory_order_release);
26968d75effSDimitry Andric   } else {
27068d75effSDimitry Andric     std::atomic_store_explicit(
2715ffd83dbSDimitry Andric         reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp15Seq,
27268d75effSDimitry Andric         std::memory_order_release);
27368d75effSDimitry Andric   }
27468d75effSDimitry Andric   return false;
27568d75effSDimitry Andric }
27668d75effSDimitry Andric 
patchTypedEvent(const bool Enable,const uint32_t FuncId,const XRaySledEntry & Sled)27768d75effSDimitry Andric bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
27868d75effSDimitry Andric                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
27968d75effSDimitry Andric   // Here we do the dance of replacing the following sled:
28068d75effSDimitry Andric   //
28168d75effSDimitry Andric   // xray_sled_n:
28268d75effSDimitry Andric   //   jmp +20          // 2 byte instruction
28368d75effSDimitry Andric   //   ...
28468d75effSDimitry Andric   //
28568d75effSDimitry Andric   // With the following:
28668d75effSDimitry Andric   //
28768d75effSDimitry Andric   //   nopw             // 2 bytes
28868d75effSDimitry Andric   //   ...
28968d75effSDimitry Andric   //
29068d75effSDimitry Andric   //
29168d75effSDimitry Andric   // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
29268d75effSDimitry Andric   // The 20 byte sled stashes three argument registers, calls the trampoline,
29368d75effSDimitry Andric   // unstashes the registers and returns. If the arguments are already in
29468d75effSDimitry Andric   // the correct registers, the stashing and unstashing become equivalently
29568d75effSDimitry Andric   // sized nops.
2965ffd83dbSDimitry Andric   const uint64_t Address = Sled.address();
29768d75effSDimitry Andric   if (Enable) {
29868d75effSDimitry Andric     std::atomic_store_explicit(
2995ffd83dbSDimitry Andric         reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,
30068d75effSDimitry Andric         std::memory_order_release);
30168d75effSDimitry Andric   } else {
30268d75effSDimitry Andric     std::atomic_store_explicit(
3035ffd83dbSDimitry Andric         reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq,
30468d75effSDimitry Andric         std::memory_order_release);
30568d75effSDimitry Andric   }
30668d75effSDimitry Andric   return false;
30768d75effSDimitry Andric }
30868d75effSDimitry Andric 
30968d75effSDimitry Andric #if !SANITIZER_FUCHSIA
31068d75effSDimitry Andric // We determine whether the CPU we're running on has the correct features we
31168d75effSDimitry Andric // need. In x86_64 this will be rdtscp support.
probeRequiredCPUFeatures()31268d75effSDimitry Andric bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
31368d75effSDimitry Andric   unsigned int EAX, EBX, ECX, EDX;
31468d75effSDimitry Andric 
31568d75effSDimitry Andric   // We check whether rdtscp support is enabled. According to the x86_64 manual,
31668d75effSDimitry Andric   // level should be set at 0x80000001, and we should have a look at bit 27 in
31768d75effSDimitry Andric   // EDX. That's 0x8000000 (or 1u << 27).
31868d75effSDimitry Andric   __asm__ __volatile__("cpuid" : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX)
31968d75effSDimitry Andric     : "0"(0x80000001));
32068d75effSDimitry Andric   if (!(EDX & (1u << 27))) {
32168d75effSDimitry Andric     Report("Missing rdtscp support.\n");
32268d75effSDimitry Andric     return false;
32368d75effSDimitry Andric   }
32468d75effSDimitry Andric   // Also check whether we can determine the CPU frequency, since if we cannot,
32568d75effSDimitry Andric   // we should use the emulated TSC instead.
32668d75effSDimitry Andric   if (!getTSCFrequency()) {
32768d75effSDimitry Andric     Report("Unable to determine CPU frequency.\n");
32868d75effSDimitry Andric     return false;
32968d75effSDimitry Andric   }
33068d75effSDimitry Andric   return true;
33168d75effSDimitry Andric }
33268d75effSDimitry Andric #endif
33368d75effSDimitry Andric 
33468d75effSDimitry Andric } // namespace __xray
335