168d75effSDimitry Andric #include "cpuid.h"
268d75effSDimitry Andric #include "sanitizer_common/sanitizer_common.h"
368d75effSDimitry Andric #if !SANITIZER_FUCHSIA
468d75effSDimitry Andric #include "sanitizer_common/sanitizer_posix.h"
568d75effSDimitry Andric #endif
668d75effSDimitry Andric #include "xray_defs.h"
768d75effSDimitry Andric #include "xray_interface_internal.h"
868d75effSDimitry Andric
981ad6265SDimitry Andric #if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE
1068d75effSDimitry Andric #include <sys/types.h>
1168d75effSDimitry Andric #include <sys/sysctl.h>
1268d75effSDimitry Andric #elif SANITIZER_FUCHSIA
1368d75effSDimitry Andric #include <zircon/syscalls.h>
1468d75effSDimitry Andric #endif
1568d75effSDimitry Andric
1668d75effSDimitry Andric #include <atomic>
1768d75effSDimitry Andric #include <cstdint>
1868d75effSDimitry Andric #include <errno.h>
1968d75effSDimitry Andric #include <fcntl.h>
2068d75effSDimitry Andric #include <iterator>
2168d75effSDimitry Andric #include <limits>
2268d75effSDimitry Andric #include <tuple>
2368d75effSDimitry Andric #include <unistd.h>
2468d75effSDimitry Andric
2568d75effSDimitry Andric namespace __xray {
2668d75effSDimitry Andric
2768d75effSDimitry Andric #if SANITIZER_LINUX
2868d75effSDimitry Andric static std::pair<ssize_t, bool>
retryingReadSome(int Fd,char * Begin,char * End)2968d75effSDimitry Andric retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
3068d75effSDimitry Andric auto BytesToRead = std::distance(Begin, End);
3168d75effSDimitry Andric ssize_t BytesRead;
3268d75effSDimitry Andric ssize_t TotalBytesRead = 0;
3368d75effSDimitry Andric while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
3468d75effSDimitry Andric if (BytesRead == -1) {
3568d75effSDimitry Andric if (errno == EINTR)
3668d75effSDimitry Andric continue;
3768d75effSDimitry Andric Report("Read error; errno = %d\n", errno);
3868d75effSDimitry Andric return std::make_pair(TotalBytesRead, false);
3968d75effSDimitry Andric }
4068d75effSDimitry Andric
4168d75effSDimitry Andric TotalBytesRead += BytesRead;
4268d75effSDimitry Andric BytesToRead -= BytesRead;
4368d75effSDimitry Andric Begin += BytesRead;
4468d75effSDimitry Andric }
4568d75effSDimitry Andric return std::make_pair(TotalBytesRead, true);
4668d75effSDimitry Andric }
4768d75effSDimitry Andric
readValueFromFile(const char * Filename,long long * Value)4868d75effSDimitry Andric static bool readValueFromFile(const char *Filename,
4968d75effSDimitry Andric long long *Value) XRAY_NEVER_INSTRUMENT {
5068d75effSDimitry Andric int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
5168d75effSDimitry Andric if (Fd == -1)
5268d75effSDimitry Andric return false;
5368d75effSDimitry Andric static constexpr size_t BufSize = 256;
5468d75effSDimitry Andric char Line[BufSize] = {};
5568d75effSDimitry Andric ssize_t BytesRead;
5668d75effSDimitry Andric bool Success;
5768d75effSDimitry Andric std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
5868d75effSDimitry Andric close(Fd);
5968d75effSDimitry Andric if (!Success)
6068d75effSDimitry Andric return false;
6168d75effSDimitry Andric const char *End = nullptr;
6268d75effSDimitry Andric long long Tmp = internal_simple_strtoll(Line, &End, 10);
6368d75effSDimitry Andric bool Result = false;
6468d75effSDimitry Andric if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
6568d75effSDimitry Andric *Value = Tmp;
6668d75effSDimitry Andric Result = true;
6768d75effSDimitry Andric }
6868d75effSDimitry Andric return Result;
6968d75effSDimitry Andric }
7068d75effSDimitry Andric
getTSCFrequency()7168d75effSDimitry Andric uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
7268d75effSDimitry Andric long long TSCFrequency = -1;
7368d75effSDimitry Andric if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
7468d75effSDimitry Andric &TSCFrequency)) {
7568d75effSDimitry Andric TSCFrequency *= 1000;
7668d75effSDimitry Andric } else if (readValueFromFile(
7768d75effSDimitry Andric "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
7868d75effSDimitry Andric &TSCFrequency)) {
7968d75effSDimitry Andric TSCFrequency *= 1000;
8068d75effSDimitry Andric } else {
8168d75effSDimitry Andric Report("Unable to determine CPU frequency for TSC accounting.\n");
8268d75effSDimitry Andric }
8368d75effSDimitry Andric return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency);
8468d75effSDimitry Andric }
8581ad6265SDimitry Andric #elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE
8668d75effSDimitry Andric uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
8768d75effSDimitry Andric long long TSCFrequency = -1;
8868d75effSDimitry Andric size_t tscfreqsz = sizeof(TSCFrequency);
8981ad6265SDimitry Andric #if SANITIZER_APPLE
9068d75effSDimitry Andric if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency,
9168d75effSDimitry Andric &tscfreqsz, NULL, 0) != -1) {
9268d75effSDimitry Andric
9368d75effSDimitry Andric #else
9468d75effSDimitry Andric if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz,
9568d75effSDimitry Andric NULL, 0) != -1) {
9668d75effSDimitry Andric #endif
9768d75effSDimitry Andric return static_cast<uint64_t>(TSCFrequency);
9868d75effSDimitry Andric } else {
9968d75effSDimitry Andric Report("Unable to determine CPU frequency for TSC accounting.\n");
10068d75effSDimitry Andric }
10168d75effSDimitry Andric
10268d75effSDimitry Andric return 0;
10368d75effSDimitry Andric }
10468d75effSDimitry Andric #elif !SANITIZER_FUCHSIA
10568d75effSDimitry Andric uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
10668d75effSDimitry Andric /* Not supported */
10768d75effSDimitry Andric return 0;
10868d75effSDimitry Andric }
10968d75effSDimitry Andric #endif
11068d75effSDimitry Andric
// Single-byte opcodes written into patched sleds.
static constexpr uint8_t CallOpCode = 0xe8;
static constexpr uint8_t JmpOpCode = 0xe9;
static constexpr uint8_t RetOpCode = 0xc3;

// Two-byte sequences stored over the first two bytes of a sled with a single
// 16-bit write. On little-endian x86_64 the low byte of each value is the
// first byte in memory (e.g. 0x09eb is stored as eb 09, i.e. "jmp +9").
static constexpr uint16_t MovR10Seq = 0xba41; // start of "mov r10d, imm32"
static constexpr uint16_t Jmp9Seq = 0x09eb;   // jmp +9
static constexpr uint16_t Jmp15Seq = 0x0feb;  // jmp +15
static constexpr uint16_t Jmp20Seq = 0x14eb;  // jmp +20
static constexpr uint16_t NopwSeq = 0x9066;   // nopw

// Range of displacements that fit the 32-bit relative operand of the call/jmp
// written into a sled, widened to 64 bits for comparison.
static constexpr int64_t MinOffset = std::numeric_limits<int32_t>::min();
static constexpr int64_t MaxOffset = std::numeric_limits<int32_t>::max();
12268d75effSDimitry Andric
patchFunctionEntry(const bool Enable,const uint32_t FuncId,const XRaySledEntry & Sled,void (* Trampoline)())12368d75effSDimitry Andric bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
12468d75effSDimitry Andric const XRaySledEntry &Sled,
12568d75effSDimitry Andric void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
12668d75effSDimitry Andric // Here we do the dance of replacing the following sled:
12768d75effSDimitry Andric //
12868d75effSDimitry Andric // xray_sled_n:
12968d75effSDimitry Andric // jmp +9
13068d75effSDimitry Andric // <9 byte nop>
13168d75effSDimitry Andric //
13268d75effSDimitry Andric // With the following:
13368d75effSDimitry Andric //
13468d75effSDimitry Andric // mov r10d, <function id>
13568d75effSDimitry Andric // call <relative 32bit offset to entry trampoline>
13668d75effSDimitry Andric //
13768d75effSDimitry Andric // We need to do this in the following order:
13868d75effSDimitry Andric //
13968d75effSDimitry Andric // 1. Put the function id first, 2 bytes from the start of the sled (just
14068d75effSDimitry Andric // after the 2-byte jmp instruction).
14168d75effSDimitry Andric // 2. Put the call opcode 6 bytes from the start of the sled.
14268d75effSDimitry Andric // 3. Put the relative offset 7 bytes from the start of the sled.
14368d75effSDimitry Andric // 4. Do an atomic write over the jmp instruction for the "mov r10d"
14468d75effSDimitry Andric // opcode and first operand.
14568d75effSDimitry Andric //
14668d75effSDimitry Andric // Prerequisite is to compute the relative offset to the trampoline's address.
1475ffd83dbSDimitry Andric const uint64_t Address = Sled.address();
14868d75effSDimitry Andric int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
1495ffd83dbSDimitry Andric (static_cast<int64_t>(Address) + 11);
15068d75effSDimitry Andric if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
151349cc55cSDimitry Andric Report("XRay Entry trampoline (%p) too far from sled (%p)\n",
152349cc55cSDimitry Andric reinterpret_cast<void *>(Trampoline),
1535ffd83dbSDimitry Andric reinterpret_cast<void *>(Address));
15468d75effSDimitry Andric return false;
15568d75effSDimitry Andric }
15668d75effSDimitry Andric if (Enable) {
1575ffd83dbSDimitry Andric *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
1585ffd83dbSDimitry Andric *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode;
1595ffd83dbSDimitry Andric *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
16068d75effSDimitry Andric std::atomic_store_explicit(
1615ffd83dbSDimitry Andric reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
16268d75effSDimitry Andric std::memory_order_release);
16368d75effSDimitry Andric } else {
16468d75effSDimitry Andric std::atomic_store_explicit(
1655ffd83dbSDimitry Andric reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq,
16668d75effSDimitry Andric std::memory_order_release);
16768d75effSDimitry Andric // FIXME: Write out the nops still?
16868d75effSDimitry Andric }
16968d75effSDimitry Andric return true;
17068d75effSDimitry Andric }
17168d75effSDimitry Andric
patchFunctionExit(const bool Enable,const uint32_t FuncId,const XRaySledEntry & Sled)17268d75effSDimitry Andric bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
17368d75effSDimitry Andric const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
17468d75effSDimitry Andric // Here we do the dance of replacing the following sled:
17568d75effSDimitry Andric //
17668d75effSDimitry Andric // xray_sled_n:
17768d75effSDimitry Andric // ret
17868d75effSDimitry Andric // <10 byte nop>
17968d75effSDimitry Andric //
18068d75effSDimitry Andric // With the following:
18168d75effSDimitry Andric //
18268d75effSDimitry Andric // mov r10d, <function id>
18368d75effSDimitry Andric // jmp <relative 32bit offset to exit trampoline>
18468d75effSDimitry Andric //
18568d75effSDimitry Andric // 1. Put the function id first, 2 bytes from the start of the sled (just
18668d75effSDimitry Andric // after the 1-byte ret instruction).
18768d75effSDimitry Andric // 2. Put the jmp opcode 6 bytes from the start of the sled.
18868d75effSDimitry Andric // 3. Put the relative offset 7 bytes from the start of the sled.
18968d75effSDimitry Andric // 4. Do an atomic write over the jmp instruction for the "mov r10d"
19068d75effSDimitry Andric // opcode and first operand.
19168d75effSDimitry Andric //
19268d75effSDimitry Andric // Prerequisite is to compute the relative offset fo the
19368d75effSDimitry Andric // __xray_FunctionExit function's address.
1945ffd83dbSDimitry Andric const uint64_t Address = Sled.address();
19568d75effSDimitry Andric int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
1965ffd83dbSDimitry Andric (static_cast<int64_t>(Address) + 11);
19768d75effSDimitry Andric if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
19868d75effSDimitry Andric Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
199349cc55cSDimitry Andric reinterpret_cast<void *>(__xray_FunctionExit),
200349cc55cSDimitry Andric reinterpret_cast<void *>(Address));
20168d75effSDimitry Andric return false;
20268d75effSDimitry Andric }
20368d75effSDimitry Andric if (Enable) {
2045ffd83dbSDimitry Andric *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
2055ffd83dbSDimitry Andric *reinterpret_cast<uint8_t *>(Address + 6) = JmpOpCode;
2065ffd83dbSDimitry Andric *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
20768d75effSDimitry Andric std::atomic_store_explicit(
2085ffd83dbSDimitry Andric reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
20968d75effSDimitry Andric std::memory_order_release);
21068d75effSDimitry Andric } else {
21168d75effSDimitry Andric std::atomic_store_explicit(
2125ffd83dbSDimitry Andric reinterpret_cast<std::atomic<uint8_t> *>(Address), RetOpCode,
21368d75effSDimitry Andric std::memory_order_release);
21468d75effSDimitry Andric // FIXME: Write out the nops still?
21568d75effSDimitry Andric }
21668d75effSDimitry Andric return true;
21768d75effSDimitry Andric }
21868d75effSDimitry Andric
patchFunctionTailExit(const bool Enable,const uint32_t FuncId,const XRaySledEntry & Sled)21968d75effSDimitry Andric bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
22068d75effSDimitry Andric const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
22168d75effSDimitry Andric // Here we do the dance of replacing the tail call sled with a similar
22268d75effSDimitry Andric // sequence as the entry sled, but calls the tail exit sled instead.
2235ffd83dbSDimitry Andric const uint64_t Address = Sled.address();
22468d75effSDimitry Andric int64_t TrampolineOffset =
22568d75effSDimitry Andric reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
2265ffd83dbSDimitry Andric (static_cast<int64_t>(Address) + 11);
22768d75effSDimitry Andric if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
22868d75effSDimitry Andric Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
229349cc55cSDimitry Andric reinterpret_cast<void *>(__xray_FunctionTailExit),
230349cc55cSDimitry Andric reinterpret_cast<void *>(Address));
23168d75effSDimitry Andric return false;
23268d75effSDimitry Andric }
23368d75effSDimitry Andric if (Enable) {
2345ffd83dbSDimitry Andric *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
2355ffd83dbSDimitry Andric *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode;
2365ffd83dbSDimitry Andric *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
23768d75effSDimitry Andric std::atomic_store_explicit(
2385ffd83dbSDimitry Andric reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
23968d75effSDimitry Andric std::memory_order_release);
24068d75effSDimitry Andric } else {
24168d75effSDimitry Andric std::atomic_store_explicit(
2425ffd83dbSDimitry Andric reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq,
24368d75effSDimitry Andric std::memory_order_release);
24468d75effSDimitry Andric // FIXME: Write out the nops still?
24568d75effSDimitry Andric }
24668d75effSDimitry Andric return true;
24768d75effSDimitry Andric }
24868d75effSDimitry Andric
patchCustomEvent(const bool Enable,const uint32_t FuncId,const XRaySledEntry & Sled)24968d75effSDimitry Andric bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
25068d75effSDimitry Andric const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
25168d75effSDimitry Andric // Here we do the dance of replacing the following sled:
25268d75effSDimitry Andric //
25368d75effSDimitry Andric // xray_sled_n:
254*06c3fb27SDimitry Andric // jmp +15 // 2 bytes
25568d75effSDimitry Andric // ...
25668d75effSDimitry Andric //
25768d75effSDimitry Andric // With the following:
25868d75effSDimitry Andric //
25968d75effSDimitry Andric // nopw // 2 bytes*
26068d75effSDimitry Andric // ...
26168d75effSDimitry Andric //
26268d75effSDimitry Andric //
263*06c3fb27SDimitry Andric // The "unpatch" should just turn the 'nopw' back to a 'jmp +15'.
2645ffd83dbSDimitry Andric const uint64_t Address = Sled.address();
26568d75effSDimitry Andric if (Enable) {
26668d75effSDimitry Andric std::atomic_store_explicit(
2675ffd83dbSDimitry Andric reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,
26868d75effSDimitry Andric std::memory_order_release);
26968d75effSDimitry Andric } else {
27068d75effSDimitry Andric std::atomic_store_explicit(
2715ffd83dbSDimitry Andric reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp15Seq,
27268d75effSDimitry Andric std::memory_order_release);
27368d75effSDimitry Andric }
27468d75effSDimitry Andric return false;
27568d75effSDimitry Andric }
27668d75effSDimitry Andric
patchTypedEvent(const bool Enable,const uint32_t FuncId,const XRaySledEntry & Sled)27768d75effSDimitry Andric bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
27868d75effSDimitry Andric const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
27968d75effSDimitry Andric // Here we do the dance of replacing the following sled:
28068d75effSDimitry Andric //
28168d75effSDimitry Andric // xray_sled_n:
28268d75effSDimitry Andric // jmp +20 // 2 byte instruction
28368d75effSDimitry Andric // ...
28468d75effSDimitry Andric //
28568d75effSDimitry Andric // With the following:
28668d75effSDimitry Andric //
28768d75effSDimitry Andric // nopw // 2 bytes
28868d75effSDimitry Andric // ...
28968d75effSDimitry Andric //
29068d75effSDimitry Andric //
29168d75effSDimitry Andric // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
29268d75effSDimitry Andric // The 20 byte sled stashes three argument registers, calls the trampoline,
29368d75effSDimitry Andric // unstashes the registers and returns. If the arguments are already in
29468d75effSDimitry Andric // the correct registers, the stashing and unstashing become equivalently
29568d75effSDimitry Andric // sized nops.
2965ffd83dbSDimitry Andric const uint64_t Address = Sled.address();
29768d75effSDimitry Andric if (Enable) {
29868d75effSDimitry Andric std::atomic_store_explicit(
2995ffd83dbSDimitry Andric reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,
30068d75effSDimitry Andric std::memory_order_release);
30168d75effSDimitry Andric } else {
30268d75effSDimitry Andric std::atomic_store_explicit(
3035ffd83dbSDimitry Andric reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq,
30468d75effSDimitry Andric std::memory_order_release);
30568d75effSDimitry Andric }
30668d75effSDimitry Andric return false;
30768d75effSDimitry Andric }
30868d75effSDimitry Andric
30968d75effSDimitry Andric #if !SANITIZER_FUCHSIA
31068d75effSDimitry Andric // We determine whether the CPU we're running on has the correct features we
31168d75effSDimitry Andric // need. In x86_64 this will be rdtscp support.
probeRequiredCPUFeatures()31268d75effSDimitry Andric bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
31368d75effSDimitry Andric unsigned int EAX, EBX, ECX, EDX;
31468d75effSDimitry Andric
31568d75effSDimitry Andric // We check whether rdtscp support is enabled. According to the x86_64 manual,
31668d75effSDimitry Andric // level should be set at 0x80000001, and we should have a look at bit 27 in
31768d75effSDimitry Andric // EDX. That's 0x8000000 (or 1u << 27).
31868d75effSDimitry Andric __asm__ __volatile__("cpuid" : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX)
31968d75effSDimitry Andric : "0"(0x80000001));
32068d75effSDimitry Andric if (!(EDX & (1u << 27))) {
32168d75effSDimitry Andric Report("Missing rdtscp support.\n");
32268d75effSDimitry Andric return false;
32368d75effSDimitry Andric }
32468d75effSDimitry Andric // Also check whether we can determine the CPU frequency, since if we cannot,
32568d75effSDimitry Andric // we should use the emulated TSC instead.
32668d75effSDimitry Andric if (!getTSCFrequency()) {
32768d75effSDimitry Andric Report("Unable to determine CPU frequency.\n");
32868d75effSDimitry Andric return false;
32968d75effSDimitry Andric }
33068d75effSDimitry Andric return true;
33168d75effSDimitry Andric }
33268d75effSDimitry Andric #endif
33368d75effSDimitry Andric
33468d75effSDimitry Andric } // namespace __xray
335