#include "cpuid.h"
#include "sanitizer_common/sanitizer_common.h"
#if !SANITIZER_FUCHSIA
#include "sanitizer_common/sanitizer_posix.h"
#endif
#include "xray_defs.h"
#include "xray_interface_internal.h"

#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE
#include <sys/types.h>
#include <sys/sysctl.h>
#elif SANITIZER_FUCHSIA
#include <zircon/syscalls.h>
#endif

#include <atomic>
#include <cstdint>
#include <errno.h>
#include <fcntl.h>
#include <iterator>
#include <limits>
#include <tuple>
#include <unistd.h>

namespace __xray {

#if SANITIZER_LINUX
static std::pair<ssize_t, bool>
retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
  auto BytesToRead = std::distance(Begin, End);
  ssize_t BytesRead;
  ssize_t TotalBytesRead = 0;
  while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
    if (BytesRead == -1) {
      if (errno == EINTR)
        continue;
      Report("Read error; errno = %d\n", errno);
      return std::make_pair(TotalBytesRead, false);
    }

    TotalBytesRead += BytesRead;
    BytesToRead -= BytesRead;
    Begin += BytesRead;
  }
  return std::make_pair(TotalBytesRead, true);
}

static bool readValueFromFile(const char *Filename,
                              long long *Value) XRAY_NEVER_INSTRUMENT {
  int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
  if (Fd == -1)
    return false;
  static constexpr size_t BufSize = 256;
  char Line[BufSize] = {};
  ssize_t BytesRead;
  bool Success;
  std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
  close(Fd);
  if (!Success)
    return false;
  const char *End = nullptr;
  long long Tmp = internal_simple_strtoll(Line, &End, 10);
  bool Result = false;
  if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
    *Value = Tmp;
    Result = true;
  }
  return Result;
}

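// On Linux we read the TSC frequency from sysfs. tsc_freq_khz is only exposed
// by some kernels; if it is absent we fall back to cpuinfo_max_freq, which may
// only approximate the actual TSC rate. Both values are reported in kHz and
// are scaled to Hz below. Returning 0 signals to the caller that the frequency
// could not be determined.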
uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
  long long TSCFrequency = -1;
  if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
                        &TSCFrequency)) {
    TSCFrequency *= 1000;
  } else if (readValueFromFile(
                 "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
                 &TSCFrequency)) {
    TSCFrequency *= 1000;
  } else {
    Report("Unable to determine CPU frequency for TSC accounting.\n");
  }
  return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency);
}
#elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE
uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
  long long TSCFrequency = -1;
  size_t tscfreqsz = sizeof(TSCFrequency);
#if SANITIZER_APPLE
  if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency,
                            &tscfreqsz, NULL, 0) != -1) {
#else
  if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz,
                            NULL, 0) != -1) {
#endif
    return static_cast<uint64_t>(TSCFrequency);
  } else {
    Report("Unable to determine CPU frequency for TSC accounting.\n");
  }

  return 0;
}
#elif !SANITIZER_FUCHSIA
uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
  /* Not supported */
  return 0;
}
#endif

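// The patching routines below write the body of the replacement sequence
// first and overwrite the first two bytes of the sled last, with a single
// atomic release store. The intent is that a thread concurrently executing
// through the sled observes either the original instruction or the fully
// written replacement, never a partially patched sequence.

// The two-byte sequences are encoded in little-endian order: MovR10Seq
// (0xba41) stores the bytes 41 ba, the REX-prefixed "mov r10d, imm32" opcode;
// Jmp9Seq (0x09eb) stores eb 09, a short "jmp +9"; NopwSeq (0x9066) stores
// 66 90, the two-byte nop.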
static constexpr uint8_t CallOpCode = 0xe8;
static constexpr uint16_t MovR10Seq = 0xba41;
static constexpr uint16_t Jmp9Seq = 0x09eb;
static constexpr uint16_t Jmp20Seq = 0x14eb;
static constexpr uint16_t Jmp15Seq = 0x0feb;
static constexpr uint8_t JmpOpCode = 0xe9;
static constexpr uint8_t RetOpCode = 0xc3;
static constexpr uint16_t NopwSeq = 0x9066;

static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};

bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
                        const XRaySledEntry &Sled,
                        const XRayTrampolines &Trampolines,
                        bool LogArgs) XRAY_NEVER_INSTRUMENT {
  // Here we do the dance of replacing the following sled:
  //
  // xray_sled_n:
  //   jmp +9
  //   <9 byte nop>
  //
  // With the following:
  //
  //   mov r10d, <function id>
  //   call <relative 32bit offset to entry trampoline>
  //
  // We need to do this in the following order:
  //
  // 1. Put the function id first, 2 bytes from the start of the sled (just
  //    after the 2-byte jmp instruction).
  // 2. Put the call opcode 6 bytes from the start of the sled.
  // 3. Put the relative offset 7 bytes from the start of the sled.
  // 4. Do an atomic write over the jmp instruction for the "mov r10d"
  //    opcode and first operand.
  //
  // Prerequisite is to compute the relative offset to the trampoline's
  // address.
  auto Trampoline =
      LogArgs ? Trampolines.LogArgsTrampoline : Trampolines.EntryTrampoline;
  const uint64_t Address = Sled.address();
  int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
                             (static_cast<int64_t>(Address) + 11);
  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
    Report("XRay Entry trampoline (%p) too far from sled (%p)\n",
           reinterpret_cast<void *>(Trampoline),
           reinterpret_cast<void *>(Address));
    return false;
  }
  if (Enable) {
    *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
    *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode;
    *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
        std::memory_order_release);
  } else {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq,
        std::memory_order_release);
    // FIXME: Write out the nops still?
  }
  return true;
}

bool patchFunctionExit(
    const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
    const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
  // Here we do the dance of replacing the following sled:
  //
  // xray_sled_n:
  //   ret
  //   <10 byte nop>
  //
  // With the following:
  //
  //   mov r10d, <function id>
  //   jmp <relative 32bit offset to exit trampoline>
  //
  // 1. Put the function id first, 2 bytes from the start of the sled (just
  //    after the 1-byte ret instruction).
  // 2. Put the jmp opcode 6 bytes from the start of the sled.
  // 3. Put the relative offset 7 bytes from the start of the sled.
  // 4. Do an atomic write over the ret instruction for the "mov r10d"
  //    opcode and first operand.
  //
  // Prerequisite is to compute the relative offset to the exit trampoline's
  // address.
  auto Trampoline = Trampolines.ExitTrampoline;
  const uint64_t Address = Sled.address();
  int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
                             (static_cast<int64_t>(Address) + 11);
  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
    Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
           reinterpret_cast<void *>(Trampoline),
           reinterpret_cast<void *>(Address));
    return false;
  }
  if (Enable) {
    *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
    *reinterpret_cast<uint8_t *>(Address + 6) = JmpOpCode;
    *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
        std::memory_order_release);
  } else {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint8_t> *>(Address), RetOpCode,
        std::memory_order_release);
    // FIXME: Write out the nops still?
  }
  return true;
}

bool patchFunctionTailExit(
    const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
    const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
  // Here we do the dance of replacing the tail call sled with a sequence
  // similar to the entry sled's, but one that calls the tail exit trampoline
  // instead.
  auto Trampoline = Trampolines.TailExitTrampoline;
  const uint64_t Address = Sled.address();
  int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
                             (static_cast<int64_t>(Address) + 11);
  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
    Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
           reinterpret_cast<void *>(Trampoline),
           reinterpret_cast<void *>(Address));
    return false;
  }
  if (Enable) {
    *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
    *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode;
    *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
        std::memory_order_release);
  } else {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq,
        std::memory_order_release);
    // FIXME: Write out the nops still?
  }
  return true;
}

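// The custom and typed event sleds begin with a two-byte short jmp that skips
// over an inlined call to the corresponding event handler trampoline. Patching
// swaps that jmp for a two-byte nop (nopw) so execution falls through into the
// call sequence; unpatching restores the short jmp.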
bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  // Here we do the dance of replacing the following sled:
  //
  // xray_sled_n:
  //   jmp +15 // 2 bytes
  //   ...
  //
  // With the following:
  //
  //   nopw    // 2 bytes
  //   ...
  //
  // The "unpatch" should just turn the 'nopw' back to a 'jmp +15'.
  const uint64_t Address = Sled.address();
  if (Enable) {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,
        std::memory_order_release);
  } else {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp15Seq,
        std::memory_order_release);
  }
  return false;
}

bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
                     const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  // Here we do the dance of replacing the following sled:
  //
  // xray_sled_n:
  //   jmp +20 // 2 byte instruction
  //   ...
  //
  // With the following:
  //
  //   nopw    // 2 bytes
  //   ...
  //
  // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
  // The 20 byte sled stashes three argument registers, calls the trampoline,
  // unstashes the registers and returns. If the arguments are already in
  // the correct registers, the stashing and unstashing become equivalently
  // sized nops.
  const uint64_t Address = Sled.address();
  if (Enable) {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,
        std::memory_order_release);
  } else {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq,
        std::memory_order_release);
  }
  return false;
}

#if !SANITIZER_FUCHSIA
// We determine whether the CPU we're running on has the correct features we
// need. On x86_64 this means rdtscp support.
bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
  unsigned int EAX, EBX, ECX, EDX;

  // We check whether rdtscp support is enabled. According to the x86_64
  // manual, level should be set at 0x80000001, and we should have a look at
  // bit 27 in EDX. That's 0x8000000 (or 1u << 27).
  __asm__ __volatile__("cpuid" : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX)
                       : "0"(0x80000001));
  if (!(EDX & (1u << 27))) {
    Report("Missing rdtscp support.\n");
    return false;
  }
  // Also check whether we can determine the CPU frequency, since if we cannot,
  // we should use the emulated TSC instead.
  if (!getTSCFrequency()) {
    Report("Unable to determine CPU frequency.\n");
    return false;
  }
  return true;
}
#endif

} // namespace __xray