xref: /llvm-project/compiler-rt/lib/interception/interception_win.cpp (revision bbf377060adc8607e1187952388c7eeea7cf4933)
1 //===-- interception_win.cpp ------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is a part of AddressSanitizer, an address sanity checker.
10 //
11 // Windows-specific interception methods.
12 //
13 // This file is implementing several hooking techniques to intercept calls
14 // to functions. The hooks are dynamically installed by modifying the assembly
15 // code.
16 //
17 // The hooking techniques are making assumptions on the way the code is
18 // generated and are safe under these assumptions.
19 //
20 // On 64-bit architecture, there is no direct 64-bit jump instruction. To allow
21 // arbitrary branching on the whole memory space, the notion of trampoline
// region is used. A trampoline region is a memory space within a 2G boundary
23 // where it is safe to add custom assembly code to build 64-bit jumps.
24 //
25 // Hooking techniques
26 // ==================
27 //
28 // 1) Detour
29 //
30 //    The Detour hooking technique is assuming the presence of a header with
31 //    padding and an overridable 2-bytes nop instruction (mov edi, edi). The
32 //    nop instruction can safely be replaced by a 2-bytes jump without any need
33 //    to save the instruction. A jump to the target is encoded in the function
34 //    header and the nop instruction is replaced by a short jump to the header.
35 //
36 //        head:  5 x nop                 head:  jmp <hook>
37 //        func:  mov edi, edi    -->     func:  jmp short <head>
38 //               [...]                   real:  [...]
39 //
40 //    This technique is only implemented on 32-bit architecture.
41 //    Most of the time, Windows API are hookable with the detour technique.
42 //
43 // 2) Redirect Jump
44 //
45 //    The redirect jump is applicable when the first instruction is a direct
46 //    jump. The instruction is replaced by jump to the hook.
47 //
48 //        func:  jmp <label>     -->     func:  jmp <hook>
49 //
50 //    On a 64-bit architecture, a trampoline is inserted.
51 //
52 //        func:  jmp <label>     -->     func:  jmp <tramp>
53 //                                              [...]
54 //
55 //                                   [trampoline]
56 //                                      tramp:  jmp QWORD [addr]
57 //                                       addr:  .bytes <hook>
58 //
59 //    Note: <real> is equivalent to <label>.
60 //
61 // 3) HotPatch
62 //
63 //    The HotPatch hooking is assuming the presence of a header with padding
64 //    and a first instruction with at least 2-bytes.
65 //
66 //    The reason to enforce the 2-bytes limitation is to provide the minimal
67 //    space to encode a short jump. HotPatch technique is only rewriting one
68 //    instruction to avoid breaking a sequence of instructions containing a
69 //    branching target.
70 //
71 //    Assumptions are enforced by MSVC compiler by using the /HOTPATCH flag.
72 //      see: https://msdn.microsoft.com/en-us/library/ms173507.aspx
73 //    Default padding length is 5 bytes in 32-bits and 6 bytes in 64-bits.
74 //
75 //        head:   5 x nop                head:  jmp <hook>
76 //        func:   <instr>        -->     func:  jmp short <head>
77 //                [...]                  body:  [...]
78 //
79 //                                   [trampoline]
80 //                                       real:  <instr>
81 //                                              jmp <body>
82 //
83 //    On a 64-bit architecture:
84 //
85 //        head:   6 x nop                head:  jmp QWORD [addr1]
86 //        func:   <instr>        -->     func:  jmp short <head>
87 //                [...]                  body:  [...]
88 //
89 //                                   [trampoline]
90 //                                      addr1:  .bytes <hook>
91 //                                       real:  <instr>
92 //                                              jmp QWORD [addr2]
93 //                                      addr2:  .bytes <body>
94 //
95 // 4) Trampoline
96 //
97 //    The Trampoline hooking technique is the most aggressive one. It is
98 //    assuming that there is a sequence of instructions that can be safely
99 //    replaced by a jump (enough room and no incoming branches).
100 //
101 //    Unfortunately, these assumptions can't be safely presumed and code may
102 //    be broken after hooking.
103 //
104 //        func:   <instr>        -->     func:  jmp <hook>
105 //                <instr>
106 //                [...]                  body:  [...]
107 //
108 //                                   [trampoline]
109 //                                       real:  <instr>
110 //                                              <instr>
111 //                                              jmp <body>
112 //
113 //    On a 64-bit architecture:
114 //
115 //        func:   <instr>        -->     func:  jmp QWORD [addr1]
116 //                <instr>
117 //                [...]                  body:  [...]
118 //
119 //                                   [trampoline]
120 //                                      addr1:  .bytes <hook>
121 //                                       real:  <instr>
122 //                                              <instr>
123 //                                              jmp QWORD [addr2]
124 //                                      addr2:  .bytes <body>
125 //===----------------------------------------------------------------------===//
126 
127 #include "interception.h"
128 
129 #if SANITIZER_WINDOWS
130 #include "sanitizer_common/sanitizer_platform.h"
131 #define WIN32_LEAN_AND_MEAN
132 #include <windows.h>
133 #include <psapi.h>
134 
135 namespace __interception {
136 
// Size in bytes of an address slot. FIRST_32_SECOND_64 is defined elsewhere;
// going by its name it yields the first value on 32-bit targets and the second
// on 64-bit targets.
static const int kAddressLength = FIRST_32_SECOND_64(4, 8);
// Length of a near relative jump: opcode E9 followed by a 32-bit displacement.
static const int kJumpInstructionLength = 5;
// Length of a short relative jump: opcode EB followed by an 8-bit displacement.
static const int kShortJumpInstructionLength = 2;
// Length of an indirect jump: bytes FF 25 followed by a 32-bit offset.
UNUSED static const int kIndirectJumpInstructionLength = 6;
// Length of the branch instruction form selected for the target width.
static const int kBranchLength =
    FIRST_32_SECOND_64(kJumpInstructionLength, kIndirectJumpInstructionLength);
// Length of a branch instruction plus one address slot placed right after it.
static const int kDirectBranchLength = kBranchLength + kAddressLength;
144 
// INTERCEPTION_FORMAT(f, a) annotates a declaration as printf-like, with the
// format string at argument index |f| and the variadic arguments starting at
// index |a|. It expands to nothing when _MSC_VER is defined.
#  if defined(_MSC_VER)
#    define INTERCEPTION_FORMAT(f, a)
#  else
#    define INTERCEPTION_FORMAT(f, a) __attribute__((format(printf, f, a)))
#  endif

// Optional printf-style sink for diagnostics. It is only assigned by
// SetErrorReportCallback; ReportError skips the call while it is null.
static void (*ErrorReportCallback)(const char *format, ...)
    INTERCEPTION_FORMAT(1, 2);
153 
// Installs |callback| as the destination for diagnostics emitted through
// ReportError. Passing a null pointer disables reporting, because ReportError
// checks the pointer before calling it.
void SetErrorReportCallback(void (*callback)(const char *format, ...)) {
  ErrorReportCallback = callback;
}
157 
// Forwards a printf-style message to ErrorReportCallback when one has been
// installed, and does nothing otherwise. The do/while(0) wrapper makes the
// macro usable as a single statement.
#  define ReportError(...)                \
    do {                                  \
      if (ErrorReportCallback)            \
        ErrorReportCallback(__VA_ARGS__); \
    } while (0)
163 
// Reports an unrecoverable interception failure through ReportError and then
// raises a breakpoint trap.
static void InterceptionFailed() {
  ReportError("interception_win: failed due to an unrecoverable error.\n");
  // This acts like an abort when no debugger is attached. According to an old
  // comment, calling abort() leads to an infinite recursion in CheckFailed.
  __debugbreak();
}
170 
171 static bool DistanceIsWithin2Gig(uptr from, uptr target) {
172 #if SANITIZER_WINDOWS64
173   if (from < target)
174     return target - from <= (uptr)0x7FFFFFFFU;
175   else
176     return from - target <= (uptr)0x80000000U;
177 #else
178   // In a 32-bit address space, the address calculation will wrap, so this check
179   // is unnecessary.
180   return true;
181 #endif
182 }
183 
184 static uptr GetMmapGranularity() {
185   SYSTEM_INFO si;
186   GetSystemInfo(&si);
187   return si.dwAllocationGranularity;
188 }
189 
190 UNUSED static uptr RoundDownTo(uptr size, uptr boundary) {
191   return size & ~(boundary - 1);
192 }
193 
194 UNUSED static uptr RoundUpTo(uptr size, uptr boundary) {
195   return RoundDownTo(size + boundary - 1, boundary);
196 }
197 
198 // FIXME: internal_str* and internal_mem* functions should be moved from the
199 // ASan sources into interception/.
200 
// Returns the number of characters in |str| before its terminating NUL.
static size_t _strlen(const char *str) {
  size_t length = 0;
  while (str[length] != '\0')
    ++length;
  return length;
}
206 
// Returns a pointer to the first occurrence of |c| in |str|, or nullptr when
// |c| does not occur before the terminating NUL. The terminator itself is
// never matched, so searching for '\0' yields nullptr.
static char* _strchr(char* str, char c) {
  for (char *cursor = str; *cursor != '\0'; ++cursor)
    if (*cursor == c)
      return cursor;
  return nullptr;
}
215 
// Compares two NUL-terminated strings character by character. Returns -1 when
// |s1| orders before |s2|, 1 when it orders after, and 0 when both are equal.
// Characters are compared after conversion to unsigned.
static int _strcmp(const char *s1, const char *s2) {
  for (;; ++s1, ++s2) {
    const unsigned lhs = *s1;
    const unsigned rhs = *s2;
    if (lhs < rhs)
      return -1;
    if (lhs > rhs)
      return 1;
    // Both characters are equal here; a NUL means both strings ended.
    if (lhs == 0)
      return 0;
  }
}
227 
// Stores |value|, converted to char, into each of the first |sz| bytes at |p|.
static void _memset(void *p, int value, size_t sz) {
  const char fill = (char)value;
  char *out = (char*)p;
  for (char *const end = out + sz; out != end; ++out)
    *out = fill;
}
232 
// Copies |sz| bytes from |src| to |dst|, one byte at a time in ascending
// address order.
static void _memcpy(void *dst, void *src, size_t sz) {
  char *out = (char*)dst;
  const char *in = (const char*)src;
  while (sz-- != 0)
    *out++ = *in++;
}
239 
240 static bool ChangeMemoryProtection(
241     uptr address, uptr size, DWORD *old_protection) {
242   return ::VirtualProtect((void*)address, size,
243                           PAGE_EXECUTE_READWRITE,
244                           old_protection) != FALSE;
245 }
246 
247 static bool RestoreMemoryProtection(
248     uptr address, uptr size, DWORD old_protection) {
249   DWORD unused;
250   return ::VirtualProtect((void*)address, size,
251                           old_protection,
252                           &unused) != FALSE;
253 }
254 
255 static bool IsMemoryPadding(uptr address, uptr size) {
256   u8* function = (u8*)address;
257   for (size_t i = 0; i < size; ++i)
258     if (function[i] != 0x90 && function[i] != 0xCC)
259       return false;
260   return true;
261 }
262 
// Byte sequence of an 8-byte nop encoding. FunctionHasPadding accepts it as
// padding when it sits immediately in front of a function.
static const u8 kHintNop8Bytes[] = {
  0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
};
266 
267 template<class T>
268 static bool FunctionHasPrefix(uptr address, const T &pattern) {
269   u8* function = (u8*)address - sizeof(pattern);
270   for (size_t i = 0; i < sizeof(pattern); ++i)
271     if (function[i] != pattern[i])
272       return false;
273   return true;
274 }
275 
276 static bool FunctionHasPadding(uptr address, uptr size) {
277   if (IsMemoryPadding(address - size, size))
278     return true;
279   if (size <= sizeof(kHintNop8Bytes) &&
280       FunctionHasPrefix(address, kHintNop8Bytes))
281     return true;
282   return false;
283 }
284 
// Fills the |size| bytes starting at |from| with 0xCC.
static void WritePadding(uptr from, uptr size) {
  _memset((void*)from, 0xCC, (size_t)size);
}
288 
289 static void WriteJumpInstruction(uptr from, uptr target) {
290   if (!DistanceIsWithin2Gig(from + kJumpInstructionLength, target)) {
291     ReportError(
292         "interception_win: cannot write jmp further than 2GB away, from %p to "
293         "%p.\n",
294         (void *)from, (void *)target);
295     InterceptionFailed();
296   }
297   ptrdiff_t offset = target - from - kJumpInstructionLength;
298   *(u8*)from = 0xE9;
299   *(u32*)(from + 1) = offset;
300 }
301 
302 static void WriteShortJumpInstruction(uptr from, uptr target) {
303   sptr offset = target - from - kShortJumpInstructionLength;
304   if (offset < -128 || offset > 127) {
305     ReportError("interception_win: cannot write short jmp from %p to %p\n",
306                 (void *)from, (void *)target);
307     InterceptionFailed();
308   }
309   *(u8*)from = 0xEB;
310   *(u8*)(from + 1) = (u8)offset;
311 }
312 
#if SANITIZER_WINDOWS64
// Writes `jmp [rip + <offset>]` (FF 25 <offset>) at |from|. The jump loads its
// destination from the memory slot at |indirect_target|.
static void WriteIndirectJumpInstruction(uptr from, uptr indirect_target) {
  // <offset> is the distance from the end of the jump instruction to the
  // memory location holding the destination address. The displacement is
  // still 32-bit on x64, so |indirect_target| must lie within +/- 2GB.
  const uptr next_instruction = from + kIndirectJumpInstructionLength;
  const int displacement =
      indirect_target - from - kIndirectJumpInstructionLength;
  if (!DistanceIsWithin2Gig(next_instruction, indirect_target)) {
    ReportError(
        "interception_win: cannot write indirect jmp with target further than "
        "2GB away, from %p to %p.\n",
        (void *)from, (void *)indirect_target);
    InterceptionFailed();
  }
  // 0x25FF stored as a 16-bit value places the bytes FF 25 in memory.
  *(u16*)from = 0x25FF;
  *(u32*)(from + 2) = displacement;
}
#endif
333 
// Writes at |from| a branch that transfers control to |target|.
// With SANITIZER_WINDOWS64 an indirect jump through the slot at
// |indirect_target| is written at |from|, and |target| is then stored in that
// slot as a 64-bit value. Otherwise |indirect_target| is ignored and a
// relative jump to |target| is written.
static void WriteBranch(
    uptr from, uptr indirect_target, uptr target) {
#if SANITIZER_WINDOWS64
  WriteIndirectJumpInstruction(from, indirect_target);
  *(u64*)indirect_target = target;
#else
  (void)indirect_target;
  WriteJumpInstruction(from, target);
#endif
}
344 
// Writes at |from| a self-contained branch to |target|.
// With SANITIZER_WINDOWS64 the address slot is placed directly after the jump
// instruction, at from + kBranchLength. Otherwise a relative jump is written.
static void WriteDirectBranch(uptr from, uptr target) {
#if SANITIZER_WINDOWS64
  // Emit an indirect jump through immediately following bytes:
  //   jmp [rip + kBranchLength]
  //   .quad <target>
  WriteBranch(from, from + kBranchLength, target);
#else
  WriteJumpInstruction(from, target);
#endif
}
355 
// Bookkeeping for one block of memory out of which trampolines are carved.
struct TrampolineMemoryRegion {
  // Base address of the region; 0 marks a slot that holds no region.
  uptr content;
  // Number of bytes already handed out, counted from |content|.
  uptr allocated_size;
  // Total size of the region in bytes.
  uptr max_size;
};
361 
// Maximum distance used when computing the address window in which trampoline
// memory is searched on 64-bit targets.
UNUSED static const uptr kTrampolineRangeLimit = 1ull << 31;  // 2 gig
// Number of slots in the TrampolineRegions table.
static const int kMaxTrampolineRegion = 1024;
// Table of trampoline regions. Slots are filled from the front; the first slot
// whose |content| is 0 ends the used part of the table.
static TrampolineMemoryRegion TrampolineRegions[kMaxTrampolineRegion];
365 
366 static void *AllocateTrampolineRegion(uptr min_addr, uptr max_addr,
367                                       uptr func_addr, size_t granularity) {
368 #  if SANITIZER_WINDOWS64
369   // Clamp {min,max}_addr to the accessible address space.
370   SYSTEM_INFO system_info;
371   ::GetSystemInfo(&system_info);
372   uptr min_virtual_addr =
373       RoundUpTo((uptr)system_info.lpMinimumApplicationAddress, granularity);
374   uptr max_virtual_addr =
375       RoundDownTo((uptr)system_info.lpMaximumApplicationAddress, granularity);
376   if (min_addr < min_virtual_addr)
377     min_addr = min_virtual_addr;
378   if (max_addr > max_virtual_addr)
379     max_addr = max_virtual_addr;
380 
381   // This loop probes the virtual address space to find free memory in the
382   // [min_addr, max_addr] interval. The search starts from func_addr and
383   // proceeds "outwards" towards the interval bounds using two probes, lo_addr
384   // and hi_addr, for addresses lower/higher than func_addr. At each step, it
385   // considers the probe closest to func_addr. If that address is not free, the
386   // probe is advanced (lower or higher depending on the probe) to the next
387   // memory block and the search continues.
388   uptr lo_addr = RoundDownTo(func_addr, granularity);
389   uptr hi_addr = RoundUpTo(func_addr, granularity);
390   while (lo_addr >= min_addr || hi_addr <= max_addr) {
391     // Consider the in-range address closest to func_addr.
392     uptr addr;
393     if (lo_addr < min_addr)
394       addr = hi_addr;
395     else if (hi_addr > max_addr)
396       addr = lo_addr;
397     else
398       addr = (hi_addr - func_addr < func_addr - lo_addr) ? hi_addr : lo_addr;
399 
400     MEMORY_BASIC_INFORMATION info;
401     if (!::VirtualQuery((void *)addr, &info, sizeof(info))) {
402       ReportError(
403           "interception_win: VirtualQuery in AllocateTrampolineRegion failed "
404           "for %p\n",
405           (void *)addr);
406       return nullptr;
407     }
408 
409     // Check whether a region can be allocated at |addr|.
410     if (info.State == MEM_FREE && info.RegionSize >= granularity) {
411       void *page =
412           ::VirtualAlloc((void *)addr, granularity, MEM_RESERVE | MEM_COMMIT,
413                          PAGE_EXECUTE_READWRITE);
414       if (page == nullptr)
415         ReportError(
416             "interception_win: VirtualAlloc in AllocateTrampolineRegion failed "
417             "for %p\n",
418             (void *)addr);
419       return page;
420     }
421 
422     if (addr == lo_addr)
423       lo_addr =
424           RoundDownTo((uptr)info.AllocationBase - granularity, granularity);
425     if (addr == hi_addr)
426       hi_addr =
427           RoundUpTo((uptr)info.BaseAddress + info.RegionSize, granularity);
428   }
429 
430   ReportError(
431       "interception_win: AllocateTrampolineRegion failed to find free memory; "
432       "min_addr: %p, max_addr: %p, func_addr: %p, granularity: %zu\n",
433       (void *)min_addr, (void *)max_addr, (void *)func_addr, granularity);
434   return nullptr;
435 #else
436   return ::VirtualAlloc(nullptr,
437                         granularity,
438                         MEM_RESERVE | MEM_COMMIT,
439                         PAGE_EXECUTE_READWRITE);
440 #endif
441 }
442 
443 // Used by unittests to release mapped memory space.
444 void TestOnlyReleaseTrampolineRegions() {
445   for (size_t bucket = 0; bucket < kMaxTrampolineRegion; ++bucket) {
446     TrampolineMemoryRegion *current = &TrampolineRegions[bucket];
447     if (current->content == 0)
448       return;
449     ::VirtualFree((void*)current->content, 0, MEM_RELEASE);
450     current->content = 0;
451   }
452 }
453 
454 static uptr AllocateMemoryForTrampoline(uptr func_address, size_t size) {
455 #  if SANITIZER_WINDOWS64
456   uptr min_addr = func_address - kTrampolineRangeLimit;
457   uptr max_addr = func_address + kTrampolineRangeLimit - size;
458 
459   // Allocate memory within 2GB of the module (DLL or EXE file) so that any
460   // address within the module can be referenced with PC-relative operands.
461   // This allows us to not just jump to the trampoline with a PC-relative
462   // offset, but to relocate any instructions that we copy to the trampoline
463   // which have references to the original module. If we can't find the base
464   // address of the module (e.g. if func_address is in mmap'ed memory), just
465   // stay within 2GB of func_address.
466   HMODULE module;
467   if (::GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
468                            GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
469                            (LPCWSTR)func_address, &module)) {
470     MODULEINFO module_info;
471     if (::GetModuleInformation(::GetCurrentProcess(), module,
472                                 &module_info, sizeof(module_info))) {
473       min_addr = (uptr)module_info.lpBaseOfDll + module_info.SizeOfImage -
474                  kTrampolineRangeLimit;
475       max_addr = (uptr)module_info.lpBaseOfDll + kTrampolineRangeLimit - size;
476     }
477   }
478 
479   // Check for overflow.
480   if (min_addr > func_address)
481     min_addr = 0;
482   if (max_addr < func_address)
483     max_addr = ~(uptr)0;
484 #  else
485   uptr min_addr = 0;
486   uptr max_addr = ~min_addr;
487 #  endif
488 
489   // Find a region within [min_addr,max_addr] with enough space to allocate
490   // |size| bytes.
491   TrampolineMemoryRegion *region = nullptr;
492   for (size_t bucket = 0; bucket < kMaxTrampolineRegion; ++bucket) {
493     TrampolineMemoryRegion* current = &TrampolineRegions[bucket];
494     if (current->content == 0) {
495       // No valid region found, allocate a new region.
496       size_t bucket_size = GetMmapGranularity();
497       void *content = AllocateTrampolineRegion(min_addr, max_addr, func_address,
498                                                bucket_size);
499       if (content == nullptr)
500         return 0U;
501 
502       current->content = (uptr)content;
503       current->allocated_size = 0;
504       current->max_size = bucket_size;
505       region = current;
506       break;
507     } else if (current->max_size - current->allocated_size > size) {
508       uptr next_address = current->content + current->allocated_size;
509       if (next_address < min_addr || next_address > max_addr)
510         continue;
511       // The space can be allocated in the current region.
512       region = current;
513       break;
514     }
515   }
516 
517   // Failed to find a region.
518   if (region == nullptr)
519     return 0U;
520 
521   // Allocate the space in the current region.
522   uptr allocated_space = region->content + region->allocated_size;
523   region->allocated_size += size;
524   WritePadding(allocated_space, size);
525 
526   return allocated_space;
527 }
528 
// The following prologues cannot be patched because of the short jump
// jumping to the patching region.
// GetInstructionSize returns 0 for a function that starts with one of these
// byte sequences.

// Short jump patterns below are only for x86_64.
#  if SANITIZER_WINDOWS_x64
// ntdll!wcslen in Win11
//   488bc1          mov     rax,rcx
//   0fb710          movzx   edx,word ptr [rax]
//   4883c002        add     rax,2
//   6685d2          test    dx,dx
//   75f4            jne     -12
static const u8 kPrologueWithShortJump1[] = {
    0x48, 0x8b, 0xc1, 0x0f, 0xb7, 0x10, 0x48, 0x83,
    0xc0, 0x02, 0x66, 0x85, 0xd2, 0x75, 0xf4,
};

// ntdll!strrchr in Win11
//   4c8bc1          mov     r8,rcx
//   8a01            mov     al,byte ptr [rcx]
//   48ffc1          inc     rcx
//   84c0            test    al,al
//   75f7            jne     -9
static const u8 kPrologueWithShortJump2[] = {
    0x4c, 0x8b, 0xc1, 0x8a, 0x01, 0x48, 0xff, 0xc1,
    0x84, 0xc0, 0x75, 0xf7,
};
#endif
556 
557 // Returns 0 on error.
558 static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
559   if (rel_offset) {
560     *rel_offset = 0;
561   }
562 
563 #if SANITIZER_ARM64
564   // An ARM64 instruction is 4 bytes long.
565   return 4;
566 #endif
567 
568 #  if SANITIZER_WINDOWS_x64
569   if (memcmp((u8*)address, kPrologueWithShortJump1,
570              sizeof(kPrologueWithShortJump1)) == 0 ||
571       memcmp((u8*)address, kPrologueWithShortJump2,
572              sizeof(kPrologueWithShortJump2)) == 0) {
573     return 0;
574   }
575 #endif
576 
577   switch (*(u64*)address) {
578     case 0x90909090909006EB:  // stub: jmp over 6 x nop.
579       return 8;
580   }
581 
582   switch (*(u8*)address) {
583     case 0x90:  // 90 : nop
584     case 0xC3:  // C3 : ret   (for small/empty function interception
585     case 0xCC:  // CC : int 3  i.e. registering weak functions)
586       return 1;
587 
588     case 0x50:  // push eax / rax
589     case 0x51:  // push ecx / rcx
590     case 0x52:  // push edx / rdx
591     case 0x53:  // push ebx / rbx
592     case 0x54:  // push esp / rsp
593     case 0x55:  // push ebp / rbp
594     case 0x56:  // push esi / rsi
595     case 0x57:  // push edi / rdi
596     case 0x5D:  // pop ebp / rbp
597       return 1;
598 
599     case 0x6A:  // 6A XX = push XX
600       return 2;
601 
602     // This instruction can be encoded with a 16-bit immediate but that is
603     // incredibly unlikely.
604     case 0x68:  // 68 XX XX XX XX : push imm32
605       return 5;
606 
607     case 0xb8:  // b8 XX XX XX XX : mov eax, XX XX XX XX
608     case 0xB9:  // b9 XX XX XX XX : mov ecx, XX XX XX XX
609     case 0xBA:  // ba XX XX XX XX : mov edx, XX XX XX XX
610       return 5;
611 
612     // Cannot overwrite control-instruction. Return 0 to indicate failure.
613     case 0xE9:  // E9 XX XX XX XX : jmp <label>
614     case 0xE8:  // E8 XX XX XX XX : call <func>
615     case 0xEB:  // EB XX : jmp XX (short jump)
616     case 0x70:  // 7Y YY : jy XX (short conditional jump)
617     case 0x71:
618     case 0x72:
619     case 0x73:
620     case 0x74:
621     case 0x75:
622     case 0x76:
623     case 0x77:
624     case 0x78:
625     case 0x79:
626     case 0x7A:
627     case 0x7B:
628     case 0x7C:
629     case 0x7D:
630     case 0x7E:
631     case 0x7F:
632       return 0;
633   }
634 
635   switch (*(u16*)(address)) {
636     case 0x018A:  // 8A 01 : mov al, byte ptr [ecx]
637     case 0xFF8B:  // 8B FF : mov edi, edi
638     case 0xEC8B:  // 8B EC : mov ebp, esp
639     case 0xc889:  // 89 C8 : mov eax, ecx
640     case 0xD189:  // 89 D1 : mov ecx, edx
641     case 0xE589:  // 89 E5 : mov ebp, esp
642     case 0xC18B:  // 8B C1 : mov eax, ecx
643     case 0xC031:  // 31 C0 : xor eax, eax
644     case 0xC931:  // 31 C9 : xor ecx, ecx
645     case 0xD231:  // 31 D2 : xor edx, edx
646     case 0xC033:  // 33 C0 : xor eax, eax
647     case 0xC933:  // 33 C9 : xor ecx, ecx
648     case 0xD233:  // 33 D2 : xor edx, edx
649     case 0xDB84:  // 84 DB : test bl,bl
650     case 0xC084:  // 84 C0 : test al,al
651     case 0xC984:  // 84 C9 : test cl,cl
652     case 0xD284:  // 84 D2 : test dl,dl
653       return 2;
654 
655     case 0x3980:  // 80 39 XX : cmp BYTE PTR [rcx], XX
656     case 0x4D8B:  // 8B 4D XX : mov XX(%ebp), ecx
657     case 0x558B:  // 8B 55 XX : mov XX(%ebp), edx
658     case 0x758B:  // 8B 75 XX : mov XX(%ebp), esp
659     case 0xE483:  // 83 E4 XX : and esp, XX
660     case 0xEC83:  // 83 EC XX : sub esp, XX
661     case 0xC1F6:  // F6 C1 XX : test cl, XX
662       return 3;
663 
664     case 0x89FF:  // FF 89 XX XX XX XX : dec dword ptr [ecx + XX XX XX XX]
665     case 0xEC81:  // 81 EC XX XX XX XX : sub esp, XX XX XX XX
666       return 6;
667 
668     // Cannot overwrite control-instruction. Return 0 to indicate failure.
669     case 0x25FF:  // FF 25 XX YY ZZ WW : jmp dword ptr ds:[WWZZYYXX]
670       return 0;
671   }
672 
673   switch (0x00FFFFFF & *(u32 *)address) {
674     case 0x244C8D:  // 8D 4C 24 XX : lea ecx, [esp + XX]
675     case 0x2474FF:  // FF 74 24 XX : push qword ptr [rsp + XX]
676       return 4;
677     case 0x24A48D:  // 8D A4 24 XX XX XX XX : lea esp, [esp + XX XX XX XX]
678       return 7;
679   }
680 
681   switch (0x000000FF & *(u32 *)address) {
682     case 0xc2:  // C2 XX XX : ret XX (needed for registering weak functions)
683       return 3;
684   }
685 
686 #  if SANITIZER_WINDOWS_x64
687   switch (*(u8*)address) {
688     case 0xA1:  // A1 XX XX XX XX XX XX XX XX :
689                 //   movabs eax, dword ptr ds:[XXXXXXXX]
690       return 9;
691     case 0xF2:
692       switch (*(u32 *)(address + 1)) {
693           case 0x2444110f:  //  f2 0f 11 44 24 XX       movsd  QWORD PTR
694                             //  [rsp + XX], xmm0
695           case 0x244c110f:  //  f2 0f 11 4c 24 XX       movsd  QWORD PTR
696                             //  [rsp + XX], xmm1
697           case 0x2454110f:  //  f2 0f 11 54 24 XX       movsd  QWORD PTR
698                             //  [rsp + XX], xmm2
699           case 0x245c110f:  //  f2 0f 11 5c 24 XX       movsd  QWORD PTR
700                             //  [rsp + XX], xmm3
701           case 0x2464110f:  //  f2 0f 11 64 24 XX       movsd  QWORD PTR
702                             //  [rsp + XX], xmm4
703             return 6;
704       }
705       break;
706 
707     case 0x83:
708       const u8 next_byte = *(u8*)(address + 1);
709       const u8 mod = next_byte >> 6;
710       const u8 rm = next_byte & 7;
711       if (mod == 1 && rm == 4)
712         return 5;  // 83 ModR/M SIB Disp8 Imm8
713                    //   add|or|adc|sbb|and|sub|xor|cmp [r+disp8], imm8
714   }
715 
716   switch (*(u16*)address) {
717     case 0x5040:  // push rax
718     case 0x5140:  // push rcx
719     case 0x5240:  // push rdx
720     case 0x5340:  // push rbx
721     case 0x5440:  // push rsp
722     case 0x5540:  // push rbp
723     case 0x5640:  // push rsi
724     case 0x5740:  // push rdi
725     case 0x5441:  // push r12
726     case 0x5541:  // push r13
727     case 0x5641:  // push r14
728     case 0x5741:  // push r15
729     case 0x9066:  // Two-byte NOP
730     case 0xc084:  // test al, al
731     case 0x018a:  // mov al, byte ptr [rcx]
732       return 2;
733 
734     case 0x7E80:  // 80 7E YY XX  cmp BYTE PTR [rsi+YY], XX
735     case 0x7D80:  // 80 7D YY XX  cmp BYTE PTR [rbp+YY], XX
736     case 0x7A80:  // 80 7A YY XX  cmp BYTE PTR [rdx+YY], XX
737     case 0x7880:  // 80 78 YY XX  cmp BYTE PTR [rax+YY], XX
738     case 0x7B80:  // 80 7B YY XX  cmp BYTE PTR [rbx+YY], XX
739     case 0x7980:  // 80 79 YY XX  cmp BYTE ptr [rcx+YY], XX
740       return 4;
741 
742     case 0x058A:  // 8A 05 XX XX XX XX : mov al, byte ptr [XX XX XX XX]
743     case 0x058B:  // 8B 05 XX XX XX XX : mov eax, dword ptr [XX XX XX XX]
744       if (rel_offset)
745         *rel_offset = 2;
746     case 0xB841:  // 41 B8 XX XX XX XX : mov r8d, XX XX XX XX
747       return 6;
748 
749     case 0x7E81:  // 81 7E YY XX XX XX XX  cmp DWORD PTR [rsi+YY], XX XX XX XX
750     case 0x7D81:  // 81 7D YY XX XX XX XX  cmp DWORD PTR [rbp+YY], XX XX XX XX
751     case 0x7A81:  // 81 7A YY XX XX XX XX  cmp DWORD PTR [rdx+YY], XX XX XX XX
752     case 0x7881:  // 81 78 YY XX XX XX XX  cmp DWORD PTR [rax+YY], XX XX XX XX
753     case 0x7B81:  // 81 7B YY XX XX XX XX  cmp DWORD PTR [rbx+YY], XX XX XX XX
754     case 0x7981:  // 81 79 YY XX XX XX XX  cmp dword ptr [rcx+YY], XX XX XX XX
755       return 7;
756   }
757 
758   switch (0x00FFFFFF & *(u32 *)address) {
759     case 0x10b70f:    // 0f b7 10 : movzx edx, WORD PTR [rax]
760     case 0xc00b4d:    // 4d 0b c0 : or r8, r8
761     case 0xc03345:    // 45 33 c0 : xor r8d, r8d
762     case 0xc08548:    // 48 85 c0 : test rax, rax
763     case 0xc0854d:    // 4d 85 c0 : test r8, r8
764     case 0xc08b41:    // 41 8b c0 : mov eax, r8d
765     case 0xc0ff48:    // 48 ff c0 : inc rax
766     case 0xc0ff49:    // 49 ff c0 : inc r8
767     case 0xc18b41:    // 41 8b c1 : mov eax, r9d
768     case 0xc18b48:    // 48 8b c1 : mov rax, rcx
769     case 0xc18b4c:    // 4c 8b c1 : mov r8, rcx
770     case 0xc1ff48:    // 48 ff c1 : inc rcx
771     case 0xc1ff49:    // 49 ff c1 : inc r9
772     case 0xc28b41:    // 41 8b c2 : mov eax, r10d
773     case 0x01b60f:    // 0f b6 01 : movzx eax, BYTE PTR [rcx]
774     case 0x09b60f:    // 0f b6 09 : movzx ecx, BYTE PTR [rcx]
775     case 0x11b60f:    // 0f b6 11 : movzx edx, BYTE PTR [rcx]
776     case 0xc2b60f:    // 0f b6 c2 : movzx eax, dl
777     case 0xc2ff48:    // 48 ff c2 : inc rdx
778     case 0xc2ff49:    // 49 ff c2 : inc r10
779     case 0xc38b41:    // 41 8b c3 : mov eax, r11d
780     case 0xc3ff48:    // 48 ff c3 : inc rbx
781     case 0xc3ff49:    // 49 ff c3 : inc r11
782     case 0xc48b41:    // 41 8b c4 : mov eax, r12d
783     case 0xc48b48:    // 48 8b c4 : mov rax, rsp
784     case 0xc4ff49:    // 49 ff c4 : inc r12
785     case 0xc5ff49:    // 49 ff c5 : inc r13
786     case 0xc6ff48:    // 48 ff c6 : inc rsi
787     case 0xc6ff49:    // 49 ff c6 : inc r14
788     case 0xc7ff48:    // 48 ff c7 : inc rdi
789     case 0xc7ff49:    // 49 ff c7 : inc r15
790     case 0xc93345:    // 45 33 c9 : xor r9d, r9d
791     case 0xc98548:    // 48 85 c9 : test rcx, rcx
792     case 0xc9854d:    // 4d 85 c9 : test r9, r9
793     case 0xc98b4c:    // 4c 8b c9 : mov r9, rcx
794     case 0xd12948:    // 48 29 d1 : sub rcx, rdx
795     case 0xca2b48:    // 48 2b ca : sub rcx, rdx
796     case 0xca3b48:    // 48 3b ca : cmp rcx, rdx
797     case 0xd12b48:    // 48 2b d1 : sub rdx, rcx
798     case 0xd18b48:    // 48 8b d1 : mov rdx, rcx
799     case 0xd18b4c:    // 4c 8b d1 : mov r10, rcx
800     case 0xd28548:    // 48 85 d2 : test rdx, rdx
801     case 0xd2854d:    // 4d 85 d2 : test r10, r10
802     case 0xd28b4c:    // 4c 8b d2 : mov r10, rdx
803     case 0xd2b60f:    // 0f b6 d2 : movzx edx, dl
804     case 0xd2be0f:    // 0f be d2 : movsx edx, dl
805     case 0xd98b4c:    // 4c 8b d9 : mov r11, rcx
806     case 0xd9f748:    // 48 f7 d9 : neg rcx
807     case 0xc03145:    // 45 31 c0 : xor r8d,r8d
808     case 0xc93145:    // 45 31 c9 : xor r9d,r9d
809     case 0xdb3345:    // 45 33 db : xor r11d, r11d
810     case 0xc08445:    // 45 84 c0 : test r8b,r8b
811     case 0xd28445:    // 45 84 d2 : test r10b,r10b
812     case 0xdb8548:    // 48 85 db : test rbx, rbx
813     case 0xdb854d:    // 4d 85 db : test r11, r11
814     case 0xdc8b4c:    // 4c 8b dc : mov r11, rsp
815     case 0xe48548:    // 48 85 e4 : test rsp, rsp
816     case 0xe4854d:    // 4d 85 e4 : test r12, r12
817     case 0xc88948:    // 48 89 c8 : mov rax,rcx
818     case 0xcb8948:    // 48 89 cb : mov rbx,rcx
819     case 0xd08948:    // 48 89 d0 : mov rax,rdx
820     case 0xd18948:    // 48 89 d1 : mov rcx,rdx
821     case 0xd38948:    // 48 89 d3 : mov rbx,rdx
822     case 0xe58948:    // 48 89 e5 : mov rbp, rsp
823     case 0xed8548:    // 48 85 ed : test rbp, rbp
824     case 0xc88949:    // 49 89 c8 : mov r8, rcx
825     case 0xc98949:    // 49 89 c9 : mov r9, rcx
826     case 0xca8949:    // 49 89 ca : mov r10,rcx
827     case 0xd08949:    // 49 89 d0 : mov r8, rdx
828     case 0xd18949:    // 49 89 d1 : mov r9, rdx
829     case 0xd28949:    // 49 89 d2 : mov r10, rdx
830     case 0xd38949:    // 49 89 d3 : mov r11, rdx
831     case 0xed854d:    // 4d 85 ed : test r13, r13
832     case 0xf6854d:    // 4d 85 f6 : test r14, r14
833     case 0xff854d:    // 4d 85 ff : test r15, r15
834       return 3;
835 
836     case 0x245489:    // 89 54 24 XX : mov DWORD PTR[rsp + XX], edx
837     case 0x428d44:    // 44 8d 42 XX : lea r8d , [rdx + XX]
838     case 0x588948:    // 48 89 58 XX : mov QWORD PTR[rax + XX], rbx
839     case 0xec8348:    // 48 83 ec XX : sub rsp, XX
840     case 0xf88349:    // 49 83 f8 XX : cmp r8, XX
841     case 0x488d49:    // 49 8d 48 XX : lea rcx, [...]
842     case 0x048d4c:    // 4c 8d 04 XX : lea r8, [...]
843     case 0x148d4e:    // 4e 8d 14 XX : lea r10, [...]
844     case 0x398366:    // 66 83 39 XX : cmp WORD PTR [rcx], XX
845       return 4;
846 
847     case 0x441F0F:  // 0F 1F 44 XX XX :   nop DWORD PTR [...]
848     case 0x246483:  // 83 64 24 XX YY :   and    DWORD PTR [rsp+XX], YY
849       return 5;
850 
851     case 0x788166:  // 66 81 78 XX YY YY  cmp WORD PTR [rax+XX], YY YY
852     case 0x798166:  // 66 81 79 XX YY YY  cmp WORD PTR [rcx+XX], YY YY
853     case 0x7a8166:  // 66 81 7a XX YY YY  cmp WORD PTR [rdx+XX], YY YY
854     case 0x7b8166:  // 66 81 7b XX YY YY  cmp WORD PTR [rbx+XX], YY YY
855     case 0x7e8166:  // 66 81 7e XX YY YY  cmp WORD PTR [rsi+XX], YY YY
856     case 0x7f8166:  // 66 81 7f XX YY YY  cmp WORD PTR [rdi+XX], YY YY
857       return 6;
858 
859     case 0xec8148:    // 48 81 EC XX XX XX XX : sub rsp, XXXXXXXX
860     case 0xc0c748:    // 48 C7 C0 XX XX XX XX : mov rax, XX XX XX XX
861       return 7;
862 
863     // clang-format off
864     case 0x788141:  // 41 81 78 XX YY YY YY YY : cmp DWORD PTR [r8+YY], XX XX XX XX
865     case 0x798141:  // 41 81 79 XX YY YY YY YY : cmp DWORD PTR [r9+YY], XX XX XX XX
866     case 0x7a8141:  // 41 81 7a XX YY YY YY YY : cmp DWORD PTR [r10+YY], XX XX XX XX
867     case 0x7b8141:  // 41 81 7b XX YY YY YY YY : cmp DWORD PTR [r11+YY], XX XX XX XX
868     case 0x7d8141:  // 41 81 7d XX YY YY YY YY : cmp DWORD PTR [r13+YY], XX XX XX XX
869     case 0x7e8141:  // 41 81 7e XX YY YY YY YY : cmp DWORD PTR [r14+YY], XX XX XX XX
870     case 0x7f8141:  // 41 81 7f YY XX XX XX XX : cmp DWORD PTR [r15+YY], XX XX XX XX
871     case 0x247c81:  // 81 7c 24 YY XX XX XX XX : cmp DWORD PTR [rsp+YY], XX XX XX XX
872       return 8;
873       // clang-format on
874 
875     case 0x058b48:    // 48 8b 05 XX XX XX XX :
876                       //   mov rax, QWORD PTR [rip + XXXXXXXX]
877     case 0x058d48:    // 48 8d 05 XX XX XX XX :
878                       //   lea rax, QWORD PTR [rip + XXXXXXXX]
879     case 0x0d8948:    // 48 89 0d XX XX XX XX :
880                       //   mov QWORD PTR [rip + XXXXXXXX], rcx
881     case 0x158948:    // 48 89 15 XX XX XX XX :
882                       //   mov QWORD PTR [rip + XXXXXXXX], rdx
883     case 0x25ff48:    // 48 ff 25 XX XX XX XX :
884                       //   rex.W jmp QWORD PTR [rip + XXXXXXXX]
885     case 0x158D4C:    // 4c 8d 15 XX XX XX XX : lea r10, [rip + XX]
886       // Instructions having offset relative to 'rip' need offset adjustment.
887       if (rel_offset)
888         *rel_offset = 3;
889       return 7;
890 
891     case 0x2444c7:    // C7 44 24 XX YY YY YY YY
892                       //   mov dword ptr [rsp + XX], YYYYYYYY
893       return 8;
894 
895     case 0x7c8141:  // 41 81 7c ZZ YY XX XX XX XX
896                     // cmp DWORD PTR [reg+reg*n+YY], XX XX XX XX
897       return 9;
898   }
899 
900   switch (*(u32*)(address)) {
901     case 0x01b60f44:  // 44 0f b6 01 : movzx r8d, BYTE PTR [rcx]
902     case 0x09b60f44:  // 44 0f b6 09 : movzx r9d, BYTE PTR [rcx]
903     case 0x0ab60f44:  // 44 0f b6 0a : movzx r8d, BYTE PTR [rdx]
904     case 0x11b60f44:  // 44 0f b6 11 : movzx r10d, BYTE PTR [rcx]
905     case 0x1ab60f44:  // 44 0f b6 1a : movzx r11d, BYTE PTR [rdx]
906       return 4;
907     case 0x24448b48:  // 48 8b 44 24 XX : mov rax, QWORD ptr [rsp + XX]
908     case 0x246c8948:  // 48 89 6C 24 XX : mov QWORD ptr [rsp + XX], rbp
909     case 0x245c8948:  // 48 89 5c 24 XX : mov QWORD PTR [rsp + XX], rbx
910     case 0x24748948:  // 48 89 74 24 XX : mov QWORD PTR [rsp + XX], rsi
911     case 0x247c8948:  // 48 89 7c 24 XX : mov QWORD PTR [rsp + XX], rdi
912     case 0x244C8948:  // 48 89 4C 24 XX : mov QWORD PTR [rsp + XX], rcx
913     case 0x24548948:  // 48 89 54 24 XX : mov QWORD PTR [rsp + XX], rdx
914     case 0x244c894c:  // 4c 89 4c 24 XX : mov QWORD PTR [rsp + XX], r9
915     case 0x2444894c:  // 4c 89 44 24 XX : mov QWORD PTR [rsp + XX], r8
916     case 0x244c8944:  // 44 89 4c 24 XX   mov DWORD PTR [rsp + XX], r9d
917     case 0x24448944:  // 44 89 44 24 XX   mov DWORD PTR [rsp + XX], r8d
918     case 0x246c8d48:  // 48 8d 6c 24 XX : lea rbp, [rsp + XX]
919       return 5;
920     case 0x24648348:  // 48 83 64 24 XX YY : and QWORD PTR [rsp + XX], YY
921       return 6;
922     case 0x24A48D48:  // 48 8D A4 24 XX XX XX XX : lea rsp, [rsp + XX XX XX XX]
923       return 8;
924   }
925 
926   switch (0xFFFFFFFFFFULL & *(u64 *)(address)) {
927     case 0xC07E0F4866:  // 66 48 0F 7E C0 : movq rax, xmm0
928       return 5;
929   }
930 
931 #else
932 
933   switch (*(u8*)address) {
934     case 0xA1:  // A1 XX XX XX XX :  mov eax, dword ptr ds:[XXXXXXXX]
935       return 5;
936   }
937   switch (*(u16*)address) {
938     case 0x458B:  // 8B 45 XX : mov eax, dword ptr [ebp + XX]
939     case 0x5D8B:  // 8B 5D XX : mov ebx, dword ptr [ebp + XX]
940     case 0x7D8B:  // 8B 7D XX : mov edi, dword ptr [ebp + XX]
941     case 0x758B:  // 8B 75 XX : mov esi, dword ptr [ebp + XX]
942     case 0x75FF:  // FF 75 XX : push dword ptr [ebp + XX]
943       return 3;
944     case 0xC1F7:  // F7 C1 XX YY ZZ WW : test ecx, WWZZYYXX
945       return 6;
946     case 0x3D83:  // 83 3D XX YY ZZ WW TT : cmp TT, WWZZYYXX
947       return 7;
948     case 0x7D83:  // 83 7D XX YY : cmp dword ptr [ebp + XX], YY
949       return 4;
950   }
951 
952   switch (0x00FFFFFF & *(u32*)address) {
953     case 0x24448A:  // 8A 44 24 XX : mov eal, dword ptr [esp + XX]
954     case 0x24448B:  // 8B 44 24 XX : mov eax, dword ptr [esp + XX]
955     case 0x244C8B:  // 8B 4C 24 XX : mov ecx, dword ptr [esp + XX]
956     case 0x24548B:  // 8B 54 24 XX : mov edx, dword ptr [esp + XX]
957     case 0x245C8B:  // 8B 5C 24 XX : mov ebx, dword ptr [esp + XX]
958     case 0x246C8B:  // 8B 6C 24 XX : mov ebp, dword ptr [esp + XX]
959     case 0x24748B:  // 8B 74 24 XX : mov esi, dword ptr [esp + XX]
960     case 0x247C8B:  // 8B 7C 24 XX : mov edi, dword ptr [esp + XX]
961       return 4;
962   }
963 
964   switch (*(u32*)address) {
965     case 0x2444B60F:  // 0F B6 44 24 XX : movzx eax, byte ptr [esp + XX]
966       return 5;
967   }
968 #endif
969 
970   // Unknown instruction! This might happen when we add a new interceptor, use
971   // a new compiler version, or if Windows changed how some functions are
972   // compiled. In any case, we print the address and 8 bytes of instructions
973   // to notify the user about the error and to help identify the unknown
974   // instruction. Don't treat this as a fatal error, though we do break into
975   // the debugger if one has been attached.
976   u8 *bytes = (u8 *)address;
977   ReportError(
978       "interception_win: unhandled instruction at %p: %02x %02x %02x %02x %02x "
979       "%02x %02x %02x\n",
980       (void *)address, bytes[0], bytes[1], bytes[2], bytes[3], bytes[4],
981       bytes[5], bytes[6], bytes[7]);
982   if (::IsDebuggerPresent())
983     __debugbreak();
984   return 0;
985 }
986 
987 size_t TestOnlyGetInstructionSize(uptr address, size_t *rel_offset) {
988   return GetInstructionSize(address, rel_offset);
989 }
990 
991 // Returns 0 on error.
992 static size_t RoundUpToInstrBoundary(size_t size, uptr address) {
993   size_t cursor = 0;
994   while (cursor < size) {
995     size_t instruction_size = GetInstructionSize(address + cursor);
996     if (!instruction_size)
997       return 0;
998     cursor += instruction_size;
999   }
1000   return cursor;
1001 }
1002 
1003 static bool CopyInstructions(uptr to, uptr from, size_t size) {
1004   size_t cursor = 0;
1005   while (cursor != size) {
1006     size_t rel_offset = 0;
1007     size_t instruction_size = GetInstructionSize(from + cursor, &rel_offset);
1008     if (!instruction_size)
1009       return false;
1010     _memcpy((void *)(to + cursor), (void *)(from + cursor),
1011             (size_t)instruction_size);
1012     if (rel_offset) {
1013 #  if SANITIZER_WINDOWS64
1014       // we want to make sure that the new relative offset still fits in 32-bits
1015       // this will be untrue if relocated_offset \notin [-2**31, 2**31)
1016       s64 delta = to - from;
1017       s64 relocated_offset = *(s32 *)(to + cursor + rel_offset) - delta;
1018       if (-0x8000'0000ll > relocated_offset ||
1019           relocated_offset > 0x7FFF'FFFFll) {
1020         ReportError(
1021             "interception_win: CopyInstructions relocated_offset %lld outside "
1022             "32-bit range\n",
1023             (long long)relocated_offset);
1024         return false;
1025       }
1026 #  else
1027       // on 32-bit, the relative offset will always be correct
1028       s32 delta = to - from;
1029       s32 relocated_offset = *(s32 *)(to + cursor + rel_offset) - delta;
1030 #  endif
1031       *(s32 *)(to + cursor + rel_offset) = relocated_offset;
1032     }
1033     cursor += instruction_size;
1034   }
1035   return true;
1036 }
1037 
1038 
1039 #if !SANITIZER_WINDOWS64
1040 bool OverrideFunctionWithDetour(
1041     uptr old_func, uptr new_func, uptr *orig_old_func) {
1042   const int kDetourHeaderLen = 5;
1043   const u16 kDetourInstruction = 0xFF8B;
1044 
1045   uptr header = (uptr)old_func - kDetourHeaderLen;
1046   uptr patch_length = kDetourHeaderLen + kShortJumpInstructionLength;
1047 
1048   // Validate that the function is hookable.
1049   if (*(u16*)old_func != kDetourInstruction ||
1050       !IsMemoryPadding(header, kDetourHeaderLen))
1051     return false;
1052 
1053   // Change memory protection to writable.
1054   DWORD protection = 0;
1055   if (!ChangeMemoryProtection(header, patch_length, &protection))
1056     return false;
1057 
1058   // Write a relative jump to the redirected function.
1059   WriteJumpInstruction(header, new_func);
1060 
1061   // Write the short jump to the function prefix.
1062   WriteShortJumpInstruction(old_func, header);
1063 
1064   // Restore previous memory protection.
1065   if (!RestoreMemoryProtection(header, patch_length, protection))
1066     return false;
1067 
1068   if (orig_old_func)
1069     *orig_old_func = old_func + kShortJumpInstructionLength;
1070 
1071   return true;
1072 }
1073 #endif
1074 
1075 bool OverrideFunctionWithRedirectJump(
1076     uptr old_func, uptr new_func, uptr *orig_old_func) {
1077   // Check whether the first instruction is a relative jump.
1078   if (*(u8*)old_func != 0xE9)
1079     return false;
1080 
1081   if (orig_old_func) {
1082     sptr relative_offset = *(s32 *)(old_func + 1);
1083     uptr absolute_target = old_func + relative_offset + kJumpInstructionLength;
1084     *orig_old_func = absolute_target;
1085   }
1086 
1087 #if SANITIZER_WINDOWS64
1088   // If needed, get memory space for a trampoline jump.
1089   uptr trampoline = AllocateMemoryForTrampoline(old_func, kDirectBranchLength);
1090   if (!trampoline)
1091     return false;
1092   WriteDirectBranch(trampoline, new_func);
1093 #endif
1094 
1095   // Change memory protection to writable.
1096   DWORD protection = 0;
1097   if (!ChangeMemoryProtection(old_func, kJumpInstructionLength, &protection))
1098     return false;
1099 
1100   // Write a relative jump to the redirected function.
1101   WriteJumpInstruction(old_func, FIRST_32_SECOND_64(new_func, trampoline));
1102 
1103   // Restore previous memory protection.
1104   if (!RestoreMemoryProtection(old_func, kJumpInstructionLength, protection))
1105     return false;
1106 
1107   return true;
1108 }
1109 
1110 bool OverrideFunctionWithHotPatch(
1111     uptr old_func, uptr new_func, uptr *orig_old_func) {
1112   const int kHotPatchHeaderLen = kBranchLength;
1113 
1114   uptr header = (uptr)old_func - kHotPatchHeaderLen;
1115   uptr patch_length = kHotPatchHeaderLen + kShortJumpInstructionLength;
1116 
1117   // Validate that the function is hot patchable.
1118   size_t instruction_size = GetInstructionSize(old_func);
1119   if (instruction_size < kShortJumpInstructionLength ||
1120       !FunctionHasPadding(old_func, kHotPatchHeaderLen))
1121     return false;
1122 
1123   if (orig_old_func) {
1124     // Put the needed instructions into the trampoline bytes.
1125     uptr trampoline_length = instruction_size + kDirectBranchLength;
1126     uptr trampoline = AllocateMemoryForTrampoline(old_func, trampoline_length);
1127     if (!trampoline)
1128       return false;
1129     if (!CopyInstructions(trampoline, old_func, instruction_size))
1130       return false;
1131     WriteDirectBranch(trampoline + instruction_size,
1132                       old_func + instruction_size);
1133     *orig_old_func = trampoline;
1134   }
1135 
1136   // If needed, get memory space for indirect address.
1137   uptr indirect_address = 0;
1138 #if SANITIZER_WINDOWS64
1139   indirect_address = AllocateMemoryForTrampoline(old_func, kAddressLength);
1140   if (!indirect_address)
1141     return false;
1142 #endif
1143 
1144   // Change memory protection to writable.
1145   DWORD protection = 0;
1146   if (!ChangeMemoryProtection(header, patch_length, &protection))
1147     return false;
1148 
1149   // Write jumps to the redirected function.
1150   WriteBranch(header, indirect_address, new_func);
1151   WriteShortJumpInstruction(old_func, header);
1152 
1153   // Restore previous memory protection.
1154   if (!RestoreMemoryProtection(header, patch_length, protection))
1155     return false;
1156 
1157   return true;
1158 }
1159 
1160 bool OverrideFunctionWithTrampoline(
1161     uptr old_func, uptr new_func, uptr *orig_old_func) {
1162 
1163   size_t instructions_length = kBranchLength;
1164   size_t padding_length = 0;
1165   uptr indirect_address = 0;
1166 
1167   if (orig_old_func) {
1168     // Find out the number of bytes of the instructions we need to copy
1169     // to the trampoline.
1170     instructions_length = RoundUpToInstrBoundary(kBranchLength, old_func);
1171     if (!instructions_length)
1172       return false;
1173 
1174     // Put the needed instructions into the trampoline bytes.
1175     uptr trampoline_length = instructions_length + kDirectBranchLength;
1176     uptr trampoline = AllocateMemoryForTrampoline(old_func, trampoline_length);
1177     if (!trampoline)
1178       return false;
1179     if (!CopyInstructions(trampoline, old_func, instructions_length))
1180       return false;
1181     WriteDirectBranch(trampoline + instructions_length,
1182                       old_func + instructions_length);
1183     *orig_old_func = trampoline;
1184   }
1185 
1186 #if SANITIZER_WINDOWS64
1187   // Check if the targeted address can be encoded in the function padding.
1188   // Otherwise, allocate it in the trampoline region.
1189   if (IsMemoryPadding(old_func - kAddressLength, kAddressLength)) {
1190     indirect_address = old_func - kAddressLength;
1191     padding_length = kAddressLength;
1192   } else {
1193     indirect_address = AllocateMemoryForTrampoline(old_func, kAddressLength);
1194     if (!indirect_address)
1195       return false;
1196   }
1197 #endif
1198 
1199   // Change memory protection to writable.
1200   uptr patch_address = old_func - padding_length;
1201   uptr patch_length = instructions_length + padding_length;
1202   DWORD protection = 0;
1203   if (!ChangeMemoryProtection(patch_address, patch_length, &protection))
1204     return false;
1205 
1206   // Patch the original function.
1207   WriteBranch(old_func, indirect_address, new_func);
1208 
1209   // Restore previous memory protection.
1210   if (!RestoreMemoryProtection(patch_address, patch_length, protection))
1211     return false;
1212 
1213   return true;
1214 }
1215 
1216 bool OverrideFunction(
1217     uptr old_func, uptr new_func, uptr *orig_old_func) {
1218 #if !SANITIZER_WINDOWS64
1219   if (OverrideFunctionWithDetour(old_func, new_func, orig_old_func))
1220     return true;
1221 #endif
1222   if (OverrideFunctionWithRedirectJump(old_func, new_func, orig_old_func))
1223     return true;
1224   if (OverrideFunctionWithHotPatch(old_func, new_func, orig_old_func))
1225     return true;
1226   if (OverrideFunctionWithTrampoline(old_func, new_func, orig_old_func))
1227     return true;
1228   return false;
1229 }
1230 
1231 static void **InterestingDLLsAvailable() {
1232   static const char *InterestingDLLs[] = {
1233     "kernel32.dll",
1234     "msvcr100d.dll",      // VS2010
1235     "msvcr110d.dll",      // VS2012
1236     "msvcr120d.dll",      // VS2013
1237     "vcruntime140d.dll",  // VS2015
1238     "ucrtbased.dll",      // Universal CRT
1239     "msvcr100.dll",       // VS2010
1240     "msvcr110.dll",       // VS2012
1241     "msvcr120.dll",       // VS2013
1242     "vcruntime140.dll",   // VS2015
1243     "ucrtbase.dll",       // Universal CRT
1244 #  if (defined(__MINGW32__) && defined(__i386__))
1245     "libc++.dll",     // libc++
1246     "libunwind.dll",  // libunwind
1247 #  endif
1248     // NTDLL must go last as it gets special treatment in OverrideFunction.
1249     "ntdll.dll",
1250     NULL
1251   };
1252   static void *result[ARRAY_SIZE(InterestingDLLs)] = { 0 };
1253   if (!result[0]) {
1254     for (size_t i = 0, j = 0; InterestingDLLs[i]; ++i) {
1255       if (HMODULE h = GetModuleHandleA(InterestingDLLs[i]))
1256         result[j++] = (void *)h;
1257     }
1258   }
1259   return &result[0];
1260 }
1261 
1262 namespace {
1263 // Utility for reading loaded PE images.
1264 template <typename T> class RVAPtr {
1265  public:
1266   RVAPtr(void *module, uptr rva)
1267       : ptr_(reinterpret_cast<T *>(reinterpret_cast<char *>(module) + rva)) {}
1268   operator T *() { return ptr_; }
1269   T *operator->() { return ptr_; }
1270   T *operator++() { return ++ptr_; }
1271 
1272  private:
1273   T *ptr_;
1274 };
1275 } // namespace
1276 
1277 // Internal implementation of GetProcAddress. At least since Windows 8,
1278 // GetProcAddress appears to initialize DLLs before returning function pointers
1279 // into them. This is problematic for the sanitizers, because they typically
1280 // want to intercept malloc *before* MSVCRT initializes. Our internal
1281 // implementation walks the export list manually without doing initialization.
1282 uptr InternalGetProcAddress(void *module, const char *func_name) {
1283   // Check that the module header is full and present.
1284   RVAPtr<IMAGE_DOS_HEADER> dos_stub(module, 0);
1285   RVAPtr<IMAGE_NT_HEADERS> headers(module, dos_stub->e_lfanew);
1286   if (!module || dos_stub->e_magic != IMAGE_DOS_SIGNATURE ||  // "MZ"
1287       headers->Signature != IMAGE_NT_SIGNATURE ||             // "PE\0\0"
1288       headers->FileHeader.SizeOfOptionalHeader <
1289           sizeof(IMAGE_OPTIONAL_HEADER)) {
1290     return 0;
1291   }
1292 
1293   IMAGE_DATA_DIRECTORY *export_directory =
1294       &headers->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT];
1295   if (export_directory->Size == 0)
1296     return 0;
1297   RVAPtr<IMAGE_EXPORT_DIRECTORY> exports(module,
1298                                          export_directory->VirtualAddress);
1299   RVAPtr<DWORD> functions(module, exports->AddressOfFunctions);
1300   RVAPtr<DWORD> names(module, exports->AddressOfNames);
1301   RVAPtr<WORD> ordinals(module, exports->AddressOfNameOrdinals);
1302 
1303   for (DWORD i = 0; i < exports->NumberOfNames; i++) {
1304     RVAPtr<char> name(module, names[i]);
1305     if (!_strcmp(func_name, name)) {
1306       DWORD index = ordinals[i];
1307       RVAPtr<char> func(module, functions[index]);
1308 
1309       // Handle forwarded functions.
1310       DWORD offset = functions[index];
1311       if (offset >= export_directory->VirtualAddress &&
1312           offset < export_directory->VirtualAddress + export_directory->Size) {
1313         // An entry for a forwarded function is a string with the following
1314         // format: "<module> . <function_name>" that is stored into the
1315         // exported directory.
1316         char function_name[256];
1317         size_t funtion_name_length = _strlen(func);
1318         if (funtion_name_length >= sizeof(function_name) - 1) {
1319           ReportError("interception_win: func too long: '%s'\n", (char *)func);
1320           InterceptionFailed();
1321         }
1322 
1323         _memcpy(function_name, func, funtion_name_length);
1324         function_name[funtion_name_length] = '\0';
1325         char* separator = _strchr(function_name, '.');
1326         if (!separator) {
1327           ReportError("interception_win: no separator in '%s'\n",
1328                       function_name);
1329           InterceptionFailed();
1330         }
1331         *separator = '\0';
1332 
1333         void* redirected_module = GetModuleHandleA(function_name);
1334         if (!redirected_module) {
1335           ReportError("interception_win: GetModuleHandleA failed for '%s'\n",
1336                       function_name);
1337           InterceptionFailed();
1338         }
1339         return InternalGetProcAddress(redirected_module, separator + 1);
1340       }
1341 
1342       return (uptr)(char *)func;
1343     }
1344   }
1345 
1346   return 0;
1347 }
1348 
1349 bool OverrideFunction(
1350     const char *func_name, uptr new_func, uptr *orig_old_func) {
1351   static const char *kNtDllIgnore[] = {
1352     "memcmp", "memcpy", "memmove", "memset"
1353   };
1354 
1355   bool hooked = false;
1356   void **DLLs = InterestingDLLsAvailable();
1357   for (size_t i = 0; DLLs[i]; ++i) {
1358     if (DLLs[i + 1] == nullptr) {
1359       // This is the last DLL, i.e. NTDLL. It exports some functions that
1360       // we only want to override in the CRT.
1361       for (const char *ignored : kNtDllIgnore) {
1362         if (_strcmp(func_name, ignored) == 0)
1363           return hooked;
1364       }
1365     }
1366 
1367     uptr func_addr = InternalGetProcAddress(DLLs[i], func_name);
1368     if (func_addr &&
1369         OverrideFunction(func_addr, new_func, orig_old_func)) {
1370       hooked = true;
1371     }
1372   }
1373   return hooked;
1374 }
1375 
1376 bool OverrideImportedFunction(const char *module_to_patch,
1377                               const char *imported_module,
1378                               const char *function_name, uptr new_function,
1379                               uptr *orig_old_func) {
1380   HMODULE module = GetModuleHandleA(module_to_patch);
1381   if (!module)
1382     return false;
1383 
1384   // Check that the module header is full and present.
1385   RVAPtr<IMAGE_DOS_HEADER> dos_stub(module, 0);
1386   RVAPtr<IMAGE_NT_HEADERS> headers(module, dos_stub->e_lfanew);
1387   if (!module || dos_stub->e_magic != IMAGE_DOS_SIGNATURE ||  // "MZ"
1388       headers->Signature != IMAGE_NT_SIGNATURE ||             // "PE\0\0"
1389       headers->FileHeader.SizeOfOptionalHeader <
1390           sizeof(IMAGE_OPTIONAL_HEADER)) {
1391     return false;
1392   }
1393 
1394   IMAGE_DATA_DIRECTORY *import_directory =
1395       &headers->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT];
1396 
1397   // Iterate the list of imported DLLs. FirstThunk will be null for the last
1398   // entry.
1399   RVAPtr<IMAGE_IMPORT_DESCRIPTOR> imports(module,
1400                                           import_directory->VirtualAddress);
1401   for (; imports->FirstThunk != 0; ++imports) {
1402     RVAPtr<const char> modname(module, imports->Name);
1403     if (_stricmp(&*modname, imported_module) == 0)
1404       break;
1405   }
1406   if (imports->FirstThunk == 0)
1407     return false;
1408 
1409   // We have two parallel arrays: the import address table (IAT) and the table
1410   // of names. They start out containing the same data, but the loader rewrites
1411   // the IAT to hold imported addresses and leaves the name table in
1412   // OriginalFirstThunk alone.
1413   RVAPtr<IMAGE_THUNK_DATA> name_table(module, imports->OriginalFirstThunk);
1414   RVAPtr<IMAGE_THUNK_DATA> iat(module, imports->FirstThunk);
1415   for (; name_table->u1.Ordinal != 0; ++name_table, ++iat) {
1416     if (!IMAGE_SNAP_BY_ORDINAL(name_table->u1.Ordinal)) {
1417       RVAPtr<IMAGE_IMPORT_BY_NAME> import_by_name(
1418           module, name_table->u1.ForwarderString);
1419       const char *funcname = &import_by_name->Name[0];
1420       if (_strcmp(funcname, function_name) == 0)
1421         break;
1422     }
1423   }
1424   if (name_table->u1.Ordinal == 0)
1425     return false;
1426 
1427   // Now we have the correct IAT entry. Do the swap. We have to make the page
1428   // read/write first.
1429   if (orig_old_func)
1430     *orig_old_func = iat->u1.AddressOfData;
1431   DWORD old_prot, unused_prot;
1432   if (!VirtualProtect(&iat->u1.AddressOfData, 4, PAGE_EXECUTE_READWRITE,
1433                       &old_prot))
1434     return false;
1435   iat->u1.AddressOfData = new_function;
1436   if (!VirtualProtect(&iat->u1.AddressOfData, 4, old_prot, &unused_prot))
1437     return false;  // Not clear if this failure bothers us.
1438   return true;
1439 }
1440 
1441 }  // namespace __interception
1442 
1443 #endif  // SANITIZER_WINDOWS
1444