   1//===-- interception_win.cpp ------------------------------------*- C++ -*-===//
   2//
   3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4// See https://llvm.org/LICENSE.txt for license information.
   5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6//
   7//===----------------------------------------------------------------------===//
   8//
   9// This file is a part of AddressSanitizer, an address sanity checker.
  10//
  11// Windows-specific interception methods.
  12//
   13// This file implements several hooking techniques used to intercept calls
   14// to functions. The hooks are installed dynamically by modifying the assembly
   15// code.
  16//
   17// The hooking techniques make assumptions about the way the code is
   18// generated and are only safe under these assumptions.
  19//
   20// On 64-bit architectures, there is no direct 64-bit jump instruction. To
   21// allow arbitrary branching over the whole address space, trampoline regions
   22// are used. A trampoline region is a memory area within a 2GB boundary where
   23// it is safe to add custom assembly code to build 64-bit jumps.
  24//
  25// Hooking techniques
  26// ==================
  27//
  28// 1) Detour
  29//
   30//    The Detour hooking technique assumes the presence of a header with
   31//    padding and an overridable 2-byte nop instruction (mov edi, edi). The
   32//    nop instruction can safely be replaced by a 2-byte jump without any need
   33//    to save the instruction. A jump to the hook is encoded in the function
   34//    header and the nop instruction is replaced by a short jump to the header.
  35//
  36//        head:  5 x nop                 head:  jmp <hook>
  37//        func:  mov edi, edi    -->     func:  jmp short <head>
  38//               [...]                   real:  [...]
  39//
   40//    This technique is only implemented on 32-bit architectures.
   41//    Most of the time, Windows APIs are hookable with the detour technique.
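//
//    As an illustrative byte-level sketch (padding bytes may be 0xCC or 0x90),
//    the patch performed by OverrideFunctionWithDetour looks like:
//
//        before:  CC CC CC CC CC | 8B FF ...
//        after:   E9 <rel32 to hook> | EB F9    ; jmp short -7, into the header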
  42//
  43// 2) Redirect Jump
  44//
  45//    The redirect jump is applicable when the first instruction is a direct
   46//    jump. The instruction is replaced by a jump to the hook.
  47//
  48//        func:  jmp <label>     -->     func:  jmp <hook>
  49//
  50//    On a 64-bit architecture, a trampoline is inserted.
  51//
  52//        func:  jmp <label>     -->     func:  jmp <tramp>
  53//                                              [...]
  54//
  55//                                   [trampoline]
  56//                                      tramp:  jmp QWORD [addr]
  57//                                       addr:  .bytes <hook>
  58//
  59//    Note: <real> is equivalent to <label>.
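//
//    As a byte-level sketch, the 64-bit trampoline written by WriteDirectBranch
//    is an indirect jump through the 8 bytes immediately following it:
//
//        FF 25 00 00 00 00          ; jmp QWORD [rip + 0]
//        XX XX XX XX XX XX XX XX    ; 8-byte absolute address of <hook>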
  60//
  61// 3) HotPatch
  62//
   63//    HotPatch hooking assumes the presence of a header with padding and a
   64//    first instruction that is at least 2 bytes long.
   65//
   66//    The 2-byte requirement provides the minimal space needed to encode a
   67//    short jump. The HotPatch technique rewrites only one instruction, in
   68//    order to avoid breaking a sequence of instructions that contains a
   69//    branch target.
  70//
   71//    These assumptions are enforced by the MSVC compiler using the /HOTPATCH flag.
   72//      see: https://msdn.microsoft.com/en-us/library/ms173507.aspx
   73//    The default padding length is 5 bytes on 32-bit and 6 bytes on 64-bit.
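//
//    For example (illustrative build invocation), a hot-patchable image is
//    typically produced with the MSVC /hotpatch compiler flag and the
//    /FUNCTIONPADMIN linker flag:
//
//        cl /c /hotpatch foo.c
//        link /FUNCTIONPADMIN foo.obj
//
//    /hotpatch guarantees a first instruction of at least 2 bytes, and
//    /FUNCTIONPADMIN reserves the padding bytes in front of each function.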
  74//
  75//        head:   5 x nop                head:  jmp <hook>
  76//        func:   <instr>        -->     func:  jmp short <head>
  77//                [...]                  body:  [...]
  78//
  79//                                   [trampoline]
  80//                                       real:  <instr>
  81//                                              jmp <body>
  82//
  83//    On a 64-bit architecture:
  84//
  85//        head:   6 x nop                head:  jmp QWORD [addr1]
  86//        func:   <instr>        -->     func:  jmp short <head>
  87//                [...]                  body:  [...]
  88//
  89//                                   [trampoline]
  90//                                      addr1:  .bytes <hook>
  91//                                       real:  <instr>
  92//                                              jmp QWORD [addr2]
  93//                                      addr2:  .bytes <body>
  94//
  95// 4) Trampoline
  96//
   97//    The Trampoline hooking technique is the most aggressive one. It
   98//    assumes that there is a sequence of instructions that can be safely
   99//    replaced by a jump (enough room and no incoming branches).
  100//
  101//    Unfortunately, these assumptions cannot be verified, and code may
  102//    be broken after hooking.
 103//
 104//        func:   <instr>        -->     func:  jmp <hook>
 105//                <instr>
 106//                [...]                  body:  [...]
 107//
 108//                                   [trampoline]
 109//                                       real:  <instr>
 110//                                              <instr>
 111//                                              jmp <body>
 112//
 113//    On a 64-bit architecture:
 114//
 115//        func:   <instr>        -->     func:  jmp QWORD [addr1]
 116//                <instr>
 117//                [...]                  body:  [...]
 118//
 119//                                   [trampoline]
 120//                                      addr1:  .bytes <hook>
 121//                                       real:  <instr>
 122//                                              <instr>
 123//                                              jmp QWORD [addr2]
 124//                                      addr2:  .bytes <body>
 125//===----------------------------------------------------------------------===//
 126
 127#include "interception.h"
 128
 129#if SANITIZER_WINDOWS
 130#include "sanitizer_common/sanitizer_platform.h"
 131#define WIN32_LEAN_AND_MEAN
 132#include <windows.h>
 133#include <psapi.h>
 134
 135namespace __interception {
 136
 137static const int kAddressLength = FIRST_32_SECOND_64(4, 8);
 138static const int kJumpInstructionLength = 5;
 139static const int kShortJumpInstructionLength = 2;
 140UNUSED static const int kIndirectJumpInstructionLength = 6;
 141static const int kBranchLength =
 142    FIRST_32_SECOND_64(kJumpInstructionLength, kIndirectJumpInstructionLength);
 143static const int kDirectBranchLength = kBranchLength + kAddressLength;
 144
 145#  if defined(_MSC_VER)
 146#    define INTERCEPTION_FORMAT(f, a)
 147#  else
 148#    define INTERCEPTION_FORMAT(f, a) __attribute__((format(printf, f, a)))
 149#  endif
 150
 151static void (*ErrorReportCallback)(const char *format, ...)
 152    INTERCEPTION_FORMAT(1, 2);
 153
 154void SetErrorReportCallback(void (*callback)(const char *format, ...)) {
 155  ErrorReportCallback = callback;
 156}
 157
 158#  define ReportError(...)                \
 159    do {                                  \
 160      if (ErrorReportCallback)            \
 161        ErrorReportCallback(__VA_ARGS__); \
 162    } while (0)
 163
 164static void InterceptionFailed() {
 165  ReportError("interception_win: failed due to an unrecoverable error.\n");
 166  // This acts like an abort when no debugger is attached. According to an old
 167  // comment, calling abort() leads to an infinite recursion in CheckFailed.
 168  __debugbreak();
 169}
 170
 171static bool DistanceIsWithin2Gig(uptr from, uptr target) {
 172#if SANITIZER_WINDOWS64
 173  if (from < target)
 174    return target - from <= (uptr)0x7FFFFFFFU;
 175  else
 176    return from - target <= (uptr)0x80000000U;
 177#else
 178  // In a 32-bit address space, the address calculation will wrap, so this check
 179  // is unnecessary.
 180  return true;
 181#endif
 182}
 183
 184static uptr GetMmapGranularity() {
 185  SYSTEM_INFO si;
 186  GetSystemInfo(&si);
 187  return si.dwAllocationGranularity;
 188}
 189
 190UNUSED static uptr RoundDownTo(uptr size, uptr boundary) {
 191  return size & ~(boundary - 1);
 192}
 193
 194UNUSED static uptr RoundUpTo(uptr size, uptr boundary) {
 195  return RoundDownTo(size + boundary - 1, boundary);
 196}
 197
 198// FIXME: internal_str* and internal_mem* functions should be moved from the
 199// ASan sources into interception/.
 200
 201static size_t _strlen(const char *str) {
 202  const char* p = str;
 203  while (*p != '\0') ++p;
 204  return p - str;
 205}
 206
 207static char* _strchr(char* str, char c) {
 208  while (*str) {
 209    if (*str == c)
 210      return str;
 211    ++str;
 212  }
 213  return nullptr;
 214}
 215
 216static int _strcmp(const char *s1, const char *s2) {
 217  while (true) {
 218    unsigned c1 = *s1;
 219    unsigned c2 = *s2;
 220    if (c1 != c2) return (c1 < c2) ? -1 : 1;
 221    if (c1 == 0) break;
 222    s1++;
 223    s2++;
 224  }
 225  return 0;
 226}
 227
 228static void _memset(void *p, int value, size_t sz) {
 229  for (size_t i = 0; i < sz; ++i)
 230    ((char*)p)[i] = (char)value;
 231}
 232
 233static void _memcpy(void *dst, void *src, size_t sz) {
 234  char *dst_c = (char*)dst,
 235       *src_c = (char*)src;
 236  for (size_t i = 0; i < sz; ++i)
 237    dst_c[i] = src_c[i];
 238}
 239
 240static bool ChangeMemoryProtection(
 241    uptr address, uptr size, DWORD *old_protection) {
 242  return ::VirtualProtect((void*)address, size,
 243                          PAGE_EXECUTE_READWRITE,
 244                          old_protection) != FALSE;
 245}
 246
 247static bool RestoreMemoryProtection(
 248    uptr address, uptr size, DWORD old_protection) {
 249  DWORD unused;
 250  return ::VirtualProtect((void*)address, size,
 251                          old_protection,
 252                          &unused) != FALSE;
 253}
 254
 255static bool IsMemoryPadding(uptr address, uptr size) {
 256  u8* function = (u8*)address;
 257  for (size_t i = 0; i < size; ++i)
 258    if (function[i] != 0x90 && function[i] != 0xCC)
 259      return false;
 260  return true;
 261}
 262
 263static const u8 kHintNop8Bytes[] = {
 264  0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00
 265};
 266
 267template<class T>
 268static bool FunctionHasPrefix(uptr address, const T &pattern) {
 269  u8* function = (u8*)address - sizeof(pattern);
 270  for (size_t i = 0; i < sizeof(pattern); ++i)
 271    if (function[i] != pattern[i])
 272      return false;
 273  return true;
 274}
 275
 276static bool FunctionHasPadding(uptr address, uptr size) {
 277  if (IsMemoryPadding(address - size, size))
 278    return true;
 279  if (size <= sizeof(kHintNop8Bytes) &&
 280      FunctionHasPrefix(address, kHintNop8Bytes))
 281    return true;
 282  return false;
 283}
 284
 285static void WritePadding(uptr from, uptr size) {
 286  _memset((void*)from, 0xCC, (size_t)size);
 287}
 288
 289static void WriteJumpInstruction(uptr from, uptr target) {
 290  if (!DistanceIsWithin2Gig(from + kJumpInstructionLength, target)) {
 291    ReportError(
 292        "interception_win: cannot write jmp further than 2GB away, from %p to "
 293        "%p.\n",
 294        (void *)from, (void *)target);
 295    InterceptionFailed();
 296  }
 297  ptrdiff_t offset = target - from - kJumpInstructionLength;
 298  *(u8*)from = 0xE9;
 299  *(u32*)(from + 1) = offset;
 300}
 301
 302static void WriteShortJumpInstruction(uptr from, uptr target) {
 303  sptr offset = target - from - kShortJumpInstructionLength;
 304  if (offset < -128 || offset > 127) {
 305    ReportError("interception_win: cannot write short jmp from %p to %p\n",
 306                (void *)from, (void *)target);
 307    InterceptionFailed();
 308  }
 309  *(u8*)from = 0xEB;
 310  *(u8*)(from + 1) = (u8)offset;
 311}
 312
 313#if SANITIZER_WINDOWS64
 314static void WriteIndirectJumpInstruction(uptr from, uptr indirect_target) {
 315  // jmp [rip + <offset>] = FF 25 <offset> where <offset> is a relative
 316  // offset.
  317  // The offset is the distance from the end of the jump instruction to the
 318  // memory location containing the targeted address. The displacement is still
 319  // 32-bit in x64, so indirect_target must be located within +/- 2GB range.
 320  int offset = indirect_target - from - kIndirectJumpInstructionLength;
 321  if (!DistanceIsWithin2Gig(from + kIndirectJumpInstructionLength,
 322                            indirect_target)) {
 323    ReportError(
 324        "interception_win: cannot write indirect jmp with target further than "
 325        "2GB away, from %p to %p.\n",
 326        (void *)from, (void *)indirect_target);
 327    InterceptionFailed();
 328  }
 329  *(u16*)from = 0x25FF;
 330  *(u32*)(from + 2) = offset;
 331}
 332#endif
 333
 334static void WriteBranch(
 335    uptr from, uptr indirect_target, uptr target) {
 336#if SANITIZER_WINDOWS64
 337  WriteIndirectJumpInstruction(from, indirect_target);
 338  *(u64*)indirect_target = target;
 339#else
 340  (void)indirect_target;
 341  WriteJumpInstruction(from, target);
 342#endif
 343}
 344
 345static void WriteDirectBranch(uptr from, uptr target) {
 346#if SANITIZER_WINDOWS64
 347  // Emit an indirect jump through immediately following bytes:
 348  //   jmp [rip + kBranchLength]
 349  //   .quad <target>
 350  WriteBranch(from, from + kBranchLength, target);
 351#else
 352  WriteJumpInstruction(from, target);
 353#endif
 354}
 355
 356struct TrampolineMemoryRegion {
 357  uptr content;
 358  uptr allocated_size;
 359  uptr max_size;
 360};
 361
 362UNUSED static const uptr kTrampolineRangeLimit = 1ull << 31;  // 2 gig
 363static const int kMaxTrampolineRegion = 1024;
 364static TrampolineMemoryRegion TrampolineRegions[kMaxTrampolineRegion];
 365
 366static void *AllocateTrampolineRegion(uptr min_addr, uptr max_addr,
 367                                      uptr func_addr, size_t granularity) {
 368#  if SANITIZER_WINDOWS64
 369  // Clamp {min,max}_addr to the accessible address space.
 370  SYSTEM_INFO system_info;
 371  ::GetSystemInfo(&system_info);
 372  uptr min_virtual_addr =
 373      RoundUpTo((uptr)system_info.lpMinimumApplicationAddress, granularity);
 374  uptr max_virtual_addr =
 375      RoundDownTo((uptr)system_info.lpMaximumApplicationAddress, granularity);
 376  if (min_addr < min_virtual_addr)
 377    min_addr = min_virtual_addr;
 378  if (max_addr > max_virtual_addr)
 379    max_addr = max_virtual_addr;
 380
 381  // This loop probes the virtual address space to find free memory in the
 382  // [min_addr, max_addr] interval. The search starts from func_addr and
 383  // proceeds "outwards" towards the interval bounds using two probes, lo_addr
 384  // and hi_addr, for addresses lower/higher than func_addr. At each step, it
 385  // considers the probe closest to func_addr. If that address is not free, the
 386  // probe is advanced (lower or higher depending on the probe) to the next
 387  // memory block and the search continues.
 388  uptr lo_addr = RoundDownTo(func_addr, granularity);
 389  uptr hi_addr = RoundUpTo(func_addr, granularity);
 390  while (lo_addr >= min_addr || hi_addr <= max_addr) {
 391    // Consider the in-range address closest to func_addr.
 392    uptr addr;
 393    if (lo_addr < min_addr)
 394      addr = hi_addr;
 395    else if (hi_addr > max_addr)
 396      addr = lo_addr;
 397    else
 398      addr = (hi_addr - func_addr < func_addr - lo_addr) ? hi_addr : lo_addr;
 399
 400    MEMORY_BASIC_INFORMATION info;
 401    if (!::VirtualQuery((void *)addr, &info, sizeof(info))) {
 402      ReportError(
 403          "interception_win: VirtualQuery in AllocateTrampolineRegion failed "
 404          "for %p\n",
 405          (void *)addr);
 406      return nullptr;
 407    }
 408
 409    // Check whether a region can be allocated at |addr|.
 410    if (info.State == MEM_FREE && info.RegionSize >= granularity) {
 411      void *page =
 412          ::VirtualAlloc((void *)addr, granularity, MEM_RESERVE | MEM_COMMIT,
 413                         PAGE_EXECUTE_READWRITE);
 414      if (page == nullptr)
 415        ReportError(
 416            "interception_win: VirtualAlloc in AllocateTrampolineRegion failed "
 417            "for %p\n",
 418            (void *)addr);
 419      return page;
 420    }
 421
 422    if (addr == lo_addr)
 423      lo_addr =
 424          RoundDownTo((uptr)info.AllocationBase - granularity, granularity);
 425    if (addr == hi_addr)
 426      hi_addr =
 427          RoundUpTo((uptr)info.BaseAddress + info.RegionSize, granularity);
 428  }
 429
 430  ReportError(
 431      "interception_win: AllocateTrampolineRegion failed to find free memory; "
 432      "min_addr: %p, max_addr: %p, func_addr: %p, granularity: %zu\n",
 433      (void *)min_addr, (void *)max_addr, (void *)func_addr, granularity);
 434  return nullptr;
 435#else
 436  return ::VirtualAlloc(nullptr,
 437                        granularity,
 438                        MEM_RESERVE | MEM_COMMIT,
 439                        PAGE_EXECUTE_READWRITE);
 440#endif
 441}
 442
 443// Used by unittests to release mapped memory space.
 444void TestOnlyReleaseTrampolineRegions() {
 445  for (size_t bucket = 0; bucket < kMaxTrampolineRegion; ++bucket) {
 446    TrampolineMemoryRegion *current = &TrampolineRegions[bucket];
 447    if (current->content == 0)
 448      return;
 449    ::VirtualFree((void*)current->content, 0, MEM_RELEASE);
 450    current->content = 0;
 451  }
 452}
 453
 454static uptr AllocateMemoryForTrampoline(uptr func_address, size_t size) {
 455#  if SANITIZER_WINDOWS64
 456  uptr min_addr = func_address - kTrampolineRangeLimit;
 457  uptr max_addr = func_address + kTrampolineRangeLimit - size;
 458
 459  // Allocate memory within 2GB of the module (DLL or EXE file) so that any
 460  // address within the module can be referenced with PC-relative operands.
 461  // This allows us to not just jump to the trampoline with a PC-relative
 462  // offset, but to relocate any instructions that we copy to the trampoline
 463  // which have references to the original module. If we can't find the base
 464  // address of the module (e.g. if func_address is in mmap'ed memory), just
 465  // stay within 2GB of func_address.
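  // For example (illustrative numbers): for a module based at 0x180000000 with
  // SizeOfImage 0x100000, the range becomes roughly
  // [0x180100000 - 2GB, 0x180000000 + 2GB), so any byte of the module and any
  // byte of the trampoline region stay mutually reachable with a 32-bit
  // rip-relative displacement.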
 466  HMODULE module;
 467  if (::GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
 468                           GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
 469                           (LPCWSTR)func_address, &module)) {
 470    MODULEINFO module_info;
 471    if (::GetModuleInformation(::GetCurrentProcess(), module,
 472                                &module_info, sizeof(module_info))) {
 473      min_addr = (uptr)module_info.lpBaseOfDll + module_info.SizeOfImage -
 474                 kTrampolineRangeLimit;
 475      max_addr = (uptr)module_info.lpBaseOfDll + kTrampolineRangeLimit - size;
 476    }
 477  }
 478
 479  // Check for overflow.
 480  if (min_addr > func_address)
 481    min_addr = 0;
 482  if (max_addr < func_address)
 483    max_addr = ~(uptr)0;
 484#  else
 485  uptr min_addr = 0;
 486  uptr max_addr = ~min_addr;
 487#  endif
 488
 489  // Find a region within [min_addr,max_addr] with enough space to allocate
 490  // |size| bytes.
 491  TrampolineMemoryRegion *region = nullptr;
 492  for (size_t bucket = 0; bucket < kMaxTrampolineRegion; ++bucket) {
 493    TrampolineMemoryRegion* current = &TrampolineRegions[bucket];
 494    if (current->content == 0) {
 495      // No valid region found, allocate a new region.
 496      size_t bucket_size = GetMmapGranularity();
 497      void *content = AllocateTrampolineRegion(min_addr, max_addr, func_address,
 498                                               bucket_size);
 499      if (content == nullptr)
 500        return 0U;
 501
 502      current->content = (uptr)content;
 503      current->allocated_size = 0;
 504      current->max_size = bucket_size;
 505      region = current;
 506      break;
 507    } else if (current->max_size - current->allocated_size > size) {
 508      uptr next_address = current->content + current->allocated_size;
 509      if (next_address < min_addr || next_address > max_addr)
 510        continue;
 511      // The space can be allocated in the current region.
 512      region = current;
 513      break;
 514    }
 515  }
 516
 517  // Failed to find a region.
 518  if (region == nullptr)
 519    return 0U;
 520
 521  // Allocate the space in the current region.
 522  uptr allocated_space = region->content + region->allocated_size;
 523  region->allocated_size += size;
 524  WritePadding(allocated_space, size);
 525
 526  return allocated_space;
 527}
 528
  529// The following prologues cannot be patched because they contain a short
  530// jump that branches back into the patched region.
  531
  532// The short jump patterns below are only for x86_64.
 533#  if SANITIZER_WINDOWS_x64
 534// ntdll!wcslen in Win11
 535//   488bc1          mov     rax,rcx
 536//   0fb710          movzx   edx,word ptr [rax]
 537//   4883c002        add     rax,2
 538//   6685d2          test    dx,dx
 539//   75f4            jne     -12
 540static const u8 kPrologueWithShortJump1[] = {
 541    0x48, 0x8b, 0xc1, 0x0f, 0xb7, 0x10, 0x48, 0x83,
 542    0xc0, 0x02, 0x66, 0x85, 0xd2, 0x75, 0xf4,
 543};
 544
 545// ntdll!strrchr in Win11
 546//   4c8bc1          mov     r8,rcx
 547//   8a01            mov     al,byte ptr [rcx]
 548//   48ffc1          inc     rcx
 549//   84c0            test    al,al
 550//   75f7            jne     -9
 551static const u8 kPrologueWithShortJump2[] = {
 552    0x4c, 0x8b, 0xc1, 0x8a, 0x01, 0x48, 0xff, 0xc1,
 553    0x84, 0xc0, 0x75, 0xf7,
 554};
 555#endif
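
// For illustration: in kPrologueWithShortJump1 above, the trailing "jne -12"
// branches back to the movzx at offset 3, i.e. into the first bytes of the
// function. Overwriting those bytes with a 6-byte indirect jump would corrupt
// a live branch target, which is why GetInstructionSize rejects these
// prologues (returns 0).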
 556
 557// Returns 0 on error.
 558static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
 559  if (rel_offset) {
 560    *rel_offset = 0;
 561  }
 562
 563#if SANITIZER_ARM64
 564  // An ARM64 instruction is 4 bytes long.
 565  return 4;
 566#endif
 567
 568#  if SANITIZER_WINDOWS_x64
 569  if (memcmp((u8*)address, kPrologueWithShortJump1,
 570             sizeof(kPrologueWithShortJump1)) == 0 ||
 571      memcmp((u8*)address, kPrologueWithShortJump2,
 572             sizeof(kPrologueWithShortJump2)) == 0) {
 573    return 0;
 574  }
 575#endif
 576
 577  switch (*(u64*)address) {
 578    case 0x90909090909006EB:  // stub: jmp over 6 x nop.
 579      return 8;
 580  }
 581
 582  switch (*(u8*)address) {
 583    case 0x90:  // 90 : nop
 584    case 0xC3:  // C3 : ret   (for small/empty function interception
 585    case 0xCC:  // CC : int 3  i.e. registering weak functions)
 586      return 1;
 587
 588    case 0x50:  // push eax / rax
 589    case 0x51:  // push ecx / rcx
 590    case 0x52:  // push edx / rdx
 591    case 0x53:  // push ebx / rbx
 592    case 0x54:  // push esp / rsp
 593    case 0x55:  // push ebp / rbp
 594    case 0x56:  // push esi / rsi
 595    case 0x57:  // push edi / rdi
 596    case 0x5D:  // pop ebp / rbp
 597      return 1;
 598
 599    case 0x6A:  // 6A XX = push XX
 600      return 2;
 601
 602    // This instruction can be encoded with a 16-bit immediate but that is
 603    // incredibly unlikely.
 604    case 0x68:  // 68 XX XX XX XX : push imm32
 605      return 5;
 606
 607    case 0xb8:  // b8 XX XX XX XX : mov eax, XX XX XX XX
 608    case 0xB9:  // b9 XX XX XX XX : mov ecx, XX XX XX XX
 609    case 0xBA:  // ba XX XX XX XX : mov edx, XX XX XX XX
 610      return 5;
 611
 612    // Cannot overwrite control-instruction. Return 0 to indicate failure.
 613    case 0xE9:  // E9 XX XX XX XX : jmp <label>
 614    case 0xE8:  // E8 XX XX XX XX : call <func>
 615    case 0xEB:  // EB XX : jmp XX (short jump)
 616    case 0x70:  // 7Y YY : jy XX (short conditional jump)
 617    case 0x71:
 618    case 0x72:
 619    case 0x73:
 620    case 0x74:
 621    case 0x75:
 622    case 0x76:
 623    case 0x77:
 624    case 0x78:
 625    case 0x79:
 626    case 0x7A:
 627    case 0x7B:
 628    case 0x7C:
 629    case 0x7D:
 630    case 0x7E:
 631    case 0x7F:
 632      return 0;
 633  }
 634
 635  switch (*(u16*)(address)) {
 636    case 0x018A:  // 8A 01 : mov al, byte ptr [ecx]
 637    case 0xFF8B:  // 8B FF : mov edi, edi
 638    case 0xEC8B:  // 8B EC : mov ebp, esp
 639    case 0xc889:  // 89 C8 : mov eax, ecx
 640    case 0xD189:  // 89 D1 : mov ecx, edx
 641    case 0xE589:  // 89 E5 : mov ebp, esp
 642    case 0xC18B:  // 8B C1 : mov eax, ecx
 643    case 0xC031:  // 31 C0 : xor eax, eax
 644    case 0xC931:  // 31 C9 : xor ecx, ecx
 645    case 0xD231:  // 31 D2 : xor edx, edx
 646    case 0xC033:  // 33 C0 : xor eax, eax
 647    case 0xC933:  // 33 C9 : xor ecx, ecx
 648    case 0xD233:  // 33 D2 : xor edx, edx
 649    case 0x9066:  // 66 90 : xchg %ax,%ax (Two-byte NOP)
 650    case 0xDB84:  // 84 DB : test bl,bl
 651    case 0xC084:  // 84 C0 : test al,al
 652    case 0xC984:  // 84 C9 : test cl,cl
 653    case 0xD284:  // 84 D2 : test dl,dl
 654      return 2;
 655
 656    case 0x3980:  // 80 39 XX : cmp BYTE PTR [rcx], XX
 657    case 0x4D8B:  // 8B 4D XX : mov XX(%ebp), ecx
 658    case 0x558B:  // 8B 55 XX : mov XX(%ebp), edx
  659    case 0x758B:  // 8B 75 XX : mov XX(%ebp), esi
 660    case 0xE483:  // 83 E4 XX : and esp, XX
 661    case 0xEC83:  // 83 EC XX : sub esp, XX
 662    case 0xC1F6:  // F6 C1 XX : test cl, XX
 663      return 3;
 664
 665    case 0x89FF:  // FF 89 XX XX XX XX : dec dword ptr [ecx + XX XX XX XX]
 666    case 0xEC81:  // 81 EC XX XX XX XX : sub esp, XX XX XX XX
 667      return 6;
 668
 669    // Cannot overwrite control-instruction. Return 0 to indicate failure.
 670    case 0x25FF:  // FF 25 XX YY ZZ WW : jmp dword ptr ds:[WWZZYYXX]
 671      return 0;
 672  }
 673
 674  switch (0x00FFFFFF & *(u32 *)address) {
 675    case 0x244C8D:  // 8D 4C 24 XX : lea ecx, [esp + XX]
 676    case 0x2474FF:  // FF 74 24 XX : push qword ptr [rsp + XX]
 677      return 4;
 678    case 0x24A48D:  // 8D A4 24 XX XX XX XX : lea esp, [esp + XX XX XX XX]
 679      return 7;
 680  }
 681
 682  switch (0x000000FF & *(u32 *)address) {
 683    case 0xc2:  // C2 XX XX : ret XX (needed for registering weak functions)
 684      return 3;
 685  }
 686
 687#  if SANITIZER_WINDOWS_x64
 688  switch (*(u8*)address) {
 689    case 0xA1:  // A1 XX XX XX XX XX XX XX XX :
  690                //   movabs eax, dword ptr ds:[XX XX XX XX XX XX XX XX]
 691      return 9;
 692    case 0xF2:
 693      switch (*(u32 *)(address + 1)) {
 694          case 0x2444110f:  //  f2 0f 11 44 24 XX       movsd  QWORD PTR
 695                            //  [rsp + XX], xmm0
 696          case 0x244c110f:  //  f2 0f 11 4c 24 XX       movsd  QWORD PTR
 697                            //  [rsp + XX], xmm1
 698          case 0x2454110f:  //  f2 0f 11 54 24 XX       movsd  QWORD PTR
 699                            //  [rsp + XX], xmm2
 700          case 0x245c110f:  //  f2 0f 11 5c 24 XX       movsd  QWORD PTR
 701                            //  [rsp + XX], xmm3
 702          case 0x2464110f:  //  f2 0f 11 64 24 XX       movsd  QWORD PTR
 703                            //  [rsp + XX], xmm4
 704            return 6;
 705      }
 706      break;
 707
 708    case 0x83:
 709      const u8 next_byte = *(u8*)(address + 1);
 710      const u8 mod = next_byte >> 6;
 711      const u8 rm = next_byte & 7;
 712      if (mod == 1 && rm == 4)
 713        return 5;  // 83 ModR/M SIB Disp8 Imm8
 714                   //   add|or|adc|sbb|and|sub|xor|cmp [r+disp8], imm8
 715  }
 716
 717  switch (*(u16*)address) {
 718    case 0x5040:  // push rax
 719    case 0x5140:  // push rcx
 720    case 0x5240:  // push rdx
 721    case 0x5340:  // push rbx
 722    case 0x5440:  // push rsp
 723    case 0x5540:  // push rbp
 724    case 0x5640:  // push rsi
 725    case 0x5740:  // push rdi
 726    case 0x5441:  // push r12
 727    case 0x5541:  // push r13
 728    case 0x5641:  // push r14
 729    case 0x5741:  // push r15
 730    case 0xc084:  // test al, al
 731    case 0x018a:  // mov al, byte ptr [rcx]
 732      return 2;
 733
 734    case 0x7E80:  // 80 7E YY XX  cmp BYTE PTR [rsi+YY], XX
 735    case 0x7D80:  // 80 7D YY XX  cmp BYTE PTR [rbp+YY], XX
 736    case 0x7A80:  // 80 7A YY XX  cmp BYTE PTR [rdx+YY], XX
 737    case 0x7880:  // 80 78 YY XX  cmp BYTE PTR [rax+YY], XX
 738    case 0x7B80:  // 80 7B YY XX  cmp BYTE PTR [rbx+YY], XX
 739    case 0x7980:  // 80 79 YY XX  cmp BYTE ptr [rcx+YY], XX
 740      return 4;
 741
 742    case 0x058A:  // 8A 05 XX XX XX XX : mov al, byte ptr [XX XX XX XX]
 743    case 0x058B:  // 8B 05 XX XX XX XX : mov eax, dword ptr [XX XX XX XX]
 744      if (rel_offset)
 745        *rel_offset = 2;
 746      FALLTHROUGH;
 747    case 0xB841:  // 41 B8 XX XX XX XX : mov r8d, XX XX XX XX
 748      return 6;
 749
 750    case 0x7E81:  // 81 7E YY XX XX XX XX  cmp DWORD PTR [rsi+YY], XX XX XX XX
 751    case 0x7D81:  // 81 7D YY XX XX XX XX  cmp DWORD PTR [rbp+YY], XX XX XX XX
 752    case 0x7A81:  // 81 7A YY XX XX XX XX  cmp DWORD PTR [rdx+YY], XX XX XX XX
 753    case 0x7881:  // 81 78 YY XX XX XX XX  cmp DWORD PTR [rax+YY], XX XX XX XX
 754    case 0x7B81:  // 81 7B YY XX XX XX XX  cmp DWORD PTR [rbx+YY], XX XX XX XX
 755    case 0x7981:  // 81 79 YY XX XX XX XX  cmp dword ptr [rcx+YY], XX XX XX XX
 756      return 7;
 757
 758    case 0xb848:  // 48 b8 XX XX XX XX XX XX XX XX :
 759                  //   movabsq XX XX XX XX XX XX XX XX, rax
 760    case 0xba48:  // 48 ba XX XX XX XX XX XX XX XX :
 761                  //   movabsq XX XX XX XX XX XX XX XX, rdx
 762      return 10;
 763  }
 764
 765  switch (0x00FFFFFF & *(u32 *)address) {
 766    case 0x10b70f:    // 0f b7 10 : movzx edx, WORD PTR [rax]
 767    case 0xc00b4d:    // 4d 0b c0 : or r8, r8
 768    case 0xc03345:    // 45 33 c0 : xor r8d, r8d
 769    case 0xc08548:    // 48 85 c0 : test rax, rax
 770    case 0xc0854d:    // 4d 85 c0 : test r8, r8
 771    case 0xc08b41:    // 41 8b c0 : mov eax, r8d
 772    case 0xc0ff48:    // 48 ff c0 : inc rax
 773    case 0xc0ff49:    // 49 ff c0 : inc r8
 774    case 0xc18b41:    // 41 8b c1 : mov eax, r9d
 775    case 0xc18b48:    // 48 8b c1 : mov rax, rcx
 776    case 0xc18b4c:    // 4c 8b c1 : mov r8, rcx
 777    case 0xc1ff48:    // 48 ff c1 : inc rcx
 778    case 0xc1ff49:    // 49 ff c1 : inc r9
 779    case 0xc28b41:    // 41 8b c2 : mov eax, r10d
 780    case 0x01b60f:    // 0f b6 01 : movzx eax, BYTE PTR [rcx]
 781    case 0x09b60f:    // 0f b6 09 : movzx ecx, BYTE PTR [rcx]
 782    case 0x11b60f:    // 0f b6 11 : movzx edx, BYTE PTR [rcx]
 783    case 0xc2b60f:    // 0f b6 c2 : movzx eax, dl
 784    case 0xc2ff48:    // 48 ff c2 : inc rdx
 785    case 0xc2ff49:    // 49 ff c2 : inc r10
 786    case 0xc38b41:    // 41 8b c3 : mov eax, r11d
 787    case 0xc3ff48:    // 48 ff c3 : inc rbx
 788    case 0xc3ff49:    // 49 ff c3 : inc r11
 789    case 0xc48b41:    // 41 8b c4 : mov eax, r12d
 790    case 0xc48b48:    // 48 8b c4 : mov rax, rsp
 791    case 0xc4ff49:    // 49 ff c4 : inc r12
 792    case 0xc5ff49:    // 49 ff c5 : inc r13
 793    case 0xc6ff48:    // 48 ff c6 : inc rsi
 794    case 0xc6ff49:    // 49 ff c6 : inc r14
 795    case 0xc7ff48:    // 48 ff c7 : inc rdi
 796    case 0xc7ff49:    // 49 ff c7 : inc r15
 797    case 0xc93345:    // 45 33 c9 : xor r9d, r9d
 798    case 0xc98548:    // 48 85 c9 : test rcx, rcx
 799    case 0xc9854d:    // 4d 85 c9 : test r9, r9
 800    case 0xc98b4c:    // 4c 8b c9 : mov r9, rcx
 801    case 0xd12948:    // 48 29 d1 : sub rcx, rdx
 802    case 0xca2b48:    // 48 2b ca : sub rcx, rdx
 803    case 0xca3b48:    // 48 3b ca : cmp rcx, rdx
 804    case 0xd12b48:    // 48 2b d1 : sub rdx, rcx
 805    case 0xd18b48:    // 48 8b d1 : mov rdx, rcx
 806    case 0xd18b4c:    // 4c 8b d1 : mov r10, rcx
 807    case 0xd28548:    // 48 85 d2 : test rdx, rdx
 808    case 0xd2854d:    // 4d 85 d2 : test r10, r10
 809    case 0xd28b4c:    // 4c 8b d2 : mov r10, rdx
 810    case 0xd2b60f:    // 0f b6 d2 : movzx edx, dl
 811    case 0xd2be0f:    // 0f be d2 : movsx edx, dl
 812    case 0xd98b4c:    // 4c 8b d9 : mov r11, rcx
 813    case 0xd9f748:    // 48 f7 d9 : neg rcx
 814    case 0xc03145:    // 45 31 c0 : xor r8d,r8d
 815    case 0xc93145:    // 45 31 c9 : xor r9d,r9d
 816    case 0xdb3345:    // 45 33 db : xor r11d, r11d
 817    case 0xc08445:    // 45 84 c0 : test r8b,r8b
 818    case 0xd28445:    // 45 84 d2 : test r10b,r10b
 819    case 0xdb8548:    // 48 85 db : test rbx, rbx
 820    case 0xdb854d:    // 4d 85 db : test r11, r11
 821    case 0xdc8b4c:    // 4c 8b dc : mov r11, rsp
 822    case 0xe48548:    // 48 85 e4 : test rsp, rsp
 823    case 0xe4854d:    // 4d 85 e4 : test r12, r12
 824    case 0xc88948:    // 48 89 c8 : mov rax,rcx
 825    case 0xcb8948:    // 48 89 cb : mov rbx,rcx
 826    case 0xd08948:    // 48 89 d0 : mov rax,rdx
 827    case 0xd18948:    // 48 89 d1 : mov rcx,rdx
 828    case 0xd38948:    // 48 89 d3 : mov rbx,rdx
 829    case 0xe58948:    // 48 89 e5 : mov rbp, rsp
 830    case 0xed8548:    // 48 85 ed : test rbp, rbp
 831    case 0xc88949:    // 49 89 c8 : mov r8, rcx
 832    case 0xc98949:    // 49 89 c9 : mov r9, rcx
 833    case 0xca8949:    // 49 89 ca : mov r10,rcx
 834    case 0xd08949:    // 49 89 d0 : mov r8, rdx
 835    case 0xd18949:    // 49 89 d1 : mov r9, rdx
 836    case 0xd28949:    // 49 89 d2 : mov r10, rdx
 837    case 0xd38949:    // 49 89 d3 : mov r11, rdx
 838    case 0xed854d:    // 4d 85 ed : test r13, r13
 839    case 0xf6854d:    // 4d 85 f6 : test r14, r14
 840    case 0xff854d:    // 4d 85 ff : test r15, r15
 841      return 3;
 842
 843    case 0x245489:    // 89 54 24 XX : mov DWORD PTR[rsp + XX], edx
 844    case 0x428d44:    // 44 8d 42 XX : lea r8d , [rdx + XX]
 845    case 0x588948:    // 48 89 58 XX : mov QWORD PTR[rax + XX], rbx
 846    case 0xec8348:    // 48 83 ec XX : sub rsp, XX
 847    case 0xf88349:    // 49 83 f8 XX : cmp r8, XX
 848    case 0x488d49:    // 49 8d 48 XX : lea rcx, [...]
 849    case 0x048d4c:    // 4c 8d 04 XX : lea r8, [...]
 850    case 0x148d4e:    // 4e 8d 14 XX : lea r10, [...]
 851    case 0x398366:    // 66 83 39 XX : cmp WORD PTR [rcx], XX
 852      return 4;
 853
 854    case 0x441F0F:  // 0F 1F 44 XX XX :   nop DWORD PTR [...]
 855    case 0x246483:  // 83 64 24 XX YY :   and    DWORD PTR [rsp+XX], YY
 856      return 5;
 857
 858    case 0x788166:  // 66 81 78 XX YY YY  cmp WORD PTR [rax+XX], YY YY
 859    case 0x798166:  // 66 81 79 XX YY YY  cmp WORD PTR [rcx+XX], YY YY
 860    case 0x7a8166:  // 66 81 7a XX YY YY  cmp WORD PTR [rdx+XX], YY YY
 861    case 0x7b8166:  // 66 81 7b XX YY YY  cmp WORD PTR [rbx+XX], YY YY
 862    case 0x7e8166:  // 66 81 7e XX YY YY  cmp WORD PTR [rsi+XX], YY YY
 863    case 0x7f8166:  // 66 81 7f XX YY YY  cmp WORD PTR [rdi+XX], YY YY
 864      return 6;
 865
 866    case 0xec8148:    // 48 81 EC XX XX XX XX : sub rsp, XXXXXXXX
 867    case 0xc0c748:    // 48 C7 C0 XX XX XX XX : mov rax, XX XX XX XX
 868      return 7;
 869
 870    // clang-format off
 871    case 0x788141:  // 41 81 78 XX YY YY YY YY : cmp DWORD PTR [r8+YY], XX XX XX XX
 872    case 0x798141:  // 41 81 79 XX YY YY YY YY : cmp DWORD PTR [r9+YY], XX XX XX XX
 873    case 0x7a8141:  // 41 81 7a XX YY YY YY YY : cmp DWORD PTR [r10+YY], XX XX XX XX
 874    case 0x7b8141:  // 41 81 7b XX YY YY YY YY : cmp DWORD PTR [r11+YY], XX XX XX XX
 875    case 0x7d8141:  // 41 81 7d XX YY YY YY YY : cmp DWORD PTR [r13+YY], XX XX XX XX
 876    case 0x7e8141:  // 41 81 7e XX YY YY YY YY : cmp DWORD PTR [r14+YY], XX XX XX XX
 877    case 0x7f8141:  // 41 81 7f YY XX XX XX XX : cmp DWORD PTR [r15+YY], XX XX XX XX
 878    case 0x247c81:  // 81 7c 24 YY XX XX XX XX : cmp DWORD PTR [rsp+YY], XX XX XX XX
 879      return 8;
 880      // clang-format on
 881
 882    case 0x058b48:    // 48 8b 05 XX XX XX XX :
 883                      //   mov rax, QWORD PTR [rip + XXXXXXXX]
 884    case 0x058d48:    // 48 8d 05 XX XX XX XX :
 885                      //   lea rax, QWORD PTR [rip + XXXXXXXX]
 886    case 0x0d8948:    // 48 89 0d XX XX XX XX :
 887                      //   mov QWORD PTR [rip + XXXXXXXX], rcx
 888    case 0x158948:    // 48 89 15 XX XX XX XX :
 889                      //   mov QWORD PTR [rip + XXXXXXXX], rdx
 890    case 0x25ff48:    // 48 ff 25 XX XX XX XX :
 891                      //   rex.W jmp QWORD PTR [rip + XXXXXXXX]
 892    case 0x158D4C:    // 4c 8d 15 XX XX XX XX : lea r10, [rip + XX]
 893      // Instructions having offset relative to 'rip' need offset adjustment.
 894      if (rel_offset)
 895        *rel_offset = 3;
 896      return 7;
 897
 898    case 0x2444c7:    // C7 44 24 XX YY YY YY YY
 899                      //   mov dword ptr [rsp + XX], YYYYYYYY
 900      return 8;
 901
 902    case 0x7c8141:  // 41 81 7c ZZ YY XX XX XX XX
 903                    // cmp DWORD PTR [reg+reg*n+YY], XX XX XX XX
 904      return 9;
 905  }
 906
 907  switch (*(u32*)(address)) {
 908    case 0x01b60f44:  // 44 0f b6 01 : movzx r8d, BYTE PTR [rcx]
 909    case 0x09b60f44:  // 44 0f b6 09 : movzx r9d, BYTE PTR [rcx]
 910    case 0x0ab60f44:  // 44 0f b6 0a : movzx r8d, BYTE PTR [rdx]
 911    case 0x11b60f44:  // 44 0f b6 11 : movzx r10d, BYTE PTR [rcx]
 912    case 0x1ab60f44:  // 44 0f b6 1a : movzx r11d, BYTE PTR [rdx]
 913      return 4;
 914    case 0x24448b48:  // 48 8b 44 24 XX : mov rax, QWORD ptr [rsp + XX]
 915    case 0x246c8948:  // 48 89 6C 24 XX : mov QWORD ptr [rsp + XX], rbp
 916    case 0x245c8948:  // 48 89 5c 24 XX : mov QWORD PTR [rsp + XX], rbx
 917    case 0x24748948:  // 48 89 74 24 XX : mov QWORD PTR [rsp + XX], rsi
 918    case 0x247c8948:  // 48 89 7c 24 XX : mov QWORD PTR [rsp + XX], rdi
 919    case 0x244C8948:  // 48 89 4C 24 XX : mov QWORD PTR [rsp + XX], rcx
 920    case 0x24548948:  // 48 89 54 24 XX : mov QWORD PTR [rsp + XX], rdx
 921    case 0x244c894c:  // 4c 89 4c 24 XX : mov QWORD PTR [rsp + XX], r9
 922    case 0x2444894c:  // 4c 89 44 24 XX : mov QWORD PTR [rsp + XX], r8
 923    case 0x244c8944:  // 44 89 4c 24 XX   mov DWORD PTR [rsp + XX], r9d
 924    case 0x24448944:  // 44 89 44 24 XX   mov DWORD PTR [rsp + XX], r8d
 925    case 0x246c8d48:  // 48 8d 6c 24 XX : lea rbp, [rsp + XX]
 926      return 5;
 927    case 0x24648348:  // 48 83 64 24 XX YY : and QWORD PTR [rsp + XX], YY
 928      return 6;
 929    case 0x24A48D48:  // 48 8D A4 24 XX XX XX XX : lea rsp, [rsp + XX XX XX XX]
 930      return 8;
 931  }
 932
 933  switch (0xFFFFFFFFFFULL & *(u64 *)(address)) {
 934    case 0xC07E0F4866:  // 66 48 0F 7E C0 : movq rax, xmm0
 935      return 5;
 936  }
 937
 938#else
 939
 940  switch (*(u8*)address) {
 941    case 0xA1:  // A1 XX XX XX XX :  mov eax, dword ptr ds:[XXXXXXXX]
 942      return 5;
 943  }
 944  switch (*(u16*)address) {
 945    case 0x458B:  // 8B 45 XX : mov eax, dword ptr [ebp + XX]
 946    case 0x5D8B:  // 8B 5D XX : mov ebx, dword ptr [ebp + XX]
 947    case 0x7D8B:  // 8B 7D XX : mov edi, dword ptr [ebp + XX]
 948    case 0x758B:  // 8B 75 XX : mov esi, dword ptr [ebp + XX]
 949    case 0x75FF:  // FF 75 XX : push dword ptr [ebp + XX]
 950      return 3;
 951    case 0xC1F7:  // F7 C1 XX YY ZZ WW : test ecx, WWZZYYXX
 952      return 6;
  953    case 0x3D83:  // 83 3D XX YY ZZ WW TT : cmp dword ptr [WWZZYYXX], TT
 954      return 7;
 955    case 0x7D83:  // 83 7D XX YY : cmp dword ptr [ebp + XX], YY
 956      return 4;
 957  }
 958
 959  switch (0x00FFFFFF & *(u32*)address) {
  960    case 0x24448A:  // 8A 44 24 XX : mov al, byte ptr [esp + XX]
 961    case 0x24448B:  // 8B 44 24 XX : mov eax, dword ptr [esp + XX]
 962    case 0x244C8B:  // 8B 4C 24 XX : mov ecx, dword ptr [esp + XX]
 963    case 0x24548B:  // 8B 54 24 XX : mov edx, dword ptr [esp + XX]
 964    case 0x245C8B:  // 8B 5C 24 XX : mov ebx, dword ptr [esp + XX]
 965    case 0x246C8B:  // 8B 6C 24 XX : mov ebp, dword ptr [esp + XX]
 966    case 0x24748B:  // 8B 74 24 XX : mov esi, dword ptr [esp + XX]
 967    case 0x247C8B:  // 8B 7C 24 XX : mov edi, dword ptr [esp + XX]
 968      return 4;
 969  }
 970
 971  switch (*(u32*)address) {
 972    case 0x2444B60F:  // 0F B6 44 24 XX : movzx eax, byte ptr [esp + XX]
 973      return 5;
 974  }
 975#endif
 976
  977  // Unknown instruction! This might happen when we add a new interceptor, use
  978  // a new compiler version, or if Windows changed how some functions are
  979  // compiled. In any case, we print the address and 8 bytes of instructions
  980  // to notify the user about the error and to help identify the unknown
  981  // instruction. This is not treated as a fatal error, but we break into the
  982  // debugger if one is attached.
 983  u8 *bytes = (u8 *)address;
 984  ReportError(
 985      "interception_win: unhandled instruction at %p: %02x %02x %02x %02x %02x "
 986      "%02x %02x %02x\n",
 987      (void *)address, bytes[0], bytes[1], bytes[2], bytes[3], bytes[4],
 988      bytes[5], bytes[6], bytes[7]);
 989  if (::IsDebuggerPresent())
 990    __debugbreak();
 991  return 0;
 992}
 993
 994size_t TestOnlyGetInstructionSize(uptr address, size_t *rel_offset) {
 995  return GetInstructionSize(address, rel_offset);
 996}
 997
 998// Returns 0 on error.
 999static size_t RoundUpToInstrBoundary(size_t size, uptr address) {
1000  size_t cursor = 0;
1001  while (cursor < size) {
1002    size_t instruction_size = GetInstructionSize(address + cursor);
1003    if (!instruction_size)
1004      return 0;
1005    cursor += instruction_size;
1006  }
1007  return cursor;
1008}
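
// For example (illustrative x64 prologue): if a function starts with
//   48 83 ec 28   sub rsp, 0x28   (4 bytes)
//   48 8b c1      mov rax, rcx    (3 bytes)
// then RoundUpToInstrBoundary(kBranchLength /* 6 */, addr) returns 7: the
// smallest instruction-aligned size covering the bytes to be patched.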
1009
1010static bool CopyInstructions(uptr to, uptr from, size_t size) {
1011  size_t cursor = 0;
1012  while (cursor != size) {
1013    size_t rel_offset = 0;
1014    size_t instruction_size = GetInstructionSize(from + cursor, &rel_offset);
1015    if (!instruction_size)
1016      return false;
1017    _memcpy((void *)(to + cursor), (void *)(from + cursor),
1018            (size_t)instruction_size);
1019    if (rel_offset) {
1020#  if SANITIZER_WINDOWS64
 1021      // We want to make sure that the new relative offset still fits in 32 bits.
 1022      // This does not hold if relocated_offset is outside [-2**31, 2**31).
1023      s64 delta = to - from;
1024      s64 relocated_offset = *(s32 *)(to + cursor + rel_offset) - delta;
1025      if (-0x8000'0000ll > relocated_offset ||
1026          relocated_offset > 0x7FFF'FFFFll) {
1027        ReportError(
1028            "interception_win: CopyInstructions relocated_offset %lld outside "
1029            "32-bit range\n",
1030            (long long)relocated_offset);
1031        return false;
1032      }
1033#  else
1034      // on 32-bit, the relative offset will always be correct
1035      s32 delta = to - from;
1036      s32 relocated_offset = *(s32 *)(to + cursor + rel_offset) - delta;
1037#  endif
1038      *(s32 *)(to + cursor + rel_offset) = relocated_offset;
1039    }
1040    cursor += instruction_size;
1041  }
1042  return true;
1043}
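
// For example (illustrative): when CopyInstructions copies the rip-relative
//   48 8b 05 10 00 00 00   mov rax, QWORD PTR [rip + 0x10]
// from address F to trampoline address T, rel_offset is 3 and the stored
// displacement becomes 0x10 - (T - F), so the copied instruction still loads
// from F + 7 + 0x10.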
1044
1045
1046#if !SANITIZER_WINDOWS64
1047bool OverrideFunctionWithDetour(
1048    uptr old_func, uptr new_func, uptr *orig_old_func) {
1049  const int kDetourHeaderLen = 5;
1050  const u16 kDetourInstruction = 0xFF8B;
1051
1052  uptr header = (uptr)old_func - kDetourHeaderLen;
1053  uptr patch_length = kDetourHeaderLen + kShortJumpInstructionLength;
1054
1055  // Validate that the function is hookable.
1056  if (*(u16*)old_func != kDetourInstruction ||
1057      !IsMemoryPadding(header, kDetourHeaderLen))
1058    return false;
1059
1060  // Change memory protection to writable.
1061  DWORD protection = 0;
1062  if (!ChangeMemoryProtection(header, patch_length, &protection))
1063    return false;
1064
1065  // Write a relative jump to the redirected function.
1066  WriteJumpInstruction(header, new_func);
1067
1068  // Write the short jump to the function prefix.
1069  WriteShortJumpInstruction(old_func, header);
1070
1071  // Restore previous memory protection.
1072  if (!RestoreMemoryProtection(header, patch_length, protection))
1073    return false;
1074
1075  if (orig_old_func)
1076    *orig_old_func = old_func + kShortJumpInstructionLength;
1077
1078  return true;
1079}
1080#endif
1081
1082bool OverrideFunctionWithRedirectJump(
1083    uptr old_func, uptr new_func, uptr *orig_old_func) {
1084  // Check whether the first instruction is a relative jump.
1085  if (*(u8*)old_func != 0xE9)
1086    return false;
1087
1088  if (orig_old_func) {
1089    sptr relative_offset = *(s32 *)(old_func + 1);
1090    uptr absolute_target = old_func + relative_offset + kJumpInstructionLength;
1091    *orig_old_func = absolute_target;
1092  }
1093
1094#if SANITIZER_WINDOWS64
1095  // If needed, get memory space for a trampoline jump.
1096  uptr trampoline = AllocateMemoryForTrampoline(old_func, kDirectBranchLength);
1097  if (!trampoline)
1098    return false;
1099  WriteDirectBranch(trampoline, new_func);
1100#endif
1101
1102  // Change memory protection to writable.
1103  DWORD protection = 0;
1104  if (!ChangeMemoryProtection(old_func, kJumpInstructionLength, &protection))
1105    return false;
1106
1107  // Write a relative jump to the redirected function.
1108  WriteJumpInstruction(old_func, FIRST_32_SECOND_64(new_func, trampoline));
1109
1110  // Restore previous memory protection.
1111  if (!RestoreMemoryProtection(old_func, kJumpInstructionLength, protection))
1112    return false;
1113
1114  return true;
1115}
1116
1117bool OverrideFunctionWithHotPatch(
1118    uptr old_func, uptr new_func, uptr *orig_old_func) {
1119  const int kHotPatchHeaderLen = kBranchLength;
1120
1121  uptr header = (uptr)old_func - kHotPatchHeaderLen;
1122  uptr patch_length = kHotPatchHeaderLen + kShortJumpInstructionLength;
1123
1124  // Validate that the function is hot patchable.
1125  size_t instruction_size = GetInstructionSize(old_func);
1126  if (instruction_size < kShortJumpInstructionLength ||
1127      !FunctionHasPadding(old_func, kHotPatchHeaderLen))
1128    return false;
1129
1130  if (orig_old_func) {
1131    // Put the needed instructions into the trampoline bytes.
1132    uptr trampoline_length = instruction_size + kDirectBranchLength;
1133    uptr trampoline = AllocateMemoryForTrampoline(old_func, trampoline_length);
1134    if (!trampoline)
1135      return false;
1136    if (!CopyInstructions(trampoline, old_func, instruction_size))
1137      return false;
1138    WriteDirectBranch(trampoline + instruction_size,
1139                      old_func + instruction_size);
1140    *orig_old_func = trampoline;
1141  }
1142
1143  // If needed, get memory space for indirect address.
1144  uptr indirect_address = 0;
1145#if SANITIZER_WINDOWS64
1146  indirect_address = AllocateMemoryForTrampoline(old_func, kAddressLength);
1147  if (!indirect_address)
1148    return false;
1149#endif
1150
1151  // Change memory protection to writable.
1152  DWORD protection = 0;
1153  if (!ChangeMemoryProtection(header, patch_length, &protection))
1154    return false;
1155
1156  // Write jumps to the redirected function.
1157  WriteBranch(header, indirect_address, new_func);
1158  WriteShortJumpInstruction(old_func, header);
1159
1160  // Restore previous memory protection.
1161  if (!RestoreMemoryProtection(header, patch_length, protection))
1162    return false;
1163
1164  return true;
1165}
1166
1167bool OverrideFunctionWithTrampoline(
1168    uptr old_func, uptr new_func, uptr *orig_old_func) {
1169
1170  size_t instructions_length = kBranchLength;
1171  size_t padding_length = 0;
1172  uptr indirect_address = 0;
1173
1174  if (orig_old_func) {
1175    // Find out the number of bytes of the instructions we need to copy
1176    // to the trampoline.
1177    instructions_length = RoundUpToInstrBoundary(kBranchLength, old_func);
1178    if (!instructions_length)
1179      return false;
1180
1181    // Put the needed instructions into the trampoline bytes.
1182    uptr trampoline_length = instructions_length + kDirectBranchLength;
1183    uptr trampoline = AllocateMemoryForTrampoline(old_func, trampoline_length);
1184    if (!trampoline)
1185      return false;
1186    if (!CopyInstructions(trampoline, old_func, instructions_length))
1187      return false;
1188    WriteDirectBranch(trampoline + instructions_length,
1189                      old_func + instructions_length);
1190    *orig_old_func = trampoline;
1191  }
1192
1193#if SANITIZER_WINDOWS64
1194  // Check if the targeted address can be encoded in the function padding.
1195  // Otherwise, allocate it in the trampoline region.
1196  if (IsMemoryPadding(old_func - kAddressLength, kAddressLength)) {
1197    indirect_address = old_func - kAddressLength;
1198    padding_length = kAddressLength;
1199  } else {
1200    indirect_address = AllocateMemoryForTrampoline(old_func, kAddressLength);
1201    if (!indirect_address)
1202      return false;
1203  }
1204#endif
1205
1206  // Change memory protection to writable.
1207  uptr patch_address = old_func - padding_length;
1208  uptr patch_length = instructions_length + padding_length;
1209  DWORD protection = 0;
1210  if (!ChangeMemoryProtection(patch_address, patch_length, &protection))
1211    return false;
1212
1213  // Patch the original function.
1214  WriteBranch(old_func, indirect_address, new_func);
1215
1216  // Restore previous memory protection.
1217  if (!RestoreMemoryProtection(patch_address, patch_length, protection))
1218    return false;
1219
1220  return true;
1221}
1222
1223bool OverrideFunction(
1224    uptr old_func, uptr new_func, uptr *orig_old_func) {
1225#if !SANITIZER_WINDOWS64
1226  if (OverrideFunctionWithDetour(old_func, new_func, orig_old_func))
1227    return true;
1228#endif
1229  if (OverrideFunctionWithRedirectJump(old_func, new_func, orig_old_func))
1230    return true;
1231  if (OverrideFunctionWithHotPatch(old_func, new_func, orig_old_func))
1232    return true;
1233  if (OverrideFunctionWithTrampoline(old_func, new_func, orig_old_func))
1234    return true;
1235  return false;
1236}
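
// Illustrative usage sketch (hypothetical hook, not part of this file):
//
//   static int (WINAPI *real_MessageBoxA)(HWND, LPCSTR, LPCSTR, UINT);
//   static int WINAPI my_MessageBoxA(HWND hwnd, LPCSTR text, LPCSTR cap,
//                                    UINT type) {
//     return real_MessageBoxA(hwnd, "intercepted", cap, type);
//   }
//   ...
//   __interception::OverrideFunction((uptr)&MessageBoxA, (uptr)&my_MessageBoxA,
//                                    (uptr *)&real_MessageBoxA);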
1237
1238static void **InterestingDLLsAvailable() {
1239  static const char *InterestingDLLs[] = {
1240    "kernel32.dll",
1241    "msvcr100d.dll",      // VS2010
1242    "msvcr110d.dll",      // VS2012
1243    "msvcr120d.dll",      // VS2013
1244    "vcruntime140d.dll",  // VS2015
1245    "ucrtbased.dll",      // Universal CRT
1246    "msvcr100.dll",       // VS2010
1247    "msvcr110.dll",       // VS2012
1248    "msvcr120.dll",       // VS2013
1249    "vcruntime140.dll",   // VS2015
1250    "ucrtbase.dll",       // Universal CRT
1251#  if (defined(__MINGW32__) && defined(__i386__))
1252    "libc++.dll",     // libc++
1253    "libunwind.dll",  // libunwind
1254#  endif
1255    // NTDLL must go last as it gets special treatment in OverrideFunction.
1256    "ntdll.dll",
1257    NULL
1258  };
1259  static void *result[ARRAY_SIZE(InterestingDLLs)] = { 0 };
1260  if (!result[0]) {
1261    for (size_t i = 0, j = 0; InterestingDLLs[i]; ++i) {
1262      if (HMODULE h = GetModuleHandleA(InterestingDLLs[i]))
1263        result[j++] = (void *)h;
1264    }
1265  }
1266  return &result[0];
1267}
1268
1269namespace {
1270// Utility for reading loaded PE images.
1271template <typename T> class RVAPtr {
1272 public:
1273  RVAPtr(void *module, uptr rva)
1274      : ptr_(reinterpret_cast<T *>(reinterpret_cast<char *>(module) + rva)) {}
1275  operator T *() { return ptr_; }
1276  T *operator->() { return ptr_; }
1277  T *operator++() { return ++ptr_; }
1278
1279 private:
1280  T *ptr_;
1281};
1282} // namespace
1283
1284// Internal implementation of GetProcAddress. At least since Windows 8,
1285// GetProcAddress appears to initialize DLLs before returning function pointers
1286// into them. This is problematic for the sanitizers, because they typically
1287// want to intercept malloc *before* MSVCRT initializes. Our internal
1288// implementation walks the export list manually without doing initialization.
1289uptr InternalGetProcAddress(void *module, const char *func_name) {
1290  // Check that the module header is full and present.
1291  RVAPtr<IMAGE_DOS_HEADER> dos_stub(module, 0);
1292  RVAPtr<IMAGE_NT_HEADERS> headers(module, dos_stub->e_lfanew);
1293  if (!module || dos_stub->e_magic != IMAGE_DOS_SIGNATURE ||  // "MZ"
1294      headers->Signature != IMAGE_NT_SIGNATURE ||             // "PE\0\0"
1295      headers->FileHeader.SizeOfOptionalHeader <
1296          sizeof(IMAGE_OPTIONAL_HEADER)) {
1297    return 0;
1298  }
1299
1300  IMAGE_DATA_DIRECTORY *export_directory =
1301      &headers->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT];
1302  if (export_directory->Size == 0)
1303    return 0;
1304  RVAPtr<IMAGE_EXPORT_DIRECTORY> exports(module,
1305                                         export_directory->VirtualAddress);
1306  RVAPtr<DWORD> functions(module, exports->AddressOfFunctions);
1307  RVAPtr<DWORD> names(module, exports->AddressOfNames);
1308  RVAPtr<WORD> ordinals(module, exports->AddressOfNameOrdinals);
1309
1310  for (DWORD i = 0; i < exports->NumberOfNames; i++) {
1311    RVAPtr<char> name(module, names[i]);
1312    if (!_strcmp(func_name, name)) {
1313      DWORD index = ordinals[i];
1314      RVAPtr<char> func(module, functions[index]);
1315
1316      // Handle forwarded functions.
1317      DWORD offset = functions[index];
1318      if (offset >= export_directory->VirtualAddress &&
1319          offset < export_directory->VirtualAddress + export_directory->Size) {
 1320        // An entry for a forwarded function is a string of the form
 1321        // "<module>.<function_name>" that is stored in the export
 1322        // directory.
1323        char function_name[256];
 1324        size_t function_name_length = _strlen(func);
 1325        if (function_name_length >= sizeof(function_name) - 1) {
1326          ReportError("interception_win: func too long: '%s'\n", (char *)func);
1327          InterceptionFailed();
1328        }
1329
 1330        _memcpy(function_name, func, function_name_length);
 1331        function_name[function_name_length] = '\0';
1332        char* separator = _strchr(function_name, '.');
1333        if (!separator) {
1334          ReportError("interception_win: no separator in '%s'\n",
1335                      function_name);
1336          InterceptionFailed();
1337        }
1338        *separator = '\0';
1339
1340        void* redirected_module = GetModuleHandleA(function_name);
1341        if (!redirected_module) {
1342          ReportError("interception_win: GetModuleHandleA failed for '%s'\n",
1343                      function_name);
1344          InterceptionFailed();
1345        }
1346        return InternalGetProcAddress(redirected_module, separator + 1);
1347      }
1348
1349      return (uptr)(char *)func;
1350    }
1351  }
1352
1353  return 0;
1354}
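
// For example (illustrative): resolve an export without triggering DLL
// initialization through the loader:
//   uptr p = InternalGetProcAddress((void *)::GetModuleHandleA("ntdll.dll"),
//                                   "memset");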
1355
1356bool OverrideFunction(
1357    const char *func_name, uptr new_func, uptr *orig_old_func) {
1358  static const char *kNtDllIgnore[] = {
1359    "memcmp", "memcpy", "memmove", "memset"
1360  };
1361
1362  bool hooked = false;
1363  void **DLLs = InterestingDLLsAvailable();
1364  for (size_t i = 0; DLLs[i]; ++i) {
1365    if (DLLs[i + 1] == nullptr) {
1366      // This is the last DLL, i.e. NTDLL. It exports some functions that
1367      // we only want to override in the CRT.
1368      for (const char *ignored : kNtDllIgnore) {
1369        if (_strcmp(func_name, ignored) == 0)
1370          return hooked;
1371      }
1372    }
1373
1374    uptr func_addr = InternalGetProcAddress(DLLs[i], func_name);
1375    if (func_addr &&
1376        OverrideFunction(func_addr, new_func, orig_old_func)) {
1377      hooked = true;
1378    }
1379  }
1380  return hooked;
1381}
1382
1383bool OverrideImportedFunction(const char *module_to_patch,
1384                              const char *imported_module,
1385                              const char *function_name, uptr new_function,
1386                              uptr *orig_old_func) {
1387  HMODULE module = GetModuleHandleA(module_to_patch);
1388  if (!module)
1389    return false;
1390
1391  // Check that the module header is full and present.
1392  RVAPtr<IMAGE_DOS_HEADER> dos_stub(module, 0);
1393  RVAPtr<IMAGE_NT_HEADERS> headers(module, dos_stub->e_lfanew);
1394  if (!module || dos_stub->e_magic != IMAGE_DOS_SIGNATURE ||  // "MZ"
1395      headers->Signature != IMAGE_NT_SIGNATURE ||             // "PE\0\0"
1396      headers->FileHeader.SizeOfOptionalHeader <
1397          sizeof(IMAGE_OPTIONAL_HEADER)) {
1398    return false;
1399  }
1400
1401  IMAGE_DATA_DIRECTORY *import_directory =
1402      &headers->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT];
1403
1404  // Iterate the list of imported DLLs. FirstThunk will be null for the last
1405  // entry.
1406  RVAPtr<IMAGE_IMPORT_DESCRIPTOR> imports(module,
1407                                          import_directory->VirtualAddress);
1408  for (; imports->FirstThunk != 0; ++imports) {
1409    RVAPtr<const char> modname(module, imports->Name);
1410    if (_stricmp(&*modname, imported_module) == 0)
1411      break;
1412  }
1413  if (imports->FirstThunk == 0)
1414    return false;
1415
1416  // We have two parallel arrays: the import address table (IAT) and the table
1417  // of names. They start out containing the same data, but the loader rewrites
1418  // the IAT to hold imported addresses and leaves the name table in
1419  // OriginalFirstThunk alone.
1420  RVAPtr<IMAGE_THUNK_DATA> name_table(module, imports->OriginalFirstThunk);
1421  RVAPtr<IMAGE_THUNK_DATA> iat(module, imports->FirstThunk);
1422  for (; name_table->u1.Ordinal != 0; ++name_table, ++iat) {
1423    if (!IMAGE_SNAP_BY_ORDINAL(name_table->u1.Ordinal)) {
1424      RVAPtr<IMAGE_IMPORT_BY_NAME> import_by_name(
1425          module, name_table->u1.ForwarderString);
1426      const char *funcname = &import_by_name->Name[0];
1427      if (_strcmp(funcname, function_name) == 0)
1428        break;
1429    }
1430  }
1431  if (name_table->u1.Ordinal == 0)
1432    return false;
1433
1434  // Now we have the correct IAT entry. Do the swap. We have to make the page
1435  // read/write first.
1436  if (orig_old_func)
1437    *orig_old_func = iat->u1.AddressOfData;
1438  DWORD old_prot, unused_prot;
1439  if (!VirtualProtect(&iat->u1.AddressOfData, 4, PAGE_EXECUTE_READWRITE,
1440                      &old_prot))
1441    return false;
1442  iat->u1.AddressOfData = new_function;
1443  if (!VirtualProtect(&iat->u1.AddressOfData, 4, old_prot, &unused_prot))
1444    return false;  // Not clear if this failure bothers us.
1445  return true;
1446}
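
// Illustrative usage sketch (hypothetical hook and module names): patch the
// import of kernel32!HeapAlloc in msvcrt.dll's IAT and save the original:
//
//   uptr real_HeapAlloc = 0;
//   __interception::OverrideImportedFunction("msvcrt.dll", "kernel32.dll",
//                                            "HeapAlloc", (uptr)&my_HeapAlloc,
//                                            &real_HeapAlloc);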
1447
1448}  // namespace __interception
1449
1450#endif  // SANITIZER_WINDOWS