Commit e99c11856d

Alex Rønne Petersen <alex@alexrp.com>
2025-02-05 13:36:56
libtsan: Update to LLVM 20.
1 parent d67d52a
Changed files (83)
lib/tsan/builtins, lib/tsan/interception, lib/tsan/sanitizer_common, lib/tsan/src
lib/tsan/builtins/assembly.h
@@ -290,4 +290,16 @@
   CFI_END
 #endif
 
+#ifdef __arm__
+#include "int_endianness.h"
+
+#if _YUGA_BIG_ENDIAN
+#define VMOV_TO_DOUBLE(dst, src0, src1) vmov dst, src1, src0 SEPARATOR
+#define VMOV_FROM_DOUBLE(dst0, dst1, src) vmov dst1, dst0, src SEPARATOR
+#else
+#define VMOV_TO_DOUBLE(dst, src0, src1) vmov dst, src0, src1 SEPARATOR
+#define VMOV_FROM_DOUBLE(dst0, dst1, src) vmov dst0, dst1, src SEPARATOR
+#endif
+#endif
+
 #endif // COMPILERRT_ASSEMBLY_H
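Context for the new VMOV_* macros, as a minimal standalone sketch (not part of this commit): when a double is split across two 32-bit core registers, which register carries which half depends on byte order, which is why the big-endian variants swap src0/src1.

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      double d = 1.0;                    // bit pattern 0x3FF0000000000000
      std::uint32_t w[2];
      std::memcpy(w, &d, sizeof d);      // w[0] is the lower-addressed word
      // Little-endian ARM: w[0] == 0x00000000, w[1] == 0x3FF00000
      // Big-endian ARM:    w[0] == 0x3FF00000, w[1] == 0x00000000
      std::printf("%08x %08x\n", w[0], w[1]);
    }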
lib/tsan/interception/interception.h
@@ -25,8 +25,19 @@
 
 // These typedefs should be used only in the interceptor definitions to replace
 // the standard system types (e.g. SSIZE_T instead of ssize_t)
-typedef __sanitizer::uptr    SIZE_T;
-typedef __sanitizer::sptr    SSIZE_T;
+// On Windows the system headers (basetsd.h) provide conflicting definitions
+// of SIZE_T/SSIZE_T that do not match the real size_t/ssize_t for 32-bit
+// systems (using long instead of the expected int). Work around the typedef
+// redefinition by #defining SIZE_T instead of using a typedef.
+// TODO: We should be using __sanitizer::usize (and a new ssize) instead of
+// these new macros as long as we ensure they match the real system definitions.
+#if SANITIZER_WINDOWS
+// Ensure that (S)SIZE_T are already defined, as we are about to override them.
+#  include <basetsd.h>
+#endif
+
+#define SIZE_T __sanitizer::usize
+#define SSIZE_T __sanitizer::ssize
 typedef __sanitizer::sptr    PTRDIFF_T;
 typedef __sanitizer::s64     INTMAX_T;
 typedef __sanitizer::u64     UINTMAX_T;
@@ -338,16 +349,8 @@ const interpose_substitution substitution_##func_name[]             \
 #endif
 
 // ISO C++ forbids casting between pointer-to-function and pointer-to-object,
-// so we use casting via an integral type __interception::uptr,
-// assuming that system is POSIX-compliant. Using other hacks seem
-// challenging, as we don't even pass function type to
-// INTERCEPT_FUNCTION macro, only its name.
+// so we use casts via uintptr_t (the local __sanitizer::uptr equivalent).
 namespace __interception {
-#if defined(_WIN64)
-typedef unsigned long long uptr;
-#else
-typedef unsigned long uptr;
-#endif  // _WIN64
 
 #if defined(__ELF__) && !SANITIZER_FUCHSIA
 // The use of interceptors makes many sanitizers unusable for static linking.
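A minimal, self-contained sketch of the double cast that the reworded comment above refers to ("we use casts via uintptr_t"); the names here are illustrative, not from the sanitizer sources:

    #include <cstddef>
    #include <cstdint>

    typedef void *(*malloc_fn)(std::size_t);

    // Stand-in for a lookup that yields an object pointer (e.g. dlsym/GetProcAddress).
    static void *lookup_symbol(const char * /*name*/) { return nullptr; }

    static malloc_fn resolve_malloc() {
      // A direct void* -> pointer-to-function conversion is ill-formed in ISO C++,
      // so the value is routed through an integral type instead.
      std::uintptr_t raw = reinterpret_cast<std::uintptr_t>(lookup_symbol("malloc"));
      return reinterpret_cast<malloc_fn>(raw);
    }

    int main() { return resolve_malloc() == nullptr ? 0 : 1; }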
lib/tsan/interception/interception_type_test.cpp
@@ -12,28 +12,35 @@
 //===----------------------------------------------------------------------===//
 
 #include "interception.h"
+#include "sanitizer_common/sanitizer_type_traits.h"
 
-#if SANITIZER_LINUX || SANITIZER_APPLE
-
-#include <sys/types.h>
+#if __has_include(<sys/types.h>)
+#  include <sys/types.h>
+#endif
 #include <stddef.h>
 #include <stdint.h>
 
-COMPILER_CHECK(sizeof(::SIZE_T) == sizeof(size_t));
-COMPILER_CHECK(sizeof(::SSIZE_T) == sizeof(ssize_t));
-COMPILER_CHECK(sizeof(::PTRDIFF_T) == sizeof(ptrdiff_t));
+COMPILER_CHECK((__sanitizer::is_same<__sanitizer::uptr, ::uintptr_t>::value));
+COMPILER_CHECK((__sanitizer::is_same<__sanitizer::sptr, ::intptr_t>::value));
+COMPILER_CHECK((__sanitizer::is_same<__sanitizer::usize, ::size_t>::value));
+COMPILER_CHECK((__sanitizer::is_same<::PTRDIFF_T, ::ptrdiff_t>::value));
+COMPILER_CHECK((__sanitizer::is_same<::SIZE_T, ::size_t>::value));
+#if !SANITIZER_WINDOWS
+// No ssize_t on Windows.
+COMPILER_CHECK((__sanitizer::is_same<::SSIZE_T, ::ssize_t>::value));
+#endif
+// TODO: These are not actually the same type on Linux (long vs long long)
 COMPILER_CHECK(sizeof(::INTMAX_T) == sizeof(intmax_t));
+COMPILER_CHECK(sizeof(::UINTMAX_T) == sizeof(uintmax_t));
 
-#  if SANITIZER_GLIBC || SANITIZER_ANDROID
+#if SANITIZER_GLIBC || SANITIZER_ANDROID
 COMPILER_CHECK(sizeof(::OFF64_T) == sizeof(off64_t));
-#  endif
+#endif
 
 // The following are the cases when pread (and friends) is used instead of
 // pread64. In those cases we need OFF_T to match off_t. We don't care about the
 // rest (they depend on _FILE_OFFSET_BITS setting when building an application).
-# if SANITIZER_ANDROID || !defined _FILE_OFFSET_BITS || \
-  _FILE_OFFSET_BITS != 64
+#if !SANITIZER_WINDOWS && (SANITIZER_ANDROID || !defined _FILE_OFFSET_BITS || \
+                           _FILE_OFFSET_BITS != 64)
 COMPILER_CHECK(sizeof(::OFF_T) == sizeof(off_t));
-# endif
-
 #endif
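The switch from sizeof() comparisons to is_same<> matters because two types can share a width without being the same type (long vs. long long on LP64, as the TODO above notes). A standard-library equivalent of the check, for illustration only:

    #include <cstddef>
    #include <type_traits>

    using size_type_alias = std::size_t;  // hypothetical stand-in for the SIZE_T macro

    // Passes only if the alias is literally size_t, not merely the same width.
    static_assert(std::is_same<size_type_alias, std::size_t>::value,
                  "SIZE_T must be exactly size_t");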
lib/tsan/interception/interception_win.cpp
@@ -27,7 +27,7 @@
 //
 // 1) Detour
 //
-//    The Detour hooking technique is assuming the presence of an header with
+//    The Detour hooking technique is assuming the presence of a header with
 //    padding and an overridable 2-bytes nop instruction (mov edi, edi). The
 //    nop instruction can safely be replaced by a 2-bytes jump without any need
 //    to save the instruction. A jump to the target is encoded in the function
@@ -47,7 +47,7 @@
 //
 //        func:  jmp <label>     -->     func:  jmp <hook>
 //
-//    On an 64-bit architecture, a trampoline is inserted.
+//    On a 64-bit architecture, a trampoline is inserted.
 //
 //        func:  jmp <label>     -->     func:  jmp <tramp>
 //                                              [...]
@@ -60,7 +60,7 @@
 //
 // 3) HotPatch
 //
-//    The HotPatch hooking is assuming the presence of an header with padding
+//    The HotPatch hooking is assuming the presence of a header with padding
 //    and a first instruction with at least 2-bytes.
 //
 //    The reason to enforce the 2-bytes limitation is to provide the minimal
@@ -80,7 +80,7 @@
 //                                       real:  <instr>
 //                                              jmp <body>
 //
-//    On an 64-bit architecture:
+//    On a 64-bit architecture:
 //
 //        head:   6 x nop                head:  jmp QWORD [addr1]
 //        func:   <instr>        -->     func:  jmp short <head>
@@ -110,7 +110,7 @@
 //                                              <instr>
 //                                              jmp <body>
 //
-//    On an 64-bit architecture:
+//    On a 64-bit architecture:
 //
 //        func:   <instr>        -->     func:  jmp QWORD [addr1]
 //                <instr>
@@ -130,6 +130,7 @@
 #include "sanitizer_common/sanitizer_platform.h"
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
+#include <psapi.h>
 
 namespace __interception {
 
@@ -186,8 +187,12 @@ static uptr GetMmapGranularity() {
   return si.dwAllocationGranularity;
 }
 
+UNUSED static uptr RoundDownTo(uptr size, uptr boundary) {
+  return size & ~(boundary - 1);
+}
+
 UNUSED static uptr RoundUpTo(uptr size, uptr boundary) {
-  return (size + boundary - 1) & ~(boundary - 1);
+  return RoundDownTo(size + boundary - 1, boundary);
 }
 
 // FIXME: internal_str* and internal_mem* functions should be moved from the
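Both helpers assume a power-of-two boundary; a small check of how they behave with the usual 64 KiB Windows allocation granularity (illustrative, not part of the commit):

    #include <cassert>
    #include <cstdint>

    static std::uint64_t RoundDownTo(std::uint64_t size, std::uint64_t boundary) {
      return size & ~(boundary - 1);  // boundary must be a power of two
    }
    static std::uint64_t RoundUpTo(std::uint64_t size, std::uint64_t boundary) {
      return RoundDownTo(size + boundary - 1, boundary);
    }

    int main() {
      assert(RoundDownTo(0x12345, 0x10000) == 0x10000);
      assert(RoundUpTo(0x12345, 0x10000) == 0x20000);
      assert(RoundUpTo(0x20000, 0x10000) == 0x20000);  // already aligned
    }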
@@ -208,6 +213,18 @@ static char* _strchr(char* str, char c) {
   return nullptr;
 }
 
+static int _strcmp(const char *s1, const char *s2) {
+  while (true) {
+    unsigned c1 = *s1;
+    unsigned c2 = *s2;
+    if (c1 != c2) return (c1 < c2) ? -1 : 1;
+    if (c1 == 0) break;
+    s1++;
+    s2++;
+  }
+  return 0;
+}
+
 static void _memset(void *p, int value, size_t sz) {
   for (size_t i = 0; i < sz; ++i)
     ((char*)p)[i] = (char)value;
@@ -284,8 +301,11 @@ static void WriteJumpInstruction(uptr from, uptr target) {
 
 static void WriteShortJumpInstruction(uptr from, uptr target) {
   sptr offset = target - from - kShortJumpInstructionLength;
-  if (offset < -128 || offset > 127)
+  if (offset < -128 || offset > 127) {
+    ReportError("interception_win: cannot write short jmp from %p to %p\n",
+                (void *)from, (void *)target);
     InterceptionFailed();
+  }
   *(u8*)from = 0xEB;
   *(u8*)(from + 1) = (u8)offset;
 }
@@ -339,32 +359,78 @@ struct TrampolineMemoryRegion {
   uptr max_size;
 };
 
-UNUSED static const uptr kTrampolineScanLimitRange = 1ull << 31;  // 2 gig
+UNUSED static const uptr kTrampolineRangeLimit = 1ull << 31;  // 2 gig
 static const int kMaxTrampolineRegion = 1024;
 static TrampolineMemoryRegion TrampolineRegions[kMaxTrampolineRegion];
 
-static void *AllocateTrampolineRegion(uptr image_address, size_t granularity) {
-#if SANITIZER_WINDOWS64
-  uptr address = image_address;
-  uptr scanned = 0;
-  while (scanned < kTrampolineScanLimitRange) {
+static void *AllocateTrampolineRegion(uptr min_addr, uptr max_addr,
+                                      uptr func_addr, size_t granularity) {
+#  if SANITIZER_WINDOWS64
+  // Clamp {min,max}_addr to the accessible address space.
+  SYSTEM_INFO system_info;
+  ::GetSystemInfo(&system_info);
+  uptr min_virtual_addr =
+      RoundUpTo((uptr)system_info.lpMinimumApplicationAddress, granularity);
+  uptr max_virtual_addr =
+      RoundDownTo((uptr)system_info.lpMaximumApplicationAddress, granularity);
+  if (min_addr < min_virtual_addr)
+    min_addr = min_virtual_addr;
+  if (max_addr > max_virtual_addr)
+    max_addr = max_virtual_addr;
+
+  // This loop probes the virtual address space to find free memory in the
+  // [min_addr, max_addr] interval. The search starts from func_addr and
+  // proceeds "outwards" towards the interval bounds using two probes, lo_addr
+  // and hi_addr, for addresses lower/higher than func_addr. At each step, it
+  // considers the probe closest to func_addr. If that address is not free, the
+  // probe is advanced (lower or higher depending on the probe) to the next
+  // memory block and the search continues.
+  uptr lo_addr = RoundDownTo(func_addr, granularity);
+  uptr hi_addr = RoundUpTo(func_addr, granularity);
+  while (lo_addr >= min_addr || hi_addr <= max_addr) {
+    // Consider the in-range address closest to func_addr.
+    uptr addr;
+    if (lo_addr < min_addr)
+      addr = hi_addr;
+    else if (hi_addr > max_addr)
+      addr = lo_addr;
+    else
+      addr = (hi_addr - func_addr < func_addr - lo_addr) ? hi_addr : lo_addr;
+
     MEMORY_BASIC_INFORMATION info;
-    if (!::VirtualQuery((void*)address, &info, sizeof(info)))
+    if (!::VirtualQuery((void *)addr, &info, sizeof(info))) {
+      ReportError(
+          "interception_win: VirtualQuery in AllocateTrampolineRegion failed "
+          "for %p\n",
+          (void *)addr);
       return nullptr;
+    }
 
-    // Check whether a region can be allocated at |address|.
+    // Check whether a region can be allocated at |addr|.
     if (info.State == MEM_FREE && info.RegionSize >= granularity) {
-      void *page = ::VirtualAlloc((void*)RoundUpTo(address, granularity),
-                                  granularity,
-                                  MEM_RESERVE | MEM_COMMIT,
-                                  PAGE_EXECUTE_READWRITE);
+      void *page =
+          ::VirtualAlloc((void *)addr, granularity, MEM_RESERVE | MEM_COMMIT,
+                         PAGE_EXECUTE_READWRITE);
+      if (page == nullptr)
+        ReportError(
+            "interception_win: VirtualAlloc in AllocateTrampolineRegion failed "
+            "for %p\n",
+            (void *)addr);
       return page;
     }
 
-    // Move to the next region.
-    address = (uptr)info.BaseAddress + info.RegionSize;
-    scanned += info.RegionSize;
+    if (addr == lo_addr)
+      lo_addr =
+          RoundDownTo((uptr)info.AllocationBase - granularity, granularity);
+    if (addr == hi_addr)
+      hi_addr =
+          RoundUpTo((uptr)info.BaseAddress + info.RegionSize, granularity);
   }
+
+  ReportError(
+      "interception_win: AllocateTrampolineRegion failed to find free memory; "
+      "min_addr: %p, max_addr: %p, func_addr: %p, granularity: %zu\n",
+      (void *)min_addr, (void *)max_addr, (void *)func_addr, granularity);
   return nullptr;
 #else
   return ::VirtualAlloc(nullptr,
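The search strategy in the new AllocateTrampolineRegion, reduced to a sketch with the VirtualQuery/VirtualAlloc details hidden behind a hypothetical ProbeFree helper (assumption: such a helper reports the next candidate address when a probe fails):

    #include <cstdint>
    using uptr = std::uintptr_t;

    // Hypothetical: true if `gran` free bytes exist at `addr`; otherwise stores
    // the next candidate (lower or higher, per `going_down`) in *skip_to.
    bool ProbeFree(uptr addr, uptr gran, bool going_down, uptr *skip_to);

    uptr FindNearFree(uptr func, uptr lo_limit, uptr hi_limit, uptr gran) {
      uptr lo = func & ~(gran - 1);               // round down to granularity
      uptr hi = (func + gran - 1) & ~(gran - 1);  // round up to granularity
      while (lo >= lo_limit || hi <= hi_limit) {
        // Take whichever in-range probe is currently closer to func.
        bool use_hi = (lo < lo_limit) || (hi <= hi_limit && hi - func < func - lo);
        uptr addr = use_hi ? hi : lo, skip = 0;
        if (ProbeFree(addr, gran, /*going_down=*/!use_hi, &skip))
          return addr;
        if (use_hi) hi = skip; else lo = skip;
      }
      return 0;  // nothing free inside the window
    }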
@@ -385,15 +451,51 @@ void TestOnlyReleaseTrampolineRegions() {
   }
 }
 
-static uptr AllocateMemoryForTrampoline(uptr image_address, size_t size) {
-  // Find a region within 2G with enough space to allocate |size| bytes.
+static uptr AllocateMemoryForTrampoline(uptr func_address, size_t size) {
+#  if SANITIZER_WINDOWS64
+  uptr min_addr = func_address - kTrampolineRangeLimit;
+  uptr max_addr = func_address + kTrampolineRangeLimit - size;
+
+  // Allocate memory within 2GB of the module (DLL or EXE file) so that any
+  // address within the module can be referenced with PC-relative operands.
+  // This allows us to not just jump to the trampoline with a PC-relative
+  // offset, but to relocate any instructions that we copy to the trampoline
+  // which have references to the original module. If we can't find the base
+  // address of the module (e.g. if func_address is in mmap'ed memory), just
+  // stay within 2GB of func_address.
+  HMODULE module;
+  if (::GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
+                           GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
+                           (LPCWSTR)func_address, &module)) {
+    MODULEINFO module_info;
+    if (::GetModuleInformation(::GetCurrentProcess(), module,
+                                &module_info, sizeof(module_info))) {
+      min_addr = (uptr)module_info.lpBaseOfDll + module_info.SizeOfImage -
+                 kTrampolineRangeLimit;
+      max_addr = (uptr)module_info.lpBaseOfDll + kTrampolineRangeLimit - size;
+    }
+  }
+
+  // Check for overflow.
+  if (min_addr > func_address)
+    min_addr = 0;
+  if (max_addr < func_address)
+    max_addr = ~(uptr)0;
+#  else
+  uptr min_addr = 0;
+  uptr max_addr = ~min_addr;
+#  endif
+
+  // Find a region within [min_addr,max_addr] with enough space to allocate
+  // |size| bytes.
   TrampolineMemoryRegion *region = nullptr;
   for (size_t bucket = 0; bucket < kMaxTrampolineRegion; ++bucket) {
     TrampolineMemoryRegion* current = &TrampolineRegions[bucket];
     if (current->content == 0) {
       // No valid region found, allocate a new region.
       size_t bucket_size = GetMmapGranularity();
-      void *content = AllocateTrampolineRegion(image_address, bucket_size);
+      void *content = AllocateTrampolineRegion(min_addr, max_addr, func_address,
+                                               bucket_size);
       if (content == nullptr)
         return 0U;
 
@@ -403,13 +505,9 @@ static uptr AllocateMemoryForTrampoline(uptr image_address, size_t size) {
       region = current;
       break;
     } else if (current->max_size - current->allocated_size > size) {
-#if SANITIZER_WINDOWS64
-        // In 64-bits, the memory space must be allocated within 2G boundary.
-        uptr next_address = current->content + current->allocated_size;
-        if (next_address < image_address ||
-            next_address - image_address >= 0x7FFF0000)
-          continue;
-#endif
+      uptr next_address = current->content + current->allocated_size;
+      if (next_address < min_addr || next_address > max_addr)
+        continue;
       // The space can be allocated in the current region.
       region = current;
       break;
@@ -458,6 +556,10 @@ static const u8 kPrologueWithShortJump2[] = {
 
 // Returns 0 on error.
 static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
+  if (rel_offset) {
+    *rel_offset = 0;
+  }
+
 #if SANITIZER_ARM64
   // An ARM64 instruction is 4 bytes long.
   return 4;
@@ -497,8 +599,14 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0x6A:  // 6A XX = push XX
       return 2;
 
+    // This instruction can be encoded with a 16-bit immediate but that is
+    // incredibly unlikely.
+    case 0x68:  // 68 XX XX XX XX : push imm32
+      return 5;
+
     case 0xb8:  // b8 XX XX XX XX : mov eax, XX XX XX XX
     case 0xB9:  // b9 XX XX XX XX : mov ecx, XX XX XX XX
+    case 0xBA:  // ba XX XX XX XX : mov edx, XX XX XX XX
       return 5;
 
     // Cannot overwrite control-instruction. Return 0 to indicate failure.
@@ -529,19 +637,43 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0xFF8B:  // 8B FF : mov edi, edi
     case 0xEC8B:  // 8B EC : mov ebp, esp
     case 0xc889:  // 89 C8 : mov eax, ecx
+    case 0xD189:  // 89 D1 : mov ecx, edx
     case 0xE589:  // 89 E5 : mov ebp, esp
     case 0xC18B:  // 8B C1 : mov eax, ecx
+    case 0xC031:  // 31 C0 : xor eax, eax
+    case 0xC931:  // 31 C9 : xor ecx, ecx
+    case 0xD231:  // 31 D2 : xor edx, edx
     case 0xC033:  // 33 C0 : xor eax, eax
     case 0xC933:  // 33 C9 : xor ecx, ecx
     case 0xD233:  // 33 D2 : xor edx, edx
+    case 0xDB84:  // 84 DB : test bl,bl
+    case 0xC084:  // 84 C0 : test al,al
+    case 0xC984:  // 84 C9 : test cl,cl
+    case 0xD284:  // 84 D2 : test dl,dl
       return 2;
 
+    case 0x3980:  // 80 39 XX : cmp BYTE PTR [rcx], XX
+    case 0x4D8B:  // 8B 4D XX : mov XX(%ebp), ecx
+    case 0x558B:  // 8B 55 XX : mov XX(%ebp), edx
+    case 0x758B:  // 8B 75 XX : mov XX(%ebp), esp
+    case 0xE483:  // 83 E4 XX : and esp, XX
+    case 0xEC83:  // 83 EC XX : sub esp, XX
+    case 0xC1F6:  // F6 C1 XX : test cl, XX
+      return 3;
+
+    case 0x89FF:  // FF 89 XX XX XX XX : dec dword ptr [ecx + XX XX XX XX]
+    case 0xEC81:  // 81 EC XX XX XX XX : sub esp, XX XX XX XX
+      return 6;
+
     // Cannot overwrite control-instruction. Return 0 to indicate failure.
-    case 0x25FF:  // FF 25 XX XX XX XX : jmp [XXXXXXXX]
+    case 0x25FF:  // FF 25 XX YY ZZ WW : jmp dword ptr ds:[WWZZYYXX]
       return 0;
   }
 
-  switch (0x00FFFFFF & *(u32*)address) {
+  switch (0x00FFFFFF & *(u32 *)address) {
+    case 0x244C8D:  // 8D 4C 24 XX : lea ecx, [esp + XX]
+    case 0x2474FF:  // FF 74 24 XX : push qword ptr [rsp + XX]
+      return 4;
     case 0x24A48D:  // 8D A4 24 XX XX XX XX : lea esp, [esp + XX XX XX XX]
       return 7;
   }
@@ -556,6 +688,21 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0xA1:  // A1 XX XX XX XX XX XX XX XX :
                 //   movabs eax, dword ptr ds:[XXXXXXXX]
       return 9;
+    case 0xF2:
+      switch (*(u32 *)(address + 1)) {
+          case 0x2444110f:  //  f2 0f 11 44 24 XX       movsd  QWORD PTR
+                            //  [rsp + XX], xmm0
+          case 0x244c110f:  //  f2 0f 11 4c 24 XX       movsd  QWORD PTR
+                            //  [rsp + XX], xmm1
+          case 0x2454110f:  //  f2 0f 11 54 24 XX       movsd  QWORD PTR
+                            //  [rsp + XX], xmm2
+          case 0x245c110f:  //  f2 0f 11 5c 24 XX       movsd  QWORD PTR
+                            //  [rsp + XX], xmm3
+          case 0x2464110f:  //  f2 0f 11 64 24 XX       movsd  QWORD PTR
+                            //  [rsp + XX], xmm4
+            return 6;
+      }
+      break;
 
     case 0x83:
       const u8 next_byte = *(u8*)(address + 1);
@@ -584,55 +731,155 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0x018a:  // mov al, byte ptr [rcx]
       return 2;
 
+    case 0x7E80:  // 80 7E YY XX  cmp BYTE PTR [rsi+YY], XX
+    case 0x7D80:  // 80 7D YY XX  cmp BYTE PTR [rbp+YY], XX
+    case 0x7A80:  // 80 7A YY XX  cmp BYTE PTR [rdx+YY], XX
+    case 0x7880:  // 80 78 YY XX  cmp BYTE PTR [rax+YY], XX
+    case 0x7B80:  // 80 7B YY XX  cmp BYTE PTR [rbx+YY], XX
+    case 0x7980:  // 80 79 YY XX  cmp BYTE ptr [rcx+YY], XX
+      return 4;
+
     case 0x058A:  // 8A 05 XX XX XX XX : mov al, byte ptr [XX XX XX XX]
     case 0x058B:  // 8B 05 XX XX XX XX : mov eax, dword ptr [XX XX XX XX]
       if (rel_offset)
         *rel_offset = 2;
+    case 0xB841:  // 41 B8 XX XX XX XX : mov r8d, XX XX XX XX
       return 6;
+
+    case 0x7E81:  // 81 7E YY XX XX XX XX  cmp DWORD PTR [rsi+YY], XX XX XX XX
+    case 0x7D81:  // 81 7D YY XX XX XX XX  cmp DWORD PTR [rbp+YY], XX XX XX XX
+    case 0x7A81:  // 81 7A YY XX XX XX XX  cmp DWORD PTR [rdx+YY], XX XX XX XX
+    case 0x7881:  // 81 78 YY XX XX XX XX  cmp DWORD PTR [rax+YY], XX XX XX XX
+    case 0x7B81:  // 81 7B YY XX XX XX XX  cmp DWORD PTR [rbx+YY], XX XX XX XX
+    case 0x7981:  // 81 79 YY XX XX XX XX  cmp dword ptr [rcx+YY], XX XX XX XX
+      return 7;
   }
 
-  switch (0x00FFFFFF & *(u32*)address) {
-    case 0xe58948:    // 48 8b c4 : mov rbp, rsp
-    case 0xc18b48:    // 48 8b c1 : mov rax, rcx
-    case 0xc48b48:    // 48 8b c4 : mov rax, rsp
-    case 0xd9f748:    // 48 f7 d9 : neg rcx
-    case 0xd12b48:    // 48 2b d1 : sub rdx, rcx
-    case 0x07c1f6:    // f6 c1 07 : test cl, 0x7
-    case 0xc98548:    // 48 85 C9 : test rcx, rcx
-    case 0xd28548:    // 48 85 d2 : test rdx, rdx
+  switch (0x00FFFFFF & *(u32 *)address) {
+    case 0x10b70f:    // 0f b7 10 : movzx edx, WORD PTR [rax]
+    case 0xc00b4d:    // 4d 0b c0 : or r8, r8
+    case 0xc03345:    // 45 33 c0 : xor r8d, r8d
+    case 0xc08548:    // 48 85 c0 : test rax, rax
     case 0xc0854d:    // 4d 85 c0 : test r8, r8
+    case 0xc08b41:    // 41 8b c0 : mov eax, r8d
+    case 0xc0ff48:    // 48 ff c0 : inc rax
+    case 0xc0ff49:    // 49 ff c0 : inc r8
+    case 0xc18b41:    // 41 8b c1 : mov eax, r9d
+    case 0xc18b48:    // 48 8b c1 : mov rax, rcx
+    case 0xc18b4c:    // 4c 8b c1 : mov r8, rcx
+    case 0xc1ff48:    // 48 ff c1 : inc rcx
+    case 0xc1ff49:    // 49 ff c1 : inc r9
+    case 0xc28b41:    // 41 8b c2 : mov eax, r10d
+    case 0x01b60f:    // 0f b6 01 : movzx eax, BYTE PTR [rcx]
+    case 0x09b60f:    // 0f b6 09 : movzx ecx, BYTE PTR [rcx]
+    case 0x11b60f:    // 0f b6 11 : movzx edx, BYTE PTR [rcx]
     case 0xc2b60f:    // 0f b6 c2 : movzx eax, dl
-    case 0xc03345:    // 45 33 c0 : xor r8d, r8d
+    case 0xc2ff48:    // 48 ff c2 : inc rdx
+    case 0xc2ff49:    // 49 ff c2 : inc r10
+    case 0xc38b41:    // 41 8b c3 : mov eax, r11d
+    case 0xc3ff48:    // 48 ff c3 : inc rbx
+    case 0xc3ff49:    // 49 ff c3 : inc r11
+    case 0xc48b41:    // 41 8b c4 : mov eax, r12d
+    case 0xc48b48:    // 48 8b c4 : mov rax, rsp
+    case 0xc4ff49:    // 49 ff c4 : inc r12
+    case 0xc5ff49:    // 49 ff c5 : inc r13
+    case 0xc6ff48:    // 48 ff c6 : inc rsi
+    case 0xc6ff49:    // 49 ff c6 : inc r14
+    case 0xc7ff48:    // 48 ff c7 : inc rdi
+    case 0xc7ff49:    // 49 ff c7 : inc r15
     case 0xc93345:    // 45 33 c9 : xor r9d, r9d
-    case 0xdb3345:    // 45 33 DB : xor r11d, r11d
-    case 0xd98b4c:    // 4c 8b d9 : mov r11, rcx
-    case 0xd28b4c:    // 4c 8b d2 : mov r10, rdx
-    case 0xc98b4c:    // 4C 8B C9 : mov r9, rcx
-    case 0xc18b4c:    // 4C 8B C1 : mov r8, rcx
-    case 0xd2b60f:    // 0f b6 d2 : movzx edx, dl
+    case 0xc98548:    // 48 85 c9 : test rcx, rcx
+    case 0xc9854d:    // 4d 85 c9 : test r9, r9
+    case 0xc98b4c:    // 4c 8b c9 : mov r9, rcx
+    case 0xd12948:    // 48 29 d1 : sub rcx, rdx
     case 0xca2b48:    // 48 2b ca : sub rcx, rdx
     case 0xca3b48:    // 48 3b ca : cmp rcx, rdx
-    case 0x10b70f:    // 0f b7 10 : movzx edx, WORD PTR [rax]
-    case 0xc00b4d:    // 3d 0b c0 : or r8, r8
-    case 0xc08b41:    // 41 8b c0 : mov eax, r8d
+    case 0xd12b48:    // 48 2b d1 : sub rdx, rcx
     case 0xd18b48:    // 48 8b d1 : mov rdx, rcx
-    case 0xdc8b4c:    // 4c 8b dc : mov r11, rsp
     case 0xd18b4c:    // 4c 8b d1 : mov r10, rcx
-    case 0xE0E483:    // 83 E4 E0 : and esp, 0xFFFFFFE0
+    case 0xd28548:    // 48 85 d2 : test rdx, rdx
+    case 0xd2854d:    // 4d 85 d2 : test r10, r10
+    case 0xd28b4c:    // 4c 8b d2 : mov r10, rdx
+    case 0xd2b60f:    // 0f b6 d2 : movzx edx, dl
+    case 0xd2be0f:    // 0f be d2 : movsx edx, dl
+    case 0xd98b4c:    // 4c 8b d9 : mov r11, rcx
+    case 0xd9f748:    // 48 f7 d9 : neg rcx
+    case 0xc03145:    // 45 31 c0 : xor r8d,r8d
+    case 0xc93145:    // 45 31 c9 : xor r9d,r9d
+    case 0xdb3345:    // 45 33 db : xor r11d, r11d
+    case 0xc08445:    // 45 84 c0 : test r8b,r8b
+    case 0xd28445:    // 45 84 d2 : test r10b,r10b
+    case 0xdb8548:    // 48 85 db : test rbx, rbx
+    case 0xdb854d:    // 4d 85 db : test r11, r11
+    case 0xdc8b4c:    // 4c 8b dc : mov r11, rsp
+    case 0xe48548:    // 48 85 e4 : test rsp, rsp
+    case 0xe4854d:    // 4d 85 e4 : test r12, r12
+    case 0xc88948:    // 48 89 c8 : mov rax,rcx
+    case 0xcb8948:    // 48 89 cb : mov rbx,rcx
+    case 0xd08948:    // 48 89 d0 : mov rax,rdx
+    case 0xd18948:    // 48 89 d1 : mov rcx,rdx
+    case 0xd38948:    // 48 89 d3 : mov rbx,rdx
+    case 0xe58948:    // 48 89 e5 : mov rbp, rsp
+    case 0xed8548:    // 48 85 ed : test rbp, rbp
+    case 0xc88949:    // 49 89 c8 : mov r8, rcx
+    case 0xc98949:    // 49 89 c9 : mov r9, rcx
+    case 0xca8949:    // 49 89 ca : mov r10,rcx
+    case 0xd08949:    // 49 89 d0 : mov r8, rdx
+    case 0xd18949:    // 49 89 d1 : mov r9, rdx
+    case 0xd28949:    // 49 89 d2 : mov r10, rdx
+    case 0xd38949:    // 49 89 d3 : mov r11, rdx
+    case 0xed854d:    // 4d 85 ed : test r13, r13
+    case 0xf6854d:    // 4d 85 f6 : test r14, r14
+    case 0xff854d:    // 4d 85 ff : test r15, r15
       return 3;
 
+    case 0x245489:    // 89 54 24 XX : mov DWORD PTR[rsp + XX], edx
+    case 0x428d44:    // 44 8d 42 XX : lea r8d , [rdx + XX]
+    case 0x588948:    // 48 89 58 XX : mov QWORD PTR[rax + XX], rbx
     case 0xec8348:    // 48 83 ec XX : sub rsp, XX
     case 0xf88349:    // 49 83 f8 XX : cmp r8, XX
-    case 0x588948:    // 48 89 58 XX : mov QWORD PTR[rax + XX], rbx
+    case 0x488d49:    // 49 8d 48 XX : lea rcx, [...]
+    case 0x048d4c:    // 4c 8d 04 XX : lea r8, [...]
+    case 0x148d4e:    // 4e 8d 14 XX : lea r10, [...]
+    case 0x398366:    // 66 83 39 XX : cmp WORD PTR [rcx], XX
       return 4;
 
+    case 0x441F0F:  // 0F 1F 44 XX XX :   nop DWORD PTR [...]
+    case 0x246483:  // 83 64 24 XX YY :   and    DWORD PTR [rsp+XX], YY
+      return 5;
+
+    case 0x788166:  // 66 81 78 XX YY YY  cmp WORD PTR [rax+XX], YY YY
+    case 0x798166:  // 66 81 79 XX YY YY  cmp WORD PTR [rcx+XX], YY YY
+    case 0x7a8166:  // 66 81 7a XX YY YY  cmp WORD PTR [rdx+XX], YY YY
+    case 0x7b8166:  // 66 81 7b XX YY YY  cmp WORD PTR [rbx+XX], YY YY
+    case 0x7e8166:  // 66 81 7e XX YY YY  cmp WORD PTR [rsi+XX], YY YY
+    case 0x7f8166:  // 66 81 7f XX YY YY  cmp WORD PTR [rdi+XX], YY YY
+      return 6;
+
     case 0xec8148:    // 48 81 EC XX XX XX XX : sub rsp, XXXXXXXX
+    case 0xc0c748:    // 48 C7 C0 XX XX XX XX : mov rax, XX XX XX XX
       return 7;
 
+    // clang-format off
+    case 0x788141:  // 41 81 78 XX YY YY YY YY : cmp DWORD PTR [r8+YY], XX XX XX XX
+    case 0x798141:  // 41 81 79 XX YY YY YY YY : cmp DWORD PTR [r9+YY], XX XX XX XX
+    case 0x7a8141:  // 41 81 7a XX YY YY YY YY : cmp DWORD PTR [r10+YY], XX XX XX XX
+    case 0x7b8141:  // 41 81 7b XX YY YY YY YY : cmp DWORD PTR [r11+YY], XX XX XX XX
+    case 0x7d8141:  // 41 81 7d XX YY YY YY YY : cmp DWORD PTR [r13+YY], XX XX XX XX
+    case 0x7e8141:  // 41 81 7e XX YY YY YY YY : cmp DWORD PTR [r14+YY], XX XX XX XX
+    case 0x7f8141:  // 41 81 7f YY XX XX XX XX : cmp DWORD PTR [r15+YY], XX XX XX XX
+    case 0x247c81:  // 81 7c 24 YY XX XX XX XX : cmp DWORD PTR [rsp+YY], XX XX XX XX
+      return 8;
+      // clang-format on
+
     case 0x058b48:    // 48 8b 05 XX XX XX XX :
                       //   mov rax, QWORD PTR [rip + XXXXXXXX]
     case 0x058d48:    // 48 8d 05 XX XX XX XX :
                       //   lea rax, QWORD PTR [rip + XXXXXXXX]
+    case 0x0d8948:    // 48 89 0d XX XX XX XX :
+                      //   mov QWORD PTR [rip + XXXXXXXX], rcx
+    case 0x158948:    // 48 89 15 XX XX XX XX :
+                      //   mov QWORD PTR [rip + XXXXXXXX], rdx
     case 0x25ff48:    // 48 ff 25 XX XX XX XX :
                       //   rex.W jmp QWORD PTR [rip + XXXXXXXX]
     case 0x158D4C:    // 4c 8d 15 XX XX XX XX : lea r10, [rip + XX]
@@ -644,9 +891,19 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0x2444c7:    // C7 44 24 XX YY YY YY YY
                       //   mov dword ptr [rsp + XX], YYYYYYYY
       return 8;
+
+    case 0x7c8141:  // 41 81 7c ZZ YY XX XX XX XX
+                    // cmp DWORD PTR [reg+reg*n+YY], XX XX XX XX
+      return 9;
   }
 
   switch (*(u32*)(address)) {
+    case 0x01b60f44:  // 44 0f b6 01 : movzx r8d, BYTE PTR [rcx]
+    case 0x09b60f44:  // 44 0f b6 09 : movzx r9d, BYTE PTR [rcx]
+    case 0x0ab60f44:  // 44 0f b6 0a : movzx r8d, BYTE PTR [rdx]
+    case 0x11b60f44:  // 44 0f b6 11 : movzx r10d, BYTE PTR [rcx]
+    case 0x1ab60f44:  // 44 0f b6 1a : movzx r11d, BYTE PTR [rdx]
+      return 4;
     case 0x24448b48:  // 48 8b 44 24 XX : mov rax, QWORD ptr [rsp + XX]
     case 0x246c8948:  // 48 89 6C 24 XX : mov QWORD ptr [rsp + XX], rbp
     case 0x245c8948:  // 48 89 5c 24 XX : mov QWORD PTR [rsp + XX], rbx
@@ -656,9 +913,19 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0x24548948:  // 48 89 54 24 XX : mov QWORD PTR [rsp + XX], rdx
     case 0x244c894c:  // 4c 89 4c 24 XX : mov QWORD PTR [rsp + XX], r9
     case 0x2444894c:  // 4c 89 44 24 XX : mov QWORD PTR [rsp + XX], r8
+    case 0x244c8944:  // 44 89 4c 24 XX   mov DWORD PTR [rsp + XX], r9d
+    case 0x24448944:  // 44 89 44 24 XX   mov DWORD PTR [rsp + XX], r8d
+    case 0x246c8d48:  // 48 8d 6c 24 XX : lea rbp, [rsp + XX]
       return 5;
-    case 0x24648348:  // 48 83 64 24 XX : and QWORD PTR [rsp + XX], YY
+    case 0x24648348:  // 48 83 64 24 XX YY : and QWORD PTR [rsp + XX], YY
       return 6;
+    case 0x24A48D48:  // 48 8D A4 24 XX XX XX XX : lea rsp, [rsp + XX XX XX XX]
+      return 8;
+  }
+
+  switch (0xFFFFFFFFFFULL & *(u64 *)(address)) {
+    case 0xC07E0F4866:  // 66 48 0F 7E C0 : movq rax, xmm0
+      return 5;
   }
 
 #else
@@ -671,11 +938,10 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0x458B:  // 8B 45 XX : mov eax, dword ptr [ebp + XX]
     case 0x5D8B:  // 8B 5D XX : mov ebx, dword ptr [ebp + XX]
     case 0x7D8B:  // 8B 7D XX : mov edi, dword ptr [ebp + XX]
-    case 0xEC83:  // 83 EC XX : sub esp, XX
+    case 0x758B:  // 8B 75 XX : mov esi, dword ptr [ebp + XX]
     case 0x75FF:  // FF 75 XX : push dword ptr [ebp + XX]
       return 3;
     case 0xC1F7:  // F7 C1 XX YY ZZ WW : test ecx, WWZZYYXX
-    case 0x25FF:  // FF 25 XX YY ZZ WW : jmp dword ptr ds:[WWZZYYXX]
       return 6;
     case 0x3D83:  // 83 3D XX YY ZZ WW TT : cmp TT, WWZZYYXX
       return 7;
@@ -718,6 +984,10 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
   return 0;
 }
 
+size_t TestOnlyGetInstructionSize(uptr address, size_t *rel_offset) {
+  return GetInstructionSize(address, rel_offset);
+}
+
 // Returns 0 on error.
 static size_t RoundUpToInstrBoundary(size_t size, uptr address) {
   size_t cursor = 0;
@@ -745,8 +1015,14 @@ static bool CopyInstructions(uptr to, uptr from, size_t size) {
       // this will be untrue if relocated_offset \notin [-2**31, 2**31)
       s64 delta = to - from;
       s64 relocated_offset = *(s32 *)(to + cursor + rel_offset) - delta;
-      if (-0x8000'0000ll > relocated_offset || relocated_offset > 0x7FFF'FFFFll)
+      if (-0x8000'0000ll > relocated_offset ||
+          relocated_offset > 0x7FFF'FFFFll) {
+        ReportError(
+            "interception_win: CopyInstructions relocated_offset %lld outside "
+            "32-bit range\n",
+            (long long)relocated_offset);
         return false;
+      }
 #  else
       // on 32-bit, the relative offset will always be correct
       s32 delta = to - from;
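A worked example of the displacement fix-up performed in CopyInstructions: a rip-relative operand encodes target minus end-of-instruction, so moving the instruction by delta bytes means the stored 32-bit displacement must change by -delta, and the copy has to fail if the result no longer fits in 32 bits.

    #include <cassert>
    #include <cstdint>

    int main() {
      std::int64_t from = 0x7ff600001000, to = 0x7ff640001000;  // copied +1 GiB
      std::int64_t insn_len = 7, target = 0x7ff600009000;
      std::int32_t old_disp = std::int32_t(target - (from + insn_len));
      std::int64_t delta = to - from;
      std::int64_t new_disp = std::int64_t(old_disp) - delta;
      assert(to + insn_len + new_disp == target);              // still reaches target
      assert(new_disp >= INT32_MIN && new_disp <= INT32_MAX);  // fits, so copy succeeds
    }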
@@ -969,8 +1245,7 @@ static void **InterestingDLLsAvailable() {
     "libc++.dll",     // libc++
     "libunwind.dll",  // libunwind
 #  endif
-    // NTDLL should go last as it exports some functions that we should
-    // override in the CRT [presumably only used internally].
+    // NTDLL must go last as it gets special treatment in OverrideFunction.
     "ntdll.dll",
     NULL
   };
@@ -1027,7 +1302,7 @@ uptr InternalGetProcAddress(void *module, const char *func_name) {
 
   for (DWORD i = 0; i < exports->NumberOfNames; i++) {
     RVAPtr<char> name(module, names[i]);
-    if (!strcmp(func_name, name)) {
+    if (!_strcmp(func_name, name)) {
       DWORD index = ordinals[i];
       RVAPtr<char> func(module, functions[index]);
 
@@ -1040,19 +1315,27 @@ uptr InternalGetProcAddress(void *module, const char *func_name) {
         // exported directory.
         char function_name[256];
         size_t funtion_name_length = _strlen(func);
-        if (funtion_name_length >= sizeof(function_name) - 1)
+        if (funtion_name_length >= sizeof(function_name) - 1) {
+          ReportError("interception_win: func too long: '%s'\n", (char *)func);
           InterceptionFailed();
+        }
 
         _memcpy(function_name, func, funtion_name_length);
         function_name[funtion_name_length] = '\0';
         char* separator = _strchr(function_name, '.');
-        if (!separator)
+        if (!separator) {
+          ReportError("interception_win: no separator in '%s'\n",
+                      function_name);
           InterceptionFailed();
+        }
         *separator = '\0';
 
         void* redirected_module = GetModuleHandleA(function_name);
-        if (!redirected_module)
+        if (!redirected_module) {
+          ReportError("interception_win: GetModuleHandleA failed for '%s'\n",
+                      function_name);
           InterceptionFailed();
+        }
         return InternalGetProcAddress(redirected_module, separator + 1);
       }
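The branch above handles export forwarders, where the "address" of an export is really a string naming another module and symbol. A small standalone illustration of the split (the forwarder value is made up):

    #include <cstdio>
    #include <cstring>

    int main() {
      const char *forwarder = "NTDLL.RtlAllocateHeap";  // illustrative value
      char module[256];
      const char *dot = std::strchr(forwarder, '.');
      if (!dot || static_cast<std::size_t>(dot - forwarder) >= sizeof(module))
        return 1;                                       // malformed forwarder
      std::memcpy(module, forwarder, dot - forwarder);
      module[dot - forwarder] = '\0';
      // Resolution then restarts in "module", looking up the part after the dot.
      std::printf("module=%s symbol=%s\n", module, dot + 1);
    }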
 
@@ -1065,9 +1348,22 @@ uptr InternalGetProcAddress(void *module, const char *func_name) {
 
 bool OverrideFunction(
     const char *func_name, uptr new_func, uptr *orig_old_func) {
+  static const char *kNtDllIgnore[] = {
+    "memcmp", "memcpy", "memmove", "memset"
+  };
+
   bool hooked = false;
   void **DLLs = InterestingDLLsAvailable();
   for (size_t i = 0; DLLs[i]; ++i) {
+    if (DLLs[i + 1] == nullptr) {
+      // This is the last DLL, i.e. NTDLL. It exports some functions that
+      // we only want to override in the CRT.
+      for (const char *ignored : kNtDllIgnore) {
+        if (_strcmp(func_name, ignored) == 0)
+          return hooked;
+      }
+    }
+
     uptr func_addr = InternalGetProcAddress(DLLs[i], func_name);
     if (func_addr &&
         OverrideFunction(func_addr, new_func, orig_old_func)) {
@@ -1121,7 +1417,7 @@ bool OverrideImportedFunction(const char *module_to_patch,
       RVAPtr<IMAGE_IMPORT_BY_NAME> import_by_name(
           module, name_table->u1.ForwarderString);
       const char *funcname = &import_by_name->Name[0];
-      if (strcmp(funcname, function_name) == 0)
+      if (_strcmp(funcname, function_name) == 0)
         break;
     }
   }
@@ -1144,4 +1440,4 @@ bool OverrideImportedFunction(const char *module_to_patch,
 
 }  // namespace __interception
 
-#endif  // SANITIZER_APPLE
+#endif  // SANITIZER_WINDOWS
lib/tsan/interception/interception_win.h
@@ -63,6 +63,9 @@ bool OverrideFunctionWithTrampoline(
 // Exposed for unittests
 void TestOnlyReleaseTrampolineRegions();
 
+// Exposed for unittests
+SIZE_T TestOnlyGetInstructionSize(uptr address, SIZE_T *rel_offset);
+
 }  // namespace __interception
 
 #if defined(INTERCEPTION_DYNAMIC_CRT)
lib/tsan/sanitizer_common/sanitizer_allocator.cpp
@@ -59,7 +59,7 @@ static void *RawInternalAlloc(uptr size, InternalAllocatorCache *cache,
 
 static void *RawInternalRealloc(void *ptr, uptr size,
                                 InternalAllocatorCache *cache) {
-  uptr alignment = 8;
+  constexpr usize alignment = Max<usize>(8, sizeof(void *));
   if (cache == 0) {
     SpinMutexLock l(&internal_allocator_cache_mu);
     return internal_allocator()->Reallocate(&internal_allocator_cache, ptr,
@@ -137,7 +137,8 @@ void InternalAllocatorUnlock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
 }
 
 // LowLevelAllocator
-constexpr uptr kLowLevelAllocatorDefaultAlignment = 8;
+constexpr usize kLowLevelAllocatorDefaultAlignment =
+    Max<usize>(8, sizeof(void *));
 constexpr uptr kMinNumPagesRounded = 16;
 constexpr uptr kMinRoundedSize = 65536;
 static uptr low_level_alloc_min_alignment = kLowLevelAllocatorDefaultAlignment;
lib/tsan/sanitizer_common/sanitizer_allocator_dlsym.h
@@ -15,6 +15,8 @@
 #define SANITIZER_ALLOCATOR_DLSYM_H
 
 #include "sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_allocator_checks.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
 
 namespace __sanitizer {
 
@@ -31,24 +33,22 @@ struct DlSymAllocator {
            UNLIKELY(internal_allocator()->FromPrimary(ptr));
   }
 
-  static void *Allocate(uptr size_in_bytes) {
-    void *ptr = InternalAlloc(size_in_bytes, nullptr, kWordSize);
+  static void *Allocate(uptr size_in_bytes, uptr align = kWordSize) {
+    void *ptr = InternalAlloc(size_in_bytes, nullptr, align);
     CHECK(internal_allocator()->FromPrimary(ptr));
-    Details::OnAllocate(ptr,
-                        internal_allocator()->GetActuallyAllocatedSize(ptr));
+    Details::OnAllocate(ptr, GetSize(ptr));
     return ptr;
   }
 
-  static void *Callocate(SIZE_T nmemb, SIZE_T size) {
+  static void *Callocate(usize nmemb, usize size) {
     void *ptr = InternalCalloc(nmemb, size);
     CHECK(internal_allocator()->FromPrimary(ptr));
-    Details::OnAllocate(ptr,
-                        internal_allocator()->GetActuallyAllocatedSize(ptr));
+    Details::OnAllocate(ptr, GetSize(ptr));
     return ptr;
   }
 
   static void Free(void *ptr) {
-    uptr size = internal_allocator()->GetActuallyAllocatedSize(ptr);
+    uptr size = GetSize(ptr);
     Details::OnFree(ptr, size);
     InternalFree(ptr);
   }
@@ -61,7 +61,7 @@ struct DlSymAllocator {
       Free(ptr);
       return nullptr;
     }
-    uptr size = internal_allocator()->GetActuallyAllocatedSize(ptr);
+    uptr size = GetSize(ptr);
     uptr memcpy_size = Min(new_size, size);
     void *new_ptr = Allocate(new_size);
     if (new_ptr)
@@ -70,6 +70,15 @@ struct DlSymAllocator {
     return new_ptr;
   }
 
+  static void *ReallocArray(void *ptr, uptr count, uptr size) {
+    CHECK(!CheckForCallocOverflow(count, size));
+    return Realloc(ptr, count * size);
+  }
+
+  static uptr GetSize(void *ptr) {
+    return internal_allocator()->GetActuallyAllocatedSize(ptr);
+  }
+
   static void OnAllocate(const void *ptr, uptr size) {}
   static void OnFree(const void *ptr, uptr size) {}
 };
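The CheckForCallocOverflow guard in the new ReallocArray boils down to rejecting count * size when the product would wrap; a compile-time sketch of that condition:

    #include <cstdint>

    constexpr bool MulWouldOverflow(std::uint64_t count, std::uint64_t size) {
      return size != 0 && count > UINT64_MAX / size;
    }

    static_assert(!MulWouldOverflow(1024, 8), "ordinary request is fine");
    static_assert(MulWouldOverflow(UINT64_MAX / 2 + 2, 2), "this product would wrap");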
lib/tsan/sanitizer_common/sanitizer_allocator_primary64.h
@@ -185,9 +185,10 @@ class SizeClassAllocator64 {
     // recoverable.
     if (UNLIKELY(!EnsureFreeArraySpace(region, region_beg,
                                        new_num_freed_chunks))) {
-      Report("FATAL: Internal error: %s's allocator exhausted the free list "
-             "space for size class %zd (%zd bytes).\n", SanitizerToolName,
-             class_id, ClassIdToSize(class_id));
+      Report(
+          "FATAL: Internal error: %s's allocator exhausted the free list "
+          "space for size class %zu (%zu bytes).\n",
+          SanitizerToolName, class_id, ClassIdToSize(class_id));
       Die();
     }
     for (uptr i = 0; i < n_chunks; i++)
@@ -763,8 +764,9 @@ class SizeClassAllocator64 {
     if (!region->exhausted) {
       region->exhausted = true;
       Printf("%s: Out of memory. ", SanitizerToolName);
-      Printf("The process has exhausted %zuMB for size class %zu.\n",
-             kRegionSize >> 20, ClassIdToSize(class_id));
+      Printf(
+          "The process has exhausted %zu MB for size class %zu (%zu bytes).\n",
+          kRegionSize >> 20, class_id, ClassIdToSize(class_id));
     }
     return true;
   }
lib/tsan/sanitizer_common/sanitizer_common.h
@@ -83,8 +83,8 @@ int TgKill(pid_t pid, tid_t tid, int sig);
 uptr GetThreadSelf();
 void GetThreadStackTopAndBottom(bool at_initialization, uptr *stack_top,
                                 uptr *stack_bottom);
-void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size,
-                          uptr *tls_addr, uptr *tls_size);
+void GetThreadStackAndTls(bool main, uptr *stk_begin, uptr *stk_end,
+                          uptr *tls_begin, uptr *tls_end);
 
 // Memory management
 void *MmapOrDie(uptr size, const char *mem_type, bool raw_report = false);
@@ -239,13 +239,15 @@ void RemoveANSIEscapeSequencesFromString(char *buffer);
 void Printf(const char *format, ...) FORMAT(1, 2);
 void Report(const char *format, ...) FORMAT(1, 2);
 void SetPrintfAndReportCallback(void (*callback)(const char *));
-#define VReport(level, ...)                                              \
-  do {                                                                   \
-    if ((uptr)Verbosity() >= (level)) Report(__VA_ARGS__); \
+#define VReport(level, ...)                     \
+  do {                                          \
+    if (UNLIKELY((uptr)Verbosity() >= (level))) \
+      Report(__VA_ARGS__);                      \
   } while (0)
-#define VPrintf(level, ...)                                              \
-  do {                                                                   \
-    if ((uptr)Verbosity() >= (level)) Printf(__VA_ARGS__); \
+#define VPrintf(level, ...)                     \
+  do {                                          \
+    if (UNLIKELY((uptr)Verbosity() >= (level))) \
+      Printf(__VA_ARGS__);                      \
   } while (0)
 
 // Lock sanitizer error reporting and protects against nested errors.
@@ -266,7 +268,15 @@ class ScopedErrorReportLock {
 extern uptr stoptheworld_tracer_pid;
 extern uptr stoptheworld_tracer_ppid;
 
+// Returns true if the entire range can be read.
 bool IsAccessibleMemoryRange(uptr beg, uptr size);
+// Attempts to copy `n` bytes from memory range starting at `src` to `dest`.
+// Returns true if the entire range can be read. Returns `false` if any part of
+// the source range cannot be read, in which case the contents of `dest` are
+// undefined.
+bool TryMemCpy(void *dest, const void *src, uptr n);
+// Copies accessible memory, and zero fill inaccessible.
+void MemCpyAccessible(void *dest, const void *src, uptr n);
 
 // Error report formatting.
 const char *StripPathPrefix(const char *filepath,
@@ -917,7 +927,6 @@ typedef void (*RangeIteratorCallback)(uptr begin, uptr end, void *arg);
 
 enum AndroidApiLevel {
   ANDROID_NOT_ANDROID = 0,
-  ANDROID_KITKAT = 19,
   ANDROID_LOLLIPOP_MR1 = 22,
   ANDROID_POST_LOLLIPOP = 23
 };
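How a caller might use the TryMemCpy declaration added above, instead of checking accessibility first and copying second (a sketch; the surrounding names and the uptr typedef are simplified, not from the sources):

    #include <cstring>

    namespace __sanitizer {
    typedef unsigned long long uptr;  // simplified for the sketch
    bool TryMemCpy(void *dest, const void *src, uptr n);
    }  // namespace __sanitizer

    // Copy a possibly-unmapped user buffer into scratch space for a report;
    // on failure the destination contents are undefined, so zero it instead.
    void SnapshotUserBuffer(void *scratch, const void *user_ptr,
                            __sanitizer::uptr n) {
      if (!__sanitizer::TryMemCpy(scratch, user_ptr, n))
        std::memset(scratch, 0, n);
    }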
lib/tsan/sanitizer_common/sanitizer_common_interceptors.inc
@@ -41,6 +41,7 @@
 #include "sanitizer_errno.h"
 #include "sanitizer_placement_new.h"
 #include "sanitizer_platform_interceptors.h"
+#include "sanitizer_platform_limits_posix.h"
 #include "sanitizer_symbolizer.h"
 #include "sanitizer_tls_get_addr.h"
 
@@ -127,6 +128,39 @@ extern const short *_toupper_tab_;
 extern const short *_tolower_tab_;
 #endif
 
+#if SANITIZER_LINUX && SANITIZER_SPARC32
+// On 32-bit Linux/sparc64, double and long double are identical and glibc
+// uses a __nldbl_ (no long double) prefix for various stdio functions.
+#  define __isoc23_fscanf __nldbl___isoc23_fscanf
+#  define __isoc23_scanf __nldbl___isoc23_scanf
+#  define __isoc23_sscanf __nldbl___isoc23_sscanf
+#  define __isoc23_vfscanf __nldbl___isoc23_vfscanf
+#  define __isoc23_vscanf __nldbl___isoc23_vscanf
+#  define __isoc23_vsscanf __nldbl___isoc23_vsscanf
+#  define __isoc99_fscanf __nldbl___isoc99_fscanf
+#  define __isoc99_scanf __nldbl___isoc99_scanf
+#  define __isoc99_sscanf __nldbl___isoc99_sscanf
+#  define __isoc99_vfscanf __nldbl___isoc99_vfscanf
+#  define __isoc99_vscanf __nldbl___isoc99_vscanf
+#  define __isoc99_vsscanf __nldbl___isoc99_vsscanf
+#  define asprintf __nldbl_asprintf
+#  define fprintf __nldbl_fprintf
+#  define fscanf __nldbl_fscanf
+#  define printf __nldbl_printf
+#  define scanf __nldbl_scanf
+#  define snprintf __nldbl_snprintf
+#  define sprintf __nldbl_sprintf
+#  define sscanf __nldbl_sscanf
+#  define vasprintf __nldbl_vasprintf
+#  define vfprintf __nldbl_vfprintf
+#  define vfscanf __nldbl_vfscanf
+#  define vprintf __nldbl_vprintf
+#  define vscanf __nldbl_vscanf
+#  define vsnprintf __nldbl_vsnprintf
+#  define vsprintf __nldbl_vsprintf
+#  define vsscanf __nldbl_vsscanf
+#endif
+
 #if SANITIZER_MUSL && \
   (defined(__i386__) || defined(__arm__) || SANITIZER_MIPS32 || SANITIZER_PPC32)
 // musl 1.2.0 on existing 32-bit architectures uses new symbol names for the
@@ -313,7 +347,7 @@ extern const short *_tolower_tab_;
   uptr copy_length = internal_strnlen(s, size);                               \
   char *new_mem = (char *)WRAP(malloc)(copy_length + 1);                      \
   if (common_flags()->intercept_strndup) {                                    \
-    COMMON_INTERCEPTOR_READ_STRING(ctx, s, Min(size, copy_length + 1));       \
+    COMMON_INTERCEPTOR_READ_STRING(ctx, s, Min<uptr>(size, copy_length + 1)); \
   }                                                                           \
   if (new_mem) {                                                              \
     COMMON_INTERCEPTOR_COPY_STRING(ctx, new_mem, s, copy_length);             \
@@ -411,7 +445,7 @@ INTERCEPTOR(SIZE_T, strnlen, const char *s, SIZE_T maxlen) {
 #endif
 
 #if SANITIZER_INTERCEPT_STRNDUP
-INTERCEPTOR(char*, strndup, const char *s, uptr size) {
+INTERCEPTOR(char*, strndup, const char *s, usize size) {
   void *ctx;
   COMMON_INTERCEPTOR_STRNDUP_IMPL(ctx, s, size);
 }
@@ -421,7 +455,7 @@ INTERCEPTOR(char*, strndup, const char *s, uptr size) {
 #endif // SANITIZER_INTERCEPT_STRNDUP
 
 #if SANITIZER_INTERCEPT___STRNDUP
-INTERCEPTOR(char*, __strndup, const char *s, uptr size) {
+INTERCEPTOR(char*, __strndup, const char *s, usize size) {
   void *ctx;
   COMMON_INTERCEPTOR_STRNDUP_IMPL(ctx, s, size);
 }
@@ -477,23 +511,23 @@ INTERCEPTOR(int, strcmp, const char *s1, const char *s2) {
 }
 
 DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strncmp, uptr called_pc,
-                              const char *s1, const char *s2, uptr n,
+                              const char *s1, const char *s2, usize n,
                               int result)
 
-INTERCEPTOR(int, strncmp, const char *s1, const char *s2, uptr size) {
+INTERCEPTOR(int, strncmp, const char *s1, const char *s2, usize size) {
   if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
     return internal_strncmp(s1, s2, size);
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, strncmp, s1, s2, size);
   unsigned char c1 = 0, c2 = 0;
-  uptr i;
+  usize i;
   for (i = 0; i < size; i++) {
     c1 = (unsigned char)s1[i];
     c2 = (unsigned char)s2[i];
     if (c1 != c2 || c1 == '\0') break;
   }
-  uptr i1 = i;
-  uptr i2 = i;
+  usize i1 = i;
+  usize i2 = i;
   if (common_flags()->strict_string_checks) {
     for (; i1 < size && s1[i1]; i1++) {}
     for (; i2 < size && s2[i2]; i2++) {}
@@ -542,21 +576,21 @@ INTERCEPTOR(int, strcasecmp, const char *s1, const char *s2) {
 }
 
 DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strncasecmp, uptr called_pc,
-                              const char *s1, const char *s2, uptr size,
+                              const char *s1, const char *s2, usize size,
                               int result)
 
 INTERCEPTOR(int, strncasecmp, const char *s1, const char *s2, SIZE_T size) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, strncasecmp, s1, s2, size);
   unsigned char c1 = 0, c2 = 0;
-  uptr i;
+  usize i;
   for (i = 0; i < size; i++) {
     c1 = (unsigned char)s1[i];
     c2 = (unsigned char)s2[i];
     if (CharCaseCmp(c1, c2) != 0 || c1 == '\0') break;
   }
-  uptr i1 = i;
-  uptr i2 = i;
+  usize i1 = i;
+  usize i2 = i;
   if (common_flags()->strict_string_checks) {
     for (; i1 < size && s1[i1]; i1++) {}
     for (; i2 < size && s2[i2]; i2++) {}
@@ -799,13 +833,13 @@ INTERCEPTOR(char *, strpbrk, const char *s1, const char *s2) {
 
 #if SANITIZER_INTERCEPT_MEMCMP
 DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_memcmp, uptr called_pc,
-                              const void *s1, const void *s2, uptr n,
+                              const void *s1, const void *s2, usize n,
                               int result)
 
 // Common code for `memcmp` and `bcmp`.
 int MemcmpInterceptorCommon(void *ctx,
-                            int (*real_fn)(const void *, const void *, uptr),
-                            const void *a1, const void *a2, uptr size) {
+                            int (*real_fn)(const void *, const void *, usize),
+                            const void *a1, const void *a2, usize size) {
   if (common_flags()->intercept_memcmp) {
     if (common_flags()->strict_memcmp) {
       // Check the entire regions even if the first bytes of the buffers are
@@ -817,7 +851,7 @@ int MemcmpInterceptorCommon(void *ctx,
       unsigned char c1 = 0, c2 = 0;
       const unsigned char *s1 = (const unsigned char*)a1;
       const unsigned char *s2 = (const unsigned char*)a2;
-      uptr i;
+      usize i;
       for (i = 0; i < size; i++) {
         c1 = s1[i];
         c2 = s2[i];
@@ -837,7 +871,7 @@ int MemcmpInterceptorCommon(void *ctx,
   return result;
 }
 
-INTERCEPTOR(int, memcmp, const void *a1, const void *a2, uptr size) {
+INTERCEPTOR(int, memcmp, const void *a1, const void *a2, usize size) {
   if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
     return internal_memcmp(a1, a2, size);
   void *ctx;
@@ -851,7 +885,7 @@ INTERCEPTOR(int, memcmp, const void *a1, const void *a2, uptr size) {
 #endif
 
 #if SANITIZER_INTERCEPT_BCMP
-INTERCEPTOR(int, bcmp, const void *a1, const void *a2, uptr size) {
+INTERCEPTOR(int, bcmp, const void *a1, const void *a2, usize size) {
   if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
     return internal_memcmp(a1, a2, size);
   void *ctx;
@@ -1104,7 +1138,7 @@ INTERCEPTOR(SSIZE_T, write, int fd, void *ptr, SIZE_T count) {
 #endif
 
 #if SANITIZER_INTERCEPT_FWRITE
-INTERCEPTOR(SIZE_T, fwrite, const void *p, uptr size, uptr nmemb, void *file) {
+INTERCEPTOR(SIZE_T, fwrite, const void *p, usize size, usize nmemb, void *file) {
   // libc file streams can call user-supplied functions, see fopencookie.
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, fwrite, p, size, nmemb, file);
@@ -1255,6 +1289,12 @@ INTERCEPTOR(int, prctl, int option, unsigned long arg2, unsigned long arg3,
   static const int PR_SET_VMA = 0x53564d41;
   static const int PR_SCHED_CORE = 62;
   static const int PR_SCHED_CORE_GET = 0;
+  static const int PR_GET_PDEATHSIG = 2;
+
+#  if !SANITIZER_ANDROID
+  static const int PR_SET_SECCOMP = 22;
+  static const int SECCOMP_MODE_FILTER = 2;
+#  endif
   if (option == PR_SET_VMA && arg2 == 0UL) {
     char *name = (char *)arg5;
     COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
@@ -1270,7 +1310,14 @@ INTERCEPTOR(int, prctl, int option, unsigned long arg2, unsigned long arg3,
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, internal_strlen(name) + 1);
   } else if (res != -1 && option == PR_SCHED_CORE &&
              arg2 == PR_SCHED_CORE_GET) {
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, (u64*)(arg5), sizeof(u64));
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, (u64 *)(arg5), sizeof(u64));
+  } else if (res != -1 && option == PR_GET_PDEATHSIG) {
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, (u64 *)(arg2), sizeof(int));
+#  if SANITIZER_GLIBC
+  } else if (res != -1 && option == PR_SET_SECCOMP &&
+             arg2 == SECCOMP_MODE_FILTER) {
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, (u64 *)(arg3), struct_sock_fprog_sz);
+#  endif
   }
   return res;
 }
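User-side shape of the call that the new PR_SET_SECCOMP branch matches: arg3 points at a struct sock_fprog, which is why the interceptor now accounts for struct_sock_fprog_sz bytes (glibc-only in the hunk above). A minimal allow-all filter for illustration; actually installing it also requires no_new_privs or CAP_SYS_ADMIN.

    #include <linux/filter.h>
    #include <linux/seccomp.h>
    #include <sys/prctl.h>

    int install_allow_all_filter(void) {
      struct sock_filter insns[] = {
          BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),  // let every syscall through
      };
      struct sock_fprog prog = {1, insns};
      // arg2 == SECCOMP_MODE_FILTER, arg3 == &prog (the range the hunk tracks).
      return prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
    }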
@@ -2242,6 +2289,61 @@ INTERCEPTOR(int, pthread_getcpuclockid, uptr thread,
 #define INIT_CLOCK_GETCPUCLOCKID
 #endif
 
+#if SANITIZER_INTERCEPT_TIMER_CREATE
+INTERCEPTOR(int, timer_create, __sanitizer_clockid_t clockid, void *sevp,
+            __sanitizer_timer_t *timer) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, timer_create, clockid, sevp, timer);
+  int res = REAL(timer_create)(clockid, sevp, timer);
+  if (!res && timer) {
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, timer, sizeof *timer);
+  }
+  return res;
+}
+
+INTERCEPTOR(int, timer_delete, __sanitizer_timer_t timer) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, timer_delete, timer);
+  int res = REAL(timer_delete)(timer);
+  return res;
+}
+
+INTERCEPTOR(int, timer_gettime, __sanitizer_timer_t timer,
+            struct __sanitizer_itimerspec *curr_value) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, timer_gettime, timer, curr_value);
+  int res = REAL(timer_gettime)(timer, curr_value);
+  if (!res && curr_value) {
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, curr_value, sizeof *curr_value);
+  }
+  return res;
+}
+
+INTERCEPTOR(int, timer_settime, __sanitizer_timer_t timer, int flags,
+            const struct __sanitizer_itimerspec *new_value,
+            struct __sanitizer_itimerspec *old_value) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, timer_settime, timer, flags, new_value,
+                           old_value);
+  int res = REAL(timer_settime)(timer, flags, new_value, old_value);
+  if (!res) {
+    if (new_value)
+      COMMON_INTERCEPTOR_READ_RANGE(ctx, new_value, sizeof *new_value);
+    if (old_value)
+      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, old_value, sizeof *old_value);
+  }
+  return res;
+}
+
+#  define INIT_TIMER_CREATE                                                \
+    COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(timer_create, "GLIBC_2.3.3");  \
+    COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(timer_delete, "GLIBC_2.3.3");  \
+    COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(timer_gettime, "GLIBC_2.3.3"); \
+    COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(timer_settime, "GLIBC_2.3.3");
+#else
+#  define INIT_TIMER_CREATE
+#endif
+
 #if SANITIZER_INTERCEPT_GETITIMER
 INTERCEPTOR(int, getitimer, int which, void *curr_value) {
   void *ctx;
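For reference, the user-side calls the new timer interceptors wrap: timer_create writes *timer on success, and timer_settime reads *new_value and optionally writes *old_value, which is what the READ/WRITE_RANGE annotations above mirror. A small one-shot example (links against librt on older glibc):

    #include <signal.h>
    #include <time.h>

    int arm_one_shot(timer_t *out, long seconds) {
      struct sigevent sev = {};
      sev.sigev_notify = SIGEV_NONE;   // no notification; poll with timer_gettime
      if (timer_create(CLOCK_MONOTONIC, &sev, out) != 0)
        return -1;
      struct itimerspec its = {};
      its.it_value.tv_sec = seconds;   // one-shot: it_interval stays zero
      return timer_settime(*out, 0, &its, /*old_value=*/nullptr);
    }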
@@ -2287,6 +2389,25 @@ INTERCEPTOR(int, setitimer, int which, const void *new_value, void *old_value) {
 #define INIT_GETITIMER
 #endif
 
+#if SANITIZER_INTERCEPT_TIMESPEC_GET
+INTERCEPTOR(int, timespec_get, struct __sanitizer_timespec *ts, int base) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, timespec_get, ts, base);
+  // We don't yet know if ts is addressable, so we use our own scratch buffer
+  struct __sanitizer_timespec ts_local;
+  int res = REAL(timespec_get)(&ts_local, base);
+  if (res) {
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ts,
+                                   sizeof(struct __sanitizer_timespec));
+    internal_memcpy(ts, &ts_local, sizeof(struct __sanitizer_timespec));
+  }
+  return res;
+}
+#  define INIT_TIMESPEC_GET COMMON_INTERCEPT_FUNCTION(timespec_get);
+#else
+#  define INIT_TIMESPEC_GET
+#endif
+
 #if SANITIZER_INTERCEPT_GLOB
 static void unpoison_glob_t(void *ctx, __sanitizer_glob_t *pglob) {
   COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pglob, sizeof(*pglob));
@@ -3427,23 +3548,27 @@ INTERCEPTOR(uptr, ptrace, int request, int pid, void *addr, void *data) {
   COMMON_INTERCEPTOR_ENTER(ctx, ptrace, request, pid, addr, data);
   __sanitizer_iovec local_iovec;
 
-  if (data) {
+  void *data_arg = ptrace_data_arg(request, addr, data);
+  if (data_arg) {
     if (request == ptrace_setregs) {
-      COMMON_INTERCEPTOR_READ_RANGE(ctx, data, struct_user_regs_struct_sz);
+      COMMON_INTERCEPTOR_READ_RANGE(ctx, data_arg, struct_user_regs_struct_sz);
     } else if (request == ptrace_setfpregs) {
-      COMMON_INTERCEPTOR_READ_RANGE(ctx, data, struct_user_fpregs_struct_sz);
+      COMMON_INTERCEPTOR_READ_RANGE(ctx, data_arg,
+                                    struct_user_fpregs_struct_sz);
     } else if (request == ptrace_setfpxregs) {
-      COMMON_INTERCEPTOR_READ_RANGE(ctx, data, struct_user_fpxregs_struct_sz);
+      COMMON_INTERCEPTOR_READ_RANGE(ctx, data_arg,
+                                    struct_user_fpxregs_struct_sz);
     } else if (request == ptrace_setvfpregs) {
-      COMMON_INTERCEPTOR_READ_RANGE(ctx, data, struct_user_vfpregs_struct_sz);
+      COMMON_INTERCEPTOR_READ_RANGE(ctx, data_arg,
+                                    struct_user_vfpregs_struct_sz);
     } else if (request == ptrace_setsiginfo) {
-      COMMON_INTERCEPTOR_READ_RANGE(ctx, data, siginfo_t_sz);
+      COMMON_INTERCEPTOR_READ_RANGE(ctx, data_arg, siginfo_t_sz);
 
-    // Some kernel might zero the iovec::iov_base in case of invalid
-    // write access.  In this case copy the invalid address for further
-    // inspection.
+      // Some kernel might zero the iovec::iov_base in case of invalid
+      // write access.  In this case copy the invalid address for further
+      // inspection.
     } else if (request == ptrace_setregset || request == ptrace_getregset) {
-      __sanitizer_iovec *iovec = (__sanitizer_iovec*)data;
+      __sanitizer_iovec *iovec = (__sanitizer_iovec *)data_arg;
       COMMON_INTERCEPTOR_READ_RANGE(ctx, iovec, sizeof(*iovec));
       local_iovec = *iovec;
       if (request == ptrace_setregset)
@@ -3456,23 +3581,26 @@ INTERCEPTOR(uptr, ptrace, int request, int pid, void *addr, void *data) {
   // https://github.com/google/sanitizers/issues/321.
   uptr res = REAL(ptrace)(request, pid, addr, data);
 
-  if (!res && data) {
+  if (!res && data_arg) {
     // Note that PEEK* requests assign different meaning to the return value.
     // This function does not handle them (nor does it need to).
     if (request == ptrace_getregs) {
-      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, struct_user_regs_struct_sz);
+      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data_arg, struct_user_regs_struct_sz);
     } else if (request == ptrace_getfpregs) {
-      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, struct_user_fpregs_struct_sz);
+      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data_arg,
+                                     struct_user_fpregs_struct_sz);
     } else if (request == ptrace_getfpxregs) {
-      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, struct_user_fpxregs_struct_sz);
+      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data_arg,
+                                     struct_user_fpxregs_struct_sz);
     } else if (request == ptrace_getvfpregs) {
-      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, struct_user_vfpregs_struct_sz);
+      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data_arg,
+                                     struct_user_vfpregs_struct_sz);
     } else if (request == ptrace_getsiginfo) {
-      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, siginfo_t_sz);
+      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data_arg, siginfo_t_sz);
     } else if (request == ptrace_geteventmsg) {
-      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, sizeof(unsigned long));
+      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data_arg, sizeof(unsigned long));
     } else if (request == ptrace_getregset) {
-      __sanitizer_iovec *iovec = (__sanitizer_iovec*)data;
+      __sanitizer_iovec *iovec = (__sanitizer_iovec *)data_arg;
       COMMON_INTERCEPTOR_WRITE_RANGE(ctx, iovec, sizeof(*iovec));
       COMMON_INTERCEPTOR_WRITE_RANGE(ctx, local_iovec.iov_base,
                                      local_iovec.iov_len);
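
Note on ptrace_data_arg: the interceptor now routes the data pointer through this helper, which appears to pick whichever syscall argument actually carries the payload for the given request (on Linux/sparc, register-set requests pass it via addr rather than data). A hedged sketch of such a helper; the real one lives in the platform-limits headers and may differ:

#if defined(__sparc__)
#  include <sys/ptrace.h>
#endif

static void *ptrace_data_arg_sketch(int request, void *addr, void *data) {
#if defined(__sparc__)
  // sparc quirk: the register struct travels in addr for these requests.
  if (request == PTRACE_GETREGS || request == PTRACE_SETREGS)
    return addr;
#endif
  return data;  // everywhere else the payload is in data
}
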
@@ -6425,12 +6553,12 @@ static void MlockIsUnsupported() {
           SanitizerToolName);
 }
 
-INTERCEPTOR(int, mlock, const void *addr, uptr len) {
+INTERCEPTOR(int, mlock, const void *addr, usize len) {
   MlockIsUnsupported();
   return 0;
 }
 
-INTERCEPTOR(int, munlock, const void *addr, uptr len) {
+INTERCEPTOR(int, munlock, const void *addr, usize len) {
   MlockIsUnsupported();
   return 0;
 }
@@ -8823,83 +8951,6 @@ INTERCEPTOR(char *, RMD160Data, u8 *data, SIZE_T len, char *buf) {
 #define INIT_RMD160
 #endif
 
-#if SANITIZER_INTERCEPT_MD5
-INTERCEPTOR(void, MD5Init, void *context) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5Init, context);
-  REAL(MD5Init)(context);
-  if (context)
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, MD5_CTX_sz);
-}
-
-INTERCEPTOR(void, MD5Update, void *context, const unsigned char *data,
-            unsigned int len) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5Update, context, data, len);
-  if (data && len > 0)
-    COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
-  if (context)
-    COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD5_CTX_sz);
-  REAL(MD5Update)(context, data, len);
-  if (context)
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, MD5_CTX_sz);
-}
-
-INTERCEPTOR(void, MD5Final, unsigned char digest[16], void *context) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5Final, digest, context);
-  if (context)
-    COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD5_CTX_sz);
-  REAL(MD5Final)(digest, context);
-  if (digest)
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, digest, sizeof(unsigned char) * 16);
-}
-
-INTERCEPTOR(char *, MD5End, void *context, char *buf) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5End, context, buf);
-  if (context)
-    COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD5_CTX_sz);
-  char *ret = REAL(MD5End)(context, buf);
-  if (ret)
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD5_return_length);
-  return ret;
-}
-
-INTERCEPTOR(char *, MD5File, const char *filename, char *buf) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5File, filename, buf);
-  if (filename)
-    COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);
-  char *ret = REAL(MD5File)(filename, buf);
-  if (ret)
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD5_return_length);
-  return ret;
-}
-
-INTERCEPTOR(char *, MD5Data, const unsigned char *data, unsigned int len,
-            char *buf) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5Data, data, len, buf);
-  if (data && len > 0)
-    COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
-  char *ret = REAL(MD5Data)(data, len, buf);
-  if (ret)
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD5_return_length);
-  return ret;
-}
-
-#define INIT_MD5                                                               \
-  COMMON_INTERCEPT_FUNCTION(MD5Init);                                          \
-  COMMON_INTERCEPT_FUNCTION(MD5Update);                                        \
-  COMMON_INTERCEPT_FUNCTION(MD5Final);                                         \
-  COMMON_INTERCEPT_FUNCTION(MD5End);                                           \
-  COMMON_INTERCEPT_FUNCTION(MD5File);                                          \
-  COMMON_INTERCEPT_FUNCTION(MD5Data)
-#else
-#define INIT_MD5
-#endif
-
 #if SANITIZER_INTERCEPT_FSEEK
 INTERCEPTOR(int, fseek, __sanitizer_FILE *stream, long int offset, int whence) {
   void *ctx;
@@ -9030,107 +9081,6 @@ INTERCEPTOR(char *, MD2Data, const unsigned char *data, unsigned int len,
 #define INIT_MD2
 #endif
 
-#if SANITIZER_INTERCEPT_SHA2
-#define SHA2_INTERCEPTORS(LEN, SHA2_STATE_T) \
-  INTERCEPTOR(void, SHA##LEN##_Init, void *context) { \
-    void *ctx; \
-    COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_Init, context); \
-    REAL(SHA##LEN##_Init)(context); \
-    if (context) \
-      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, SHA##LEN##_CTX_sz); \
-  } \
-  INTERCEPTOR(void, SHA##LEN##_Update, void *context, \
-              const u8 *data, SIZE_T len) { \
-    void *ctx; \
-    COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_Update, context, data, len); \
-    if (data && len > 0) \
-      COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len); \
-    if (context) \
-      COMMON_INTERCEPTOR_READ_RANGE(ctx, context, SHA##LEN##_CTX_sz); \
-    REAL(SHA##LEN##_Update)(context, data, len); \
-    if (context) \
-      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, SHA##LEN##_CTX_sz); \
-  } \
-  INTERCEPTOR(void, SHA##LEN##_Final, u8 digest[LEN/8], \
-  void *context) { \
-    void *ctx; \
-    CHECK_EQ(SHA##LEN##_digest_length, LEN/8); \
-    COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_Final, digest, context); \
-    if (context) \
-      COMMON_INTERCEPTOR_READ_RANGE(ctx, context, SHA##LEN##_CTX_sz); \
-    REAL(SHA##LEN##_Final)(digest, context); \
-    if (digest) \
-      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, digest, \
-                                     sizeof(digest[0]) * \
-  SHA##LEN##_digest_length); \
-  } \
-  INTERCEPTOR(char *, SHA##LEN##_End, void *context, char *buf) { \
-    void *ctx; \
-    COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_End, context, buf); \
-    if (context) \
-      COMMON_INTERCEPTOR_READ_RANGE(ctx, context, SHA##LEN##_CTX_sz); \
-    char *ret = REAL(SHA##LEN##_End)(context, buf); \
-    if (ret) \
-      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA##LEN##_return_length); \
-    return ret; \
-  } \
-  INTERCEPTOR(char *, SHA##LEN##_File, const char *filename, char *buf) { \
-    void *ctx; \
-    COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_File, filename, buf); \
-    if (filename) \
-      COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);\
-    char *ret = REAL(SHA##LEN##_File)(filename, buf); \
-    if (ret) \
-      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA##LEN##_return_length); \
-    return ret; \
-  } \
-  INTERCEPTOR(char *, SHA##LEN##_FileChunk, const char *filename, char *buf, \
-              OFF_T offset, OFF_T length) { \
-    void *ctx; \
-    COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_FileChunk, filename, buf, offset, \
-  length); \
-    if (filename) \
-      COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);\
-    char *ret = REAL(SHA##LEN##_FileChunk)(filename, buf, offset, length); \
-    if (ret) \
-      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA##LEN##_return_length); \
-    return ret; \
-  } \
-  INTERCEPTOR(char *, SHA##LEN##_Data, u8 *data, SIZE_T len, char *buf) { \
-    void *ctx; \
-    COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_Data, data, len, buf); \
-    if (data && len > 0) \
-      COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len); \
-    char *ret = REAL(SHA##LEN##_Data)(data, len, buf); \
-    if (ret) \
-      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA##LEN##_return_length); \
-    return ret; \
-  }
-
-SHA2_INTERCEPTORS(224, u32)
-SHA2_INTERCEPTORS(256, u32)
-SHA2_INTERCEPTORS(384, u64)
-SHA2_INTERCEPTORS(512, u64)
-
-#define INIT_SHA2_INTECEPTORS(LEN) \
-  COMMON_INTERCEPT_FUNCTION(SHA##LEN##_Init); \
-  COMMON_INTERCEPT_FUNCTION(SHA##LEN##_Update); \
-  COMMON_INTERCEPT_FUNCTION(SHA##LEN##_Final); \
-  COMMON_INTERCEPT_FUNCTION(SHA##LEN##_End); \
-  COMMON_INTERCEPT_FUNCTION(SHA##LEN##_File); \
-  COMMON_INTERCEPT_FUNCTION(SHA##LEN##_FileChunk); \
-  COMMON_INTERCEPT_FUNCTION(SHA##LEN##_Data)
-
-#define INIT_SHA2 \
-  INIT_SHA2_INTECEPTORS(224); \
-  INIT_SHA2_INTECEPTORS(256); \
-  INIT_SHA2_INTECEPTORS(384); \
-  INIT_SHA2_INTECEPTORS(512)
-#undef SHA2_INTERCEPTORS
-#else
-#define INIT_SHA2
-#endif
-
 #if SANITIZER_INTERCEPT_VIS
 INTERCEPTOR(char *, vis, char *dst, int c, int flag, int nextc) {
   void *ctx;
@@ -9980,7 +9930,7 @@ INTERCEPTOR(SSIZE_T, getrandom, void *buf, SIZE_T buflen, unsigned int flags) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, getrandom, buf, buflen, flags);
   // If GRND_NONBLOCK is set in the flags, it is non blocking.
-  static const int grnd_nonblock = 1; 
+  static const int grnd_nonblock = 1;
   SSIZE_T n;
   if ((flags & grnd_nonblock))
     n = REAL(getrandom)(buf, buflen, flags);
@@ -10296,6 +10246,23 @@ INTERCEPTOR(SSIZE_T, pwritev2, int fd, __sanitizer_iovec *iov, int iovcnt,
 #define INIT_PWRITEV2
 #endif
 
+#if SANITIZER_INTERCEPT_FREADLINK
+INTERCEPTOR(SSIZE_T, freadlink, int fd, char *buf, SIZE_T bufsiz) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, freadlink, fd, buf, bufsiz);
+  COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
+  SSIZE_T res = REAL(freadlink)(fd, buf, bufsiz);
+  if (res > 0)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, res);
+  if (res >= 0 && fd > 0)
+    COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
+  return res;
+}
+#  define INIT_FREADLINK COMMON_INTERCEPT_FUNCTION(freadlink)
+#else
+#  define INIT_FREADLINK
+#endif
+
 #include "sanitizer_common_interceptors_netbsd_compat.inc"
 
 namespace __sanitizer {
@@ -10373,8 +10340,10 @@ static void InitializeCommonInterceptors() {
   INIT_SETPWENT;
   INIT_CLOCK_GETTIME;
   INIT_CLOCK_GETCPUCLOCKID;
+  INIT_TIMER_CREATE;
   INIT_GETITIMER;
   INIT_TIME;
+  INIT_TIMESPEC_GET;
   INIT_GLOB;
   INIT_GLOB64;
   INIT___B64_TO;
@@ -10588,10 +10557,8 @@ static void InitializeCommonInterceptors() {
   INIT_SHA1;
   INIT_MD4;
   INIT_RMD160;
-  INIT_MD5;
   INIT_FSEEK;
   INIT_MD2;
-  INIT_SHA2;
   INIT_VIS;
   INIT_CDB;
   INIT_GETFSENT;
@@ -10617,6 +10584,7 @@ static void InitializeCommonInterceptors() {
   INIT_CPUSET_GETAFFINITY;
   INIT_PREADV2;
   INIT_PWRITEV2;
+  INIT_FREADLINK;
 
   INIT___PRINTF_CHK;
 }
lib/tsan/sanitizer_common/sanitizer_common_interceptors_memintrinsics.inc
@@ -82,7 +82,7 @@
 #endif
 
 #if SANITIZER_INTERCEPT_MEMSET
-INTERCEPTOR(void *, memset, void *dst, int v, uptr size) {
+INTERCEPTOR(void *, memset, void *dst, int v, usize size) {
   void *ctx;
   COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, dst, v, size);
 }
@@ -93,7 +93,7 @@ INTERCEPTOR(void *, memset, void *dst, int v, uptr size) {
 #endif
 
 #if SANITIZER_INTERCEPT_MEMMOVE
-INTERCEPTOR(void *, memmove, void *dst, const void *src, uptr size) {
+INTERCEPTOR(void *, memmove, void *dst, const void *src, usize size) {
   void *ctx;
   COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, dst, src, size);
 }
@@ -104,7 +104,7 @@ INTERCEPTOR(void *, memmove, void *dst, const void *src, uptr size) {
 #endif
 
 #if SANITIZER_INTERCEPT_MEMCPY
-INTERCEPTOR(void *, memcpy, void *dst, const void *src, uptr size) {
+INTERCEPTOR(void *, memcpy, void *dst, const void *src, usize size) {
   // On OS X, calling internal_memcpy here will cause memory corruptions,
   // because memcpy and memmove are actually aliases of the same
   // implementation.  We need to use internal_memmove here.
@@ -133,63 +133,63 @@ INTERCEPTOR(void *, memcpy, void *dst, const void *src, uptr size) {
 #endif
 
 #if SANITIZER_INTERCEPT_AEABI_MEM
-INTERCEPTOR(void *, __aeabi_memmove, void *to, const void *from, uptr size) {
+INTERCEPTOR(void *, __aeabi_memmove, void *to, const void *from, usize size) {
   void *ctx;
   COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size);
 }
 
-INTERCEPTOR(void *, __aeabi_memmove4, void *to, const void *from, uptr size) {
+INTERCEPTOR(void *, __aeabi_memmove4, void *to, const void *from, usize size) {
   void *ctx;
   COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size);
 }
 
-INTERCEPTOR(void *, __aeabi_memmove8, void *to, const void *from, uptr size) {
+INTERCEPTOR(void *, __aeabi_memmove8, void *to, const void *from, usize size) {
   void *ctx;
   COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size);
 }
 
-INTERCEPTOR(void *, __aeabi_memcpy, void *to, const void *from, uptr size) {
+INTERCEPTOR(void *, __aeabi_memcpy, void *to, const void *from, usize size) {
   void *ctx;
   COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, to, from, size);
 }
 
-INTERCEPTOR(void *, __aeabi_memcpy4, void *to, const void *from, uptr size) {
+INTERCEPTOR(void *, __aeabi_memcpy4, void *to, const void *from, usize size) {
   void *ctx;
   COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, to, from, size);
 }
 
-INTERCEPTOR(void *, __aeabi_memcpy8, void *to, const void *from, uptr size) {
+INTERCEPTOR(void *, __aeabi_memcpy8, void *to, const void *from, usize size) {
   void *ctx;
   COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, to, from, size);
 }
 
 // Note the argument order.
-INTERCEPTOR(void *, __aeabi_memset, void *block, uptr size, int c) {
+INTERCEPTOR(void *, __aeabi_memset, void *block, usize size, int c) {
   void *ctx;
   COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, c, size);
 }
 
-INTERCEPTOR(void *, __aeabi_memset4, void *block, uptr size, int c) {
+INTERCEPTOR(void *, __aeabi_memset4, void *block, usize size, int c) {
   void *ctx;
   COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, c, size);
 }
 
-INTERCEPTOR(void *, __aeabi_memset8, void *block, uptr size, int c) {
+INTERCEPTOR(void *, __aeabi_memset8, void *block, usize size, int c) {
   void *ctx;
   COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, c, size);
 }
 
-INTERCEPTOR(void *, __aeabi_memclr, void *block, uptr size) {
+INTERCEPTOR(void *, __aeabi_memclr, void *block, usize size) {
   void *ctx;
   COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, 0, size);
 }
 
-INTERCEPTOR(void *, __aeabi_memclr4, void *block, uptr size) {
+INTERCEPTOR(void *, __aeabi_memclr4, void *block, usize size) {
   void *ctx;
   COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, 0, size);
 }
 
-INTERCEPTOR(void *, __aeabi_memclr8, void *block, uptr size) {
+INTERCEPTOR(void *, __aeabi_memclr8, void *block, usize size) {
   void *ctx;
   COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, 0, size);
 }
@@ -212,7 +212,7 @@ INTERCEPTOR(void *, __aeabi_memclr8, void *block, uptr size) {
 #endif  // SANITIZER_INTERCEPT_AEABI_MEM
 
 #if SANITIZER_INTERCEPT___BZERO
-INTERCEPTOR(void *, __bzero, void *block, uptr size) {
+INTERCEPTOR(void *, __bzero, void *block, usize size) {
   void *ctx;
   COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, 0, size);
 }
@@ -222,7 +222,7 @@ INTERCEPTOR(void *, __bzero, void *block, uptr size) {
 #endif  // SANITIZER_INTERCEPT___BZERO
 
 #if SANITIZER_INTERCEPT_BZERO
-INTERCEPTOR(void *, bzero, void *block, uptr size) {
+INTERCEPTOR(void *, bzero, void *block, usize size) {
   void *ctx;
   COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, 0, size);
 }
lib/tsan/sanitizer_common/sanitizer_common_interceptors_vfork_aarch64.inc.S
@@ -0,0 +1,48 @@
+#if defined(__aarch64__) && defined(__linux__)
+
+#include "sanitizer_common/sanitizer_asm.h"
+#include "builtins/assembly.h"
+
+ASM_HIDDEN(COMMON_INTERCEPTOR_SPILL_AREA)
+
+.comm _ZN14__interception10real_vforkE,8,8
+.globl ASM_WRAPPER_NAME(vfork)
+ASM_TYPE_FUNCTION(ASM_WRAPPER_NAME(vfork))
+ASM_WRAPPER_NAME(vfork):
+        // Save x30 in the off-stack spill area.
+        hint    #25 // paciasp
+        stp     xzr, x30, [sp, #-16]!
+        bl      COMMON_INTERCEPTOR_SPILL_AREA
+        ldp     xzr, x30, [sp], 16
+        str     x30, [x0]
+
+        // Call real vfork. This may return twice. User code that runs between the first and the second return
+        // may clobber the stack frame of the interceptor; that's why it does not have a frame.
+        adrp    x0, _ZN14__interception10real_vforkE
+        ldr     x0, [x0, :lo12:_ZN14__interception10real_vforkE]
+        blr     x0
+
+        stp     x0, xzr, [sp, #-16]!
+        cmp     x0, #0
+        b.eq   .L_exit
+
+        // x0 != 0 => parent process. Clear stack shadow.
+        add    x0, sp, #16
+        bl     COMMON_INTERCEPTOR_HANDLE_VFORK
+
+.L_exit:
+        // Restore x30.
+        bl     COMMON_INTERCEPTOR_SPILL_AREA
+        ldr    x30, [x0]
+        ldp    x0, xzr, [sp], 16
+        hint   #29 // autiasp
+
+        ret
+ASM_SIZE(vfork)
+
+ASM_INTERCEPTOR_TRAMPOLINE(vfork)
+ASM_TRAMPOLINE_ALIAS(vfork, vfork)
+
+GNU_PROPERTY_BTI_PAC
+
+#endif
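
These new per-architecture wrappers keep the return address in an off-stack spill area because REAL(vfork) returns twice and the child borrows the parent's stack until it execs or exits; on the parent return (result != 0) the wrapper clears the stack shadow via COMMON_INTERCEPTOR_HANDLE_VFORK. A small stand-alone demonstration of the underlying sharing on Linux (strictly, POSIX leaves writes other than the pid variable undefined in the vfork child, so this is illustrative only):

#include <cstdio>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main() {
  volatile int scribbled = 0;
  pid_t pid = vfork();
  if (pid == 0) {      // child: runs on the parent's stack and address space
    scribbled = 42;    // visible to the parent once the child exits
    _exit(0);
  }
  int status;
  waitpid(pid, &status, 0);
  std::printf("parent sees scribbled = %d\n", scribbled);  // prints 42 on Linux
  return 0;
}
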
lib/tsan/sanitizer_common/sanitizer_common_interceptors_vfork_arm.inc.S
@@ -0,0 +1,49 @@
+#if defined(__arm__) && defined(__linux__)
+
+#include "sanitizer_common/sanitizer_asm.h"
+
+ASM_HIDDEN(COMMON_INTERCEPTOR_SPILL_AREA)
+
+.comm _ZN14__interception10real_vforkE,4,4
+.globl ASM_WRAPPER_NAME(vfork)
+ASM_TYPE_FUNCTION(ASM_WRAPPER_NAME(vfork))
+ASM_WRAPPER_NAME(vfork):
+        // Save LR in the off-stack spill area.
+        push    {r4, lr}
+        bl      COMMON_INTERCEPTOR_SPILL_AREA
+        pop     {r4, lr}
+        str     lr, [r0]
+
+        // Call real vfork. This may return twice. User code that runs between the first and the second return
+        // may clobber the stack frame of the interceptor; that's why it does not have a frame.
+        ldr     r0, .LCPI0_0
+.LPC0_0:
+        ldr     r0, [pc, r0]
+        mov     lr, pc
+        bx      r0
+
+        push    {r0, r4}
+        cmp     r0, #0
+        beq     .L_exit
+
+        // r0 != 0 => parent process. Clear stack shadow.
+        add     r0, sp, #8
+        bl      COMMON_INTERCEPTOR_HANDLE_VFORK
+
+.L_exit:
+        // Restore LR.
+        bl      COMMON_INTERCEPTOR_SPILL_AREA
+        ldr     lr, [r0]
+        pop     {r0, r4}
+
+        mov     pc, lr
+
+.LCPI0_0:
+        .long   _ZN14__interception10real_vforkE - (.LPC0_0+8)
+
+ASM_SIZE(vfork)
+
+ASM_INTERCEPTOR_TRAMPOLINE(vfork)
+ASM_TRAMPOLINE_ALIAS(vfork, vfork)
+
+#endif
lib/tsan/sanitizer_common/sanitizer_common_interceptors_vfork_i386.inc.S
@@ -0,0 +1,64 @@
+#if defined(__i386__) && defined(__linux__)
+
+#include "sanitizer_common/sanitizer_asm.h"
+
+.comm _ZN14__interception10real_vforkE,4,4
+.globl ASM_WRAPPER_NAME(vfork)
+ASM_TYPE_FUNCTION(ASM_WRAPPER_NAME(vfork))
+ASM_WRAPPER_NAME(vfork):
+        _CET_ENDBR
+        // Store return address in the spill area and tear down the stack frame.
+        sub     $12, %esp
+        call    COMMON_INTERCEPTOR_SPILL_AREA
+        mov     12(%esp), %ecx
+        mov     %ecx, (%eax)
+        add     $16, %esp
+
+        call    .L0$pb
+.L0$pb:
+        pop     %eax
+.Ltmp0:
+        add     $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %eax
+        call    *_ZN14__interception10real_vforkE@GOTOFF(%eax)
+
+        // Restore the stack frame.
+        // 12(%esp) return address
+        // 8(%esp) spill %ebx
+        // 4(%esp) spill REAL(vfork) return value
+        // (%esp) call frame (arg0) for __*_handle_vfork
+        sub     $16, %esp
+        mov     %ebx, 8(%esp)
+        mov     %eax, 4(%esp)
+
+        // Form GOT address in %ebx.
+        call    .L1$pb
+.L1$pb:
+        pop     %ebx
+.Ltmp1:
+        add     $_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.L1$pb), %ebx
+
+        // Restore original return address.
+        call    COMMON_INTERCEPTOR_SPILL_AREA
+        mov     (%eax), %ecx
+        mov     %ecx, 12(%esp)
+        mov     4(%esp), %eax
+
+        // Call handle_vfork in the parent process (%rax != 0).
+        test    %eax, %eax
+        je      .L_exit
+
+        lea     16(%esp), %ecx
+        mov     %ecx, (%esp)
+        call    COMMON_INTERCEPTOR_HANDLE_VFORK@PLT
+
+.L_exit:
+        mov     4(%esp), %eax
+        mov     8(%esp), %ebx
+        add     $12, %esp
+        ret
+ASM_SIZE(vfork)
+
+ASM_INTERCEPTOR_TRAMPOLINE(vfork)
+ASM_TRAMPOLINE_ALIAS(vfork, vfork)
+
+#endif
lib/tsan/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S
@@ -0,0 +1,57 @@
+#if defined(__loongarch_lp64) && defined(__linux__)
+
+#include "sanitizer_common/sanitizer_asm.h"
+
+ASM_HIDDEN(COMMON_INTERCEPTOR_SPILL_AREA)
+ASM_HIDDEN(_ZN14__interception10real_vforkE)
+
+.text
+.globl ASM_WRAPPER_NAME(vfork)
+ASM_TYPE_FUNCTION(ASM_WRAPPER_NAME(vfork))
+ASM_WRAPPER_NAME(vfork):
+        // Save ra in the off-stack spill area.
+        // allocate space on stack
+        addi.d    $sp, $sp, -16
+        // store $ra value
+        st.d      $ra, $sp, 8
+        bl        COMMON_INTERCEPTOR_SPILL_AREA
+        // restore previous values from stack
+        ld.d      $ra, $sp, 8
+        // adjust stack
+        addi.d    $sp, $sp, 16
+        // store $ra by $a0
+        st.d      $ra, $a0, 0
+
+        // Call real vfork. This may return twice. User code that runs between the first and the second return
+        // may clobber the stack frame of the interceptor; that's why it does not have a frame.
+        la.local  $a0, _ZN14__interception10real_vforkE
+        ld.d      $a0, $a0, 0
+        jirl      $ra, $a0, 0
+
+        // adjust stack
+        addi.d    $sp, $sp, -16
+        // store $a0 by adjusted stack
+        st.d      $a0, $sp, 8
+        // jump to exit label if $a0 is 0
+        beqz      $a0, .L_exit
+
+        // $a0 != 0 => parent process. Clear stack shadow.
+        // put old $sp to $a0
+        addi.d    $a0, $sp, 16
+        bl        %plt(COMMON_INTERCEPTOR_HANDLE_VFORK)
+
+.L_exit:
+        // Restore $ra
+        bl        COMMON_INTERCEPTOR_SPILL_AREA
+        ld.d      $ra, $a0, 0
+        // load value by stack
+        ld.d      $a0, $sp, 8
+        // adjust stack
+        addi.d    $sp, $sp, 16
+        jr        $ra
+ASM_SIZE(vfork)
+
+ASM_INTERCEPTOR_TRAMPOLINE(vfork)
+ASM_TRAMPOLINE_ALIAS(vfork, vfork)
+
+#endif
lib/tsan/sanitizer_common/sanitizer_common_interceptors_vfork_riscv64.inc.S
@@ -0,0 +1,56 @@
+#if (defined(__riscv) && (__riscv_xlen == 64)) && defined(__linux__)
+
+#include "sanitizer_common/sanitizer_asm.h"
+
+ASM_HIDDEN(COMMON_INTERCEPTOR_SPILL_AREA)
+
+.comm _ZN14__interception10real_vforkE,8,8
+.globl ASM_WRAPPER_NAME(vfork)
+ASM_TYPE_FUNCTION(ASM_WRAPPER_NAME(vfork))
+ASM_WRAPPER_NAME(vfork):
+        // Save ra in the off-stack spill area.
+        // allocate space on stack
+        addi    sp, sp, -16
+        // store ra value
+        sd      ra, 8(sp)
+        call    COMMON_INTERCEPTOR_SPILL_AREA
+        // restore previous values from stack
+        ld      ra, 8(sp)
+        // adjust stack
+        addi    sp, sp, 16
+        // store ra by x10
+        sd      ra, 0(x10)
+
+        // Call real vfork. This may return twice. User code that runs between the first and the second return
+        // may clobber the stack frame of the interceptor; that's why it does not have a frame.
+        la x10, _ZN14__interception10real_vforkE
+        ld x10, 0(x10)
+        jalr x10
+
+        // adjust stack
+        addi    sp, sp, -16
+        // store x10 by adjusted stack
+        sd      x10, 8(sp)
+        // jump to exit label if x10 is 0
+        beqz    x10, .L_exit
+
+        // x10 != 0 => parent process. Clear stack shadow.
+        // put old sp to x10
+        addi   x10, sp, 16
+        call   COMMON_INTERCEPTOR_HANDLE_VFORK
+
+.L_exit:
+        // Restore ra
+        call   COMMON_INTERCEPTOR_SPILL_AREA
+        ld     ra, 0(x10)
+        // load value by stack
+        ld     x10, 8(sp)
+        // adjust stack
+        addi   sp, sp, 16
+        ret
+ASM_SIZE(vfork)
+
+ASM_INTERCEPTOR_TRAMPOLINE(vfork)
+ASM_TRAMPOLINE_ALIAS(vfork, vfork)
+
+#endif
lib/tsan/sanitizer_common/sanitizer_common_interceptors_vfork_x86_64.inc.S
@@ -0,0 +1,42 @@
+#if defined(__x86_64__) && defined(__linux__)
+
+#include "sanitizer_common/sanitizer_asm.h"
+
+.comm _ZN14__interception10real_vforkE,8,8
+.globl ASM_WRAPPER_NAME(vfork)
+ASM_TYPE_FUNCTION(ASM_WRAPPER_NAME(vfork))
+ASM_WRAPPER_NAME(vfork):
+        _CET_ENDBR
+        // Store return address in the spill area and tear down the stack frame.
+        push    %rcx
+        call    COMMON_INTERCEPTOR_SPILL_AREA
+        pop     %rcx
+        pop     %rdi
+        mov     %rdi, (%rax)
+
+        call    *_ZN14__interception10real_vforkE(%rip)
+
+        // Restore return address from the spill area.
+        push    %rcx
+        push    %rax
+        call    COMMON_INTERCEPTOR_SPILL_AREA
+        mov     (%rax), %rdx
+        mov     %rdx, 8(%rsp)
+        mov     (%rsp), %rax
+
+        // Call handle_vfork in the parent process (%rax != 0).
+        test    %rax, %rax
+        je      .L_exit
+
+        lea     16(%rsp), %rdi
+        call    COMMON_INTERCEPTOR_HANDLE_VFORK@PLT
+
+.L_exit:
+        pop     %rax
+        ret
+ASM_SIZE(ASM_WRAPPER_NAME(vfork))
+
+ASM_INTERCEPTOR_TRAMPOLINE(vfork)
+ASM_TRAMPOLINE_ALIAS(vfork, vfork)
+
+#endif
lib/tsan/sanitizer_common/sanitizer_common_interface.inc
@@ -10,6 +10,7 @@
 INTERFACE_FUNCTION(__sanitizer_acquire_crash_state)
 INTERFACE_FUNCTION(__sanitizer_annotate_contiguous_container)
 INTERFACE_FUNCTION(__sanitizer_annotate_double_ended_contiguous_container)
+INTERFACE_FUNCTION(__sanitizer_copy_contiguous_container_annotations)
 INTERFACE_FUNCTION(__sanitizer_contiguous_container_find_bad_address)
 INTERFACE_FUNCTION(
     __sanitizer_double_ended_contiguous_container_find_bad_address)
@@ -22,6 +23,7 @@ INTERFACE_FUNCTION(__sanitizer_verify_double_ended_contiguous_container)
 INTERFACE_WEAK_FUNCTION(__sanitizer_on_print)
 INTERFACE_WEAK_FUNCTION(__sanitizer_report_error_summary)
 INTERFACE_WEAK_FUNCTION(__sanitizer_sandbox_on_notify)
+INTERFACE_WEAK_FUNCTION(__sanitizer_get_dtls_size)
 // Sanitizer weak hooks
 INTERFACE_WEAK_FUNCTION(__sanitizer_weak_hook_memcmp)
 INTERFACE_WEAK_FUNCTION(__sanitizer_weak_hook_strcmp)
@@ -51,3 +53,9 @@ INTERFACE_WEAK_FUNCTION(__sanitizer_ignore_free_hook)
 INTERFACE_FUNCTION(__sanitizer_internal_memcpy)
 INTERFACE_FUNCTION(__sanitizer_internal_memmove)
 INTERFACE_FUNCTION(__sanitizer_internal_memset)
+
+#if SANITIZER_WINDOWS
+INTERFACE_FUNCTION(__sanitizer_override_function)
+INTERFACE_FUNCTION(__sanitizer_override_function_by_addr)
+INTERFACE_FUNCTION(__sanitizer_register_weak_function)
+#endif
lib/tsan/sanitizer_common/sanitizer_common_libcdep.cpp
@@ -171,7 +171,7 @@ void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name,
         "ReserveShadowMemoryRange failed while trying to map 0x%zx bytes. "
         "Perhaps you're using ulimit -v or ulimit -d\n",
         size);
-    Abort();
+    Die();
   }
   if (madvise_shadow && common_flags()->use_madv_dontdump)
     DontDumpShadowMemory(beg, size);
@@ -219,6 +219,32 @@ static void StopStackDepotBackgroundThread() {
 static void StopStackDepotBackgroundThread() {}
 #endif
 
+void MemCpyAccessible(void *dest, const void *src, uptr n) {
+  if (TryMemCpy(dest, src, n))
+    return;
+
+  const uptr page_size = GetPageSize();
+  uptr b = reinterpret_cast<uptr>(src);
+  uptr b_up = RoundUpTo(b, page_size);
+
+  uptr e = reinterpret_cast<uptr>(src) + n;
+  uptr e_down = RoundDownTo(e, page_size);
+
+  auto copy_or_zero = [dest, src](uptr beg, uptr end) {
+    const uptr udest = reinterpret_cast<uptr>(dest);
+    const uptr usrc = reinterpret_cast<uptr>(src);
+    void *d = reinterpret_cast<void *>(udest + (beg - usrc));
+    const uptr size = end - beg;
+    if (!TryMemCpy(d, reinterpret_cast<void *>(beg), size))
+      internal_memset(d, 0, size);
+  };
+
+  copy_or_zero(b, b_up);
+  for (uptr p = b_up; p < e_down; p += page_size)
+    copy_or_zero(p, p + page_size);
+  copy_or_zero(e_down, e);
+}
+
 }  // namespace __sanitizer
 
 SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_sandbox_on_notify,
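
MemCpyAccessible above retries a failed bulk TryMemCpy page by page, zero-filling any page that cannot be read; the partial head and tail pages are handled outside the aligned middle loop. A stand-alone sketch of the range split, with made-up addresses and a 4 KiB page:

#include <cstdio>

int main() {
  const unsigned long page = 0x1000;
  unsigned long b = 0x10ff8, n = 0x2010;              // src, length (made up)
  unsigned long e = b + n;                            // 0x13008
  unsigned long b_up = (b + page - 1) & ~(page - 1);  // RoundUpTo   -> 0x11000
  unsigned long e_down = e & ~(page - 1);             // RoundDownTo -> 0x13000
  std::printf("head [%#lx, %#lx)\n", b, b_up);        // partial first page
  for (unsigned long p = b_up; p < e_down; p += page)
    std::printf("page [%#lx, %#lx)\n", p, p + page);  // whole pages
  std::printf("tail [%#lx, %#lx)\n", e_down, e);      // partial last page
  return 0;
}
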
lib/tsan/sanitizer_common/sanitizer_common_nolibc.cpp
@@ -20,13 +20,14 @@ namespace __sanitizer {
 // The Windows implementations of these functions use the win32 API directly,
 // bypassing libc.
 #if !SANITIZER_WINDOWS
-#if SANITIZER_LINUX
+#  if SANITIZER_LINUX
 void LogMessageOnPrintf(const char *str) {}
-#endif
+void InitTlsSize() {}
+#  endif
 void WriteToSyslog(const char *buffer) {}
 void Abort() { internal__exit(1); }
 bool CreateDir(const char *pathname) { return false; }
-#endif // !SANITIZER_WINDOWS
+#endif  // !SANITIZER_WINDOWS
 
 #if !SANITIZER_WINDOWS && !SANITIZER_APPLE
 void ListOfModules::init() {}
lib/tsan/sanitizer_common/sanitizer_common_syscalls.inc
@@ -48,6 +48,7 @@
 #if SANITIZER_LINUX
 
 #  include "sanitizer_libc.h"
+#  include "sanitizer_platform_limits_posix.h"
 
 #  define PRE_SYSCALL(name) \
     SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_syscall_pre_impl_##name
@@ -2530,18 +2531,19 @@ PRE_SYSCALL(ptrace)(long request, long pid, long addr, long data) {
 #  if !SANITIZER_ANDROID &&                                                   \
       (defined(__i386) || defined(__x86_64) || defined(__mips64) ||           \
        defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__) || \
-       defined(__loongarch__) || SANITIZER_RISCV64)
-  if (data) {
+       defined(__loongarch__) || SANITIZER_RISCV64 || defined(__sparc__))
+  long data_arg = ptrace_data_arg(request, addr, data);
+  if (data_arg) {
     if (request == ptrace_setregs) {
-      PRE_READ((void *)data, struct_user_regs_struct_sz);
+      PRE_READ((void *)data_arg, struct_user_regs_struct_sz);
     } else if (request == ptrace_setfpregs) {
-      PRE_READ((void *)data, struct_user_fpregs_struct_sz);
+      PRE_READ((void *)data_arg, struct_user_fpregs_struct_sz);
     } else if (request == ptrace_setfpxregs) {
-      PRE_READ((void *)data, struct_user_fpxregs_struct_sz);
+      PRE_READ((void *)data_arg, struct_user_fpxregs_struct_sz);
     } else if (request == ptrace_setsiginfo) {
-      PRE_READ((void *)data, siginfo_t_sz);
+      PRE_READ((void *)data_arg, siginfo_t_sz);
     } else if (request == ptrace_setregset) {
-      __sanitizer_iovec *iov = (__sanitizer_iovec *)data;
+      __sanitizer_iovec *iov = (__sanitizer_iovec *)data_arg;
       PRE_READ(iov->iov_base, iov->iov_len);
     }
   }
@@ -2552,25 +2554,26 @@ POST_SYSCALL(ptrace)(long res, long request, long pid, long addr, long data) {
 #  if !SANITIZER_ANDROID &&                                                   \
       (defined(__i386) || defined(__x86_64) || defined(__mips64) ||           \
        defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__) || \
-       defined(__loongarch__) || SANITIZER_RISCV64)
-  if (res >= 0 && data) {
+       defined(__loongarch__) || SANITIZER_RISCV64 || defined(__sparc__))
+  long data_arg = ptrace_data_arg(request, addr, data);
+  if (res >= 0 && data_arg) {
     // Note that this is different from the interceptor in
     // sanitizer_common_interceptors.inc.
     // PEEK* requests return resulting values through data pointer.
     if (request == ptrace_getregs) {
-      POST_WRITE((void *)data, struct_user_regs_struct_sz);
+      POST_WRITE((void *)data_arg, struct_user_regs_struct_sz);
     } else if (request == ptrace_getfpregs) {
-      POST_WRITE((void *)data, struct_user_fpregs_struct_sz);
+      POST_WRITE((void *)data_arg, struct_user_fpregs_struct_sz);
     } else if (request == ptrace_getfpxregs) {
-      POST_WRITE((void *)data, struct_user_fpxregs_struct_sz);
+      POST_WRITE((void *)data_arg, struct_user_fpxregs_struct_sz);
     } else if (request == ptrace_getsiginfo) {
-      POST_WRITE((void *)data, siginfo_t_sz);
+      POST_WRITE((void *)data_arg, siginfo_t_sz);
     } else if (request == ptrace_getregset) {
-      __sanitizer_iovec *iov = (__sanitizer_iovec *)data;
+      __sanitizer_iovec *iov = (__sanitizer_iovec *)data_arg;
       POST_WRITE(iov->iov_base, iov->iov_len);
     } else if (request == ptrace_peekdata || request == ptrace_peektext ||
                request == ptrace_peekuser) {
-      POST_WRITE((void *)data, sizeof(void *));
+      POST_WRITE((void *)data_arg, sizeof(void *));
     }
   }
 #  endif
lib/tsan/sanitizer_common/sanitizer_coverage_win_dll_thunk.cpp
@@ -1,20 +0,0 @@
-//===-- sanitizer_coverage_win_dll_thunk.cpp ------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a family of thunks that should be statically linked into
-// the DLLs that have instrumentation in order to delegate the calls to the
-// shared runtime that lives in the main binary.
-// See https://github.com/google/sanitizers/issues/209 for the details.
-//===----------------------------------------------------------------------===//
-#ifdef SANITIZER_DLL_THUNK
-#include "sanitizer_win_dll_thunk.h"
-// Sanitizer Coverage interface functions.
-#define INTERFACE_FUNCTION(Name) INTERCEPT_SANITIZER_FUNCTION(Name)
-#define INTERFACE_WEAK_FUNCTION(Name) INTERCEPT_SANITIZER_WEAK_FUNCTION(Name)
-#include "sanitizer_coverage_interface.inc"
-#endif // SANITIZER_DLL_THUNK
lib/tsan/sanitizer_common/sanitizer_coverage_win_dynamic_runtime_thunk.cpp
@@ -1,26 +0,0 @@
-//===-- sanitizer_coverage_win_dynamic_runtime_thunk.cpp ------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines things that need to be present in the application modules
-// to interact with Sanitizer Coverage, when it is included in a dll.
-//
-//===----------------------------------------------------------------------===//
-#ifdef SANITIZER_DYNAMIC_RUNTIME_THUNK
-#define SANITIZER_IMPORT_INTERFACE 1
-#include "sanitizer_win_defs.h"
-// Define weak alias for all weak functions imported from sanitizer coverage.
-#define INTERFACE_FUNCTION(Name)
-#define INTERFACE_WEAK_FUNCTION(Name) WIN_WEAK_IMPORT_DEF(Name)
-#include "sanitizer_coverage_interface.inc"
-#endif // SANITIZER_DYNAMIC_RUNTIME_THUNK
-
-namespace __sanitizer {
-// Add one, otherwise unused, external symbol to this object file so that the
-// Visual C++ linker includes it and reads the .drective section.
-void ForceWholeArchiveIncludeForSanCov() {}
-}
lib/tsan/sanitizer_common/sanitizer_coverage_win_weak_interception.cpp
@@ -1,23 +0,0 @@
-//===-- sanitizer_coverage_win_weak_interception.cpp ----------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// This module should be included in Sanitizer Coverage when it implemented as a
-// shared library on Windows (dll), in order to delegate the calls of weak
-// functions to the implementation in the main executable when a strong
-// definition is provided.
-//===----------------------------------------------------------------------===//
-#ifdef SANITIZER_DYNAMIC
-#include "sanitizer_win_weak_interception.h"
-#include "sanitizer_interface_internal.h"
-#include "sancov_flags.h"
-// Check if strong definitions for weak functions are present in the main
-// executable. If that is the case, override dll functions to point to strong
-// implementations.
-#define INTERFACE_FUNCTION(Name)
-#define INTERFACE_WEAK_FUNCTION(Name) INTERCEPT_SANITIZER_WEAK_FUNCTION(Name)
-#include "sanitizer_coverage_interface.inc"
-#endif // SANITIZER_DYNAMIC
lib/tsan/sanitizer_common/sanitizer_deadlock_detector.h
@@ -120,7 +120,7 @@ class DeadlockDetectorTLS {
     u32 lock;
     u32 stk;
   };
-  LockWithContext all_locks_with_contexts_[64];
+  LockWithContext all_locks_with_contexts_[128];
   uptr n_all_locks_;
 };
 
lib/tsan/sanitizer_common/sanitizer_dense_map.h
@@ -69,24 +69,14 @@ class DenseMapBase {
     setNumTombstones(0);
   }
 
+  /// Return true if the specified key is in the map, false otherwise.
+  bool contains(const KeyT &Key) const { return doFind(Key) != nullptr; }
+
   /// Return 1 if the specified key is in the map, 0 otherwise.
-  size_type count(const KeyT &Key) const {
-    const BucketT *TheBucket;
-    return LookupBucketFor(Key, TheBucket) ? 1 : 0;
-  }
+  size_type count(const KeyT &Key) const { return contains(Key) ? 1 : 0; }
 
-  value_type *find(const KeyT &Key) {
-    BucketT *TheBucket;
-    if (LookupBucketFor(Key, TheBucket))
-      return TheBucket;
-    return nullptr;
-  }
-  const value_type *find(const KeyT &Key) const {
-    const BucketT *TheBucket;
-    if (LookupBucketFor(Key, TheBucket))
-      return TheBucket;
-    return nullptr;
-  }
+  value_type *find(const KeyT &Key) { return doFind(Key); }
+  const value_type *find(const KeyT &Key) const { return doFind(Key); }
 
   /// Alternate version of find() which allows a different, and possibly
   /// less expensive, key type.
@@ -95,25 +85,18 @@ class DenseMapBase {
   /// type used.
   template <class LookupKeyT>
   value_type *find_as(const LookupKeyT &Key) {
-    BucketT *TheBucket;
-    if (LookupBucketFor(Key, TheBucket))
-      return TheBucket;
-    return nullptr;
+    return doFind(Key);
   }
   template <class LookupKeyT>
   const value_type *find_as(const LookupKeyT &Key) const {
-    const BucketT *TheBucket;
-    if (LookupBucketFor(Key, TheBucket))
-      return TheBucket;
-    return nullptr;
+    return doFind(Key);
   }
 
   /// lookup - Return the entry for the specified key, or a default
   /// constructed value if no such entry exists.
   ValueT lookup(const KeyT &Key) const {
-    const BucketT *TheBucket;
-    if (LookupBucketFor(Key, TheBucket))
-      return TheBucket->getSecond();
+    if (const BucketT *Bucket = doFind(Key))
+      return Bucket->getSecond();
     return ValueT();
   }
 
@@ -184,8 +167,8 @@ class DenseMapBase {
   }
 
   bool erase(const KeyT &Val) {
-    BucketT *TheBucket;
-    if (!LookupBucketFor(Val, TheBucket))
+    BucketT *TheBucket = doFind(Val);
+    if (!TheBucket)
       return false;  // not in map.
 
     TheBucket->getSecond().~ValueT();
@@ -449,6 +432,35 @@ class DenseMapBase {
     return TheBucket;
   }
 
+  template <typename LookupKeyT>
+  BucketT *doFind(const LookupKeyT &Val) {
+    BucketT *BucketsPtr = getBuckets();
+    const unsigned NumBuckets = getNumBuckets();
+    if (NumBuckets == 0)
+      return nullptr;
+
+    const KeyT EmptyKey = getEmptyKey();
+    unsigned BucketNo = getHashValue(Val) & (NumBuckets - 1);
+    unsigned ProbeAmt = 1;
+    while (true) {
+      BucketT *Bucket = BucketsPtr + BucketNo;
+      if (LIKELY(KeyInfoT::isEqual(Val, Bucket->getFirst())))
+        return Bucket;
+      if (LIKELY(KeyInfoT::isEqual(Bucket->getFirst(), EmptyKey)))
+        return nullptr;
+
+      // Otherwise, it's a hash collision or a tombstone, continue quadratic
+      // probing.
+      BucketNo += ProbeAmt++;
+      BucketNo &= NumBuckets - 1;
+    }
+  }
+
+  template <typename LookupKeyT>
+  const BucketT *doFind(const LookupKeyT &Val) const {
+    return const_cast<DenseMapBase *>(this)->doFind(Val);
+  }
+
   /// LookupBucketFor - Lookup the appropriate bucket for Val, returning it in
   /// FoundBucket.  If the bucket contains the key and a value, this returns
   /// true, otherwise it returns a bucket with an empty marker or tombstone and
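
doFind replaces the LookupBucketFor-based lookups with a direct probe loop: the step grows by one on every collision (1, 2, 3, ...), i.e. quadratic probing over a power-of-two table, stopping at the first empty bucket. A tiny stand-alone sketch of the bucket sequence it visits:

#include <cstdio>

int main() {
  const unsigned NumBuckets = 16;            // must be a power of two for the mask
  unsigned BucketNo = 5 & (NumBuckets - 1);  // pretend getHashValue(Key) == 5
  unsigned ProbeAmt = 1;
  for (int i = 0; i < 8; ++i) {
    std::printf("%u ", BucketNo);
    BucketNo += ProbeAmt++;                  // step grows: 1, 2, 3, ...
    BucketNo &= NumBuckets - 1;
  }
  std::printf("\n");                         // prints: 5 6 8 11 15 4 10 1
  return 0;
}
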
lib/tsan/sanitizer_common/sanitizer_errno.cpp
@@ -23,6 +23,7 @@ namespace __sanitizer {
 COMPILER_CHECK(errno_ENOMEM == ENOMEM);
 COMPILER_CHECK(errno_EBUSY == EBUSY);
 COMPILER_CHECK(errno_EINVAL == EINVAL);
+COMPILER_CHECK(errno_ERANGE == ERANGE);
 
 // EOWNERDEAD is not present in some older platforms.
 #if defined(EOWNERDEAD)
lib/tsan/sanitizer_common/sanitizer_errno_codes.h
@@ -24,6 +24,7 @@ namespace __sanitizer {
 #define errno_ENOMEM 12
 #define errno_EBUSY 16
 #define errno_EINVAL 22
+#define errno_ERANGE 34
 #define errno_ENAMETOOLONG 36
 #define errno_ENOSYS 38
 
lib/tsan/sanitizer_common/sanitizer_fuchsia.cpp
@@ -94,7 +94,6 @@ void DisableCoreDumperIfNecessary() {}
 void InstallDeadlySignalHandlers(SignalHandlerType handler) {}
 void SetAlternateSignalStack() {}
 void UnsetAlternateSignalStack() {}
-void InitTlsSize() {}
 
 bool SignalContext::IsStackOverflow() const { return false; }
 void SignalContext::DumpAllRegisters(void *context) { UNIMPLEMENTED(); }
@@ -445,6 +444,11 @@ bool IsAccessibleMemoryRange(uptr beg, uptr size) {
   return status == ZX_OK;
 }
 
+bool TryMemCpy(void *dest, const void *src, uptr n) {
+  // TODO: implement.
+  return false;
+}
+
 // FIXME implement on this platform.
 void GetMemoryProfile(fill_profile_f cb, uptr *stats) {}
 
@@ -519,7 +523,6 @@ uptr ReadLongProcessName(/*out*/ char *buf, uptr buf_len) {
 uptr MainThreadStackBase, MainThreadStackSize;
 
 bool GetRandom(void *buffer, uptr length, bool blocking) {
-  CHECK_LE(length, ZX_CPRNG_DRAW_MAX_LEN);
   _zx_cprng_draw(buffer, length);
   return true;
 }
lib/tsan/sanitizer_common/sanitizer_getauxval.h
@@ -21,22 +21,21 @@
 
 #if SANITIZER_LINUX || SANITIZER_FUCHSIA
 
-# if (__GLIBC_PREREQ(2, 16) || (SANITIZER_ANDROID && __ANDROID_API__ >= 21) || \
-      SANITIZER_FUCHSIA) &&                                                    \
-     !SANITIZER_GO
-#  define SANITIZER_USE_GETAUXVAL 1
-# else
-#  define SANITIZER_USE_GETAUXVAL 0
-# endif
-
-# if SANITIZER_USE_GETAUXVAL
-#  include <sys/auxv.h>
-# else
+#  if (__GLIBC_PREREQ(2, 16) || SANITIZER_ANDROID || SANITIZER_FUCHSIA) && \
+      !SANITIZER_GO
+#    define SANITIZER_USE_GETAUXVAL 1
+#  else
+#    define SANITIZER_USE_GETAUXVAL 0
+#  endif
+
+#  if SANITIZER_USE_GETAUXVAL
+#    include <sys/auxv.h>
+#  else
 // The weak getauxval definition allows to check for the function at runtime.
 // This is useful for Android, when compiled at a lower API level yet running
 // on a more recent platform that offers the function.
 extern "C" SANITIZER_WEAK_ATTRIBUTE unsigned long getauxval(unsigned long type);
-# endif
+#  endif
 
 #elif SANITIZER_NETBSD
 
lib/tsan/sanitizer_common/sanitizer_interface_internal.h
@@ -49,6 +49,11 @@ __sanitizer_sandbox_on_notify(__sanitizer_sandbox_arguments *args);
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void
 __sanitizer_report_error_summary(const char *error_summary);
 
+// Returns the size of a dynamically allocated block. This function can be
+// overridden by the client.
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE __sanitizer::uptr
+__sanitizer_get_dtls_size(const void *tls_begin);
+
 SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_dump();
 SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_dump_coverage(
     const __sanitizer::uptr *pcs, const __sanitizer::uptr len);
@@ -71,6 +76,11 @@ void __sanitizer_annotate_double_ended_contiguous_container(
     const void *old_container_beg, const void *old_container_end,
     const void *new_container_beg, const void *new_container_end);
 SANITIZER_INTERFACE_ATTRIBUTE
+void __sanitizer_copy_contiguous_container_annotations(const void *src_begin,
+                                                       const void *src_end,
+                                                       const void *dst_begin,
+                                                       const void *dst_end);
+SANITIZER_INTERFACE_ATTRIBUTE
 int __sanitizer_verify_contiguous_container(const void *beg, const void *mid,
                                             const void *end);
 SANITIZER_INTERFACE_ATTRIBUTE
lib/tsan/sanitizer_common/sanitizer_internal_defs.h
@@ -138,19 +138,25 @@
 // in a portable way by the language itself.
 namespace __sanitizer {
 
-#if defined(_WIN64)
+#if defined(__UINTPTR_TYPE__)
+#  if defined(__arm__) && defined(__linux__)
+// Linux Arm headers redefine __UINTPTR_TYPE__ and disagree with clang/gcc.
+typedef unsigned int uptr;
+typedef int sptr;
+#  else
+typedef __UINTPTR_TYPE__ uptr;
+typedef __INTPTR_TYPE__ sptr;
+#  endif
+#elif defined(_WIN64)
 // 64-bit Windows uses LLP64 data model.
 typedef unsigned long long uptr;
 typedef signed long long sptr;
-#else
-#  if (SANITIZER_WORDSIZE == 64) || SANITIZER_APPLE || SANITIZER_WINDOWS
-typedef unsigned long uptr;
-typedef signed long sptr;
-#  else
+#elif defined(_WIN32)
 typedef unsigned int uptr;
 typedef signed int sptr;
-#  endif
-#endif  // defined(_WIN64)
+#else
+#  error Unsupported compiler, missing __UINTPTR_TYPE__
+#endif  // defined(__UINTPTR_TYPE__)
 #if defined(__x86_64__)
 // Since x32 uses ILP32 data model in 64-bit hardware mode, we must use
 // 64-bit pointer to unwind stack frame.
@@ -194,16 +200,13 @@ typedef u64  OFF64_T;
 #ifdef __SIZE_TYPE__
 typedef __SIZE_TYPE__ usize;
 #else
-// Since we use this for operator new, usize must match the real size_t, but on
-// 32-bit Windows the definition of uptr does not actually match uintptr_t or
-// size_t because we are working around typedef mismatches for the (S)SIZE_T
-// types used in interception.h.
-// Until the definition of uptr has been fixed we have to special case Win32.
-#  if SANITIZER_WINDOWS && SANITIZER_WORDSIZE == 32
-typedef unsigned int usize;
-#  else
 typedef uptr usize;
-#  endif
+#endif
+
+#if defined(__s390__) && !defined(__s390x__)
+typedef long ssize;
+#else
+typedef sptr ssize;
 #endif
 
 typedef u64 tid_t;
@@ -466,6 +469,9 @@ using namespace __sanitizer;
 namespace __msan {
 using namespace __sanitizer;
 }
+namespace __nsan {
+using namespace __sanitizer;
+}
 namespace __hwasan {
 using namespace __sanitizer;
 }
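
uptr/sptr are now derived from the compiler's __UINTPTR_TYPE__/__INTPTR_TYPE__ builtins (with an arm-linux carve-out) instead of per-platform #ifdefs. A quick stand-alone check, assuming GCC or Clang which predefine these macros, that the builtins behave as pointer-sized integers:

// Not part of the runtime; compile-time sanity checks only.
static_assert(sizeof(__UINTPTR_TYPE__) == sizeof(void *),
              "__UINTPTR_TYPE__ must be pointer-sized");
static_assert(static_cast<__UINTPTR_TYPE__>(-1) > 0,
              "__UINTPTR_TYPE__ must be unsigned");
static_assert(static_cast<__INTPTR_TYPE__>(-1) < 0,
              "__INTPTR_TYPE__ must be signed");

int main() { return 0; }
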
lib/tsan/sanitizer_common/sanitizer_libignore.cpp
@@ -32,7 +32,7 @@ void LibIgnore::AddIgnoredLibrary(const char *name_templ) {
   lib->templ = internal_strdup(name_templ);
   lib->name = nullptr;
   lib->real_name = nullptr;
-  lib->loaded = false;
+  lib->range_id = kInvalidCodeRangeId;
 }
 
 void LibIgnore::OnLibraryLoaded(const char *name) {
@@ -43,7 +43,7 @@ void LibIgnore::OnLibraryLoaded(const char *name) {
       buf[0]) {
     for (uptr i = 0; i < count_; i++) {
       Lib *lib = &libs_[i];
-      if (!lib->loaded && (!lib->real_name) &&
+      if (!lib->loaded() && (!lib->real_name) &&
           TemplateMatch(lib->templ, name))
         lib->real_name = internal_strdup(buf.data());
     }
@@ -70,28 +70,31 @@ void LibIgnore::OnLibraryLoaded(const char *name) {
           Die();
         }
         loaded = true;
-        if (lib->loaded)
+        if (lib->loaded())
           continue;
         VReport(1,
                 "Matched called_from_lib suppression '%s' against library"
                 " '%s'\n",
                 lib->templ, mod.full_name());
-        lib->loaded = true;
         lib->name = internal_strdup(mod.full_name());
         const uptr idx =
             atomic_load(&ignored_ranges_count_, memory_order_relaxed);
         CHECK_LT(idx, ARRAY_SIZE(ignored_code_ranges_));
-        ignored_code_ranges_[idx].begin = range.beg;
-        ignored_code_ranges_[idx].end = range.end;
+        ignored_code_ranges_[idx].OnLoad(range.beg, range.end);
+        // Record the index of the ignored range.
+        lib->range_id = idx;
         atomic_store(&ignored_ranges_count_, idx + 1, memory_order_release);
         break;
       }
     }
-    if (lib->loaded && !loaded) {
-      Report("%s: library '%s' that was matched against called_from_lib"
-             " suppression '%s' is unloaded\n",
-             SanitizerToolName, lib->name, lib->templ);
-      Die();
+    if (lib->loaded() && !loaded) {
+      VReport(1,
+              "%s: library '%s' that was matched against called_from_lib"
+              " suppression '%s' is unloaded\n",
+              SanitizerToolName, lib->name, lib->templ);
+      // The library is unloaded so mark the ignored code range as unloaded.
+      ignored_code_ranges_[lib->range_id].OnUnload();
+      lib->range_id = kInvalidCodeRangeId;
     }
   }
 
@@ -110,8 +113,7 @@ void LibIgnore::OnLibraryLoaded(const char *name) {
         const uptr idx =
             atomic_load(&instrumented_ranges_count_, memory_order_relaxed);
         CHECK_LT(idx, ARRAY_SIZE(instrumented_code_ranges_));
-        instrumented_code_ranges_[idx].begin = range.beg;
-        instrumented_code_ranges_[idx].end = range.end;
+        instrumented_code_ranges_[idx].OnLoad(range.beg, range.end);
         atomic_store(&instrumented_ranges_count_, idx + 1,
                      memory_order_release);
       }
lib/tsan/sanitizer_common/sanitizer_libignore.h
@@ -49,25 +49,36 @@ class LibIgnore {
   bool IsPcInstrumented(uptr pc) const;
 
  private:
+  static const uptr kMaxIgnoredRanges = 128;
+  static const uptr kMaxInstrumentedRanges = 1024;
+  static const uptr kMaxLibs = 1024;
+  static const uptr kInvalidCodeRangeId = -1;
+
   struct Lib {
     char *templ;
     char *name;
     char *real_name;  // target of symlink
-    bool loaded;
+    uptr range_id;
+    bool loaded() const { return range_id != kInvalidCodeRangeId; };
   };
 
   struct LibCodeRange {
-    uptr begin;
-    uptr end;
-  };
+    bool IsInRange(uptr pc) const {
+      return (pc >= begin && pc < atomic_load(&end, memory_order_acquire));
+    }
 
-  inline bool IsInRange(uptr pc, const LibCodeRange &range) const {
-    return (pc >= range.begin && pc < range.end);
-  }
+    void OnLoad(uptr b, uptr e) {
+      begin = b;
+      atomic_store(&end, e, memory_order_release);
+    }
 
-  static const uptr kMaxIgnoredRanges = 128;
-  static const uptr kMaxInstrumentedRanges = 1024;
-  static const uptr kMaxLibs = 1024;
+    void OnUnload() { atomic_store(&end, 0, memory_order_release); }
+
+   private:
+    uptr begin;
+    // A value of 0 means the associated module was unloaded.
+    atomic_uintptr_t end;
+  };
 
   // Hot part:
   atomic_uintptr_t ignored_ranges_count_;
@@ -90,7 +101,7 @@ class LibIgnore {
 inline bool LibIgnore::IsIgnored(uptr pc, bool *pc_in_ignored_lib) const {
   const uptr n = atomic_load(&ignored_ranges_count_, memory_order_acquire);
   for (uptr i = 0; i < n; i++) {
-    if (IsInRange(pc, ignored_code_ranges_[i])) {
+    if (ignored_code_ranges_[i].IsInRange(pc)) {
       *pc_in_ignored_lib = true;
       return true;
     }
@@ -104,7 +115,7 @@ inline bool LibIgnore::IsIgnored(uptr pc, bool *pc_in_ignored_lib) const {
 inline bool LibIgnore::IsPcInstrumented(uptr pc) const {
   const uptr n = atomic_load(&instrumented_ranges_count_, memory_order_acquire);
   for (uptr i = 0; i < n; i++) {
-    if (IsInRange(pc, instrumented_code_ranges_[i]))
+    if (instrumented_code_ranges_[i].IsInRange(pc))
       return true;
   }
   return false;
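
The ignored/instrumented range arrays are scanned without a lock, so LibCodeRange now publishes end with a release store and reads it with acquire; OnUnload stores 0 so the range matches no pc. A minimal stand-alone model of that scheme (simplified names, std::atomic instead of the sanitizer's atomics):

#include <atomic>
#include <cstdint>

struct Range {
  std::uintptr_t begin = 0;
  std::atomic<std::uintptr_t> end{0};

  void OnLoad(std::uintptr_t b, std::uintptr_t e) {
    begin = b;                                // written before end...
    end.store(e, std::memory_order_release);  // ...so a reader that sees end also sees begin
  }
  void OnUnload() { end.store(0, std::memory_order_release); }  // pc < 0 never holds

  bool IsInRange(std::uintptr_t pc) const {
    return pc >= begin && pc < end.load(std::memory_order_acquire);
  }
};

int main() {
  Range r;
  r.OnLoad(0x1000, 0x2000);
  bool in = r.IsInRange(0x1800);      // true while loaded
  r.OnUnload();
  bool still = r.IsInRange(0x1800);   // false once end == 0
  return (in && !still) ? 0 : 1;
}
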
lib/tsan/sanitizer_common/sanitizer_linux.cpp
@@ -107,7 +107,9 @@ extern struct ps_strings *__ps_strings;
 #  endif  // SANITIZER_NETBSD
 
 #  if SANITIZER_SOLARIS
+#    include <stddef.h>
 #    include <stdlib.h>
+#    include <sys/frame.h>
 #    include <thread.h>
 #    define environ _environ
 #  endif
@@ -132,9 +134,10 @@ const int FUTEX_WAKE_PRIVATE = FUTEX_WAKE | FUTEX_PRIVATE_FLAG;
 // Are we using 32-bit or 64-bit Linux syscalls?
 // x32 (which defines __x86_64__) has SANITIZER_WORDSIZE == 32
 // but it still needs to use 64-bit syscalls.
-#  if SANITIZER_LINUX && (defined(__x86_64__) || defined(__powerpc64__) || \
-                          SANITIZER_WORDSIZE == 64 ||                      \
-                          (defined(__mips__) && _MIPS_SIM == _ABIN32))
+#  if SANITIZER_LINUX &&                                \
+      (defined(__x86_64__) || defined(__powerpc64__) || \
+       SANITIZER_WORDSIZE == 64 ||                      \
+       (defined(__mips__) && defined(_ABIN32) && _MIPS_SIM == _ABIN32))
 #    define SANITIZER_LINUX_USES_64BIT_SYSCALLS 1
 #  else
 #    define SANITIZER_LINUX_USES_64BIT_SYSCALLS 0
@@ -152,6 +155,8 @@ const int FUTEX_WAKE_PRIVATE = FUTEX_WAKE | FUTEX_PRIVATE_FLAG;
 
 #  if SANITIZER_FREEBSD
 #    define SANITIZER_USE_GETENTROPY 1
+extern "C" void *__sys_mmap(void *addr, size_t len, int prot, int flags, int fd,
+                            off_t offset);
 #  endif
 
 namespace __sanitizer {
@@ -160,33 +165,56 @@ void SetSigProcMask(__sanitizer_sigset_t *set, __sanitizer_sigset_t *oldset) {
   CHECK_EQ(0, internal_sigprocmask(SIG_SETMASK, set, oldset));
 }
 
+#  if SANITIZER_LINUX
+// Deletes the specified signal from newset if it is not present in oldset.
+// Equivalently: newset[signum] = newset[signum] & oldset[signum].
+static void KeepUnblocked(__sanitizer_sigset_t &newset,
+                          __sanitizer_sigset_t &oldset, int signum) {
+  // FIXME: https://github.com/google/sanitizers/issues/1816
+  if (SANITIZER_ANDROID || !internal_sigismember(&oldset, signum))
+    internal_sigdelset(&newset, signum);
+}
+#  endif
+
 // Block asynchronous signals
 void BlockSignals(__sanitizer_sigset_t *oldset) {
-  __sanitizer_sigset_t set;
-  internal_sigfillset(&set);
-#  if SANITIZER_LINUX && !SANITIZER_ANDROID
+  __sanitizer_sigset_t newset;
+  internal_sigfillset(&newset);
+
+#  if SANITIZER_LINUX
+  __sanitizer_sigset_t currentset;
+
+#    if !SANITIZER_ANDROID
+  // FIXME: https://github.com/google/sanitizers/issues/1816
+  SetSigProcMask(NULL, &currentset);
+
   // Glibc uses SIGSETXID signal during setuid call. If this signal is blocked
   // on any thread, setuid call hangs.
   // See test/sanitizer_common/TestCases/Linux/setuid.c.
-  internal_sigdelset(&set, 33);
-#  endif
-#  if SANITIZER_LINUX
+  KeepUnblocked(newset, currentset, 33);
+#    endif  // !SANITIZER_ANDROID
+
   // Seccomp-BPF-sandboxed processes rely on SIGSYS to handle trapped syscalls.
   // If this signal is blocked, such calls cannot be handled and the process may
   // hang.
-  internal_sigdelset(&set, 31);
+  KeepUnblocked(newset, currentset, 31);
 
+#    if !SANITIZER_ANDROID
   // Don't block synchronous signals
-  internal_sigdelset(&set, SIGSEGV);
-  internal_sigdelset(&set, SIGBUS);
-  internal_sigdelset(&set, SIGILL);
-  internal_sigdelset(&set, SIGTRAP);
-  internal_sigdelset(&set, SIGABRT);
-  internal_sigdelset(&set, SIGFPE);
-  internal_sigdelset(&set, SIGPIPE);
-#  endif
+  // but also don't unblock signals that the user had deliberately blocked.
+  // FIXME: https://github.com/google/sanitizers/issues/1816
+  KeepUnblocked(newset, currentset, SIGSEGV);
+  KeepUnblocked(newset, currentset, SIGBUS);
+  KeepUnblocked(newset, currentset, SIGILL);
+  KeepUnblocked(newset, currentset, SIGTRAP);
+  KeepUnblocked(newset, currentset, SIGABRT);
+  KeepUnblocked(newset, currentset, SIGFPE);
+  KeepUnblocked(newset, currentset, SIGPIPE);
+#    endif  // !SANITIZER_ANDROID
+
+#  endif  // SANITIZER_LINUX
 
-  SetSigProcMask(&set, oldset);
+  SetSigProcMask(&newset, oldset);
 }
 
 ScopedBlockSignals::ScopedBlockSignals(__sanitizer_sigset_t *copy) {
@@ -218,7 +246,9 @@ ScopedBlockSignals::~ScopedBlockSignals() { SetSigProcMask(&saved_, nullptr); }
 #    if !SANITIZER_S390
 uptr internal_mmap(void *addr, uptr length, int prot, int flags, int fd,
                    u64 offset) {
-#      if SANITIZER_FREEBSD || SANITIZER_LINUX_USES_64BIT_SYSCALLS
+#      if SANITIZER_FREEBSD
+  return (uptr)__sys_mmap(addr, length, prot, flags, fd, offset);
+#      elif SANITIZER_LINUX_USES_64BIT_SYSCALLS
   return internal_syscall(SYSCALL(mmap), (uptr)addr, length, prot, flags, fd,
                           offset);
 #      else
@@ -250,6 +280,11 @@ int internal_madvise(uptr addr, uptr length, int advice) {
   return internal_syscall(SYSCALL(madvise), addr, length, advice);
 }
 
+#    if SANITIZER_FREEBSD
+uptr internal_close_range(fd_t lowfd, fd_t highfd, int flags) {
+  return internal_syscall(SYSCALL(close_range), lowfd, highfd, flags);
+}
+#    endif
 uptr internal_close(fd_t fd) { return internal_syscall(SYSCALL(close), fd); }
 
 uptr internal_open(const char *filename, int flags) {
@@ -395,8 +430,9 @@ uptr internal_stat(const char *path, void *buf) {
                              AT_NO_AUTOMOUNT, STATX_BASIC_STATS, (uptr)&bufx);
   statx_to_stat(&bufx, (struct stat *)buf);
   return res;
-#      elif (SANITIZER_WORDSIZE == 64 || SANITIZER_X32 ||    \
-             (defined(__mips__) && _MIPS_SIM == _ABIN32)) && \
+#      elif (                                                                 \
+          SANITIZER_WORDSIZE == 64 || SANITIZER_X32 ||                        \
+          (defined(__mips__) && defined(_ABIN32) && _MIPS_SIM == _ABIN32)) && \
           !SANITIZER_SPARC
   return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf,
                           0);
@@ -433,8 +469,9 @@ uptr internal_lstat(const char *path, void *buf) {
                              STATX_BASIC_STATS, (uptr)&bufx);
   statx_to_stat(&bufx, (struct stat *)buf);
   return res;
-#      elif (defined(_LP64) || SANITIZER_X32 ||              \
-             (defined(__mips__) && _MIPS_SIM == _ABIN32)) && \
+#      elif (                                                                 \
+          defined(_LP64) || SANITIZER_X32 ||                                  \
+          (defined(__mips__) && defined(_ABIN32) && _MIPS_SIM == _ABIN32)) && \
           !SANITIZER_SPARC
   return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf,
                           AT_SYMLINK_NOFOLLOW);
@@ -721,6 +758,11 @@ static void GetArgsAndEnv(char ***argv, char ***envp) {
 #    if !SANITIZER_GO
   if (&__libc_stack_end) {
     uptr *stack_end = (uptr *)__libc_stack_end;
+    // Linux/sparc64 needs an adjustment, cf. glibc
+    // sysdeps/sparc/sparc{32,64}/dl-machine.h (DL_STACK_END).
+#      if SANITIZER_LINUX && defined(__sparc__)
+    stack_end = &stack_end[16];
+#      endif
     // Normally argc can be obtained from *stack_end, however, on ARM glibc's
     // _start clobbers it:
     // https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/arm/start.S;hb=refs/heads/release/2.31/master#l75
@@ -1014,34 +1056,29 @@ bool internal_sigismember(__sanitizer_sigset_t *set, int signum) {
 
 #  if !SANITIZER_NETBSD
 // ThreadLister implementation.
-ThreadLister::ThreadLister(pid_t pid) : pid_(pid), buffer_(4096) {
-  char task_directory_path[80];
-  internal_snprintf(task_directory_path, sizeof(task_directory_path),
-                    "/proc/%d/task/", pid);
-  descriptor_ = internal_open(task_directory_path, O_RDONLY | O_DIRECTORY);
-  if (internal_iserror(descriptor_)) {
-    Report("Can't open /proc/%d/task for reading.\n", pid);
-  }
+ThreadLister::ThreadLister(pid_t pid) : buffer_(4096) {
+  task_path_.AppendF("/proc/%d/task", pid);
 }
 
 ThreadLister::Result ThreadLister::ListThreads(
     InternalMmapVector<tid_t> *threads) {
-  if (internal_iserror(descriptor_))
+  int descriptor = internal_open(task_path_.data(), O_RDONLY | O_DIRECTORY);
+  if (internal_iserror(descriptor)) {
+    Report("Can't open %s for reading.\n", task_path_.data());
     return Error;
-  internal_lseek(descriptor_, 0, SEEK_SET);
+  }
+  auto cleanup = at_scope_exit([&] { internal_close(descriptor); });
   threads->clear();
 
   Result result = Ok;
   for (bool first_read = true;; first_read = false) {
-    // Resize to max capacity if it was downsized by IsAlive.
-    buffer_.resize(buffer_.capacity());
     CHECK_GE(buffer_.size(), 4096);
     uptr read = internal_getdents(
-        descriptor_, (struct linux_dirent *)buffer_.data(), buffer_.size());
+        descriptor, (struct linux_dirent *)buffer_.data(), buffer_.size());
     if (!read)
       return result;
     if (internal_iserror(read)) {
-      Report("Can't read directory entries from /proc/%d/task.\n", pid_);
+      Report("Can't read directory entries from %s.\n", task_path_.data());
       return Error;
     }
 
@@ -1079,26 +1116,33 @@ ThreadLister::Result ThreadLister::ListThreads(
   }
 }
 
-bool ThreadLister::IsAlive(int tid) {
+const char *ThreadLister::LoadStatus(tid_t tid) {
+  status_path_.clear();
+  status_path_.AppendF("%s/%llu/status", task_path_.data(), tid);
+  auto cleanup = at_scope_exit([&] {
+    // Resize back to capacity if it is downsized by `ReadFileToVector`.
+    buffer_.resize(buffer_.capacity());
+  });
+  if (!ReadFileToVector(status_path_.data(), &buffer_) || buffer_.empty())
+    return nullptr;
+  buffer_.push_back('\0');
+  return buffer_.data();
+}
+
+bool ThreadLister::IsAlive(tid_t tid) {
   // /proc/%d/task/%d/status uses same call to detect alive threads as
   // proc_task_readdir. See task_state implementation in Linux.
-  char path[80];
-  internal_snprintf(path, sizeof(path), "/proc/%d/task/%d/status", pid_, tid);
-  if (!ReadFileToVector(path, &buffer_) || buffer_.empty())
-    return false;
-  buffer_.push_back(0);
   static const char kPrefix[] = "\nPPid:";
-  const char *field = internal_strstr(buffer_.data(), kPrefix);
+  const char *status = LoadStatus(tid);
+  if (!status)
+    return false;
+  const char *field = internal_strstr(status, kPrefix);
   if (!field)
     return false;
   field += internal_strlen(kPrefix);
   return (int)internal_atoll(field) != 0;
 }
 
-ThreadLister::~ThreadLister() {
-  if (!internal_iserror(descriptor_))
-    internal_close(descriptor_);
-}
 #  endif
 
 #  if SANITIZER_WORDSIZE == 32
@@ -1808,11 +1852,6 @@ int internal_uname(struct utsname *buf) {
 #  endif
 
 #  if SANITIZER_ANDROID
-#    if __ANDROID_API__ < 21
-extern "C" __attribute__((weak)) int dl_iterate_phdr(
-    int (*)(struct dl_phdr_info *, size_t, void *), void *);
-#    endif
-
 static int dl_iterate_phdr_test_cb(struct dl_phdr_info *info, size_t size,
                                    void *data) {
   // Any name starting with "lib" indicates a bug in L where library base names
@@ -1828,9 +1867,7 @@ static int dl_iterate_phdr_test_cb(struct dl_phdr_info *info, size_t size,
 static atomic_uint32_t android_api_level;
 
 static AndroidApiLevel AndroidDetectApiLevelStatic() {
-#    if __ANDROID_API__ <= 19
-  return ANDROID_KITKAT;
-#    elif __ANDROID_API__ <= 22
+#    if __ANDROID_API__ <= 22
   return ANDROID_LOLLIPOP_MR1;
 #    else
   return ANDROID_POST_LOLLIPOP;
@@ -1838,8 +1875,6 @@ static AndroidApiLevel AndroidDetectApiLevelStatic() {
 }
 
 static AndroidApiLevel AndroidDetectApiLevel() {
-  if (!&dl_iterate_phdr)
-    return ANDROID_KITKAT;  // K or lower
   bool base_name_seen = false;
   dl_iterate_phdr(dl_iterate_phdr_test_cb, &base_name_seen);
   if (base_name_seen)
@@ -2014,6 +2049,18 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const {
     return Unknown;
   return esr & ESR_ELx_WNR ? Write : Read;
 #  elif defined(__loongarch__)
+  // In the musl environment, the Linux kernel uapi sigcontext.h is not
+  // included in signal.h. To avoid missing the SC_ADDRERR_{RD,WR} macros,
+  // copy them here. The LoongArch Linux kernel uapi is already stable,
+  // so there's no need to worry about the value changing.
+#    ifndef SC_ADDRERR_RD
+  // Address error was due to memory load
+#      define SC_ADDRERR_RD (1 << 30)
+#    endif
+#    ifndef SC_ADDRERR_WR
+  // Address error was due to memory store
+#      define SC_ADDRERR_WR (1 << 31)
+#    endif
   u32 flags = ucontext->uc_mcontext.__flags;
   if (flags & SC_ADDRERR_RD)
     return SignalContext::Read;
@@ -2154,8 +2201,26 @@ bool SignalContext::IsTrueFaultingAddress() const {
 UNUSED
 static const char *RegNumToRegName(int reg) {
   switch (reg) {
-#  if SANITIZER_LINUX
+#  if SANITIZER_LINUX && SANITIZER_GLIBC || SANITIZER_NETBSD
 #    if defined(__x86_64__)
+#      if SANITIZER_NETBSD
+#        define REG_RAX _REG_RAX
+#        define REG_RBX _REG_RBX
+#        define REG_RCX _REG_RCX
+#        define REG_RDX _REG_RDX
+#        define REG_RDI _REG_RDI
+#        define REG_RSI _REG_RSI
+#        define REG_RBP _REG_RBP
+#        define REG_RSP _REG_RSP
+#        define REG_R8 _REG_R8
+#        define REG_R9 _REG_R9
+#        define REG_R10 _REG_R10
+#        define REG_R11 _REG_R11
+#        define REG_R12 _REG_R12
+#        define REG_R13 _REG_R13
+#        define REG_R14 _REG_R14
+#        define REG_R15 _REG_R15
+#      endif
     case REG_RAX:
       return "rax";
     case REG_RBX:
@@ -2189,6 +2254,16 @@ static const char *RegNumToRegName(int reg) {
     case REG_R15:
       return "r15";
 #    elif defined(__i386__)
+#      if SANITIZER_NETBSD
+#        define REG_EAX _REG_EAX
+#        define REG_EBX _REG_EBX
+#        define REG_ECX _REG_ECX
+#        define REG_EDX _REG_EDX
+#        define REG_EDI _REG_EDI
+#        define REG_ESI _REG_ESI
+#        define REG_EBP _REG_EBP
+#        define REG_ESP _REG_ESP
+#      endif
     case REG_EAX:
       return "eax";
     case REG_EBX:
@@ -2205,32 +2280,170 @@ static const char *RegNumToRegName(int reg) {
       return "ebp";
     case REG_ESP:
       return "esp";
+#    elif defined(__arm__)
+#      ifdef MAKE_CASE
+#        undef MAKE_CASE
+#      endif
+#      define REG_STR(reg) #reg
+#      define MAKE_CASE(N) \
+        case REG_R##N:     \
+          return REG_STR(r##N)
+    MAKE_CASE(0);
+    MAKE_CASE(1);
+    MAKE_CASE(2);
+    MAKE_CASE(3);
+    MAKE_CASE(4);
+    MAKE_CASE(5);
+    MAKE_CASE(6);
+    MAKE_CASE(7);
+    MAKE_CASE(8);
+    MAKE_CASE(9);
+    MAKE_CASE(10);
+    MAKE_CASE(11);
+    MAKE_CASE(12);
+    case REG_R13:
+      return "sp";
+    case REG_R14:
+      return "lr";
+    case REG_R15:
+      return "pc";
+#    elif defined(__aarch64__)
+#      define REG_STR(reg) #reg
+#      define MAKE_CASE(N) \
+        case N:            \
+          return REG_STR(x##N)
+    MAKE_CASE(0);
+    MAKE_CASE(1);
+    MAKE_CASE(2);
+    MAKE_CASE(3);
+    MAKE_CASE(4);
+    MAKE_CASE(5);
+    MAKE_CASE(6);
+    MAKE_CASE(7);
+    MAKE_CASE(8);
+    MAKE_CASE(9);
+    MAKE_CASE(10);
+    MAKE_CASE(11);
+    MAKE_CASE(12);
+    MAKE_CASE(13);
+    MAKE_CASE(14);
+    MAKE_CASE(15);
+    MAKE_CASE(16);
+    MAKE_CASE(17);
+    MAKE_CASE(18);
+    MAKE_CASE(19);
+    MAKE_CASE(20);
+    MAKE_CASE(21);
+    MAKE_CASE(22);
+    MAKE_CASE(23);
+    MAKE_CASE(24);
+    MAKE_CASE(25);
+    MAKE_CASE(26);
+    MAKE_CASE(27);
+    MAKE_CASE(28);
+    case 29:
+      return "fp";
+    case 30:
+      return "lr";
+    case 31:
+      return "sp";
 #    endif
-#  endif
+#  endif  // SANITIZER_LINUX && SANITIZER_GLIBC || SANITIZER_NETBSD
     default:
       return NULL;
   }
   return NULL;
 }
 
-#  if SANITIZER_LINUX
+#  if ((SANITIZER_LINUX && SANITIZER_GLIBC) || SANITIZER_NETBSD) && \
+      (defined(__arm__) || defined(__aarch64__))
+static uptr GetArmRegister(ucontext_t *ctx, int RegNum) {
+  switch (RegNum) {
+#    if defined(__arm__) && !SANITIZER_NETBSD
+#      ifdef MAKE_CASE
+#        undef MAKE_CASE
+#      endif
+#      define MAKE_CASE(N) \
+        case REG_R##N:     \
+          return ctx->uc_mcontext.arm_r##N
+    MAKE_CASE(0);
+    MAKE_CASE(1);
+    MAKE_CASE(2);
+    MAKE_CASE(3);
+    MAKE_CASE(4);
+    MAKE_CASE(5);
+    MAKE_CASE(6);
+    MAKE_CASE(7);
+    MAKE_CASE(8);
+    MAKE_CASE(9);
+    MAKE_CASE(10);
+    case REG_R11:
+      return ctx->uc_mcontext.arm_fp;
+    case REG_R12:
+      return ctx->uc_mcontext.arm_ip;
+    case REG_R13:
+      return ctx->uc_mcontext.arm_sp;
+    case REG_R14:
+      return ctx->uc_mcontext.arm_lr;
+    case REG_R15:
+      return ctx->uc_mcontext.arm_pc;
+#    elif defined(__aarch64__)
+#      if SANITIZER_LINUX
+    case 0 ... 30:
+      return ctx->uc_mcontext.regs[RegNum];
+    case 31:
+      return ctx->uc_mcontext.sp;
+#      elif SANITIZER_NETBSD
+    case 0 ... 31:
+      return ctx->uc_mcontext.__gregs[RegNum];
+#      endif
+#    endif
+    default:
+      return 0;
+  }
+  return 0;
+}
+#  endif  // ((SANITIZER_LINUX && SANITIZER_GLIBC) || SANITIZER_NETBSD) &&
+          // (defined(__arm__) || defined(__aarch64__))
+
 UNUSED
 static void DumpSingleReg(ucontext_t *ctx, int RegNum) {
   const char *RegName = RegNumToRegName(RegNum);
+#  if SANITIZER_LINUX && SANITIZER_GLIBC || SANITIZER_NETBSD
 #    if defined(__x86_64__)
   Printf("%s%s = 0x%016llx  ", internal_strlen(RegName) == 2 ? " " : "",
-         RegName, ctx->uc_mcontext.gregs[RegNum]);
+         RegName,
+#      if SANITIZER_LINUX
+         ctx->uc_mcontext.gregs[RegNum]
+#      elif SANITIZER_NETBSD
+         ctx->uc_mcontext.__gregs[RegNum]
+#      endif
+  );
 #    elif defined(__i386__)
-  Printf("%s = 0x%08x  ", RegName, ctx->uc_mcontext.gregs[RegNum]);
+  Printf("%s = 0x%08x  ", RegName,
+#      if SANITIZER_LINUX
+         ctx->uc_mcontext.gregs[RegNum]
+#      elif SANITIZER_NETBSD
+         ctx->uc_mcontext.__gregs[RegNum]
+#      endif
+  );
+#    elif defined(__arm__)
+  Printf("%s%s = 0x%08zx  ", internal_strlen(RegName) == 2 ? " " : "", RegName,
+         GetArmRegister(ctx, RegNum));
+#    elif defined(__aarch64__)
+  Printf("%s%s = 0x%016zx  ", internal_strlen(RegName) == 2 ? " " : "", RegName,
+         GetArmRegister(ctx, RegNum));
 #    else
   (void)RegName;
 #    endif
-}
+#  else
+  (void)RegName;
 #  endif
+}
 
 void SignalContext::DumpAllRegisters(void *context) {
   ucontext_t *ucontext = (ucontext_t *)context;
-#  if SANITIZER_LINUX
+#  if SANITIZER_LINUX && SANITIZER_GLIBC || SANITIZER_NETBSD
 #    if defined(__x86_64__)
   Report("Register values:\n");
   DumpSingleReg(ucontext, REG_RAX);
@@ -2269,6 +2482,35 @@ void SignalContext::DumpAllRegisters(void *context) {
   DumpSingleReg(ucontext, REG_EBP);
   DumpSingleReg(ucontext, REG_ESP);
   Printf("\n");
+#    elif defined(__arm__) && !SANITIZER_NETBSD
+  Report("Register values:\n");
+  DumpSingleReg(ucontext, REG_R0);
+  DumpSingleReg(ucontext, REG_R1);
+  DumpSingleReg(ucontext, REG_R2);
+  DumpSingleReg(ucontext, REG_R3);
+  Printf("\n");
+  DumpSingleReg(ucontext, REG_R4);
+  DumpSingleReg(ucontext, REG_R5);
+  DumpSingleReg(ucontext, REG_R6);
+  DumpSingleReg(ucontext, REG_R7);
+  Printf("\n");
+  DumpSingleReg(ucontext, REG_R8);
+  DumpSingleReg(ucontext, REG_R9);
+  DumpSingleReg(ucontext, REG_R10);
+  DumpSingleReg(ucontext, REG_R11);
+  Printf("\n");
+  DumpSingleReg(ucontext, REG_R12);
+  DumpSingleReg(ucontext, REG_R13);
+  DumpSingleReg(ucontext, REG_R14);
+  DumpSingleReg(ucontext, REG_R15);
+  Printf("\n");
+#    elif defined(__aarch64__)
+  Report("Register values:\n");
+  for (int i = 0; i <= 31; ++i) {
+    DumpSingleReg(ucontext, i);
+    if (i % 4 == 3)
+      Printf("\n");
+  }
 #    else
   (void)ucontext;
 #    endif
@@ -2310,6 +2552,8 @@ void SignalContext::DumpAllRegisters(void *context) {
 #    else
   (void)ucontext;
 #    endif
+#  else
+  (void)ucontext;
 #  endif
   // FIXME: Implement this for other OSes and architectures.
 }
@@ -2404,7 +2648,19 @@ static void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) {
 #    if SANITIZER_SOLARIS
   ucontext_t *ucontext = (ucontext_t *)context;
   *pc = ucontext->uc_mcontext.gregs[REG_PC];
-  *sp = ucontext->uc_mcontext.gregs[REG_O6] + STACK_BIAS;
+  *sp = ucontext->uc_mcontext.gregs[REG_SP] + STACK_BIAS;
+  // Avoid SEGV when dereferencing sp on stack overflow with non-faulting load.
+  // This requires a SPARC V9 CPU.  Cannot use #ASI_PNF here: only supported
+  // since clang-19.
+#      if defined(__sparcv9)
+  asm("ldxa [%[fp]] 0x82, %[bp]"
+#      else
+  asm("lduwa [%[fp]] 0x82, %[bp]"
+#      endif
+      : [bp] "=r"(*bp)
+      : [fp] "r"(&((struct frame *)*sp)->fr_savfp));
+  if (*bp)
+    *bp += STACK_BIAS;
 #    else
   // Historical BSDism here.
   struct sigcontext *scontext = (struct sigcontext *)context;
@@ -2415,8 +2671,8 @@ static void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) {
   *pc = scontext->si_regs.pc;
   *sp = scontext->si_regs.u_regs[14];
 #      endif
-#    endif
   *bp = (uptr)((uhwptr *)*sp)[14] + STACK_BIAS;
+#    endif
 #  elif defined(__mips__)
   ucontext_t *ucontext = (ucontext_t *)context;
   *pc = ucontext->uc_mcontext.pc;
@@ -2459,9 +2715,7 @@ static void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) {
 
 void SignalContext::InitPcSpBp() { GetPcSpBp(context, &pc, &sp, &bp); }
 
-void InitializePlatformEarly() {
-  // Do nothing.
-}
+void InitializePlatformEarly() { InitTlsSize(); }
 
 void CheckASLR() {
 #  if SANITIZER_NETBSD
lib/tsan/sanitizer_common/sanitizer_linux.h
@@ -97,19 +97,19 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg);
 class ThreadLister {
  public:
   explicit ThreadLister(pid_t pid);
-  ~ThreadLister();
   enum Result {
     Error,
     Incomplete,
     Ok,
   };
   Result ListThreads(InternalMmapVector<tid_t> *threads);
+  const char *LoadStatus(tid_t tid);
 
  private:
-  bool IsAlive(int tid);
+  bool IsAlive(tid_t tid);
 
-  pid_t pid_;
-  int descriptor_ = -1;
+  InternalScopedString task_path_;
+  InternalScopedString status_path_;
   InternalMmapVector<char> buffer_;
 };
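Note on the reworked ThreadLister above: it no longer holds a directory descriptor open across calls (ListThreads reopens /proc/<pid>/task each time), and the new public LoadStatus returns the raw /proc/<pid>/task/<tid>/status text, valid only until the next call that reuses the internal buffer. Below is a minimal usage sketch, not part of this commit; internal_getpid and VReport are assumed to be the usual sanitizer_common facilities.

static void ReportLiveThreadsExample() {
  ThreadLister lister(internal_getpid());
  InternalMmapVector<tid_t> tids;
  // Error means /proc/<pid>/task could not be opened or read; Incomplete
  // means the directory changed mid-scan and the list may be partial.
  if (lister.ListThreads(&tids) == ThreadLister::Error)
    return;
  for (tid_t tid : tids) {
    // LoadStatus returns a pointer into an internal buffer, so consume it
    // before the next LoadStatus/ListThreads call.
    if (const char *status = lister.LoadStatus(tid))
      VReport(2, "thread %llu:\n%s\n", tid, status);
  }
}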
 
lib/tsan/sanitizer_common/sanitizer_linux_libcdep.cpp
@@ -40,6 +40,10 @@
 #  include <sys/resource.h>
 #  include <syslog.h>
 
+#  if SANITIZER_GLIBC
+#    include <gnu/libc-version.h>
+#  endif
+
 #  if !defined(ElfW)
 #    define ElfW(type) Elf_##type
 #  endif
@@ -53,7 +57,7 @@
 // that, it was never implemented. So just define it to zero.
 #    undef MAP_NORESERVE
 #    define MAP_NORESERVE 0
-extern const Elf_Auxinfo *__elf_aux_vector;
+extern const Elf_Auxinfo *__elf_aux_vector __attribute__((weak));
 extern "C" int __sys_sigaction(int signum, const struct sigaction *act,
                                struct sigaction *oldact);
 #  endif
@@ -196,27 +200,6 @@ bool SetEnv(const char *name, const char *value) {
 }
 #  endif
 
-__attribute__((unused)) static bool GetLibcVersion(int *major, int *minor,
-                                                   int *patch) {
-#  ifdef _CS_GNU_LIBC_VERSION
-  char buf[64];
-  uptr len = confstr(_CS_GNU_LIBC_VERSION, buf, sizeof(buf));
-  if (len >= sizeof(buf))
-    return false;
-  buf[len] = 0;
-  static const char kGLibC[] = "glibc ";
-  if (internal_strncmp(buf, kGLibC, sizeof(kGLibC) - 1) != 0)
-    return false;
-  const char *p = buf + sizeof(kGLibC) - 1;
-  *major = internal_simple_strtoll(p, &p, 10);
-  *minor = (*p == '.') ? internal_simple_strtoll(p + 1, &p, 10) : 0;
-  *patch = (*p == '.') ? internal_simple_strtoll(p + 1, &p, 10) : 0;
-  return true;
-#  else
-  return false;
-#  endif
-}
-
 // True if we can use dlpi_tls_data. glibc before 2.25 may leave NULL (BZ
 // #19826) so dlpi_tls_data cannot be used.
 //
@@ -226,112 +209,166 @@ __attribute__((unused)) static bool GetLibcVersion(int *major, int *minor,
 __attribute__((unused)) static int g_use_dlpi_tls_data;
 
 #  if SANITIZER_GLIBC && !SANITIZER_GO
-__attribute__((unused)) static size_t g_tls_size;
-void InitTlsSize() {
-  int major, minor, patch;
-  g_use_dlpi_tls_data =
-      GetLibcVersion(&major, &minor, &patch) && major == 2 && minor >= 25;
-
-#    if defined(__aarch64__) || defined(__x86_64__) || \
-        defined(__powerpc64__) || defined(__loongarch__)
-  void *get_tls_static_info = dlsym(RTLD_NEXT, "_dl_get_tls_static_info");
-  size_t tls_align;
-  ((void (*)(size_t *, size_t *))get_tls_static_info)(&g_tls_size, &tls_align);
-#    endif
+static void GetGLibcVersion(int *major, int *minor, int *patch) {
+  const char *p = gnu_get_libc_version();
+  *major = internal_simple_strtoll(p, &p, 10);
+  // Caller does not expect anything else.
+  CHECK_EQ(*major, 2);
+  *minor = (*p == '.') ? internal_simple_strtoll(p + 1, &p, 10) : 0;
+  *patch = (*p == '.') ? internal_simple_strtoll(p + 1, &p, 10) : 0;
 }
-#  else
-void InitTlsSize() {}
-#  endif  // SANITIZER_GLIBC && !SANITIZER_GO
-
-// On glibc x86_64, ThreadDescriptorSize() needs to be precise due to the usage
-// of g_tls_size. On other targets, ThreadDescriptorSize() is only used by lsan
-// to get the pointer to thread-specific data keys in the thread control block.
-#  if (SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_SOLARIS) && \
-      !SANITIZER_ANDROID && !SANITIZER_GO
-// sizeof(struct pthread) from glibc.
-static atomic_uintptr_t thread_descriptor_size;
 
 static uptr ThreadDescriptorSizeFallback() {
-  uptr val = 0;
-#    if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
+#    if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || \
+        SANITIZER_RISCV64
   int major;
   int minor;
   int patch;
-  if (GetLibcVersion(&major, &minor, &patch) && major == 2) {
-    /* sizeof(struct pthread) values from various glibc versions.  */
-    if (SANITIZER_X32)
-      val = 1728;  // Assume only one particular version for x32.
-    // For ARM sizeof(struct pthread) changed in Glibc 2.23.
-    else if (SANITIZER_ARM)
-      val = minor <= 22 ? 1120 : 1216;
-    else if (minor <= 3)
-      val = FIRST_32_SECOND_64(1104, 1696);
-    else if (minor == 4)
-      val = FIRST_32_SECOND_64(1120, 1728);
-    else if (minor == 5)
-      val = FIRST_32_SECOND_64(1136, 1728);
-    else if (minor <= 9)
-      val = FIRST_32_SECOND_64(1136, 1712);
-    else if (minor == 10)
-      val = FIRST_32_SECOND_64(1168, 1776);
-    else if (minor == 11 || (minor == 12 && patch == 1))
-      val = FIRST_32_SECOND_64(1168, 2288);
-    else if (minor <= 14)
-      val = FIRST_32_SECOND_64(1168, 2304);
-    else if (minor < 32)  // Unknown version
-      val = FIRST_32_SECOND_64(1216, 2304);
-    else  // minor == 32
-      val = FIRST_32_SECOND_64(1344, 2496);
-  }
-#    elif defined(__s390__) || defined(__sparc__)
+  GetGLibcVersion(&major, &minor, &patch);
+#    endif
+
+#    if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
+  /* sizeof(struct pthread) values from various glibc versions.  */
+  if (SANITIZER_X32)
+    return 1728;  // Assume only one particular version for x32.
+  // For ARM sizeof(struct pthread) changed in Glibc 2.23.
+  if (SANITIZER_ARM)
+    return minor <= 22 ? 1120 : 1216;
+  if (minor <= 3)
+    return FIRST_32_SECOND_64(1104, 1696);
+  if (minor == 4)
+    return FIRST_32_SECOND_64(1120, 1728);
+  if (minor == 5)
+    return FIRST_32_SECOND_64(1136, 1728);
+  if (minor <= 9)
+    return FIRST_32_SECOND_64(1136, 1712);
+  if (minor == 10)
+    return FIRST_32_SECOND_64(1168, 1776);
+  if (minor == 11 || (minor == 12 && patch == 1))
+    return FIRST_32_SECOND_64(1168, 2288);
+  if (minor <= 14)
+    return FIRST_32_SECOND_64(1168, 2304);
+  if (minor < 32)  // Unknown version
+    return FIRST_32_SECOND_64(1216, 2304);
+  // minor == 32
+  return FIRST_32_SECOND_64(1344, 2496);
+#    endif
+
+#    if SANITIZER_RISCV64
+  // TODO: consider adding an optional runtime check for an unknown (untested)
+  // glibc version
+  if (minor <= 28)  // WARNING: the highest tested version is 2.29
+    return 1772;    // no guarantees for this one
+  if (minor <= 31)
+    return 1772;  // tested against glibc 2.29, 2.31
+  return 1936;    // tested against glibc 2.32
+#    endif
+
+#    if defined(__s390__) || defined(__sparc__)
   // The size of a prefix of TCB including pthread::{specific_1stblock,specific}
   // suffices. Just return offsetof(struct pthread, specific_used), which hasn't
   // changed since 2007-05. Technically this applies to i386/x86_64 as well but
   // we call _dl_get_tls_static_info and need the precise size of struct
   // pthread.
   return FIRST_32_SECOND_64(524, 1552);
-#    elif defined(__mips__)
+#    endif
+
+#    if defined(__mips__)
   // TODO(sagarthakur): add more values as per different glibc versions.
-  val = FIRST_32_SECOND_64(1152, 1776);
-#    elif SANITIZER_LOONGARCH64
-  val = 1856;  // from glibc 2.36
-#    elif SANITIZER_RISCV64
-  int major;
-  int minor;
-  int patch;
-  if (GetLibcVersion(&major, &minor, &patch) && major == 2) {
-    // TODO: consider adding an optional runtime check for an unknown (untested)
-    // glibc version
-    if (minor <= 28)  // WARNING: the highest tested version is 2.29
-      val = 1772;     // no guarantees for this one
-    else if (minor <= 31)
-      val = 1772;  // tested against glibc 2.29, 2.31
-    else
-      val = 1936;  // tested against glibc 2.32
-  }
+  return FIRST_32_SECOND_64(1152, 1776);
+#    endif
+
+#    if SANITIZER_LOONGARCH64
+  return 1856;  // from glibc 2.36
+#    endif
 
-#    elif defined(__aarch64__)
+#    if defined(__aarch64__)
   // The sizeof (struct pthread) is the same from GLIBC 2.17 to 2.22.
-  val = 1776;
-#    elif defined(__powerpc64__)
-  val = 1776;  // from glibc.ppc64le 2.20-8.fc21
+  return 1776;
+#    endif
+
+#    if defined(__powerpc64__)
+  return 1776;  // from glibc.ppc64le 2.20-8.fc21
 #    endif
-  return val;
 }
+#  endif  // SANITIZER_GLIBC && !SANITIZER_GO
+
+#  if SANITIZER_FREEBSD && !SANITIZER_GO
+// FIXME: Implementation is very GLIBC specific, but it's used by FreeBSD.
+static uptr ThreadDescriptorSizeFallback() {
+#    if defined(__s390__) || defined(__sparc__)
+  // The size of a prefix of TCB including pthread::{specific_1stblock,specific}
+  // suffices. Just return offsetof(struct pthread, specific_used), which hasn't
+  // changed since 2007-05. Technically this applies to i386/x86_64 as well but
+  // we call _dl_get_tls_static_info and need the precise size of struct
+  // pthread.
+  return FIRST_32_SECOND_64(524, 1552);
+#    endif
 
-uptr ThreadDescriptorSize() {
-  uptr val = atomic_load_relaxed(&thread_descriptor_size);
-  if (val)
-    return val;
-  // _thread_db_sizeof_pthread is a GLIBC_PRIVATE symbol that is exported in
-  // glibc 2.34 and later.
-  if (unsigned *psizeof = static_cast<unsigned *>(
-          dlsym(RTLD_DEFAULT, "_thread_db_sizeof_pthread")))
-    val = *psizeof;
-  if (!val)
-    val = ThreadDescriptorSizeFallback();
-  atomic_store_relaxed(&thread_descriptor_size, val);
-  return val;
+#    if defined(__mips__)
+  // TODO(sagarthakur): add more values as per different glibc versions.
+  return FIRST_32_SECOND_64(1152, 1776);
+#    endif
+
+#    if SANITIZER_LOONGARCH64
+  return 1856;  // from glibc 2.36
+#    endif
+
+#    if defined(__aarch64__)
+  // The sizeof (struct pthread) is the same from GLIBC 2.17 to 2.22.
+  return 1776;
+#    endif
+
+#    if defined(__powerpc64__)
+  return 1776;  // from glibc.ppc64le 2.20-8.fc21
+#    endif
+
+  return 0;
+}
+#  endif  // SANITIZER_FREEBSD && !SANITIZER_GO
+
+#  if (SANITIZER_FREEBSD || SANITIZER_GLIBC) && !SANITIZER_GO
+// On glibc x86_64, ThreadDescriptorSize() needs to be precise due to the usage
+// of g_tls_size. On other targets, ThreadDescriptorSize() is only used by lsan
+// to get the pointer to thread-specific data keys in the thread control block.
+// sizeof(struct pthread) from glibc.
+static uptr thread_descriptor_size;
+
+uptr ThreadDescriptorSize() { return thread_descriptor_size; }
+
+#    if SANITIZER_GLIBC
+__attribute__((unused)) static size_t g_tls_size;
+#    endif
+
+void InitTlsSize() {
+#    if SANITIZER_GLIBC
+  int major, minor, patch;
+  GetGLibcVersion(&major, &minor, &patch);
+  g_use_dlpi_tls_data = major == 2 && minor >= 25;
+
+  if (major == 2 && minor >= 34) {
+    // _thread_db_sizeof_pthread is a GLIBC_PRIVATE symbol that is exported in
+    // glibc 2.34 and later.
+    if (unsigned *psizeof = static_cast<unsigned *>(
+            dlsym(RTLD_DEFAULT, "_thread_db_sizeof_pthread"))) {
+      thread_descriptor_size = *psizeof;
+    }
+  }
+
+#      if defined(__aarch64__) || defined(__x86_64__) || \
+          defined(__powerpc64__) || defined(__loongarch__)
+  auto *get_tls_static_info = (void (*)(size_t *, size_t *))dlsym(
+      RTLD_DEFAULT, "_dl_get_tls_static_info");
+  size_t tls_align;
+  // Can be null when statically linked.
+  if (get_tls_static_info)
+    get_tls_static_info(&g_tls_size, &tls_align);
+#      endif
+
+#    endif  // SANITIZER_GLIBC
+
+  if (!thread_descriptor_size)
+    thread_descriptor_size = ThreadDescriptorSizeFallback();
 }
 
 #    if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64 || \
@@ -354,7 +391,13 @@ static uptr TlsPreTcbSize() {
   return kTlsPreTcbSize;
 }
 #    endif
+#  else   // (SANITIZER_FREEBSD || SANITIZER_GLIBC) && !SANITIZER_GO
+void InitTlsSize() {}
+uptr ThreadDescriptorSize() { return 0; }
+#  endif  // (SANITIZER_FREEBSD || SANITIZER_GLIBC) && !SANITIZER_GO
 
+#  if (SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_SOLARIS) && \
+      !SANITIZER_ANDROID && !SANITIZER_GO
 namespace {
 struct TlsBlock {
   uptr begin, end, align;
@@ -626,25 +669,32 @@ uptr GetTlsSize() {
 }
 #  endif
 
-void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size,
-                          uptr *tls_addr, uptr *tls_size) {
+void GetThreadStackAndTls(bool main, uptr *stk_begin, uptr *stk_end,
+                          uptr *tls_begin, uptr *tls_end) {
 #  if SANITIZER_GO
   // Stub implementation for Go.
-  *stk_addr = *stk_size = *tls_addr = *tls_size = 0;
+  *stk_begin = 0;
+  *stk_end = 0;
+  *tls_begin = 0;
+  *tls_end = 0;
 #  else
-  GetTls(tls_addr, tls_size);
+  uptr tls_addr = 0;
+  uptr tls_size = 0;
+  GetTls(&tls_addr, &tls_size);
+  *tls_begin = tls_addr;
+  *tls_end = tls_addr + tls_size;
 
   uptr stack_top, stack_bottom;
   GetThreadStackTopAndBottom(main, &stack_top, &stack_bottom);
-  *stk_addr = stack_bottom;
-  *stk_size = stack_top - stack_bottom;
+  *stk_begin = stack_bottom;
+  *stk_end = stack_top;
 
   if (!main) {
     // If stack and tls intersect, make them non-intersecting.
-    if (*tls_addr > *stk_addr && *tls_addr < *stk_addr + *stk_size) {
-      if (*stk_addr + *stk_size < *tls_addr + *tls_size)
-        *tls_size = *stk_addr + *stk_size - *tls_addr;
-      *stk_size = *tls_addr - *stk_addr;
+    if (*tls_begin > *stk_begin && *tls_begin < *stk_end) {
+      if (*stk_end < *tls_end)
+        *tls_end = *stk_end;
+      *stk_end = *tls_begin;
     }
   }
 #  endif
@@ -723,11 +773,6 @@ static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *arg) {
   return 0;
 }
 
-#  if SANITIZER_ANDROID && __ANDROID_API__ < 21
-extern "C" __attribute__((weak)) int dl_iterate_phdr(
-    int (*)(struct dl_phdr_info *, size_t, void *), void *);
-#  endif
-
 static bool requiresProcmaps() {
 #  if SANITIZER_ANDROID && __ANDROID_API__ <= 22
   // Fall back to /proc/maps if dl_iterate_phdr is unavailable or broken.
@@ -890,11 +935,8 @@ extern "C" SANITIZER_WEAK_ATTRIBUTE int __android_log_write(int prio,
 void WriteOneLineToSyslog(const char *s) {
   if (&async_safe_write_log) {
     async_safe_write_log(SANITIZER_ANDROID_LOG_INFO, GetProcessName(), s);
-  } else if (AndroidGetApiLevel() > ANDROID_KITKAT) {
-    syslog(LOG_INFO, "%s", s);
   } else {
-    CHECK(&__android_log_write);
-    __android_log_write(SANITIZER_ANDROID_LOG_INFO, nullptr, s);
+    syslog(LOG_INFO, "%s", s);
   }
 }
 
lib/tsan/sanitizer_common/sanitizer_mac.cpp
@@ -45,7 +45,7 @@ extern char **environ;
 #    define SANITIZER_OS_TRACE 0
 #  endif
 
-// import new crash reporting api
+// Integrate with CrashReporter library if available
 #  if defined(__has_include) && __has_include(<CrashReporterClient.h>)
 #    define HAVE_CRASHREPORTERCLIENT_H 1
 #    include <CrashReporterClient.h>
@@ -545,9 +545,6 @@ uptr GetTlsSize() {
   return 0;
 }
 
-void InitTlsSize() {
-}
-
 uptr TlsBaseAddr() {
   uptr segbase = 0;
 #if defined(__x86_64__)
@@ -572,21 +569,18 @@ uptr TlsSize() {
 #endif
 }
 
-void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size,
-                          uptr *tls_addr, uptr *tls_size) {
-#if !SANITIZER_GO
-  uptr stack_top, stack_bottom;
-  GetThreadStackTopAndBottom(main, &stack_top, &stack_bottom);
-  *stk_addr = stack_bottom;
-  *stk_size = stack_top - stack_bottom;
-  *tls_addr = TlsBaseAddr();
-  *tls_size = TlsSize();
-#else
-  *stk_addr = 0;
-  *stk_size = 0;
-  *tls_addr = 0;
-  *tls_size = 0;
-#endif
+void GetThreadStackAndTls(bool main, uptr *stk_begin, uptr *stk_end,
+                          uptr *tls_begin, uptr *tls_end) {
+#  if !SANITIZER_GO
+  GetThreadStackTopAndBottom(main, stk_end, stk_begin);
+  *tls_begin = TlsBaseAddr();
+  *tls_end = *tls_begin + TlsSize();
+#  else
+  *stk_begin = 0;
+  *stk_end = 0;
+  *tls_begin = 0;
+  *tls_end = 0;
+#  endif
 }
 
 void ListOfModules::init() {
@@ -788,7 +782,11 @@ void WriteOneLineToSyslog(const char *s) {
   if (GetMacosAlignedVersion() >= MacosVersion(10, 12)) {
     os_log_error(OS_LOG_DEFAULT, "%{public}s", s);
   } else {
+#pragma clang diagnostic push
+// as_log is deprecated.
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
     asl_log(nullptr, nullptr, ASL_LEVEL_ERR, "%s", s);
+#pragma clang diagnostic pop
   }
 #endif
 }
@@ -798,8 +796,13 @@ static char crashreporter_info_buff[__sanitizer::kErrorMessageBufferSize] = {};
 static Mutex crashreporter_info_mutex;
 
 extern "C" {
-// Integrate with crash reporter libraries.
+
 #if HAVE_CRASHREPORTERCLIENT_H
+// Available in CRASHREPORTER_ANNOTATIONS_VERSION 5+
+#    ifdef CRASHREPORTER_ANNOTATIONS_INITIALIZER
+CRASHREPORTER_ANNOTATIONS_INITIALIZER()
+#    else
+// Support for older CrashReporter annotations
 CRASH_REPORTER_CLIENT_HIDDEN
 struct crashreporter_annotations_t gCRAnnotations
     __attribute__((section("__DATA," CRASHREPORTER_ANNOTATIONS_SECTION))) = {
@@ -810,17 +813,17 @@ struct crashreporter_annotations_t gCRAnnotations
         0,
         0,
         0,
-#if CRASHREPORTER_ANNOTATIONS_VERSION > 4
+#      if CRASHREPORTER_ANNOTATIONS_VERSION > 4
         0,
-#endif
+#      endif
 };
-
-#else
-// fall back to old crashreporter api
+#    endif
+#  else
+// Revert to previous crash reporter API if client header is not available
 static const char *__crashreporter_info__ __attribute__((__used__)) =
     &crashreporter_info_buff[0];
 asm(".desc ___crashreporter_info__, 0x10");
-#endif
+#endif  // HAVE_CRASHREPORTERCLIENT_H
 
 }  // extern "C"
 
@@ -843,6 +846,9 @@ void LogFullErrorReport(const char *buffer) {
 #if !SANITIZER_GO
   // Log with os_trace. This will make it into the crash log.
 #if SANITIZER_OS_TRACE
+#pragma clang diagnostic push
+// os_trace is deprecated.
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
   if (GetMacosAlignedVersion() >= MacosVersion(10, 10)) {
     // os_trace requires the message (format parameter) to be a string literal.
     if (internal_strncmp(SanitizerToolName, "AddressSanitizer",
@@ -860,6 +866,7 @@ void LogFullErrorReport(const char *buffer) {
     if (common_flags()->log_to_syslog)
       os_trace("Consult syslog for more information.");
   }
+#pragma clang diagnostic pop
 #endif
 
   // Log to syslog.
@@ -970,8 +977,9 @@ static const char kDyldInsertLibraries[] = "DYLD_INSERT_LIBRARIES";
 LowLevelAllocator allocator_for_env;
 
 static bool ShouldCheckInterceptors() {
-  // Restrict "interceptors working?" check to ASan and TSan.
-  const char *sanitizer_names[] = {"AddressSanitizer", "ThreadSanitizer"};
+  // Restrict "interceptors working?" check to ASan, TSan and RTSan.
+  const char *sanitizer_names[] = {"AddressSanitizer", "ThreadSanitizer",
+                                   "RealtimeSanitizer"};
   size_t count = sizeof(sanitizer_names) / sizeof(sanitizer_names[0]);
   for (size_t i = 0; i < count; i++) {
     if (internal_strcmp(sanitizer_names[i], SanitizerToolName) == 0)
lib/tsan/sanitizer_common/sanitizer_platform_interceptors.h
@@ -84,6 +84,25 @@
 #define SI_NOT_MAC 1
 #endif
 
+#if SANITIZER_APPLE
+#  include <Availability.h>
+
+// aligned_alloc was introduced in OSX 10.15
+// Linking will fail when using an older SDK
+#  if defined(__MAC_10_15)
+// macOS 10.15 may be newer than our minimum deployment target.  To ensure we
+// generate a weak reference, so that the dylib continues to work on older
+// systems, we forward-declare the intercepted function as a "weak import".
+SANITIZER_WEAK_IMPORT void *aligned_alloc(__sanitizer::usize __alignment,
+                                          __sanitizer::usize __size);
+#    define SI_MAC_SDK_10_15_AVAILABLE 1
+#  else
+#    define SI_MAC_SDK_10_15_AVAILABLE 0
+#  endif  // defined(__MAC_10_15)
+
+#endif  // SANITIZER_APPLE
+
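Because the declaration above is only a weak import, a call to aligned_alloc on a pre-10.15 system would resolve the symbol to null. The sketch below illustrates the usual guard idiom for such weak imports; it is an illustration only, and the posix_memalign fallback is an assumption, not what the interceptor necessarily does.

static void *AlignedAllocOrFallback(__sanitizer::usize alignment,
                                    __sanitizer::usize size) {
  // Taking the address of a weak import yields null when the symbol is
  // absent at load time, so this check is safe on older systems.
  if (&aligned_alloc)
    return aligned_alloc(alignment, size);
  void *ptr = nullptr;  // hypothetical fallback path
  return posix_memalign(&ptr, alignment, size) == 0 ? ptr : nullptr;
}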
 #if SANITIZER_IOS
 #define SI_IOS 1
 #else
@@ -183,9 +202,16 @@
 #define SANITIZER_INTERCEPT_FPUTS SI_POSIX
 #define SANITIZER_INTERCEPT_PUTS SI_POSIX
 
+#define SANITIZER_INTERCEPT_CREAT64 (SI_GLIBC || SI_SOLARIS32)
+#define SANITIZER_INTERCEPT_FCNTL64 (SI_GLIBC || SI_SOLARIS32)
+#define SANITIZER_INTERCEPT_OPEN64 (SI_GLIBC || SI_SOLARIS32)
+#define SANITIZER_INTERCEPT_OPENAT64 (SI_GLIBC || SI_SOLARIS32)
+
 #define SANITIZER_INTERCEPT_PREAD64 (SI_GLIBC || SI_SOLARIS32)
 #define SANITIZER_INTERCEPT_PWRITE64 (SI_GLIBC || SI_SOLARIS32)
 
+#define SANITIZER_INTERCEPT_LSEEK64 (SI_GLIBC || SI_SOLARIS32)
+
 #define SANITIZER_INTERCEPT_READV SI_POSIX
 #define SANITIZER_INTERCEPT_WRITEV SI_POSIX
 
@@ -232,8 +258,12 @@
   (SI_FREEBSD || SI_NETBSD || SI_LINUX || SI_SOLARIS)
 #define SANITIZER_INTERCEPT_CLOCK_GETCPUCLOCKID \
   (SI_LINUX || SI_FREEBSD || SI_NETBSD)
+// TODO: This should be SI_POSIX, adding glibc first until I have time
+// to verify all timer_t typedefs on other platforms.
+#define SANITIZER_INTERCEPT_TIMER_CREATE SI_GLIBC
 #define SANITIZER_INTERCEPT_GETITIMER SI_POSIX
 #define SANITIZER_INTERCEPT_TIME SI_POSIX
+#define SANITIZER_INTERCEPT_TIMESPEC_GET SI_LINUX
 #define SANITIZER_INTERCEPT_GLOB (SI_GLIBC || SI_SOLARIS)
 #define SANITIZER_INTERCEPT_GLOB64 SI_GLIBC
 #define SANITIZER_INTERCEPT___B64_TO SI_LINUX_NOT_ANDROID
@@ -274,8 +304,9 @@
 #if SI_LINUX_NOT_ANDROID &&                                                \
     (defined(__i386) || defined(__x86_64) || defined(__mips64) ||          \
      defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \
-     defined(__s390__) || defined(__loongarch__) || SANITIZER_RISCV64)
-#define SANITIZER_INTERCEPT_PTRACE 1
+     defined(__s390__) || defined(__loongarch__) || SANITIZER_RISCV64 ||   \
+     defined(__sparc__))
+#  define SANITIZER_INTERCEPT_PTRACE 1
 #else
 #define SANITIZER_INTERCEPT_PTRACE 0
 #endif
@@ -314,6 +345,8 @@
 #define SANITIZER_INTERCEPT_GETGROUPS SI_POSIX
 #define SANITIZER_INTERCEPT_POLL SI_POSIX
 #define SANITIZER_INTERCEPT_PPOLL SI_LINUX_NOT_ANDROID || SI_SOLARIS
+#define SANITIZER_INTERCEPT_EPOLL (SI_LINUX)
+#define SANITIZER_INTERCEPT_KQUEUE (SI_FREEBSD || SI_NETBSD || SI_MAC)
 #define SANITIZER_INTERCEPT_WORDEXP                                          \
   (SI_FREEBSD || SI_NETBSD || (SI_MAC && !SI_IOS) || SI_LINUX_NOT_ANDROID || \
    SI_SOLARIS)
@@ -494,7 +527,8 @@
 #define SANITIZER_INTERCEPT_PVALLOC (SI_GLIBC || SI_ANDROID)
 #define SANITIZER_INTERCEPT_CFREE (SI_GLIBC && !SANITIZER_RISCV64)
 #define SANITIZER_INTERCEPT_REALLOCARRAY SI_POSIX
-#define SANITIZER_INTERCEPT_ALIGNED_ALLOC (!SI_MAC)
+#define SANITIZER_INTERCEPT_ALIGNED_ALLOC \
+  (!SI_MAC || SI_MAC_SDK_10_15_AVAILABLE)
 #define SANITIZER_INTERCEPT_MALLOC_USABLE_SIZE (!SI_MAC && !SI_NETBSD)
 #define SANITIZER_INTERCEPT_MCHECK_MPROBE SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_WCSLEN 1
@@ -559,10 +593,8 @@
 #define SANITIZER_INTERCEPT_SHA1 SI_NETBSD
 #define SANITIZER_INTERCEPT_MD4 SI_NETBSD
 #define SANITIZER_INTERCEPT_RMD160 SI_NETBSD
-#define SANITIZER_INTERCEPT_MD5 (SI_NETBSD || SI_FREEBSD)
-#define SANITIZER_INTERCEPT_FSEEK (SI_NETBSD || SI_FREEBSD)
+#define SANITIZER_INTERCEPT_FSEEK SI_POSIX
 #define SANITIZER_INTERCEPT_MD2 SI_NETBSD
-#define SANITIZER_INTERCEPT_SHA2 (SI_NETBSD || SI_FREEBSD)
 #define SANITIZER_INTERCEPT_CDB SI_NETBSD
 #define SANITIZER_INTERCEPT_VIS (SI_NETBSD || SI_FREEBSD)
 #define SANITIZER_INTERCEPT_POPEN SI_POSIX
@@ -601,7 +633,13 @@
 // FIXME: also available from musl 1.2.5
 #define SANITIZER_INTERCEPT_PREADV2 (SI_LINUX && __GLIBC_PREREQ(2, 26))
 #define SANITIZER_INTERCEPT_PWRITEV2 (SI_LINUX && __GLIBC_PREREQ(2, 26))
-
+#if defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && \
+    __MAC_OS_X_VERSION_MIN_REQUIRED >= 130000
+#  define SI_MAC_OS_DEPLOYMENT_MIN_13_00 1
+#else
+#  define SI_MAC_OS_DEPLOYMENT_MIN_13_00 0
+#endif
+#define SANITIZER_INTERCEPT_FREADLINK (SI_MAC && SI_MAC_OS_DEPLOYMENT_MIN_13_00)
 // This macro gives a way for downstream users to override the above
 // interceptor macros irrespective of the platform they are on. They have
 // to do two things:
lib/tsan/sanitizer_common/sanitizer_platform_limits_netbsd.cpp
@@ -547,6 +547,7 @@ unsigned pid_t_sz = sizeof(pid_t);
 unsigned timeval_sz = sizeof(timeval);
 unsigned uid_t_sz = sizeof(uid_t);
 unsigned gid_t_sz = sizeof(gid_t);
+unsigned fpos_t_sz = sizeof(fpos_t);
 unsigned mbstate_t_sz = sizeof(mbstate_t);
 unsigned sigset_t_sz = sizeof(sigset_t);
 unsigned struct_timezone_sz = sizeof(struct timezone);
@@ -2487,8 +2488,6 @@ const unsigned RMD160_return_length = RMD160_DIGEST_STRING_LENGTH;
 const unsigned MD5_CTX_sz = sizeof(MD5_CTX);
 const unsigned MD5_return_length = MD5_DIGEST_STRING_LENGTH;
 
-const unsigned fpos_t_sz = sizeof(fpos_t);
-
 const unsigned MD2_CTX_sz = sizeof(MD2_CTX);
 const unsigned MD2_return_length = MD2_DIGEST_STRING_LENGTH;
 
lib/tsan/sanitizer_common/sanitizer_platform_limits_netbsd.h
@@ -36,6 +36,7 @@ extern unsigned pid_t_sz;
 extern unsigned timeval_sz;
 extern unsigned uid_t_sz;
 extern unsigned gid_t_sz;
+extern unsigned fpos_t_sz;
 extern unsigned mbstate_t_sz;
 extern unsigned struct_timezone_sz;
 extern unsigned struct_tms_sz;
@@ -2335,8 +2336,6 @@ extern const unsigned RMD160_return_length;
 extern const unsigned MD5_CTX_sz;
 extern const unsigned MD5_return_length;
 
-extern const unsigned fpos_t_sz;
-
 extern const unsigned MD2_CTX_sz;
 extern const unsigned MD2_return_length;
 
lib/tsan/sanitizer_common/sanitizer_platform_limits_posix.cpp
@@ -94,8 +94,9 @@
 #if SANITIZER_LINUX
 # include <utime.h>
 # include <sys/ptrace.h>
-#    if defined(__mips64) || defined(__aarch64__) || defined(__arm__) || \
-        defined(__hexagon__) || defined(__loongarch__) ||SANITIZER_RISCV64
+#    if defined(__mips64) || defined(__aarch64__) || defined(__arm__) ||       \
+        defined(__hexagon__) || defined(__loongarch__) || SANITIZER_RISCV64 || \
+        defined(__sparc__)
 #      include <asm/ptrace.h>
 #      ifdef __arm__
 typedef struct user_fpregs elf_fpregset_t;
@@ -117,15 +118,16 @@ typedef struct user_fpregs elf_fpregset_t;
 #if SANITIZER_LINUX
 #if SANITIZER_GLIBC
 #include <fstab.h>
-#include <net/if_ppp.h>
-#include <netax25/ax25.h>
-#include <netipx/ipx.h>
-#include <netrom/netrom.h>
-#include <obstack.h>
-#if HAVE_RPC_XDR_H
-# include <rpc/xdr.h>
-#endif
-#include <scsi/scsi.h>
+#      include <linux/filter.h>
+#      include <net/if_ppp.h>
+#      include <netax25/ax25.h>
+#      include <netipx/ipx.h>
+#      include <netrom/netrom.h>
+#      include <obstack.h>
+#      if HAVE_RPC_XDR_H
+#        include <rpc/xdr.h>
+#      endif
+#      include <scsi/scsi.h>
 #else
 #include <linux/if_ppp.h>
 #include <linux/kd.h>
@@ -358,11 +360,12 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr);
   const int wordexp_wrde_dooffs = WRDE_DOOFFS;
 #  endif  // !SANITIZER_ANDROID
 
-#if SANITIZER_LINUX && !SANITIZER_ANDROID &&                               \
-    (defined(__i386) || defined(__x86_64) || defined(__mips64) ||          \
-     defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \
-     defined(__s390__) || defined(__loongarch__)|| SANITIZER_RISCV64)
-#if defined(__mips64) || defined(__powerpc64__) || defined(__arm__)
+#  if SANITIZER_LINUX && !SANITIZER_ANDROID &&                               \
+      (defined(__i386) || defined(__x86_64) || defined(__mips64) ||          \
+       defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \
+       defined(__s390__) || defined(__loongarch__) || SANITIZER_RISCV64 ||   \
+       defined(__sparc__))
+#    if defined(__mips64) || defined(__powerpc64__) || defined(__arm__)
   unsigned struct_user_regs_struct_sz = sizeof(struct pt_regs);
   unsigned struct_user_fpregs_struct_sz = sizeof(elf_fpregset_t);
 #elif SANITIZER_RISCV64
@@ -377,19 +380,22 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr);
 #elif defined(__s390__)
   unsigned struct_user_regs_struct_sz = sizeof(struct _user_regs_struct);
   unsigned struct_user_fpregs_struct_sz = sizeof(struct _user_fpregs_struct);
-#else
+#    elif defined(__sparc__)
+  unsigned struct_user_regs_struct_sz = sizeof(struct sunos_regs);
+  unsigned struct_user_fpregs_struct_sz = sizeof(struct sunos_fp);
+#    else
   unsigned struct_user_regs_struct_sz = sizeof(struct user_regs_struct);
   unsigned struct_user_fpregs_struct_sz = sizeof(struct user_fpregs_struct);
-#endif // __mips64 || __powerpc64__ || __aarch64__ || __loongarch__
-#if defined(__x86_64) || defined(__mips64) || defined(__powerpc64__) || \
-    defined(__aarch64__) || defined(__arm__) || defined(__s390__) ||    \
-    defined(__loongarch__) || SANITIZER_RISCV64
+#    endif  // __mips64 || __powerpc64__ || __aarch64__ || __loongarch__
+#    if defined(__x86_64) || defined(__mips64) || defined(__powerpc64__) || \
+        defined(__aarch64__) || defined(__arm__) || defined(__s390__) ||    \
+        defined(__loongarch__) || SANITIZER_RISCV64 || defined(__sparc__)
   unsigned struct_user_fpxregs_struct_sz = 0;
 #else
   unsigned struct_user_fpxregs_struct_sz = sizeof(struct user_fpxregs_struct);
 #endif // __x86_64 || __mips64 || __powerpc64__ || __aarch64__ || __arm__
-// || __s390__ || __loongarch__
-#ifdef __arm__
+  // || __s390__ || __loongarch__ || SANITIZER_RISCV64 || __sparc__
+#    ifdef __arm__
   unsigned struct_user_vfpregs_struct_sz = ARM_VFPREGS_SIZE;
 #else
   unsigned struct_user_vfpregs_struct_sz = 0;
@@ -531,13 +537,16 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr);
 
   unsigned struct_audio_buf_info_sz = sizeof(struct audio_buf_info);
   unsigned struct_ppp_stats_sz = sizeof(struct ppp_stats);
-#endif  // SANITIZER_GLIBC
+  unsigned struct_sock_fprog_sz = sizeof(struct sock_fprog);
+#  endif  // SANITIZER_GLIBC
 
-#if !SANITIZER_ANDROID && !SANITIZER_APPLE
+#  if !SANITIZER_ANDROID && !SANITIZER_APPLE
   unsigned struct_sioc_sg_req_sz = sizeof(struct sioc_sg_req);
   unsigned struct_sioc_vif_req_sz = sizeof(struct sioc_vif_req);
 #endif
 
+  unsigned fpos_t_sz = sizeof(fpos_t);
+
   const unsigned long __sanitizer_bufsiz = BUFSIZ;
 
   const unsigned IOCTL_NOT_PRESENT = 0;
@@ -1084,7 +1093,7 @@ CHECK_SIZE_AND_OFFSET(cmsghdr, cmsg_len);
 CHECK_SIZE_AND_OFFSET(cmsghdr, cmsg_level);
 CHECK_SIZE_AND_OFFSET(cmsghdr, cmsg_type);
 
-#if SANITIZER_LINUX && (__ANDROID_API__ >= 21 || __GLIBC_PREREQ (2, 14))
+#  if SANITIZER_LINUX && (SANITIZER_ANDROID || __GLIBC_PREREQ(2, 14))
 CHECK_TYPE_SIZE(mmsghdr);
 CHECK_SIZE_AND_OFFSET(mmsghdr, msg_hdr);
 CHECK_SIZE_AND_OFFSET(mmsghdr, msg_len);
lib/tsan/sanitizer_common/sanitizer_platform_limits_posix.h
@@ -98,10 +98,13 @@ const unsigned struct_kernel_stat64_sz = 104;
 const unsigned struct_kernel_stat_sz = 144;
 const unsigned struct_kernel_stat64_sz = 104;
 #elif defined(__mips__)
-const unsigned struct_kernel_stat_sz =
-    SANITIZER_ANDROID
-        ? FIRST_32_SECOND_64(104, 128)
-        : FIRST_32_SECOND_64((_MIPS_SIM == _ABIN32) ? 176 : 160, 216);
+const unsigned struct_kernel_stat_sz = SANITIZER_ANDROID
+                                           ? FIRST_32_SECOND_64(104, 128)
+#      if defined(_ABIN32) && _MIPS_SIM == _ABIN32
+                                           : FIRST_32_SECOND_64(176, 216);
+#      else
+                                           : FIRST_32_SECOND_64(160, 216);
+#      endif
 const unsigned struct_kernel_stat64_sz = 104;
 #elif defined(__s390__) && !defined(__s390x__)
 const unsigned struct_kernel_stat_sz = 64;
@@ -313,7 +316,7 @@ extern unsigned struct_statvfs_sz;
 
 struct __sanitizer_iovec {
   void *iov_base;
-  uptr iov_len;
+  usize iov_len;
 };
 
 #if !SANITIZER_ANDROID
@@ -389,6 +392,16 @@ typedef long __sanitizer_time_t;
 
 typedef long __sanitizer_suseconds_t;
 
+struct __sanitizer_timespec {
+  __sanitizer_time_t tv_sec; /* seconds */
+  u64 tv_nsec;               /* nanoseconds */
+};
+
+struct __sanitizer_itimerspec {
+  struct __sanitizer_timespec it_interval; /* timer period */
+  struct __sanitizer_timespec it_value;    /* timer expiration */
+};
+
 struct __sanitizer_timeval {
   __sanitizer_time_t tv_sec;
   __sanitizer_suseconds_t tv_usec;
@@ -513,6 +526,7 @@ struct __sanitizer_dirent64 {
   unsigned short d_reclen;
   // more fields that we don't care about
 };
+extern unsigned struct_sock_fprog_sz;
 #endif
 
 #if defined(__x86_64__) && !defined(_LP64)
@@ -590,7 +604,7 @@ struct __sanitizer_siginfo_pad {
 #if SANITIZER_LINUX
 # define SANITIZER_HAS_SIGINFO 1
 union __sanitizer_siginfo {
-  struct {
+  __extension__ struct {
     int si_signo;
 # if SANITIZER_MIPS
     int si_code;
@@ -855,10 +869,11 @@ typedef void __sanitizer_FILE;
 # define SANITIZER_HAS_STRUCT_FILE 0
 #endif
 
-#if SANITIZER_LINUX && !SANITIZER_ANDROID &&                               \
-    (defined(__i386) || defined(__x86_64) || defined(__mips64) ||          \
-     defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \
-     defined(__s390__) || defined(__loongarch__) || SANITIZER_RISCV64)
+#  if SANITIZER_LINUX && !SANITIZER_ANDROID &&                               \
+      (defined(__i386) || defined(__x86_64) || defined(__mips64) ||          \
+       defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \
+       defined(__s390__) || defined(__loongarch__) || SANITIZER_RISCV64 ||   \
+       defined(__sparc__))
 extern unsigned struct_user_regs_struct_sz;
 extern unsigned struct_user_fpregs_struct_sz;
 extern unsigned struct_user_fpxregs_struct_sz;
@@ -880,9 +895,24 @@ extern int ptrace_setsiginfo;
 extern int ptrace_getregset;
 extern int ptrace_setregset;
 extern int ptrace_geteventmsg;
-#endif
 
-#if SANITIZER_LINUX  && !SANITIZER_ANDROID
+// Helper for the ptrace interceptor.
+template <class T>
+inline T ptrace_data_arg(int request, T addr, T data) {
+#    if SANITIZER_LINUX && SANITIZER_SPARC
+  // As described in ptrace(2), the meanings of addr and data are reversed
+  // for the PTRACE_GETREGS, PTRACE_GETFPREGS, PTRACE_GETREGS, and
+  // PTRACE_GETFPREGS requests on Linux/sparc64.
+  if (request == ptrace_getregs || request == ptrace_getfpregs ||
+      request == ptrace_setregs || request == ptrace_setfpregs)
+    return addr;
+  else
+#    endif
+    return data;
+}
+#  endif
+
+#  if SANITIZER_LINUX && !SANITIZER_ANDROID
 extern unsigned struct_shminfo_sz;
 extern unsigned struct_shm_info_sz;
 extern int shmctl_ipc_stat;
@@ -1050,7 +1080,7 @@ extern unsigned struct_serial_struct_sz;
 extern unsigned struct_sockaddr_ax25_sz;
 extern unsigned struct_unimapdesc_sz;
 extern unsigned struct_unimapinit_sz;
-#endif  // SANITIZER_LINUX && !SANITIZER_ANDROID
+#  endif  // SANITIZER_LINUX && !SANITIZER_ANDROID
 
 extern const unsigned long __sanitizer_bufsiz;
 
@@ -1064,6 +1094,8 @@ extern unsigned struct_sioc_sg_req_sz;
 extern unsigned struct_sioc_vif_req_sz;
 #endif
 
+extern unsigned fpos_t_sz;
+
 // ioctl request identifiers
 
 // A special value to mark ioctls that are not present on the target platform,
@@ -1500,6 +1532,10 @@ extern const int si_SEGV_ACCERR;
 
 #define SIGACTION_SYMNAME sigaction
 
+#  if SANITIZER_LINUX
+typedef void *__sanitizer_timer_t;
+#  endif
+
 #endif  // SANITIZER_LINUX || SANITIZER_APPLE
 
 #endif
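The ptrace_data_arg helper added above exists because Linux/sparc64 swaps the addr and data arguments for the register-block requests, so a caller has to inspect the pointer the kernel actually fills rather than always using data. A hypothetical, simplified call-site sketch follows; CheckWrittenRange is an invented placeholder for whatever range annotation the caller performs.

static void AfterPtraceGetRegs(int request, void *addr, void *data) {
  // Returns addr for PTRACE_GETREGS-style requests on sparc64 and data
  // everywhere else, so the correct buffer is annotated on every target.
  void *written = ptrace_data_arg(request, addr, data);
  if (request == ptrace_getregs && written)
    CheckWrittenRange(written, struct_user_regs_struct_sz);  // hypothetical
}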
lib/tsan/sanitizer_common/sanitizer_platform_limits_solaris.cpp
@@ -32,6 +32,7 @@
 #include <semaphore.h>
 #include <signal.h>
 #include <stddef.h>
+#include <stdio.h>
 #include <sys/ethernet.h>
 #include <sys/filio.h>
 #include <sys/ipc.h>
@@ -135,6 +136,8 @@ namespace __sanitizer {
   unsigned struct_sioc_sg_req_sz = sizeof(struct sioc_sg_req);
   unsigned struct_sioc_vif_req_sz = sizeof(struct sioc_vif_req);
 
+  unsigned fpos_t_sz = sizeof(fpos_t);
+
   const unsigned IOCTL_NOT_PRESENT = 0;
 
   unsigned IOCTL_FIOASYNC = FIOASYNC;
lib/tsan/sanitizer_common/sanitizer_platform_limits_solaris.h
@@ -418,6 +418,8 @@ extern unsigned struct_winsize_sz;
 extern unsigned struct_sioc_sg_req_sz;
 extern unsigned struct_sioc_vif_req_sz;
 
+extern unsigned fpos_t_sz;
+
 // ioctl request identifiers
 
 // A special value to mark ioctls that are not present on the target platform,
lib/tsan/sanitizer_common/sanitizer_posix.cpp
@@ -353,7 +353,15 @@ bool ShouldMockFailureToOpen(const char *path) {
          internal_strncmp(path, "/proc/", 6) == 0;
 }
 
-#if SANITIZER_LINUX && !SANITIZER_ANDROID && !SANITIZER_GO
+bool OpenReadsVaArgs(int oflag) {
+#  ifdef O_TMPFILE
+  return (oflag & (O_CREAT | O_TMPFILE)) != 0;
+#  else
+  return (oflag & O_CREAT) != 0;
+#  endif
+}
+
+#  if SANITIZER_LINUX && !SANITIZER_ANDROID && !SANITIZER_GO
 int GetNamedMappingFd(const char *name, uptr size, int *flags) {
   if (!common_flags()->decorate_proc_maps || !name)
     return -1;
lib/tsan/sanitizer_common/sanitizer_posix.h
@@ -28,6 +28,9 @@ namespace __sanitizer {
 // Don't use directly, use __sanitizer::OpenFile() instead.
 uptr internal_open(const char *filename, int flags);
 uptr internal_open(const char *filename, int flags, u32 mode);
+#  if SANITIZER_FREEBSD
+uptr internal_close_range(fd_t lowfd, fd_t highfd, int flags);
+#  endif
 uptr internal_close(fd_t fd);
 
 uptr internal_read(fd_t fd, void *buf, uptr count);
@@ -86,7 +89,7 @@ int internal_pthread_join(void *th, void **ret);
       return REAL(pthread_create)(th, attr, callback, param);             \
     }                                                                     \
     int internal_pthread_join(void *th, void **ret) {                     \
-      return REAL(pthread_join(th, ret));                                 \
+      return REAL(pthread_join)(th, ret);                                 \
     }                                                                     \
     }  // namespace __sanitizer
 
@@ -108,6 +111,7 @@ bool IsStateDetached(int state);
 fd_t ReserveStandardFds(fd_t fd);
 
 bool ShouldMockFailureToOpen(const char *path);
+bool OpenReadsVaArgs(int oflag);
 
 // Create a non-file mapping with a given /proc/self/maps name.
 uptr MmapNamed(void *addr, uptr length, int prot, int flags, const char *name);
lib/tsan/sanitizer_common/sanitizer_posix_libcdep.cpp
@@ -288,26 +288,86 @@ bool SignalContext::IsStackOverflow() const {
 
 #endif  // SANITIZER_GO
 
+static void SetNonBlock(int fd) {
+  int res = fcntl(fd, F_GETFL, 0);
+  CHECK(!internal_iserror(res, nullptr));
+
+  res |= O_NONBLOCK;
+  res = fcntl(fd, F_SETFL, res);
+  CHECK(!internal_iserror(res, nullptr));
+}
+
 bool IsAccessibleMemoryRange(uptr beg, uptr size) {
-  uptr page_size = GetPageSizeCached();
-  // Checking too large memory ranges is slow.
-  CHECK_LT(size, page_size * 10);
-  int sock_pair[2];
-  if (pipe(sock_pair))
-    return false;
-  uptr bytes_written =
-      internal_write(sock_pair[1], reinterpret_cast<void *>(beg), size);
-  int write_errno;
-  bool result;
-  if (internal_iserror(bytes_written, &write_errno)) {
-    CHECK_EQ(EFAULT, write_errno);
-    result = false;
-  } else {
-    result = (bytes_written == size);
+  while (size) {
+    // Reading from `fds[0]` into a dummy buffer to free up the pipe buffer
+    // for more writes is slower than simply recreating the pipe.
+    int fds[2];
+    CHECK_EQ(0, pipe(fds));
+
+    auto cleanup = at_scope_exit([&]() {
+      internal_close(fds[0]);
+      internal_close(fds[1]);
+    });
+
+    SetNonBlock(fds[1]);
+
+    int write_errno;
+    uptr w = internal_write(fds[1], reinterpret_cast<char *>(beg), size);
+    if (internal_iserror(w, &write_errno)) {
+      if (write_errno == EINTR)
+        continue;
+      CHECK_EQ(EFAULT, write_errno);
+      return false;
+    }
+    size -= w;
+    beg += w;
+  }
+
+  return true;
+}
+
+bool TryMemCpy(void *dest, const void *src, uptr n) {
+  if (!n)
+    return true;
+  int fds[2];
+  CHECK_EQ(0, pipe(fds));
+
+  auto cleanup = at_scope_exit([&]() {
+    internal_close(fds[0]);
+    internal_close(fds[1]);
+  });
+
+  SetNonBlock(fds[0]);
+  SetNonBlock(fds[1]);
+
+  char *d = static_cast<char *>(dest);
+  const char *s = static_cast<const char *>(src);
+
+  while (n) {
+    int e;
+    uptr w = internal_write(fds[1], s, n);
+    if (internal_iserror(w, &e)) {
+      if (e == EINTR)
+        continue;
+      CHECK_EQ(EFAULT, e);
+      return false;
+    }
+    s += w;
+    n -= w;
+
+    while (w) {
+      uptr r = internal_read(fds[0], d, w);
+      if (internal_iserror(r, &e)) {
+        CHECK_EQ(EINTR, e);
+        continue;
+      }
+
+      d += r;
+      w -= r;
+    }
   }
-  internal_close(sock_pair[0]);
-  internal_close(sock_pair[1]);
-  return result;
+
+  return true;
 }
 
 void PlatformPrepareForSandboxing(void *args) {
@@ -483,7 +543,11 @@ pid_t StartSubprocess(const char *program, const char *const argv[],
       internal_close(stderr_fd);
     }
 
+#  if SANITIZER_FREEBSD
+    internal_close_range(3, ~static_cast<fd_t>(0), 0);
+#  else
     for (int fd = sysconf(_SC_OPEN_MAX); fd > 2; fd--) internal_close(fd);
+#  endif
 
     internal_execve(program, const_cast<char **>(&argv[0]),
                     const_cast<char *const *>(envp));
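The rewritten IsAccessibleMemoryRange and the new TryMemCpy above rely on the same trick: writing the candidate range into a non-blocking pipe lets the kernel report EFAULT instead of the process faulting, and reading the bytes back out of the pipe yields a fault-free copy. A small usage sketch, hypothetical caller only; Printf is the usual sanitizer_common printer.

static bool DumpWordAt(uptr addr) {
  // Probe first so a wild pointer cannot crash the tool, then copy the word
  // out via the pipe-backed TryMemCpy in case the page is racily unmapped.
  if (!IsAccessibleMemoryRange(addr, sizeof(uptr)))
    return false;
  uptr value = 0;
  if (!TryMemCpy(&value, reinterpret_cast<const void *>(addr), sizeof(value)))
    return false;
  Printf("0x%zx: 0x%zx\n", addr, value);
  return true;
}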
lib/tsan/sanitizer_common/sanitizer_procmaps_mac.cpp
@@ -433,7 +433,9 @@ void MemoryMappingLayout::DumpListOfModules(
   MemoryMappedSegmentData data;
   segment.data_ = &data;
   while (Next(&segment)) {
-    if (segment.filename[0] == '\0') continue;
+    // skip the __PAGEZERO segment, its vmsize is 0
+    if (segment.filename[0] == '\0' || (segment.start == segment.end))
+      continue;
     LoadedModule *cur_module = nullptr;
     if (!modules->empty() &&
         0 == internal_strcmp(segment.filename, modules->back().full_name())) {
lib/tsan/sanitizer_common/sanitizer_procmaps_solaris.cpp
@@ -11,6 +11,10 @@
 
 // Before Solaris 11.4, <procfs.h> doesn't work in a largefile environment.
 #undef _FILE_OFFSET_BITS
+
+// Avoid conflict between `_TIME_BITS` defined vs. `_FILE_OFFSET_BITS`
+// undefined in some Linux configurations.
+#undef _TIME_BITS
 #include "sanitizer_platform.h"
 #if SANITIZER_SOLARIS
 #  include <fcntl.h>
lib/tsan/sanitizer_common/sanitizer_redefine_builtins.h
@@ -17,9 +17,11 @@
 // The asm hack only works with GCC and Clang.
 #    if !defined(_WIN32)
 
-asm("memcpy = __sanitizer_internal_memcpy");
-asm("memmove = __sanitizer_internal_memmove");
-asm("memset = __sanitizer_internal_memset");
+asm(R"(
+    .set memcpy, __sanitizer_internal_memcpy
+    .set memmove, __sanitizer_internal_memmove
+    .set memset, __sanitizer_internal_memset
+    )");
 
 #      if defined(__cplusplus) && \
           !defined(SANITIZER_COMMON_REDEFINE_BUILTINS_IN_STD)
lib/tsan/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp
@@ -137,10 +137,6 @@ class ThreadSuspender {
 };
 
 bool ThreadSuspender::SuspendThread(tid_t tid) {
-  // Are we already attached to this thread?
-  // Currently this check takes linear time, however the number of threads is
-  // usually small.
-  if (suspended_threads_list_.ContainsTid(tid)) return false;
   int pterrno;
   if (internal_iserror(internal_ptrace(PTRACE_ATTACH, tid, nullptr, nullptr),
                        &pterrno)) {
@@ -217,17 +213,28 @@ bool ThreadSuspender::SuspendAllThreads() {
     switch (thread_lister.ListThreads(&threads)) {
       case ThreadLister::Error:
         ResumeAllThreads();
+        VReport(1, "Failed to list threads\n");
         return false;
       case ThreadLister::Incomplete:
+        VReport(1, "Incomplete list\n");
         retry = true;
         break;
       case ThreadLister::Ok:
         break;
     }
     for (tid_t tid : threads) {
+      // Are we already attached to this thread?
+      // Currently this check takes linear time, however the number of threads
+      // is usually small.
+      if (suspended_threads_list_.ContainsTid(tid))
+        continue;
       if (SuspendThread(tid))
         retry = true;
+      else
+        VReport(2, "%llu/status: %s\n", tid, thread_lister.LoadStatus(tid));
     }
+    if (retry)
+      VReport(1, "SuspendAllThreads retry: %d\n", i);
   }
   return suspended_threads_list_.ThreadCount();
 }
lib/tsan/sanitizer_common/sanitizer_symbolizer.h
@@ -185,17 +185,17 @@ class Symbolizer final {
   class ModuleNameOwner {
    public:
     explicit ModuleNameOwner(Mutex *synchronized_by)
-        : last_match_(nullptr), mu_(synchronized_by) {
+        : mu_(synchronized_by), last_match_(nullptr) {
       storage_.reserve(kInitialCapacity);
     }
     const char *GetOwnedCopy(const char *str);
 
    private:
     static const uptr kInitialCapacity = 1000;
-    InternalMmapVector<const char*> storage_;
-    const char *last_match_;
 
     Mutex *mu_;
+    const char *last_match_ SANITIZER_GUARDED_BY(mu_);
+    InternalMmapVector<const char *> storage_ SANITIZER_GUARDED_BY(*mu_);
   } module_names_;
 
   /// Platform-specific function for creating a Symbolizer object.
@@ -220,7 +220,7 @@ class Symbolizer final {
   // always synchronized.
   Mutex mu_;
 
-  IntrusiveList<SymbolizerTool> tools_;
+  IntrusiveList<SymbolizerTool> tools_ SANITIZER_GUARDED_BY(mu_);
 
   explicit Symbolizer(IntrusiveList<SymbolizerTool> tools);
 
lib/tsan/sanitizer_common/sanitizer_symbolizer_mac.cpp
@@ -30,7 +30,7 @@ namespace __sanitizer {
 bool DlAddrSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
   Dl_info info;
   int result = dladdr((const void *)addr, &info);
-  if (!result) return false;
+  if (!result || !info.dli_sname) return false;
 
   // Compute offset if possible. `dladdr()` doesn't always ensure that `addr >=
   // sym_addr` so only compute the offset when this holds. Failure to find the
@@ -51,7 +51,7 @@ bool DlAddrSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
 bool DlAddrSymbolizer::SymbolizeData(uptr addr, DataInfo *datainfo) {
   Dl_info info;
   int result = dladdr((const void *)addr, &info);
-  if (!result) return false;
+  if (!result || !info.dli_sname) return false;
   const char *demangled = DemangleSwiftAndCXX(info.dli_sname);
   if (!demangled)
     demangled = info.dli_sname;
lib/tsan/sanitizer_common/sanitizer_symbolizer_report.cpp
@@ -227,12 +227,15 @@ static void ReportStackOverflowImpl(const SignalContext &sig, u32 tid,
          SanitizerToolName, kDescription, (void *)sig.addr, (void *)sig.pc,
          (void *)sig.bp, (void *)sig.sp, tid);
   Printf("%s", d.Default());
-  InternalMmapVector<BufferedStackTrace> stack_buffer(1);
-  BufferedStackTrace *stack = stack_buffer.data();
-  stack->Reset();
-  unwind(sig, unwind_context, stack);
-  stack->Print();
-  ReportErrorSummary(kDescription, stack);
+  // Avoid SEGVs in the unwinder when bp couldn't be determined.
+  if (sig.bp) {
+    InternalMmapVector<BufferedStackTrace> stack_buffer(1);
+    BufferedStackTrace *stack = stack_buffer.data();
+    stack->Reset();
+    unwind(sig, unwind_context, stack);
+    stack->Print();
+    ReportErrorSummary(kDescription, stack);
+  }
 }
 
 static void ReportDeadlySignalImpl(const SignalContext &sig, u32 tid,
lib/tsan/sanitizer_common/sanitizer_symbolizer_win.cpp
@@ -65,12 +65,13 @@ void InitializeDbgHelpIfNeeded() {
   HMODULE dbghelp = LoadLibraryA("dbghelp.dll");
   CHECK(dbghelp && "failed to load dbghelp.dll");
 
-#define DBGHELP_IMPORT(name)                                                  \
-  do {                                                                        \
-    name =                                                                    \
-        reinterpret_cast<decltype(::name) *>(GetProcAddress(dbghelp, #name)); \
-    CHECK(name != nullptr);                                                   \
-  } while (0)
+#  define DBGHELP_IMPORT(name)                     \
+    do {                                           \
+      name = reinterpret_cast<decltype(::name) *>( \
+          (void *)GetProcAddress(dbghelp, #name)); \
+      CHECK(name != nullptr);                      \
+    } while (0)
+
   DBGHELP_IMPORT(StackWalk64);
   DBGHELP_IMPORT(SymCleanup);
   DBGHELP_IMPORT(SymFromAddr);
lib/tsan/sanitizer_common/sanitizer_thread_history.cpp
@@ -0,0 +1,72 @@
+//===-- sanitizer_thread_history.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_thread_history.h"
+
+#include "sanitizer_stackdepot.h"
+namespace __sanitizer {
+
+void PrintThreadHistory(ThreadRegistry &registry, InternalScopedString &out) {
+  ThreadRegistryLock l(&registry);
+  // Stack traces are the largest part of the printout and are often the same
+  // for multiple threads, so we deduplicate them.
+  InternalMmapVector<const ThreadContextBase *> stacks;
+
+  registry.RunCallbackForEachThreadLocked(
+      [](ThreadContextBase *context, void *arg) {
+        static_cast<decltype(&stacks)>(arg)->push_back(context);
+      },
+      &stacks);
+
+  Sort(stacks.data(), stacks.size(),
+       [](const ThreadContextBase *a, const ThreadContextBase *b) {
+         if (a->stack_id < b->stack_id)
+           return true;
+         if (a->stack_id > b->stack_id)
+           return false;
+         return a->unique_id < b->unique_id;
+       });
+
+  auto describe_thread = [&](const ThreadContextBase *context) {
+    if (!context) {
+      out.Append("T-1");
+      return;
+    }
+    out.AppendF("T%llu/%llu", context->unique_id, context->os_id);
+    if (internal_strlen(context->name))
+      out.AppendF(" (%s)", context->name);
+  };
+
+  auto get_parent =
+      [&](const ThreadContextBase *context) -> const ThreadContextBase * {
+    if (!context)
+      return nullptr;
+    ThreadContextBase *parent = registry.GetThreadLocked(context->parent_tid);
+    if (!parent)
+      return nullptr;
+    if (parent->unique_id >= context->unique_id)
+      return nullptr;
+    return parent;
+  };
+
+  const ThreadContextBase *prev = nullptr;
+  for (const ThreadContextBase *context : stacks) {
+    if (prev && prev->stack_id != context->stack_id)
+      StackDepotGet(prev->stack_id).PrintTo(&out);
+    prev = context;
+    out.Append("Thread ");
+    describe_thread(context);
+    out.Append(" was created by ");
+    describe_thread(get_parent(context));
+    out.Append("\n");
+  }
+  if (prev)
+    StackDepotGet(prev->stack_id).PrintTo(&out);
+}
+
+}  // namespace __sanitizer
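
PrintThreadHistory above keeps the report short by sorting the thread contexts by stack_id (then unique_id) and printing each creation stack only once, after the run of threads that share it. The same "sort by key, emit the shared payload once per run" pattern in isolation; the types and names below are illustrative, not sanitizer API:

#include <algorithm>
#include <cstdio>
#include <vector>

struct Item {
  unsigned stack_id;   // key shared by several items
  unsigned unique_id;  // per-item identifier
};

void PrintGrouped(std::vector<Item> items) {
  std::sort(items.begin(), items.end(), [](const Item &a, const Item &b) {
    return a.stack_id != b.stack_id ? a.stack_id < b.stack_id
                                    : a.unique_id < b.unique_id;
  });
  const Item *prev = nullptr;
  for (const Item &it : items) {
    if (prev && prev->stack_id != it.stack_id)
      std::printf("  stack #%u\n", prev->stack_id);  // payload, once per run
    prev = &it;
    std::printf("T%u\n", it.unique_id);
  }
  if (prev)
    std::printf("  stack #%u\n", prev->stack_id);
}
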
lib/tsan/sanitizer_common/sanitizer_thread_history.h
@@ -0,0 +1,24 @@
+//===-- sanitizer_thread_history.h ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utility to print thread history from ThreadRegistry.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_THREAD_HISTORY_H
+#define SANITIZER_THREAD_HISTORY_H
+
+#include "sanitizer_thread_registry.h"
+
+namespace __sanitizer {
+
+void PrintThreadHistory(ThreadRegistry& registry, InternalScopedString& out);
+
+}  // namespace __sanitizer
+
+#endif  // SANITIZER_THREAD_HISTORY_H
lib/tsan/sanitizer_common/sanitizer_thread_registry.cpp
@@ -18,9 +18,17 @@
 namespace __sanitizer {
 
 ThreadContextBase::ThreadContextBase(u32 tid)
-    : tid(tid), unique_id(0), reuse_count(), os_id(0), user_id(0),
-      status(ThreadStatusInvalid), detached(false),
-      thread_type(ThreadType::Regular), parent_tid(0), next(0) {
+    : tid(tid),
+      unique_id(0),
+      reuse_count(),
+      os_id(0),
+      user_id(0),
+      status(ThreadStatusInvalid),
+      detached(false),
+      thread_type(ThreadType::Regular),
+      parent_tid(0),
+      stack_id(0),
+      next(0) {
   name[0] = '\0';
   atomic_store(&thread_destroyed, 0, memory_order_release);
 }
@@ -39,8 +47,7 @@ void ThreadContextBase::SetName(const char *new_name) {
 }
 
 void ThreadContextBase::SetDead() {
-  CHECK(status == ThreadStatusRunning ||
-        status == ThreadStatusFinished);
+  CHECK(status == ThreadStatusRunning || status == ThreadStatusFinished);
   status = ThreadStatusDead;
   user_id = 0;
   OnDead();
@@ -68,7 +75,8 @@ void ThreadContextBase::SetFinished() {
   // for a thread that never actually started.  In that case the thread
   // should go to ThreadStatusFinished regardless of whether it was created
   // as detached.
-  if (!detached || status == ThreadStatusCreated) status = ThreadStatusFinished;
+  if (!detached || status == ThreadStatusCreated)
+    status = ThreadStatusFinished;
   OnFinished();
 }
 
@@ -81,14 +89,17 @@ void ThreadContextBase::SetStarted(tid_t _os_id, ThreadType _thread_type,
 }
 
 void ThreadContextBase::SetCreated(uptr _user_id, u64 _unique_id,
-                                   bool _detached, u32 _parent_tid, void *arg) {
+                                   bool _detached, u32 _parent_tid,
+                                   u32 _stack_tid, void *arg) {
   status = ThreadStatusCreated;
   user_id = _user_id;
   unique_id = _unique_id;
   detached = _detached;
   // Parent tid makes no sense for the main thread.
-  if (tid != kMainTid)
+  if (tid != kMainTid) {
     parent_tid = _parent_tid;
+    stack_id = _stack_tid;
+  }
   OnCreated(arg);
 }
 
@@ -124,8 +135,10 @@ void ThreadRegistry::GetNumberOfThreads(uptr *total, uptr *running,
   ThreadRegistryLock l(this);
   if (total)
     *total = threads_.size();
-  if (running) *running = running_threads_;
-  if (alive) *alive = alive_threads_;
+  if (running)
+    *running = running_threads_;
+  if (alive)
+    *alive = alive_threads_;
 }
 
 uptr ThreadRegistry::GetMaxAliveThreads() {
@@ -134,7 +147,7 @@ uptr ThreadRegistry::GetMaxAliveThreads() {
 }
 
 u32 ThreadRegistry::CreateThread(uptr user_id, bool detached, u32 parent_tid,
-                                 void *arg) {
+                                 u32 stack_tid, void *arg) {
   ThreadRegistryLock l(this);
   u32 tid = kInvalidTid;
   ThreadContextBase *tctx = QuarantinePop();
@@ -150,8 +163,10 @@ u32 ThreadRegistry::CreateThread(uptr user_id, bool detached, u32 parent_tid,
     Report("%s: Thread limit (%u threads) exceeded. Dying.\n",
            SanitizerToolName, max_threads_);
 #else
-    Printf("race: limit on %u simultaneously alive goroutines is exceeded,"
-        " dying\n", max_threads_);
+    Printf(
+        "race: limit on %u simultaneously alive goroutines is exceeded,"
+        " dying\n",
+        max_threads_);
 #endif
     Die();
   }
@@ -170,8 +185,8 @@ u32 ThreadRegistry::CreateThread(uptr user_id, bool detached, u32 parent_tid,
     // positives later (e.g. if we join a wrong thread).
     CHECK(live_.try_emplace(user_id, tid).second);
   }
-  tctx->SetCreated(user_id, total_threads_++, detached,
-                   parent_tid, arg);
+  tctx->SetCreated(user_id, total_threads_++, detached, parent_tid, stack_tid,
+                   arg);
   return tid;
 }
 
@@ -196,8 +211,8 @@ u32 ThreadRegistry::FindThread(FindThreadCallback cb, void *arg) {
   return kInvalidTid;
 }
 
-ThreadContextBase *
-ThreadRegistry::FindThreadContextLocked(FindThreadCallback cb, void *arg) {
+ThreadContextBase *ThreadRegistry::FindThreadContextLocked(
+    FindThreadCallback cb, void *arg) {
   CheckLocked();
   for (u32 tid = 0; tid < threads_.size(); tid++) {
     ThreadContextBase *tctx = threads_[tid];
@@ -210,7 +225,7 @@ ThreadRegistry::FindThreadContextLocked(FindThreadCallback cb, void *arg) {
 static bool FindThreadContextByOsIdCallback(ThreadContextBase *tctx,
                                             void *arg) {
   return (tctx->os_id == (uptr)arg && tctx->status != ThreadStatusInvalid &&
-      tctx->status != ThreadStatusDead);
+          tctx->status != ThreadStatusDead);
 }
 
 ThreadContextBase *ThreadRegistry::FindThreadContextByOsIDLocked(tid_t os_id) {
lib/tsan/sanitizer_common/sanitizer_thread_registry.h
@@ -52,6 +52,7 @@ class ThreadContextBase {
   ThreadType thread_type;
 
   u32 parent_tid;
+  u32 stack_id;
   ThreadContextBase *next;  // For storing thread contexts in a list.
 
   atomic_uint32_t thread_destroyed; // To address race of Joined vs Finished
@@ -63,7 +64,7 @@ class ThreadContextBase {
   void SetFinished();
   void SetStarted(tid_t _os_id, ThreadType _thread_type, void *arg);
   void SetCreated(uptr _user_id, u64 _unique_id, bool _detached,
-                  u32 _parent_tid, void *arg);
+                  u32 _parent_tid, u32 _stack_tid, void *arg);
   void Reset();
 
   void SetDestroyed();
@@ -101,12 +102,16 @@ class SANITIZER_MUTEX ThreadRegistry {
 
   // Should be guarded by ThreadRegistryLock.
   ThreadContextBase *GetThreadLocked(u32 tid) {
-    return threads_.empty() ? nullptr : threads_[tid];
+    return tid < threads_.size() ? threads_[tid] : nullptr;
   }
 
   u32 NumThreadsLocked() const { return threads_.size(); }
 
-  u32 CreateThread(uptr user_id, bool detached, u32 parent_tid, void *arg);
+  u32 CreateThread(uptr user_id, bool detached, u32 parent_tid, u32 stack_tid,
+                   void *arg);
+  u32 CreateThread(uptr user_id, bool detached, u32 parent_tid, void *arg) {
+    return CreateThread(user_id, detached, parent_tid, 0, arg);
+  }
 
   typedef void (*ThreadCallback)(ThreadContextBase *tctx, void *arg);
   // Invokes callback with a specified arg for each thread context.
lib/tsan/sanitizer_common/sanitizer_tls_get_addr.cpp
@@ -14,6 +14,8 @@
 
 #include "sanitizer_allocator_interface.h"
 #include "sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
 #include "sanitizer_flags.h"
 #include "sanitizer_platform_interceptors.h"
 
@@ -66,7 +68,7 @@ static DTLS::DTVBlock *DTLS_NextBlock(atomic_uintptr_t *cur) {
 }
 
 static DTLS::DTV *DTLS_Find(uptr id) {
-  VReport(2, "__tls_get_addr: DTLS_Find %p %zd\n", (void *)&dtls, id);
+  VReport(3, "__tls_get_addr: DTLS_Find %p %zd\n", (void *)&dtls, id);
   static constexpr uptr kPerBlock = ARRAY_SIZE(DTLS::DTVBlock::dtvs);
   DTLS::DTVBlock *cur = DTLS_NextBlock(&dtls.dtv_block);
   if (!cur)
@@ -110,6 +112,21 @@ SANITIZER_WEAK_ATTRIBUTE
 const void *__sanitizer_get_allocated_begin(const void *p);
 }
 
+SANITIZER_INTERFACE_WEAK_DEF(uptr, __sanitizer_get_dtls_size,
+                             const void *tls_begin) {
+  const void *start = __sanitizer_get_allocated_begin(tls_begin);
+  if (!start)
+    return 0;
+  CHECK_LE(start, tls_begin);
+  uptr tls_size = __sanitizer_get_allocated_size(start);
+  VReport(2, "__tls_get_addr: glibc DTLS suspected; tls={%p,0x%zx}\n",
+          tls_begin, tls_size);
+  uptr offset =
+      (reinterpret_cast<uptr>(tls_begin) - reinterpret_cast<uptr>(start));
+  CHECK_LE(offset, tls_size);
+  return tls_size - offset;
+}
+
 DTLS::DTV *DTLS_on_tls_get_addr(void *arg_void, void *res,
                                 uptr static_tls_begin, uptr static_tls_end) {
   if (!common_flags()->intercept_tls_get_addr) return 0;
@@ -117,8 +134,8 @@ DTLS::DTV *DTLS_on_tls_get_addr(void *arg_void, void *res,
   uptr dso_id = arg->dso_id;
   DTLS::DTV *dtv = DTLS_Find(dso_id);
   if (!dtv || dtv->beg)
-    return 0;
-  uptr tls_size = 0;
+    return nullptr;
+  CHECK_LE(static_tls_begin, static_tls_end);
   uptr tls_beg = reinterpret_cast<uptr>(res) - arg->offset - kDtvOffset;
   VReport(2,
           "__tls_get_addr: %p {0x%zx,0x%zx} => %p; tls_beg: %p; sp: %p "
@@ -126,36 +143,33 @@ DTLS::DTV *DTLS_on_tls_get_addr(void *arg_void, void *res,
           (void *)arg, arg->dso_id, arg->offset, res, (void *)tls_beg,
           (void *)&tls_beg,
           atomic_load(&number_of_live_dtls, memory_order_relaxed));
-  if (dtls.last_memalign_ptr == tls_beg) {
-    tls_size = dtls.last_memalign_size;
-    VReport(2, "__tls_get_addr: glibc <=2.24 suspected; tls={%p,0x%zx}\n",
-            (void *)tls_beg, tls_size);
-  } else if (tls_beg >= static_tls_begin && tls_beg < static_tls_end) {
+  if (tls_beg >= static_tls_begin && tls_beg < static_tls_end) {
     // This is the static TLS block which was initialized / unpoisoned at thread
     // creation.
     VReport(2, "__tls_get_addr: static tls: %p\n", (void *)tls_beg);
-    tls_size = 0;
-  } else if (const void *start =
-                 __sanitizer_get_allocated_begin((void *)tls_beg)) {
-    tls_beg = (uptr)start;
-    tls_size = __sanitizer_get_allocated_size(start);
-    VReport(2, "__tls_get_addr: glibc >=2.25 suspected; tls={%p,0x%zx}\n",
-            (void *)tls_beg, tls_size);
-  } else {
-    VReport(2, "__tls_get_addr: Can't guess glibc version\n");
-    // This may happen inside the DTOR of main thread, so just ignore it.
-    tls_size = 0;
+    dtv->beg = tls_beg;
+    dtv->size = 0;
+    return nullptr;
   }
+  if (uptr tls_size =
+          __sanitizer_get_dtls_size(reinterpret_cast<void *>(tls_beg))) {
+    dtv->beg = tls_beg;
+    dtv->size = tls_size;
+    return dtv;
+  }
+  VReport(2, "__tls_get_addr: Can't guess glibc version\n");
+  // This may happen inside the DTOR of a thread, or in async signal handlers
+  // before thread initialization, so just ignore it.
+  //
+  // If the unknown block is dynamic TLS, it is unlikely we will be able to
+  // recognize it in the future, so mark it as done with '{tls_beg, 0}'.
+  //
+  // If the block is static TLS, a possible reason for the failed detection is
+  // a nullptr `static_tls_begin`. Regardless of the reason, the future
+  // handling of static TLS is still '{tls_beg, 0}'.
   dtv->beg = tls_beg;
-  dtv->size = tls_size;
-  return dtv;
-}
-
-void DTLS_on_libc_memalign(void *ptr, uptr size) {
-  if (!common_flags()->intercept_tls_get_addr) return;
-  VReport(2, "DTLS_on_libc_memalign: %p 0x%zx\n", ptr, size);
-  dtls.last_memalign_ptr = reinterpret_cast<uptr>(ptr);
-  dtls.last_memalign_size = size;
+  dtv->size = 0;
+  return nullptr;
 }
 
 DTLS *DTLS_Get() { return &dtls; }
@@ -166,7 +180,9 @@ bool DTLSInDestruction(DTLS *dtls) {
 }
 
 #else
-void DTLS_on_libc_memalign(void *ptr, uptr size) {}
+SANITIZER_INTERFACE_WEAK_DEF(uptr, __sanitizer_get_dtls_size, const void *) {
+  return 0;
+}
 DTLS::DTV *DTLS_on_tls_get_addr(void *arg, void *res,
   unsigned long, unsigned long) { return 0; }
 DTLS *DTLS_Get() { return 0; }
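
The new __sanitizer_get_dtls_size above recovers the size of a glibc dynamic TLS block by asking the allocator which heap block contains tls_begin: if that block is [start, start + allocated_size) and tls_begin sits offset bytes into it, the remaining allocated_size - offset bytes are the DTLS. A sketch of just that arithmetic, with a hypothetical standalone helper in place of the allocator queries:

#include <cstdint>

// E.g. start = 0x7f0000, allocated_size = 0x200, tls_begin = 0x7f0040
// gives offset = 0x40 and a DTLS size of 0x1c0.
std::uintptr_t RemainingDtlsSize(std::uintptr_t start,
                                 std::uintptr_t allocated_size,
                                 std::uintptr_t tls_begin) {
  std::uintptr_t offset = tls_begin - start;  // CHECKed to be <= allocated_size
  return allocated_size - offset;
}
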
lib/tsan/sanitizer_common/sanitizer_tls_get_addr.h
@@ -55,10 +55,6 @@ struct DTLS {
   static_assert(sizeof(DTVBlock) <= 4096UL, "Unexpected block size");
 
   atomic_uintptr_t dtv_block;
-
-  // Auxiliary fields, don't access them outside sanitizer_tls_get_addr.cpp
-  uptr last_memalign_size;
-  uptr last_memalign_ptr;
 };
 
 template <typename Fn>
lib/tsan/sanitizer_common/sanitizer_win.cpp
@@ -164,7 +164,24 @@ void UnmapOrDie(void *addr, uptr size, bool raw_report) {
 static void *ReturnNullptrOnOOMOrDie(uptr size, const char *mem_type,
                                      const char *mmap_type) {
   error_t last_error = GetLastError();
-  if (last_error == ERROR_NOT_ENOUGH_MEMORY)
+
+  // Assumption: VirtualAlloc is the last system call that was invoked before
+  //   this method.
+  // VirtualAlloc emits one of 3 error codes when running out of memory
+  // 1. ERROR_NOT_ENOUGH_MEMORY:
+  //  There's not enough memory to execute the command
+  // 2. ERROR_INVALID_PARAMETER:
+  //  VirtualAlloc will return this if the request would allocate memory at an
+  //  address exceeding or being very close to the maximum application address
+  //  (the `lpMaximumApplicationAddress` field within the `SystemInfo` struct).
+  //  This does not seem to be officially documented, but is corroborated here:
+  //  https://stackoverflow.com/questions/45833674/why-does-virtualalloc-fail-for-lpaddress-greater-than-0x6ffffffffff
+  // 3. ERROR_COMMITMENT_LIMIT:
+  //  VirtualAlloc will return this if e.g. the pagefile is too small to commit
+  //  the requested amount of memory.
+  if (last_error == ERROR_NOT_ENOUGH_MEMORY ||
+      last_error == ERROR_INVALID_PARAMETER ||
+      last_error == ERROR_COMMITMENT_LIMIT)
     return nullptr;
   ReportMmapFailureAndDie(size, mem_type, mmap_type, last_error);
 }
@@ -873,24 +890,18 @@ uptr GetTlsSize() {
   return 0;
 }
 
-void InitTlsSize() {
-}
-
-void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size,
-                          uptr *tls_addr, uptr *tls_size) {
-#if SANITIZER_GO
-  *stk_addr = 0;
-  *stk_size = 0;
-  *tls_addr = 0;
-  *tls_size = 0;
-#else
-  uptr stack_top, stack_bottom;
-  GetThreadStackTopAndBottom(main, &stack_top, &stack_bottom);
-  *stk_addr = stack_bottom;
-  *stk_size = stack_top - stack_bottom;
-  *tls_addr = 0;
-  *tls_size = 0;
-#endif
+void GetThreadStackAndTls(bool main, uptr *stk_begin, uptr *stk_end,
+                          uptr *tls_begin, uptr *tls_end) {
+#  if SANITIZER_GO
+  *stk_begin = 0;
+  *stk_end = 0;
+  *tls_begin = 0;
+  *tls_end = 0;
+#  else
+  GetThreadStackTopAndBottom(main, stk_end, stk_begin);
+  *tls_begin = 0;
+  *tls_end = 0;
+#  endif
 }
 
 void ReportFile::Write(const char *buffer, uptr length) {
@@ -974,6 +985,11 @@ bool IsAccessibleMemoryRange(uptr beg, uptr size) {
   return true;
 }
 
+bool TryMemCpy(void *dest, const void *src, uptr n) {
+  // TODO: implement.
+  return false;
+}
+
 bool SignalContext::IsStackOverflow() const {
   return (DWORD)GetType() == EXCEPTION_STACK_OVERFLOW;
 }
@@ -1039,7 +1055,52 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const {
 }
 
 void SignalContext::DumpAllRegisters(void *context) {
-  // FIXME: Implement this.
+  CONTEXT *ctx = (CONTEXT *)context;
+#  if defined(_M_X64)
+  Report("Register values:\n");
+  Printf("rax = %llx  ", ctx->Rax);
+  Printf("rbx = %llx  ", ctx->Rbx);
+  Printf("rcx = %llx  ", ctx->Rcx);
+  Printf("rdx = %llx  ", ctx->Rdx);
+  Printf("\n");
+  Printf("rdi = %llx  ", ctx->Rdi);
+  Printf("rsi = %llx  ", ctx->Rsi);
+  Printf("rbp = %llx  ", ctx->Rbp);
+  Printf("rsp = %llx  ", ctx->Rsp);
+  Printf("\n");
+  Printf("r8  = %llx  ", ctx->R8);
+  Printf("r9  = %llx  ", ctx->R9);
+  Printf("r10 = %llx  ", ctx->R10);
+  Printf("r11 = %llx  ", ctx->R11);
+  Printf("\n");
+  Printf("r12 = %llx  ", ctx->R12);
+  Printf("r13 = %llx  ", ctx->R13);
+  Printf("r14 = %llx  ", ctx->R14);
+  Printf("r15 = %llx  ", ctx->R15);
+  Printf("\n");
+#  elif defined(_M_IX86)
+  Report("Register values:\n");
+  Printf("eax = %lx  ", ctx->Eax);
+  Printf("ebx = %lx  ", ctx->Ebx);
+  Printf("ecx = %lx  ", ctx->Ecx);
+  Printf("edx = %lx  ", ctx->Edx);
+  Printf("\n");
+  Printf("edi = %lx  ", ctx->Edi);
+  Printf("esi = %lx  ", ctx->Esi);
+  Printf("ebp = %lx  ", ctx->Ebp);
+  Printf("esp = %lx  ", ctx->Esp);
+  Printf("\n");
+#  elif defined(_M_ARM64)
+  Report("Register values:\n");
+  for (int i = 0; i <= 30; i++) {
+    Printf("x%d%s = %llx", i, i < 10 ? " " : "", ctx->X[i]);
+    if (i % 4 == 3)
+      Printf("\n");
+  }
+#  else
+  // TODO
+  (void)ctx;
+#  endif
 }
 
 int SignalContext::GetType() const {
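
The error-code triage above in ReturnNullptrOnOOMOrDie treats ERROR_NOT_ENOUGH_MEMORY, ERROR_INVALID_PARAMETER, and ERROR_COMMITMENT_LIMIT after a failed VirtualAlloc as plausible out-of-memory conditions and anything else as fatal. A sketch of the same pattern for a caller-side helper; AllocOrNull is a hypothetical name, not part of the runtime:

#include <windows.h>

#include <cstdlib>

// Returns nullptr on error codes the caller may treat as out-of-memory,
// mirroring the triage in ReturnNullptrOnOOMOrDie; anything else aborts.
void *AllocOrNull(size_t size) {
  void *p =
      VirtualAlloc(nullptr, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
  if (p)
    return p;
  DWORD err = GetLastError();
  if (err == ERROR_NOT_ENOUGH_MEMORY || err == ERROR_INVALID_PARAMETER ||
      err == ERROR_COMMITMENT_LIMIT)
    return nullptr;
  std::abort();
}
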
lib/tsan/sanitizer_common/sanitizer_win_dll_thunk.cpp
@@ -1,101 +0,0 @@
-//===-- sanitizer_win_dll_thunk.cpp ---------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// This file defines a family of thunks that should be statically linked into
-// the DLLs that have instrumentation in order to delegate the calls to the
-// shared runtime that lives in the main binary.
-// See https://github.com/google/sanitizers/issues/209 for the details.
-//===----------------------------------------------------------------------===//
-
-#ifdef SANITIZER_DLL_THUNK
-#include "sanitizer_win_defs.h"
-#include "sanitizer_win_dll_thunk.h"
-#include "interception/interception.h"
-
-extern "C" {
-void *WINAPI GetModuleHandleA(const char *module_name);
-void abort();
-}
-
-namespace __sanitizer {
-uptr dllThunkGetRealAddrOrDie(const char *name) {
-  uptr ret =
-      __interception::InternalGetProcAddress((void *)GetModuleHandleA(0), name);
-  if (!ret)
-    abort();
-  return ret;
-}
-
-int dllThunkIntercept(const char* main_function, uptr dll_function) {
-  uptr wrapper = dllThunkGetRealAddrOrDie(main_function);
-  if (!__interception::OverrideFunction(dll_function, wrapper, 0))
-    abort();
-  return 0;
-}
-
-int dllThunkInterceptWhenPossible(const char* main_function,
-    const char* default_function, uptr dll_function) {
-  uptr wrapper = __interception::InternalGetProcAddress(
-    (void *)GetModuleHandleA(0), main_function);
-  if (!wrapper)
-    wrapper = dllThunkGetRealAddrOrDie(default_function);
-  if (!__interception::OverrideFunction(dll_function, wrapper, 0))
-    abort();
-  return 0;
-}
-} // namespace __sanitizer
-
-// Include Sanitizer Common interface.
-#define INTERFACE_FUNCTION(Name) INTERCEPT_SANITIZER_FUNCTION(Name)
-#define INTERFACE_WEAK_FUNCTION(Name) INTERCEPT_SANITIZER_WEAK_FUNCTION(Name)
-#include "sanitizer_common_interface.inc"
-
-#pragma section(".DLLTH$A", read)
-#pragma section(".DLLTH$Z", read)
-
-typedef void (*DllThunkCB)();
-extern "C" {
-__declspec(allocate(".DLLTH$A")) DllThunkCB __start_dll_thunk;
-__declspec(allocate(".DLLTH$Z")) DllThunkCB __stop_dll_thunk;
-}
-
-// Disable compiler warnings that show up if we declare our own version
-// of a compiler intrinsic (e.g. strlen).
-#pragma warning(disable: 4391)
-#pragma warning(disable: 4392)
-
-extern "C" int __dll_thunk_init() {
-  static bool flag = false;
-  // __dll_thunk_init is expected to be called by only one thread.
-  if (flag) return 0;
-  flag = true;
-
-  for (DllThunkCB *it = &__start_dll_thunk; it < &__stop_dll_thunk; ++it)
-    if (*it)
-      (*it)();
-
-  // In DLLs, the callbacks are expected to return 0,
-  // otherwise CRT initialization fails.
-  return 0;
-}
-
-// We want to call dll_thunk_init before C/C++ initializers / constructors are
-// executed, otherwise functions like memset might be invoked.
-#pragma section(".CRT$XIB", long, read)
-__declspec(allocate(".CRT$XIB")) int (*__dll_thunk_preinit)() =
-    __dll_thunk_init;
-
-static void WINAPI dll_thunk_thread_init(void *mod, unsigned long reason,
-                                         void *reserved) {
-  if (reason == /*DLL_PROCESS_ATTACH=*/1) __dll_thunk_init();
-}
-
-#pragma section(".CRT$XLAB", long, read)
-__declspec(allocate(".CRT$XLAB")) void (WINAPI *__dll_thunk_tls_init)(void *,
-    unsigned long, void *) = dll_thunk_thread_init;
-
-#endif // SANITIZER_DLL_THUNK
lib/tsan/sanitizer_common/sanitizer_win_dll_thunk.h
@@ -1,181 +0,0 @@
-//===-- sanitizer_win_dll_thunk.h -----------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// This header provide helper macros to delegate calls to the shared runtime
-// that lives in the main executable. It should be included to dll_thunks that
-// will be linked to the dlls, when the sanitizer is a static library included
-// in the main executable.
-//===----------------------------------------------------------------------===//
-#ifndef SANITIZER_WIN_DLL_THUNK_H
-#define SANITIZER_WIN_DLL_THUNK_H
-#include "sanitizer_internal_defs.h"
-
-namespace __sanitizer {
-uptr dllThunkGetRealAddrOrDie(const char *name);
-
-int dllThunkIntercept(const char* main_function, uptr dll_function);
-
-int dllThunkInterceptWhenPossible(const char* main_function,
-    const char* default_function, uptr dll_function);
-}
-
-extern "C" int __dll_thunk_init();
-
-// ----------------- Function interception helper macros -------------------- //
-// Override dll_function with main_function from main executable.
-#define INTERCEPT_OR_DIE(main_function, dll_function)                          \
-  static int intercept_##dll_function() {                                      \
-    return __sanitizer::dllThunkIntercept(main_function, (__sanitizer::uptr)   \
-        dll_function);                                                         \
-  }                                                                            \
-  __pragma(section(".DLLTH$M", long, read))                                    \
-  __declspec(allocate(".DLLTH$M")) int (*__dll_thunk_##dll_function)() =       \
-    intercept_##dll_function;
-
-// Try to override dll_function with main_function from main executable.
-// If main_function is not present, override dll_function with default_function.
-#define INTERCEPT_WHEN_POSSIBLE(main_function, default_function, dll_function) \
-  static int intercept_##dll_function() {                                      \
-    return __sanitizer::dllThunkInterceptWhenPossible(main_function,           \
-        default_function, (__sanitizer::uptr)dll_function);                    \
-  }                                                                            \
-  __pragma(section(".DLLTH$M", long, read))                                    \
-  __declspec(allocate(".DLLTH$M")) int (*__dll_thunk_##dll_function)() =       \
-    intercept_##dll_function;
-
-// -------------------- Function interception macros ------------------------ //
-// Special case of hooks -- ASan own interface functions.  Those are only called
-// after __asan_init, thus an empty implementation is sufficient.
-#define INTERCEPT_SANITIZER_FUNCTION(name)                                     \
-  extern "C" __declspec(noinline) void name() {                                \
-    volatile int prevent_icf = (__LINE__ << 8) ^ __COUNTER__;                  \
-    static const char function_name[] = #name;                                 \
-    for (const char* ptr = &function_name[0]; *ptr; ++ptr)                     \
-      prevent_icf ^= *ptr;                                                     \
-    (void)prevent_icf;                                                         \
-    __debugbreak();                                                            \
-  }                                                                            \
-  INTERCEPT_OR_DIE(#name, name)
-
-// Special case of hooks -- Weak functions, could be redefined in the main
-// executable, but that is not necessary, so we shouldn't die if we can not find
-// a reference. Instead, when the function is not present in the main executable
-// we consider the default impl provided by asan library.
-#define INTERCEPT_SANITIZER_WEAK_FUNCTION(name)                                \
-  extern "C" __declspec(noinline) void name() {                                \
-    volatile int prevent_icf = (__LINE__ << 8) ^ __COUNTER__;                  \
-    static const char function_name[] = #name;                                 \
-    for (const char* ptr = &function_name[0]; *ptr; ++ptr)                     \
-      prevent_icf ^= *ptr;                                                     \
-    (void)prevent_icf;                                                         \
-    __debugbreak();                                                            \
-  }                                                                            \
-  INTERCEPT_WHEN_POSSIBLE(#name, STRINGIFY(WEAK_EXPORT_NAME(name)), name)
-
-// We can't define our own version of strlen etc. because that would lead to
-// link-time or even type mismatch errors.  Instead, we can declare a function
-// just to be able to get its address.  Me may miss the first few calls to the
-// functions since it can be called before __dll_thunk_init, but that would lead
-// to false negatives in the startup code before user's global initializers,
-// which isn't a big deal.
-#define INTERCEPT_LIBRARY_FUNCTION(name)                                       \
-  extern "C" void name();                                                      \
-  INTERCEPT_OR_DIE(STRINGIFY(WRAP(name)), name)
-
-// Use these macros for functions that could be called before __dll_thunk_init()
-// is executed and don't lead to errors if defined (free, malloc, etc).
-#define INTERCEPT_WRAP_V_V(name)                                               \
-  extern "C" void name() {                                                     \
-    typedef decltype(name) *fntype;                                            \
-    static fntype fn = (fntype)__sanitizer::dllThunkGetRealAddrOrDie(#name);   \
-    fn();                                                                      \
-  }                                                                            \
-  INTERCEPT_OR_DIE(#name, name);
-
-#define INTERCEPT_WRAP_V_W(name)                                               \
-  extern "C" void name(void *arg) {                                            \
-    typedef decltype(name) *fntype;                                            \
-    static fntype fn = (fntype)__sanitizer::dllThunkGetRealAddrOrDie(#name);   \
-    fn(arg);                                                                   \
-  }                                                                            \
-  INTERCEPT_OR_DIE(#name, name);
-
-#define INTERCEPT_WRAP_V_WW(name)                                              \
-  extern "C" void name(void *arg1, void *arg2) {                               \
-    typedef decltype(name) *fntype;                                            \
-    static fntype fn = (fntype)__sanitizer::dllThunkGetRealAddrOrDie(#name);   \
-    fn(arg1, arg2);                                                            \
-  }                                                                            \
-  INTERCEPT_OR_DIE(#name, name);
-
-#define INTERCEPT_WRAP_V_WWW(name)                                             \
-  extern "C" void name(void *arg1, void *arg2, void *arg3) {                   \
-    typedef decltype(name) *fntype;                                            \
-    static fntype fn = (fntype)__sanitizer::dllThunkGetRealAddrOrDie(#name);   \
-    fn(arg1, arg2, arg3);                                                      \
-  }                                                                            \
-  INTERCEPT_OR_DIE(#name, name);
-
-#define INTERCEPT_WRAP_W_V(name)                                               \
-  extern "C" void *name() {                                                    \
-    typedef decltype(name) *fntype;                                            \
-    static fntype fn = (fntype)__sanitizer::dllThunkGetRealAddrOrDie(#name);   \
-    return fn();                                                               \
-  }                                                                            \
-  INTERCEPT_OR_DIE(#name, name);
-
-#define INTERCEPT_WRAP_W_W(name)                                               \
-  extern "C" void *name(void *arg) {                                           \
-    typedef decltype(name) *fntype;                                            \
-    static fntype fn = (fntype)__sanitizer::dllThunkGetRealAddrOrDie(#name);   \
-    return fn(arg);                                                            \
-  }                                                                            \
-  INTERCEPT_OR_DIE(#name, name);
-
-#define INTERCEPT_WRAP_W_WW(name)                                              \
-  extern "C" void *name(void *arg1, void *arg2) {                              \
-    typedef decltype(name) *fntype;                                            \
-    static fntype fn = (fntype)__sanitizer::dllThunkGetRealAddrOrDie(#name);   \
-    return fn(arg1, arg2);                                                     \
-  }                                                                            \
-  INTERCEPT_OR_DIE(#name, name);
-
-#define INTERCEPT_WRAP_W_WWW(name)                                             \
-  extern "C" void *name(void *arg1, void *arg2, void *arg3) {                  \
-    typedef decltype(name) *fntype;                                            \
-    static fntype fn = (fntype)__sanitizer::dllThunkGetRealAddrOrDie(#name);   \
-    return fn(arg1, arg2, arg3);                                               \
-  }                                                                            \
-  INTERCEPT_OR_DIE(#name, name);
-
-#define INTERCEPT_WRAP_W_WWWW(name)                                            \
-  extern "C" void *name(void *arg1, void *arg2, void *arg3, void *arg4) {      \
-    typedef decltype(name) *fntype;                                            \
-    static fntype fn = (fntype)__sanitizer::dllThunkGetRealAddrOrDie(#name);   \
-    return fn(arg1, arg2, arg3, arg4);                                         \
-  }                                                                            \
-  INTERCEPT_OR_DIE(#name, name);
-
-#define INTERCEPT_WRAP_W_WWWWW(name)                                           \
-  extern "C" void *name(void *arg1, void *arg2, void *arg3, void *arg4,        \
-                        void *arg5) {                                          \
-    typedef decltype(name) *fntype;                                            \
-    static fntype fn = (fntype)__sanitizer::dllThunkGetRealAddrOrDie(#name);   \
-    return fn(arg1, arg2, arg3, arg4, arg5);                                   \
-  }                                                                            \
-  INTERCEPT_OR_DIE(#name, name);
-
-#define INTERCEPT_WRAP_W_WWWWWW(name)                                          \
-  extern "C" void *name(void *arg1, void *arg2, void *arg3, void *arg4,        \
-                        void *arg5, void *arg6) {                              \
-    typedef decltype(name) *fntype;                                            \
-    static fntype fn = (fntype)__sanitizer::dllThunkGetRealAddrOrDie(#name);   \
-    return fn(arg1, arg2, arg3, arg4, arg5, arg6);                             \
-  }                                                                            \
-  INTERCEPT_OR_DIE(#name, name);
-
-#endif // SANITIZER_WIN_DLL_THUNK_H
lib/tsan/sanitizer_common/sanitizer_win_dynamic_runtime_thunk.cpp
@@ -1,26 +0,0 @@
-//===-- santizer_win_dynamic_runtime_thunk.cpp ----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines things that need to be present in the application modules
-// to interact with Sanitizer Common, when it is included in a dll.
-//
-//===----------------------------------------------------------------------===//
-#ifdef SANITIZER_DYNAMIC_RUNTIME_THUNK
-#define SANITIZER_IMPORT_INTERFACE 1
-#include "sanitizer_win_defs.h"
-// Define weak alias for all weak functions imported from sanitizer common.
-#define INTERFACE_FUNCTION(Name)
-#define INTERFACE_WEAK_FUNCTION(Name) WIN_WEAK_IMPORT_DEF(Name)
-#include "sanitizer_common_interface.inc"
-#endif // SANITIZER_DYNAMIC_RUNTIME_THUNK
-
-namespace __sanitizer {
-// Add one, otherwise unused, external symbol to this object file so that the
-// Visual C++ linker includes it and reads the .drective section.
-void ForceWholeArchiveIncludeForSanitizerCommon() {}
-}
lib/tsan/sanitizer_common/sanitizer_win_immortalize.h
@@ -0,0 +1,71 @@
+//===-- sanitizer_win_immortalize.h ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is shared between AddressSanitizer and interception.
+//
+// Windows-specific thread-safe and pre-CRT global initialization safe
+// infrastructure to create an object whose destructor is never called.
+//===----------------------------------------------------------------------===//
+#if SANITIZER_WINDOWS
+#  pragma once
+// Requires including sanitizer_placement_new.h (which is not allowed to be
+// included in headers).
+
+#  include "sanitizer_win_defs.h"
+// These types are required to satisfy XFG, which requires that the names of
+// the types used for indirect calls be correct, as well as the name of the
+// original type for any typedefs.
+
+// TODO: There must be a better way to do this
+#  ifndef _WINDOWS_
+typedef void* PVOID;
+typedef int BOOL;
+typedef union _RTL_RUN_ONCE {
+  PVOID ptr;
+} INIT_ONCE, *PINIT_ONCE;
+
+extern "C" {
+__declspec(dllimport) int WINAPI InitOnceExecuteOnce(
+    PINIT_ONCE, BOOL(WINAPI*)(PINIT_ONCE, PVOID, PVOID*), void*, void*);
+}
+#  endif
+
+namespace __sanitizer {
+template <class Ty>
+BOOL WINAPI immortalize_impl(PINIT_ONCE, PVOID storage_ptr, PVOID*) noexcept {
+  // Ty must provide a placement new operator
+  new (storage_ptr) Ty();
+  return 1;
+}
+
+template <class Ty, typename Arg>
+BOOL WINAPI immortalize_impl(PINIT_ONCE, PVOID storage_ptr,
+                             PVOID* param) noexcept {
+  // Ty must provide a placement new operator
+  new (storage_ptr) Ty(*((Arg*)param));
+  return 1;
+}
+
+template <class Ty>
+Ty& immortalize() {  // return a reference to an object that will live forever
+  static INIT_ONCE flag;
+  alignas(Ty) static unsigned char storage[sizeof(Ty)];
+  InitOnceExecuteOnce(&flag, immortalize_impl<Ty>, &storage, nullptr);
+  return reinterpret_cast<Ty&>(storage);
+}
+
+template <class Ty, typename Arg>
+Ty& immortalize(
+    Arg arg) {  // return a reference to an object that will live forever
+  static INIT_ONCE flag;
+  alignas(Ty) static unsigned char storage[sizeof(Ty)];
+  InitOnceExecuteOnce(&flag, immortalize_impl<Ty, Arg>, &storage, &arg);
+  return reinterpret_cast<Ty&>(storage);
+}
+}  // namespace __sanitizer
+#endif  // SANITIZER_WINDOWS
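
immortalize<T>() above gives Windows runtime code a thread-safe, once-only singleton whose destructor is never run, which makes it usable from pre-CRT initialization and during shutdown. A usage sketch; FlagRegistry and GetFlagRegistry are hypothetical, and <new> stands in for the in-tree sanitizer_placement_new.h that callers are expected to include:

#include <new>  // placement new used inside immortalize_impl

#include "sanitizer_win_immortalize.h"

struct FlagRegistry {
  int verbosity = 0;
};

// Construction runs exactly once via InitOnceExecuteOnce even if several
// threads race here; the returned reference stays valid through shutdown
// because no destructor is ever invoked.
FlagRegistry &GetFlagRegistry() {
  return __sanitizer::immortalize<FlagRegistry>();
}
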
lib/tsan/sanitizer_common/sanitizer_win_interception.cpp
@@ -0,0 +1,156 @@
+//===-- sanitizer_win_interception.cpp --------------------    --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Windows-specific export surface to provide interception for parts of the
+// runtime that are always statically linked, both for overriding user-defined
+// functions as well as registering weak functions that the ASAN runtime should
+// use over defaults.
+//
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_platform.h"
+#if SANITIZER_WINDOWS
+#  include <stddef.h>
+
+#  include "interception/interception.h"
+#  include "sanitizer_addrhashmap.h"
+#  include "sanitizer_common.h"
+#  include "sanitizer_internal_defs.h"
+#  include "sanitizer_placement_new.h"
+#  include "sanitizer_win_immortalize.h"
+#  include "sanitizer_win_interception.h"
+
+using namespace __sanitizer;
+
+extern "C" void *__ImageBase;
+
+namespace __sanitizer {
+
+static uptr GetSanitizerDllExport(const char *export_name) {
+  const uptr function_address =
+      __interception::InternalGetProcAddress(&__ImageBase, export_name);
+  if (function_address == 0) {
+    Report("ERROR: Failed to find sanitizer DLL export '%s'\n", export_name);
+    CHECK("Failed to find sanitizer DLL export" && 0);
+  }
+  return function_address;
+}
+
+struct WeakCallbackList {
+  explicit constexpr WeakCallbackList(RegisterWeakFunctionCallback cb)
+      : callback(cb), next(nullptr) {}
+
+  static void *operator new(size_t size) { return InternalAlloc(size); }
+
+  static void operator delete(void *p) { InternalFree(p); }
+
+  RegisterWeakFunctionCallback callback;
+  WeakCallbackList *next;
+};
+using WeakCallbackMap = AddrHashMap<WeakCallbackList *, 11>;
+
+static WeakCallbackMap *GetWeakCallbackMap() {
+  return &immortalize<WeakCallbackMap>();
+}
+
+void AddRegisterWeakFunctionCallback(uptr export_address,
+                                     RegisterWeakFunctionCallback cb) {
+  WeakCallbackMap::Handle h_find_or_create(GetWeakCallbackMap(), export_address,
+                                           false, true);
+  CHECK(h_find_or_create.exists());
+  if (h_find_or_create.created()) {
+    *h_find_or_create = new WeakCallbackList(cb);
+  } else {
+    (*h_find_or_create)->next = new WeakCallbackList(cb);
+  }
+}
+
+static void RunWeakFunctionCallbacks(uptr export_address) {
+  WeakCallbackMap::Handle h_find(GetWeakCallbackMap(), export_address, false,
+                                 false);
+  if (!h_find.exists()) {
+    return;
+  }
+
+  WeakCallbackList *list = *h_find;
+  do {
+    list->callback();
+  } while ((list = list->next));
+}
+
+}  // namespace __sanitizer
+
+extern "C" __declspec(dllexport) bool __cdecl __sanitizer_override_function(
+    const char *export_name, const uptr user_function,
+    uptr *const old_user_function) {
+  CHECK(export_name);
+  CHECK(user_function);
+
+  const uptr sanitizer_function = GetSanitizerDllExport(export_name);
+
+  const bool function_overridden = __interception::OverrideFunction(
+      user_function, sanitizer_function, old_user_function);
+  if (!function_overridden) {
+    Report(
+        "ERROR: Failed to override local function at '%p' with sanitizer "
+        "function '%s'\n",
+        user_function, export_name);
+    CHECK("Failed to replace local function with sanitizer version." && 0);
+  }
+
+  return function_overridden;
+}
+
+extern "C"
+    __declspec(dllexport) bool __cdecl __sanitizer_override_function_by_addr(
+        const uptr source_function, const uptr target_function,
+        uptr *const old_target_function) {
+  CHECK(source_function);
+  CHECK(target_function);
+
+  const bool function_overridden = __interception::OverrideFunction(
+      target_function, source_function, old_target_function);
+  if (!function_overridden) {
+    Report(
+        "ERROR: Failed to override function at '%p' with function at "
+        "'%p'\n",
+        target_function, source_function);
+    CHECK("Failed to apply function override." && 0);
+  }
+
+  return function_overridden;
+}
+
+extern "C"
+    __declspec(dllexport) bool __cdecl __sanitizer_register_weak_function(
+        const char *export_name, const uptr user_function,
+        uptr *const old_user_function) {
+  CHECK(export_name);
+  CHECK(user_function);
+
+  const uptr sanitizer_function = GetSanitizerDllExport(export_name);
+
+  const bool function_overridden = __interception::OverrideFunction(
+      sanitizer_function, user_function, old_user_function);
+  if (!function_overridden) {
+    Report(
+        "ERROR: Failed to register local function at '%p' to be used in "
+        "place of sanitizer function '%s'\n.",
+        user_function, export_name);
+    CHECK("Failed to register weak function." && 0);
+  }
+
+  // Note that thread-safety of RunWeakFunctionCallbacks in InitializeFlags
+  // depends on __sanitizer_register_weak_functions being called during the
+  // loader lock.
+  RunWeakFunctionCallbacks(sanitizer_function);
+
+  return function_overridden;
+}
+
+#endif  // SANITIZER_WINDOWS
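
AddRegisterWeakFunctionCallback above lets statically linked runtime code be notified when __sanitizer_register_weak_function installs a user override for a particular export, since the callbacks registered for that export address are run right after the override. A hedged usage sketch; the export name and callback below are illustrative only:

#include "interception/interception.h"
#include "sanitizer_win_interception.h"

extern "C" void *__ImageBase;  // current module base, provided by the MSVC linker

static void OnWeakOverrideInstalled() {
  // React to the user's strong definition becoming active, e.g. re-read flags.
}

static void RegisterCallback() {
  // Resolve the runtime's own export the same way GetSanitizerDllExport does.
  __sanitizer::uptr addr = __interception::InternalGetProcAddress(
      &__ImageBase, "__sanitizer_report_error_summary");
  if (addr)
    __sanitizer::AddRegisterWeakFunctionCallback(addr, OnWeakOverrideInstalled);
}
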
lib/tsan/sanitizer_common/sanitizer_win_interception.h
@@ -0,0 +1,32 @@
+//===-- sanitizer_win_interception.h ----------------------    --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Windows-specific export surface to provide interception for parts of the
+// runtime that are always statically linked, both for overriding user-defined
+// functions as well as registering weak functions that the ASAN runtime should
+// use over defaults.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_WIN_INTERCEPTION_H
+#define SANITIZER_WIN_INTERCEPTION_H
+
+#include "sanitizer_platform.h"
+#if SANITIZER_WINDOWS
+
+#  include "sanitizer_common.h"
+#  include "sanitizer_internal_defs.h"
+
+namespace __sanitizer {
+using RegisterWeakFunctionCallback = void (*)();
+void AddRegisterWeakFunctionCallback(uptr export_address,
+                                     RegisterWeakFunctionCallback cb);
+}  // namespace __sanitizer
+
+#endif  // SANITIZER_WINDOWS
+#endif  // SANITIZER_WIN_INTERCEPTION_H
\ No newline at end of file
lib/tsan/sanitizer_common/sanitizer_win_weak_interception.cpp
@@ -1,94 +0,0 @@
-//===-- sanitizer_win_weak_interception.cpp -------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// This module should be included in the sanitizer when it is implemented as a
-// shared library on Windows (dll), in order to delegate the calls of weak
-// functions to the implementation in the main executable when a strong
-// definition is provided.
-//===----------------------------------------------------------------------===//
-
-#include "sanitizer_platform.h"
-#if SANITIZER_WINDOWS && SANITIZER_DYNAMIC
-#include "sanitizer_win_weak_interception.h"
-#include "sanitizer_allocator_interface.h"
-#include "sanitizer_interface_internal.h"
-#include "sanitizer_win_defs.h"
-#include "interception/interception.h"
-
-extern "C" {
-void *WINAPI GetModuleHandleA(const char *module_name);
-void abort();
-}
-
-namespace __sanitizer {
-// Try to get a pointer to real_function in the main module and override
-// dll_function with that pointer. If the function isn't found, nothing changes.
-int interceptWhenPossible(uptr dll_function, const char *real_function) {
-  uptr real = __interception::InternalGetProcAddress(
-      (void *)GetModuleHandleA(0), real_function);
-  if (real && !__interception::OverrideFunction((uptr)dll_function, real, 0))
-    abort();
-  return 0;
-}
-} // namespace __sanitizer
-
-// Declare weak hooks.
-extern "C" {
-void __sanitizer_on_print(const char *str);
-void __sanitizer_weak_hook_memcmp(uptr called_pc, const void *s1,
-                                  const void *s2, uptr n, int result);
-void __sanitizer_weak_hook_strcmp(uptr called_pc, const char *s1,
-                                  const char *s2, int result);
-void __sanitizer_weak_hook_strncmp(uptr called_pc, const char *s1,
-                                   const char *s2, uptr n, int result);
-void __sanitizer_weak_hook_strstr(uptr called_pc, const char *s1,
-                                  const char *s2, char *result);
-}
-
-// Include Sanitizer Common interface.
-#define INTERFACE_FUNCTION(Name)
-#define INTERFACE_WEAK_FUNCTION(Name) INTERCEPT_SANITIZER_WEAK_FUNCTION(Name)
-#include "sanitizer_common_interface.inc"
-
-#pragma section(".WEAK$A", read)
-#pragma section(".WEAK$Z", read)
-
-typedef void (*InterceptCB)();
-extern "C" {
-__declspec(allocate(".WEAK$A")) InterceptCB __start_weak_list;
-__declspec(allocate(".WEAK$Z")) InterceptCB __stop_weak_list;
-}
-
-static int weak_intercept_init() {
-  static bool flag = false;
-  // weak_interception_init is expected to be called by only one thread.
-  if (flag) return 0;
-  flag = true;
-
-  for (InterceptCB *it = &__start_weak_list; it < &__stop_weak_list; ++it)
-    if (*it)
-      (*it)();
-
-  // In DLLs, the callbacks are expected to return 0,
-  // otherwise CRT initialization fails.
-  return 0;
-}
-
-#pragma section(".CRT$XIB", long, read)
-__declspec(allocate(".CRT$XIB")) int (*__weak_intercept_preinit)() =
-    weak_intercept_init;
-
-static void WINAPI weak_intercept_thread_init(void *mod, unsigned long reason,
-                                              void *reserved) {
-  if (reason == /*DLL_PROCESS_ATTACH=*/1) weak_intercept_init();
-}
-
-#pragma section(".CRT$XLAB", long, read)
-__declspec(allocate(".CRT$XLAB")) void(WINAPI *__weak_intercept_tls_init)(
-    void *, unsigned long, void *) = weak_intercept_thread_init;
-
-#endif // SANITIZER_WINDOWS && SANITIZER_DYNAMIC
lib/tsan/sanitizer_common/sanitizer_win_weak_interception.h
@@ -1,32 +0,0 @@
-//===-- sanitizer_win_weak_interception.h ---------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// This header provide helper macros to delegate calls of weak functions to the
-// implementation in the main executable when a strong definition is present.
-//===----------------------------------------------------------------------===//
-#ifndef SANITIZER_WIN_WEAK_INTERCEPTION_H
-#define SANITIZER_WIN_WEAK_INTERCEPTION_H
-#include "sanitizer_internal_defs.h"
-
-namespace __sanitizer {
-int interceptWhenPossible(uptr dll_function, const char *real_function);
-}
-
-// ----------------- Function interception helper macros -------------------- //
-// Weak functions, could be redefined in the main executable, but that is not
-// necessary, so we shouldn't die if we can not find a reference.
-#define INTERCEPT_WEAK(Name) interceptWhenPossible((uptr) Name, #Name);
-
-#define INTERCEPT_SANITIZER_WEAK_FUNCTION(Name)                                \
-  static int intercept_##Name() {                                              \
-    return __sanitizer::interceptWhenPossible((__sanitizer::uptr) Name, #Name);\
-  }                                                                            \
-  __pragma(section(".WEAK$M", long, read))                                     \
-  __declspec(allocate(".WEAK$M")) int (*__weak_intercept_##Name)() =           \
-      intercept_##Name;
-
-#endif // SANITIZER_WIN_WEAK_INTERCEPTION_H
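For reference, the INTERCEPT_SANITIZER_WEAK_FUNCTION macro in the header deleted above registers one such callback per weak hook. Hand-expanded for the __sanitizer_on_print hook declared earlier, it comes out roughly as follows (the uptr width below is an assumption of this sketch):

// Declarations as in the removed header and the weak hook list above.
namespace __sanitizer {
typedef unsigned long long uptr;  // assumption: 64-bit Windows build
int interceptWhenPossible(uptr dll_function, const char *real_function);
}
extern "C" void __sanitizer_on_print(const char *str);

// Hand-expanded INTERCEPT_SANITIZER_WEAK_FUNCTION(__sanitizer_on_print):
static int intercept___sanitizer_on_print() {
  return __sanitizer::interceptWhenPossible(
      (__sanitizer::uptr)__sanitizer_on_print, "__sanitizer_on_print");
}
__pragma(section(".WEAK$M", long, read))
__declspec(allocate(".WEAK$M")) int (*__weak_intercept___sanitizer_on_print)() =
    intercept___sanitizer_on_print;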
lib/tsan/tsan_interceptors_mac.cpp
@@ -14,22 +14,21 @@
 #include "sanitizer_common/sanitizer_platform.h"
 #if SANITIZER_APPLE
 
-#include "interception/interception.h"
-#include "tsan_interceptors.h"
-#include "tsan_interface.h"
-#include "tsan_interface_ann.h"
-#include "tsan_spinlock_defs_mac.h"
-#include "sanitizer_common/sanitizer_addrhashmap.h"
-
-#include <errno.h>
-#include <libkern/OSAtomic.h>
-#include <objc/objc-sync.h>
-#include <os/lock.h>
-#include <sys/ucontext.h>
-
-#if defined(__has_include) && __has_include(<xpc/xpc.h>)
-#include <xpc/xpc.h>
-#endif  // #if defined(__has_include) && __has_include(<xpc/xpc.h>)
+#  include <errno.h>
+#  include <libkern/OSAtomic.h>
+#  include <objc/objc-sync.h>
+#  include <os/lock.h>
+#  include <sys/ucontext.h>
+
+#  include "interception/interception.h"
+#  include "sanitizer_common/sanitizer_addrhashmap.h"
+#  include "tsan_interceptors.h"
+#  include "tsan_interface.h"
+#  include "tsan_interface_ann.h"
+
+#  if defined(__has_include) && __has_include(<xpc/xpc.h>)
+#    include <xpc/xpc.h>
+#  endif  // #if defined(__has_include) && __has_include(<xpc/xpc.h>)
 
 typedef long long_t;
 
@@ -49,51 +48,55 @@ static constexpr morder kMacOrderBarrier = mo_acq_rel;
 static constexpr morder kMacOrderNonBarrier = mo_acq_rel;
 static constexpr morder kMacFailureOrder = mo_relaxed;
 
-#define OSATOMIC_INTERCEPTOR(return_t, t, tsan_t, f, tsan_atomic_f, mo) \
-  TSAN_INTERCEPTOR(return_t, f, t x, volatile t *ptr) {                 \
-    SCOPED_TSAN_INTERCEPTOR(f, x, ptr);                                 \
-    return tsan_atomic_f((volatile tsan_t *)ptr, x, mo);                \
-  }
-
-#define OSATOMIC_INTERCEPTOR_PLUS_X(return_t, t, tsan_t, f, tsan_atomic_f, mo) \
-  TSAN_INTERCEPTOR(return_t, f, t x, volatile t *ptr) {                        \
-    SCOPED_TSAN_INTERCEPTOR(f, x, ptr);                                        \
-    return tsan_atomic_f((volatile tsan_t *)ptr, x, mo) + x;                   \
-  }
+#  define OSATOMIC_INTERCEPTOR(return_t, t, tsan_t, f, tsan_atomic_f, mo) \
+    TSAN_INTERCEPTOR(return_t, f, t x, volatile t *ptr) {                 \
+      SCOPED_TSAN_INTERCEPTOR(f, x, ptr);                                 \
+      return tsan_atomic_f((volatile tsan_t *)ptr, x, mo);                \
+    }
 
-#define OSATOMIC_INTERCEPTOR_PLUS_1(return_t, t, tsan_t, f, tsan_atomic_f, mo) \
-  TSAN_INTERCEPTOR(return_t, f, volatile t *ptr) {                             \
-    SCOPED_TSAN_INTERCEPTOR(f, ptr);                                           \
-    return tsan_atomic_f((volatile tsan_t *)ptr, 1, mo) + 1;                   \
-  }
+#  define OSATOMIC_INTERCEPTOR_PLUS_X(return_t, t, tsan_t, f, tsan_atomic_f, \
+                                      mo)                                    \
+    TSAN_INTERCEPTOR(return_t, f, t x, volatile t *ptr) {                    \
+      SCOPED_TSAN_INTERCEPTOR(f, x, ptr);                                    \
+      return tsan_atomic_f((volatile tsan_t *)ptr, x, mo) + x;               \
+    }
 
-#define OSATOMIC_INTERCEPTOR_MINUS_1(return_t, t, tsan_t, f, tsan_atomic_f, \
-                                     mo)                                    \
-  TSAN_INTERCEPTOR(return_t, f, volatile t *ptr) {                          \
-    SCOPED_TSAN_INTERCEPTOR(f, ptr);                                        \
-    return tsan_atomic_f((volatile tsan_t *)ptr, 1, mo) - 1;                \
-  }
+#  define OSATOMIC_INTERCEPTOR_PLUS_1(return_t, t, tsan_t, f, tsan_atomic_f, \
+                                      mo)                                    \
+    TSAN_INTERCEPTOR(return_t, f, volatile t *ptr) {                         \
+      SCOPED_TSAN_INTERCEPTOR(f, ptr);                                       \
+      return tsan_atomic_f((volatile tsan_t *)ptr, 1, mo) + 1;               \
+    }
 
-#define OSATOMIC_INTERCEPTORS_ARITHMETIC(f, tsan_atomic_f, m)                  \
-  m(int32_t, int32_t, a32, f##32, __tsan_atomic32_##tsan_atomic_f,             \
-    kMacOrderNonBarrier)                                                       \
-  m(int32_t, int32_t, a32, f##32##Barrier, __tsan_atomic32_##tsan_atomic_f,    \
-    kMacOrderBarrier)                                                          \
-  m(int64_t, int64_t, a64, f##64, __tsan_atomic64_##tsan_atomic_f,             \
-    kMacOrderNonBarrier)                                                       \
-  m(int64_t, int64_t, a64, f##64##Barrier, __tsan_atomic64_##tsan_atomic_f,    \
-    kMacOrderBarrier)
-
-#define OSATOMIC_INTERCEPTORS_BITWISE(f, tsan_atomic_f, m, m_orig)             \
-  m(int32_t, uint32_t, a32, f##32, __tsan_atomic32_##tsan_atomic_f,            \
-    kMacOrderNonBarrier)                                                       \
-  m(int32_t, uint32_t, a32, f##32##Barrier, __tsan_atomic32_##tsan_atomic_f,   \
-    kMacOrderBarrier)                                                          \
-  m_orig(int32_t, uint32_t, a32, f##32##Orig, __tsan_atomic32_##tsan_atomic_f, \
-    kMacOrderNonBarrier)                                                       \
-  m_orig(int32_t, uint32_t, a32, f##32##OrigBarrier,                           \
-    __tsan_atomic32_##tsan_atomic_f, kMacOrderBarrier)
+#  define OSATOMIC_INTERCEPTOR_MINUS_1(return_t, t, tsan_t, f, tsan_atomic_f, \
+                                       mo)                                    \
+    TSAN_INTERCEPTOR(return_t, f, volatile t *ptr) {                          \
+      SCOPED_TSAN_INTERCEPTOR(f, ptr);                                        \
+      return tsan_atomic_f((volatile tsan_t *)ptr, 1, mo) - 1;                \
+    }
 
+#  define OSATOMIC_INTERCEPTORS_ARITHMETIC(f, tsan_atomic_f, m)              \
+    m(int32_t, int32_t, a32, f##32, __tsan_atomic32_##tsan_atomic_f,         \
+      kMacOrderNonBarrier)                                                   \
+        m(int32_t, int32_t, a32, f##32##Barrier,                             \
+          __tsan_atomic32_##tsan_atomic_f, kMacOrderBarrier)                 \
+            m(int64_t, int64_t, a64, f##64, __tsan_atomic64_##tsan_atomic_f, \
+              kMacOrderNonBarrier)                                           \
+                m(int64_t, int64_t, a64, f##64##Barrier,                     \
+                  __tsan_atomic64_##tsan_atomic_f, kMacOrderBarrier)
+
+#  define OSATOMIC_INTERCEPTORS_BITWISE(f, tsan_atomic_f, m, m_orig)     \
+    m(int32_t, uint32_t, a32, f##32, __tsan_atomic32_##tsan_atomic_f,    \
+      kMacOrderNonBarrier)                                               \
+        m(int32_t, uint32_t, a32, f##32##Barrier,                        \
+          __tsan_atomic32_##tsan_atomic_f, kMacOrderBarrier)             \
+            m_orig(int32_t, uint32_t, a32, f##32##Orig,                  \
+                   __tsan_atomic32_##tsan_atomic_f, kMacOrderNonBarrier) \
+                m_orig(int32_t, uint32_t, a32, f##32##OrigBarrier,       \
+                       __tsan_atomic32_##tsan_atomic_f, kMacOrderBarrier)
+
+#  pragma clang diagnostic push  // OSAtomic* deprecation
+#  pragma clang diagnostic ignored "-Wdeprecated-declarations"
 OSATOMIC_INTERCEPTORS_ARITHMETIC(OSAtomicAdd, fetch_add,
                                  OSATOMIC_INTERCEPTOR_PLUS_X)
 OSATOMIC_INTERCEPTORS_ARITHMETIC(OSAtomicIncrement, fetch_add,
@@ -106,23 +109,26 @@ OSATOMIC_INTERCEPTORS_BITWISE(OSAtomicAnd, fetch_and,
                               OSATOMIC_INTERCEPTOR_PLUS_X, OSATOMIC_INTERCEPTOR)
 OSATOMIC_INTERCEPTORS_BITWISE(OSAtomicXor, fetch_xor,
                               OSATOMIC_INTERCEPTOR_PLUS_X, OSATOMIC_INTERCEPTOR)
+#  pragma clang diagnostic pop  // OSAtomic* deprecation
+
+#  define OSATOMIC_INTERCEPTORS_CAS(f, tsan_atomic_f, tsan_t, t)           \
+    TSAN_INTERCEPTOR(bool, f, t old_value, t new_value, t volatile *ptr) { \
+      SCOPED_TSAN_INTERCEPTOR(f, old_value, new_value, ptr);               \
+      return tsan_atomic_f##_compare_exchange_strong(                      \
+          (volatile tsan_t *)ptr, (tsan_t *)&old_value, (tsan_t)new_value, \
+          kMacOrderNonBarrier, kMacFailureOrder);                          \
+    }                                                                      \
+                                                                           \
+    TSAN_INTERCEPTOR(bool, f##Barrier, t old_value, t new_value,           \
+                     t volatile *ptr) {                                    \
+      SCOPED_TSAN_INTERCEPTOR(f##Barrier, old_value, new_value, ptr);      \
+      return tsan_atomic_f##_compare_exchange_strong(                      \
+          (volatile tsan_t *)ptr, (tsan_t *)&old_value, (tsan_t)new_value, \
+          kMacOrderBarrier, kMacFailureOrder);                             \
+    }
 
-#define OSATOMIC_INTERCEPTORS_CAS(f, tsan_atomic_f, tsan_t, t)              \
-  TSAN_INTERCEPTOR(bool, f, t old_value, t new_value, t volatile *ptr) {    \
-    SCOPED_TSAN_INTERCEPTOR(f, old_value, new_value, ptr);                  \
-    return tsan_atomic_f##_compare_exchange_strong(                         \
-        (volatile tsan_t *)ptr, (tsan_t *)&old_value, (tsan_t)new_value,    \
-        kMacOrderNonBarrier, kMacFailureOrder);                             \
-  }                                                                         \
-                                                                            \
-  TSAN_INTERCEPTOR(bool, f##Barrier, t old_value, t new_value,              \
-                   t volatile *ptr) {                                       \
-    SCOPED_TSAN_INTERCEPTOR(f##Barrier, old_value, new_value, ptr);         \
-    return tsan_atomic_f##_compare_exchange_strong(                         \
-        (volatile tsan_t *)ptr, (tsan_t *)&old_value, (tsan_t)new_value,    \
-        kMacOrderBarrier, kMacFailureOrder);                                \
-  }
-
+#  pragma clang diagnostic push  // OSAtomicCompareAndSwap* deprecation
+#  pragma clang diagnostic ignored "-Wdeprecated-declarations"
 OSATOMIC_INTERCEPTORS_CAS(OSAtomicCompareAndSwapInt, __tsan_atomic32, a32, int)
 OSATOMIC_INTERCEPTORS_CAS(OSAtomicCompareAndSwapLong, __tsan_atomic64, a64,
                           long_t)
@@ -132,24 +138,28 @@ OSATOMIC_INTERCEPTORS_CAS(OSAtomicCompareAndSwap32, __tsan_atomic32, a32,
                           int32_t)
 OSATOMIC_INTERCEPTORS_CAS(OSAtomicCompareAndSwap64, __tsan_atomic64, a64,
                           int64_t)
+#  pragma clang diagnostic pop  // OSAtomicCompareAndSwap* deprecation
+
+#  define OSATOMIC_INTERCEPTOR_BITOP(f, op, clear, mo)             \
+    TSAN_INTERCEPTOR(bool, f, uint32_t n, volatile void *ptr) {    \
+      SCOPED_TSAN_INTERCEPTOR(f, n, ptr);                          \
+      volatile char *byte_ptr = ((volatile char *)ptr) + (n >> 3); \
+      char bit = 0x80u >> (n & 7);                                 \
+      char mask = clear ? ~bit : bit;                              \
+      char orig_byte = op((volatile a8 *)byte_ptr, mask, mo);      \
+      return orig_byte & bit;                                      \
+    }
 
-#define OSATOMIC_INTERCEPTOR_BITOP(f, op, clear, mo)             \
-  TSAN_INTERCEPTOR(bool, f, uint32_t n, volatile void *ptr) {    \
-    SCOPED_TSAN_INTERCEPTOR(f, n, ptr);                          \
-    volatile char *byte_ptr = ((volatile char *)ptr) + (n >> 3); \
-    char bit = 0x80u >> (n & 7);                                 \
-    char mask = clear ? ~bit : bit;                              \
-    char orig_byte = op((volatile a8 *)byte_ptr, mask, mo);      \
-    return orig_byte & bit;                                      \
-  }
-
-#define OSATOMIC_INTERCEPTORS_BITOP(f, op, clear)               \
-  OSATOMIC_INTERCEPTOR_BITOP(f, op, clear, kMacOrderNonBarrier) \
-  OSATOMIC_INTERCEPTOR_BITOP(f##Barrier, op, clear, kMacOrderBarrier)
+#  define OSATOMIC_INTERCEPTORS_BITOP(f, op, clear)               \
+    OSATOMIC_INTERCEPTOR_BITOP(f, op, clear, kMacOrderNonBarrier) \
+    OSATOMIC_INTERCEPTOR_BITOP(f##Barrier, op, clear, kMacOrderBarrier)
 
+#  pragma clang diagnostic push  // OSAtomicTestAnd* deprecation
+#  pragma clang diagnostic ignored "-Wdeprecated-declarations"
 OSATOMIC_INTERCEPTORS_BITOP(OSAtomicTestAndSet, __tsan_atomic8_fetch_or, false)
 OSATOMIC_INTERCEPTORS_BITOP(OSAtomicTestAndClear, __tsan_atomic8_fetch_and,
                             true)
+#  pragma clang diagnostic pop  // OSAtomicTestAnd* deprecation
 
 TSAN_INTERCEPTOR(void, OSAtomicEnqueue, OSQueueHead *list, void *item,
                  size_t offset) {
@@ -161,12 +171,13 @@ TSAN_INTERCEPTOR(void, OSAtomicEnqueue, OSQueueHead *list, void *item,
 TSAN_INTERCEPTOR(void *, OSAtomicDequeue, OSQueueHead *list, size_t offset) {
   SCOPED_TSAN_INTERCEPTOR(OSAtomicDequeue, list, offset);
   void *item = REAL(OSAtomicDequeue)(list, offset);
-  if (item) __tsan_acquire(item);
+  if (item)
+    __tsan_acquire(item);
   return item;
 }
 
 // OSAtomicFifoEnqueue and OSAtomicFifoDequeue are only on OS X.
-#if !SANITIZER_IOS
+#  if !SANITIZER_IOS
 
 TSAN_INTERCEPTOR(void, OSAtomicFifoEnqueue, OSFifoQueueHead *list, void *item,
                  size_t offset) {
@@ -179,11 +190,22 @@ TSAN_INTERCEPTOR(void *, OSAtomicFifoDequeue, OSFifoQueueHead *list,
                  size_t offset) {
   SCOPED_TSAN_INTERCEPTOR(OSAtomicFifoDequeue, list, offset);
   void *item = REAL(OSAtomicFifoDequeue)(list, offset);
-  if (item) __tsan_acquire(item);
+  if (item)
+    __tsan_acquire(item);
   return item;
 }
 
-#endif
+#  endif
+
+// If `OSSPINLOCK_USE_INLINED=1` is set, then SDK headers don't declare these
+// as functions, but as macros that call non-deprecated APIs.  Undefine these
+// macros so they don't interfere with the interceptor machinery.
+#  undef OSSpinLockLock
+#  undef OSSpinLockTry
+#  undef OSSpinLockUnlock
+
+#  pragma clang diagnostic push  // OSSpinLock* deprecation
+#  pragma clang diagnostic ignored "-Wdeprecated-declarations"
 
 TSAN_INTERCEPTOR(void, OSSpinLockLock, volatile OSSpinLock *lock) {
   CHECK(!cur_thread()->is_dead);
@@ -216,6 +238,7 @@ TSAN_INTERCEPTOR(void, OSSpinLockUnlock, volatile OSSpinLock *lock) {
   Release(thr, pc, (uptr)lock);
   REAL(OSSpinLockUnlock)(lock);
 }
+#  pragma clang diagnostic pop  // OSSpinLock* deprecation
 
 TSAN_INTERCEPTOR(void, os_lock_lock, void *lock) {
   CHECK(!cur_thread()->is_dead);
@@ -288,7 +311,7 @@ TSAN_INTERCEPTOR(void, os_unfair_lock_unlock, os_unfair_lock_t lock) {
   REAL(os_unfair_lock_unlock)(lock);
 }
 
-#if defined(__has_include) && __has_include(<xpc/xpc.h>)
+#  if defined(__has_include) && __has_include(<xpc/xpc.h>)
 
 TSAN_INTERCEPTOR(void, xpc_connection_set_event_handler,
                  xpc_connection_t connection, xpc_handler_t handler) {
@@ -342,7 +365,7 @@ TSAN_INTERCEPTOR(void, xpc_connection_cancel, xpc_connection_t connection) {
   REAL(xpc_connection_cancel)(connection);
 }
 
-#endif  // #if defined(__has_include) && __has_include(<xpc/xpc.h>)
+#  endif  // #if defined(__has_include) && __has_include(<xpc/xpc.h>)
 
 // Determines whether the Obj-C object pointer is a tagged pointer. Tagged
 // pointers encode the object data directly in their pointer bits and do not
@@ -365,7 +388,7 @@ static uptr GetOrCreateSyncAddress(uptr addr, ThreadState *thr, uptr pc) {
   Map::Handle h(&Addresses, addr);
   if (h.created()) {
     ThreadIgnoreBegin(thr, pc);
-    *h = (uptr) user_alloc(thr, pc, /*size=*/1);
+    *h = (uptr)user_alloc(thr, pc, /*size=*/1);
     ThreadIgnoreEnd(thr);
   }
   return *h;
@@ -383,7 +406,8 @@ static uptr SyncAddressForObjCObject(id obj, ThreadState *thr, uptr pc) {
 
 TSAN_INTERCEPTOR(int, objc_sync_enter, id obj) {
   SCOPED_TSAN_INTERCEPTOR(objc_sync_enter, obj);
-  if (!obj) return REAL(objc_sync_enter)(obj);
+  if (!obj)
+    return REAL(objc_sync_enter)(obj);
   uptr addr = SyncAddressForObjCObject(obj, thr, pc);
   MutexPreLock(thr, pc, addr, MutexFlagWriteReentrant);
   int result = REAL(objc_sync_enter)(obj);
@@ -394,11 +418,13 @@ TSAN_INTERCEPTOR(int, objc_sync_enter, id obj) {
 
 TSAN_INTERCEPTOR(int, objc_sync_exit, id obj) {
   SCOPED_TSAN_INTERCEPTOR(objc_sync_exit, obj);
-  if (!obj) return REAL(objc_sync_exit)(obj);
+  if (!obj)
+    return REAL(objc_sync_exit)(obj);
   uptr addr = SyncAddressForObjCObject(obj, thr, pc);
   MutexUnlock(thr, pc, addr);
   int result = REAL(objc_sync_exit)(obj);
-  if (result != OBJC_SYNC_SUCCESS) MutexInvalidAccess(thr, pc, addr);
+  if (result != OBJC_SYNC_SUCCESS)
+    MutexInvalidAccess(thr, pc, addr);
   return result;
 }
 
@@ -429,7 +455,7 @@ TSAN_INTERCEPTOR(int, swapcontext, ucontext_t *oucp, const ucontext_t *ucp) {
 
 // On macOS, libc++ is always linked dynamically, so intercepting works the
 // usual way.
-#define STDCXX_INTERCEPTOR TSAN_INTERCEPTOR
+#  define STDCXX_INTERCEPTOR TSAN_INTERCEPTOR
 
 namespace {
 struct fake_shared_weak_count {
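All of the OSAtomic* wrappers in this file are generated by the OSATOMIC_INTERCEPTOR* macros reindented above. As a reading aid, here is approximately what OSATOMIC_INTERCEPTOR_PLUS_X produces for OSAtomicAdd32, with the TSAN_INTERCEPTOR/SCOPED_TSAN_INTERCEPTOR plumbing reduced to comments and local stand-ins for names defined elsewhere (the mo_acq_rel == 4 numbering is an assumption of the sketch):

#include <stdint.h>

// Stand-ins for names that live in tsan_interface.h and earlier in this file.
typedef unsigned int a32;
extern "C" a32 __tsan_atomic32_fetch_add(volatile a32 *a, a32 v, int mo);
static const int kMacOrderNonBarrier = 4;  // kMacOrderNonBarrier = mo_acq_rel

// Approximate expansion of
//   OSATOMIC_INTERCEPTOR_PLUS_X(int32_t, int32_t, a32, OSAtomicAdd32,
//                               __tsan_atomic32_fetch_add, kMacOrderNonBarrier)
int32_t wrap_OSAtomicAdd32(int32_t x, volatile int32_t *ptr) {
  // SCOPED_TSAN_INTERCEPTOR(OSAtomicAdd32, x, ptr);
  // OSAtomicAdd32 returns the new value, while the fetch_add hook returns the
  // previous one, hence the trailing "+ x".
  return __tsan_atomic32_fetch_add((volatile a32 *)ptr, x,
                                   kMacOrderNonBarrier) + x;
}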
lib/tsan/tsan_interceptors_posix.cpp
@@ -12,14 +12,15 @@
 // sanitizer_common/sanitizer_common_interceptors.inc
 //===----------------------------------------------------------------------===//
 
+#include "sanitizer_common/sanitizer_allocator_dlsym.h"
 #include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_errno.h"
 #include "sanitizer_common/sanitizer_glibc_version.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
 #include "sanitizer_common/sanitizer_libc.h"
 #include "sanitizer_common/sanitizer_linux.h"
 #include "sanitizer_common/sanitizer_platform_limits_netbsd.h"
 #include "sanitizer_common/sanitizer_platform_limits_posix.h"
-#include "sanitizer_common/sanitizer_placement_new.h"
 #include "sanitizer_common/sanitizer_posix.h"
 #include "sanitizer_common/sanitizer_stacktrace.h"
 #include "sanitizer_common/sanitizer_tls_get_addr.h"
@@ -96,7 +97,7 @@ extern "C" int pthread_key_create(unsigned *key, void (*destructor)(void* v));
 extern "C" int pthread_setspecific(unsigned key, const void *v);
 DECLARE_REAL(int, pthread_mutexattr_gettype, void *, void *)
 DECLARE_REAL(int, fflush, __sanitizer_FILE *fp)
-DECLARE_REAL_AND_INTERCEPTOR(void *, malloc, uptr size)
+DECLARE_REAL_AND_INTERCEPTOR(void *, malloc, usize size)
 DECLARE_REAL_AND_INTERCEPTOR(void, free, void *ptr)
 extern "C" int pthread_equal(void *t1, void *t2);
 extern "C" void *pthread_self();
@@ -252,6 +253,13 @@ SANITIZER_WEAK_CXX_DEFAULT_IMPL void OnPotentiallyBlockingRegionBegin() {}
 SANITIZER_WEAK_CXX_DEFAULT_IMPL void OnPotentiallyBlockingRegionEnd() {}
 #endif
 
+// FIXME: Use for `in_symbolizer()` as well. As-is we can't use
+// `DlSymAllocator`, because it uses the primary allocator only. Symbolizer
+// requires support of the secondary allocator for larger blocks.
+struct DlsymAlloc : public DlSymAllocator<DlsymAlloc> {
+  static bool UseImpl() { return (ctx && !ctx->initialized); }
+};
+
 }  // namespace __tsan
 
 static ThreadSignalContext *SigCtx(ThreadState *thr) {
@@ -661,6 +669,8 @@ TSAN_INTERCEPTOR(void, _longjmp, uptr *env, int val) {
 TSAN_INTERCEPTOR(void*, malloc, uptr size) {
   if (in_symbolizer())
     return InternalAlloc(size);
+  if (DlsymAlloc::Use())
+    return DlsymAlloc::Allocate(size);
   void *p = 0;
   {
     SCOPED_INTERCEPTOR_RAW(malloc, size);
@@ -678,12 +688,14 @@ TSAN_INTERCEPTOR(void*, __libc_memalign, uptr align, uptr sz) {
   return user_memalign(thr, pc, align, sz);
 }
 
-TSAN_INTERCEPTOR(void*, calloc, uptr size, uptr n) {
+TSAN_INTERCEPTOR(void *, calloc, uptr n, uptr size) {
   if (in_symbolizer())
-    return InternalCalloc(size, n);
+    return InternalCalloc(n, size);
+  if (DlsymAlloc::Use())
+    return DlsymAlloc::Callocate(n, size);
   void *p = 0;
   {
-    SCOPED_INTERCEPTOR_RAW(calloc, size, n);
+    SCOPED_INTERCEPTOR_RAW(calloc, n, size);
     p = user_calloc(thr, pc, size, n);
   }
   invoke_malloc_hook(p, n * size);
@@ -693,6 +705,8 @@ TSAN_INTERCEPTOR(void*, calloc, uptr size, uptr n) {
 TSAN_INTERCEPTOR(void*, realloc, void *p, uptr size) {
   if (in_symbolizer())
     return InternalRealloc(p, size);
+  if (DlsymAlloc::Use() || DlsymAlloc::PointerIsMine(p))
+    return DlsymAlloc::Realloc(p, size);
   if (p)
     invoke_free_hook(p);
   {
@@ -703,13 +717,13 @@ TSAN_INTERCEPTOR(void*, realloc, void *p, uptr size) {
   return p;
 }
 
-TSAN_INTERCEPTOR(void*, reallocarray, void *p, uptr size, uptr n) {
+TSAN_INTERCEPTOR(void *, reallocarray, void *p, uptr n, uptr size) {
   if (in_symbolizer())
-    return InternalReallocArray(p, size, n);
+    return InternalReallocArray(p, n, size);
   if (p)
     invoke_free_hook(p);
   {
-    SCOPED_INTERCEPTOR_RAW(reallocarray, p, size, n);
+    SCOPED_INTERCEPTOR_RAW(reallocarray, p, n, size);
     p = user_reallocarray(thr, pc, p, size, n);
   }
   invoke_malloc_hook(p, size);
@@ -717,20 +731,24 @@ TSAN_INTERCEPTOR(void*, reallocarray, void *p, uptr size, uptr n) {
 }
 
 TSAN_INTERCEPTOR(void, free, void *p) {
-  if (p == 0)
+  if (UNLIKELY(!p))
     return;
   if (in_symbolizer())
     return InternalFree(p);
+  if (DlsymAlloc::PointerIsMine(p))
+    return DlsymAlloc::Free(p);
   invoke_free_hook(p);
   SCOPED_INTERCEPTOR_RAW(free, p);
   user_free(thr, pc, p);
 }
 
 TSAN_INTERCEPTOR(void, cfree, void *p) {
-  if (p == 0)
+  if (UNLIKELY(!p))
     return;
   if (in_symbolizer())
     return InternalFree(p);
+  if (DlsymAlloc::PointerIsMine(p))
+    return DlsymAlloc::Free(p);
   invoke_free_hook(p);
   SCOPED_INTERCEPTOR_RAW(cfree, p);
   user_free(thr, pc, p);
@@ -750,7 +768,7 @@ TSAN_INTERCEPTOR(char *, strcpy, char *dst, const char *src) {
   return REAL(strcpy)(dst, src);
 }
 
-TSAN_INTERCEPTOR(char*, strncpy, char *dst, char *src, uptr n) {
+TSAN_INTERCEPTOR(char*, strncpy, char *dst, char *src, usize n) {
   SCOPED_TSAN_INTERCEPTOR(strncpy, dst, src, n);
   uptr srclen = internal_strnlen(src, n);
   MemoryAccessRange(thr, pc, (uptr)dst, n, true);
@@ -1097,7 +1115,7 @@ int internal_pthread_create(void *th, void *attr, void *(*callback)(void *),
 }
 int internal_pthread_join(void *th, void **ret) {
   ScopedIgnoreInterceptors ignore;
-  return REAL(pthread_join(th, ret));
+  return REAL(pthread_join)(th, ret);
 }
 }  // namespace __sanitizer
 
@@ -1662,13 +1680,23 @@ TSAN_INTERCEPTOR(int, fstat64, int fd, void *buf) {
 #endif
 
 TSAN_INTERCEPTOR(int, open, const char *name, int oflag, ...) {
-  va_list ap;
-  va_start(ap, oflag);
-  mode_t mode = va_arg(ap, int);
-  va_end(ap);
+  mode_t mode = 0;
+  if (OpenReadsVaArgs(oflag)) {
+    va_list ap;
+    va_start(ap, oflag);
+    mode = va_arg(ap, int);
+    va_end(ap);
+  }
+
   SCOPED_TSAN_INTERCEPTOR(open, name, oflag, mode);
   READ_STRING(thr, pc, name, 0);
-  int fd = REAL(open)(name, oflag, mode);
+
+  int fd;
+  if (OpenReadsVaArgs(oflag))
+    fd = REAL(open)(name, oflag, mode);
+  else
+    fd = REAL(open)(name, oflag);
+
   if (fd >= 0)
     FdFileCreate(thr, pc, fd);
   return fd;
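The reworked open() interceptor above only reads the variadic mode argument when OpenReadsVaArgs(oflag) reports that the caller must have supplied one, since consuming a va_arg that was never passed is undefined behaviour. The real predicate lives in sanitizer_common; the sketch below only illustrates the kind of check it performs, and the O_CREAT/O_TMPFILE condition is an assumption of the sketch:

#include <fcntl.h>

// Sketch only: assume the mode argument is required exactly when the flags
// create a file (O_CREAT) or request an unnamed temporary file (O_TMPFILE,
// where the platform defines it).
static bool OpenReadsVaArgsSketch(int oflag) {
#ifdef O_TMPFILE
  if ((oflag & O_TMPFILE) == O_TMPFILE)
    return true;
#endif
  return (oflag & O_CREAT) != 0;
}

With such a predicate the interceptor can forward either open(name, oflag) or open(name, oflag, mode) and never touch a mode value the caller did not provide.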
lib/tsan/tsan_interface.h
@@ -16,8 +16,8 @@
 #define TSAN_INTERFACE_H
 
 #include <sanitizer_common/sanitizer_internal_defs.h>
-using __sanitizer::uptr;
 using __sanitizer::tid_t;
+using __sanitizer::uptr;
 
 // This header should NOT include any other headers.
 // All functions in this header are extern "C" and start with __tsan_.
@@ -203,17 +203,18 @@ int __tsan_get_alloc_stack(uptr addr, uptr *trace, uptr size, int *thread_id,
 namespace __tsan {
 
 // These should match declarations from public tsan_interface_atomic.h header.
-typedef unsigned char      a8;
+typedef unsigned char a8;
 typedef unsigned short a16;
-typedef unsigned int       a32;
+typedef unsigned int a32;
 typedef unsigned long long a64;
-#if !SANITIZER_GO && (defined(__SIZEOF_INT128__) \
-    || (__clang_major__ * 100 + __clang_minor__ >= 302)) && \
+#if !SANITIZER_GO &&                                      \
+    (defined(__SIZEOF_INT128__) ||                        \
+     (__clang_major__ * 100 + __clang_minor__ >= 302)) && \
     !defined(__mips64) && !defined(__s390x__)
 __extension__ typedef __int128 a128;
-# define __TSAN_HAS_INT128 1
+#  define __TSAN_HAS_INT128 1
 #else
-# define __TSAN_HAS_INT128 0
+#  define __TSAN_HAS_INT128 0
 #endif
 
 // Part of ABI, do not change.
@@ -231,180 +232,180 @@ struct ThreadState;
 
 extern "C" {
 SANITIZER_INTERFACE_ATTRIBUTE
-a8 __tsan_atomic8_load(const volatile a8 *a, morder mo);
+a8 __tsan_atomic8_load(const volatile a8 *a, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a16 __tsan_atomic16_load(const volatile a16 *a, morder mo);
+a16 __tsan_atomic16_load(const volatile a16 *a, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a32 __tsan_atomic32_load(const volatile a32 *a, morder mo);
+a32 __tsan_atomic32_load(const volatile a32 *a, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a64 __tsan_atomic64_load(const volatile a64 *a, morder mo);
+a64 __tsan_atomic64_load(const volatile a64 *a, int mo);
 #if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
-a128 __tsan_atomic128_load(const volatile a128 *a, morder mo);
+a128 __tsan_atomic128_load(const volatile a128 *a, int mo);
 #endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_atomic8_store(volatile a8 *a, a8 v, morder mo);
+void __tsan_atomic8_store(volatile a8 *a, a8 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_atomic16_store(volatile a16 *a, a16 v, morder mo);
+void __tsan_atomic16_store(volatile a16 *a, a16 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_atomic32_store(volatile a32 *a, a32 v, morder mo);
+void __tsan_atomic32_store(volatile a32 *a, a32 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_atomic64_store(volatile a64 *a, a64 v, morder mo);
+void __tsan_atomic64_store(volatile a64 *a, a64 v, int mo);
 #if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_atomic128_store(volatile a128 *a, a128 v, morder mo);
+void __tsan_atomic128_store(volatile a128 *a, a128 v, int mo);
 #endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a8 __tsan_atomic8_exchange(volatile a8 *a, a8 v, morder mo);
+a8 __tsan_atomic8_exchange(volatile a8 *a, a8 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a16 __tsan_atomic16_exchange(volatile a16 *a, a16 v, morder mo);
+a16 __tsan_atomic16_exchange(volatile a16 *a, a16 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a32 __tsan_atomic32_exchange(volatile a32 *a, a32 v, morder mo);
+a32 __tsan_atomic32_exchange(volatile a32 *a, a32 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a64 __tsan_atomic64_exchange(volatile a64 *a, a64 v, morder mo);
+a64 __tsan_atomic64_exchange(volatile a64 *a, a64 v, int mo);
 #if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
-a128 __tsan_atomic128_exchange(volatile a128 *a, a128 v, morder mo);
+a128 __tsan_atomic128_exchange(volatile a128 *a, a128 v, int mo);
 #endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a8 __tsan_atomic8_fetch_add(volatile a8 *a, a8 v, morder mo);
+a8 __tsan_atomic8_fetch_add(volatile a8 *a, a8 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a16 __tsan_atomic16_fetch_add(volatile a16 *a, a16 v, morder mo);
+a16 __tsan_atomic16_fetch_add(volatile a16 *a, a16 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a32 __tsan_atomic32_fetch_add(volatile a32 *a, a32 v, morder mo);
+a32 __tsan_atomic32_fetch_add(volatile a32 *a, a32 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a64 __tsan_atomic64_fetch_add(volatile a64 *a, a64 v, morder mo);
+a64 __tsan_atomic64_fetch_add(volatile a64 *a, a64 v, int mo);
 #if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
-a128 __tsan_atomic128_fetch_add(volatile a128 *a, a128 v, morder mo);
+a128 __tsan_atomic128_fetch_add(volatile a128 *a, a128 v, int mo);
 #endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a8 __tsan_atomic8_fetch_sub(volatile a8 *a, a8 v, morder mo);
+a8 __tsan_atomic8_fetch_sub(volatile a8 *a, a8 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a16 __tsan_atomic16_fetch_sub(volatile a16 *a, a16 v, morder mo);
+a16 __tsan_atomic16_fetch_sub(volatile a16 *a, a16 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a32 __tsan_atomic32_fetch_sub(volatile a32 *a, a32 v, morder mo);
+a32 __tsan_atomic32_fetch_sub(volatile a32 *a, a32 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a64 __tsan_atomic64_fetch_sub(volatile a64 *a, a64 v, morder mo);
+a64 __tsan_atomic64_fetch_sub(volatile a64 *a, a64 v, int mo);
 #if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
-a128 __tsan_atomic128_fetch_sub(volatile a128 *a, a128 v, morder mo);
+a128 __tsan_atomic128_fetch_sub(volatile a128 *a, a128 v, int mo);
 #endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a8 __tsan_atomic8_fetch_and(volatile a8 *a, a8 v, morder mo);
+a8 __tsan_atomic8_fetch_and(volatile a8 *a, a8 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a16 __tsan_atomic16_fetch_and(volatile a16 *a, a16 v, morder mo);
+a16 __tsan_atomic16_fetch_and(volatile a16 *a, a16 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a32 __tsan_atomic32_fetch_and(volatile a32 *a, a32 v, morder mo);
+a32 __tsan_atomic32_fetch_and(volatile a32 *a, a32 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a64 __tsan_atomic64_fetch_and(volatile a64 *a, a64 v, morder mo);
+a64 __tsan_atomic64_fetch_and(volatile a64 *a, a64 v, int mo);
 #if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
-a128 __tsan_atomic128_fetch_and(volatile a128 *a, a128 v, morder mo);
+a128 __tsan_atomic128_fetch_and(volatile a128 *a, a128 v, int mo);
 #endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a8 __tsan_atomic8_fetch_or(volatile a8 *a, a8 v, morder mo);
+a8 __tsan_atomic8_fetch_or(volatile a8 *a, a8 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a16 __tsan_atomic16_fetch_or(volatile a16 *a, a16 v, morder mo);
+a16 __tsan_atomic16_fetch_or(volatile a16 *a, a16 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a32 __tsan_atomic32_fetch_or(volatile a32 *a, a32 v, morder mo);
+a32 __tsan_atomic32_fetch_or(volatile a32 *a, a32 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a64 __tsan_atomic64_fetch_or(volatile a64 *a, a64 v, morder mo);
+a64 __tsan_atomic64_fetch_or(volatile a64 *a, a64 v, int mo);
 #if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
-a128 __tsan_atomic128_fetch_or(volatile a128 *a, a128 v, morder mo);
+a128 __tsan_atomic128_fetch_or(volatile a128 *a, a128 v, int mo);
 #endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a8 __tsan_atomic8_fetch_xor(volatile a8 *a, a8 v, morder mo);
+a8 __tsan_atomic8_fetch_xor(volatile a8 *a, a8 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a16 __tsan_atomic16_fetch_xor(volatile a16 *a, a16 v, morder mo);
+a16 __tsan_atomic16_fetch_xor(volatile a16 *a, a16 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a32 __tsan_atomic32_fetch_xor(volatile a32 *a, a32 v, morder mo);
+a32 __tsan_atomic32_fetch_xor(volatile a32 *a, a32 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a64 __tsan_atomic64_fetch_xor(volatile a64 *a, a64 v, morder mo);
+a64 __tsan_atomic64_fetch_xor(volatile a64 *a, a64 v, int mo);
 #if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
-a128 __tsan_atomic128_fetch_xor(volatile a128 *a, a128 v, morder mo);
+a128 __tsan_atomic128_fetch_xor(volatile a128 *a, a128 v, int mo);
 #endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a8 __tsan_atomic8_fetch_nand(volatile a8 *a, a8 v, morder mo);
+a8 __tsan_atomic8_fetch_nand(volatile a8 *a, a8 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a16 __tsan_atomic16_fetch_nand(volatile a16 *a, a16 v, morder mo);
+a16 __tsan_atomic16_fetch_nand(volatile a16 *a, a16 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a32 __tsan_atomic32_fetch_nand(volatile a32 *a, a32 v, morder mo);
+a32 __tsan_atomic32_fetch_nand(volatile a32 *a, a32 v, int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a64 __tsan_atomic64_fetch_nand(volatile a64 *a, a64 v, morder mo);
+a64 __tsan_atomic64_fetch_nand(volatile a64 *a, a64 v, int mo);
 #if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
-a128 __tsan_atomic128_fetch_nand(volatile a128 *a, a128 v, morder mo);
+a128 __tsan_atomic128_fetch_nand(volatile a128 *a, a128 v, int mo);
 #endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-int __tsan_atomic8_compare_exchange_strong(volatile a8 *a, a8 *c, a8 v,
-                                           morder mo, morder fmo);
+int __tsan_atomic8_compare_exchange_strong(volatile a8 *a, a8 *c, a8 v, int mo,
+                                           int fmo);
 SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_atomic16_compare_exchange_strong(volatile a16 *a, a16 *c, a16 v,
-                                            morder mo, morder fmo);
+                                            int mo, int fmo);
 SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_atomic32_compare_exchange_strong(volatile a32 *a, a32 *c, a32 v,
-                                            morder mo, morder fmo);
+                                            int mo, int fmo);
 SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_atomic64_compare_exchange_strong(volatile a64 *a, a64 *c, a64 v,
-                                            morder mo, morder fmo);
+                                            int mo, int fmo);
 #if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_atomic128_compare_exchange_strong(volatile a128 *a, a128 *c, a128 v,
-                                             morder mo, morder fmo);
+                                             int mo, int fmo);
 #endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-int __tsan_atomic8_compare_exchange_weak(volatile a8 *a, a8 *c, a8 v, morder mo,
-                                         morder fmo);
+int __tsan_atomic8_compare_exchange_weak(volatile a8 *a, a8 *c, a8 v, int mo,
+                                         int fmo);
 SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_atomic16_compare_exchange_weak(volatile a16 *a, a16 *c, a16 v,
-                                          morder mo, morder fmo);
+                                          int mo, int fmo);
 SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_atomic32_compare_exchange_weak(volatile a32 *a, a32 *c, a32 v,
-                                          morder mo, morder fmo);
+                                          int mo, int fmo);
 SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_atomic64_compare_exchange_weak(volatile a64 *a, a64 *c, a64 v,
-                                          morder mo, morder fmo);
+                                          int mo, int fmo);
 #if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_atomic128_compare_exchange_weak(volatile a128 *a, a128 *c, a128 v,
-                                           morder mo, morder fmo);
+                                           int mo, int fmo);
 #endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a8 __tsan_atomic8_compare_exchange_val(volatile a8 *a, a8 c, a8 v, morder mo,
-                                       morder fmo);
+a8 __tsan_atomic8_compare_exchange_val(volatile a8 *a, a8 c, a8 v, int mo,
+                                       int fmo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a16 __tsan_atomic16_compare_exchange_val(volatile a16 *a, a16 c, a16 v,
-                                         morder mo, morder fmo);
+a16 __tsan_atomic16_compare_exchange_val(volatile a16 *a, a16 c, a16 v, int mo,
+                                         int fmo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a32 __tsan_atomic32_compare_exchange_val(volatile a32 *a, a32 c, a32 v,
-                                         morder mo, morder fmo);
+a32 __tsan_atomic32_compare_exchange_val(volatile a32 *a, a32 c, a32 v, int mo,
+                                         int fmo);
 SANITIZER_INTERFACE_ATTRIBUTE
-a64 __tsan_atomic64_compare_exchange_val(volatile a64 *a, a64 c, a64 v,
-                                         morder mo, morder fmo);
+a64 __tsan_atomic64_compare_exchange_val(volatile a64 *a, a64 c, a64 v, int mo,
+                                         int fmo);
 #if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
 a128 __tsan_atomic128_compare_exchange_val(volatile a128 *a, a128 c, a128 v,
-                                           morder mo, morder fmo);
+                                           int mo, int fmo);
 #endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_atomic_thread_fence(morder mo);
+void __tsan_atomic_thread_fence(int mo);
 SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_atomic_signal_fence(morder mo);
+void __tsan_atomic_signal_fence(int mo);
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_go_atomic32_load(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
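With the declarations above now taking the memory order as a plain int, instrumented code can pass the compiler's __ATOMIC_* constant straight through; the runtime narrows it back to morder internally (see to_morder() in the next file). An illustrative caller, assuming the usual __ATOMIC_ACQUIRE == 2 numbering:

// Illustrative only: what a TSan-instrumented acquire load of a 32-bit atomic
// boils down to at the call boundary. a32 is unsigned int per the header.
extern "C" unsigned int __tsan_atomic32_load(const volatile unsigned int *a,
                                             int mo);

unsigned int load_acquire(const volatile unsigned int *p) {
  return __tsan_atomic32_load(p, /*mo=*/2);  // 2 == __ATOMIC_ACQUIRE
}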
lib/tsan/tsan_interface_atomic.cpp
@@ -18,9 +18,9 @@
 // The following page contains more background information:
 // http://www.hpl.hp.com/personal/Hans_Boehm/c++mm/
 
+#include "sanitizer_common/sanitizer_mutex.h"
 #include "sanitizer_common/sanitizer_placement_new.h"
 #include "sanitizer_common/sanitizer_stacktrace.h"
-#include "sanitizer_common/sanitizer_mutex.h"
 #include "tsan_flags.h"
 #include "tsan_interface.h"
 #include "tsan_rtl.h"
@@ -34,8 +34,8 @@ static StaticSpinMutex mutex128;
 
 #if SANITIZER_DEBUG
 static bool IsLoadOrder(morder mo) {
-  return mo == mo_relaxed || mo == mo_consume
-      || mo == mo_acquire || mo == mo_seq_cst;
+  return mo == mo_relaxed || mo == mo_consume || mo == mo_acquire ||
+         mo == mo_seq_cst;
 }
 
 static bool IsStoreOrder(morder mo) {
@@ -48,42 +48,49 @@ static bool IsReleaseOrder(morder mo) {
 }
 
 static bool IsAcquireOrder(morder mo) {
-  return mo == mo_consume || mo == mo_acquire
-      || mo == mo_acq_rel || mo == mo_seq_cst;
+  return mo == mo_consume || mo == mo_acquire || mo == mo_acq_rel ||
+         mo == mo_seq_cst;
 }
 
 static bool IsAcqRelOrder(morder mo) {
   return mo == mo_acq_rel || mo == mo_seq_cst;
 }
 
-template<typename T> T func_xchg(volatile T *v, T op) {
+template <typename T>
+T func_xchg(volatile T *v, T op) {
   T res = __sync_lock_test_and_set(v, op);
   // __sync_lock_test_and_set does not contain full barrier.
   __sync_synchronize();
   return res;
 }
 
-template<typename T> T func_add(volatile T *v, T op) {
+template <typename T>
+T func_add(volatile T *v, T op) {
   return __sync_fetch_and_add(v, op);
 }
 
-template<typename T> T func_sub(volatile T *v, T op) {
+template <typename T>
+T func_sub(volatile T *v, T op) {
   return __sync_fetch_and_sub(v, op);
 }
 
-template<typename T> T func_and(volatile T *v, T op) {
+template <typename T>
+T func_and(volatile T *v, T op) {
   return __sync_fetch_and_and(v, op);
 }
 
-template<typename T> T func_or(volatile T *v, T op) {
+template <typename T>
+T func_or(volatile T *v, T op) {
   return __sync_fetch_and_or(v, op);
 }
 
-template<typename T> T func_xor(volatile T *v, T op) {
+template <typename T>
+T func_xor(volatile T *v, T op) {
   return __sync_fetch_and_xor(v, op);
 }
 
-template<typename T> T func_nand(volatile T *v, T op) {
+template <typename T>
+T func_nand(volatile T *v, T op) {
   // clang does not support __sync_fetch_and_nand.
   T cmp = *v;
   for (;;) {
@@ -95,7 +102,8 @@ template<typename T> T func_nand(volatile T *v, T op) {
   }
 }
 
-template<typename T> T func_cas(volatile T *v, T cmp, T xch) {
+template <typename T>
+T func_cas(volatile T *v, T cmp, T xch) {
   return __sync_val_compare_and_swap(v, cmp, xch);
 }
 
@@ -103,8 +111,8 @@ template<typename T> T func_cas(volatile T *v, T cmp, T xch) {
 // Atomic ops are executed under tsan internal mutex,
 // here we assume that the atomic variables are not accessed
 // from non-instrumented code.
-#if !defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16) && !SANITIZER_GO \
-    && __TSAN_HAS_INT128
+#if !defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16) && !SANITIZER_GO && \
+    __TSAN_HAS_INT128
 a128 func_xchg(volatile a128 *v, a128 op) {
   SpinMutexLock lock(&mutex128);
   a128 cmp = *v;
@@ -197,89 +205,24 @@ static atomic_uint64_t *to_atomic(const volatile a64 *a) {
 
 static memory_order to_mo(morder mo) {
   switch (mo) {
-  case mo_relaxed: return memory_order_relaxed;
-  case mo_consume: return memory_order_consume;
-  case mo_acquire: return memory_order_acquire;
-  case mo_release: return memory_order_release;
-  case mo_acq_rel: return memory_order_acq_rel;
-  case mo_seq_cst: return memory_order_seq_cst;
+    case mo_relaxed:
+      return memory_order_relaxed;
+    case mo_consume:
+      return memory_order_consume;
+    case mo_acquire:
+      return memory_order_acquire;
+    case mo_release:
+      return memory_order_release;
+    case mo_acq_rel:
+      return memory_order_acq_rel;
+    case mo_seq_cst:
+      return memory_order_seq_cst;
   }
   DCHECK(0);
   return memory_order_seq_cst;
 }
 
-template<typename T>
-static T NoTsanAtomicLoad(const volatile T *a, morder mo) {
-  return atomic_load(to_atomic(a), to_mo(mo));
-}
-
-#if __TSAN_HAS_INT128 && !SANITIZER_GO
-static a128 NoTsanAtomicLoad(const volatile a128 *a, morder mo) {
-  SpinMutexLock lock(&mutex128);
-  return *a;
-}
-#endif
-
-template <typename T>
-static T AtomicLoad(ThreadState *thr, uptr pc, const volatile T *a, morder mo) {
-  DCHECK(IsLoadOrder(mo));
-  // This fast-path is critical for performance.
-  // Assume the access is atomic.
-  if (!IsAcquireOrder(mo)) {
-    MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(),
-                 kAccessRead | kAccessAtomic);
-    return NoTsanAtomicLoad(a, mo);
-  }
-  // Don't create sync object if it does not exist yet. For example, an atomic
-  // pointer is initialized to nullptr and then periodically acquire-loaded.
-  T v = NoTsanAtomicLoad(a, mo);
-  SyncVar *s = ctx->metamap.GetSyncIfExists((uptr)a);
-  if (s) {
-    SlotLocker locker(thr);
-    ReadLock lock(&s->mtx);
-    thr->clock.Acquire(s->clock);
-    // Re-read under sync mutex because we need a consistent snapshot
-    // of the value and the clock we acquire.
-    v = NoTsanAtomicLoad(a, mo);
-  }
-  MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(), kAccessRead | kAccessAtomic);
-  return v;
-}
-
-template<typename T>
-static void NoTsanAtomicStore(volatile T *a, T v, morder mo) {
-  atomic_store(to_atomic(a), v, to_mo(mo));
-}
-
-#if __TSAN_HAS_INT128 && !SANITIZER_GO
-static void NoTsanAtomicStore(volatile a128 *a, a128 v, morder mo) {
-  SpinMutexLock lock(&mutex128);
-  *a = v;
-}
-#endif
-
-template <typename T>
-static void AtomicStore(ThreadState *thr, uptr pc, volatile T *a, T v,
-                        morder mo) {
-  DCHECK(IsStoreOrder(mo));
-  MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(), kAccessWrite | kAccessAtomic);
-  // This fast-path is critical for performance.
-  // Assume the access is atomic.
-  // Strictly saying even relaxed store cuts off release sequence,
-  // so must reset the clock.
-  if (!IsReleaseOrder(mo)) {
-    NoTsanAtomicStore(a, v, mo);
-    return;
-  }
-  SlotLocker locker(thr);
-  {
-    auto s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
-    Lock lock(&s->mtx);
-    thr->clock.ReleaseStore(&s->clock);
-    NoTsanAtomicStore(a, v, mo);
-  }
-  IncrementEpoch(thr);
-}
+namespace {
 
 template <typename T, T (*F)(volatile T *v, T op)>
 static T AtomicRMW(ThreadState *thr, uptr pc, volatile T *a, T v, morder mo) {
@@ -303,175 +246,265 @@ static T AtomicRMW(ThreadState *thr, uptr pc, volatile T *a, T v, morder mo) {
   return v;
 }
 
-template<typename T>
-static T NoTsanAtomicExchange(volatile T *a, T v, morder mo) {
-  return func_xchg(a, v);
-}
+struct OpLoad {
+  template <typename T>
+  static T NoTsanAtomic(morder mo, const volatile T *a) {
+    return atomic_load(to_atomic(a), to_mo(mo));
+  }
 
-template<typename T>
-static T NoTsanAtomicFetchAdd(volatile T *a, T v, morder mo) {
-  return func_add(a, v);
-}
+#if __TSAN_HAS_INT128 && !SANITIZER_GO
+  static a128 NoTsanAtomic(morder mo, const volatile a128 *a) {
+    SpinMutexLock lock(&mutex128);
+    return *a;
+  }
+#endif
 
-template<typename T>
-static T NoTsanAtomicFetchSub(volatile T *a, T v, morder mo) {
-  return func_sub(a, v);
-}
+  template <typename T>
+  static T Atomic(ThreadState *thr, uptr pc, morder mo, const volatile T *a) {
+    DCHECK(IsLoadOrder(mo));
+    // This fast-path is critical for performance.
+    // Assume the access is atomic.
+    if (!IsAcquireOrder(mo)) {
+      MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(),
+                   kAccessRead | kAccessAtomic);
+      return NoTsanAtomic(mo, a);
+    }
+    // Don't create sync object if it does not exist yet. For example, an atomic
+    // pointer is initialized to nullptr and then periodically acquire-loaded.
+    T v = NoTsanAtomic(mo, a);
+    SyncVar *s = ctx->metamap.GetSyncIfExists((uptr)a);
+    if (s) {
+      SlotLocker locker(thr);
+      ReadLock lock(&s->mtx);
+      thr->clock.Acquire(s->clock);
+      // Re-read under sync mutex because we need a consistent snapshot
+      // of the value and the clock we acquire.
+      v = NoTsanAtomic(mo, a);
+    }
+    MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(),
+                 kAccessRead | kAccessAtomic);
+    return v;
+  }
+};
 
-template<typename T>
-static T NoTsanAtomicFetchAnd(volatile T *a, T v, morder mo) {
-  return func_and(a, v);
-}
+struct OpStore {
+  template <typename T>
+  static void NoTsanAtomic(morder mo, volatile T *a, T v) {
+    atomic_store(to_atomic(a), v, to_mo(mo));
+  }
 
-template<typename T>
-static T NoTsanAtomicFetchOr(volatile T *a, T v, morder mo) {
-  return func_or(a, v);
-}
+#if __TSAN_HAS_INT128 && !SANITIZER_GO
+  static void NoTsanAtomic(morder mo, volatile a128 *a, a128 v) {
+    SpinMutexLock lock(&mutex128);
+    *a = v;
+  }
+#endif
 
-template<typename T>
-static T NoTsanAtomicFetchXor(volatile T *a, T v, morder mo) {
-  return func_xor(a, v);
-}
+  template <typename T>
+  static void Atomic(ThreadState *thr, uptr pc, morder mo, volatile T *a, T v) {
+    DCHECK(IsStoreOrder(mo));
+    MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(),
+                 kAccessWrite | kAccessAtomic);
+    // This fast-path is critical for performance.
+    // Assume the access is atomic.
+    // Strictly speaking, even a relaxed store cuts off the release
+    // sequence, so we must reset the clock.
+    if (!IsReleaseOrder(mo)) {
+      NoTsanAtomic(mo, a, v);
+      return;
+    }
+    SlotLocker locker(thr);
+    {
+      auto s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
+      Lock lock(&s->mtx);
+      thr->clock.ReleaseStore(&s->clock);
+      NoTsanAtomic(mo, a, v);
+    }
+    IncrementEpoch(thr);
+  }
+};
 
-template<typename T>
-static T NoTsanAtomicFetchNand(volatile T *a, T v, morder mo) {
-  return func_nand(a, v);
-}
+struct OpExchange {
+  template <typename T>
+  static T NoTsanAtomic(morder mo, volatile T *a, T v) {
+    return func_xchg(a, v);
+  }
+  template <typename T>
+  static T Atomic(ThreadState *thr, uptr pc, morder mo, volatile T *a, T v) {
+    return AtomicRMW<T, func_xchg>(thr, pc, a, v, mo);
+  }
+};
 
-template<typename T>
-static T AtomicExchange(ThreadState *thr, uptr pc, volatile T *a, T v,
-    morder mo) {
-  return AtomicRMW<T, func_xchg>(thr, pc, a, v, mo);
-}
+struct OpFetchAdd {
+  template <typename T>
+  static T NoTsanAtomic(morder mo, volatile T *a, T v) {
+    return func_add(a, v);
+  }
 
-template<typename T>
-static T AtomicFetchAdd(ThreadState *thr, uptr pc, volatile T *a, T v,
-    morder mo) {
-  return AtomicRMW<T, func_add>(thr, pc, a, v, mo);
-}
+  template <typename T>
+  static T Atomic(ThreadState *thr, uptr pc, morder mo, volatile T *a, T v) {
+    return AtomicRMW<T, func_add>(thr, pc, a, v, mo);
+  }
+};
 
-template<typename T>
-static T AtomicFetchSub(ThreadState *thr, uptr pc, volatile T *a, T v,
-    morder mo) {
-  return AtomicRMW<T, func_sub>(thr, pc, a, v, mo);
-}
+struct OpFetchSub {
+  template <typename T>
+  static T NoTsanAtomic(morder mo, volatile T *a, T v) {
+    return func_sub(a, v);
+  }
 
-template<typename T>
-static T AtomicFetchAnd(ThreadState *thr, uptr pc, volatile T *a, T v,
-    morder mo) {
-  return AtomicRMW<T, func_and>(thr, pc, a, v, mo);
-}
+  template <typename T>
+  static T Atomic(ThreadState *thr, uptr pc, morder mo, volatile T *a, T v) {
+    return AtomicRMW<T, func_sub>(thr, pc, a, v, mo);
+  }
+};
 
-template<typename T>
-static T AtomicFetchOr(ThreadState *thr, uptr pc, volatile T *a, T v,
-    morder mo) {
-  return AtomicRMW<T, func_or>(thr, pc, a, v, mo);
-}
+struct OpFetchAnd {
+  template <typename T>
+  static T NoTsanAtomic(morder mo, volatile T *a, T v) {
+    return func_and(a, v);
+  }
 
-template<typename T>
-static T AtomicFetchXor(ThreadState *thr, uptr pc, volatile T *a, T v,
-    morder mo) {
-  return AtomicRMW<T, func_xor>(thr, pc, a, v, mo);
-}
+  template <typename T>
+  static T Atomic(ThreadState *thr, uptr pc, morder mo, volatile T *a, T v) {
+    return AtomicRMW<T, func_and>(thr, pc, a, v, mo);
+  }
+};
 
-template<typename T>
-static T AtomicFetchNand(ThreadState *thr, uptr pc, volatile T *a, T v,
-    morder mo) {
-  return AtomicRMW<T, func_nand>(thr, pc, a, v, mo);
-}
+struct OpFetchOr {
+  template <typename T>
+  static T NoTsanAtomic(morder mo, volatile T *a, T v) {
+    return func_or(a, v);
+  }
 
-template<typename T>
-static bool NoTsanAtomicCAS(volatile T *a, T *c, T v, morder mo, morder fmo) {
-  return atomic_compare_exchange_strong(to_atomic(a), c, v, to_mo(mo));
-}
+  template <typename T>
+  static T Atomic(ThreadState *thr, uptr pc, morder mo, volatile T *a, T v) {
+    return AtomicRMW<T, func_or>(thr, pc, a, v, mo);
+  }
+};
 
-#if __TSAN_HAS_INT128
-static bool NoTsanAtomicCAS(volatile a128 *a, a128 *c, a128 v,
-    morder mo, morder fmo) {
-  a128 old = *c;
-  a128 cur = func_cas(a, old, v);
-  if (cur == old)
-    return true;
-  *c = cur;
-  return false;
-}
-#endif
+struct OpFetchXor {
+  template <typename T>
+  static T NoTsanAtomic(morder mo, volatile T *a, T v) {
+    return func_xor(a, v);
+  }
 
-template<typename T>
-static T NoTsanAtomicCAS(volatile T *a, T c, T v, morder mo, morder fmo) {
-  NoTsanAtomicCAS(a, &c, v, mo, fmo);
-  return c;
-}
+  template <typename T>
+  static T Atomic(ThreadState *thr, uptr pc, morder mo, volatile T *a, T v) {
+    return AtomicRMW<T, func_xor>(thr, pc, a, v, mo);
+  }
+};
 
-template <typename T>
-static bool AtomicCAS(ThreadState *thr, uptr pc, volatile T *a, T *c, T v,
-                      morder mo, morder fmo) {
-  // 31.7.2.18: "The failure argument shall not be memory_order_release
-  // nor memory_order_acq_rel". LLVM (2021-05) fallbacks to Monotonic
-  // (mo_relaxed) when those are used.
-  DCHECK(IsLoadOrder(fmo));
+struct OpFetchNand {
+  template <typename T>
+  static T NoTsanAtomic(morder mo, volatile T *a, T v) {
+    return func_nand(a, v);
+  }
 
-  MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(), kAccessWrite | kAccessAtomic);
-  if (LIKELY(mo == mo_relaxed && fmo == mo_relaxed)) {
-    T cc = *c;
-    T pr = func_cas(a, cc, v);
-    if (pr == cc)
+  template <typename T>
+  static T Atomic(ThreadState *thr, uptr pc, morder mo, volatile T *a, T v) {
+    return AtomicRMW<T, func_nand>(thr, pc, a, v, mo);
+  }
+};
+
+struct OpCAS {
+  template <typename T>
+  static bool NoTsanAtomic(morder mo, morder fmo, volatile T *a, T *c, T v) {
+    return atomic_compare_exchange_strong(to_atomic(a), c, v, to_mo(mo));
+  }
+
+#if __TSAN_HAS_INT128
+  static bool NoTsanAtomic(morder mo, morder fmo, volatile a128 *a, a128 *c,
+                           a128 v) {
+    a128 old = *c;
+    a128 cur = func_cas(a, old, v);
+    if (cur == old)
       return true;
-    *c = pr;
+    *c = cur;
     return false;
   }
-  SlotLocker locker(thr);
-  bool release = IsReleaseOrder(mo);
-  bool success;
-  {
-    auto s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
-    RWLock lock(&s->mtx, release);
-    T cc = *c;
-    T pr = func_cas(a, cc, v);
-    success = pr == cc;
-    if (!success) {
+#endif
+
+  template <typename T>
+  static T NoTsanAtomic(morder mo, morder fmo, volatile T *a, T c, T v) {
+    NoTsanAtomic(mo, fmo, a, &c, v);
+    return c;
+  }
+
+  template <typename T>
+  static bool Atomic(ThreadState *thr, uptr pc, morder mo, morder fmo,
+                     volatile T *a, T *c, T v) {
+    // 31.7.2.18: "The failure argument shall not be memory_order_release
+    // nor memory_order_acq_rel". LLVM (2021-05) falls back to Monotonic
+    // (mo_relaxed) when those are used.
+    DCHECK(IsLoadOrder(fmo));
+
+    MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(),
+                 kAccessWrite | kAccessAtomic);
+    if (LIKELY(mo == mo_relaxed && fmo == mo_relaxed)) {
+      T cc = *c;
+      T pr = func_cas(a, cc, v);
+      if (pr == cc)
+        return true;
       *c = pr;
-      mo = fmo;
+      return false;
     }
-    if (success && IsAcqRelOrder(mo))
-      thr->clock.ReleaseAcquire(&s->clock);
-    else if (success && IsReleaseOrder(mo))
-      thr->clock.Release(&s->clock);
-    else if (IsAcquireOrder(mo))
-      thr->clock.Acquire(s->clock);
+    SlotLocker locker(thr);
+    bool release = IsReleaseOrder(mo);
+    bool success;
+    {
+      auto s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
+      RWLock lock(&s->mtx, release);
+      T cc = *c;
+      T pr = func_cas(a, cc, v);
+      success = pr == cc;
+      if (!success) {
+        *c = pr;
+        mo = fmo;
+      }
+      if (success && IsAcqRelOrder(mo))
+        thr->clock.ReleaseAcquire(&s->clock);
+      else if (success && IsReleaseOrder(mo))
+        thr->clock.Release(&s->clock);
+      else if (IsAcquireOrder(mo))
+        thr->clock.Acquire(s->clock);
+    }
+    if (success && release)
+      IncrementEpoch(thr);
+    return success;
   }
-  if (success && release)
-    IncrementEpoch(thr);
-  return success;
-}
 
-template<typename T>
-static T AtomicCAS(ThreadState *thr, uptr pc,
-    volatile T *a, T c, T v, morder mo, morder fmo) {
-  AtomicCAS(thr, pc, a, &c, v, mo, fmo);
-  return c;
-}
+  template <typename T>
+  static T Atomic(ThreadState *thr, uptr pc, morder mo, morder fmo,
+                  volatile T *a, T c, T v) {
+    Atomic(thr, pc, mo, fmo, a, &c, v);
+    return c;
+  }
+};
 
 #if !SANITIZER_GO
-static void NoTsanAtomicFence(morder mo) {
-  __sync_synchronize();
-}
+struct OpFence {
+  static void NoTsanAtomic(morder mo) { __sync_synchronize(); }
 
-static void AtomicFence(ThreadState *thr, uptr pc, morder mo) {
-  // FIXME(dvyukov): not implemented.
-  __sync_synchronize();
-}
+  static void Atomic(ThreadState *thr, uptr pc, morder mo) {
+    // FIXME(dvyukov): not implemented.
+    __sync_synchronize();
+  }
+};
 #endif
 
+}  // namespace
+
 // Interface functions follow.
 #if !SANITIZER_GO
 
 // C/C++
 
 static morder convert_morder(morder mo) {
-  if (flags()->force_seq_cst_atomics)
-    return (morder)mo_seq_cst;
+  return flags()->force_seq_cst_atomics ? mo_seq_cst : mo;
+}
 
+static morder to_morder(int mo) {
   // Filter out additional memory order flags:
   // MEMMODEL_SYNC        = 1 << 15
   // __ATOMIC_HLE_ACQUIRE = 1 << 16
@@ -482,468 +515,481 @@ static morder convert_morder(morder mo) {
   // since we use __sync_ atomics for actual atomic operations,
   // we can safely ignore it as well. It also subtly affects semantics,
   // but we don't model the difference.
-  return (morder)(mo & 0x7fff);
+  morder res = static_cast<morder>(static_cast<u8>(mo));
+  DCHECK_LE(res, mo_seq_cst);
+  return res;
 }
 
-#  define ATOMIC_IMPL(func, ...)                                \
-    ThreadState *const thr = cur_thread();                      \
-    ProcessPendingSignals(thr);                                 \
-    if (UNLIKELY(thr->ignore_sync || thr->ignore_interceptors)) \
-      return NoTsanAtomic##func(__VA_ARGS__);                   \
-    mo = convert_morder(mo);                                    \
-    return Atomic##func(thr, GET_CALLER_PC(), __VA_ARGS__);
+template <class Op, class... Types>
+ALWAYS_INLINE auto AtomicImpl(morder mo, Types... args) {
+  ThreadState *const thr = cur_thread();
+  ProcessPendingSignals(thr);
+  if (UNLIKELY(thr->ignore_sync || thr->ignore_interceptors))
+    return Op::NoTsanAtomic(mo, args...);
+  return Op::Atomic(thr, GET_CALLER_PC(), convert_morder(mo), args...);
+}
 
 extern "C" {
 SANITIZER_INTERFACE_ATTRIBUTE
-a8 __tsan_atomic8_load(const volatile a8 *a, morder mo) {
-  ATOMIC_IMPL(Load, a, mo);
+a8 __tsan_atomic8_load(const volatile a8 *a, int mo) {
+  return AtomicImpl<OpLoad>(to_morder(mo), a);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a16 __tsan_atomic16_load(const volatile a16 *a, morder mo) {
-  ATOMIC_IMPL(Load, a, mo);
+a16 __tsan_atomic16_load(const volatile a16 *a, int mo) {
+  return AtomicImpl<OpLoad>(to_morder(mo), a);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a32 __tsan_atomic32_load(const volatile a32 *a, morder mo) {
-  ATOMIC_IMPL(Load, a, mo);
+a32 __tsan_atomic32_load(const volatile a32 *a, int mo) {
+  return AtomicImpl<OpLoad>(to_morder(mo), a);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a64 __tsan_atomic64_load(const volatile a64 *a, morder mo) {
-  ATOMIC_IMPL(Load, a, mo);
+a64 __tsan_atomic64_load(const volatile a64 *a, int mo) {
+  return AtomicImpl<OpLoad>(to_morder(mo), a);
 }
 
-#if __TSAN_HAS_INT128
+#  if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
-a128 __tsan_atomic128_load(const volatile a128 *a, morder mo) {
-  ATOMIC_IMPL(Load, a, mo);
+a128 __tsan_atomic128_load(const volatile a128 *a, int mo) {
+  return AtomicImpl<OpLoad>(to_morder(mo), a);
 }
-#endif
+#  endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_atomic8_store(volatile a8 *a, a8 v, morder mo) {
-  ATOMIC_IMPL(Store, a, v, mo);
+void __tsan_atomic8_store(volatile a8 *a, a8 v, int mo) {
+  return AtomicImpl<OpStore>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_atomic16_store(volatile a16 *a, a16 v, morder mo) {
-  ATOMIC_IMPL(Store, a, v, mo);
+void __tsan_atomic16_store(volatile a16 *a, a16 v, int mo) {
+  return AtomicImpl<OpStore>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_atomic32_store(volatile a32 *a, a32 v, morder mo) {
-  ATOMIC_IMPL(Store, a, v, mo);
+void __tsan_atomic32_store(volatile a32 *a, a32 v, int mo) {
+  return AtomicImpl<OpStore>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_atomic64_store(volatile a64 *a, a64 v, morder mo) {
-  ATOMIC_IMPL(Store, a, v, mo);
+void __tsan_atomic64_store(volatile a64 *a, a64 v, int mo) {
+  return AtomicImpl<OpStore>(to_morder(mo), a, v);
 }
 
-#if __TSAN_HAS_INT128
+#  if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_atomic128_store(volatile a128 *a, a128 v, morder mo) {
-  ATOMIC_IMPL(Store, a, v, mo);
+void __tsan_atomic128_store(volatile a128 *a, a128 v, int mo) {
+  return AtomicImpl<OpStore>(to_morder(mo), a, v);
 }
-#endif
+#  endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a8 __tsan_atomic8_exchange(volatile a8 *a, a8 v, morder mo) {
-  ATOMIC_IMPL(Exchange, a, v, mo);
+a8 __tsan_atomic8_exchange(volatile a8 *a, a8 v, int mo) {
+  return AtomicImpl<OpExchange>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a16 __tsan_atomic16_exchange(volatile a16 *a, a16 v, morder mo) {
-  ATOMIC_IMPL(Exchange, a, v, mo);
+a16 __tsan_atomic16_exchange(volatile a16 *a, a16 v, int mo) {
+  return AtomicImpl<OpExchange>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a32 __tsan_atomic32_exchange(volatile a32 *a, a32 v, morder mo) {
-  ATOMIC_IMPL(Exchange, a, v, mo);
+a32 __tsan_atomic32_exchange(volatile a32 *a, a32 v, int mo) {
+  return AtomicImpl<OpExchange>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a64 __tsan_atomic64_exchange(volatile a64 *a, a64 v, morder mo) {
-  ATOMIC_IMPL(Exchange, a, v, mo);
+a64 __tsan_atomic64_exchange(volatile a64 *a, a64 v, int mo) {
+  return AtomicImpl<OpExchange>(to_morder(mo), a, v);
 }
 
-#if __TSAN_HAS_INT128
+#  if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
-a128 __tsan_atomic128_exchange(volatile a128 *a, a128 v, morder mo) {
-  ATOMIC_IMPL(Exchange, a, v, mo);
+a128 __tsan_atomic128_exchange(volatile a128 *a, a128 v, int mo) {
+  return AtomicImpl<OpExchange>(to_morder(mo), a, v);
 }
-#endif
+#  endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a8 __tsan_atomic8_fetch_add(volatile a8 *a, a8 v, morder mo) {
-  ATOMIC_IMPL(FetchAdd, a, v, mo);
+a8 __tsan_atomic8_fetch_add(volatile a8 *a, a8 v, int mo) {
+  return AtomicImpl<OpFetchAdd>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a16 __tsan_atomic16_fetch_add(volatile a16 *a, a16 v, morder mo) {
-  ATOMIC_IMPL(FetchAdd, a, v, mo);
+a16 __tsan_atomic16_fetch_add(volatile a16 *a, a16 v, int mo) {
+  return AtomicImpl<OpFetchAdd>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a32 __tsan_atomic32_fetch_add(volatile a32 *a, a32 v, morder mo) {
-  ATOMIC_IMPL(FetchAdd, a, v, mo);
+a32 __tsan_atomic32_fetch_add(volatile a32 *a, a32 v, int mo) {
+  return AtomicImpl<OpFetchAdd>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a64 __tsan_atomic64_fetch_add(volatile a64 *a, a64 v, morder mo) {
-  ATOMIC_IMPL(FetchAdd, a, v, mo);
+a64 __tsan_atomic64_fetch_add(volatile a64 *a, a64 v, int mo) {
+  return AtomicImpl<OpFetchAdd>(to_morder(mo), a, v);
 }
 
-#if __TSAN_HAS_INT128
+#  if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
-a128 __tsan_atomic128_fetch_add(volatile a128 *a, a128 v, morder mo) {
-  ATOMIC_IMPL(FetchAdd, a, v, mo);
+a128 __tsan_atomic128_fetch_add(volatile a128 *a, a128 v, int mo) {
+  return AtomicImpl<OpFetchAdd>(to_morder(mo), a, v);
 }
-#endif
+#  endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a8 __tsan_atomic8_fetch_sub(volatile a8 *a, a8 v, morder mo) {
-  ATOMIC_IMPL(FetchSub, a, v, mo);
+a8 __tsan_atomic8_fetch_sub(volatile a8 *a, a8 v, int mo) {
+  return AtomicImpl<OpFetchSub>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a16 __tsan_atomic16_fetch_sub(volatile a16 *a, a16 v, morder mo) {
-  ATOMIC_IMPL(FetchSub, a, v, mo);
+a16 __tsan_atomic16_fetch_sub(volatile a16 *a, a16 v, int mo) {
+  return AtomicImpl<OpFetchSub>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a32 __tsan_atomic32_fetch_sub(volatile a32 *a, a32 v, morder mo) {
-  ATOMIC_IMPL(FetchSub, a, v, mo);
+a32 __tsan_atomic32_fetch_sub(volatile a32 *a, a32 v, int mo) {
+  return AtomicImpl<OpFetchSub>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a64 __tsan_atomic64_fetch_sub(volatile a64 *a, a64 v, morder mo) {
-  ATOMIC_IMPL(FetchSub, a, v, mo);
+a64 __tsan_atomic64_fetch_sub(volatile a64 *a, a64 v, int mo) {
+  return AtomicImpl<OpFetchSub>(to_morder(mo), a, v);
 }
 
-#if __TSAN_HAS_INT128
+#  if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
-a128 __tsan_atomic128_fetch_sub(volatile a128 *a, a128 v, morder mo) {
-  ATOMIC_IMPL(FetchSub, a, v, mo);
+a128 __tsan_atomic128_fetch_sub(volatile a128 *a, a128 v, int mo) {
+  return AtomicImpl<OpFetchSub>(to_morder(mo), a, v);
 }
-#endif
+#  endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a8 __tsan_atomic8_fetch_and(volatile a8 *a, a8 v, morder mo) {
-  ATOMIC_IMPL(FetchAnd, a, v, mo);
+a8 __tsan_atomic8_fetch_and(volatile a8 *a, a8 v, int mo) {
+  return AtomicImpl<OpFetchAnd>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a16 __tsan_atomic16_fetch_and(volatile a16 *a, a16 v, morder mo) {
-  ATOMIC_IMPL(FetchAnd, a, v, mo);
+a16 __tsan_atomic16_fetch_and(volatile a16 *a, a16 v, int mo) {
+  return AtomicImpl<OpFetchAnd>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a32 __tsan_atomic32_fetch_and(volatile a32 *a, a32 v, morder mo) {
-  ATOMIC_IMPL(FetchAnd, a, v, mo);
+a32 __tsan_atomic32_fetch_and(volatile a32 *a, a32 v, int mo) {
+  return AtomicImpl<OpFetchAnd>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a64 __tsan_atomic64_fetch_and(volatile a64 *a, a64 v, morder mo) {
-  ATOMIC_IMPL(FetchAnd, a, v, mo);
+a64 __tsan_atomic64_fetch_and(volatile a64 *a, a64 v, int mo) {
+  return AtomicImpl<OpFetchAnd>(to_morder(mo), a, v);
 }
 
-#if __TSAN_HAS_INT128
+#  if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
-a128 __tsan_atomic128_fetch_and(volatile a128 *a, a128 v, morder mo) {
-  ATOMIC_IMPL(FetchAnd, a, v, mo);
+a128 __tsan_atomic128_fetch_and(volatile a128 *a, a128 v, int mo) {
+  return AtomicImpl<OpFetchAnd>(to_morder(mo), a, v);
 }
-#endif
+#  endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a8 __tsan_atomic8_fetch_or(volatile a8 *a, a8 v, morder mo) {
-  ATOMIC_IMPL(FetchOr, a, v, mo);
+a8 __tsan_atomic8_fetch_or(volatile a8 *a, a8 v, int mo) {
+  return AtomicImpl<OpFetchOr>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a16 __tsan_atomic16_fetch_or(volatile a16 *a, a16 v, morder mo) {
-  ATOMIC_IMPL(FetchOr, a, v, mo);
+a16 __tsan_atomic16_fetch_or(volatile a16 *a, a16 v, int mo) {
+  return AtomicImpl<OpFetchOr>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a32 __tsan_atomic32_fetch_or(volatile a32 *a, a32 v, morder mo) {
-  ATOMIC_IMPL(FetchOr, a, v, mo);
+a32 __tsan_atomic32_fetch_or(volatile a32 *a, a32 v, int mo) {
+  return AtomicImpl<OpFetchOr>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a64 __tsan_atomic64_fetch_or(volatile a64 *a, a64 v, morder mo) {
-  ATOMIC_IMPL(FetchOr, a, v, mo);
+a64 __tsan_atomic64_fetch_or(volatile a64 *a, a64 v, int mo) {
+  return AtomicImpl<OpFetchOr>(to_morder(mo), a, v);
 }
 
-#if __TSAN_HAS_INT128
+#  if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
-a128 __tsan_atomic128_fetch_or(volatile a128 *a, a128 v, morder mo) {
-  ATOMIC_IMPL(FetchOr, a, v, mo);
+a128 __tsan_atomic128_fetch_or(volatile a128 *a, a128 v, int mo) {
+  return AtomicImpl<OpFetchOr>(to_morder(mo), a, v);
 }
-#endif
+#  endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a8 __tsan_atomic8_fetch_xor(volatile a8 *a, a8 v, morder mo) {
-  ATOMIC_IMPL(FetchXor, a, v, mo);
+a8 __tsan_atomic8_fetch_xor(volatile a8 *a, a8 v, int mo) {
+  return AtomicImpl<OpFetchXor>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a16 __tsan_atomic16_fetch_xor(volatile a16 *a, a16 v, morder mo) {
-  ATOMIC_IMPL(FetchXor, a, v, mo);
+a16 __tsan_atomic16_fetch_xor(volatile a16 *a, a16 v, int mo) {
+  return AtomicImpl<OpFetchXor>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a32 __tsan_atomic32_fetch_xor(volatile a32 *a, a32 v, morder mo) {
-  ATOMIC_IMPL(FetchXor, a, v, mo);
+a32 __tsan_atomic32_fetch_xor(volatile a32 *a, a32 v, int mo) {
+  return AtomicImpl<OpFetchXor>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a64 __tsan_atomic64_fetch_xor(volatile a64 *a, a64 v, morder mo) {
-  ATOMIC_IMPL(FetchXor, a, v, mo);
+a64 __tsan_atomic64_fetch_xor(volatile a64 *a, a64 v, int mo) {
+  return AtomicImpl<OpFetchXor>(to_morder(mo), a, v);
 }
 
-#if __TSAN_HAS_INT128
+#  if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
-a128 __tsan_atomic128_fetch_xor(volatile a128 *a, a128 v, morder mo) {
-  ATOMIC_IMPL(FetchXor, a, v, mo);
+a128 __tsan_atomic128_fetch_xor(volatile a128 *a, a128 v, int mo) {
+  return AtomicImpl<OpFetchXor>(to_morder(mo), a, v);
 }
-#endif
+#  endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a8 __tsan_atomic8_fetch_nand(volatile a8 *a, a8 v, morder mo) {
-  ATOMIC_IMPL(FetchNand, a, v, mo);
+a8 __tsan_atomic8_fetch_nand(volatile a8 *a, a8 v, int mo) {
+  return AtomicImpl<OpFetchNand>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a16 __tsan_atomic16_fetch_nand(volatile a16 *a, a16 v, morder mo) {
-  ATOMIC_IMPL(FetchNand, a, v, mo);
+a16 __tsan_atomic16_fetch_nand(volatile a16 *a, a16 v, int mo) {
+  return AtomicImpl<OpFetchNand>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a32 __tsan_atomic32_fetch_nand(volatile a32 *a, a32 v, morder mo) {
-  ATOMIC_IMPL(FetchNand, a, v, mo);
+a32 __tsan_atomic32_fetch_nand(volatile a32 *a, a32 v, int mo) {
+  return AtomicImpl<OpFetchNand>(to_morder(mo), a, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a64 __tsan_atomic64_fetch_nand(volatile a64 *a, a64 v, morder mo) {
-  ATOMIC_IMPL(FetchNand, a, v, mo);
+a64 __tsan_atomic64_fetch_nand(volatile a64 *a, a64 v, int mo) {
+  return AtomicImpl<OpFetchNand>(to_morder(mo), a, v);
 }
 
-#if __TSAN_HAS_INT128
+#  if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
-a128 __tsan_atomic128_fetch_nand(volatile a128 *a, a128 v, morder mo) {
-  ATOMIC_IMPL(FetchNand, a, v, mo);
+a128 __tsan_atomic128_fetch_nand(volatile a128 *a, a128 v, int mo) {
+  return AtomicImpl<OpFetchNand>(to_morder(mo), a, v);
 }
-#endif
+#  endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-int __tsan_atomic8_compare_exchange_strong(volatile a8 *a, a8 *c, a8 v,
-    morder mo, morder fmo) {
-  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+int __tsan_atomic8_compare_exchange_strong(volatile a8 *a, a8 *c, a8 v, int mo,
+                                           int fmo) {
+  return AtomicImpl<OpCAS>(to_morder(mo), to_morder(fmo), a, c, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_atomic16_compare_exchange_strong(volatile a16 *a, a16 *c, a16 v,
-    morder mo, morder fmo) {
-  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+                                            int mo, int fmo) {
+  return AtomicImpl<OpCAS>(to_morder(mo), to_morder(fmo), a, c, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_atomic32_compare_exchange_strong(volatile a32 *a, a32 *c, a32 v,
-    morder mo, morder fmo) {
-  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+                                            int mo, int fmo) {
+  return AtomicImpl<OpCAS>(to_morder(mo), to_morder(fmo), a, c, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_atomic64_compare_exchange_strong(volatile a64 *a, a64 *c, a64 v,
-    morder mo, morder fmo) {
-  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+                                            int mo, int fmo) {
+  return AtomicImpl<OpCAS>(to_morder(mo), to_morder(fmo), a, c, v);
 }
 
-#if __TSAN_HAS_INT128
+#  if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_atomic128_compare_exchange_strong(volatile a128 *a, a128 *c, a128 v,
-    morder mo, morder fmo) {
-  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+                                             int mo, int fmo) {
+  return AtomicImpl<OpCAS>(to_morder(mo), to_morder(fmo), a, c, v);
 }
-#endif
+#  endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-int __tsan_atomic8_compare_exchange_weak(volatile a8 *a, a8 *c, a8 v,
-    morder mo, morder fmo) {
-  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+int __tsan_atomic8_compare_exchange_weak(volatile a8 *a, a8 *c, a8 v, int mo,
+                                         int fmo) {
+  return AtomicImpl<OpCAS>(to_morder(mo), to_morder(fmo), a, c, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_atomic16_compare_exchange_weak(volatile a16 *a, a16 *c, a16 v,
-    morder mo, morder fmo) {
-  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+                                          int mo, int fmo) {
+  return AtomicImpl<OpCAS>(to_morder(mo), to_morder(fmo), a, c, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_atomic32_compare_exchange_weak(volatile a32 *a, a32 *c, a32 v,
-    morder mo, morder fmo) {
-  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+                                          int mo, int fmo) {
+  return AtomicImpl<OpCAS>(to_morder(mo), to_morder(fmo), a, c, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_atomic64_compare_exchange_weak(volatile a64 *a, a64 *c, a64 v,
-    morder mo, morder fmo) {
-  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+                                          int mo, int fmo) {
+  return AtomicImpl<OpCAS>(to_morder(mo), to_morder(fmo), a, c, v);
 }
 
-#if __TSAN_HAS_INT128
+#  if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
 int __tsan_atomic128_compare_exchange_weak(volatile a128 *a, a128 *c, a128 v,
-    morder mo, morder fmo) {
-  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+                                           int mo, int fmo) {
+  return AtomicImpl<OpCAS>(to_morder(mo), to_morder(fmo), a, c, v);
 }
-#endif
+#  endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a8 __tsan_atomic8_compare_exchange_val(volatile a8 *a, a8 c, a8 v,
-    morder mo, morder fmo) {
-  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+a8 __tsan_atomic8_compare_exchange_val(volatile a8 *a, a8 c, a8 v, int mo,
+                                       int fmo) {
+  return AtomicImpl<OpCAS>(to_morder(mo), to_morder(fmo), a, c, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a16 __tsan_atomic16_compare_exchange_val(volatile a16 *a, a16 c, a16 v,
-    morder mo, morder fmo) {
-  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+a16 __tsan_atomic16_compare_exchange_val(volatile a16 *a, a16 c, a16 v, int mo,
+                                         int fmo) {
+  return AtomicImpl<OpCAS>(to_morder(mo), to_morder(fmo), a, c, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a32 __tsan_atomic32_compare_exchange_val(volatile a32 *a, a32 c, a32 v,
-    morder mo, morder fmo) {
-  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+a32 __tsan_atomic32_compare_exchange_val(volatile a32 *a, a32 c, a32 v, int mo,
+                                         int fmo) {
+  return AtomicImpl<OpCAS>(to_morder(mo), to_morder(fmo), a, c, v);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-a64 __tsan_atomic64_compare_exchange_val(volatile a64 *a, a64 c, a64 v,
-    morder mo, morder fmo) {
-  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+a64 __tsan_atomic64_compare_exchange_val(volatile a64 *a, a64 c, a64 v, int mo,
+                                         int fmo) {
+  return AtomicImpl<OpCAS>(to_morder(mo), to_morder(fmo), a, c, v);
 }
 
-#if __TSAN_HAS_INT128
+#  if __TSAN_HAS_INT128
 SANITIZER_INTERFACE_ATTRIBUTE
 a128 __tsan_atomic128_compare_exchange_val(volatile a128 *a, a128 c, a128 v,
-    morder mo, morder fmo) {
-  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+                                           int mo, int fmo) {
+  return AtomicImpl<OpCAS>(to_morder(mo), to_morder(fmo), a, c, v);
 }
-#endif
+#  endif
 
 SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_atomic_thread_fence(morder mo) { ATOMIC_IMPL(Fence, mo); }
+void __tsan_atomic_thread_fence(int mo) {
+  return AtomicImpl<OpFence>(to_morder(mo));
+}
 
 SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_atomic_signal_fence(morder mo) {
-}
+void __tsan_atomic_signal_fence(int mo) {}
 }  // extern "C"
 
 #else  // #if !SANITIZER_GO
 
 // Go
 
-#  define ATOMIC(func, ...)               \
-    if (thr->ignore_sync) {               \
-      NoTsanAtomic##func(__VA_ARGS__);    \
-    } else {                              \
-      FuncEntry(thr, cpc);                \
-      Atomic##func(thr, pc, __VA_ARGS__); \
-      FuncExit(thr);                      \
-    }
+template <class Op, class... Types>
+void AtomicGo(ThreadState *thr, uptr cpc, uptr pc, Types... args) {
+  if (thr->ignore_sync) {
+    (void)Op::NoTsanAtomic(args...);
+  } else {
+    FuncEntry(thr, cpc);
+    (void)Op::Atomic(thr, pc, args...);
+    FuncExit(thr);
+  }
+}
 
-#  define ATOMIC_RET(func, ret, ...)              \
-    if (thr->ignore_sync) {                       \
-      (ret) = NoTsanAtomic##func(__VA_ARGS__);    \
-    } else {                                      \
-      FuncEntry(thr, cpc);                        \
-      (ret) = Atomic##func(thr, pc, __VA_ARGS__); \
-      FuncExit(thr);                              \
-    }
+template <class Op, class... Types>
+auto AtomicGoRet(ThreadState *thr, uptr cpc, uptr pc, Types... args) {
+  if (thr->ignore_sync) {
+    return Op::NoTsanAtomic(args...);
+  } else {
+    FuncEntry(thr, cpc);
+    auto ret = Op::Atomic(thr, pc, args...);
+    FuncExit(thr);
+    return ret;
+  }
+}
 
 extern "C" {
 SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_go_atomic32_load(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
-  ATOMIC_RET(Load, *(a32*)(a+8), *(a32**)a, mo_acquire);
+  *(a32 *)(a + 8) = AtomicGoRet<OpLoad>(thr, cpc, pc, mo_acquire, *(a32 **)a);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_go_atomic64_load(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
-  ATOMIC_RET(Load, *(a64*)(a+8), *(a64**)a, mo_acquire);
+  *(a64 *)(a + 8) = AtomicGoRet<OpLoad>(thr, cpc, pc, mo_acquire, *(a64 **)a);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_go_atomic32_store(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
-  ATOMIC(Store, *(a32**)a, *(a32*)(a+8), mo_release);
+  AtomicGo<OpStore>(thr, cpc, pc, mo_release, *(a32 **)a, *(a32 *)(a + 8));
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_go_atomic64_store(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
-  ATOMIC(Store, *(a64**)a, *(a64*)(a+8), mo_release);
+  AtomicGo<OpStore>(thr, cpc, pc, mo_release, *(a64 **)a, *(a64 *)(a + 8));
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_go_atomic32_fetch_add(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
-  ATOMIC_RET(FetchAdd, *(a32*)(a+16), *(a32**)a, *(a32*)(a+8), mo_acq_rel);
+  *(a32 *)(a + 16) = AtomicGoRet<OpFetchAdd>(thr, cpc, pc, mo_acq_rel,
+                                             *(a32 **)a, *(a32 *)(a + 8));
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_go_atomic64_fetch_add(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
-  ATOMIC_RET(FetchAdd, *(a64*)(a+16), *(a64**)a, *(a64*)(a+8), mo_acq_rel);
+  *(a64 *)(a + 16) = AtomicGoRet<OpFetchAdd>(thr, cpc, pc, mo_acq_rel,
+                                             *(a64 **)a, *(a64 *)(a + 8));
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_go_atomic32_fetch_and(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
-  ATOMIC_RET(FetchAnd, *(a32 *)(a + 16), *(a32 **)a, *(a32 *)(a + 8),
-             mo_acq_rel);
+  *(a32 *)(a + 16) = AtomicGoRet<OpFetchAnd>(thr, cpc, pc, mo_acq_rel,
+                                             *(a32 **)a, *(a32 *)(a + 8));
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_go_atomic64_fetch_and(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
-  ATOMIC_RET(FetchAnd, *(a64 *)(a + 16), *(a64 **)a, *(a64 *)(a + 8),
-             mo_acq_rel);
+  *(a64 *)(a + 16) = AtomicGoRet<OpFetchAnd>(thr, cpc, pc, mo_acq_rel,
+                                             *(a64 **)a, *(a64 *)(a + 8));
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_go_atomic32_fetch_or(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
-  ATOMIC_RET(FetchOr, *(a32 *)(a + 16), *(a32 **)a, *(a32 *)(a + 8),
-             mo_acq_rel);
+  *(a32 *)(a + 16) = AtomicGoRet<OpFetchOr>(thr, cpc, pc, mo_acq_rel,
+                                            *(a32 **)a, *(a32 *)(a + 8));
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_go_atomic64_fetch_or(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
-  ATOMIC_RET(FetchOr, *(a64 *)(a + 16), *(a64 **)a, *(a64 *)(a + 8),
-             mo_acq_rel);
+  *(a64 *)(a + 16) = AtomicGoRet<OpFetchOr>(thr, cpc, pc, mo_acq_rel,
+                                            *(a64 **)a, *(a64 *)(a + 8));
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_go_atomic32_exchange(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
-  ATOMIC_RET(Exchange, *(a32*)(a+16), *(a32**)a, *(a32*)(a+8), mo_acq_rel);
+  *(a32 *)(a + 16) = AtomicGoRet<OpExchange>(thr, cpc, pc, mo_acq_rel,
+                                             *(a32 **)a, *(a32 *)(a + 8));
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_go_atomic64_exchange(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
-  ATOMIC_RET(Exchange, *(a64*)(a+16), *(a64**)a, *(a64*)(a+8), mo_acq_rel);
+  *(a64 *)(a + 16) = AtomicGoRet<OpExchange>(thr, cpc, pc, mo_acq_rel,
+                                             *(a64 **)a, *(a64 *)(a + 8));
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_go_atomic32_compare_exchange(
-    ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
-  a32 cur = 0;
-  a32 cmp = *(a32*)(a+8);
-  ATOMIC_RET(CAS, cur, *(a32**)a, cmp, *(a32*)(a+12), mo_acq_rel, mo_acquire);
-  *(bool*)(a+16) = (cur == cmp);
+void __tsan_go_atomic32_compare_exchange(ThreadState *thr, uptr cpc, uptr pc,
+                                         u8 *a) {
+  a32 cmp = *(a32 *)(a + 8);
+  a32 cur = AtomicGoRet<OpCAS>(thr, cpc, pc, mo_acq_rel, mo_acquire, *(a32 **)a,
+                               cmp, *(a32 *)(a + 12));
+  *(bool *)(a + 16) = (cur == cmp);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-void __tsan_go_atomic64_compare_exchange(
-    ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
-  a64 cur = 0;
-  a64 cmp = *(a64*)(a+8);
-  ATOMIC_RET(CAS, cur, *(a64**)a, cmp, *(a64*)(a+16), mo_acq_rel, mo_acquire);
-  *(bool*)(a+24) = (cur == cmp);
+void __tsan_go_atomic64_compare_exchange(ThreadState *thr, uptr cpc, uptr pc,
+                                         u8 *a) {
+  a64 cmp = *(a64 *)(a + 8);
+  a64 cur = AtomicGoRet<OpCAS>(thr, cpc, pc, mo_acq_rel, mo_acquire, *(a64 **)a,
+                               cmp, *(a64 *)(a + 16));
+  *(bool *)(a + 24) = (cur == cmp);
 }
 }  // extern "C"
 #endif  // #if !SANITIZER_GO
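
Note on the hunk above: the LLVM 20 update replaces the old ATOMIC_IMPL/ATOMIC/ATOMIC_RET macros with per-operation structs (OpLoad, OpStore, OpCAS, OpFence, ...) and a single variadic dispatcher (AtomicImpl, AtomicGo, AtomicGoRet). Below is a minimal standalone sketch of that dispatch pattern only. MockThread, g_thread, and the simplified to_morder are illustrative stand-ins rather than the real TSan runtime, and the atomic builtin assumes a GCC/Clang-style compiler (C++14 or later).

// Sketch: one templated dispatcher replaces token-pasting macros.
#include <cstdio>

enum morder { mo_relaxed = 0, mo_acquire = 2, mo_seq_cst = 5 };

// Mirrors the masking in to_morder(): extra bits such as
// __ATOMIC_HLE_ACQUIRE (1 << 16) may be OR-ed into the constant,
// so only the low byte is kept.
static morder to_morder(int mo) {
  return static_cast<morder>(static_cast<unsigned char>(mo));
}

struct MockThread {
  bool ignore_sync = false;  // stand-in for thr->ignore_sync
};
static MockThread g_thread;
static MockThread *cur_thread() { return &g_thread; }

// Each operation becomes a struct with an uninstrumented fast path and
// an instrumented path, instead of NoTsanAtomicFoo/AtomicFoo free
// functions selected by ##-pasting inside ATOMIC_IMPL.
struct OpLoad {
  template <typename T>
  static T NoTsanAtomic(morder, const volatile T *a) {
    // Plain sequentially consistent load; the dispatch shape, not the
    // memory-model handling, is what this sketch demonstrates.
    return __atomic_load_n(a, __ATOMIC_SEQ_CST);
  }
  template <typename T>
  static T Atomic(MockThread *, morder mo, const volatile T *a) {
    // The real runtime would also update race-detector state here.
    return NoTsanAtomic(mo, a);
  }
};

// The variadic dispatcher: picks the fast path when instrumentation is
// suppressed for the current thread, exactly like the old macro did.
template <class Op, class... Args>
auto AtomicImpl(morder mo, Args... args) {
  MockThread *thr = cur_thread();
  if (thr->ignore_sync)
    return Op::NoTsanAtomic(mo, args...);
  return Op::Atomic(thr, mo, args...);
}

int main() {
  int x = 42;
  std::printf("%d\n", AtomicImpl<OpLoad>(to_morder(2), &x));
}

Compared with the macro form, this keeps the ignore_sync fast path in one place and lets the compiler type-check each Op, which is why the interface functions above shrink to one-line AtomicImpl<OpXxx>(...) calls.
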
lib/tsan/tsan_mman.cpp
@@ -252,7 +252,7 @@ void *user_reallocarray(ThreadState *thr, uptr pc, void *p, uptr size, uptr n) {
     if (AllocatorMayReturnNull())
       return SetErrnoOnNull(nullptr);
     GET_STACK_TRACE_FATAL(thr, pc);
-    ReportReallocArrayOverflow(size, n, &stack);
+    ReportReallocArrayOverflow(n, size, &stack);
   }
   return user_realloc(thr, pc, p, size * n);
 }
lib/tsan/tsan_platform_linux.cpp
@@ -418,7 +418,6 @@ void InitializePlatform() {
     Die();
   }
 
-  InitTlsSize();
 #endif  // !SANITIZER_GO
 }
 
lib/tsan/tsan_rtl.cpp
@@ -673,7 +673,8 @@ void CheckUnwind() {
   thr->ignore_reads_and_writes++;
   atomic_store_relaxed(&thr->in_signal_handler, 0);
 #endif
-  PrintCurrentStackSlow(StackTrace::GetCurrentPc());
+  PrintCurrentStack(StackTrace::GetCurrentPc(),
+                    common_flags()->fast_unwind_on_fatal);
 }
 
 bool is_initialized;
@@ -806,6 +807,7 @@ int Finalize(ThreadState *thr) {
 
 #if !SANITIZER_GO
 void ForkBefore(ThreadState* thr, uptr pc) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+  VReport(2, "BeforeFork tid: %llu\n", GetTid());
   GlobalProcessorLock();
   // Detaching from the slot makes OnUserFree skip writing to the shadow.
   // The slot will be locked so any attempts to use it will deadlock anyway.
@@ -847,6 +849,7 @@ static void ForkAfter(ThreadState* thr,
   SlotAttachAndLock(thr);
   SlotUnlock(thr);
   GlobalProcessorUnlock();
+  VReport(2, "AfterFork tid: %llu\n", GetTid());
 }
 
 void ForkParentAfter(ThreadState* thr, uptr pc) { ForkAfter(thr, false); }
lib/tsan/tsan_rtl.h
@@ -514,7 +514,7 @@ bool IsExpectedReport(uptr addr, uptr size);
 StackID CurrentStackId(ThreadState *thr, uptr pc);
 ReportStack *SymbolizeStackId(StackID stack_id);
 void PrintCurrentStack(ThreadState *thr, uptr pc);
-void PrintCurrentStackSlow(uptr pc);  // uses libunwind
+void PrintCurrentStack(uptr pc, bool fast);  // may use libunwind
 MBlock *JavaHeapBlock(uptr addr, uptr *start);
 
 void Initialize(ThreadState *thr);
lib/tsan/tsan_rtl_report.cpp
@@ -828,18 +828,18 @@ void PrintCurrentStack(ThreadState *thr, uptr pc) {
   PrintStack(SymbolizeStack(trace));
 }
 
-// Always inlining PrintCurrentStackSlow, because LocatePcInTrace assumes
+// Always inlining PrintCurrentStack, because LocatePcInTrace assumes
 // __sanitizer_print_stack_trace exists in the actual unwinded stack, but
-// tail-call to PrintCurrentStackSlow breaks this assumption because
+// tail-call to PrintCurrentStack breaks this assumption because
 // __sanitizer_print_stack_trace disappears after tail-call.
 // However, this solution is not reliable enough, please see dvyukov's comment
 // http://reviews.llvm.org/D19148#406208
 // Also see PR27280 comment 2 and 3 for breaking examples and analysis.
-ALWAYS_INLINE USED void PrintCurrentStackSlow(uptr pc) {
+ALWAYS_INLINE USED void PrintCurrentStack(uptr pc, bool fast) {
 #if !SANITIZER_GO
   uptr bp = GET_CURRENT_FRAME();
   auto *ptrace = New<BufferedStackTrace>();
-  ptrace->Unwind(pc, bp, nullptr, false);
+  ptrace->Unwind(pc, bp, nullptr, fast);
 
   for (uptr i = 0; i < ptrace->size / 2; i++) {
     uptr tmp = ptrace->trace_buffer[i];
@@ -857,6 +857,6 @@ using namespace __tsan;
 extern "C" {
 SANITIZER_INTERFACE_ATTRIBUTE
 void __sanitizer_print_stack_trace() {
-  PrintCurrentStackSlow(StackTrace::GetCurrentPc());
+  PrintCurrentStack(StackTrace::GetCurrentPc(), false);
 }
 }  // extern "C"
lib/tsan/tsan_rtl_thread.cpp
@@ -165,14 +165,16 @@ void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id,
 #endif
 
   uptr stk_addr = 0;
-  uptr stk_size = 0;
+  uptr stk_end = 0;
   uptr tls_addr = 0;
-  uptr tls_size = 0;
+  uptr tls_end = 0;
 #if !SANITIZER_GO
   if (thread_type != ThreadType::Fiber)
-    GetThreadStackAndTls(tid == kMainTid, &stk_addr, &stk_size, &tls_addr,
-                         &tls_size);
+    GetThreadStackAndTls(tid == kMainTid, &stk_addr, &stk_end, &tls_addr,
+                         &tls_end);
 #endif
+  uptr stk_size = stk_end - stk_addr;
+  uptr tls_size = tls_end - tls_addr;
   thr->stk_addr = stk_addr;
   thr->stk_size = stk_size;
   thr->tls_addr = tls_addr;
lib/tsan/tsan_spinlock_defs_mac.h
@@ -1,45 +0,0 @@
-//===-- tsan_spinlock_defs_mac.h -------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of ThreadSanitizer (TSan), a race detector.
-//
-// Mac-specific forward-declared function defintions that may be
-// deprecated in later versions of the OS.
-// These are needed for interceptors.
-//
-//===----------------------------------------------------------------------===//
-
-#if SANITIZER_APPLE
-
-#ifndef TSAN_SPINLOCK_DEFS_MAC_H
-#define TSAN_SPINLOCK_DEFS_MAC_H
-
-#include <stdint.h>
-
-extern "C" {
-
-/*
-Provides forward declarations related to OSSpinLocks on Darwin. These functions are
-deprecated on macOS version 10.12 and later,
-and are no longer included in the system headers.
-
-However, the symbols are still available on the system, so we provide these forward
-declarations to prevent compilation errors in tsan_interceptors_mac.cpp, which
-references these functions when defining TSAN interceptor functions.
-*/
-
-typedef int32_t OSSpinLock;
-
-void OSSpinLockLock(volatile OSSpinLock *__lock);
-void OSSpinLockUnlock(volatile OSSpinLock *__lock);
-bool OSSpinLockTry(volatile OSSpinLock *__lock);
-
-}
-
-#endif //TSAN_SPINLOCK_DEFS_MAC_H
-#endif // SANITIZER_APPLE
src/libtsan.zig
@@ -410,9 +410,6 @@ const sanitizer_common_sources = [_][]const u8{
     "sanitizer_allocator.cpp",
     "sanitizer_chained_origin_depot.cpp",
     "sanitizer_common.cpp",
-    "sanitizer_coverage_win_dll_thunk.cpp",
-    "sanitizer_coverage_win_dynamic_runtime_thunk.cpp",
-    "sanitizer_coverage_win_weak_interception.cpp",
     "sanitizer_deadlock_detector1.cpp",
     "sanitizer_deadlock_detector2.cpp",
     "sanitizer_errno.cpp",
@@ -452,9 +449,7 @@ const sanitizer_common_sources = [_][]const u8{
     "sanitizer_tls_get_addr.cpp",
     "sanitizer_type_traits.cpp",
     "sanitizer_win.cpp",
-    "sanitizer_win_dll_thunk.cpp",
-    "sanitizer_win_dynamic_runtime_thunk.cpp",
-    "sanitizer_win_weak_interception.cpp",
+    "sanitizer_win_interception.cpp",
 };
 
 const sanitizer_nolibc_sources = [_][]const u8{
@@ -490,6 +485,7 @@ const sanitizer_symbolizer_sources = [_][]const u8{
     "sanitizer_symbolizer_report.cpp",
     "sanitizer_symbolizer_report_fuchsia.cpp",
     "sanitizer_symbolizer_win.cpp",
+    "sanitizer_thread_history.cpp",
     "sanitizer_unwind_linux_libcdep.cpp",
     "sanitizer_unwind_fuchsia.cpp",
     "sanitizer_unwind_win.cpp",