Commit a40cdad18c

Alex Rønne Petersen <alex@alexrp.com>
2024-09-24 13:47:29
tsan: Update to LLVM 19.1.0.
1 parent 7f6b7c5
Changed files (129)
lib
tsan
builtins
interception
sanitizer_common
src
lib/tsan/builtins/assembly.h
@@ -0,0 +1,293 @@
+//===-- assembly.h - compiler-rt assembler support macros -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines macros for use in compiler-rt assembler source.
+// This file is not part of the interface of this library.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef COMPILERRT_ASSEMBLY_H
+#define COMPILERRT_ASSEMBLY_H
+
+#if defined(__linux__) && defined(__CET__)
+#if __has_include(<cet.h>)
+#include <cet.h>
+#endif
+#endif
+
+#if defined(__APPLE__) && defined(__aarch64__)
+#define SEPARATOR %%
+#else
+#define SEPARATOR ;
+#endif
+
+#if defined(__APPLE__)
+#define HIDDEN(name) .private_extern name
+#define LOCAL_LABEL(name) L_##name
+// tell linker it can break up file at label boundaries
+#define FILE_LEVEL_DIRECTIVE .subsections_via_symbols
+#define SYMBOL_IS_FUNC(name)
+#define CONST_SECTION .const
+
+#define NO_EXEC_STACK_DIRECTIVE
+
+#elif defined(__ELF__)
+
+#define HIDDEN(name) .hidden name
+#define LOCAL_LABEL(name) .L_##name
+#define FILE_LEVEL_DIRECTIVE
+#if defined(__arm__) || defined(__aarch64__)
+#define SYMBOL_IS_FUNC(name) .type name,%function
+#else
+#define SYMBOL_IS_FUNC(name) .type name,@function
+#endif
+#define CONST_SECTION .section .rodata
+
+#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) ||        \
+    defined(__linux__)
+#define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits
+#else
+#define NO_EXEC_STACK_DIRECTIVE
+#endif
+
+#else // !__APPLE__ && !__ELF__
+
+#define HIDDEN(name)
+#define LOCAL_LABEL(name) .L ## name
+#define FILE_LEVEL_DIRECTIVE
+#define SYMBOL_IS_FUNC(name)                                                   \
+  .def name SEPARATOR                                                          \
+    .scl 2 SEPARATOR                                                           \
+    .type 32 SEPARATOR                                                         \
+  .endef
+#define CONST_SECTION .section .rdata,"rd"
+
+#define NO_EXEC_STACK_DIRECTIVE
+
+#endif
+
+#if defined(__arm__) || defined(__aarch64__)
+#define FUNC_ALIGN                                                             \
+  .text SEPARATOR                                                              \
+  .balign 16 SEPARATOR
+#else
+#define FUNC_ALIGN
+#endif
+
+// BTI and PAC gnu property note
+#define NT_GNU_PROPERTY_TYPE_0 5
+#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
+#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI 1
+#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC 2
+
+#if defined(__ARM_FEATURE_BTI_DEFAULT)
+#define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI
+#else
+#define BTI_FLAG 0
+#endif
+
+#if __ARM_FEATURE_PAC_DEFAULT & 3
+#define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC
+#else
+#define PAC_FLAG 0
+#endif
+
+#define GNU_PROPERTY(type, value)                                              \
+  .pushsection .note.gnu.property, "a" SEPARATOR                               \
+  .p2align 3 SEPARATOR                                                         \
+  .word 4 SEPARATOR                                                            \
+  .word 16 SEPARATOR                                                           \
+  .word NT_GNU_PROPERTY_TYPE_0 SEPARATOR                                       \
+  .asciz "GNU" SEPARATOR                                                       \
+  .word type SEPARATOR                                                         \
+  .word 4 SEPARATOR                                                            \
+  .word value SEPARATOR                                                        \
+  .word 0 SEPARATOR                                                            \
+  .popsection
+
+#if BTI_FLAG != 0
+#define BTI_C hint #34
+#define BTI_J hint #36
+#else
+#define BTI_C
+#define BTI_J
+#endif
+
+#if (BTI_FLAG | PAC_FLAG) != 0
+#define GNU_PROPERTY_BTI_PAC                                                   \
+  GNU_PROPERTY(GNU_PROPERTY_AARCH64_FEATURE_1_AND, BTI_FLAG | PAC_FLAG)
+#else
+#define GNU_PROPERTY_BTI_PAC
+#endif
+
+#if defined(__clang__) || defined(__GCC_HAVE_DWARF2_CFI_ASM)
+#define CFI_START .cfi_startproc
+#define CFI_END .cfi_endproc
+#else
+#define CFI_START
+#define CFI_END
+#endif
+
+#if defined(__arm__)
+
+// Determine actual [ARM][THUMB[1][2]] ISA using compiler predefined macros:
+// - for '-mthumb -march=armv6' compiler defines '__thumb__'
+// - for '-mthumb -march=armv7' compiler defines '__thumb__' and '__thumb2__'
+#if defined(__thumb2__) || defined(__thumb__)
+#define DEFINE_CODE_STATE .thumb SEPARATOR
+#define DECLARE_FUNC_ENCODING    .thumb_func SEPARATOR
+#if defined(__thumb2__)
+#define USE_THUMB_2
+#define IT(cond)  it cond
+#define ITT(cond) itt cond
+#define ITE(cond) ite cond
+#else
+#define USE_THUMB_1
+#define IT(cond)
+#define ITT(cond)
+#define ITE(cond)
+#endif // defined(__thumb2__)
+#else // !defined(__thumb2__) && !defined(__thumb__)
+#define DEFINE_CODE_STATE .arm SEPARATOR
+#define DECLARE_FUNC_ENCODING
+#define IT(cond)
+#define ITT(cond)
+#define ITE(cond)
+#endif
+
+#if defined(USE_THUMB_1) && defined(USE_THUMB_2)
+#error "USE_THUMB_1 and USE_THUMB_2 can't be defined together."
+#endif
+
+#if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5
+#define ARM_HAS_BX
+#endif
+#if !defined(__ARM_FEATURE_CLZ) && !defined(USE_THUMB_1) &&  \
+    (__ARM_ARCH >= 6 || (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__)))
+#define __ARM_FEATURE_CLZ
+#endif
+
+#ifdef ARM_HAS_BX
+#define JMP(r) bx r
+#define JMPc(r, c) bx##c r
+#else
+#define JMP(r) mov pc, r
+#define JMPc(r, c) mov##c pc, r
+#endif
+
+// pop {pc} can't switch Thumb mode on ARMv4T
+#if __ARM_ARCH >= 5
+#define POP_PC() pop {pc}
+#else
+#define POP_PC()                                                               \
+  pop {ip};                                                                    \
+  JMP(ip)
+#endif
+
+#if defined(USE_THUMB_2)
+#define WIDE(op) op.w
+#else
+#define WIDE(op) op
+#endif
+#else // !defined(__arm__)
+#define DECLARE_FUNC_ENCODING
+#define DEFINE_CODE_STATE
+#endif
+
+#define GLUE2_(a, b) a##b
+#define GLUE(a, b) GLUE2_(a, b)
+#define GLUE2(a, b) GLUE2_(a, b)
+#define GLUE3_(a, b, c) a##b##c
+#define GLUE3(a, b, c) GLUE3_(a, b, c)
+#define GLUE4_(a, b, c, d) a##b##c##d
+#define GLUE4(a, b, c, d) GLUE4_(a, b, c, d)
+
+#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name)
+
+#ifdef VISIBILITY_HIDDEN
+#define DECLARE_SYMBOL_VISIBILITY(name)                                        \
+  HIDDEN(SYMBOL_NAME(name)) SEPARATOR
+#define DECLARE_SYMBOL_VISIBILITY_UNMANGLED(name) \
+  HIDDEN(name) SEPARATOR
+#else
+#define DECLARE_SYMBOL_VISIBILITY(name)
+#define DECLARE_SYMBOL_VISIBILITY_UNMANGLED(name)
+#endif
+
+#define DEFINE_COMPILERRT_FUNCTION(name)                                       \
+  DEFINE_CODE_STATE                                                            \
+  FILE_LEVEL_DIRECTIVE SEPARATOR                                               \
+  .globl SYMBOL_NAME(name) SEPARATOR                                           \
+  SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR                                  \
+  DECLARE_SYMBOL_VISIBILITY(name)                                              \
+  DECLARE_FUNC_ENCODING                                                        \
+  SYMBOL_NAME(name):
+
+#define DEFINE_COMPILERRT_THUMB_FUNCTION(name)                                 \
+  DEFINE_CODE_STATE                                                            \
+  FILE_LEVEL_DIRECTIVE SEPARATOR                                               \
+  .globl SYMBOL_NAME(name) SEPARATOR                                           \
+  SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR                                  \
+  DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR                                    \
+  .thumb_func SEPARATOR                                                        \
+  SYMBOL_NAME(name):
+
+#define DEFINE_COMPILERRT_PRIVATE_FUNCTION(name)                               \
+  DEFINE_CODE_STATE                                                            \
+  FILE_LEVEL_DIRECTIVE SEPARATOR                                               \
+  .globl SYMBOL_NAME(name) SEPARATOR                                           \
+  SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR                                  \
+  HIDDEN(SYMBOL_NAME(name)) SEPARATOR                                          \
+  DECLARE_FUNC_ENCODING                                                        \
+  SYMBOL_NAME(name):
+
+#define DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(name)                     \
+  DEFINE_CODE_STATE                                                            \
+  .globl name SEPARATOR                                                        \
+  SYMBOL_IS_FUNC(name) SEPARATOR                                               \
+  HIDDEN(name) SEPARATOR                                                       \
+  DECLARE_FUNC_ENCODING                                                        \
+  name:
+
+#define DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(name)                     \
+  DEFINE_CODE_STATE                                                            \
+  FUNC_ALIGN                                                                   \
+  .globl name SEPARATOR                                                        \
+  SYMBOL_IS_FUNC(name) SEPARATOR                                               \
+  DECLARE_SYMBOL_VISIBILITY_UNMANGLED(name) SEPARATOR                          \
+  DECLARE_FUNC_ENCODING                                                        \
+  name:                                                                        \
+  SEPARATOR CFI_START                                                          \
+  SEPARATOR BTI_C
+
+#define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target)                         \
+  .globl SYMBOL_NAME(name) SEPARATOR                                           \
+  SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR                                  \
+  DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR                                    \
+  .set SYMBOL_NAME(name), SYMBOL_NAME(target) SEPARATOR
+
+#if defined(__ARM_EABI__)
+#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name)                          \
+  DEFINE_COMPILERRT_FUNCTION_ALIAS(aeabi_name, name)
+#else
+#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name)
+#endif
+
+#ifdef __ELF__
+#define END_COMPILERRT_FUNCTION(name)                                          \
+  .size SYMBOL_NAME(name), . - SYMBOL_NAME(name)
+#define END_COMPILERRT_OUTLINE_FUNCTION(name)                                  \
+  CFI_END SEPARATOR                                                            \
+  .size SYMBOL_NAME(name), . - SYMBOL_NAME(name)
+#else
+#define END_COMPILERRT_FUNCTION(name)
+#define END_COMPILERRT_OUTLINE_FUNCTION(name)                                  \
+  CFI_END
+#endif
+
+#endif // COMPILERRT_ASSEMBLY_H
lib/tsan/interception/interception.h
@@ -185,6 +185,11 @@ const interpose_substitution substitution_##func_name[]             \
 #  else
 #   define __ASM_WEAK_WRAPPER(func) ".weak " #func "\n"
 #  endif  // SANITIZER_FREEBSD || SANITIZER_NETBSD
+#  if defined(__arm__) || defined(__aarch64__)
+#   define ASM_TYPE_FUNCTION_STR "%function"
+#  else
+#   define ASM_TYPE_FUNCTION_STR "@function"
+#  endif
 // Keep trampoline implementation in sync with sanitizer_common/sanitizer_asm.h
 #  define DECLARE_WRAPPER(ret_type, func, ...)                                 \
      extern "C" ret_type func(__VA_ARGS__);                                    \
@@ -196,12 +201,14 @@ const interpose_substitution substitution_##func_name[]             \
        __ASM_WEAK_WRAPPER(func)                                                \
        ".set " #func ", " SANITIZER_STRINGIFY(TRAMPOLINE(func)) "\n"           \
        ".globl " SANITIZER_STRINGIFY(TRAMPOLINE(func)) "\n"                    \
-       ".type  " SANITIZER_STRINGIFY(TRAMPOLINE(func)) ", %function\n"         \
+       ".type  " SANITIZER_STRINGIFY(TRAMPOLINE(func)) ", "                    \
+         ASM_TYPE_FUNCTION_STR "\n"                                            \
        SANITIZER_STRINGIFY(TRAMPOLINE(func)) ":\n"                             \
-       SANITIZER_STRINGIFY(CFI_STARTPROC) "\n"                                 \
-       SANITIZER_STRINGIFY(ASM_TAIL_CALL) " __interceptor_"                    \
-         SANITIZER_STRINGIFY(ASM_PREEMPTIBLE_SYM(func)) "\n"                   \
-       SANITIZER_STRINGIFY(CFI_ENDPROC) "\n"                                   \
+       C_ASM_STARTPROC "\n"                                                    \
+       C_ASM_TAIL_CALL(SANITIZER_STRINGIFY(TRAMPOLINE(func)),                  \
+                       "__interceptor_"                                        \
+                         SANITIZER_STRINGIFY(ASM_PREEMPTIBLE_SYM(func))) "\n"  \
+       C_ASM_ENDPROC "\n"                                                      \
        ".size  " SANITIZER_STRINGIFY(TRAMPOLINE(func)) ", "                    \
             ".-" SANITIZER_STRINGIFY(TRAMPOLINE(func)) "\n"                    \
      );
@@ -341,6 +348,18 @@ typedef unsigned long long uptr;
 #else
 typedef unsigned long uptr;
 #endif  // _WIN64
+
+#if defined(__ELF__) && !SANITIZER_FUCHSIA
+// The use of interceptors makes many sanitizers unusable for static linking.
+// Define a function that, if called, causes a link error (undefined _DYNAMIC).
+// However, -static-pie (which is not common) cannot be detected at link time.
+extern uptr kDynamic[] asm("_DYNAMIC");
+inline void DoesNotSupportStaticLinking() {
+  [[maybe_unused]] volatile auto x = &kDynamic;
+}
+#else
+inline void DoesNotSupportStaticLinking() {}
+#endif
 }  // namespace __interception
 
 #define INCLUDED_FROM_INTERCEPTION_LIB
lib/tsan/interception/interception_linux.h
@@ -28,12 +28,14 @@ bool InterceptFunction(const char *name, const char *ver, uptr *ptr_to_real,
                        uptr func, uptr trampoline);
 }  // namespace __interception
 
-#define INTERCEPT_FUNCTION_LINUX_OR_FREEBSD(func) \
-  ::__interception::InterceptFunction(            \
-      #func,                                      \
-      (::__interception::uptr *)&REAL(func),      \
-      (::__interception::uptr)&(func),            \
-      (::__interception::uptr)&TRAMPOLINE(func))
+// Cast func to type of REAL(func) before casting to uptr in case it is an
+// overloaded function, which is the case for some glibc functions when
+// _FORTIFY_SOURCE is used. This disambiguates which overload to use.
+#define INTERCEPT_FUNCTION_LINUX_OR_FREEBSD(func)            \
+  ::__interception::InterceptFunction(                       \
+      #func, (::__interception::uptr *)&REAL(func),          \
+      (::__interception::uptr)(decltype(REAL(func)))&(func), \
+      (::__interception::uptr) &TRAMPOLINE(func))
 
 // dlvsym is a GNU extension supported by some other platforms.
 #if SANITIZER_GLIBC || SANITIZER_FREEBSD || SANITIZER_NETBSD
@@ -41,7 +43,7 @@ bool InterceptFunction(const char *name, const char *ver, uptr *ptr_to_real,
   ::__interception::InterceptFunction(                        \
       #func, symver,                                          \
       (::__interception::uptr *)&REAL(func),                  \
-      (::__interception::uptr)&(func),                        \
+      (::__interception::uptr)(decltype(REAL(func)))&(func),  \
       (::__interception::uptr)&TRAMPOLINE(func))
 #else
 #define INTERCEPT_FUNCTION_VER_LINUX_OR_FREEBSD(func, symver) \
lib/tsan/interception/interception_win.cpp
@@ -1,4 +1,4 @@
-//===-- interception_linux.cpp ----------------------------------*- C++ -*-===//
+//===-- interception_win.cpp ------------------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -339,7 +339,7 @@ struct TrampolineMemoryRegion {
   uptr max_size;
 };
 
-UNUSED static const uptr kTrampolineScanLimitRange = 1 << 31;  // 2 gig
+UNUSED static const uptr kTrampolineScanLimitRange = 1ull << 31;  // 2 gig
 static const int kMaxTrampolineRegion = 1024;
 static TrampolineMemoryRegion TrampolineRegions[kMaxTrampolineRegion];
 
@@ -431,7 +431,8 @@ static uptr AllocateMemoryForTrampoline(uptr image_address, size_t size) {
 // The following prologues cannot be patched because of the short jump
 // jumping to the patching region.
 
-#if SANITIZER_WINDOWS64
+// Short jump patterns below are only for x86_64.
+#  if SANITIZER_WINDOWS_x64
 // ntdll!wcslen in Win11
 //   488bc1          mov     rax,rcx
 //   0fb710          movzx   edx,word ptr [rax]
@@ -457,7 +458,12 @@ static const u8 kPrologueWithShortJump2[] = {
 
 // Returns 0 on error.
 static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
-#if SANITIZER_WINDOWS64
+#if SANITIZER_ARM64
+  // An ARM64 instruction is 4 bytes long.
+  return 4;
+#endif
+
+#  if SANITIZER_WINDOWS_x64
   if (memcmp((u8*)address, kPrologueWithShortJump1,
              sizeof(kPrologueWithShortJump1)) == 0 ||
       memcmp((u8*)address, kPrologueWithShortJump2,
@@ -473,6 +479,8 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
 
   switch (*(u8*)address) {
     case 0x90:  // 90 : nop
+    case 0xC3:  // C3 : ret   (for small/empty function interception
+    case 0xCC:  // CC : int 3  i.e. registering weak functions)
       return 1;
 
     case 0x50:  // push eax / rax
@@ -496,7 +504,6 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     // Cannot overwrite control-instruction. Return 0 to indicate failure.
     case 0xE9:  // E9 XX XX XX XX : jmp <label>
     case 0xE8:  // E8 XX XX XX XX : call <func>
-    case 0xC3:  // C3 : ret
     case 0xEB:  // EB XX : jmp XX (short jump)
     case 0x70:  // 7Y YY : jy XX (short conditional jump)
     case 0x71:
@@ -539,7 +546,12 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
       return 7;
   }
 
-#if SANITIZER_WINDOWS64
+  switch (0x000000FF & *(u32 *)address) {
+    case 0xc2:  // C2 XX XX : ret XX (needed for registering weak functions)
+      return 3;
+  }
+
+#  if SANITIZER_WINDOWS_x64
   switch (*(u8*)address) {
     case 0xA1:  // A1 XX XX XX XX XX XX XX XX :
                 //   movabs eax, dword ptr ds:[XXXXXXXX]
@@ -572,6 +584,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0x018a:  // mov al, byte ptr [rcx]
       return 2;
 
+    case 0x058A:  // 8A 05 XX XX XX XX : mov al, byte ptr [XX XX XX XX]
     case 0x058B:  // 8B 05 XX XX XX XX : mov eax, dword ptr [XX XX XX XX]
       if (rel_offset)
         *rel_offset = 2;
@@ -598,6 +611,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0xc18b4c:    // 4C 8B C1 : mov r8, rcx
     case 0xd2b60f:    // 0f b6 d2 : movzx edx, dl
     case 0xca2b48:    // 48 2b ca : sub rcx, rdx
+    case 0xca3b48:    // 48 3b ca : cmp rcx, rdx
     case 0x10b70f:    // 0f b7 10 : movzx edx, WORD PTR [rax]
     case 0xc00b4d:    // 4d 0b c0 : or r8, r8
     case 0xc08b41:    // 41 8b c0 : mov eax, r8d
@@ -617,9 +631,11 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
 
     case 0x058b48:    // 48 8b 05 XX XX XX XX :
                       //   mov rax, QWORD PTR [rip + XXXXXXXX]
+    case 0x058d48:    // 48 8d 05 XX XX XX XX :
+                      //   lea rax, QWORD PTR [rip + XXXXXXXX]
     case 0x25ff48:    // 48 ff 25 XX XX XX XX :
                       //   rex.W jmp QWORD PTR [rip + XXXXXXXX]
-
+    case 0x158D4C:    // 4c 8d 15 XX XX XX XX : lea r10, [rip + XX]
       // Instructions having offset relative to 'rip' need offset adjustment.
       if (rel_offset)
         *rel_offset = 3;
@@ -721,16 +737,22 @@ static bool CopyInstructions(uptr to, uptr from, size_t size) {
     size_t instruction_size = GetInstructionSize(from + cursor, &rel_offset);
     if (!instruction_size)
       return false;
-    _memcpy((void*)(to + cursor), (void*)(from + cursor),
+    _memcpy((void *)(to + cursor), (void *)(from + cursor),
             (size_t)instruction_size);
     if (rel_offset) {
-      uptr delta = to - from;
-      uptr relocated_offset = *(u32*)(to + cursor + rel_offset) - delta;
-#if SANITIZER_WINDOWS64
-      if (relocated_offset + 0x80000000U >= 0xFFFFFFFFU)
+#  if SANITIZER_WINDOWS64
+      // we want to make sure that the new relative offset still fits in 32-bits
+      // this will be untrue if relocated_offset \notin [-2**31, 2**31)
+      s64 delta = to - from;
+      s64 relocated_offset = *(s32 *)(to + cursor + rel_offset) - delta;
+      if (-0x8000'0000ll > relocated_offset || relocated_offset > 0x7FFF'FFFFll)
         return false;
-#endif
-      *(u32*)(to + cursor + rel_offset) = relocated_offset;
+#  else
+      // on 32-bit, the relative offset will always be correct
+      s32 delta = to - from;
+      s32 relocated_offset = *(s32 *)(to + cursor + rel_offset) - delta;
+#  endif
+      *(s32 *)(to + cursor + rel_offset) = relocated_offset;
     }
     cursor += instruction_size;
   }
@@ -932,19 +954,26 @@ bool OverrideFunction(
 
 static void **InterestingDLLsAvailable() {
   static const char *InterestingDLLs[] = {
-      "kernel32.dll",
-      "msvcr100.dll",      // VS2010
-      "msvcr110.dll",      // VS2012
-      "msvcr120.dll",      // VS2013
-      "vcruntime140.dll",  // VS2015
-      "ucrtbase.dll",      // Universal CRT
-#if (defined(__MINGW32__) && defined(__i386__))
-      "libc++.dll",        // libc++
-      "libunwind.dll",     // libunwind
-#endif
-      // NTDLL should go last as it exports some functions that we should
-      // override in the CRT [presumably only used internally].
-      "ntdll.dll", NULL};
+    "kernel32.dll",
+    "msvcr100d.dll",      // VS2010
+    "msvcr110d.dll",      // VS2012
+    "msvcr120d.dll",      // VS2013
+    "vcruntime140d.dll",  // VS2015
+    "ucrtbased.dll",      // Universal CRT
+    "msvcr100.dll",       // VS2010
+    "msvcr110.dll",       // VS2012
+    "msvcr120.dll",       // VS2013
+    "vcruntime140.dll",   // VS2015
+    "ucrtbase.dll",       // Universal CRT
+#  if (defined(__MINGW32__) && defined(__i386__))
+    "libc++.dll",     // libc++
+    "libunwind.dll",  // libunwind
+#  endif
+    // NTDLL should go last as it exports some functions that we should
+    // override in the CRT [presumably only used internally].
+    "ntdll.dll",
+    NULL
+  };
   static void *result[ARRAY_SIZE(InterestingDLLs)] = { 0 };
   if (!result[0]) {
     for (size_t i = 0, j = 0; InterestingDLLs[i]; ++i) {
lib/tsan/sanitizer_common/sanitizer_allocator.cpp
@@ -25,7 +25,7 @@ namespace __sanitizer {
 const char *PrimaryAllocatorName = "SizeClassAllocator";
 const char *SecondaryAllocatorName = "LargeMmapAllocator";
 
-static ALIGNED(64) char internal_alloc_placeholder[sizeof(InternalAllocator)];
+alignas(64) static char internal_alloc_placeholder[sizeof(InternalAllocator)];
 static atomic_uint8_t internal_allocator_initialized;
 static StaticSpinMutex internal_alloc_init_mu;
 
@@ -138,14 +138,20 @@ void InternalAllocatorUnlock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
 
 // LowLevelAllocator
 constexpr uptr kLowLevelAllocatorDefaultAlignment = 8;
+constexpr uptr kMinNumPagesRounded = 16;
+constexpr uptr kMinRoundedSize = 65536;
 static uptr low_level_alloc_min_alignment = kLowLevelAllocatorDefaultAlignment;
 static LowLevelAllocateCallback low_level_alloc_callback;
 
+static LowLevelAllocator Alloc;
+LowLevelAllocator &GetGlobalLowLevelAllocator() { return Alloc; }
+
 void *LowLevelAllocator::Allocate(uptr size) {
   // Align allocation size.
   size = RoundUpTo(size, low_level_alloc_min_alignment);
   if (allocated_end_ - allocated_current_ < (sptr)size) {
-    uptr size_to_allocate = RoundUpTo(size, GetPageSizeCached());
+    uptr size_to_allocate = RoundUpTo(
+        size, Min(GetPageSizeCached() * kMinNumPagesRounded, kMinRoundedSize));
     allocated_current_ = (char *)MmapOrDie(size_to_allocate, __func__);
     allocated_end_ = allocated_current_ + size_to_allocate;
     if (low_level_alloc_callback) {
lib/tsan/sanitizer_common/sanitizer_allocator_interface.h
@@ -40,6 +40,8 @@ SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
     void __sanitizer_malloc_hook(void *ptr, uptr size);
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
     void __sanitizer_free_hook(void *ptr);
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE int
+__sanitizer_ignore_free_hook(void *ptr);
 
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void
 __sanitizer_purge_allocator();
lib/tsan/sanitizer_common/sanitizer_allocator_primary32.h
@@ -278,7 +278,7 @@ class SizeClassAllocator32 {
   static const uptr kRegionSize = 1 << kRegionSizeLog;
   static const uptr kNumPossibleRegions = kSpaceSize / kRegionSize;
 
-  struct ALIGNED(SANITIZER_CACHE_LINE_SIZE) SizeClassInfo {
+  struct alignas(SANITIZER_CACHE_LINE_SIZE) SizeClassInfo {
     StaticSpinMutex mutex;
     IntrusiveList<TransferBatch> free_list;
     u32 rand_state;
lib/tsan/sanitizer_common/sanitizer_allocator_primary64.h
@@ -316,13 +316,13 @@ class SizeClassAllocator64 {
     Printf(
         "%s %02zd (%6zd): mapped: %6zdK allocs: %7zd frees: %7zd inuse: %6zd "
         "num_freed_chunks %7zd avail: %6zd rss: %6zdK releases: %6zd "
-        "last released: %6lldK region: 0x%zx\n",
+        "last released: %6lldK region: %p\n",
         region->exhausted ? "F" : " ", class_id, ClassIdToSize(class_id),
         region->mapped_user >> 10, region->stats.n_allocated,
         region->stats.n_freed, in_use, region->num_freed_chunks, avail_chunks,
         rss >> 10, region->rtoi.num_releases,
         region->rtoi.last_released_bytes >> 10,
-        SpaceBeg() + kRegionSize * class_id);
+        (void *)(SpaceBeg() + kRegionSize * class_id));
   }
 
   void PrintStats() {
@@ -636,15 +636,17 @@ class SizeClassAllocator64 {
   }
   uptr SpaceEnd() const { return  SpaceBeg() + kSpaceSize; }
   // kRegionSize should be able to satisfy the largest size class.
-  static_assert(kRegionSize >= SizeClassMap::kMaxSize);
+  static_assert(kRegionSize >= SizeClassMap::kMaxSize,
+                "Region size exceed largest size");
   // kRegionSize must be <= 2^36, see CompactPtrT.
-  COMPILER_CHECK((kRegionSize) <= (1ULL << (SANITIZER_WORDSIZE / 2 + 4)));
+  COMPILER_CHECK((kRegionSize) <=
+                 (1ULL << (sizeof(CompactPtrT) * 8 + kCompactPtrScale)));
   // Call mmap for user memory with at least this size.
-  static const uptr kUserMapSize = 1 << 16;
+  static const uptr kUserMapSize = 1 << 18;
   // Call mmap for metadata memory with at least this size.
   static const uptr kMetaMapSize = 1 << 16;
   // Call mmap for free array memory with at least this size.
-  static const uptr kFreeArrayMapSize = 1 << 16;
+  static const uptr kFreeArrayMapSize = 1 << 18;
 
   atomic_sint32_t release_to_os_interval_ms_;
 
@@ -665,7 +667,7 @@ class SizeClassAllocator64 {
     u64 last_released_bytes;
   };
 
-  struct ALIGNED(SANITIZER_CACHE_LINE_SIZE) RegionInfo {
+  struct alignas(SANITIZER_CACHE_LINE_SIZE) RegionInfo {
     Mutex mutex;
     uptr num_freed_chunks;  // Number of elements in the freearray.
     uptr mapped_free_array;  // Bytes mapped for freearray.
lib/tsan/sanitizer_common/sanitizer_asm.h
@@ -42,6 +42,16 @@
 # define CFI_RESTORE(reg)
 #endif
 
+#if defined(__aarch64__) && defined(__ARM_FEATURE_BTI_DEFAULT)
+# define ASM_STARTPROC CFI_STARTPROC; hint #34
+# define C_ASM_STARTPROC SANITIZER_STRINGIFY(CFI_STARTPROC) "\nhint #34"
+#else
+# define ASM_STARTPROC CFI_STARTPROC
+# define C_ASM_STARTPROC SANITIZER_STRINGIFY(CFI_STARTPROC)
+#endif
+#define ASM_ENDPROC CFI_ENDPROC
+#define C_ASM_ENDPROC SANITIZER_STRINGIFY(CFI_ENDPROC)
+
 #if defined(__x86_64__) || defined(__i386__) || defined(__sparc__)
 # define ASM_TAIL_CALL jmp
 #elif defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
@@ -53,6 +63,29 @@
 # define ASM_TAIL_CALL tail
 #endif
 
+// Currently, almost all of the shared libraries rely on the value of
+// $t9 to get the address of current function, instead of PCREL, even
+// on MIPSr6. To be compatible with them, we have to set $t9 properly.
+// MIPS uses GOT to get the address of preemptible functions.
+#if defined(__mips64)
+#  define C_ASM_TAIL_CALL(t_func, i_func)                       \
+    "lui $t8, %hi(%neg(%gp_rel(" t_func ")))\n"                 \
+    "daddu $t8, $t8, $t9\n"                                     \
+    "daddiu $t8, $t8, %lo(%neg(%gp_rel(" t_func ")))\n"         \
+    "ld $t9, %got_disp(" i_func ")($t8)\n"                      \
+    "jr $t9\n"
+#elif defined(__mips__)
+#  define C_ASM_TAIL_CALL(t_func, i_func)                       \
+    ".set    noreorder\n"                                       \
+    ".cpload $t9\n"                                             \
+    ".set    reorder\n"                                         \
+    "lw $t9, %got(" i_func ")($gp)\n"                           \
+    "jr $t9\n"
+#elif defined(ASM_TAIL_CALL)
+#  define C_ASM_TAIL_CALL(t_func, i_func)                       \
+    SANITIZER_STRINGIFY(ASM_TAIL_CALL) " " i_func
+#endif
+
 #if defined(__ELF__) && defined(__x86_64__) || defined(__i386__) || \
     defined(__riscv)
 # define ASM_PREEMPTIBLE_SYM(sym) sym@plt
@@ -62,7 +95,11 @@
 
 #if !defined(__APPLE__)
 # define ASM_HIDDEN(symbol) .hidden symbol
-# define ASM_TYPE_FUNCTION(symbol) .type symbol, %function
+# if defined(__arm__) || defined(__aarch64__)
+#  define ASM_TYPE_FUNCTION(symbol) .type symbol, %function
+# else
+#  define ASM_TYPE_FUNCTION(symbol) .type symbol, @function
+# endif
 # define ASM_SIZE(symbol) .size symbol, .-symbol
 # define ASM_SYMBOL(symbol) symbol
 # define ASM_SYMBOL_INTERCEPTOR(symbol) symbol
@@ -87,9 +124,9 @@
          .globl __interceptor_trampoline_##name;                               \
          ASM_TYPE_FUNCTION(__interceptor_trampoline_##name);                   \
          __interceptor_trampoline_##name:                                      \
-                 CFI_STARTPROC;                                                \
+                 ASM_STARTPROC;                                                \
                  ASM_TAIL_CALL ASM_PREEMPTIBLE_SYM(__interceptor_##name);      \
-                 CFI_ENDPROC;                                                  \
+                 ASM_ENDPROC;                                                  \
          ASM_SIZE(__interceptor_trampoline_##name)
 #  define ASM_INTERCEPTOR_TRAMPOLINE_SUPPORT 1
 # endif  // Architecture supports interceptor trampoline
lib/tsan/sanitizer_common/sanitizer_atomic.h
@@ -18,12 +18,24 @@
 namespace __sanitizer {
 
 enum memory_order {
+// If the __atomic atomic builtins are supported (Clang/GCC), use the
+// compiler provided macro values so that we can map the atomic operations
+// to __atomic_* directly.
+#ifdef __ATOMIC_SEQ_CST
+  memory_order_relaxed = __ATOMIC_RELAXED,
+  memory_order_consume = __ATOMIC_CONSUME,
+  memory_order_acquire = __ATOMIC_ACQUIRE,
+  memory_order_release = __ATOMIC_RELEASE,
+  memory_order_acq_rel = __ATOMIC_ACQ_REL,
+  memory_order_seq_cst = __ATOMIC_SEQ_CST
+#else
   memory_order_relaxed = 1 << 0,
   memory_order_consume = 1 << 1,
   memory_order_acquire = 1 << 2,
   memory_order_release = 1 << 3,
   memory_order_acq_rel = 1 << 4,
   memory_order_seq_cst = 1 << 5
+#endif
 };
 
 struct atomic_uint8_t {
@@ -49,7 +61,7 @@ struct atomic_uint32_t {
 struct atomic_uint64_t {
   typedef u64 Type;
   // On 32-bit platforms u64 is not necessarily aligned on 8 bytes.
-  volatile ALIGNED(8) Type val_dont_use;
+  alignas(8) volatile Type val_dont_use;
 };
 
 struct atomic_uintptr_t {
lib/tsan/sanitizer_common/sanitizer_atomic_clang.h
@@ -14,60 +14,63 @@
 #ifndef SANITIZER_ATOMIC_CLANG_H
 #define SANITIZER_ATOMIC_CLANG_H
 
-#if defined(__i386__) || defined(__x86_64__)
-# include "sanitizer_atomic_clang_x86.h"
-#else
-# include "sanitizer_atomic_clang_other.h"
-#endif
-
 namespace __sanitizer {
 
-// We would like to just use compiler builtin atomic operations
-// for loads and stores, but they are mostly broken in clang:
-// - they lead to vastly inefficient code generation
-// (http://llvm.org/bugs/show_bug.cgi?id=17281)
-// - 64-bit atomic operations are not implemented on x86_32
-// (http://llvm.org/bugs/show_bug.cgi?id=15034)
-// - they are not implemented on ARM
-// error: undefined reference to '__atomic_load_4'
+// We use the compiler builtin atomic operations for loads and stores, which
+// generate correct code for all architectures, but may require libatomic
+// on platforms where e.g. 64-bit atomics are not supported natively.
 
 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
 // for mappings of the memory model to different processors.
 
-inline void atomic_signal_fence(memory_order) {
+inline void atomic_signal_fence(memory_order mo) { __atomic_signal_fence(mo); }
+
+inline void atomic_thread_fence(memory_order mo) { __atomic_thread_fence(mo); }
+
+inline void proc_yield(int cnt) {
+  __asm__ __volatile__("" ::: "memory");
+#if defined(__i386__) || defined(__x86_64__)
+  for (int i = 0; i < cnt; i++) __asm__ __volatile__("pause");
   __asm__ __volatile__("" ::: "memory");
+#endif
 }
 
-inline void atomic_thread_fence(memory_order) {
-  __sync_synchronize();
+template <typename T>
+inline typename T::Type atomic_load(const volatile T *a, memory_order mo) {
+  DCHECK(mo == memory_order_relaxed || mo == memory_order_consume ||
+         mo == memory_order_acquire || mo == memory_order_seq_cst);
+  DCHECK(!((uptr)a % sizeof(*a)));
+  return __atomic_load_n(&a->val_dont_use, mo);
 }
 
-template<typename T>
-inline typename T::Type atomic_fetch_add(volatile T *a,
-    typename T::Type v, memory_order mo) {
-  (void)mo;
+template <typename T>
+inline void atomic_store(volatile T *a, typename T::Type v, memory_order mo) {
+  DCHECK(mo == memory_order_relaxed || mo == memory_order_release ||
+         mo == memory_order_seq_cst);
   DCHECK(!((uptr)a % sizeof(*a)));
-  return __sync_fetch_and_add(&a->val_dont_use, v);
+  __atomic_store_n(&a->val_dont_use, v, mo);
 }
 
-template<typename T>
-inline typename T::Type atomic_fetch_sub(volatile T *a,
-    typename T::Type v, memory_order mo) {
+template <typename T>
+inline typename T::Type atomic_fetch_add(volatile T *a, typename T::Type v,
+                                         memory_order mo) {
+  DCHECK(!((uptr)a % sizeof(*a)));
+  return __atomic_fetch_add(&a->val_dont_use, v, mo);
+}
+
+template <typename T>
+inline typename T::Type atomic_fetch_sub(volatile T *a, typename T::Type v,
+                                         memory_order mo) {
   (void)mo;
   DCHECK(!((uptr)a % sizeof(*a)));
-  return __sync_fetch_and_add(&a->val_dont_use, -v);
+  return __atomic_fetch_sub(&a->val_dont_use, v, mo);
 }
 
-template<typename T>
-inline typename T::Type atomic_exchange(volatile T *a,
-    typename T::Type v, memory_order mo) {
+template <typename T>
+inline typename T::Type atomic_exchange(volatile T *a, typename T::Type v,
+                                        memory_order mo) {
   DCHECK(!((uptr)a % sizeof(*a)));
-  if (mo & (memory_order_release | memory_order_acq_rel | memory_order_seq_cst))
-    __sync_synchronize();
-  v = __sync_lock_test_and_set(&a->val_dont_use, v);
-  if (mo == memory_order_seq_cst)
-    __sync_synchronize();
-  return v;
+  return __atomic_exchange_n(&a->val_dont_use, v, mo);
 }
 
 template <typename T>
@@ -82,9 +85,8 @@ inline bool atomic_compare_exchange_strong(volatile T *a, typename T::Type *cmp,
                                    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
 }
 
-template<typename T>
-inline bool atomic_compare_exchange_weak(volatile T *a,
-                                         typename T::Type *cmp,
+template <typename T>
+inline bool atomic_compare_exchange_weak(volatile T *a, typename T::Type *cmp,
                                          typename T::Type xchg,
                                          memory_order mo) {
   return atomic_compare_exchange_strong(a, cmp, xchg, mo);
@@ -92,13 +94,6 @@ inline bool atomic_compare_exchange_weak(volatile T *a,
 
 }  // namespace __sanitizer
 
-// This include provides explicit template instantiations for atomic_uint64_t
-// on MIPS32, which does not directly support 8 byte atomics. It has to
-// proceed the template definitions above.
-#if defined(_MIPS_SIM) && defined(_ABIO32) && _MIPS_SIM == _ABIO32
-#  include "sanitizer_atomic_clang_mips.h"
-#endif
-
 #undef ATOMIC_ORDER
 
 #endif  // SANITIZER_ATOMIC_CLANG_H
lib/tsan/sanitizer_common/sanitizer_atomic_clang_mips.h
@@ -1,117 +0,0 @@
-//===-- sanitizer_atomic_clang_mips.h ---------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of ThreadSanitizer/AddressSanitizer runtime.
-// Not intended for direct inclusion. Include sanitizer_atomic.h.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SANITIZER_ATOMIC_CLANG_MIPS_H
-#define SANITIZER_ATOMIC_CLANG_MIPS_H
-
-namespace __sanitizer {
-
-// MIPS32 does not support atomics > 4 bytes. To address this lack of
-// functionality, the sanitizer library provides helper methods which use an
-// internal spin lock mechanism to emulate atomic operations when the size is
-// 8 bytes.
-static void __spin_lock(volatile int *lock) {
-  while (__sync_lock_test_and_set(lock, 1))
-    while (*lock) {
-    }
-}
-
-static void __spin_unlock(volatile int *lock) { __sync_lock_release(lock); }
-
-// Make sure the lock is on its own cache line to prevent false sharing.
-// Put it inside a struct that is aligned and padded to the typical MIPS
-// cacheline which is 32 bytes.
-static struct {
-  int lock;
-  char pad[32 - sizeof(int)];
-} __attribute__((aligned(32))) lock = {0, {0}};
-
-template <>
-inline atomic_uint64_t::Type atomic_fetch_add(volatile atomic_uint64_t *ptr,
-                                              atomic_uint64_t::Type val,
-                                              memory_order mo) {
-  DCHECK(mo &
-         (memory_order_relaxed | memory_order_release | memory_order_seq_cst));
-  DCHECK(!((uptr)ptr % sizeof(*ptr)));
-
-  atomic_uint64_t::Type ret;
-
-  __spin_lock(&lock.lock);
-  ret = *(const_cast<atomic_uint64_t::Type volatile *>(&ptr->val_dont_use));
-  ptr->val_dont_use = ret + val;
-  __spin_unlock(&lock.lock);
-
-  return ret;
-}
-
-template <>
-inline atomic_uint64_t::Type atomic_fetch_sub(volatile atomic_uint64_t *ptr,
-                                              atomic_uint64_t::Type val,
-                                              memory_order mo) {
-  return atomic_fetch_add(ptr, -val, mo);
-}
-
-template <>
-inline bool atomic_compare_exchange_strong(volatile atomic_uint64_t *ptr,
-                                           atomic_uint64_t::Type *cmp,
-                                           atomic_uint64_t::Type xchg,
-                                           memory_order mo) {
-  DCHECK(mo &
-         (memory_order_relaxed | memory_order_release | memory_order_seq_cst));
-  DCHECK(!((uptr)ptr % sizeof(*ptr)));
-
-  typedef atomic_uint64_t::Type Type;
-  Type cmpv = *cmp;
-  Type prev;
-  bool ret = false;
-
-  __spin_lock(&lock.lock);
-  prev = *(const_cast<Type volatile *>(&ptr->val_dont_use));
-  if (prev == cmpv) {
-    ret = true;
-    ptr->val_dont_use = xchg;
-  }
-  __spin_unlock(&lock.lock);
-
-  return ret;
-}
-
-template <>
-inline atomic_uint64_t::Type atomic_load(const volatile atomic_uint64_t *ptr,
-                                         memory_order mo) {
-  DCHECK(mo &
-         (memory_order_relaxed | memory_order_release | memory_order_seq_cst));
-  DCHECK(!((uptr)ptr % sizeof(*ptr)));
-
-  atomic_uint64_t::Type zero = 0;
-  volatile atomic_uint64_t *Newptr =
-      const_cast<volatile atomic_uint64_t *>(ptr);
-  return atomic_fetch_add(Newptr, zero, mo);
-}
-
-template <>
-inline void atomic_store(volatile atomic_uint64_t *ptr, atomic_uint64_t::Type v,
-                         memory_order mo) {
-  DCHECK(mo &
-         (memory_order_relaxed | memory_order_release | memory_order_seq_cst));
-  DCHECK(!((uptr)ptr % sizeof(*ptr)));
-
-  __spin_lock(&lock.lock);
-  ptr->val_dont_use = v;
-  __spin_unlock(&lock.lock);
-}
-
-}  // namespace __sanitizer
-
-#endif  // SANITIZER_ATOMIC_CLANG_MIPS_H
-
lib/tsan/sanitizer_common/sanitizer_atomic_clang_other.h
@@ -1,85 +0,0 @@
-//===-- sanitizer_atomic_clang_other.h --------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of ThreadSanitizer/AddressSanitizer runtime.
-// Not intended for direct inclusion. Include sanitizer_atomic.h.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SANITIZER_ATOMIC_CLANG_OTHER_H
-#define SANITIZER_ATOMIC_CLANG_OTHER_H
-
-namespace __sanitizer {
-
-
-inline void proc_yield(int cnt) {
-  __asm__ __volatile__("" ::: "memory");
-}
-
-template<typename T>
-inline typename T::Type atomic_load(
-    const volatile T *a, memory_order mo) {
-  DCHECK(mo & (memory_order_relaxed | memory_order_consume
-      | memory_order_acquire | memory_order_seq_cst));
-  DCHECK(!((uptr)a % sizeof(*a)));
-  typename T::Type v;
-
-  if (sizeof(*a) < 8 || sizeof(void*) == 8) {
-    // Assume that aligned loads are atomic.
-    if (mo == memory_order_relaxed) {
-      v = a->val_dont_use;
-    } else if (mo == memory_order_consume) {
-      // Assume that processor respects data dependencies
-      // (and that compiler won't break them).
-      __asm__ __volatile__("" ::: "memory");
-      v = a->val_dont_use;
-      __asm__ __volatile__("" ::: "memory");
-    } else if (mo == memory_order_acquire) {
-      __asm__ __volatile__("" ::: "memory");
-      v = a->val_dont_use;
-      __sync_synchronize();
-    } else {  // seq_cst
-      // E.g. on POWER we need a hw fence even before the store.
-      __sync_synchronize();
-      v = a->val_dont_use;
-      __sync_synchronize();
-    }
-  } else {
-    __atomic_load(const_cast<typename T::Type volatile *>(&a->val_dont_use), &v,
-                  __ATOMIC_SEQ_CST);
-  }
-  return v;
-}
-
-template<typename T>
-inline void atomic_store(volatile T *a, typename T::Type v, memory_order mo) {
-  DCHECK(mo & (memory_order_relaxed | memory_order_release
-      | memory_order_seq_cst));
-  DCHECK(!((uptr)a % sizeof(*a)));
-
-  if (sizeof(*a) < 8 || sizeof(void*) == 8) {
-    // Assume that aligned loads are atomic.
-    if (mo == memory_order_relaxed) {
-      a->val_dont_use = v;
-    } else if (mo == memory_order_release) {
-      __sync_synchronize();
-      a->val_dont_use = v;
-      __asm__ __volatile__("" ::: "memory");
-    } else {  // seq_cst
-      __sync_synchronize();
-      a->val_dont_use = v;
-      __sync_synchronize();
-    }
-  } else {
-    __atomic_store(&a->val_dont_use, &v, __ATOMIC_SEQ_CST);
-  }
-}
-
-}  // namespace __sanitizer
-
-#endif  // #ifndef SANITIZER_ATOMIC_CLANG_OTHER_H
lib/tsan/sanitizer_common/sanitizer_atomic_clang_x86.h
@@ -1,113 +0,0 @@
-//===-- sanitizer_atomic_clang_x86.h ----------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of ThreadSanitizer/AddressSanitizer runtime.
-// Not intended for direct inclusion. Include sanitizer_atomic.h.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SANITIZER_ATOMIC_CLANG_X86_H
-#define SANITIZER_ATOMIC_CLANG_X86_H
-
-namespace __sanitizer {
-
-inline void proc_yield(int cnt) {
-  __asm__ __volatile__("" ::: "memory");
-  for (int i = 0; i < cnt; i++)
-    __asm__ __volatile__("pause");
-  __asm__ __volatile__("" ::: "memory");
-}
-
-template<typename T>
-inline typename T::Type atomic_load(
-    const volatile T *a, memory_order mo) {
-  DCHECK(mo & (memory_order_relaxed | memory_order_consume
-      | memory_order_acquire | memory_order_seq_cst));
-  DCHECK(!((uptr)a % sizeof(*a)));
-  typename T::Type v;
-
-  if (sizeof(*a) < 8 || sizeof(void*) == 8) {
-    // Assume that aligned loads are atomic.
-    if (mo == memory_order_relaxed) {
-      v = a->val_dont_use;
-    } else if (mo == memory_order_consume) {
-      // Assume that processor respects data dependencies
-      // (and that compiler won't break them).
-      __asm__ __volatile__("" ::: "memory");
-      v = a->val_dont_use;
-      __asm__ __volatile__("" ::: "memory");
-    } else if (mo == memory_order_acquire) {
-      __asm__ __volatile__("" ::: "memory");
-      v = a->val_dont_use;
-      // On x86 loads are implicitly acquire.
-      __asm__ __volatile__("" ::: "memory");
-    } else {  // seq_cst
-      // On x86 plain MOV is enough for seq_cst store.
-      __asm__ __volatile__("" ::: "memory");
-      v = a->val_dont_use;
-      __asm__ __volatile__("" ::: "memory");
-    }
-  } else {
-    // 64-bit load on 32-bit platform.
-    __asm__ __volatile__(
-        "movq %1, %%mm0;"  // Use mmx reg for 64-bit atomic moves
-        "movq %%mm0, %0;"  // (ptr could be read-only)
-        "emms;"            // Empty mmx state/Reset FP regs
-        : "=m" (v)
-        : "m" (a->val_dont_use)
-        : // mark the mmx registers as clobbered
-#ifdef __MMX__
-          "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
-#endif  // #ifdef __MMX__
-          "memory");
-  }
-  return v;
-}
-
-template<typename T>
-inline void atomic_store(volatile T *a, typename T::Type v, memory_order mo) {
-  DCHECK(mo & (memory_order_relaxed | memory_order_release
-      | memory_order_seq_cst));
-  DCHECK(!((uptr)a % sizeof(*a)));
-
-  if (sizeof(*a) < 8 || sizeof(void*) == 8) {
-    // Assume that aligned loads are atomic.
-    if (mo == memory_order_relaxed) {
-      a->val_dont_use = v;
-    } else if (mo == memory_order_release) {
-      // On x86 stores are implicitly release.
-      __asm__ __volatile__("" ::: "memory");
-      a->val_dont_use = v;
-      __asm__ __volatile__("" ::: "memory");
-    } else {  // seq_cst
-      // On x86 stores are implicitly release.
-      __asm__ __volatile__("" ::: "memory");
-      a->val_dont_use = v;
-      __sync_synchronize();
-    }
-  } else {
-    // 64-bit store on 32-bit platform.
-    __asm__ __volatile__(
-        "movq %1, %%mm0;"  // Use mmx reg for 64-bit atomic moves
-        "movq %%mm0, %0;"
-        "emms;"            // Empty mmx state/Reset FP regs
-        : "=m" (a->val_dont_use)
-        : "m" (v)
-        : // mark the mmx registers as clobbered
-#ifdef __MMX__
-          "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
-#endif  // #ifdef __MMX__
-          "memory");
-    if (mo == memory_order_seq_cst)
-      __sync_synchronize();
-  }
-}
-
-}  // namespace __sanitizer
-
-#endif  // #ifndef SANITIZER_ATOMIC_CLANG_X86_H
lib/tsan/sanitizer_common/sanitizer_atomic_msvc.h
@@ -70,8 +70,8 @@ inline void proc_yield(int cnt) {
 template<typename T>
 inline typename T::Type atomic_load(
     const volatile T *a, memory_order mo) {
-  DCHECK(mo & (memory_order_relaxed | memory_order_consume
-      | memory_order_acquire | memory_order_seq_cst));
+  DCHECK(mo == memory_order_relaxed || mo == memory_order_consume ||
+         mo == memory_order_acquire || mo == memory_order_seq_cst);
   DCHECK(!((uptr)a % sizeof(*a)));
   typename T::Type v;
   // FIXME(dvyukov): 64-bit load is not atomic on 32-bits.
@@ -87,8 +87,8 @@ inline typename T::Type atomic_load(
 
 template<typename T>
 inline void atomic_store(volatile T *a, typename T::Type v, memory_order mo) {
-  DCHECK(mo & (memory_order_relaxed | memory_order_release
-      | memory_order_seq_cst));
+  DCHECK(mo == memory_order_relaxed || mo == memory_order_release ||
+         mo == memory_order_seq_cst);
   DCHECK(!((uptr)a % sizeof(*a)));
   // FIXME(dvyukov): 64-bit store is not atomic on 32-bits.
   if (mo == memory_order_relaxed) {
lib/tsan/sanitizer_common/sanitizer_bitvector.h
@@ -321,23 +321,23 @@ class TwoLevelBitVector {
   };
 
  private:
-  void check(uptr idx) const { CHECK_LE(idx, size()); }
+  void check(uptr idx) const { CHECK_LT(idx, size()); }
 
   uptr idx0(uptr idx) const {
     uptr res = idx / (BV::kSize * BV::kSize);
-    CHECK_LE(res, kLevel1Size);
+    CHECK_LT(res, kLevel1Size);
     return res;
   }
 
   uptr idx1(uptr idx) const {
     uptr res = (idx / BV::kSize) % BV::kSize;
-    CHECK_LE(res, BV::kSize);
+    CHECK_LT(res, BV::kSize);
     return res;
   }
 
   uptr idx2(uptr idx) const {
     uptr res = idx % BV::kSize;
-    CHECK_LE(res, BV::kSize);
+    CHECK_LT(res, BV::kSize);
     return res;
   }
 
lib/tsan/sanitizer_common/sanitizer_chained_origin_depot.cpp
@@ -139,9 +139,11 @@ u32 ChainedOriginDepot::Get(u32 id, u32 *other) {
   return desc.here_id;
 }
 
-void ChainedOriginDepot::LockAll() { depot.LockAll(); }
+void ChainedOriginDepot::LockBeforeFork() { depot.LockBeforeFork(); }
 
-void ChainedOriginDepot::UnlockAll() { depot.UnlockAll(); }
+void ChainedOriginDepot::UnlockAfterFork(bool fork_child) {
+  depot.UnlockAfterFork(fork_child);
+}
 
 void ChainedOriginDepot::TestOnlyUnmap() { depot.TestOnlyUnmap(); }
 
lib/tsan/sanitizer_common/sanitizer_chained_origin_depot.h
@@ -32,8 +32,8 @@ class ChainedOriginDepot {
   // Retrieves the stored StackDepot ID for the given origin ID.
   u32 Get(u32 id, u32 *other);
 
-  void LockAll();
-  void UnlockAll();
+  void LockBeforeFork();
+  void UnlockAfterFork(bool fork_child);
   void TestOnlyUnmap();
 
  private:
lib/tsan/sanitizer_common/sanitizer_common.cpp
@@ -115,8 +115,9 @@ void ReportErrorSummary(const char *error_message, const char *alt_tool_name) {
   if (!common_flags()->print_summary)
     return;
   InternalScopedString buff;
-  buff.append("SUMMARY: %s: %s",
-              alt_tool_name ? alt_tool_name : SanitizerToolName, error_message);
+  buff.AppendF("SUMMARY: %s: %s",
+               alt_tool_name ? alt_tool_name : SanitizerToolName,
+               error_message);
   __sanitizer_report_error_summary(buff.data());
 }
 
@@ -346,7 +347,13 @@ void RunMallocHooks(void *ptr, uptr size) {
   }
 }
 
-void RunFreeHooks(void *ptr) {
+// Returns '1' if the call to free() should be ignored (based on
+// __sanitizer_ignore_free_hook), or '0' otherwise.
+int RunFreeHooks(void *ptr) {
+  if (__sanitizer_ignore_free_hook(ptr)) {
+    return 1;
+  }
+
   __sanitizer_free_hook(ptr);
   for (int i = 0; i < kMaxMallocFreeHooks; i++) {
     auto hook = MFHooks[i].free_hook;
@@ -354,6 +361,8 @@ void RunFreeHooks(void *ptr) {
       break;
     hook(ptr);
   }
+
+  return 0;
 }
 
 static int InstallMallocFreeHooks(void (*malloc_hook)(const void *, uptr),
@@ -418,4 +427,9 @@ SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_free_hook, void *ptr) {
   (void)ptr;
 }
 
+SANITIZER_INTERFACE_WEAK_DEF(int, __sanitizer_ignore_free_hook, void *ptr) {
+  (void)ptr;
+  return 0;
+}
+
 } // extern "C"
lib/tsan/sanitizer_common/sanitizer_common.h
@@ -32,6 +32,7 @@ struct AddressInfo;
 struct BufferedStackTrace;
 struct SignalContext;
 struct StackTrace;
+struct SymbolizedStack;
 
 // Constants.
 const uptr kWordSize = SANITIZER_WORDSIZE / 8;
@@ -59,14 +60,10 @@ inline int Verbosity() {
   return atomic_load(&current_verbosity, memory_order_relaxed);
 }
 
-#if SANITIZER_ANDROID
-inline uptr GetPageSize() {
-// Android post-M sysconf(_SC_PAGESIZE) crashes if called from .preinit_array.
-  return 4096;
-}
-inline uptr GetPageSizeCached() {
-  return 4096;
-}
+#if SANITIZER_ANDROID && !defined(__aarch64__)
+// 32-bit Android only has 4k pages.
+inline uptr GetPageSize() { return 4096; }
+inline uptr GetPageSizeCached() { return 4096; }
 #else
 uptr GetPageSize();
 extern uptr PageSizeCached;
@@ -76,6 +73,7 @@ inline uptr GetPageSizeCached() {
   return PageSizeCached;
 }
 #endif
+
 uptr GetMmapGranularity();
 uptr GetMaxVirtualAddress();
 uptr GetMaxUserVirtualAddress();
@@ -90,10 +88,11 @@ void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size,
 
 // Memory management
 void *MmapOrDie(uptr size, const char *mem_type, bool raw_report = false);
+
 inline void *MmapOrDieQuietly(uptr size, const char *mem_type) {
   return MmapOrDie(size, mem_type, /*raw_report*/ true);
 }
-void UnmapOrDie(void *addr, uptr size);
+void UnmapOrDie(void *addr, uptr size, bool raw_report = false);
 // Behaves just like MmapOrDie, but tolerates out of memory condition, in that
 // case returns nullptr.
 void *MmapOrDieOnFatalError(uptr size, const char *mem_type);
@@ -138,7 +137,8 @@ void UnmapFromTo(uptr from, uptr to);
 // shadow_size_bytes bytes on the right, which on linux is mapped no access.
 // The high_mem_end may be updated if the original shadow size doesn't fit.
 uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale,
-                      uptr min_shadow_base_alignment, uptr &high_mem_end);
+                      uptr min_shadow_base_alignment, uptr &high_mem_end,
+                      uptr granularity);
 
 // Let S = max(shadow_size, num_aliases * alias_size, ring_buffer_size).
 // Reserves 2*S bytes of address space to the right of the returned address and
@@ -177,7 +177,7 @@ bool DontDumpShadowMemory(uptr addr, uptr length);
 // Check if the built VMA size matches the runtime one.
 void CheckVMASize();
 void RunMallocHooks(void *ptr, uptr size);
-void RunFreeHooks(void *ptr);
+int RunFreeHooks(void *ptr);
 
 class ReservedAddressRange {
  public:
@@ -208,6 +208,11 @@ void ParseUnixMemoryProfile(fill_profile_f cb, uptr *stats, char *smaps,
 // Simple low-level (mmap-based) allocator for internal use. Doesn't have
 // constructor, so all instances of LowLevelAllocator should be
 // linker initialized.
+//
+// NOTE: Users should use the singleton provided via
+// `GetGlobalLowLevelAllocator()` rather than create a new one. This way, the
+// number of mmap fragments can be reduced and allocations share the same
+// contiguous mmap provided by this singleton.
 class LowLevelAllocator {
  public:
   // Requires an external lock.
@@ -224,6 +229,8 @@ typedef void (*LowLevelAllocateCallback)(uptr ptr, uptr size);
 // Passing NULL removes the callback.
 void SetLowLevelAllocateCallback(LowLevelAllocateCallback callback);
 
+LowLevelAllocator &GetGlobalLowLevelAllocator();
+
 // IO
 void CatastrophicErrorWrite(const char *buffer, uptr length);
 void RawWrite(const char *buffer);
@@ -386,6 +393,8 @@ void ReportErrorSummary(const char *error_type, const AddressInfo &info,
 // Same as above, but obtains AddressInfo by symbolizing top stack trace frame.
 void ReportErrorSummary(const char *error_type, const StackTrace *trace,
                         const char *alt_tool_name = nullptr);
+// Skips frames which we consider internal and not useful to the users.
+const SymbolizedStack *SkipInternalFrames(const SymbolizedStack *frames);
 
 void ReportMmapWriteExec(int prot, int mflags);
 
@@ -500,7 +509,7 @@ inline int ToLower(int c) {
 // A low-level vector based on mmap. May incur a significant memory overhead for
 // small vectors.
 // WARNING: The current implementation supports only POD types.
-template<typename T>
+template <typename T, bool raw_report = false>
 class InternalMmapVectorNoCtor {
  public:
   using value_type = T;
@@ -510,7 +519,7 @@ class InternalMmapVectorNoCtor {
     data_ = 0;
     reserve(initial_capacity);
   }
-  void Destroy() { UnmapOrDie(data_, capacity_bytes_); }
+  void Destroy() { UnmapOrDie(data_, capacity_bytes_, raw_report); }
   T &operator[](uptr i) {
     CHECK_LT(i, size_);
     return data_[i];
@@ -586,9 +595,10 @@ class InternalMmapVectorNoCtor {
     CHECK_LE(size_, new_capacity);
     uptr new_capacity_bytes =
         RoundUpTo(new_capacity * sizeof(T), GetPageSizeCached());
-    T *new_data = (T *)MmapOrDie(new_capacity_bytes, "InternalMmapVector");
+    T *new_data =
+        (T *)MmapOrDie(new_capacity_bytes, "InternalMmapVector", raw_report);
     internal_memcpy(new_data, data_, size_ * sizeof(T));
-    UnmapOrDie(data_, capacity_bytes_);
+    UnmapOrDie(data_, capacity_bytes_, raw_report);
     data_ = new_data;
     capacity_bytes_ = new_capacity_bytes;
   }
@@ -636,7 +646,8 @@ class InternalScopedString {
     buffer_.resize(1);
     buffer_[0] = '\0';
   }
-  void append(const char *format, ...) FORMAT(2, 3);
+  void Append(const char *str);
+  void AppendF(const char *format, ...) FORMAT(2, 3);
   const char *data() const { return buffer_.data(); }
   char *data() { return buffer_.data(); }
 
@@ -1086,7 +1097,7 @@ inline u32 GetNumberOfCPUsCached() {
 
 }  // namespace __sanitizer
 
-inline void *operator new(__sanitizer::operator_new_size_type size,
+inline void *operator new(__sanitizer::usize size,
                           __sanitizer::LowLevelAllocator &alloc) {
   return alloc.Allocate(size);
 }
lib/tsan/sanitizer_common/sanitizer_common_interceptors.inc
@@ -33,16 +33,17 @@
 //   COMMON_INTERCEPTOR_STRERROR
 //===----------------------------------------------------------------------===//
 
+#include <stdarg.h>
+
 #include "interception/interception.h"
 #include "sanitizer_addrhashmap.h"
+#include "sanitizer_dl.h"
 #include "sanitizer_errno.h"
 #include "sanitizer_placement_new.h"
 #include "sanitizer_platform_interceptors.h"
 #include "sanitizer_symbolizer.h"
 #include "sanitizer_tls_get_addr.h"
 
-#include <stdarg.h>
-
 #if SANITIZER_INTERCEPTOR_HOOKS
 #define CALL_WEAK_INTERCEPTOR_HOOK(f, ...) f(__VA_ARGS__);
 #define DECLARE_WEAK_INTERCEPTOR_HOOK(f, ...) \
@@ -445,11 +446,13 @@ INTERCEPTOR(char*, textdomain, const char *domainname) {
 #define INIT_TEXTDOMAIN
 #endif
 
-#if SANITIZER_INTERCEPT_STRCMP
+#if SANITIZER_INTERCEPT_STRCMP || SANITIZER_INTERCEPT_MEMCMP
 // Three-way comparison of two bytes: yields 0 when they are equal, -1 when
 // c1 is smaller than c2, and 1 when c1 is larger.
 static inline int CharCmpX(unsigned char c1, unsigned char c2) {
   if (c1 == c2)
     return 0;
   return c1 < c2 ? -1 : 1;
 }
+#endif
 
+#if SANITIZER_INTERCEPT_STRCMP
 DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strcmp, uptr called_pc,
                               const char *s1, const char *s2, int result)
 
@@ -971,7 +974,7 @@ INTERCEPTOR(SSIZE_T, read, int fd, void *ptr, SIZE_T count) {
   // FIXME: under ASan the call below may write to freed memory and corrupt
   // its metadata. See
   // https://github.com/google/sanitizers/issues/321.
-  SSIZE_T res = REAL(read)(fd, ptr, count);
+  SSIZE_T res = COMMON_INTERCEPTOR_BLOCK_REAL(read)(fd, ptr, count);
   if (res > 0) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, res);
   if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
   return res;
@@ -1006,7 +1009,7 @@ INTERCEPTOR(SSIZE_T, pread, int fd, void *ptr, SIZE_T count, OFF_T offset) {
   // FIXME: under ASan the call below may write to freed memory and corrupt
   // its metadata. See
   // https://github.com/google/sanitizers/issues/321.
-  SSIZE_T res = REAL(pread)(fd, ptr, count, offset);
+  SSIZE_T res = COMMON_INTERCEPTOR_BLOCK_REAL(pread)(fd, ptr, count, offset);
   if (res > 0) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, res);
   if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
   return res;
@@ -1024,7 +1027,7 @@ INTERCEPTOR(SSIZE_T, pread64, int fd, void *ptr, SIZE_T count, OFF64_T offset) {
   // FIXME: under ASan the call below may write to freed memory and corrupt
   // its metadata. See
   // https://github.com/google/sanitizers/issues/321.
-  SSIZE_T res = REAL(pread64)(fd, ptr, count, offset);
+  SSIZE_T res = COMMON_INTERCEPTOR_BLOCK_REAL(pread64)(fd, ptr, count, offset);
   if (res > 0) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, res);
   if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
   return res;
@@ -1040,7 +1043,7 @@ INTERCEPTOR_WITH_SUFFIX(SSIZE_T, readv, int fd, __sanitizer_iovec *iov,
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, readv, fd, iov, iovcnt);
   COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
-  SSIZE_T res = REAL(readv)(fd, iov, iovcnt);
+  SSIZE_T res = COMMON_INTERCEPTOR_BLOCK_REAL(readv)(fd, iov, iovcnt);
   if (res > 0) write_iovec(ctx, iov, iovcnt, res);
   if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
   return res;
@@ -1056,7 +1059,7 @@ INTERCEPTOR(SSIZE_T, preadv, int fd, __sanitizer_iovec *iov, int iovcnt,
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, preadv, fd, iov, iovcnt, offset);
   COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
-  SSIZE_T res = REAL(preadv)(fd, iov, iovcnt, offset);
+  SSIZE_T res = COMMON_INTERCEPTOR_BLOCK_REAL(preadv)(fd, iov, iovcnt, offset);
   if (res > 0) write_iovec(ctx, iov, iovcnt, res);
   if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
   return res;
@@ -1072,7 +1075,8 @@ INTERCEPTOR(SSIZE_T, preadv64, int fd, __sanitizer_iovec *iov, int iovcnt,
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, preadv64, fd, iov, iovcnt, offset);
   COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
-  SSIZE_T res = REAL(preadv64)(fd, iov, iovcnt, offset);
+  SSIZE_T res =
+      COMMON_INTERCEPTOR_BLOCK_REAL(preadv64)(fd, iov, iovcnt, offset);
   if (res > 0) write_iovec(ctx, iov, iovcnt, res);
   if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
   return res;
@@ -1088,8 +1092,9 @@ INTERCEPTOR(SSIZE_T, write, int fd, void *ptr, SIZE_T count) {
   COMMON_INTERCEPTOR_ENTER(ctx, write, fd, ptr, count);
   COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
   if (fd >= 0) COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
-  SSIZE_T res = REAL(write)(fd, ptr, count);
-  // FIXME: this check should be _before_ the call to REAL(write), not after
+  SSIZE_T res = COMMON_INTERCEPTOR_BLOCK_REAL(write)(fd, ptr, count);
+  // FIXME: this check should be _before_ the call to
+  // COMMON_INTERCEPTOR_BLOCK_REAL(write), not after
   if (res > 0) COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, res);
   return res;
 }
@@ -1118,7 +1123,7 @@ INTERCEPTOR(SSIZE_T, pwrite, int fd, void *ptr, SIZE_T count, OFF_T offset) {
   COMMON_INTERCEPTOR_ENTER(ctx, pwrite, fd, ptr, count, offset);
   COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
   if (fd >= 0) COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
-  SSIZE_T res = REAL(pwrite)(fd, ptr, count, offset);
+  SSIZE_T res = COMMON_INTERCEPTOR_BLOCK_REAL(pwrite)(fd, ptr, count, offset);
   if (res > 0) COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, res);
   return res;
 }
@@ -1134,7 +1139,7 @@ INTERCEPTOR(SSIZE_T, pwrite64, int fd, void *ptr, OFF64_T count,
   COMMON_INTERCEPTOR_ENTER(ctx, pwrite64, fd, ptr, count, offset);
   COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
   if (fd >= 0) COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
-  SSIZE_T res = REAL(pwrite64)(fd, ptr, count, offset);
+  SSIZE_T res = COMMON_INTERCEPTOR_BLOCK_REAL(pwrite64)(fd, ptr, count, offset);
   if (res > 0) COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, res);
   return res;
 }
@@ -1150,7 +1155,7 @@ INTERCEPTOR_WITH_SUFFIX(SSIZE_T, writev, int fd, __sanitizer_iovec *iov,
   COMMON_INTERCEPTOR_ENTER(ctx, writev, fd, iov, iovcnt);
   COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
   if (fd >= 0) COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
-  SSIZE_T res = REAL(writev)(fd, iov, iovcnt);
+  SSIZE_T res = COMMON_INTERCEPTOR_BLOCK_REAL(writev)(fd, iov, iovcnt);
   if (res > 0) read_iovec(ctx, iov, iovcnt, res);
   return res;
 }
@@ -1166,7 +1171,7 @@ INTERCEPTOR(SSIZE_T, pwritev, int fd, __sanitizer_iovec *iov, int iovcnt,
   COMMON_INTERCEPTOR_ENTER(ctx, pwritev, fd, iov, iovcnt, offset);
   COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
   if (fd >= 0) COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
-  SSIZE_T res = REAL(pwritev)(fd, iov, iovcnt, offset);
+  SSIZE_T res = COMMON_INTERCEPTOR_BLOCK_REAL(pwritev)(fd, iov, iovcnt, offset);
   if (res > 0) read_iovec(ctx, iov, iovcnt, res);
   return res;
 }
@@ -1182,7 +1187,8 @@ INTERCEPTOR(SSIZE_T, pwritev64, int fd, __sanitizer_iovec *iov, int iovcnt,
   COMMON_INTERCEPTOR_ENTER(ctx, pwritev64, fd, iov, iovcnt, offset);
   COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
   if (fd >= 0) COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
-  SSIZE_T res = REAL(pwritev64)(fd, iov, iovcnt, offset);
+  SSIZE_T res =
+      COMMON_INTERCEPTOR_BLOCK_REAL(pwritev64)(fd, iov, iovcnt, offset);
   if (res > 0) read_iovec(ctx, iov, iovcnt, res);
   return res;
 }
@@ -1245,6 +1251,7 @@ INTERCEPTOR(int, prctl, int option, unsigned long arg2, unsigned long arg3,
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, prctl, option, arg2, arg3, arg4, arg5);
   static const int PR_SET_NAME = 15;
+  static const int PR_GET_NAME = 16;
   static const int PR_SET_VMA = 0x53564d41;
   static const int PR_SCHED_CORE = 62;
   static const int PR_SCHED_CORE_GET = 0;
@@ -1258,7 +1265,11 @@ INTERCEPTOR(int, prctl, int option, unsigned long arg2, unsigned long arg3,
     internal_strncpy(buff, (char *)arg2, 15);
     buff[15] = 0;
     COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, buff);
-  } else if (res != -1 && option == PR_SCHED_CORE && arg2 == PR_SCHED_CORE_GET) {
+  } else if (res == 0 && option == PR_GET_NAME) {
+    char *name = (char *)arg2;
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, internal_strlen(name) + 1);
+  } else if (res != -1 && option == PR_SCHED_CORE &&
+             arg2 == PR_SCHED_CORE_GET) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, (u64*)(arg5), sizeof(u64));
   }
   return res;
@@ -2546,7 +2557,7 @@ INTERCEPTOR_WITH_SUFFIX(int, wait, int *status) {
   // FIXME: under ASan the call below may write to freed memory and corrupt
   // its metadata. See
   // https://github.com/google/sanitizers/issues/321.
-  int res = REAL(wait)(status);
+  int res = COMMON_INTERCEPTOR_BLOCK_REAL(wait)(status);
   if (res != -1 && status)
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, status, sizeof(*status));
   return res;
@@ -2564,7 +2575,7 @@ INTERCEPTOR_WITH_SUFFIX(int, waitid, int idtype, int id, void *infop,
   // FIXME: under ASan the call below may write to freed memory and corrupt
   // its metadata. See
   // https://github.com/google/sanitizers/issues/321.
-  int res = REAL(waitid)(idtype, id, infop, options);
+  int res = COMMON_INTERCEPTOR_BLOCK_REAL(waitid)(idtype, id, infop, options);
   if (res != -1 && infop)
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, infop, siginfo_t_sz);
   return res;
@@ -2575,7 +2586,7 @@ INTERCEPTOR_WITH_SUFFIX(int, waitpid, int pid, int *status, int options) {
   // FIXME: under ASan the call below may write to freed memory and corrupt
   // its metadata. See
   // https://github.com/google/sanitizers/issues/321.
-  int res = REAL(waitpid)(pid, status, options);
+  int res = COMMON_INTERCEPTOR_BLOCK_REAL(waitpid)(pid, status, options);
   if (res != -1 && status)
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, status, sizeof(*status));
   return res;
@@ -2586,7 +2597,7 @@ INTERCEPTOR(int, wait3, int *status, int options, void *rusage) {
   // FIXME: under ASan the call below may write to freed memory and corrupt
   // its metadata. See
   // https://github.com/google/sanitizers/issues/321.
-  int res = REAL(wait3)(status, options, rusage);
+  int res = COMMON_INTERCEPTOR_BLOCK_REAL(wait3)(status, options, rusage);
   if (res != -1) {
     if (status) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, status, sizeof(*status));
     if (rusage) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, rusage, struct_rusage_sz);
@@ -2600,7 +2611,8 @@ INTERCEPTOR(int, __wait4, int pid, int *status, int options, void *rusage) {
   // FIXME: under ASan the call below may write to freed memory and corrupt
   // its metadata. See
   // https://github.com/google/sanitizers/issues/321.
-  int res = REAL(__wait4)(pid, status, options, rusage);
+  int res =
+      COMMON_INTERCEPTOR_BLOCK_REAL(__wait4)(pid, status, options, rusage);
   if (res != -1) {
     if (status) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, status, sizeof(*status));
     if (rusage) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, rusage, struct_rusage_sz);
@@ -2615,7 +2627,7 @@ INTERCEPTOR(int, wait4, int pid, int *status, int options, void *rusage) {
   // FIXME: under ASan the call below may write to freed memory and corrupt
   // its metadata. See
   // https://github.com/google/sanitizers/issues/321.
-  int res = REAL(wait4)(pid, status, options, rusage);
+  int res = COMMON_INTERCEPTOR_BLOCK_REAL(wait4)(pid, status, options, rusage);
   if (res != -1) {
     if (status) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, status, sizeof(*status));
     if (rusage) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, rusage, struct_rusage_sz);
@@ -2993,7 +3005,7 @@ INTERCEPTOR(int, accept, int fd, void *addr, unsigned *addrlen) {
     COMMON_INTERCEPTOR_READ_RANGE(ctx, addrlen, sizeof(*addrlen));
     addrlen0 = *addrlen;
   }
-  int fd2 = REAL(accept)(fd, addr, addrlen);
+  int fd2 = COMMON_INTERCEPTOR_BLOCK_REAL(accept)(fd, addr, addrlen);
   if (fd2 >= 0) {
     if (fd >= 0) COMMON_INTERCEPTOR_FD_SOCKET_ACCEPT(ctx, fd, fd2);
     if (addr && addrlen)
@@ -3018,7 +3030,7 @@ INTERCEPTOR(int, accept4, int fd, void *addr, unsigned *addrlen, int f) {
   // FIXME: under ASan the call below may write to freed memory and corrupt
   // its metadata. See
   // https://github.com/google/sanitizers/issues/321.
-  int fd2 = REAL(accept4)(fd, addr, addrlen, f);
+  int fd2 = COMMON_INTERCEPTOR_BLOCK_REAL(accept4)(fd, addr, addrlen, f);
   if (fd2 >= 0) {
     if (fd >= 0) COMMON_INTERCEPTOR_FD_SOCKET_ACCEPT(ctx, fd, fd2);
     if (addr && addrlen)
@@ -3042,7 +3054,7 @@ INTERCEPTOR(int, paccept, int fd, void *addr, unsigned *addrlen,
     addrlen0 = *addrlen;
   }
   if (set) COMMON_INTERCEPTOR_READ_RANGE(ctx, set, sizeof(*set));
-  int fd2 = REAL(paccept)(fd, addr, addrlen, set, f);
+  int fd2 = COMMON_INTERCEPTOR_BLOCK_REAL(paccept)(fd, addr, addrlen, set, f);
   if (fd2 >= 0) {
     if (fd >= 0) COMMON_INTERCEPTOR_FD_SOCKET_ACCEPT(ctx, fd, fd2);
     if (addr && addrlen)
@@ -3123,7 +3135,7 @@ INTERCEPTOR(SSIZE_T, recvmsg, int fd, struct __sanitizer_msghdr *msg,
   // FIXME: under ASan the call below may write to freed memory and corrupt
   // its metadata. See
   // https://github.com/google/sanitizers/issues/321.
-  SSIZE_T res = REAL(recvmsg)(fd, msg, flags);
+  SSIZE_T res = COMMON_INTERCEPTOR_BLOCK_REAL(recvmsg)(fd, msg, flags);
   if (res >= 0) {
     if (fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
     if (msg) {
@@ -3144,7 +3156,8 @@ INTERCEPTOR(int, recvmmsg, int fd, struct __sanitizer_mmsghdr *msgvec,
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, recvmmsg, fd, msgvec, vlen, flags, timeout);
   if (timeout) COMMON_INTERCEPTOR_READ_RANGE(ctx, timeout, struct_timespec_sz);
-  int res = REAL(recvmmsg)(fd, msgvec, vlen, flags, timeout);
+  int res =
+      COMMON_INTERCEPTOR_BLOCK_REAL(recvmmsg)(fd, msgvec, vlen, flags, timeout);
   if (res >= 0) {
     if (fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
     for (int i = 0; i < res; ++i) {
@@ -3222,7 +3235,7 @@ INTERCEPTOR(SSIZE_T, sendmsg, int fd, struct __sanitizer_msghdr *msg,
     COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
     COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
   }
-  SSIZE_T res = REAL(sendmsg)(fd, msg, flags);
+  SSIZE_T res = COMMON_INTERCEPTOR_BLOCK_REAL(sendmsg)(fd, msg, flags);
   if (common_flags()->intercept_send && res >= 0 && msg)
     read_msghdr(ctx, msg, res);
   return res;
@@ -3241,7 +3254,7 @@ INTERCEPTOR(int, sendmmsg, int fd, struct __sanitizer_mmsghdr *msgvec,
     COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
     COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
   }
-  int res = REAL(sendmmsg)(fd, msgvec, vlen, flags);
+  int res = COMMON_INTERCEPTOR_BLOCK_REAL(sendmmsg)(fd, msgvec, vlen, flags);
   if (res >= 0 && msgvec) {
     for (int i = 0; i < res; ++i) {
       COMMON_INTERCEPTOR_WRITE_RANGE(ctx, &msgvec[i].msg_len,
@@ -3264,7 +3277,7 @@ INTERCEPTOR(int, msgsnd, int msqid, const void *msgp, SIZE_T msgsz,
   COMMON_INTERCEPTOR_ENTER(ctx, msgsnd, msqid, msgp, msgsz, msgflg);
   if (msgp)
     COMMON_INTERCEPTOR_READ_RANGE(ctx, msgp, sizeof(long) + msgsz);
-  int res = REAL(msgsnd)(msqid, msgp, msgsz, msgflg);
+  int res = COMMON_INTERCEPTOR_BLOCK_REAL(msgsnd)(msqid, msgp, msgsz, msgflg);
   return res;
 }
 
@@ -3272,7 +3285,8 @@ INTERCEPTOR(SSIZE_T, msgrcv, int msqid, void *msgp, SIZE_T msgsz,
             long msgtyp, int msgflg) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, msgrcv, msqid, msgp, msgsz, msgtyp, msgflg);
-  SSIZE_T len = REAL(msgrcv)(msqid, msgp, msgsz, msgtyp, msgflg);
+  SSIZE_T len =
+      COMMON_INTERCEPTOR_BLOCK_REAL(msgrcv)(msqid, msgp, msgsz, msgtyp, msgflg);
   if (len != -1)
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, msgp, sizeof(long) + len);
   return len;
@@ -6116,7 +6130,7 @@ INTERCEPTOR(int, flopen, const char *path, int flags, ...) {
   if (path) {
     COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
   }
-  return REAL(flopen)(path, flags, mode);
+  return COMMON_INTERCEPTOR_BLOCK_REAL(flopen)(path, flags, mode);
 }
 
 INTERCEPTOR(int, flopenat, int dirfd, const char *path, int flags, ...) {
@@ -6129,7 +6143,7 @@ INTERCEPTOR(int, flopenat, int dirfd, const char *path, int flags, ...) {
   if (path) {
     COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
   }
-  return REAL(flopenat)(dirfd, path, flags, mode);
+  return COMMON_INTERCEPTOR_BLOCK_REAL(flopenat)(dirfd, path, flags, mode);
 }
 
 #define INIT_FLOPEN    \
@@ -6305,7 +6319,36 @@ INTERCEPTOR(int, fclose, __sanitizer_FILE *fp) {
 INTERCEPTOR(void*, dlopen, const char *filename, int flag) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER_NOIGNORE(ctx, dlopen, filename, flag);
-  if (filename) COMMON_INTERCEPTOR_READ_STRING(ctx, filename, 0);
+
+  if (filename) {
+    COMMON_INTERCEPTOR_READ_STRING(ctx, filename, 0);
+
+#  if !SANITIZER_DYNAMIC
+    // We care about a very specific use-case: dladdr on
+    // statically-linked ASan may return <main program>
+    // instead of the library.
+    // We therefore only take effect if the sanitizer is statically
+    // linked, and we don't bother canonicalizing paths because
+    // dladdr should return the same address both times (we assume
+    // the user did not canonicalize the result from dladdr).
+    if (common_flags()->test_only_replace_dlopen_main_program) {
+      VPrintf(1, "dlopen interceptor: filename: %s\n", filename);
+
+      const char *SelfFName = DladdrSelfFName();
+      VPrintf(1, "dlopen interceptor: DladdrSelfFName: %p %s\n",
+              (const void *)SelfFName, SelfFName);
+
+      if (SelfFName && internal_strcmp(SelfFName, filename) == 0) {
+        // It's possible they copied the string from dladdr, so
+        // we do a string comparison rather than pointer comparison.
+        VPrintf(1, "dlopen interceptor: replacing %s because it matches %s\n",
+                filename, SelfFName);
+        filename = (char *)0;  // RTLD_DEFAULT
+      }
+    }
+#  endif  // !SANITIZER_DYNAMIC
+  }
+
   void *res = COMMON_INTERCEPTOR_DLOPEN(filename, flag);
   Symbolizer::GetOrInit()->InvalidateModuleList();
   COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, res);
@@ -6685,7 +6728,7 @@ INTERCEPTOR(SSIZE_T, recv, int fd, void *buf, SIZE_T len, int flags) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, recv, fd, buf, len, flags);
   COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
-  SSIZE_T res = REAL(recv)(fd, buf, len, flags);
+  SSIZE_T res = COMMON_INTERCEPTOR_BLOCK_REAL(recv)(fd, buf, len, flags);
   if (res > 0) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, Min((SIZE_T)res, len));
   }
@@ -6702,7 +6745,8 @@ INTERCEPTOR(SSIZE_T, recvfrom, int fd, void *buf, SIZE_T len, int flags,
   SIZE_T srcaddr_sz;
   if (srcaddr) srcaddr_sz = *addrlen;
   (void)srcaddr_sz;  // prevent "set but not used" warning
-  SSIZE_T res = REAL(recvfrom)(fd, buf, len, flags, srcaddr, addrlen);
+  SSIZE_T res = COMMON_INTERCEPTOR_BLOCK_REAL(recvfrom)(fd, buf, len, flags,
+                                                        srcaddr, addrlen);
   if (res > 0)
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, Min((SIZE_T)res, len));
   if (res >= 0 && srcaddr)
@@ -6725,7 +6769,7 @@ INTERCEPTOR(SSIZE_T, send, int fd, void *buf, SIZE_T len, int flags) {
     COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
     COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
   }
-  SSIZE_T res = REAL(send)(fd, buf, len, flags);
+  SSIZE_T res = COMMON_INTERCEPTOR_BLOCK_REAL(send)(fd, buf, len, flags);
   if (common_flags()->intercept_send && res > 0)
     COMMON_INTERCEPTOR_READ_RANGE(ctx, buf, Min((SIZE_T)res, len));
   return res;
@@ -6740,7 +6784,8 @@ INTERCEPTOR(SSIZE_T, sendto, int fd, void *buf, SIZE_T len, int flags,
     COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
   }
   // Can't check dstaddr as it may have uninitialized padding at the end.
-  SSIZE_T res = REAL(sendto)(fd, buf, len, flags, dstaddr, addrlen);
+  SSIZE_T res = COMMON_INTERCEPTOR_BLOCK_REAL(sendto)(fd, buf, len, flags,
+                                                      dstaddr, addrlen);
   if (common_flags()->intercept_send && res > 0)
     COMMON_INTERCEPTOR_READ_RANGE(ctx, buf, Min((SIZE_T)res, len));
   return res;
@@ -6753,25 +6798,25 @@ INTERCEPTOR(SSIZE_T, sendto, int fd, void *buf, SIZE_T len, int flags,
 #endif
 
 #if SANITIZER_INTERCEPT_EVENTFD_READ_WRITE
-INTERCEPTOR(int, eventfd_read, int fd, u64 *value) {
+INTERCEPTOR(int, eventfd_read, int fd, __sanitizer_eventfd_t *value) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, eventfd_read, fd, value);
   COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
-  int res = REAL(eventfd_read)(fd, value);
+  int res = COMMON_INTERCEPTOR_BLOCK_REAL(eventfd_read)(fd, value);
   if (res == 0) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, value, sizeof(*value));
     if (fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
   }
   return res;
 }
-INTERCEPTOR(int, eventfd_write, int fd, u64 value) {
+INTERCEPTOR(int, eventfd_write, int fd, __sanitizer_eventfd_t value) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, eventfd_write, fd, value);
   if (fd >= 0) {
     COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
     COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
   }
-  int res = REAL(eventfd_write)(fd, value);
+  int res = COMMON_INTERCEPTOR_BLOCK_REAL(eventfd_write)(fd, value);
   return res;
 }
 #define INIT_EVENTFD_READ_WRITE            \
@@ -7394,7 +7439,8 @@ INTERCEPTOR(int, open_by_handle_at, int mount_fd, struct file_handle* handle,
   COMMON_INTERCEPTOR_READ_RANGE(
       ctx, &sanitizer_handle->f_handle, sanitizer_handle->handle_bytes);
 
-  return REAL(open_by_handle_at)(mount_fd, handle, flags);
+  return COMMON_INTERCEPTOR_BLOCK_REAL(open_by_handle_at)(mount_fd, handle,
+                                                          flags);
 }
 
 #define INIT_OPEN_BY_HANDLE_AT COMMON_INTERCEPT_FUNCTION(open_by_handle_at)
@@ -7609,9 +7655,9 @@ static void write_protoent(void *ctx, struct __sanitizer_protoent *p) {
   COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->p_aliases, pp_size * sizeof(char *));
 }
 
-INTERCEPTOR(struct __sanitizer_protoent *, getprotoent) {
+INTERCEPTOR(struct __sanitizer_protoent *, getprotoent,) {
   void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, getprotoent);
+  COMMON_INTERCEPTOR_ENTER(ctx, getprotoent,);
   struct __sanitizer_protoent *p = REAL(getprotoent)();
   if (p)
     write_protoent(ctx, p);
@@ -7698,9 +7744,9 @@ INTERCEPTOR(int, getprotobynumber_r, int num,
 #endif
 
 #if SANITIZER_INTERCEPT_NETENT
-INTERCEPTOR(struct __sanitizer_netent *, getnetent) {
+INTERCEPTOR(struct __sanitizer_netent *, getnetent,) {
   void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, getnetent);
+  COMMON_INTERCEPTOR_ENTER(ctx, getnetent,);
   struct __sanitizer_netent *n = REAL(getnetent)();
   if (n) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n, sizeof(*n));
@@ -9862,9 +9908,9 @@ INTERCEPTOR(char *, fdevname_r,  int fd, char *buf, SIZE_T len) {
 #endif
 
 #if SANITIZER_INTERCEPT_GETUSERSHELL
-INTERCEPTOR(char *, getusershell) {
+INTERCEPTOR(char *, getusershell,) {
   void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, getusershell);
+  COMMON_INTERCEPTOR_ENTER(ctx, getusershell,);
   char *res = REAL(getusershell)();
   if (res)
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
@@ -9933,7 +9979,13 @@ INTERCEPTOR(void, sl_free, void *sl, int freeall) {
 INTERCEPTOR(SSIZE_T, getrandom, void *buf, SIZE_T buflen, unsigned int flags) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, getrandom, buf, buflen, flags);
-  SSIZE_T n = REAL(getrandom)(buf, buflen, flags);
+  // If GRND_NONBLOCK is set in the flags, the call is non-blocking.
+  static const int grnd_nonblock = 1; 
+  SSIZE_T n;
+  if ((flags & grnd_nonblock))
+    n = REAL(getrandom)(buf, buflen, flags);
+  else
+    n = COMMON_INTERCEPTOR_BLOCK_REAL(getrandom)(buf, buflen, flags);
   if (n > 0) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, n);
   }
@@ -10180,20 +10232,6 @@ INTERCEPTOR(int, __xuname, int size, void *utsname) {
 #define INIT___XUNAME
 #endif
 
-#if SANITIZER_INTERCEPT_HEXDUMP
-INTERCEPTOR(void, hexdump, const void *ptr, int length, const char *header, int flags) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, hexdump, ptr, length, header, flags);
-  COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, length);
-  COMMON_INTERCEPTOR_READ_RANGE(ctx, header, internal_strlen(header) + 1);
-  REAL(hexdump)(ptr, length, header, flags);
-}
-
-#define INIT_HEXDUMP COMMON_INTERCEPT_FUNCTION(hexdump);
-#else
-#define INIT_HEXDUMP
-#endif
-
 #if SANITIZER_INTERCEPT_ARGP_PARSE
 INTERCEPTOR(int, argp_parse, const struct argp *argp, int argc, char **argv,
             unsigned flags, int *arg_index, void *input) {
@@ -10226,6 +10264,38 @@ INTERCEPTOR(int, cpuset_getaffinity, int level, int which, __int64_t id, SIZE_T
 #define INIT_CPUSET_GETAFFINITY
 #endif
 
+#if SANITIZER_INTERCEPT_PREADV2
+// Interceptor for preadv2: performs the real call, then reports the bytes
+// written into the iovec buffers and acquires on the file descriptor.
+INTERCEPTOR(SSIZE_T, preadv2, int fd, __sanitizer_iovec *iov, int iovcnt,
+            OFF_T offset, int flags) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, preadv2, fd, iov, iovcnt, offset, flags);
+  COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
+  // Go through COMMON_INTERCEPTOR_BLOCK_REAL, as the preadv64 interceptor
+  // does, so the tool knows this call may block.
+  SSIZE_T res =
+      COMMON_INTERCEPTOR_BLOCK_REAL(preadv2)(fd, iov, iovcnt, offset, flags);
+  // Only the bytes actually read (res) are reported as written.
+  if (res > 0) write_iovec(ctx, iov, iovcnt, res);
+  if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
+  return res;
+}
+#define INIT_PREADV2 COMMON_INTERCEPT_FUNCTION(preadv2)
+#else
+#define INIT_PREADV2
+#endif
+
+#if SANITIZER_INTERCEPT_PWRITEV2
+// Interceptor for pwritev2: releases on the file descriptor, performs the
+// real call, then reports the bytes read from the iovec buffers.
+INTERCEPTOR(SSIZE_T, pwritev2, int fd, __sanitizer_iovec *iov, int iovcnt,
+            OFF_T offset, int flags) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, pwritev2, fd, iov, iovcnt, offset, flags);
+  COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
+  if (fd >= 0) COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
+  // Go through COMMON_INTERCEPTOR_BLOCK_REAL, as the pwritev and pwritev64
+  // interceptors do, so the tool knows this call may block.
+  SSIZE_T res =
+      COMMON_INTERCEPTOR_BLOCK_REAL(pwritev2)(fd, iov, iovcnt, offset, flags);
+  // Only the bytes actually written (res) are reported as read.
+  if (res > 0) read_iovec(ctx, iov, iovcnt, res);
+  return res;
+}
+#define INIT_PWRITEV2 COMMON_INTERCEPT_FUNCTION(pwritev2)
+#else
+#define INIT_PWRITEV2
+#endif
+
 #include "sanitizer_common_interceptors_netbsd_compat.inc"
 
 namespace __sanitizer {
@@ -10543,9 +10613,10 @@ static void InitializeCommonInterceptors() {
   INIT_PROCCTL
   INIT_UNAME;
   INIT___XUNAME;
-  INIT_HEXDUMP;
   INIT_ARGP_PARSE;
   INIT_CPUSET_GETAFFINITY;
+  INIT_PREADV2;
+  INIT_PWRITEV2;
 
   INIT___PRINTF_CHK;
 }
lib/tsan/sanitizer_common/sanitizer_common_interceptors_format.inc
@@ -547,24 +547,25 @@ static void printf_common(void *ctx, const char *format, va_list aq) {
       continue;
     } else if (size == FSS_STRLEN) {
       if (void *argp = va_arg(aq, void *)) {
+        uptr len;
         if (dir.starredPrecision) {
           // FIXME: properly support starred precision for strings.
-          size = 0;
+          len = 0;
         } else if (dir.fieldPrecision > 0) {
           // Won't read more than "precision" symbols.
-          size = internal_strnlen((const char *)argp, dir.fieldPrecision);
-          if (size < dir.fieldPrecision) size++;
+          len = internal_strnlen((const char *)argp, dir.fieldPrecision);
+          if (len < (uptr)dir.fieldPrecision)
+            len++;
         } else {
           // Whole string will be accessed.
-          size = internal_strlen((const char *)argp) + 1;
+          len = internal_strlen((const char *)argp) + 1;
         }
-        COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
+        COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, len);
       }
     } else if (size == FSS_WCSLEN) {
       if (void *argp = va_arg(aq, void *)) {
         // FIXME: Properly support wide-character strings (via wcsrtombs).
-        size = 0;
-        COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
+        COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, 0);
       }
     } else {
       // Skip non-pointer args
lib/tsan/sanitizer_common/sanitizer_common_interface.inc
@@ -46,6 +46,7 @@ INTERFACE_FUNCTION(__sanitizer_purge_allocator)
 INTERFACE_FUNCTION(__sanitizer_print_memory_profile)
 INTERFACE_WEAK_FUNCTION(__sanitizer_free_hook)
 INTERFACE_WEAK_FUNCTION(__sanitizer_malloc_hook)
+INTERFACE_WEAK_FUNCTION(__sanitizer_ignore_free_hook)
 // Memintrinsic functions.
 INTERFACE_FUNCTION(__sanitizer_internal_memcpy)
 INTERFACE_FUNCTION(__sanitizer_internal_memmove)
lib/tsan/sanitizer_common/sanitizer_common_interface_posix.inc
@@ -9,6 +9,7 @@
 //===----------------------------------------------------------------------===//
 INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_code)
 INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_data)
+INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_frame)
 INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_demangle)
 INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_flush)
 INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_set_demangle)
lib/tsan/sanitizer_common/sanitizer_common_libcdep.cpp
@@ -87,8 +87,8 @@ void MaybeStartBackgroudThread() {
   if (!common_flags()->hard_rss_limit_mb &&
       !common_flags()->soft_rss_limit_mb &&
       !common_flags()->heap_profile) return;
-  if (!&real_pthread_create) {
-    VPrintf(1, "%s: real_pthread_create undefined\n", SanitizerToolName);
+  if (!&internal_pthread_create) {
+    VPrintf(1, "%s: internal_pthread_create undefined\n", SanitizerToolName);
     return;  // Can't spawn the thread anyway.
   }
 
@@ -119,8 +119,10 @@ void MaybeStartBackgroudThread() {}
 #endif
 
 void WriteToSyslog(const char *msg) {
+  if (!msg)
+    return;
   InternalScopedString msg_copy;
-  msg_copy.append("%s", msg);
+  msg_copy.Append(msg);
   const char *p = msg_copy.data();
 
   // Print one line at a time.
@@ -167,7 +169,7 @@ void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name,
                      : !MmapFixedNoReserve(beg, size, name)) {
     Report(
         "ReserveShadowMemoryRange failed while trying to map 0x%zx bytes. "
-        "Perhaps you're using ulimit -v\n",
+        "Perhaps you're using ulimit -v or ulimit -d\n",
         size);
     Abort();
   }
lib/tsan/sanitizer_common/sanitizer_common_syscalls.inc
@@ -38,6 +38,10 @@
 //          Called before fork syscall.
 //   COMMON_SYSCALL_POST_FORK(long res)
 //          Called after fork syscall.
+//   COMMON_SYSCALL_BLOCKING_START()
+//          Called before blocking syscall.
+//   COMMON_SYSCALL_BLOCKING_END()
+//          Called after blocking syscall.
 //===----------------------------------------------------------------------===//
 
 #include "sanitizer_platform.h"
@@ -85,6 +89,16 @@
       {}
 #  endif
 
+#  ifndef COMMON_SYSCALL_BLOCKING_START
+#    define COMMON_SYSCALL_BLOCKING_START() \
+      {}
+#  endif
+
+#  ifndef COMMON_SYSCALL_BLOCKING_END
+#    define COMMON_SYSCALL_BLOCKING_END() \
+      {}
+#  endif
+
 // FIXME: do some kind of PRE_READ for all syscall arguments (int(s) and such).
 
 extern "C" {
@@ -2808,6 +2822,15 @@ PRE_SYSCALL(fchownat)
 POST_SYSCALL(fchownat)
 (long res, long dfd, const void *filename, long user, long group, long flag) {}
 
+// fchmodat2 syscall hooks. The pre-hook reports a read of the NUL-terminated
+// `filename` string when it is non-null; the post-hook does nothing.
+PRE_SYSCALL(fchmodat2)(long dfd, const void *filename, long mode, long flag) {
+  if (filename)
+    PRE_READ(filename,
+             __sanitizer::internal_strlen((const char *)filename) + 1);
+}
+
+POST_SYSCALL(fchmodat2)
+(long res, long dfd, const void *filename, long mode, long flag) {}
+
 PRE_SYSCALL(openat)(long dfd, const void *filename, long flags, long mode) {
   if (filename)
     PRE_READ(filename,
@@ -3167,6 +3190,18 @@ POST_SYSCALL(sigaltstack)(long res, void *ss, void *oss) {
     }
   }
 }
+
+// futex syscall hooks. They bracket the syscall with the blocking start/end
+// callbacks; none of the syscall arguments are inspected.
+PRE_SYSCALL(futex)
+(void *uaddr, long futex_op, long val, void *timeout, void *uaddr2, long val3) {
+  COMMON_SYSCALL_BLOCKING_START();
+}
+
+POST_SYSCALL(futex)
+(long res, void *uaddr, long futex_op, long val, void *timeout, void *uaddr2,
+ long val3) {
+  COMMON_SYSCALL_BLOCKING_END();
+}
+
 }  // extern "C"
 
 #  undef PRE_SYSCALL
lib/tsan/sanitizer_common/sanitizer_dl.cpp
@@ -0,0 +1,37 @@
+//===-- sanitizer_dl.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file has helper functions that depend on libc's dynamic loading
+// introspection.
+//
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_dl.h"
+
+#include "sanitizer_common/sanitizer_platform.h"
+
+#if SANITIZER_GLIBC
+#  include <dlfcn.h>
+#endif
+
+namespace __sanitizer {
+extern const char *SanitizerToolName;
+
+// Asks dladdr which object contains SanitizerToolName and returns that
+// object's file name. Returns nullptr if the lookup fails or when not
+// building for glibc.
+const char *DladdrSelfFName(void) {
+#if SANITIZER_GLIBC
+  Dl_info self_info;
+  // dladdr returns non-zero on success.
+  if (dladdr((void *)&SanitizerToolName, &self_info) != 0)
+    return self_info.dli_fname;
+#endif
+
+  return nullptr;
+}
+
+}  // namespace __sanitizer
lib/tsan/sanitizer_common/sanitizer_dl.h
@@ -0,0 +1,26 @@
+//===-- sanitizer_dl.h ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file has helper functions that depend on libc's dynamic loading
+// introspection.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_DL_H
+#define SANITIZER_DL_H
+
+namespace __sanitizer {
+
+// Returns the path to the object that contains the sanitizer: the shared
+// object or, in the case of statically linked sanitizers, the main program
+// itself.
+const char* DladdrSelfFName(void);
+
+}  // namespace __sanitizer
+
+#endif  // SANITIZER_DL_H
lib/tsan/sanitizer_common/sanitizer_file.cpp
@@ -69,7 +69,7 @@ void ReportFile::ReopenIfNecessary() {
     WriteToFile(kStderrFd, ErrorMsgPrefix, internal_strlen(ErrorMsgPrefix));
     WriteToFile(kStderrFd, full_path, internal_strlen(full_path));
     char errmsg[100];
-    internal_snprintf(errmsg, sizeof(errmsg), " (reason: %d)", err);
+    internal_snprintf(errmsg, sizeof(errmsg), " (reason: %d)\n", err);
     WriteToFile(kStderrFd, errmsg, internal_strlen(errmsg));
     Die();
   }
@@ -88,6 +88,8 @@ static void RecursiveCreateParentDirs(char *path) {
       const char *ErrorMsgPrefix = "ERROR: Can't create directory: ";
       WriteToFile(kStderrFd, ErrorMsgPrefix, internal_strlen(ErrorMsgPrefix));
       WriteToFile(kStderrFd, path, internal_strlen(path));
+      const char *ErrorMsgSuffix = "\n";
+      WriteToFile(kStderrFd, ErrorMsgSuffix, internal_strlen(ErrorMsgSuffix));
       Die();
     }
     path[i] = save;
lib/tsan/sanitizer_common/sanitizer_file.h
@@ -84,7 +84,7 @@ bool IsPathSeparator(const char c);
 bool IsAbsolutePath(const char *path);
 // Returns true on success, false on failure.
 bool CreateDir(const char *pathname);
-// Starts a subprocess and returs its pid.
+// Starts a subprocess and returns its pid.
 // If *_fd parameters are not kInvalidFd their corresponding input/output
 // streams will be redirect to the file. The files will always be closed
 // in parent process even in case of an error.
lib/tsan/sanitizer_common/sanitizer_flag_parser.cpp
@@ -19,8 +19,6 @@
 
 namespace __sanitizer {
 
-LowLevelAllocator FlagParser::Alloc;
-
 class UnknownFlags {
   static const int kMaxUnknownFlags = 20;
   const char *unknown_flags_[kMaxUnknownFlags];
@@ -49,7 +47,7 @@ void ReportUnrecognizedFlags() {
 
 char *FlagParser::ll_strndup(const char *s, uptr n) {
   uptr len = internal_strnlen(s, n);
-  char *s2 = (char*)Alloc.Allocate(len + 1);
+  char *s2 = (char *)GetGlobalLowLevelAllocator().Allocate(len + 1);
   internal_memcpy(s2, s, len);
   s2[len] = 0;
   return s2;
@@ -185,7 +183,8 @@ void FlagParser::RegisterHandler(const char *name, FlagHandlerBase *handler,
 }
 
 FlagParser::FlagParser() : n_flags_(0), buf_(nullptr), pos_(0) {
-  flags_ = (Flag *)Alloc.Allocate(sizeof(Flag) * kMaxFlags);
+  flags_ =
+      (Flag *)GetGlobalLowLevelAllocator().Allocate(sizeof(Flag) * kMaxFlags);
 }
 
 }  // namespace __sanitizer
lib/tsan/sanitizer_common/sanitizer_flag_parser.h
@@ -178,8 +178,6 @@ class FlagParser {
   bool ParseFile(const char *path, bool ignore_missing);
   void PrintFlagDescriptions();
 
-  static LowLevelAllocator Alloc;
-
  private:
   void fatal_error(const char *err);
   bool is_space(char c);
@@ -193,7 +191,7 @@ class FlagParser {
 template <typename T>
 static void RegisterFlag(FlagParser *parser, const char *name, const char *desc,
                          T *var) {
-  FlagHandler<T> *fh = new (FlagParser::Alloc) FlagHandler<T>(var);
+  FlagHandler<T> *fh = new (GetGlobalLowLevelAllocator()) FlagHandler<T>(var);
   parser->RegisterHandler(name, fh, desc);
 }
 
lib/tsan/sanitizer_common/sanitizer_flags.cpp
@@ -108,11 +108,11 @@ class FlagHandlerInclude final : public FlagHandlerBase {
 };
 
 void RegisterIncludeFlags(FlagParser *parser, CommonFlags *cf) {
-  FlagHandlerInclude *fh_include = new (FlagParser::Alloc)
+  FlagHandlerInclude *fh_include = new (GetGlobalLowLevelAllocator())
       FlagHandlerInclude(parser, /*ignore_missing*/ false);
   parser->RegisterHandler("include", fh_include,
                           "read more options from the given file");
-  FlagHandlerInclude *fh_include_if_exists = new (FlagParser::Alloc)
+  FlagHandlerInclude *fh_include_if_exists = new (GetGlobalLowLevelAllocator())
       FlagHandlerInclude(parser, /*ignore_missing*/ true);
   parser->RegisterHandler(
       "include_if_exists", fh_include_if_exists,
lib/tsan/sanitizer_common/sanitizer_flags.inc
@@ -269,3 +269,16 @@ COMMON_FLAG(bool, detect_write_exec, false,
 COMMON_FLAG(bool, test_only_emulate_no_memorymap, false,
             "TEST ONLY fail to read memory mappings to emulate sanitized "
             "\"init\"")
+// With static linking, dladdr((void*)pthread_join) or similar will return the
+// path to the main program. This flag will replace dlopen(<main program>,...)
+// with dlopen(NULL,...), which is the correct way to get a handle to the main
+// program.
+COMMON_FLAG(bool, test_only_replace_dlopen_main_program, false,
+            "TEST ONLY replace dlopen(<main program>,...) with dlopen(NULL)")
+
+COMMON_FLAG(bool, enable_symbolizer_markup, SANITIZER_FUCHSIA,
+            "Use sanitizer symbolizer markup, available on Linux "
+            "and always set true for Fuchsia.")
+
+COMMON_FLAG(bool, detect_invalid_join, true,
+            "If set, check invalid joins of threads.")
lib/tsan/sanitizer_common/sanitizer_flat_map.h
@@ -109,6 +109,10 @@ class TwoLevelMap {
     return *AddressSpaceView::LoadWritable(&map2[idx % kSize2]);
   }
 
+  // Acquires mu_. Thread-safety analysis is switched off for this method via
+  // SANITIZER_NO_THREAD_SAFETY_ANALYSIS.
+  void Lock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+    mu_.Lock();
+  }
+
+  // Releases mu_. Thread-safety analysis is switched off for this method via
+  // SANITIZER_NO_THREAD_SAFETY_ANALYSIS.
+  void Unlock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+    mu_.Unlock();
+  }
+
  private:
   constexpr uptr MmapSize() const {
     return RoundUpTo(kSize2 * sizeof(T), GetPageSizeCached());
lib/tsan/sanitizer_common/sanitizer_freebsd.h
@@ -1,137 +0,0 @@
-//===-- sanitizer_freebsd.h -------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of Sanitizer runtime. It contains FreeBSD-specific
-// definitions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SANITIZER_FREEBSD_H
-#define SANITIZER_FREEBSD_H
-
-#include "sanitizer_internal_defs.h"
-
-// x86-64 FreeBSD 9.2 and older define 'ucontext_t' incorrectly in
-// 32-bit mode.
-#if SANITIZER_FREEBSD && (SANITIZER_WORDSIZE == 32)
-#include <osreldate.h>
-#if __FreeBSD_version <= 902001  // v9.2
-#include <link.h>
-#include <sys/param.h>
-#include <ucontext.h>
-
-namespace __sanitizer {
-
-typedef unsigned long long __xuint64_t;
-
-typedef __int32_t __xregister_t;
-
-typedef struct __xmcontext {
-  __xregister_t mc_onstack;
-  __xregister_t mc_gs;
-  __xregister_t mc_fs;
-  __xregister_t mc_es;
-  __xregister_t mc_ds;
-  __xregister_t mc_edi;
-  __xregister_t mc_esi;
-  __xregister_t mc_ebp;
-  __xregister_t mc_isp;
-  __xregister_t mc_ebx;
-  __xregister_t mc_edx;
-  __xregister_t mc_ecx;
-  __xregister_t mc_eax;
-  __xregister_t mc_trapno;
-  __xregister_t mc_err;
-  __xregister_t mc_eip;
-  __xregister_t mc_cs;
-  __xregister_t mc_eflags;
-  __xregister_t mc_esp;
-  __xregister_t mc_ss;
-
-  int mc_len;
-  int mc_fpformat;
-  int mc_ownedfp;
-  __xregister_t mc_flags;
-
-  int mc_fpstate[128] __aligned(16);
-  __xregister_t mc_fsbase;
-  __xregister_t mc_gsbase;
-  __xregister_t mc_xfpustate;
-  __xregister_t mc_xfpustate_len;
-
-  int mc_spare2[4];
-} xmcontext_t;
-
-typedef struct __xucontext {
-  sigset_t uc_sigmask;
-  xmcontext_t uc_mcontext;
-
-  struct __ucontext *uc_link;
-  stack_t uc_stack;
-  int uc_flags;
-  int __spare__[4];
-} xucontext_t;
-
-struct xkinfo_vmentry {
-  int kve_structsize;
-  int kve_type;
-  __xuint64_t kve_start;
-  __xuint64_t kve_end;
-  __xuint64_t kve_offset;
-  __xuint64_t kve_vn_fileid;
-  __uint32_t kve_vn_fsid;
-  int kve_flags;
-  int kve_resident;
-  int kve_private_resident;
-  int kve_protection;
-  int kve_ref_count;
-  int kve_shadow_count;
-  int kve_vn_type;
-  __xuint64_t kve_vn_size;
-  __uint32_t kve_vn_rdev;
-  __uint16_t kve_vn_mode;
-  __uint16_t kve_status;
-  int _kve_ispare[12];
-  char kve_path[PATH_MAX];
-};
-
-typedef struct {
-  __uint32_t p_type;
-  __uint32_t p_offset;
-  __uint32_t p_vaddr;
-  __uint32_t p_paddr;
-  __uint32_t p_filesz;
-  __uint32_t p_memsz;
-  __uint32_t p_flags;
-  __uint32_t p_align;
-} XElf32_Phdr;
-
-struct xdl_phdr_info {
-  Elf_Addr dlpi_addr;
-  const char *dlpi_name;
-  const XElf32_Phdr *dlpi_phdr;
-  Elf_Half dlpi_phnum;
-  unsigned long long int dlpi_adds;
-  unsigned long long int dlpi_subs;
-  size_t dlpi_tls_modid;
-  void *dlpi_tls_data;
-};
-
-typedef int (*__xdl_iterate_hdr_callback)(struct xdl_phdr_info *, size_t,
-                                          void *);
-typedef int xdl_iterate_phdr_t(__xdl_iterate_hdr_callback, void *);
-
-#define xdl_iterate_phdr(callback, param) \
-  (((xdl_iterate_phdr_t *)dl_iterate_phdr)((callback), (param)))
-
-}  // namespace __sanitizer
-
-#endif  // __FreeBSD_version <= 902001
-#endif  // SANITIZER_FREEBSD && (SANITIZER_WORDSIZE == 32)
-
-#endif  // SANITIZER_FREEBSD_H
lib/tsan/sanitizer_common/sanitizer_fuchsia.cpp
@@ -129,6 +129,60 @@ uptr GetMaxVirtualAddress() { return GetMaxUserVirtualAddress(); }
 
 bool ErrorIsOOM(error_t err) { return err == ZX_ERR_NO_MEMORY; }
 
+// For any sanitizer internal that needs to map something which can be unmapped
+// later, first attempt to map to a pre-allocated VMAR. This helps reduce
+// fragmentation from many small anonymous mmap calls. A good value for this
+// VMAR size would be the total size of your typical sanitizer internal objects
+// allocated in an "average" process lifetime. Examples of this include:
+// FakeStack, LowLevelAllocator mappings, TwoLevelMap, InternalMmapVector,
+// StackStore, CreateAsanThread, etc.
+//
+// This is roughly equal to the total sum of sanitizer internal mappings for a
+// large test case.
+constexpr size_t kSanitizerHeapVmarSize = 13ULL << 20;
+static zx_handle_t gSanitizerHeapVmar = ZX_HANDLE_INVALID;
+
+// Returns, through `vmar`, the handle of the VMAR reserved for sanitizer
+// internal mappings, allocating it from the root VMAR while the global handle
+// is still ZX_HANDLE_INVALID.
+//
+// `*vmar` is always written with the current value of gSanitizerHeapVmar, even
+// when the allocation failed. The return value is ZX_OK when the handle was
+// already present or has just been allocated, and otherwise the status
+// reported by _zx_vmar_allocate.
+static zx_status_t GetSanitizerHeapVmar(zx_handle_t *vmar) {
+  const bool needs_allocation = gSanitizerHeapVmar == ZX_HANDLE_INVALID;
+  zx_status_t result = ZX_OK;
+  if (needs_allocation) {
+    // The reservation size must be a whole number of pages.
+    CHECK_EQ(kSanitizerHeapVmarSize % GetPageSizeCached(), 0);
+    uintptr_t reserved_base;  // Required out-parameter; not used afterwards.
+    result = _zx_vmar_allocate(
+        _zx_vmar_root_self(),
+        ZX_VM_CAN_MAP_READ | ZX_VM_CAN_MAP_WRITE | ZX_VM_CAN_MAP_SPECIFIC, 0,
+        kSanitizerHeapVmarSize, &gSanitizerHeapVmar, &reserved_base);
+  }
+  *vmar = gSanitizerHeapVmar;
+  if (result != ZX_OK)
+    return result;
+  CHECK_NE(gSanitizerHeapVmar, ZX_HANDLE_INVALID);
+  return ZX_OK;
+}
+
+// Maps `size` bytes of `vmo` (from VMO offset 0) with `options` at
+// `vmar_offset`, trying the sanitizer heap VMAR first and the root VMAR second.
+//
+// Returns early with the failing status if the heap VMAR cannot be obtained.
+// Otherwise the mapping is attempted in the heap VMAR; when that attempt
+// reports ZX_ERR_NO_RESOURCES or ZX_ERR_INVALID_ARGS the same request is
+// repeated against the root VMAR. The mapped address is stored in `*addr` by
+// _zx_vmar_map. If `vmar_used` is non-null it receives the handle of the VMAR
+// that the last mapping attempt was made in.
+static zx_status_t TryVmoMapSanitizerVmar(zx_vm_option_t options,
+                                          size_t vmar_offset, zx_handle_t vmo,
+                                          size_t size, uintptr_t *addr,
+                                          zx_handle_t *vmar_used = nullptr) {
+  zx_handle_t heap_vmar;
+  const zx_status_t heap_status = GetSanitizerHeapVmar(&heap_vmar);
+  if (heap_status != ZX_OK)
+    return heap_status;
+
+  // GetSanitizerHeapVmar hands back the global heap VMAR handle.
+  zx_handle_t target = heap_vmar;
+  zx_status_t map_status = _zx_vmar_map(target, options, vmar_offset, vmo,
+                                        /*vmo_offset=*/0, size, addr);
+
+  const bool retry_in_root = map_status == ZX_ERR_NO_RESOURCES ||
+                             map_status == ZX_ERR_INVALID_ARGS;
+  if (retry_in_root) {
+    // This means there's no space in the heap VMAR, so fall back to the root
+    // VMAR.
+    target = _zx_vmar_root_self();
+    map_status = _zx_vmar_map(target, options, vmar_offset, vmo,
+                              /*vmo_offset=*/0, size, addr);
+  }
+
+  if (vmar_used)
+    *vmar_used = target;
+  return map_status;
+}
+
 static void *DoAnonymousMmapOrDie(uptr size, const char *mem_type,
                                   bool raw_report, bool die_for_nomem) {
   size = RoundUpTo(size, GetPageSize());
@@ -144,11 +198,9 @@ static void *DoAnonymousMmapOrDie(uptr size, const char *mem_type,
   _zx_object_set_property(vmo, ZX_PROP_NAME, mem_type,
                           internal_strlen(mem_type));
 
-  // TODO(mcgrathr): Maybe allocate a VMAR for all sanitizer heap and use that?
   uintptr_t addr;
-  status =
-      _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0,
-                   vmo, 0, size, &addr);
+  status = TryVmoMapSanitizerVmar(ZX_VM_PERM_READ | ZX_VM_PERM_WRITE,
+                                  /*vmar_offset=*/0, vmo, size, &addr);
   _zx_handle_close(vmo);
 
   if (status != ZX_OK) {
@@ -226,27 +278,32 @@ static uptr DoMmapFixedOrDie(zx_handle_t vmar, uptr fixed_addr, uptr map_size,
 
 uptr ReservedAddressRange::Map(uptr fixed_addr, uptr map_size,
                                const char *name) {
-  return DoMmapFixedOrDie(os_handle_, fixed_addr, map_size, base_, name_,
-                          false);
+  return DoMmapFixedOrDie(os_handle_, fixed_addr, map_size, base_,
+                          name ? name : name_, false);
 }
 
 uptr ReservedAddressRange::MapOrDie(uptr fixed_addr, uptr map_size,
                                     const char *name) {
-  return DoMmapFixedOrDie(os_handle_, fixed_addr, map_size, base_, name_, true);
+  return DoMmapFixedOrDie(os_handle_, fixed_addr, map_size, base_,
+                          name ? name : name_, true);
 }
 
-void UnmapOrDieVmar(void *addr, uptr size, zx_handle_t target_vmar) {
+void UnmapOrDieVmar(void *addr, uptr size, zx_handle_t target_vmar,
+                    bool raw_report) {
   if (!addr || !size)
     return;
   size = RoundUpTo(size, GetPageSize());
 
   zx_status_t status =
       _zx_vmar_unmap(target_vmar, reinterpret_cast<uintptr_t>(addr), size);
-  if (status != ZX_OK) {
-    Report("ERROR: %s failed to deallocate 0x%zx (%zd) bytes at address %p\n",
-           SanitizerToolName, size, size, addr);
-    CHECK("unable to unmap" && 0);
+  if (status == ZX_ERR_INVALID_ARGS && target_vmar == gSanitizerHeapVmar) {
+    // If there wasn't any space in the heap vmar, the fallback was the root
+    // vmar.
+    status = _zx_vmar_unmap(_zx_vmar_root_self(),
+                            reinterpret_cast<uintptr_t>(addr), size);
   }
+  if (status != ZX_OK)
+    ReportMunmapFailureAndDie(addr, size, status, raw_report);
 
   DecreaseTotalMmap(size);
 }
@@ -268,7 +325,8 @@ void ReservedAddressRange::Unmap(uptr addr, uptr size) {
   }
   // Partial unmapping does not affect the fact that the initial range is still
   // reserved, and the resulting unmapped memory can't be reused.
-  UnmapOrDieVmar(reinterpret_cast<void *>(addr), size, vmar);
+  UnmapOrDieVmar(reinterpret_cast<void *>(addr), size, vmar,
+                 /*raw_report=*/false);
 }
 
 // This should never be called.
@@ -307,17 +365,16 @@ void *MmapAlignedOrDieOnFatalError(uptr size, uptr alignment,
   _zx_object_set_property(vmo, ZX_PROP_NAME, mem_type,
                           internal_strlen(mem_type));
 
-  // TODO(mcgrathr): Maybe allocate a VMAR for all sanitizer heap and use that?
-
   // Map a larger size to get a chunk of address space big enough that
   // it surely contains an aligned region of the requested size.  Then
   // overwrite the aligned middle portion with a mapping from the
   // beginning of the VMO, and unmap the excess before and after.
   size_t map_size = size + alignment;
   uintptr_t addr;
-  status =
-      _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0,
-                   vmo, 0, map_size, &addr);
+  zx_handle_t vmar_used;
+  status = TryVmoMapSanitizerVmar(ZX_VM_PERM_READ | ZX_VM_PERM_WRITE,
+                                  /*vmar_offset=*/0, vmo, map_size, &addr,
+                                  &vmar_used);
   if (status == ZX_OK) {
     uintptr_t map_addr = addr;
     uintptr_t map_end = map_addr + map_size;
@@ -325,12 +382,12 @@ void *MmapAlignedOrDieOnFatalError(uptr size, uptr alignment,
     uintptr_t end = addr + size;
     if (addr != map_addr) {
       zx_info_vmar_t info;
-      status = _zx_object_get_info(_zx_vmar_root_self(), ZX_INFO_VMAR, &info,
-                                   sizeof(info), NULL, NULL);
+      status = _zx_object_get_info(vmar_used, ZX_INFO_VMAR, &info, sizeof(info),
+                                   NULL, NULL);
       if (status == ZX_OK) {
         uintptr_t new_addr;
         status = _zx_vmar_map(
-            _zx_vmar_root_self(),
+            vmar_used,
             ZX_VM_PERM_READ | ZX_VM_PERM_WRITE | ZX_VM_SPECIFIC_OVERWRITE,
             addr - info.base, vmo, 0, size, &new_addr);
         if (status == ZX_OK)
@@ -338,9 +395,9 @@ void *MmapAlignedOrDieOnFatalError(uptr size, uptr alignment,
       }
     }
     if (status == ZX_OK && addr != map_addr)
-      status = _zx_vmar_unmap(_zx_vmar_root_self(), map_addr, addr - map_addr);
+      status = _zx_vmar_unmap(vmar_used, map_addr, addr - map_addr);
     if (status == ZX_OK && end != map_end)
-      status = _zx_vmar_unmap(_zx_vmar_root_self(), end, map_end - end);
+      status = _zx_vmar_unmap(vmar_used, end, map_end - end);
   }
   _zx_handle_close(vmo);
 
@@ -355,8 +412,8 @@ void *MmapAlignedOrDieOnFatalError(uptr size, uptr alignment,
   return reinterpret_cast<void *>(addr);
 }
 
-void UnmapOrDie(void *addr, uptr size) {
-  UnmapOrDieVmar(addr, size, _zx_vmar_root_self());
+void UnmapOrDie(void *addr, uptr size, bool raw_report) {
+  UnmapOrDieVmar(addr, size, gSanitizerHeapVmar, raw_report);
 }
 
 void ReleaseMemoryPagesToOS(uptr beg, uptr end) {
lib/tsan/sanitizer_common/sanitizer_hash.h
@@ -62,6 +62,6 @@ class MurMur2Hash64Builder {
     return x;
   }
 };
-}  //namespace __sanitizer
+}  // namespace __sanitizer
 
 #endif  // SANITIZER_HASH_H
lib/tsan/sanitizer_common/sanitizer_internal_defs.h
@@ -15,6 +15,11 @@
 #include "sanitizer_platform.h"
 #include "sanitizer_redefine_builtins.h"
 
+// GCC does not understand __has_feature.
+#if !defined(__has_feature)
+#define __has_feature(x) 0
+#endif
+
 #ifndef SANITIZER_DEBUG
 # define SANITIZER_DEBUG 0
 #endif
@@ -30,13 +35,20 @@
 # define SANITIZER_INTERFACE_ATTRIBUTE __declspec(dllexport)
 #endif
 # define SANITIZER_WEAK_ATTRIBUTE
+#  define SANITIZER_WEAK_IMPORT
 #elif SANITIZER_GO
 # define SANITIZER_INTERFACE_ATTRIBUTE
 # define SANITIZER_WEAK_ATTRIBUTE
+#  define SANITIZER_WEAK_IMPORT
 #else
 # define SANITIZER_INTERFACE_ATTRIBUTE __attribute__((visibility("default")))
 # define SANITIZER_WEAK_ATTRIBUTE  __attribute__((weak))
-#endif
+#  if SANITIZER_APPLE
+#    define SANITIZER_WEAK_IMPORT extern "C" __attribute((weak_import))
+#  else
+#    define SANITIZER_WEAK_IMPORT extern "C" SANITIZER_WEAK_ATTRIBUTE
+#  endif  // SANITIZER_APPLE
+#endif    // SANITIZER_WINDOWS
 
 //--------------------------- WEAK FUNCTIONS ---------------------------------//
 // When working with weak functions, to simplify the code and make it more
@@ -179,15 +191,19 @@ typedef uptr OFF_T;
 #endif
 typedef u64  OFF64_T;
 
-#if (SANITIZER_WORDSIZE == 64) || SANITIZER_APPLE
-typedef uptr operator_new_size_type;
+#ifdef __SIZE_TYPE__
+typedef __SIZE_TYPE__ usize;
 #else
-# if defined(__s390__) && !defined(__s390x__)
-// Special case: 31-bit s390 has unsigned long as size_t.
-typedef unsigned long operator_new_size_type;
-# else
-typedef u32 operator_new_size_type;
-# endif
+// Since we use this for operator new, usize must match the real size_t, but on
+// 32-bit Windows the definition of uptr does not actually match uintptr_t or
+// size_t because we are working around typedef mismatches for the (S)SIZE_T
+// types used in interception.h.
+// Until the definition of uptr has been fixed we have to special case Win32.
+#  if SANITIZER_WINDOWS && SANITIZER_WORDSIZE == 32
+typedef unsigned int usize;
+#  else
+typedef uptr usize;
+#  endif
 #endif
 
 typedef u64 tid_t;
lib/tsan/sanitizer_common/sanitizer_libc.cpp
@@ -199,6 +199,14 @@ char *internal_strncat(char *dst, const char *src, uptr n) {
   return dst;
 }
 
+// Copies the wide string `src`, including its terminating L'\0', into `dst`
+// and returns `dst`. `dst` must have room for the whole string plus the
+// terminator.
+wchar_t *internal_wcscpy(wchar_t *dst, const wchar_t *src) {
+  wchar_t *dst_it = dst;
+  // Test the character that was just stored rather than the next one, so the
+  // terminator itself is copied and `src` is never read past it (this also
+  // makes the empty-string case safe).
+  while ((*dst_it++ = *src++) != L'\0') {
+  }
+  return dst;
+}
+
 uptr internal_strlcpy(char *dst, const char *src, uptr maxlen) {
   const uptr srclen = internal_strlen(src);
   if (srclen < maxlen) {
@@ -218,6 +226,14 @@ char *internal_strncpy(char *dst, const char *src, uptr n) {
   return dst;
 }
 
+// Copies at most `n` wide characters from `src` to `dst`, stopping at the
+// first L'\0' in `src`, then zero-fills whatever is left of the `n` slots.
+// Returns `dst`. When `src` holds `n` or more characters before its
+// terminator, nothing is zero-filled and `dst` is not terminated.
+wchar_t *internal_wcsncpy(wchar_t *dst, const wchar_t *src, uptr n) {
+  uptr copied = 0;
+  while (copied < n && src[copied]) {
+    dst[copied] = src[copied];
+    ++copied;
+  }
+  const uptr tail_bytes = (n - copied) * sizeof(wchar_t);
+  internal_memset(dst + copied, 0, tail_bytes);
+  return dst;
+}
+
 uptr internal_strnlen(const char *s, uptr maxlen) {
   uptr i = 0;
   while (i < maxlen && s[i]) i++;
lib/tsan/sanitizer_common/sanitizer_libc.h
@@ -71,7 +71,8 @@ int internal_snprintf(char *buffer, uptr length, const char *format, ...)
     FORMAT(3, 4);
 uptr internal_wcslen(const wchar_t *s);
 uptr internal_wcsnlen(const wchar_t *s, uptr maxlen);
-
+wchar_t *internal_wcscpy(wchar_t *dst, const wchar_t *src);
+wchar_t *internal_wcsncpy(wchar_t *dst, const wchar_t *src, uptr maxlen);
 // Return true if all bytes in [mem, mem+size) are zero.
 // Optimized for the case when the result is true.
 bool mem_is_zero(const char *mem, uptr size);
lib/tsan/sanitizer_common/sanitizer_libignore.cpp
@@ -105,8 +105,8 @@ void LibIgnore::OnLibraryLoaded(const char *name) {
           continue;
         if (IsPcInstrumented(range.beg) && IsPcInstrumented(range.end - 1))
           continue;
-        VReport(1, "Adding instrumented range 0x%zx-0x%zx from library '%s'\n",
-                range.beg, range.end, mod.full_name());
+        VReport(1, "Adding instrumented range %p-%p from library '%s'\n",
+                (void *)range.beg, (void *)range.end, mod.full_name());
         const uptr idx =
             atomic_load(&instrumented_ranges_count_, memory_order_relaxed);
         CHECK_LT(idx, ARRAY_SIZE(instrumented_code_ranges_));
lib/tsan/sanitizer_common/sanitizer_linux.cpp
@@ -16,101 +16,105 @@
 #if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD || \
     SANITIZER_SOLARIS
 
-#include "sanitizer_common.h"
-#include "sanitizer_flags.h"
-#include "sanitizer_getauxval.h"
-#include "sanitizer_internal_defs.h"
-#include "sanitizer_libc.h"
-#include "sanitizer_linux.h"
-#include "sanitizer_mutex.h"
-#include "sanitizer_placement_new.h"
-#include "sanitizer_procmaps.h"
-
-#if SANITIZER_LINUX && !SANITIZER_GO
-#include <asm/param.h>
-#endif
+#  include "sanitizer_common.h"
+#  include "sanitizer_flags.h"
+#  include "sanitizer_getauxval.h"
+#  include "sanitizer_internal_defs.h"
+#  include "sanitizer_libc.h"
+#  include "sanitizer_linux.h"
+#  include "sanitizer_mutex.h"
+#  include "sanitizer_placement_new.h"
+#  include "sanitizer_procmaps.h"
+
+#  if SANITIZER_LINUX && !SANITIZER_GO
+#    include <asm/param.h>
+#  endif
 
 // For mips64, syscall(__NR_stat) fills the buffer in the 'struct kernel_stat'
 // format. Struct kernel_stat is defined as 'struct stat' in asm/stat.h. To
 // access stat from asm/stat.h, without conflicting with definition in
-// sys/stat.h, we use this trick.
-#if SANITIZER_MIPS64
-#include <asm/unistd.h>
-#include <sys/types.h>
-#define stat kernel_stat
-#if SANITIZER_GO
-#undef st_atime
-#undef st_mtime
-#undef st_ctime
-#define st_atime st_atim
-#define st_mtime st_mtim
-#define st_ctime st_ctim
-#endif
-#include <asm/stat.h>
-#undef stat
-#endif
+// sys/stat.h, we use this trick.  sparc64 is similar, using
+// syscall(__NR_stat64) and struct kernel_stat64.
+#  if SANITIZER_LINUX && (SANITIZER_MIPS64 || SANITIZER_SPARC64)
+#    include <asm/unistd.h>
+#    include <sys/types.h>
+#    define stat kernel_stat
+#    if SANITIZER_SPARC64
+#      define stat64 kernel_stat64
+#    endif
+#    if SANITIZER_GO
+#      undef st_atime
+#      undef st_mtime
+#      undef st_ctime
+#      define st_atime st_atim
+#      define st_mtime st_mtim
+#      define st_ctime st_ctim
+#    endif
+#    include <asm/stat.h>
+#    undef stat
+#    undef stat64
+#  endif
 
-#include <dlfcn.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <link.h>
-#include <pthread.h>
-#include <sched.h>
-#include <signal.h>
-#include <sys/mman.h>
-#include <sys/param.h>
-#if !SANITIZER_SOLARIS
-#include <sys/ptrace.h>
-#endif
-#include <sys/resource.h>
-#include <sys/stat.h>
-#include <sys/syscall.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <ucontext.h>
-#include <unistd.h>
-
-#if SANITIZER_LINUX
-#include <sys/utsname.h>
-#endif
+#  include <dlfcn.h>
+#  include <errno.h>
+#  include <fcntl.h>
+#  include <link.h>
+#  include <pthread.h>
+#  include <sched.h>
+#  include <signal.h>
+#  include <sys/mman.h>
+#  if !SANITIZER_SOLARIS
+#    include <sys/ptrace.h>
+#  endif
+#  include <sys/resource.h>
+#  include <sys/stat.h>
+#  include <sys/syscall.h>
+#  include <sys/time.h>
+#  include <sys/types.h>
+#  include <ucontext.h>
+#  include <unistd.h>
 
-#if SANITIZER_LINUX && !SANITIZER_ANDROID
-#include <sys/personality.h>
-#endif
+#  if SANITIZER_LINUX
+#    include <sys/utsname.h>
+#  endif
 
-#if SANITIZER_LINUX && defined(__loongarch__)
-#  include <sys/sysmacros.h>
-#endif
+#  if SANITIZER_LINUX && !SANITIZER_ANDROID
+#    include <sys/personality.h>
+#  endif
 
-#if SANITIZER_FREEBSD
-#include <sys/exec.h>
-#include <sys/procctl.h>
-#include <sys/sysctl.h>
-#include <machine/atomic.h>
+#  if SANITIZER_LINUX && defined(__loongarch__)
+#    include <sys/sysmacros.h>
+#  endif
+
+#  if SANITIZER_FREEBSD
+#    include <machine/atomic.h>
+#    include <sys/exec.h>
+#    include <sys/procctl.h>
+#    include <sys/sysctl.h>
 extern "C" {
 // <sys/umtx.h> must be included after <errno.h> and <sys/types.h> on
 // FreeBSD 9.2 and 10.0.
-#include <sys/umtx.h>
+#    include <sys/umtx.h>
 }
-#include <sys/thr.h>
-#endif  // SANITIZER_FREEBSD
+#    include <sys/thr.h>
+#  endif  // SANITIZER_FREEBSD
 
-#if SANITIZER_NETBSD
-#include <limits.h>  // For NAME_MAX
-#include <sys/sysctl.h>
-#include <sys/exec.h>
+#  if SANITIZER_NETBSD
+#    include <limits.h>  // For NAME_MAX
+#    include <sys/exec.h>
+#    include <sys/sysctl.h>
 extern struct ps_strings *__ps_strings;
-#endif  // SANITIZER_NETBSD
+#  endif  // SANITIZER_NETBSD
 
-#if SANITIZER_SOLARIS
-#include <stdlib.h>
-#include <thread.h>
-#define environ _environ
-#endif
+#  if SANITIZER_SOLARIS
+#    include <stdlib.h>
+#    include <thread.h>
+#    define environ _environ
+#  endif
 
 extern char **environ;
 
-#if SANITIZER_LINUX
+#  if SANITIZER_LINUX
 // <linux/time.h>
 struct kernel_timeval {
   long tv_sec;
@@ -123,36 +127,32 @@ const int FUTEX_WAKE = 1;
 const int FUTEX_PRIVATE_FLAG = 128;
 const int FUTEX_WAIT_PRIVATE = FUTEX_WAIT | FUTEX_PRIVATE_FLAG;
 const int FUTEX_WAKE_PRIVATE = FUTEX_WAKE | FUTEX_PRIVATE_FLAG;
-#endif  // SANITIZER_LINUX
+#  endif  // SANITIZER_LINUX
 
 // Are we using 32-bit or 64-bit Linux syscalls?
 // x32 (which defines __x86_64__) has SANITIZER_WORDSIZE == 32
 // but it still needs to use 64-bit syscalls.
-#if SANITIZER_LINUX && (defined(__x86_64__) || defined(__powerpc64__) || \
-                        SANITIZER_WORDSIZE == 64 ||                      \
-                        (defined(__mips__) && _MIPS_SIM == _ABIN32))
-# define SANITIZER_LINUX_USES_64BIT_SYSCALLS 1
-#else
-# define SANITIZER_LINUX_USES_64BIT_SYSCALLS 0
-#endif
+#  if SANITIZER_LINUX && (defined(__x86_64__) || defined(__powerpc64__) || \
+                          SANITIZER_WORDSIZE == 64 ||                      \
+                          (defined(__mips__) && _MIPS_SIM == _ABIN32))
+#    define SANITIZER_LINUX_USES_64BIT_SYSCALLS 1
+#  else
+#    define SANITIZER_LINUX_USES_64BIT_SYSCALLS 0
+#  endif
 
-// Note : FreeBSD had implemented both
-// Linux apis, available from
-// future 12.x version most likely
-#if SANITIZER_LINUX && defined(__NR_getrandom)
-# if !defined(GRND_NONBLOCK)
-#  define GRND_NONBLOCK 1
-# endif
-# define SANITIZER_USE_GETRANDOM 1
-#else
-# define SANITIZER_USE_GETRANDOM 0
-#endif  // SANITIZER_LINUX && defined(__NR_getrandom)
-
-#if SANITIZER_FREEBSD && __FreeBSD_version >= 1200000
-#  define SANITIZER_USE_GETENTROPY 1
-#else
-#  define SANITIZER_USE_GETENTROPY 0
-#endif
+// Note: FreeBSD implements both the Linux and the OpenBSD APIs.
+#  if SANITIZER_LINUX && defined(__NR_getrandom)
+#    if !defined(GRND_NONBLOCK)
+#      define GRND_NONBLOCK 1
+#    endif
+#    define SANITIZER_USE_GETRANDOM 1
+#  else
+#    define SANITIZER_USE_GETRANDOM 0
+#  endif  // SANITIZER_LINUX && defined(__NR_getrandom)
+
+#  if SANITIZER_FREEBSD
+#    define SANITIZER_USE_GETENTROPY 1
+#  endif
 
 namespace __sanitizer {
 
@@ -160,6 +160,7 @@ void SetSigProcMask(__sanitizer_sigset_t *set, __sanitizer_sigset_t *oldset) {
   CHECK_EQ(0, internal_sigprocmask(SIG_SETMASK, set, oldset));
 }
 
+// Block asynchronous signals
 void BlockSignals(__sanitizer_sigset_t *oldset) {
   __sanitizer_sigset_t set;
   internal_sigfillset(&set);
@@ -174,7 +175,17 @@ void BlockSignals(__sanitizer_sigset_t *oldset) {
   // If this signal is blocked, such calls cannot be handled and the process may
   // hang.
   internal_sigdelset(&set, 31);
+
+  // Don't block synchronous signals
+  internal_sigdelset(&set, SIGSEGV);
+  internal_sigdelset(&set, SIGBUS);
+  internal_sigdelset(&set, SIGILL);
+  internal_sigdelset(&set, SIGTRAP);
+  internal_sigdelset(&set, SIGABRT);
+  internal_sigdelset(&set, SIGFPE);
+  internal_sigdelset(&set, SIGPIPE);
 #  endif
+
   SetSigProcMask(&set, oldset);
 }
 
@@ -203,33 +214,33 @@ ScopedBlockSignals::~ScopedBlockSignals() { SetSigProcMask(&saved_, nullptr); }
 #  endif
 
 // --------------- sanitizer_libc.h
-#if !SANITIZER_SOLARIS && !SANITIZER_NETBSD
-#if !SANITIZER_S390
+#  if !SANITIZER_SOLARIS && !SANITIZER_NETBSD
+#    if !SANITIZER_S390
 uptr internal_mmap(void *addr, uptr length, int prot, int flags, int fd,
                    u64 offset) {
-#if SANITIZER_FREEBSD || SANITIZER_LINUX_USES_64BIT_SYSCALLS
+#      if SANITIZER_FREEBSD || SANITIZER_LINUX_USES_64BIT_SYSCALLS
   return internal_syscall(SYSCALL(mmap), (uptr)addr, length, prot, flags, fd,
                           offset);
-#else
+#      else
   // mmap2 specifies file offset in 4096-byte units.
   CHECK(IsAligned(offset, 4096));
   return internal_syscall(SYSCALL(mmap2), addr, length, prot, flags, fd,
-                          offset / 4096);
-#endif
+                          (OFF_T)(offset / 4096));
+#      endif
 }
-#endif // !SANITIZER_S390
+#    endif  // !SANITIZER_S390
 
 // Issues the munmap syscall for `length` bytes starting at `addr` and returns
 // the raw result of internal_syscall.
 uptr internal_munmap(void *addr, uptr length) {
   const uptr start = (uptr)addr;
   return internal_syscall(SYSCALL(munmap), start, length);
 }
 
-#if SANITIZER_LINUX
+#    if SANITIZER_LINUX
 // Issues the mremap syscall with the given old range, new size, flags and
 // requested new address, and returns the raw result of internal_syscall.
 uptr internal_mremap(void *old_address, uptr old_size, uptr new_size, int flags,
                      void *new_address) {
   const uptr from = (uptr)old_address;
   const uptr to = (uptr)new_address;
   return internal_syscall(SYSCALL(mremap), from, old_size, new_size, flags,
                           to);
 }
-#endif
+#    endif
 
 int internal_mprotect(void *addr, uptr length, int prot) {
   return internal_syscall(SYSCALL(mprotect), (uptr)addr, length, prot);
@@ -239,25 +250,23 @@ int internal_madvise(uptr addr, uptr length, int advice) {
   return internal_syscall(SYSCALL(madvise), addr, length, advice);
 }
 
-uptr internal_close(fd_t fd) {
-  return internal_syscall(SYSCALL(close), fd);
-}
+uptr internal_close(fd_t fd) { return internal_syscall(SYSCALL(close), fd); }
 
 uptr internal_open(const char *filename, int flags) {
 #    if SANITIZER_LINUX
   return internal_syscall(SYSCALL(openat), AT_FDCWD, (uptr)filename, flags);
-#else
+#    else
   return internal_syscall(SYSCALL(open), (uptr)filename, flags);
-#endif
+#    endif
 }
 
 uptr internal_open(const char *filename, int flags, u32 mode) {
 #    if SANITIZER_LINUX
   return internal_syscall(SYSCALL(openat), AT_FDCWD, (uptr)filename, flags,
                           mode);
-#else
+#    else
   return internal_syscall(SYSCALL(open), (uptr)filename, flags, mode);
-#endif
+#    endif
 }
 
 uptr internal_read(fd_t fd, void *buf, uptr count) {
@@ -276,12 +285,12 @@ uptr internal_write(fd_t fd, const void *buf, uptr count) {
 
 uptr internal_ftruncate(fd_t fd, uptr size) {
   sptr res;
-  HANDLE_EINTR(res, (sptr)internal_syscall(SYSCALL(ftruncate), fd,
-               (OFF_T)size));
+  HANDLE_EINTR(res,
+               (sptr)internal_syscall(SYSCALL(ftruncate), fd, (OFF_T)size));
   return res;
 }
 
-#if (!SANITIZER_LINUX_USES_64BIT_SYSCALLS || SANITIZER_SPARC) && SANITIZER_LINUX
+#    if !SANITIZER_LINUX_USES_64BIT_SYSCALLS && SANITIZER_LINUX
 static void stat64_to_stat(struct stat64 *in, struct stat *out) {
   internal_memset(out, 0, sizeof(*out));
   out->st_dev = in->st_dev;
@@ -298,9 +307,9 @@ static void stat64_to_stat(struct stat64 *in, struct stat *out) {
   out->st_mtime = in->st_mtime;
   out->st_ctime = in->st_ctime;
 }
-#endif
+#    endif
 
-#if SANITIZER_LINUX && defined(__loongarch__)
+#    if SANITIZER_LINUX && defined(__loongarch__)
 static void statx_to_stat(struct statx *in, struct stat *out) {
   internal_memset(out, 0, sizeof(*out));
   out->st_dev = makedev(in->stx_dev_major, in->stx_dev_minor);
@@ -320,27 +329,32 @@ static void statx_to_stat(struct statx *in, struct stat *out) {
   out->st_ctime = in->stx_ctime.tv_sec;
   out->st_ctim.tv_nsec = in->stx_ctime.tv_nsec;
 }
-#endif
+#    endif
 
-#if SANITIZER_MIPS64
+#    if SANITIZER_MIPS64 || SANITIZER_SPARC64
+#      if SANITIZER_MIPS64
+typedef struct kernel_stat kstat_t;
+#      else
+typedef struct kernel_stat64 kstat_t;
+#      endif
 // Undefine compatibility macros from <sys/stat.h>
 // so that they would not clash with the kernel_stat
 // st_[a|m|c]time fields
-#if !SANITIZER_GO
-#undef st_atime
-#undef st_mtime
-#undef st_ctime
-#endif
-#if defined(SANITIZER_ANDROID)
+#      if !SANITIZER_GO
+#        undef st_atime
+#        undef st_mtime
+#        undef st_ctime
+#      endif
+#      if defined(SANITIZER_ANDROID)
 // Bionic sys/stat.h defines additional macros
 // for compatibility with the old NDKs and
 // they clash with the kernel_stat structure
 // st_[a|m|c]time_nsec fields.
-#undef st_atime_nsec
-#undef st_mtime_nsec
-#undef st_ctime_nsec
-#endif
-static void kernel_stat_to_stat(struct kernel_stat *in, struct stat *out) {
+#        undef st_atime_nsec
+#        undef st_mtime_nsec
+#        undef st_ctime_nsec
+#      endif
+// Converts the kernel's stat record `in` into the libc `struct stat` `out`.
+// `out` is zeroed first and then filled field by field from `in`. Depending
+// on which timestamp layout the libc headers expose, the access, modification
+// and change times are stored either in st_[amc]tim (timespec) or in the
+// st_[amc]time / st_[amc]timensec pairs; the change-time nanoseconds belong
+// in st_ctimensec.
+static void kernel_stat_to_stat(kstat_t *in, struct stat *out) {
   internal_memset(out, 0, sizeof(*out));
   out->st_dev = in->st_dev;
   out->st_ino = in->st_ino;
@@ -352,96 +366,113 @@ static void kernel_stat_to_stat(struct kernel_stat *in, struct stat *out) {
   out->st_size = in->st_size;
   out->st_blksize = in->st_blksize;
   out->st_blocks = in->st_blocks;
-#if defined(__USE_MISC)     || \
-    defined(__USE_XOPEN2K8) || \
-    defined(SANITIZER_ANDROID)
+#      if defined(__USE_MISC) || defined(__USE_XOPEN2K8) || \
+          defined(SANITIZER_ANDROID)
   out->st_atim.tv_sec = in->st_atime;
   out->st_atim.tv_nsec = in->st_atime_nsec;
   out->st_mtim.tv_sec = in->st_mtime;
   out->st_mtim.tv_nsec = in->st_mtime_nsec;
   out->st_ctim.tv_sec = in->st_ctime;
   out->st_ctim.tv_nsec = in->st_ctime_nsec;
-#else
+#      else
   out->st_atime = in->st_atime;
   out->st_atimensec = in->st_atime_nsec;
   out->st_mtime = in->st_mtime;
   out->st_mtimensec = in->st_mtime_nsec;
   out->st_ctime = in->st_ctime;
   out->st_ctimensec = in->st_ctime_nsec;
-#endif
+#      endif
 }
-#endif
+#    endif
 
 uptr internal_stat(const char *path, void *buf) {
-#  if SANITIZER_FREEBSD
+#    if SANITIZER_FREEBSD
   return internal_syscall(SYSCALL(fstatat), AT_FDCWD, (uptr)path, (uptr)buf, 0);
-#  elif SANITIZER_LINUX
-#    if defined(__loongarch__)
+#    elif SANITIZER_LINUX
+#      if defined(__loongarch__)
   struct statx bufx;
   int res = internal_syscall(SYSCALL(statx), AT_FDCWD, (uptr)path,
                              AT_NO_AUTOMOUNT, STATX_BASIC_STATS, (uptr)&bufx);
   statx_to_stat(&bufx, (struct stat *)buf);
   return res;
-#    elif (SANITIZER_WORDSIZE == 64 || SANITIZER_X32 ||    \
-           (defined(__mips__) && _MIPS_SIM == _ABIN32)) && \
-        !SANITIZER_SPARC
+#      elif (SANITIZER_WORDSIZE == 64 || SANITIZER_X32 ||    \
+             (defined(__mips__) && _MIPS_SIM == _ABIN32)) && \
+          !SANITIZER_SPARC
   return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf,
                           0);
-#    else
+#      elif SANITIZER_SPARC64
+  kstat_t buf64;
+  int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path,
+                             (uptr)&buf64, 0);
+  kernel_stat_to_stat(&buf64, (struct stat *)buf);
+  return res;
+#      else
   struct stat64 buf64;
   int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path,
                              (uptr)&buf64, 0);
   stat64_to_stat(&buf64, (struct stat *)buf);
   return res;
-#    endif
-#  else
+#      endif
+#    else
   struct stat64 buf64;
   int res = internal_syscall(SYSCALL(stat64), path, &buf64);
   stat64_to_stat(&buf64, (struct stat *)buf);
   return res;
-#  endif
+#    endif
 }
 
 uptr internal_lstat(const char *path, void *buf) {
-#  if SANITIZER_FREEBSD
+#    if SANITIZER_FREEBSD
   return internal_syscall(SYSCALL(fstatat), AT_FDCWD, (uptr)path, (uptr)buf,
                           AT_SYMLINK_NOFOLLOW);
-#  elif SANITIZER_LINUX
-#    if defined(__loongarch__)
+#    elif SANITIZER_LINUX
+#      if defined(__loongarch__)
   struct statx bufx;
   int res = internal_syscall(SYSCALL(statx), AT_FDCWD, (uptr)path,
                              AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT,
                              STATX_BASIC_STATS, (uptr)&bufx);
   statx_to_stat(&bufx, (struct stat *)buf);
   return res;
-#    elif (defined(_LP64) || SANITIZER_X32 ||              \
-           (defined(__mips__) && _MIPS_SIM == _ABIN32)) && \
-        !SANITIZER_SPARC
+#      elif (defined(_LP64) || SANITIZER_X32 ||              \
+             (defined(__mips__) && _MIPS_SIM == _ABIN32)) && \
+          !SANITIZER_SPARC
   return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf,
                           AT_SYMLINK_NOFOLLOW);
-#    else
+#      elif SANITIZER_SPARC64
+  kstat_t buf64;
+  int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path,
+                             (uptr)&buf64, AT_SYMLINK_NOFOLLOW);
+  kernel_stat_to_stat(&buf64, (struct stat *)buf);
+  return res;
+#      else
   struct stat64 buf64;
   int res = internal_syscall(SYSCALL(fstatat64), AT_FDCWD, (uptr)path,
                              (uptr)&buf64, AT_SYMLINK_NOFOLLOW);
   stat64_to_stat(&buf64, (struct stat *)buf);
   return res;
-#    endif
-#  else
+#      endif
+#    else
   struct stat64 buf64;
   int res = internal_syscall(SYSCALL(lstat64), path, &buf64);
   stat64_to_stat(&buf64, (struct stat *)buf);
   return res;
-#  endif
+#    endif
 }
 
 uptr internal_fstat(fd_t fd, void *buf) {
-#if SANITIZER_FREEBSD || SANITIZER_LINUX_USES_64BIT_SYSCALLS
-#if SANITIZER_MIPS64
+#    if SANITIZER_FREEBSD || SANITIZER_LINUX_USES_64BIT_SYSCALLS
+#      if SANITIZER_MIPS64
   // For mips64, fstat syscall fills buffer in the format of kernel_stat
-  struct kernel_stat kbuf;
+  kstat_t kbuf;
   int res = internal_syscall(SYSCALL(fstat), fd, &kbuf);
   kernel_stat_to_stat(&kbuf, (struct stat *)buf);
   return res;
+#      elif SANITIZER_LINUX && SANITIZER_SPARC64
+  // For sparc64, fstat64 syscall fills buffer in the format of kernel_stat64
+  kstat_t kbuf;
+  int res = internal_syscall(SYSCALL(fstat64), fd, &kbuf);
+  kernel_stat_to_stat(&kbuf, (struct stat *)buf);
+  return res;
 #      elif SANITIZER_LINUX && defined(__loongarch__)
   struct statx bufx;
   int res = internal_syscall(SYSCALL(statx), fd, "", AT_EMPTY_PATH,
@@ -451,12 +482,12 @@ uptr internal_fstat(fd_t fd, void *buf) {
 #      else
   return internal_syscall(SYSCALL(fstat), fd, (uptr)buf);
 #      endif
-#else
+#    else
   struct stat64 buf64;
   int res = internal_syscall(SYSCALL(fstat64), fd, &buf64);
   stat64_to_stat(&buf64, (struct stat *)buf);
   return res;
-#endif
+#    endif
 }
 
 uptr internal_filesize(fd_t fd) {
@@ -466,50 +497,46 @@ uptr internal_filesize(fd_t fd) {
   return (uptr)st.st_size;
 }
 
-uptr internal_dup(int oldfd) {
-  return internal_syscall(SYSCALL(dup), oldfd);
-}
+uptr internal_dup(int oldfd) { return internal_syscall(SYSCALL(dup), oldfd); }
 
 uptr internal_dup2(int oldfd, int newfd) {
 #    if SANITIZER_LINUX
   return internal_syscall(SYSCALL(dup3), oldfd, newfd, 0);
-#else
+#    else
   return internal_syscall(SYSCALL(dup2), oldfd, newfd);
-#endif
+#    endif
 }
 
 uptr internal_readlink(const char *path, char *buf, uptr bufsize) {
 #    if SANITIZER_LINUX
   return internal_syscall(SYSCALL(readlinkat), AT_FDCWD, (uptr)path, (uptr)buf,
                           bufsize);
-#else
+#    else
   return internal_syscall(SYSCALL(readlink), (uptr)path, (uptr)buf, bufsize);
-#endif
+#    endif
 }
 
 uptr internal_unlink(const char *path) {
 #    if SANITIZER_LINUX
   return internal_syscall(SYSCALL(unlinkat), AT_FDCWD, (uptr)path, 0);
-#else
+#    else
   return internal_syscall(SYSCALL(unlink), (uptr)path);
-#endif
+#    endif
 }
 
 uptr internal_rename(const char *oldpath, const char *newpath) {
-#  if (defined(__riscv) || defined(__loongarch__)) && defined(__linux__)
+#    if (defined(__riscv) || defined(__loongarch__)) && defined(__linux__)
   return internal_syscall(SYSCALL(renameat2), AT_FDCWD, (uptr)oldpath, AT_FDCWD,
                           (uptr)newpath, 0);
-#  elif SANITIZER_LINUX
+#    elif SANITIZER_LINUX
   return internal_syscall(SYSCALL(renameat), AT_FDCWD, (uptr)oldpath, AT_FDCWD,
                           (uptr)newpath);
-#  else
+#    else
   return internal_syscall(SYSCALL(rename), (uptr)oldpath, (uptr)newpath);
-#  endif
+#    endif
 }
 
-uptr internal_sched_yield() {
-  return internal_syscall(SYSCALL(sched_yield));
-}
+uptr internal_sched_yield() { return internal_syscall(SYSCALL(sched_yield)); }
 
 void internal_usleep(u64 useconds) {
   struct timespec ts;
@@ -523,18 +550,18 @@ uptr internal_execve(const char *filename, char *const argv[],
   return internal_syscall(SYSCALL(execve), (uptr)filename, (uptr)argv,
                           (uptr)envp);
 }
-#endif  // !SANITIZER_SOLARIS && !SANITIZER_NETBSD
+#  endif  // !SANITIZER_SOLARIS && !SANITIZER_NETBSD
 
-#if !SANITIZER_NETBSD
+#  if !SANITIZER_NETBSD
 void internal__exit(int exitcode) {
-#if SANITIZER_FREEBSD || SANITIZER_SOLARIS
+#    if SANITIZER_FREEBSD || SANITIZER_SOLARIS
   internal_syscall(SYSCALL(exit), exitcode);
-#else
+#    else
   internal_syscall(SYSCALL(exit_group), exitcode);
-#endif
+#    endif
   Die();  // Unreachable.
 }
-#endif  // !SANITIZER_NETBSD
+#  endif  // !SANITIZER_NETBSD
 
 // ----------------- sanitizer_common.h
 bool FileExists(const char *filename) {
@@ -556,30 +583,32 @@ bool DirExists(const char *path) {
 
 #  if !SANITIZER_NETBSD
 tid_t GetTid() {
-#if SANITIZER_FREEBSD
+#    if SANITIZER_FREEBSD
   long Tid;
   thr_self(&Tid);
   return Tid;
-#elif SANITIZER_SOLARIS
+#    elif SANITIZER_SOLARIS
   return thr_self();
-#else
+#    else
   return internal_syscall(SYSCALL(gettid));
-#endif
+#    endif
 }
 
 int TgKill(pid_t pid, tid_t tid, int sig) {
-#if SANITIZER_LINUX
+#    if SANITIZER_LINUX
   return internal_syscall(SYSCALL(tgkill), pid, tid, sig);
-#elif SANITIZER_FREEBSD
+#    elif SANITIZER_FREEBSD
   return internal_syscall(SYSCALL(thr_kill2), pid, tid, sig);
-#elif SANITIZER_SOLARIS
+#    elif SANITIZER_SOLARIS
   (void)pid;
-  return thr_kill(tid, sig);
-#endif
+  errno = thr_kill(tid, sig);
+  // TgKill is expected to return -1 on error, not an errno.
+  return errno != 0 ? -1 : 0;
+#    endif
 }
-#endif
+#  endif
 
-#if SANITIZER_GLIBC
+#  if SANITIZER_GLIBC
 u64 NanoTime() {
   kernel_timeval tv;
   internal_memset(&tv, 0, sizeof(tv));
@@ -590,19 +619,19 @@ u64 NanoTime() {
 uptr internal_clock_gettime(__sanitizer_clockid_t clk_id, void *tp) {
   return internal_syscall(SYSCALL(clock_gettime), clk_id, tp);
 }
-#elif !SANITIZER_SOLARIS && !SANITIZER_NETBSD
+#  elif !SANITIZER_SOLARIS && !SANITIZER_NETBSD
 u64 NanoTime() {
   struct timespec ts;
   clock_gettime(CLOCK_REALTIME, &ts);
   return (u64)ts.tv_sec * 1000 * 1000 * 1000 + ts.tv_nsec;
 }
-#endif
+#  endif
 
 // Like getenv, but reads env directly from /proc (on Linux) or parses the
 // 'environ' array (on some others) and does not use libc. This function
 // should be called first inside __asan_init.
 const char *GetEnv(const char *name) {
-#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_SOLARIS
+#  if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_SOLARIS
   if (::environ != 0) {
     uptr NameLen = internal_strlen(name);
     for (char **Env = ::environ; *Env != 0; Env++) {
@@ -611,7 +640,7 @@ const char *GetEnv(const char *name) {
     }
   }
   return 0;  // Not found.
-#elif SANITIZER_LINUX
+#  elif SANITIZER_LINUX
   static char *environ;
   static uptr len;
   static bool inited;
@@ -621,13 +650,13 @@ const char *GetEnv(const char *name) {
     if (!ReadFileToBuffer("/proc/self/environ", &environ, &environ_size, &len))
       environ = nullptr;
   }
-  if (!environ || len == 0) return nullptr;
+  if (!environ || len == 0)
+    return nullptr;
   uptr namelen = internal_strlen(name);
   const char *p = environ;
   while (*p != '\0') {  // will happen at the \0\0 that terminates the buffer
     // proc file has the format NAME=value\0NAME=value\0NAME=value\0...
-    const char* endp =
-        (char*)internal_memchr(p, '\0', len - (p - environ));
+    const char *endp = (char *)internal_memchr(p, '\0', len - (p - environ));
     if (!endp)  // this entry isn't NUL terminated
       return nullptr;
     else if (!internal_memcmp(p, name, namelen) && p[namelen] == '=')  // Match.
@@ -635,18 +664,18 @@ const char *GetEnv(const char *name) {
     p = endp + 1;
   }
   return nullptr;  // Not found.
-#else
-#error "Unsupported platform"
-#endif
+#  else
+#    error "Unsupported platform"
+#  endif
 }
 
-#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD && !SANITIZER_GO
+#  if !SANITIZER_FREEBSD && !SANITIZER_NETBSD && !SANITIZER_GO
 extern "C" {
 SANITIZER_WEAK_ATTRIBUTE extern void *__libc_stack_end;
 }
-#endif
+#  endif
 
-#if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
+#  if !SANITIZER_FREEBSD && !SANITIZER_NETBSD
 static void ReadNullSepFileToArray(const char *path, char ***arr,
                                    int arr_size) {
   char *buff;
@@ -659,20 +688,21 @@ static void ReadNullSepFileToArray(const char *path, char ***arr,
   }
   (*arr)[0] = buff;
   int count, i;
-  for (count = 1, i = 1; ; i++) {
+  for (count = 1, i = 1;; i++) {
     if (buff[i] == 0) {
-      if (buff[i+1] == 0) break;
-      (*arr)[count] = &buff[i+1];
+      if (buff[i + 1] == 0)
+        break;
+      (*arr)[count] = &buff[i + 1];
       CHECK_LE(count, arr_size - 1);  // FIXME: make this more flexible.
       count++;
     }
   }
   (*arr)[count] = nullptr;
 }
-#endif
+#  endif
 
 static void GetArgsAndEnv(char ***argv, char ***envp) {
-#if SANITIZER_FREEBSD
+#  if SANITIZER_FREEBSD
   // On FreeBSD, retrieving the argument and environment arrays is done via the
   // kern.ps_strings sysctl, which returns a pointer to a structure containing
   // this information. See also <sys/exec.h>.
@@ -684,30 +714,30 @@ static void GetArgsAndEnv(char ***argv, char ***envp) {
   }
   *argv = pss->ps_argvstr;
   *envp = pss->ps_envstr;
-#elif SANITIZER_NETBSD
+#  elif SANITIZER_NETBSD
   *argv = __ps_strings->ps_argvstr;
   *envp = __ps_strings->ps_envstr;
-#else // SANITIZER_FREEBSD
-#if !SANITIZER_GO
+#  else  // SANITIZER_FREEBSD
+#    if !SANITIZER_GO
   if (&__libc_stack_end) {
-    uptr* stack_end = (uptr*)__libc_stack_end;
+    uptr *stack_end = (uptr *)__libc_stack_end;
     // Normally argc can be obtained from *stack_end, however, on ARM glibc's
     // _start clobbers it:
     // https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/arm/start.S;hb=refs/heads/release/2.31/master#l75
     // Do not special-case ARM and infer argc from argv everywhere.
     int argc = 0;
     while (stack_end[argc + 1]) argc++;
-    *argv = (char**)(stack_end + 1);
-    *envp = (char**)(stack_end + argc + 2);
+    *argv = (char **)(stack_end + 1);
+    *envp = (char **)(stack_end + argc + 2);
   } else {
-#endif // !SANITIZER_GO
+#    endif  // !SANITIZER_GO
     static const int kMaxArgv = 2000, kMaxEnvp = 2000;
     ReadNullSepFileToArray("/proc/self/cmdline", argv, kMaxArgv);
     ReadNullSepFileToArray("/proc/self/environ", envp, kMaxEnvp);
-#if !SANITIZER_GO
+#    if !SANITIZER_GO
   }
-#endif // !SANITIZER_GO
-#endif // SANITIZER_FREEBSD
+#    endif  // !SANITIZER_GO
+#  endif    // SANITIZER_FREEBSD
 }
 
 char **GetArgv() {
@@ -722,12 +752,12 @@ char **GetEnviron() {
   return envp;
 }
 
-#if !SANITIZER_SOLARIS
+#  if !SANITIZER_SOLARIS
 void FutexWait(atomic_uint32_t *p, u32 cmp) {
 #    if SANITIZER_FREEBSD
   _umtx_op(p, UMTX_OP_WAIT_UINT, cmp, 0, 0);
 #    elif SANITIZER_NETBSD
-  sched_yield();   /* No userspace futex-like synchronization */
+  sched_yield(); /* No userspace futex-like synchronization */
 #    else
   internal_syscall(SYSCALL(futex), (uptr)p, FUTEX_WAIT_PRIVATE, cmp, 0, 0, 0);
 #    endif
@@ -737,7 +767,7 @@ void FutexWake(atomic_uint32_t *p, u32 count) {
 #    if SANITIZER_FREEBSD
   _umtx_op(p, UMTX_OP_WAKE, count, 0, 0);
 #    elif SANITIZER_NETBSD
-                   /* No userspace futex-like synchronization */
+  /* No userspace futex-like synchronization */
 #    else
   internal_syscall(SYSCALL(futex), (uptr)p, FUTEX_WAKE_PRIVATE, count, 0, 0, 0);
 #    endif
@@ -749,26 +779,26 @@ void FutexWake(atomic_uint32_t *p, u32 count) {
 // The actual size of this structure is specified by d_reclen.
 // Note that getdents64 uses a different structure format. We only provide the
 // 32-bit syscall here.
-#if SANITIZER_NETBSD
+#  if SANITIZER_NETBSD
 // Not used
-#else
+#  else
 struct linux_dirent {
 #    if SANITIZER_X32 || SANITIZER_LINUX
   u64 d_ino;
   u64 d_off;
 #    else
-  unsigned long      d_ino;
-  unsigned long      d_off;
+  unsigned long d_ino;
+  unsigned long d_off;
 #    endif
-  unsigned short     d_reclen;
+  unsigned short d_reclen;
 #    if SANITIZER_LINUX
-  unsigned char      d_type;
+  unsigned char d_type;
 #    endif
-  char               d_name[256];
+  char d_name[256];
 };
-#endif
+#  endif
 
-#if !SANITIZER_SOLARIS && !SANITIZER_NETBSD
+#  if !SANITIZER_SOLARIS && !SANITIZER_NETBSD
 // Syscall wrappers.
 uptr internal_ptrace(int request, int pid, void *addr, void *data) {
   return internal_syscall(SYSCALL(ptrace), request, pid, (uptr)addr,
@@ -780,24 +810,20 @@ uptr internal_waitpid(int pid, int *status, int options) {
                           0 /* rusage */);
 }
 
-uptr internal_getpid() {
-  return internal_syscall(SYSCALL(getpid));
-}
+uptr internal_getpid() { return internal_syscall(SYSCALL(getpid)); }
 
-uptr internal_getppid() {
-  return internal_syscall(SYSCALL(getppid));
-}
+uptr internal_getppid() { return internal_syscall(SYSCALL(getppid)); }
 
 int internal_dlinfo(void *handle, int request, void *p) {
-#if SANITIZER_FREEBSD
+#    if SANITIZER_FREEBSD
   return dlinfo(handle, request, p);
-#else
+#    else
   UNIMPLEMENTED();
-#endif
+#    endif
 }
 
 uptr internal_getdents(fd_t fd, struct linux_dirent *dirp, unsigned int count) {
-#if SANITIZER_FREEBSD
+#    if SANITIZER_FREEBSD
   return internal_syscall(SYSCALL(getdirentries), fd, (uptr)dirp, count, NULL);
 #    elif SANITIZER_LINUX
   return internal_syscall(SYSCALL(getdents64), fd, (uptr)dirp, count);
@@ -810,7 +836,7 @@ uptr internal_lseek(fd_t fd, OFF_T offset, int whence) {
   return internal_syscall(SYSCALL(lseek), fd, offset, whence);
 }
 
-#if SANITIZER_LINUX
+#    if SANITIZER_LINUX
 uptr internal_prctl(int option, uptr arg2, uptr arg3, uptr arg4, uptr arg5) {
   return internal_syscall(SYSCALL(prctl), option, arg2, arg3, arg4, arg5);
 }
@@ -827,10 +853,16 @@ uptr internal_sigaltstack(const void *ss, void *oss) {
   return internal_syscall(SYSCALL(sigaltstack), (uptr)ss, (uptr)oss);
 }
 
+extern "C" pid_t __fork(void);
+
 int internal_fork() {
 #    if SANITIZER_LINUX
 #      if SANITIZER_S390
   return internal_syscall(SYSCALL(clone), 0, SIGCHLD);
+#      elif SANITIZER_SPARC
+  // The clone syscall interface on SPARC differs massively from the rest,
+  // so fall back to __fork.
+  return __fork();
 #      else
   return internal_syscall(SYSCALL(clone), SIGCHLD, 0);
 #      endif
@@ -839,7 +871,7 @@ int internal_fork() {
 #    endif
 }
 
-#if SANITIZER_FREEBSD
+#    if SANITIZER_FREEBSD
 int internal_sysctl(const int *name, unsigned int namelen, void *oldp,
                     uptr *oldlenp, const void *newp, uptr newlen) {
   return internal_syscall(SYSCALL(__sysctl), name, namelen, oldp,
@@ -854,11 +886,11 @@ int internal_sysctlbyname(const char *sname, void *oldp, uptr *oldlenp,
   // followed by sysctl(). To avoid calling the intercepted version and
   // asserting if this happens during startup, call the real sysctlnametomib()
   // followed by internal_sysctl() if the syscall is not available.
-#ifdef SYS___sysctlbyname
+#      ifdef SYS___sysctlbyname
   return internal_syscall(SYSCALL(__sysctlbyname), sname,
                           internal_strlen(sname), oldp, (size_t *)oldlenp, newp,
                           (size_t)newlen);
-#else
+#      else
   static decltype(sysctlnametomib) *real_sysctlnametomib = nullptr;
   if (!real_sysctlnametomib)
     real_sysctlnametomib =
@@ -870,12 +902,12 @@ int internal_sysctlbyname(const char *sname, void *oldp, uptr *oldlenp,
   if (real_sysctlnametomib(sname, oid, &len) == -1)
     return (-1);
   return internal_sysctl(oid, len, oldp, oldlenp, newp, newlen);
-#endif
+#      endif
 }
-#endif
+#    endif
 
-#if SANITIZER_LINUX
-#define SA_RESTORER 0x04000000
+#    if SANITIZER_LINUX
+#      define SA_RESTORER 0x04000000
 // Doesn't set sa_restorer if the caller did not set it, so use with caution
 // (see below).
 int internal_sigaction_norestorer(int signum, const void *act, void *oldact) {
@@ -899,15 +931,15 @@ int internal_sigaction_norestorer(int signum, const void *act, void *oldact) {
     // rt_sigaction, so we need to do the same (we'll need to reimplement the
     // restorers; for x86_64 the restorer address can be obtained from
     // oldact->sa_restorer upon a call to sigaction(xxx, NULL, oldact).
-#if !SANITIZER_ANDROID || !SANITIZER_MIPS32
+#      if !SANITIZER_ANDROID || !SANITIZER_MIPS32
     k_act.sa_restorer = u_act->sa_restorer;
-#endif
+#      endif
   }
 
   uptr result = internal_syscall(SYSCALL(rt_sigaction), (uptr)signum,
-      (uptr)(u_act ? &k_act : nullptr),
-      (uptr)(u_oldact ? &k_oldact : nullptr),
-      (uptr)sizeof(__sanitizer_kernel_sigset_t));
+                                 (uptr)(u_act ? &k_act : nullptr),
+                                 (uptr)(u_oldact ? &k_oldact : nullptr),
+                                 (uptr)sizeof(__sanitizer_kernel_sigset_t));
 
   if ((result == 0) && u_oldact) {
     u_oldact->handler = k_oldact.handler;
@@ -915,24 +947,24 @@ int internal_sigaction_norestorer(int signum, const void *act, void *oldact) {
     internal_memcpy(&u_oldact->sa_mask, &k_oldact.sa_mask,
                     sizeof(__sanitizer_kernel_sigset_t));
     u_oldact->sa_flags = k_oldact.sa_flags;
-#if !SANITIZER_ANDROID || !SANITIZER_MIPS32
+#      if !SANITIZER_ANDROID || !SANITIZER_MIPS32
     u_oldact->sa_restorer = k_oldact.sa_restorer;
-#endif
+#      endif
   }
   return result;
 }
-#endif  // SANITIZER_LINUX
+#    endif  // SANITIZER_LINUX
 
 uptr internal_sigprocmask(int how, __sanitizer_sigset_t *set,
                           __sanitizer_sigset_t *oldset) {
-#if SANITIZER_FREEBSD
+#    if SANITIZER_FREEBSD
   return internal_syscall(SYSCALL(sigprocmask), how, set, oldset);
-#else
+#    else
   __sanitizer_kernel_sigset_t *k_set = (__sanitizer_kernel_sigset_t *)set;
   __sanitizer_kernel_sigset_t *k_oldset = (__sanitizer_kernel_sigset_t *)oldset;
   return internal_syscall(SYSCALL(rt_sigprocmask), (uptr)how, (uptr)k_set,
                           (uptr)k_oldset, sizeof(__sanitizer_kernel_sigset_t));
-#endif
+#    endif
 }
 
 void internal_sigfillset(__sanitizer_sigset_t *set) {
@@ -943,7 +975,7 @@ void internal_sigemptyset(__sanitizer_sigset_t *set) {
   internal_memset(set, 0, sizeof(*set));
 }
 
-#if SANITIZER_LINUX
+#    if SANITIZER_LINUX
 void internal_sigdelset(__sanitizer_sigset_t *set, int signum) {
   signum -= 1;
   CHECK_GE(signum, 0);
@@ -963,7 +995,7 @@ bool internal_sigismember(__sanitizer_sigset_t *set, int signum) {
   const uptr bit = signum % (sizeof(k_set->sig[0]) * 8);
   return k_set->sig[idx] & ((uptr)1 << bit);
 }
-#elif SANITIZER_FREEBSD
+#    elif SANITIZER_FREEBSD
 uptr internal_procctl(int type, int id, int cmd, void *data) {
   return internal_syscall(SYSCALL(procctl), type, id, cmd, data);
 }
@@ -977,10 +1009,10 @@ bool internal_sigismember(__sanitizer_sigset_t *set, int signum) {
   sigset_t *rset = reinterpret_cast<sigset_t *>(set);
   return sigismember(rset, signum);
 }
-#endif
-#endif // !SANITIZER_SOLARIS
+#    endif
+#  endif  // !SANITIZER_SOLARIS
 
-#if !SANITIZER_NETBSD
+#  if !SANITIZER_NETBSD
 // ThreadLister implementation.
 ThreadLister::ThreadLister(pid_t pid) : pid_(pid), buffer_(4096) {
   char task_directory_path[80];
@@ -1067,25 +1099,26 @@ ThreadLister::~ThreadLister() {
   if (!internal_iserror(descriptor_))
     internal_close(descriptor_);
 }
-#endif
+#  endif
 
-#if SANITIZER_WORDSIZE == 32
+#  if SANITIZER_WORDSIZE == 32
 // Take care of unusable kernel area in top gigabyte.
 static uptr GetKernelAreaSize() {
-#if SANITIZER_LINUX && !SANITIZER_X32
+#    if SANITIZER_LINUX && !SANITIZER_X32
   const uptr gbyte = 1UL << 30;
 
   // Firstly check if there are writable segments
   // mapped to top gigabyte (e.g. stack).
-  MemoryMappingLayout proc_maps(/*cache_enabled*/true);
+  MemoryMappingLayout proc_maps(/*cache_enabled*/ true);
   if (proc_maps.Error())
     return 0;
   MemoryMappedSegment segment;
   while (proc_maps.Next(&segment)) {
-    if ((segment.end >= 3 * gbyte) && segment.IsWritable()) return 0;
+    if ((segment.end >= 3 * gbyte) && segment.IsWritable())
+      return 0;
   }
 
-#if !SANITIZER_ANDROID
+#      if !SANITIZER_ANDROID
   // Even if nothing is mapped, top Gb may still be accessible
   // if we are running on 64-bit kernel.
   // Uname may report misleading results if personality type
@@ -1095,21 +1128,22 @@ static uptr GetKernelAreaSize() {
   if (!(pers & PER_MASK) && internal_uname(&uname_info) == 0 &&
       internal_strstr(uname_info.machine, "64"))
     return 0;
-#endif  // SANITIZER_ANDROID
+#      endif  // !SANITIZER_ANDROID
 
   // Top gigabyte is reserved for kernel.
   return gbyte;
-#else
+#    else
   return 0;
-#endif  // SANITIZER_LINUX && !SANITIZER_X32
+#    endif  // SANITIZER_LINUX && !SANITIZER_X32
 }
-#endif  // SANITIZER_WORDSIZE == 32
+#  endif  // SANITIZER_WORDSIZE == 32
 
 uptr GetMaxVirtualAddress() {
-#if SANITIZER_NETBSD && defined(__x86_64__)
+#  if SANITIZER_NETBSD && defined(__x86_64__)
   return 0x7f7ffffff000ULL;  // (0x00007f8000000000 - PAGE_SIZE)
-#elif SANITIZER_WORDSIZE == 64
-# if defined(__powerpc64__) || defined(__aarch64__) || defined(__loongarch__)
+#  elif SANITIZER_WORDSIZE == 64
+#    if defined(__powerpc64__) || defined(__aarch64__) || \
+        defined(__loongarch__) || SANITIZER_RISCV64
   // On PowerPC64 we have two different address space layouts: 44- and 46-bit.
   // We somehow need to figure out which one we are using now and choose
   // one of 0x00000fffffffffffUL and 0x00003fffffffffffUL.
@@ -1118,97 +1152,97 @@ uptr GetMaxVirtualAddress() {
   // This should (does) work for both PowerPC64 Endian modes.
   // Similarly, aarch64 has multiple address space layouts: 39, 42 and 47-bit.
   // loongarch64 also has multiple address space layouts: default is 47-bit.
+  // RISC-V 64 also has multiple address space layouts: 39, 48 and 57-bit.
   return (1ULL << (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1)) - 1;
-#elif SANITIZER_RISCV64
-  return (1ULL << 38) - 1;
-# elif SANITIZER_MIPS64
+#    elif SANITIZER_MIPS64
   return (1ULL << 40) - 1;  // 0x000000ffffffffffUL;
-# elif defined(__s390x__)
+#    elif defined(__s390x__)
   return (1ULL << 53) - 1;  // 0x001fffffffffffffUL;
-#elif defined(__sparc__)
+#    elif defined(__sparc__)
   return ~(uptr)0;
-# else
+#    else
   return (1ULL << 47) - 1;  // 0x00007fffffffffffUL;
-# endif
-#else  // SANITIZER_WORDSIZE == 32
-# if defined(__s390__)
+#    endif
+#  else  // SANITIZER_WORDSIZE == 32
+#    if defined(__s390__)
   return (1ULL << 31) - 1;  // 0x7fffffff;
-# else
+#    else
   return (1ULL << 32) - 1;  // 0xffffffff;
-# endif
-#endif  // SANITIZER_WORDSIZE
+#    endif
+#  endif  // SANITIZER_WORDSIZE
 }
 
 uptr GetMaxUserVirtualAddress() {
   uptr addr = GetMaxVirtualAddress();
-#if SANITIZER_WORDSIZE == 32 && !defined(__s390__)
+#  if SANITIZER_WORDSIZE == 32 && !defined(__s390__)
   if (!common_flags()->full_address_space)
     addr -= GetKernelAreaSize();
   CHECK_LT(reinterpret_cast<uptr>(&addr), addr);
-#endif
+#  endif
   return addr;
 }
 
-#if !SANITIZER_ANDROID
+#  if !SANITIZER_ANDROID || defined(__aarch64__)
 uptr GetPageSize() {
-#if SANITIZER_LINUX && (defined(__x86_64__) || defined(__i386__)) && \
-    defined(EXEC_PAGESIZE)
+#    if SANITIZER_LINUX && (defined(__x86_64__) || defined(__i386__)) && \
+        defined(EXEC_PAGESIZE)
   return EXEC_PAGESIZE;
-#elif SANITIZER_FREEBSD || SANITIZER_NETBSD
-// Use sysctl as sysconf can trigger interceptors internally.
+#    elif SANITIZER_FREEBSD || SANITIZER_NETBSD
+  // Use sysctl as sysconf can trigger interceptors internally.
   int pz = 0;
   uptr pzl = sizeof(pz);
   int mib[2] = {CTL_HW, HW_PAGESIZE};
   int rv = internal_sysctl(mib, 2, &pz, &pzl, nullptr, 0);
   CHECK_EQ(rv, 0);
   return (uptr)pz;
-#elif SANITIZER_USE_GETAUXVAL
+#    elif SANITIZER_USE_GETAUXVAL
   return getauxval(AT_PAGESZ);
-#else
+#    else
   return sysconf(_SC_PAGESIZE);  // EXEC_PAGESIZE may not be trustworthy.
-#endif
+#    endif
 }
-#endif // !SANITIZER_ANDROID
+#  endif
 
-uptr ReadBinaryName(/*out*/char *buf, uptr buf_len) {
-#if SANITIZER_SOLARIS
+uptr ReadBinaryName(/*out*/ char *buf, uptr buf_len) {
+#  if SANITIZER_SOLARIS
   const char *default_module_name = getexecname();
   CHECK_NE(default_module_name, NULL);
   return internal_snprintf(buf, buf_len, "%s", default_module_name);
-#else
-#if SANITIZER_FREEBSD || SANITIZER_NETBSD
-#if SANITIZER_FREEBSD
+#  else
+#    if SANITIZER_FREEBSD || SANITIZER_NETBSD
+#      if SANITIZER_FREEBSD
   const int Mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
-#else
+#      else
   const int Mib[4] = {CTL_KERN, KERN_PROC_ARGS, -1, KERN_PROC_PATHNAME};
-#endif
+#      endif
   const char *default_module_name = "kern.proc.pathname";
   uptr Size = buf_len;
   bool IsErr =
       (internal_sysctl(Mib, ARRAY_SIZE(Mib), buf, &Size, NULL, 0) != 0);
   int readlink_error = IsErr ? errno : 0;
   uptr module_name_len = Size;
-#else
+#    else
   const char *default_module_name = "/proc/self/exe";
-  uptr module_name_len = internal_readlink(
-      default_module_name, buf, buf_len);
+  uptr module_name_len = internal_readlink(default_module_name, buf, buf_len);
   int readlink_error;
   bool IsErr = internal_iserror(module_name_len, &readlink_error);
-#endif  // SANITIZER_SOLARIS
+#    endif
   if (IsErr) {
     // We can't read binary name for some reason, assume it's unknown.
-    Report("WARNING: reading executable name failed with errno %d, "
-           "some stack frames may not be symbolized\n", readlink_error);
-    module_name_len = internal_snprintf(buf, buf_len, "%s",
-                                        default_module_name);
+    Report(
+        "WARNING: reading executable name failed with errno %d, "
+        "some stack frames may not be symbolized\n",
+        readlink_error);
+    module_name_len =
+        internal_snprintf(buf, buf_len, "%s", default_module_name);
     CHECK_LT(module_name_len, buf_len);
   }
   return module_name_len;
-#endif
+#  endif
 }
 
 uptr ReadLongProcessName(/*out*/ char *buf, uptr buf_len) {
-#if SANITIZER_LINUX
+#  if SANITIZER_LINUX
   char *tmpbuf;
   uptr tmpsize;
   uptr tmplen;
@@ -1218,7 +1252,7 @@ uptr ReadLongProcessName(/*out*/ char *buf, uptr buf_len) {
     UnmapOrDie(tmpbuf, tmpsize);
     return internal_strlen(buf);
   }
-#endif
+#  endif
   return ReadBinaryName(buf, buf_len);
 }
 
@@ -1228,20 +1262,22 @@ bool LibraryNameIs(const char *full_name, const char *base_name) {
   // Strip path.
   while (*name != '\0') name++;
   while (name > full_name && *name != '/') name--;
-  if (*name == '/') name++;
+  if (*name == '/')
+    name++;
   uptr base_name_length = internal_strlen(base_name);
-  if (internal_strncmp(name, base_name, base_name_length)) return false;
+  if (internal_strncmp(name, base_name, base_name_length))
+    return false;
   return (name[base_name_length] == '-' || name[base_name_length] == '.');
 }
 
-#if !SANITIZER_ANDROID
+#  if !SANITIZER_ANDROID
 // Call cb for each region mapped by map.
 void ForEachMappedRegion(link_map *map, void (*cb)(const void *, uptr)) {
   CHECK_NE(map, nullptr);
-#if !SANITIZER_FREEBSD
+#    if !SANITIZER_FREEBSD
   typedef ElfW(Phdr) Elf_Phdr;
   typedef ElfW(Ehdr) Elf_Ehdr;
-#endif // !SANITIZER_FREEBSD
+#    endif  // !SANITIZER_FREEBSD
   char *base = (char *)map->l_addr;
   Elf_Ehdr *ehdr = (Elf_Ehdr *)base;
   char *phdrs = base + ehdr->e_phoff;
@@ -1273,10 +1309,10 @@ void ForEachMappedRegion(link_map *map, void (*cb)(const void *, uptr)) {
     }
   }
 }
-#endif
+#  endif
 
-#if SANITIZER_LINUX
-#if defined(__x86_64__)
+#  if SANITIZER_LINUX
+#    if defined(__x86_64__)
 // We cannot use glibc's clone wrapper, because it messes with the child
 // task's TLS. It writes the PID and TID of the child task to its thread
 // descriptor, but in our case the child task shares the thread descriptor with
@@ -1295,50 +1331,46 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
   register void *r8 __asm__("r8") = newtls;
   register int *r10 __asm__("r10") = child_tidptr;
   __asm__ __volatile__(
-                       /* %rax = syscall(%rax = SYSCALL(clone),
-                        *                %rdi = flags,
-                        *                %rsi = child_stack,
-                        *                %rdx = parent_tidptr,
-                        *                %r8  = new_tls,
-                        *                %r10 = child_tidptr)
-                        */
-                       "syscall\n"
-
-                       /* if (%rax != 0)
-                        *   return;
-                        */
-                       "testq  %%rax,%%rax\n"
-                       "jnz    1f\n"
-
-                       /* In the child. Terminate unwind chain. */
-                       // XXX: We should also terminate the CFI unwind chain
-                       // here. Unfortunately clang 3.2 doesn't support the
-                       // necessary CFI directives, so we skip that part.
-                       "xorq   %%rbp,%%rbp\n"
-
-                       /* Call "fn(arg)". */
-                       "popq   %%rax\n"
-                       "popq   %%rdi\n"
-                       "call   *%%rax\n"
-
-                       /* Call _exit(%rax). */
-                       "movq   %%rax,%%rdi\n"
-                       "movq   %2,%%rax\n"
-                       "syscall\n"
-
-                       /* Return to parent. */
-                     "1:\n"
-                       : "=a" (res)
-                       : "a"(SYSCALL(clone)), "i"(SYSCALL(exit)),
-                         "S"(child_stack),
-                         "D"(flags),
-                         "d"(parent_tidptr),
-                         "r"(r8),
-                         "r"(r10)
-                       : "memory", "r11", "rcx");
+      /* %rax = syscall(%rax = SYSCALL(clone),
+       *                %rdi = flags,
+       *                %rsi = child_stack,
+       *                %rdx = parent_tidptr,
+       *                %r8  = new_tls,
+       *                %r10 = child_tidptr)
+       */
+      "syscall\n"
+
+      /* if (%rax != 0)
+       *   return;
+       */
+      "testq  %%rax,%%rax\n"
+      "jnz    1f\n"
+
+      /* In the child. Terminate unwind chain. */
+      // XXX: We should also terminate the CFI unwind chain
+      // here. Unfortunately clang 3.2 doesn't support the
+      // necessary CFI directives, so we skip that part.
+      "xorq   %%rbp,%%rbp\n"
+
+      /* Call "fn(arg)". */
+      "popq   %%rax\n"
+      "popq   %%rdi\n"
+      "call   *%%rax\n"
+
+      /* Call _exit(%rax). */
+      "movq   %%rax,%%rdi\n"
+      "movq   %2,%%rax\n"
+      "syscall\n"
+
+      /* Return to parent. */
+      "1:\n"
+      : "=a"(res)
+      : "a"(SYSCALL(clone)), "i"(SYSCALL(exit)), "S"(child_stack), "D"(flags),
+        "d"(parent_tidptr), "r"(r8), "r"(r10)
+      : "memory", "r11", "rcx");
   return res;
 }
-#elif defined(__mips__)
+#    elif defined(__mips__)
 uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
                     int *parent_tidptr, void *newtls, int *child_tidptr) {
   long long res;
@@ -1353,68 +1385,63 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
   // We don't have proper CFI directives here because it requires a lot of
   // code for very marginal benefits.
   __asm__ __volatile__(
-                       /* $v0 = syscall($v0 = __NR_clone,
-                        * $a0 = flags,
-                        * $a1 = child_stack,
-                        * $a2 = parent_tidptr,
-                        * $a3 = new_tls,
-                        * $a4 = child_tidptr)
-                        */
-                       ".cprestore 16;\n"
-                       "move $4,%1;\n"
-                       "move $5,%2;\n"
-                       "move $6,%3;\n"
-                       "move $7,%4;\n"
-                       /* Store the fifth argument on stack
-                        * if we are using 32-bit abi.
-                        */
-#if SANITIZER_WORDSIZE == 32
-                       "lw %5,16($29);\n"
-#else
-                       "move $8,%5;\n"
-#endif
-                       "li $2,%6;\n"
-                       "syscall;\n"
-
-                       /* if ($v0 != 0)
-                        * return;
-                        */
-                       "bnez $2,1f;\n"
-
-                       /* Call "fn(arg)". */
-#if SANITIZER_WORDSIZE == 32
-#ifdef __BIG_ENDIAN__
-                       "lw $25,4($29);\n"
-                       "lw $4,12($29);\n"
-#else
-                       "lw $25,0($29);\n"
-                       "lw $4,8($29);\n"
-#endif
-#else
-                       "ld $25,0($29);\n"
-                       "ld $4,8($29);\n"
-#endif
-                       "jal $25;\n"
-
-                       /* Call _exit($v0). */
-                       "move $4,$2;\n"
-                       "li $2,%7;\n"
-                       "syscall;\n"
-
-                       /* Return to parent. */
-                     "1:\n"
-                       : "=r" (res)
-                       : "r"(flags),
-                         "r"(child_stack),
-                         "r"(parent_tidptr),
-                         "r"(a3),
-                         "r"(a4),
-                         "i"(__NR_clone),
-                         "i"(__NR_exit)
-                       : "memory", "$29" );
+      /* $v0 = syscall($v0 = __NR_clone,
+       * $a0 = flags,
+       * $a1 = child_stack,
+       * $a2 = parent_tidptr,
+       * $a3 = new_tls,
+       * $a4 = child_tidptr)
+       */
+      ".cprestore 16;\n"
+      "move $4,%1;\n"
+      "move $5,%2;\n"
+      "move $6,%3;\n"
+      "move $7,%4;\n"
+  /* Store the fifth argument on stack
+   * if we are using 32-bit abi.
+   */
+#      if SANITIZER_WORDSIZE == 32
+      "lw %5,16($29);\n"
+#      else
+      "move $8,%5;\n"
+#      endif
+      "li $2,%6;\n"
+      "syscall;\n"
+
+      /* if ($v0 != 0)
+       * return;
+       */
+      "bnez $2,1f;\n"
+
+  /* Call "fn(arg)". */
+#      if SANITIZER_WORDSIZE == 32
+#        ifdef __BIG_ENDIAN__
+      "lw $25,4($29);\n"
+      "lw $4,12($29);\n"
+#        else
+      "lw $25,0($29);\n"
+      "lw $4,8($29);\n"
+#        endif
+#      else
+      "ld $25,0($29);\n"
+      "ld $4,8($29);\n"
+#      endif
+      "jal $25;\n"
+
+      /* Call _exit($v0). */
+      "move $4,$2;\n"
+      "li $2,%7;\n"
+      "syscall;\n"
+
+      /* Return to parent. */
+      "1:\n"
+      : "=r"(res)
+      : "r"(flags), "r"(child_stack), "r"(parent_tidptr), "r"(a3), "r"(a4),
+        "i"(__NR_clone), "i"(__NR_exit)
+      : "memory", "$29");
   return res;
 }
-#elif SANITIZER_RISCV64
+#    elif SANITIZER_RISCV64
 uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
                     int *parent_tidptr, void *newtls, int *child_tidptr) {
   if (!fn || !child_stack)
@@ -1455,7 +1482,7 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
       : "memory");
   return res;
 }
-#elif defined(__aarch64__)
+#    elif defined(__aarch64__)
 uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
                     int *parent_tidptr, void *newtls, int *child_tidptr) {
   register long long res __asm__("x0");
@@ -1466,47 +1493,45 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
   ((unsigned long long *)child_stack)[0] = (uptr)fn;
   ((unsigned long long *)child_stack)[1] = (uptr)arg;
 
-  register int (*__fn)(void *)  __asm__("x0") = fn;
+  register int (*__fn)(void *) __asm__("x0") = fn;
   register void *__stack __asm__("x1") = child_stack;
-  register int   __flags __asm__("x2") = flags;
-  register void *__arg   __asm__("x3") = arg;
-  register int  *__ptid  __asm__("x4") = parent_tidptr;
-  register void *__tls   __asm__("x5") = newtls;
-  register int  *__ctid  __asm__("x6") = child_tidptr;
+  register int __flags __asm__("x2") = flags;
+  register void *__arg __asm__("x3") = arg;
+  register int *__ptid __asm__("x4") = parent_tidptr;
+  register void *__tls __asm__("x5") = newtls;
+  register int *__ctid __asm__("x6") = child_tidptr;
 
   __asm__ __volatile__(
-                       "mov x0,x2\n" /* flags  */
-                       "mov x2,x4\n" /* ptid  */
-                       "mov x3,x5\n" /* tls  */
-                       "mov x4,x6\n" /* ctid  */
-                       "mov x8,%9\n" /* clone  */
-
-                       "svc 0x0\n"
-
-                       /* if (%r0 != 0)
-                        *   return %r0;
-                        */
-                       "cmp x0, #0\n"
-                       "bne 1f\n"
-
-                       /* In the child, now. Call "fn(arg)". */
-                       "ldp x1, x0, [sp], #16\n"
-                       "blr x1\n"
-
-                       /* Call _exit(%r0).  */
-                       "mov x8, %10\n"
-                       "svc 0x0\n"
-                     "1:\n"
-
-                       : "=r" (res)
-                       : "i"(-EINVAL),
-                         "r"(__fn), "r"(__stack), "r"(__flags), "r"(__arg),
-                         "r"(__ptid), "r"(__tls), "r"(__ctid),
-                         "i"(__NR_clone), "i"(__NR_exit)
-                       : "x30", "memory");
+      "mov x0,x2\n" /* flags  */
+      "mov x2,x4\n" /* ptid  */
+      "mov x3,x5\n" /* tls  */
+      "mov x4,x6\n" /* ctid  */
+      "mov x8,%9\n" /* clone  */
+
+      "svc 0x0\n"
+
+      /* if (%r0 != 0)
+       *   return %r0;
+       */
+      "cmp x0, #0\n"
+      "bne 1f\n"
+
+      /* In the child, now. Call "fn(arg)". */
+      "ldp x1, x0, [sp], #16\n"
+      "blr x1\n"
+
+      /* Call _exit(%r0).  */
+      "mov x8, %10\n"
+      "svc 0x0\n"
+      "1:\n"
+
+      : "=r"(res)
+      : "i"(-EINVAL), "r"(__fn), "r"(__stack), "r"(__flags), "r"(__arg),
+        "r"(__ptid), "r"(__tls), "r"(__ctid), "i"(__NR_clone), "i"(__NR_exit)
+      : "x30", "memory");
   return res;
 }
-#elif SANITIZER_LOONGARCH64
+#    elif SANITIZER_LOONGARCH64
 uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
                     int *parent_tidptr, void *newtls, int *child_tidptr) {
   if (!fn || !child_stack)
@@ -1544,119 +1569,110 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
       : "=r"(res)
       : "0"(__flags), "r"(__stack), "r"(__ptid), "r"(__ctid), "r"(__tls),
         "r"(__fn), "r"(__arg), "r"(nr_clone), "i"(__NR_exit)
-      : "memory", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8");
+      : "memory", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7",
+        "$t8");
   return res;
 }
-#elif defined(__powerpc64__)
+#    elif defined(__powerpc64__)
 // PowerPC64 flavour of internal_clone. Returns -EINVAL when fn or child_stack
 // is null and CHECKs that child_stack is 16-byte aligned. The arguments are
 // pinned to r3..r9 through register variables and the clone syscall is issued
 // from inline assembly. When r3 is zero after the syscall and the `so` bit of
 // cr0 is clear, the assembly builds a new stack frame, calls fn(arg) and hands
 // fn's result to the exit syscall; otherwise it jumps to label 1, copies r3
 // into res, and res is returned.
 uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
-                   int *parent_tidptr, void *newtls, int *child_tidptr) {
+                    int *parent_tidptr, void *newtls, int *child_tidptr) {
   long long res;
 // Stack frame structure.
-#if SANITIZER_PPC64V1
-//   Back chain == 0        (SP + 112)
-// Frame (112 bytes):
-//   Parameter save area    (SP + 48), 8 doublewords
-//   TOC save area          (SP + 40)
-//   Link editor doubleword (SP + 32)
-//   Compiler doubleword    (SP + 24)
-//   LR save area           (SP + 16)
-//   CR save area           (SP + 8)
-//   Back chain             (SP + 0)
-# define FRAME_SIZE 112
-# define FRAME_TOC_SAVE_OFFSET 40
-#elif SANITIZER_PPC64V2
-//   Back chain == 0        (SP + 32)
-// Frame (32 bytes):
-//   TOC save area          (SP + 24)
-//   LR save area           (SP + 16)
-//   CR save area           (SP + 8)
-//   Back chain             (SP + 0)
-# define FRAME_SIZE 32
-# define FRAME_TOC_SAVE_OFFSET 24
-#else
-# error "Unsupported PPC64 ABI"
-#endif
+#      if SANITIZER_PPC64V1
+  //   Back chain == 0        (SP + 112)
+  // Frame (112 bytes):
+  //   Parameter save area    (SP + 48), 8 doublewords
+  //   TOC save area          (SP + 40)
+  //   Link editor doubleword (SP + 32)
+  //   Compiler doubleword    (SP + 24)
+  //   LR save area           (SP + 16)
+  //   CR save area           (SP + 8)
+  //   Back chain             (SP + 0)
+#        define FRAME_SIZE 112
+#        define FRAME_TOC_SAVE_OFFSET 40
+#      elif SANITIZER_PPC64V2
+  //   Back chain == 0        (SP + 32)
+  // Frame (32 bytes):
+  //   TOC save area          (SP + 24)
+  //   LR save area           (SP + 16)
+  //   CR save area           (SP + 8)
+  //   Back chain             (SP + 0)
+#        define FRAME_SIZE 32
+#        define FRAME_TOC_SAVE_OFFSET 24
+#      else
+#        error "Unsupported PPC64 ABI"
+#      endif
   if (!fn || !child_stack)
     return -EINVAL;
   CHECK_EQ(0, (uptr)child_stack % 16);
 
   register int (*__fn)(void *) __asm__("r3") = fn;
-  register void *__cstack      __asm__("r4") = child_stack;
-  register int __flags         __asm__("r5") = flags;
-  register void *__arg         __asm__("r6") = arg;
-  register int *__ptidptr      __asm__("r7") = parent_tidptr;
-  register void *__newtls      __asm__("r8") = newtls;
-  register int *__ctidptr      __asm__("r9") = child_tidptr;
-
- __asm__ __volatile__(
-           /* fn and arg are saved across the syscall */
-           "mr 28, %5\n\t"
-           "mr 27, %8\n\t"
-
-           /* syscall
-             r0 == __NR_clone
-             r3 == flags
-             r4 == child_stack
-             r5 == parent_tidptr
-             r6 == newtls
-             r7 == child_tidptr */
-           "mr 3, %7\n\t"
-           "mr 5, %9\n\t"
-           "mr 6, %10\n\t"
-           "mr 7, %11\n\t"
-           "li 0, %3\n\t"
-           "sc\n\t"
-
-           /* Test if syscall was successful */
-           "cmpdi  cr1, 3, 0\n\t"
-           "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t"
-           "bne-   cr1, 1f\n\t"
-
-           /* Set up stack frame */
-           "li    29, 0\n\t"
-           "stdu  29, -8(1)\n\t"
-           "stdu  1, -%12(1)\n\t"
-           /* Do the function call */
-           "std   2, %13(1)\n\t"
-#if SANITIZER_PPC64V1
-           "ld    0, 0(28)\n\t"
-           "ld    2, 8(28)\n\t"
-           "mtctr 0\n\t"
-#elif SANITIZER_PPC64V2
-           "mr    12, 28\n\t"
-           "mtctr 12\n\t"
-#else
-# error "Unsupported PPC64 ABI"
-#endif
-           "mr    3, 27\n\t"
-           "bctrl\n\t"
-           "ld    2, %13(1)\n\t"
-
-           /* Call _exit(r3) */
-           "li 0, %4\n\t"
-           "sc\n\t"
-
-           /* Return to parent */
-           "1:\n\t"
-           "mr %0, 3\n\t"
-             : "=r" (res)
-             : "0" (-1),
-               "i" (EINVAL),
-               "i" (__NR_clone),
-               "i" (__NR_exit),
-               "r" (__fn),
-               "r" (__cstack),
-               "r" (__flags),
-               "r" (__arg),
-               "r" (__ptidptr),
-               "r" (__newtls),
-               "r" (__ctidptr),
-               "i" (FRAME_SIZE),
-               "i" (FRAME_TOC_SAVE_OFFSET)
-             : "cr0", "cr1", "memory", "ctr", "r0", "r27", "r28", "r29");
+  register void *__cstack __asm__("r4") = child_stack;
+  register int __flags __asm__("r5") = flags;
+  register void *__arg __asm__("r6") = arg;
+  register int *__ptidptr __asm__("r7") = parent_tidptr;
+  register void *__newtls __asm__("r8") = newtls;
+  register int *__ctidptr __asm__("r9") = child_tidptr;
+
+  __asm__ __volatile__(
+      /* fn and arg are saved across the syscall */
+      "mr 28, %5\n\t"
+      "mr 27, %8\n\t"
+
+      /* syscall
+        r0 == __NR_clone
+        r3 == flags
+        r4 == child_stack
+        r5 == parent_tidptr
+        r6 == newtls
+        r7 == child_tidptr */
+      "mr 3, %7\n\t"
+      "mr 5, %9\n\t"
+      "mr 6, %10\n\t"
+      "mr 7, %11\n\t"
+      "li 0, %3\n\t"
+      "sc\n\t"
+
+      /* Test if syscall was successful */
+      "cmpdi  cr1, 3, 0\n\t"
+      "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t"
+      "bne-   cr1, 1f\n\t"
+
+      /* Set up stack frame */
+      "li    29, 0\n\t"
+      "stdu  29, -8(1)\n\t"
+      "stdu  1, -%12(1)\n\t"
+      /* Do the function call */
+      "std   2, %13(1)\n\t"
+#      if SANITIZER_PPC64V1
+      "ld    0, 0(28)\n\t"
+      "ld    2, 8(28)\n\t"
+      "mtctr 0\n\t"
+#      elif SANITIZER_PPC64V2
+      "mr    12, 28\n\t"
+      "mtctr 12\n\t"
+#      else
+#        error "Unsupported PPC64 ABI"
+#      endif
+      "mr    3, 27\n\t"
+      "bctrl\n\t"
+      "ld    2, %13(1)\n\t"
+
+      /* Call _exit(r3) */
+      "li 0, %4\n\t"
+      "sc\n\t"
+
+      /* Return to parent */
+      "1:\n\t"
+      "mr %0, 3\n\t"
+      : "=r"(res)  // operand %0
+      : "0"(-1), "i"(EINVAL), "i"(__NR_clone), "i"(__NR_exit), "r"(__fn),  // operands %1-%5
+        "r"(__cstack), "r"(__flags), "r"(__arg), "r"(__ptidptr), "r"(__newtls),  // operands %6-%10
+        "r"(__ctidptr), "i"(FRAME_SIZE), "i"(FRAME_TOC_SAVE_OFFSET)  // operands %11-%13
+      : "cr0", "cr1", "memory", "ctr", "r0", "r27", "r28", "r29");
   return res;
 }
-#elif defined(__i386__)
+#    elif defined(__i386__)
 uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
                     int *parent_tidptr, void *newtls, int *child_tidptr) {
   int res;
@@ -1669,59 +1685,56 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
   ((unsigned int *)child_stack)[2] = (uptr)fn;
   ((unsigned int *)child_stack)[3] = (uptr)arg;
   __asm__ __volatile__(
-                       /* %eax = syscall(%eax = SYSCALL(clone),
-                        *                %ebx = flags,
-                        *                %ecx = child_stack,
-                        *                %edx = parent_tidptr,
-                        *                %esi  = new_tls,
-                        *                %edi = child_tidptr)
-                        */
-
-                        /* Obtain flags */
-                        "movl    (%%ecx), %%ebx\n"
-                        /* Do the system call */
-                        "pushl   %%ebx\n"
-                        "pushl   %%esi\n"
-                        "pushl   %%edi\n"
-                        /* Remember the flag value.  */
-                        "movl    %%ebx, (%%ecx)\n"
-                        "int     $0x80\n"
-                        "popl    %%edi\n"
-                        "popl    %%esi\n"
-                        "popl    %%ebx\n"
-
-                        /* if (%eax != 0)
-                         *   return;
-                         */
-
-                        "test    %%eax,%%eax\n"
-                        "jnz    1f\n"
-
-                        /* terminate the stack frame */
-                        "xorl   %%ebp,%%ebp\n"
-                        /* Call FN. */
-                        "call    *%%ebx\n"
-#ifdef PIC
-                        "call    here\n"
-                        "here:\n"
-                        "popl    %%ebx\n"
-                        "addl    $_GLOBAL_OFFSET_TABLE_+[.-here], %%ebx\n"
-#endif
-                        /* Call exit */
-                        "movl    %%eax, %%ebx\n"
-                        "movl    %2, %%eax\n"
-                        "int     $0x80\n"
-                        "1:\n"
-                       : "=a" (res)
-                       : "a"(SYSCALL(clone)), "i"(SYSCALL(exit)),
-                         "c"(child_stack),
-                         "d"(parent_tidptr),
-                         "S"(newtls),
-                         "D"(child_tidptr)
-                       : "memory");
+      /* %eax = syscall(%eax = SYSCALL(clone),
+       *                %ebx = flags,
+       *                %ecx = child_stack,
+       *                %edx = parent_tidptr,
+       *                %esi  = new_tls,
+       *                %edi = child_tidptr)
+       */
+
+      /* Obtain flags */
+      "movl    (%%ecx), %%ebx\n"
+      /* Do the system call */
+      "pushl   %%ebx\n"
+      "pushl   %%esi\n"
+      "pushl   %%edi\n"
+      /* Remember the flag value.  */
+      "movl    %%ebx, (%%ecx)\n"
+      "int     $0x80\n"
+      "popl    %%edi\n"
+      "popl    %%esi\n"
+      "popl    %%ebx\n"
+
+      /* if (%eax != 0)
+       *   return;
+       */
+
+      "test    %%eax,%%eax\n"
+      "jnz    1f\n"
+
+      /* terminate the stack frame */
+      "xorl   %%ebp,%%ebp\n"
+      /* Call FN. */
+      "call    *%%ebx\n"
+#      ifdef PIC
+      "call    here\n"
+      "here:\n"
+      "popl    %%ebx\n"
+      "addl    $_GLOBAL_OFFSET_TABLE_+[.-here], %%ebx\n"
+#      endif
+      /* Call exit */
+      "movl    %%eax, %%ebx\n"
+      "movl    %2, %%eax\n"
+      "int     $0x80\n"
+      "1:\n"
+      : "=a"(res)
+      : "a"(SYSCALL(clone)), "i"(SYSCALL(exit)), "c"(child_stack),
+        "d"(parent_tidptr), "S"(newtls), "D"(child_tidptr)
+      : "memory");
   return res;
 }
-#elif defined(__arm__)
+#    elif defined(__arm__)
 uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
                     int *parent_tidptr, void *newtls, int *child_tidptr) {
   unsigned int res;
@@ -1737,70 +1750,68 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
   register int *r4 __asm__("r4") = child_tidptr;
   register int r7 __asm__("r7") = __NR_clone;
 
-#if __ARM_ARCH > 4 || defined (__ARM_ARCH_4T__)
-# define ARCH_HAS_BX
-#endif
-#if __ARM_ARCH > 4
-# define ARCH_HAS_BLX
-#endif
+#      if __ARM_ARCH > 4 || defined(__ARM_ARCH_4T__)
+#        define ARCH_HAS_BX
+#      endif
+#      if __ARM_ARCH > 4
+#        define ARCH_HAS_BLX
+#      endif
 
-#ifdef ARCH_HAS_BX
-# ifdef ARCH_HAS_BLX
-#  define BLX(R) "blx "  #R "\n"
-# else
-#  define BLX(R) "mov lr, pc; bx " #R "\n"
-# endif
-#else
-# define BLX(R)  "mov lr, pc; mov pc," #R "\n"
-#endif
+#      ifdef ARCH_HAS_BX
+#        ifdef ARCH_HAS_BLX
+#          define BLX(R) "blx " #R "\n"
+#        else
+#          define BLX(R) "mov lr, pc; bx " #R "\n"
+#        endif
+#      else
+#        define BLX(R) "mov lr, pc; mov pc," #R "\n"
+#      endif
 
   __asm__ __volatile__(
-                       /* %r0 = syscall(%r7 = SYSCALL(clone),
-                        *               %r0 = flags,
-                        *               %r1 = child_stack,
-                        *               %r2 = parent_tidptr,
-                        *               %r3  = new_tls,
-                        *               %r4 = child_tidptr)
-                        */
-
-                       /* Do the system call */
-                       "swi 0x0\n"
-
-                       /* if (%r0 != 0)
-                        *   return %r0;
-                        */
-                       "cmp r0, #0\n"
-                       "bne 1f\n"
-
-                       /* In the child, now. Call "fn(arg)". */
-                       "ldr r0, [sp, #4]\n"
-                       "ldr ip, [sp], #8\n"
-                       BLX(ip)
-                       /* Call _exit(%r0). */
-                       "mov r7, %7\n"
-                       "swi 0x0\n"
-                       "1:\n"
-                       "mov %0, r0\n"
-                       : "=r"(res)
-                       : "r"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4), "r"(r7),
-                         "i"(__NR_exit)
-                       : "memory");
+      /* %r0 = syscall(%r7 = SYSCALL(clone),
+       *               %r0 = flags,
+       *               %r1 = child_stack,
+       *               %r2 = parent_tidptr,
+       *               %r3  = new_tls,
+       *               %r4 = child_tidptr)
+       */
+
+      /* Do the system call */
+      "swi 0x0\n"
+
+      /* if (%r0 != 0)
+       *   return %r0;
+       */
+      "cmp r0, #0\n"
+      "bne 1f\n"
+
+      /* In the child, now. Call "fn(arg)". */
+      "ldr r0, [sp, #4]\n"
+      "ldr ip, [sp], #8\n" BLX(ip)
+      /* Call _exit(%r0). */
+      "mov r7, %7\n"
+      "swi 0x0\n"
+      "1:\n"
+      "mov %0, r0\n"
+      : "=r"(res)
+      : "r"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4), "r"(r7), "i"(__NR_exit)
+      : "memory");
   return res;
 }
-#endif
-#endif  // SANITIZER_LINUX
+#    endif
+#  endif  // SANITIZER_LINUX
 
-#if SANITIZER_LINUX
+#  if SANITIZER_LINUX
 // Invokes the uname system call on buf through internal_syscall and returns
 // that call's result unchanged.
 int internal_uname(struct utsname *buf) {
   return internal_syscall(SYSCALL(uname), buf);
 }
-#endif
+#  endif
 
-#if SANITIZER_ANDROID
-#if __ANDROID_API__ < 21
+#  if SANITIZER_ANDROID
+#    if __ANDROID_API__ < 21
 extern "C" __attribute__((weak)) int dl_iterate_phdr(
     int (*)(struct dl_phdr_info *, size_t, void *), void *);
-#endif
+#    endif
 
 static int dl_iterate_phdr_test_cb(struct dl_phdr_info *info, size_t size,
                                    void *data) {
@@ -1817,40 +1828,41 @@ static int dl_iterate_phdr_test_cb(struct dl_phdr_info *info, size_t size,
 static atomic_uint32_t android_api_level;
 
 // Chooses the API level bucket at compile time from __ANDROID_API__:
 // <= 19 yields ANDROID_KITKAT, <= 22 yields ANDROID_LOLLIPOP_MR1, and any
 // higher value yields ANDROID_POST_LOLLIPOP.
 static AndroidApiLevel AndroidDetectApiLevelStatic() {
-#if __ANDROID_API__ <= 19
+#    if __ANDROID_API__ <= 19
   return ANDROID_KITKAT;
-#elif __ANDROID_API__ <= 22
+#    elif __ANDROID_API__ <= 22
   return ANDROID_LOLLIPOP_MR1;
-#else
+#    else
   return ANDROID_POST_LOLLIPOP;
-#endif
+#    endif
 }
 
 // Chooses the API level bucket at run time. Returns ANDROID_KITKAT when the
 // address of dl_iterate_phdr is null. Otherwise runs dl_iterate_phdr with
 // dl_iterate_phdr_test_cb, passing &base_name_seen as its data pointer, and
 // returns ANDROID_LOLLIPOP_MR1 if the flag ended up true or
 // ANDROID_POST_LOLLIPOP if it stayed false.
 static AndroidApiLevel AndroidDetectApiLevel() {
   if (!&dl_iterate_phdr)
-    return ANDROID_KITKAT; // K or lower
+    return ANDROID_KITKAT;  // K or lower
   bool base_name_seen = false;
   dl_iterate_phdr(dl_iterate_phdr_test_cb, &base_name_seen);
   if (base_name_seen)
-    return ANDROID_LOLLIPOP_MR1; // L MR1
+    return ANDROID_LOLLIPOP_MR1;  // L MR1
   return ANDROID_POST_LOLLIPOP;   // post-L
   // Plain L (API level 21) is completely broken wrt ASan and not very
   // interesting to detect.
 }
 
-extern "C" __attribute__((weak)) void* _DYNAMIC;
+extern "C" __attribute__((weak)) void *_DYNAMIC;
 
 // Returns the Android API level bucket, detecting it on first use. A non-zero
 // value already held in android_api_level is returned as-is. Otherwise the
 // level comes from AndroidDetectApiLevelStatic() when &_DYNAMIC is null and
 // from AndroidDetectApiLevel() when it is not, and is stored back before
 // returning. Both the load and the store use memory_order_relaxed.
 AndroidApiLevel AndroidGetApiLevel() {
   AndroidApiLevel level =
       (AndroidApiLevel)atomic_load(&android_api_level, memory_order_relaxed);
-  if (level) return level;
+  if (level)
+    return level;
   level = &_DYNAMIC == nullptr ? AndroidDetectApiLevelStatic()
                                : AndroidDetectApiLevel();
   atomic_store(&android_api_level, level, memory_order_relaxed);
   return level;
 }
 
-#endif
+#  endif
 
 static HandleSignalMode GetHandleSignalModeImpl(int signum) {
   switch (signum) {
@@ -1877,28 +1889,28 @@ HandleSignalMode GetHandleSignalMode(int signum) {
   return result;
 }
 
-#if !SANITIZER_GO
+#  if !SANITIZER_GO
 // Starts func(arg) on a new thread via internal_pthread_create and returns the
 // value that call stored in th. Returns nullptr without creating anything when
 // the address of internal_pthread_create compares equal to 0.
 // NOTE(review): the result of internal_pthread_create is ignored and th has no
 // initializer, so a failed creation may return an indeterminate pointer --
 // confirm, and consider initializing th to nullptr.
 void *internal_start_thread(void *(*func)(void *arg), void *arg) {
-  if (&real_pthread_create == 0)
+  if (&internal_pthread_create == 0)
     return nullptr;
   // Start the thread with signals blocked, otherwise it can steal user signals.
   ScopedBlockSignals block(nullptr);
   void *th;
-  real_pthread_create(&th, nullptr, func, arg);
+  internal_pthread_create(&th, nullptr, func, arg);
   return th;
 }
 
 // Calls internal_pthread_join(th, nullptr) when that symbol's address is
 // non-null; otherwise does nothing. The join call's result is not inspected.
 void internal_join_thread(void *th) {
-  if (&real_pthread_join)
-    real_pthread_join(th, nullptr);
+  if (&internal_pthread_join)
+    internal_pthread_join(th, nullptr);
 }
-#else
+#  else
 // SANITIZER_GO variant: starts no thread, ignores both arguments and always
 // returns 0.
 void *internal_start_thread(void *(*func)(void *), void *arg) { return 0; }
 
 // SANITIZER_GO variant: no-op; th is ignored.
 void internal_join_thread(void *th) {}
-#endif
+#  endif
 
-#if SANITIZER_LINUX && defined(__aarch64__)
+#  if SANITIZER_LINUX && defined(__aarch64__)
 // Android headers in the older NDK releases miss this definition.
 struct __sanitizer_esr_context {
   struct _aarch64_ctx head;
@@ -1910,7 +1922,8 @@ static bool Aarch64GetESR(ucontext_t *ucontext, u64 *esr) {
   u8 *aux = reinterpret_cast<u8 *>(ucontext->uc_mcontext.__reserved);
   while (true) {
     _aarch64_ctx *ctx = (_aarch64_ctx *)aux;
-    if (ctx->size == 0) break;
+    if (ctx->size == 0)
+      break;
     if (ctx->magic == kEsrMagic) {
       *esr = ((__sanitizer_esr_context *)ctx)->esr;
       return true;
@@ -1919,31 +1932,29 @@ static bool Aarch64GetESR(ucontext_t *ucontext, u64 *esr) {
   }
   return false;
 }
-#elif SANITIZER_FREEBSD && defined(__aarch64__)
+#  elif SANITIZER_FREEBSD && defined(__aarch64__)
 // FreeBSD doesn't provide ESR in the ucontext.
-static bool Aarch64GetESR(ucontext_t *ucontext, u64 *esr) {
-  return false;
-}
-#endif
+static bool Aarch64GetESR(ucontext_t *ucontext, u64 *esr) { return false; }  // Always false; *esr is never written.
+#  endif
 
 using Context = ucontext_t;
 
 SignalContext::WriteFlag SignalContext::GetWriteFlag() const {
   Context *ucontext = (Context *)context;
-#if defined(__x86_64__) || defined(__i386__)
+#  if defined(__x86_64__) || defined(__i386__)
   static const uptr PF_WRITE = 1U << 1;
-#if SANITIZER_FREEBSD
+#    if SANITIZER_FREEBSD
   uptr err = ucontext->uc_mcontext.mc_err;
-#elif SANITIZER_NETBSD
+#    elif SANITIZER_NETBSD
   uptr err = ucontext->uc_mcontext.__gregs[_REG_ERR];
-#elif SANITIZER_SOLARIS && defined(__i386__)
+#    elif SANITIZER_SOLARIS && defined(__i386__)
   const int Err = 13;
   uptr err = ucontext->uc_mcontext.gregs[Err];
-#else
+#    else
   uptr err = ucontext->uc_mcontext.gregs[REG_ERR];
-#endif // SANITIZER_FREEBSD
+#    endif  // SANITIZER_FREEBSD
   return err & PF_WRITE ? Write : Read;
-#elif defined(__mips__)
+#  elif defined(__mips__)
   uint32_t *exception_source;
   uint32_t faulty_instruction;
   uint32_t op_code;
@@ -1959,12 +1970,12 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const {
     case 0x29:  // sh
     case 0x2b:  // sw
     case 0x3f:  // sd
-#if __mips_isa_rev < 6
+#    if __mips_isa_rev < 6
     case 0x2c:  // sdl
     case 0x2d:  // sdr
     case 0x2a:  // swl
     case 0x2e:  // swr
-#endif
+#    endif
       return SignalContext::Write;
 
     case 0x20:  // lb
@@ -1974,14 +1985,14 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const {
     case 0x23:  // lw
     case 0x27:  // lwu
     case 0x37:  // ld
-#if __mips_isa_rev < 6
+#    if __mips_isa_rev < 6
     case 0x1a:  // ldl
     case 0x1b:  // ldr
     case 0x22:  // lwl
     case 0x26:  // lwr
-#endif
+#    endif
       return SignalContext::Read;
-#if __mips_isa_rev == 6
+#    if __mips_isa_rev == 6
     case 0x3b:  // pcrel
       op_code = (faulty_instruction >> 19) & 0x3;
       switch (op_code) {
@@ -1989,50 +2000,51 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const {
         case 0x2:  // lwupc
           return SignalContext::Read;
       }
-#endif
+#    endif
   }
   return SignalContext::Unknown;
-#elif defined(__arm__)
+#  elif defined(__arm__)
   static const uptr FSR_WRITE = 1U << 11;
   uptr fsr = ucontext->uc_mcontext.error_code;
   return fsr & FSR_WRITE ? Write : Read;
-#elif defined(__aarch64__)
+#  elif defined(__aarch64__)
   static const u64 ESR_ELx_WNR = 1U << 6;
   u64 esr;
-  if (!Aarch64GetESR(ucontext, &esr)) return Unknown;
+  if (!Aarch64GetESR(ucontext, &esr))
+    return Unknown;
   return esr & ESR_ELx_WNR ? Write : Read;
-#elif defined(__loongarch__)
+#  elif defined(__loongarch__)
   u32 flags = ucontext->uc_mcontext.__flags;
   if (flags & SC_ADDRERR_RD)
     return SignalContext::Read;
   if (flags & SC_ADDRERR_WR)
     return SignalContext::Write;
   return SignalContext::Unknown;
-#elif defined(__sparc__)
+#  elif defined(__sparc__)
   // Decode the instruction to determine the access type.
   // From OpenSolaris $SRC/uts/sun4/os/trap.c (get_accesstype).
-#if SANITIZER_SOLARIS
+#    if SANITIZER_SOLARIS
   uptr pc = ucontext->uc_mcontext.gregs[REG_PC];
-#else
+#    else
   // Historical BSDism here.
   struct sigcontext *scontext = (struct sigcontext *)context;
-#if defined(__arch64__)
+#      if defined(__arch64__)
   uptr pc = scontext->sigc_regs.tpc;
-#else
+#      else
   uptr pc = scontext->si_regs.pc;
-#endif
-#endif
+#      endif
+#    endif
   u32 instr = *(u32 *)pc;
-  return (instr >> 21) & 1 ? Write: Read;
-#elif defined(__riscv)
-#if SANITIZER_FREEBSD
+  return (instr >> 21) & 1 ? Write : Read;
+#  elif defined(__riscv)
+#    if SANITIZER_FREEBSD
   unsigned long pc = ucontext->uc_mcontext.mc_gpregs.gp_sepc;
-#else
+#    else
   unsigned long pc = ucontext->uc_mcontext.__gregs[REG_PC];
-#endif
+#    endif
   unsigned faulty_instruction = *(uint16_t *)pc;
 
-#if defined(__riscv_compressed)
+#    if defined(__riscv_compressed)
   if ((faulty_instruction & 0x3) != 0x3) {  // it's a compressed instruction
     // set op_bits to the instruction bits [1, 0, 15, 14, 13]
     unsigned op_bits =
@@ -2040,38 +2052,38 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const {
     unsigned rd = faulty_instruction & 0xF80;  // bits 7-11, inclusive
     switch (op_bits) {
       case 0b10'010:  // c.lwsp (rd != x0)
-#if __riscv_xlen == 64
+#      if __riscv_xlen == 64
       case 0b10'011:  // c.ldsp (rd != x0)
-#endif
+#      endif
         return rd ? SignalContext::Read : SignalContext::Unknown;
       case 0b00'010:  // c.lw
-#if __riscv_flen >= 32 && __riscv_xlen == 32
+#      if __riscv_flen >= 32 && __riscv_xlen == 32
       case 0b10'011:  // c.flwsp
-#endif
-#if __riscv_flen >= 32 || __riscv_xlen == 64
+#      endif
+#      if __riscv_flen >= 32 || __riscv_xlen == 64
       case 0b00'011:  // c.flw / c.ld
-#endif
-#if __riscv_flen == 64
+#      endif
+#      if __riscv_flen == 64
       case 0b00'001:  // c.fld
       case 0b10'001:  // c.fldsp
-#endif
+#      endif
         return SignalContext::Read;
       case 0b00'110:  // c.sw
       case 0b10'110:  // c.swsp
-#if __riscv_flen >= 32 || __riscv_xlen == 64
+#      if __riscv_flen >= 32 || __riscv_xlen == 64
       case 0b00'111:  // c.fsw / c.sd
       case 0b10'111:  // c.fswsp / c.sdsp
-#endif
-#if __riscv_flen == 64
+#      endif
+#      if __riscv_flen == 64
       case 0b00'101:  // c.fsd
       case 0b10'101:  // c.fsdsp
-#endif
+#      endif
         return SignalContext::Write;
       default:
         return SignalContext::Unknown;
     }
   }
-#endif
+#    endif
 
   unsigned opcode = faulty_instruction & 0x7f;         // lower 7 bits
   unsigned funct3 = (faulty_instruction >> 12) & 0x7;  // bits 12-14, inclusive
@@ -2081,9 +2093,9 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const {
         case 0b000:  // lb
         case 0b001:  // lh
         case 0b010:  // lw
-#if __riscv_xlen == 64
+#    if __riscv_xlen == 64
         case 0b011:  // ld
-#endif
+#    endif
         case 0b100:  // lbu
         case 0b101:  // lhu
           return SignalContext::Read;
@@ -2095,20 +2107,20 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const {
         case 0b000:  // sb
         case 0b001:  // sh
         case 0b010:  // sw
-#if __riscv_xlen == 64
+#    if __riscv_xlen == 64
         case 0b011:  // sd
-#endif
+#    endif
           return SignalContext::Write;
         default:
           return SignalContext::Unknown;
       }
-#if __riscv_flen >= 32
+#    if __riscv_flen >= 32
     case 0b0000111:  // floating-point loads
       switch (funct3) {
         case 0b010:  // flw
-#if __riscv_flen == 64
+#      if __riscv_flen == 64
         case 0b011:  // fld
-#endif
+#      endif
           return SignalContext::Read;
         default:
           return SignalContext::Unknown;
@@ -2116,21 +2128,21 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const {
     case 0b0100111:  // floating-point stores
       switch (funct3) {
         case 0b010:  // fsw
-#if __riscv_flen == 64
+#      if __riscv_flen == 64
         case 0b011:  // fsd
-#endif
+#      endif
           return SignalContext::Write;
         default:
           return SignalContext::Unknown;
       }
-#endif
+#    endif
     default:
       return SignalContext::Unknown;
   }
-#else
+#  else
   (void)ucontext;
   return Unknown;  // FIXME: Implement.
-#endif
+#  endif
 }
 
 bool SignalContext::IsTrueFaultingAddress() const {
@@ -2139,129 +2151,288 @@ bool SignalContext::IsTrueFaultingAddress() const {
   return si->si_signo == SIGSEGV && si->si_code != 128;
 }
 
+// Translates a general-purpose register index (one of the REG_* constants
+// used to index uc_mcontext.gregs) into its lowercase mnemonic.
+//
+// Only Linux on x86_64 and i386 is covered. For any other index, or on any
+// other target, the result is nullptr, so callers must check before using
+// the name as a string.
+UNUSED
+static const char *RegNumToRegName(int reg) {
+  switch (reg) {
+#  if SANITIZER_LINUX
+#    if defined(__x86_64__)
+    case REG_RAX:
+      return "rax";
+    case REG_RBX:
+      return "rbx";
+    case REG_RCX:
+      return "rcx";
+    case REG_RDX:
+      return "rdx";
+    case REG_RDI:
+      return "rdi";
+    case REG_RSI:
+      return "rsi";
+    case REG_RBP:
+      return "rbp";
+    case REG_RSP:
+      return "rsp";
+    case REG_R8:
+      return "r8";
+    case REG_R9:
+      return "r9";
+    case REG_R10:
+      return "r10";
+    case REG_R11:
+      return "r11";
+    case REG_R12:
+      return "r12";
+    case REG_R13:
+      return "r13";
+    case REG_R14:
+      return "r14";
+    case REG_R15:
+      return "r15";
+#    elif defined(__i386__)
+    case REG_EAX:
+      return "eax";
+    case REG_EBX:
+      return "ebx";
+    case REG_ECX:
+      return "ecx";
+    case REG_EDX:
+      return "edx";
+    case REG_EDI:
+      return "edi";
+    case REG_ESI:
+      return "esi";
+    case REG_EBP:
+      return "ebp";
+    case REG_ESP:
+      return "esp";
+#    endif
+#  endif
+    default:
+      // Unknown register, or a target without a name table: fall through to
+      // the single "no name" return below.
+      break;
+  }
+  return nullptr;
+}
+
+#  if SANITIZER_LINUX
+// Prints one general-purpose register taken from the signal context `ctx` as
+// "<name> = 0x<hex value>  " (no trailing newline). `RegNum` is the REG_*
+// index into ctx->uc_mcontext.gregs. Only x86_64 and i386 produce output.
+UNUSED
+static void DumpSingleReg(ucontext_t *ctx, int RegNum) {
+  const char *RegName = RegNumToRegName(RegNum);
+  // RegNumToRegName() yields a null pointer for indices it does not know.
+  // This runs while a crash report is being produced, so skip such a register
+  // rather than pass a null string on to internal_strlen() / Printf("%s").
+  if (!RegName)
+    return;
+#    if defined(__x86_64__)
+  // Two-character names ("r8", "r9") get a leading space so that they line up
+  // with the three-character names in the same column.
+  const char *Pad = internal_strlen(RegName) == 2 ? " " : "";
+  Printf("%s%s = 0x%016llx  ", Pad, RegName, ctx->uc_mcontext.gregs[RegNum]);
+#    elif defined(__i386__)
+  Printf("%s = 0x%08x  ", RegName, ctx->uc_mcontext.gregs[RegNum]);
+#    endif
+}
+#  endif
+
+// Prints the general-purpose registers saved in the signal `context` (which is
+// cast to ucontext_t *) under a "Register values:" header, four per line.
+// Implemented for x86_64 and i386 on Linux and FreeBSD; on every other target
+// nothing is printed at all, not even the header.
 void SignalContext::DumpAllRegisters(void *context) {
-  // FIXME: Implement this.
+  ucontext_t *ucontext = (ucontext_t *)context;
+#  if SANITIZER_LINUX
+#    if defined(__x86_64__)
+  Report("Register values:\n");
+  DumpSingleReg(ucontext, REG_RAX);
+  DumpSingleReg(ucontext, REG_RBX);
+  DumpSingleReg(ucontext, REG_RCX);
+  DumpSingleReg(ucontext, REG_RDX);
+  Printf("\n");
+  DumpSingleReg(ucontext, REG_RDI);
+  DumpSingleReg(ucontext, REG_RSI);
+  DumpSingleReg(ucontext, REG_RBP);
+  DumpSingleReg(ucontext, REG_RSP);
+  Printf("\n");
+  DumpSingleReg(ucontext, REG_R8);
+  DumpSingleReg(ucontext, REG_R9);
+  DumpSingleReg(ucontext, REG_R10);
+  DumpSingleReg(ucontext, REG_R11);
+  Printf("\n");
+  DumpSingleReg(ucontext, REG_R12);
+  DumpSingleReg(ucontext, REG_R13);
+  DumpSingleReg(ucontext, REG_R14);
+  DumpSingleReg(ucontext, REG_R15);
+  Printf("\n");
+#    elif defined(__i386__)
+  // The "Register values:" header is repeated in each supported branch on
+  // purpose: register dumping is only partially supported, and on an
+  // architecture that is not supported yet the header must not be printed
+  // without any values following it.
+  Report("Register values:\n");
+  DumpSingleReg(ucontext, REG_EAX);
+  DumpSingleReg(ucontext, REG_EBX);
+  DumpSingleReg(ucontext, REG_ECX);
+  DumpSingleReg(ucontext, REG_EDX);
+  Printf("\n");
+  DumpSingleReg(ucontext, REG_EDI);
+  DumpSingleReg(ucontext, REG_ESI);
+  DumpSingleReg(ucontext, REG_EBP);
+  DumpSingleReg(ucontext, REG_ESP);
+  Printf("\n");
+#    else
+  (void)ucontext;
+#    endif
+#  elif SANITIZER_FREEBSD
+#    if defined(__x86_64__)
+  Report("Register values:\n");
+  Printf("rax = 0x%016lx  ", ucontext->uc_mcontext.mc_rax);
+  Printf("rbx = 0x%016lx  ", ucontext->uc_mcontext.mc_rbx);
+  Printf("rcx = 0x%016lx  ", ucontext->uc_mcontext.mc_rcx);
+  Printf("rdx = 0x%016lx  ", ucontext->uc_mcontext.mc_rdx);
+  Printf("\n");
+  Printf("rdi = 0x%016lx  ", ucontext->uc_mcontext.mc_rdi);
+  Printf("rsi = 0x%016lx  ", ucontext->uc_mcontext.mc_rsi);
+  Printf("rbp = 0x%016lx  ", ucontext->uc_mcontext.mc_rbp);
+  Printf("rsp = 0x%016lx  ", ucontext->uc_mcontext.mc_rsp);
+  Printf("\n");
+  Printf(" r8 = 0x%016lx  ", ucontext->uc_mcontext.mc_r8);
+  Printf(" r9 = 0x%016lx  ", ucontext->uc_mcontext.mc_r9);
+  Printf("r10 = 0x%016lx  ", ucontext->uc_mcontext.mc_r10);
+  Printf("r11 = 0x%016lx  ", ucontext->uc_mcontext.mc_r11);
+  Printf("\n");
+  Printf("r12 = 0x%016lx  ", ucontext->uc_mcontext.mc_r12);
+  Printf("r13 = 0x%016lx  ", ucontext->uc_mcontext.mc_r13);
+  Printf("r14 = 0x%016lx  ", ucontext->uc_mcontext.mc_r14);
+  Printf("r15 = 0x%016lx  ", ucontext->uc_mcontext.mc_r15);
+  Printf("\n");
+#    elif defined(__i386__)
+  Report("Register values:\n");
+  Printf("eax = 0x%08x  ", ucontext->uc_mcontext.mc_eax);
+  Printf("ebx = 0x%08x  ", ucontext->uc_mcontext.mc_ebx);
+  Printf("ecx = 0x%08x  ", ucontext->uc_mcontext.mc_ecx);
+  Printf("edx = 0x%08x  ", ucontext->uc_mcontext.mc_edx);
+  Printf("\n");
+  Printf("edi = 0x%08x  ", ucontext->uc_mcontext.mc_edi);
+  Printf("esi = 0x%08x  ", ucontext->uc_mcontext.mc_esi);
+  Printf("ebp = 0x%08x  ", ucontext->uc_mcontext.mc_ebp);
+  Printf("esp = 0x%08x  ", ucontext->uc_mcontext.mc_esp);
+  Printf("\n");
+#    else
+  (void)ucontext;
+#    endif
+#  else
+  // Neither Linux nor FreeBSD: `ucontext` would otherwise be an unused local
+  // and trigger an unused-variable warning on the remaining targets.
+  (void)ucontext;
+#  endif
+  // FIXME: Implement this for other OSes and architectures.
 }
 
 static void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) {
-#if SANITIZER_NETBSD
+#  if SANITIZER_NETBSD
   // This covers all NetBSD architectures
   ucontext_t *ucontext = (ucontext_t *)context;
   *pc = _UC_MACHINE_PC(ucontext);
   *bp = _UC_MACHINE_FP(ucontext);
   *sp = _UC_MACHINE_SP(ucontext);
-#elif defined(__arm__)
-  ucontext_t *ucontext = (ucontext_t*)context;
+#  elif defined(__arm__)
+  ucontext_t *ucontext = (ucontext_t *)context;
   *pc = ucontext->uc_mcontext.arm_pc;
   *bp = ucontext->uc_mcontext.arm_fp;
   *sp = ucontext->uc_mcontext.arm_sp;
-#elif defined(__aarch64__)
-# if SANITIZER_FREEBSD
-  ucontext_t *ucontext = (ucontext_t*)context;
+#  elif defined(__aarch64__)
+#    if SANITIZER_FREEBSD
+  ucontext_t *ucontext = (ucontext_t *)context;
   *pc = ucontext->uc_mcontext.mc_gpregs.gp_elr;
   *bp = ucontext->uc_mcontext.mc_gpregs.gp_x[29];
   *sp = ucontext->uc_mcontext.mc_gpregs.gp_sp;
-# else
-  ucontext_t *ucontext = (ucontext_t*)context;
+#    else
+  ucontext_t *ucontext = (ucontext_t *)context;
   *pc = ucontext->uc_mcontext.pc;
   *bp = ucontext->uc_mcontext.regs[29];
   *sp = ucontext->uc_mcontext.sp;
-# endif
-#elif defined(__hppa__)
-  ucontext_t *ucontext = (ucontext_t*)context;
+#    endif
+#  elif defined(__hppa__)
+  ucontext_t *ucontext = (ucontext_t *)context;
   *pc = ucontext->uc_mcontext.sc_iaoq[0];
   /* GCC uses %r3 whenever a frame pointer is needed.  */
   *bp = ucontext->uc_mcontext.sc_gr[3];
   *sp = ucontext->uc_mcontext.sc_gr[30];
-#elif defined(__x86_64__)
-# if SANITIZER_FREEBSD
-  ucontext_t *ucontext = (ucontext_t*)context;
+#  elif defined(__x86_64__)
+#    if SANITIZER_FREEBSD
+  ucontext_t *ucontext = (ucontext_t *)context;
   *pc = ucontext->uc_mcontext.mc_rip;
   *bp = ucontext->uc_mcontext.mc_rbp;
   *sp = ucontext->uc_mcontext.mc_rsp;
-# else
-  ucontext_t *ucontext = (ucontext_t*)context;
+#    else
+  ucontext_t *ucontext = (ucontext_t *)context;
   *pc = ucontext->uc_mcontext.gregs[REG_RIP];
   *bp = ucontext->uc_mcontext.gregs[REG_RBP];
   *sp = ucontext->uc_mcontext.gregs[REG_RSP];
-# endif
-#elif defined(__i386__)
-# if SANITIZER_FREEBSD
-  ucontext_t *ucontext = (ucontext_t*)context;
+#    endif
+#  elif defined(__i386__)
+#    if SANITIZER_FREEBSD
+  ucontext_t *ucontext = (ucontext_t *)context;
   *pc = ucontext->uc_mcontext.mc_eip;
   *bp = ucontext->uc_mcontext.mc_ebp;
   *sp = ucontext->uc_mcontext.mc_esp;
-# else
-  ucontext_t *ucontext = (ucontext_t*)context;
-# if SANITIZER_SOLARIS
+#    else
+  ucontext_t *ucontext = (ucontext_t *)context;
+#      if SANITIZER_SOLARIS
   /* Use the numeric values: the symbolic ones are undefined by llvm
      include/llvm/Support/Solaris.h.  */
-# ifndef REG_EIP
-#  define REG_EIP 14 // REG_PC
-# endif
-# ifndef REG_EBP
-#  define REG_EBP  6 // REG_FP
-# endif
-# ifndef REG_UESP
-#  define REG_UESP 17 // REG_SP
-# endif
-# endif
+#        ifndef REG_EIP
+#          define REG_EIP 14  // REG_PC
+#        endif
+#        ifndef REG_EBP
+#          define REG_EBP 6  // REG_FP
+#        endif
+#        ifndef REG_UESP
+#          define REG_UESP 17  // REG_SP
+#        endif
+#      endif
   *pc = ucontext->uc_mcontext.gregs[REG_EIP];
   *bp = ucontext->uc_mcontext.gregs[REG_EBP];
   *sp = ucontext->uc_mcontext.gregs[REG_UESP];
-# endif
-#elif defined(__powerpc__) || defined(__powerpc64__)
+#    endif
+#  elif defined(__powerpc__) || defined(__powerpc64__)
 #    if SANITIZER_FREEBSD
   ucontext_t *ucontext = (ucontext_t *)context;
   *pc = ucontext->uc_mcontext.mc_srr0;
   *sp = ucontext->uc_mcontext.mc_frame[1];
   *bp = ucontext->uc_mcontext.mc_frame[31];
 #    else
-  ucontext_t *ucontext = (ucontext_t*)context;
+  ucontext_t *ucontext = (ucontext_t *)context;
   *pc = ucontext->uc_mcontext.regs->nip;
   *sp = ucontext->uc_mcontext.regs->gpr[PT_R1];
   // The powerpc{,64}-linux ABIs do not specify r31 as the frame
   // pointer, but GCC always uses r31 when we need a frame pointer.
   *bp = ucontext->uc_mcontext.regs->gpr[PT_R31];
 #    endif
-#elif defined(__sparc__)
-#if defined(__arch64__) || defined(__sparcv9)
-#define STACK_BIAS 2047
-#else
-#define STACK_BIAS 0
-# endif
-# if SANITIZER_SOLARIS
+#  elif defined(__sparc__)
+#    if defined(__arch64__) || defined(__sparcv9)
+#      define STACK_BIAS 2047
+#    else
+#      define STACK_BIAS 0
+#    endif
+#    if SANITIZER_SOLARIS
   ucontext_t *ucontext = (ucontext_t *)context;
   *pc = ucontext->uc_mcontext.gregs[REG_PC];
   *sp = ucontext->uc_mcontext.gregs[REG_O6] + STACK_BIAS;
-#else
+#    else
   // Historical BSDism here.
   struct sigcontext *scontext = (struct sigcontext *)context;
-#if defined(__arch64__)
+#      if defined(__arch64__)
   *pc = scontext->sigc_regs.tpc;
   *sp = scontext->sigc_regs.u_regs[14] + STACK_BIAS;
-#else
+#      else
   *pc = scontext->si_regs.pc;
   *sp = scontext->si_regs.u_regs[14];
-#endif
-# endif
+#      endif
+#    endif
   *bp = (uptr)((uhwptr *)*sp)[14] + STACK_BIAS;
-#elif defined(__mips__)
-  ucontext_t *ucontext = (ucontext_t*)context;
+#  elif defined(__mips__)
+  ucontext_t *ucontext = (ucontext_t *)context;
   *pc = ucontext->uc_mcontext.pc;
   *bp = ucontext->uc_mcontext.gregs[30];
   *sp = ucontext->uc_mcontext.gregs[29];
-#elif defined(__s390__)
-  ucontext_t *ucontext = (ucontext_t*)context;
-# if defined(__s390x__)
+#  elif defined(__s390__)
+  ucontext_t *ucontext = (ucontext_t *)context;
+#    if defined(__s390x__)
   *pc = ucontext->uc_mcontext.psw.addr;
-# else
+#    else
   *pc = ucontext->uc_mcontext.psw.addr & 0x7fffffff;
-# endif
+#    endif
   *bp = ucontext->uc_mcontext.gregs[11];
   *sp = ucontext->uc_mcontext.gregs[15];
-#elif defined(__riscv)
-  ucontext_t *ucontext = (ucontext_t*)context;
+#  elif defined(__riscv)
+  ucontext_t *ucontext = (ucontext_t *)context;
 #    if SANITIZER_FREEBSD
   *pc = ucontext->uc_mcontext.mc_gpregs.gp_sepc;
   *bp = ucontext->uc_mcontext.mc_gpregs.gp_s[0];
@@ -2293,7 +2464,7 @@ void InitializePlatformEarly() {
 }
 
 void CheckASLR() {
-#if SANITIZER_NETBSD
+#  if SANITIZER_NETBSD
   int mib[3];
   int paxflags;
   uptr len = sizeof(paxflags);
@@ -2308,12 +2479,13 @@ void CheckASLR() {
   }
 
   if (UNLIKELY(paxflags & CTL_PROC_PAXFLAGS_ASLR)) {
-    Printf("This sanitizer is not compatible with enabled ASLR.\n"
-           "To disable ASLR, please run \"paxctl +a %s\" and try again.\n",
-           GetArgv()[0]);
+    Printf(
+        "This sanitizer is not compatible with enabled ASLR.\n"
+        "To disable ASLR, please run \"paxctl +a %s\" and try again.\n",
+        GetArgv()[0]);
     Die();
   }
-#elif SANITIZER_FREEBSD
+#  elif SANITIZER_FREEBSD
   int aslr_status;
   int r = internal_procctl(P_PID, 0, PROC_ASLR_STATUS, &aslr_status);
   if (UNLIKELY(r == -1)) {
@@ -2323,9 +2495,13 @@ void CheckASLR() {
     return;
   }
   if ((aslr_status & PROC_ASLR_ACTIVE) != 0) {
-    Printf("This sanitizer is not compatible with enabled ASLR "
-           "and binaries compiled with PIE\n");
-    Die();
+    VReport(1,
+            "This sanitizer is not compatible with enabled ASLR "
+            "and binaries compiled with PIE\n"
+            "ASLR will be disabled and the program re-executed.\n");
+    int aslr_ctl = PROC_ASLR_FORCE_DISABLE;
+    CHECK_NE(internal_procctl(P_PID, 0, PROC_ASLR_CTL, &aslr_ctl), -1);
+    ReExec();
   }
 #  elif SANITIZER_PPC64V2
   // Disable ASLR for Linux PPC64LE.
@@ -2345,7 +2521,7 @@ void CheckASLR() {
 }
 
 void CheckMPROTECT() {
-#if SANITIZER_NETBSD
+#  if SANITIZER_NETBSD
   int mib[3];
   int paxflags;
   uptr len = sizeof(paxflags);
@@ -2363,13 +2539,13 @@ void CheckMPROTECT() {
     Printf("This sanitizer is not compatible with enabled MPROTECT\n");
     Die();
   }
-#else
+#  else
   // Do nothing
-#endif
+#  endif
 }
 
 void CheckNoDeepBind(const char *filename, int flag) {
-#ifdef RTLD_DEEPBIND
+#  ifdef RTLD_DEEPBIND
   if (flag & RTLD_DEEPBIND) {
     Report(
         "You are trying to dlopen a %s shared library with RTLD_DEEPBIND flag"
@@ -2380,7 +2556,7 @@ void CheckNoDeepBind(const char *filename, int flag) {
         filename, filename);
     Die();
   }
-#endif
+#  endif
 }
 
 uptr FindAvailableMemoryRange(uptr size, uptr alignment, uptr left_padding,
@@ -2393,16 +2569,16 @@ uptr FindAvailableMemoryRange(uptr size, uptr alignment, uptr left_padding,
 bool GetRandom(void *buffer, uptr length, bool blocking) {
   if (!buffer || !length || length > 256)
     return false;
-#if SANITIZER_USE_GETENTROPY
+#  if SANITIZER_USE_GETENTROPY
   uptr rnd = getentropy(buffer, length);
   int rverrno = 0;
   if (internal_iserror(rnd, &rverrno) && rverrno == EFAULT)
     return false;
   else if (rnd == 0)
     return true;
-#endif // SANITIZER_USE_GETENTROPY
+#  endif  // SANITIZER_USE_GETENTROPY
 
-#if SANITIZER_USE_GETRANDOM
+#  if SANITIZER_USE_GETRANDOM
   static atomic_uint8_t skip_getrandom_syscall;
   if (!atomic_load_relaxed(&skip_getrandom_syscall)) {
     // Up to 256 bytes, getrandom will not be interrupted.
@@ -2414,7 +2590,7 @@ bool GetRandom(void *buffer, uptr length, bool blocking) {
     else if (res == length)
       return true;
   }
-#endif // SANITIZER_USE_GETRANDOM
+#  endif  // SANITIZER_USE_GETRANDOM
   // Up to 256 bytes, a read off /dev/urandom will not be interrupted.
   // blocking is moot here, O_NONBLOCK has no effect when opening /dev/urandom.
   uptr fd = internal_open("/dev/urandom", O_RDONLY);
@@ -2427,6 +2603,6 @@ bool GetRandom(void *buffer, uptr length, bool blocking) {
   return true;
 }
 
-} // namespace __sanitizer
+}  // namespace __sanitizer
 
 #endif
lib/tsan/sanitizer_common/sanitizer_linux.h
@@ -13,15 +13,15 @@
 #define SANITIZER_LINUX_H
 
 #include "sanitizer_platform.h"
-#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD ||                \
+#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD || \
     SANITIZER_SOLARIS
-#include "sanitizer_common.h"
-#include "sanitizer_internal_defs.h"
-#include "sanitizer_platform_limits_freebsd.h"
-#include "sanitizer_platform_limits_netbsd.h"
-#include "sanitizer_platform_limits_posix.h"
-#include "sanitizer_platform_limits_solaris.h"
-#include "sanitizer_posix.h"
+#  include "sanitizer_common.h"
+#  include "sanitizer_internal_defs.h"
+#  include "sanitizer_platform_limits_freebsd.h"
+#  include "sanitizer_platform_limits_netbsd.h"
+#  include "sanitizer_platform_limits_posix.h"
+#  include "sanitizer_platform_limits_solaris.h"
+#  include "sanitizer_posix.h"
 
 struct link_map;  // Opaque type returned by dlopen().
 struct utsname;
@@ -46,9 +46,9 @@ void ReadProcMaps(ProcSelfMapsBuff *proc_maps);
 
 // Syscall wrappers.
 uptr internal_getdents(fd_t fd, struct linux_dirent *dirp, unsigned int count);
-uptr internal_sigaltstack(const void* ss, void* oss);
+uptr internal_sigaltstack(const void *ss, void *oss);
 uptr internal_sigprocmask(int how, __sanitizer_sigset_t *set,
-    __sanitizer_sigset_t *oldset);
+                          __sanitizer_sigset_t *oldset);
 
 void SetSigProcMask(__sanitizer_sigset_t *set, __sanitizer_sigset_t *oldset);
 void BlockSignals(__sanitizer_sigset_t *oldset = nullptr);
@@ -65,10 +65,10 @@ struct ScopedBlockSignals {
 
 #  if SANITIZER_GLIBC
 uptr internal_clock_gettime(__sanitizer_clockid_t clk_id, void *tp);
-#endif
+#  endif
 
 // Linux-only syscalls.
-#if SANITIZER_LINUX
+#  if SANITIZER_LINUX
 uptr internal_prctl(int option, uptr arg2, uptr arg3, uptr arg4, uptr arg5);
 #    if defined(__x86_64__)
 uptr internal_arch_prctl(int option, uptr arg2);
@@ -83,15 +83,15 @@ void internal_sigdelset(__sanitizer_sigset_t *set, int signum);
         defined(__arm__) || SANITIZER_RISCV64 || SANITIZER_LOONGARCH64
 uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
                     int *parent_tidptr, void *newtls, int *child_tidptr);
-#endif
+#    endif
 int internal_uname(struct utsname *buf);
-#elif SANITIZER_FREEBSD
+#  elif SANITIZER_FREEBSD
 uptr internal_procctl(int type, int id, int cmd, void *data);
 void internal_sigdelset(__sanitizer_sigset_t *set, int signum);
-#elif SANITIZER_NETBSD
+#  elif SANITIZER_NETBSD
 void internal_sigdelset(__sanitizer_sigset_t *set, int signum);
 uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg);
-#endif  // SANITIZER_LINUX
+#  endif  // SANITIZER_LINUX
 
 // This class reads thread IDs from /proc/<pid>/task using only syscalls.
 class ThreadLister {
@@ -135,36 +135,60 @@ inline void ReleaseMemoryPagesToOSAndZeroFill(uptr beg, uptr end) {
   ReleaseMemoryPagesToOS(beg, end);
 }
 
-#if SANITIZER_ANDROID
-
-#if defined(__aarch64__)
-# define __get_tls() \
-    ({ void** __v; __asm__("mrs %0, tpidr_el0" : "=r"(__v)); __v; })
-#elif defined(__arm__)
-# define __get_tls() \
-    ({ void** __v; __asm__("mrc p15, 0, %0, c13, c0, 3" : "=r"(__v)); __v; })
-#elif defined(__mips__)
+#  if SANITIZER_ANDROID
+
+#    if defined(__aarch64__)
+#      define __get_tls()                           \
+        ({                                          \
+          void **__v;                               \
+          __asm__("mrs %0, tpidr_el0" : "=r"(__v)); \
+          __v;                                      \
+        })
+#    elif defined(__arm__)
+#      define __get_tls()                                    \
+        ({                                                   \
+          void **__v;                                        \
+          __asm__("mrc p15, 0, %0, c13, c0, 3" : "=r"(__v)); \
+          __v;                                               \
+        })
+#    elif defined(__mips__)
 // On mips32r1, this goes via a kernel illegal instruction trap that's
 // optimized for v1.
-# define __get_tls() \
-    ({ register void** __v asm("v1"); \
-       __asm__(".set    push\n" \
-               ".set    mips32r2\n" \
-               "rdhwr   %0,$29\n" \
-               ".set    pop\n" : "=r"(__v)); \
-       __v; })
-#elif defined (__riscv)
-# define __get_tls() \
-    ({ void** __v; __asm__("mv %0, tp" : "=r"(__v)); __v; })
-#elif defined(__i386__)
-# define __get_tls() \
-    ({ void** __v; __asm__("movl %%gs:0, %0" : "=r"(__v)); __v; })
-#elif defined(__x86_64__)
-# define __get_tls() \
-    ({ void** __v; __asm__("mov %%fs:0, %0" : "=r"(__v)); __v; })
-#else
-#error "Unsupported architecture."
-#endif
+#      define __get_tls()                \
+        ({                               \
+          register void **__v asm("v1"); \
+          __asm__(                       \
+              ".set    push\n"           \
+              ".set    mips32r2\n"       \
+              "rdhwr   %0,$29\n"         \
+              ".set    pop\n"            \
+              : "=r"(__v));              \
+          __v;                           \
+        })
+#    elif defined(__riscv)
+#      define __get_tls()                   \
+        ({                                  \
+          void **__v;                       \
+          __asm__("mv %0, tp" : "=r"(__v)); \
+          __v;                              \
+        })
+#    elif defined(__i386__)
+#      define __get_tls()                         \
+        ({                                        \
+          void **__v;                             \
+          __asm__("movl %%gs:0, %0" : "=r"(__v)); \
+          __v;                                    \
+        })
+#    elif defined(__x86_64__)
+#      define __get_tls()                        \
+        ({                                       \
+          void **__v;                            \
+          __asm__("mov %%fs:0, %0" : "=r"(__v)); \
+          __v;                                   \
+        })
+#    else
+#      error "Unsupported architecture."
+#    endif
 
 // The Android Bionic team has allocated a TLS slot for sanitizers starting
 // with Q, given that Android currently doesn't support ELF TLS. It is used to
@@ -175,7 +199,7 @@ ALWAYS_INLINE uptr *get_android_tls_ptr() {
   return reinterpret_cast<uptr *>(&__get_tls()[TLS_SLOT_SANITIZER]);
 }
 
-#endif  // SANITIZER_ANDROID
+#  endif  // SANITIZER_ANDROID
 
 }  // namespace __sanitizer
 
lib/tsan/sanitizer_common/sanitizer_linux_libcdep.cpp
@@ -16,89 +16,101 @@
 #if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD || \
     SANITIZER_SOLARIS
 
-#include "sanitizer_allocator_internal.h"
-#include "sanitizer_atomic.h"
-#include "sanitizer_common.h"
-#include "sanitizer_file.h"
-#include "sanitizer_flags.h"
-#include "sanitizer_freebsd.h"
-#include "sanitizer_getauxval.h"
-#include "sanitizer_glibc_version.h"
-#include "sanitizer_linux.h"
-#include "sanitizer_placement_new.h"
-#include "sanitizer_procmaps.h"
-#include "sanitizer_solaris.h"
-
-#if SANITIZER_NETBSD
-#define _RTLD_SOURCE  // for __lwp_gettcb_fast() / __lwp_getprivate_fast()
-#endif
+#  include "sanitizer_allocator_internal.h"
+#  include "sanitizer_atomic.h"
+#  include "sanitizer_common.h"
+#  include "sanitizer_file.h"
+#  include "sanitizer_flags.h"
+#  include "sanitizer_getauxval.h"
+#  include "sanitizer_glibc_version.h"
+#  include "sanitizer_linux.h"
+#  include "sanitizer_placement_new.h"
+#  include "sanitizer_procmaps.h"
+#  include "sanitizer_solaris.h"
+
+#  if SANITIZER_NETBSD
+#    define _RTLD_SOURCE  // for __lwp_gettcb_fast() / __lwp_getprivate_fast()
+#  endif
 
-#include <dlfcn.h>  // for dlsym()
-#include <link.h>
-#include <pthread.h>
-#include <signal.h>
-#include <sys/mman.h>
-#include <sys/resource.h>
-#include <syslog.h>
+#  include <dlfcn.h>  // for dlsym()
+#  include <link.h>
+#  include <pthread.h>
+#  include <signal.h>
+#  include <sys/mman.h>
+#  include <sys/resource.h>
+#  include <syslog.h>
 
-#if !defined(ElfW)
-#define ElfW(type) Elf_##type
-#endif
+#  if !defined(ElfW)
+#    define ElfW(type) Elf_##type
+#  endif
 
-#if SANITIZER_FREEBSD
-#include <pthread_np.h>
-#include <osreldate.h>
-#include <sys/sysctl.h>
-#define pthread_getattr_np pthread_attr_get_np
+#  if SANITIZER_FREEBSD
+#    include <pthread_np.h>
+#    include <sys/auxv.h>
+#    include <sys/sysctl.h>
+#    define pthread_getattr_np pthread_attr_get_np
 // The MAP_NORESERVE define has been removed in FreeBSD 11.x, and even before
 // that, it was never implemented. So just define it to zero.
-#undef MAP_NORESERVE
-#define MAP_NORESERVE 0
-#endif
+#    undef MAP_NORESERVE
+#    define MAP_NORESERVE 0
+extern const Elf_Auxinfo *__elf_aux_vector;
+extern "C" int __sys_sigaction(int signum, const struct sigaction *act,
+                               struct sigaction *oldact);
+#  endif
 
-#if SANITIZER_NETBSD
-#include <sys/sysctl.h>
-#include <sys/tls.h>
-#include <lwp.h>
-#endif
+#  if SANITIZER_NETBSD
+#    include <lwp.h>
+#    include <sys/sysctl.h>
+#    include <sys/tls.h>
+#  endif
 
-#if SANITIZER_SOLARIS
-#include <stddef.h>
-#include <stdlib.h>
-#include <thread.h>
-#endif
+#  if SANITIZER_SOLARIS
+#    include <stddef.h>
+#    include <stdlib.h>
+#    include <thread.h>
+#  endif
 
-#if SANITIZER_ANDROID
-#include <android/api-level.h>
-#if !defined(CPU_COUNT) && !defined(__aarch64__)
-#include <dirent.h>
-#include <fcntl.h>
+#  if SANITIZER_ANDROID
+#    include <android/api-level.h>
+#    if !defined(CPU_COUNT) && !defined(__aarch64__)
+#      include <dirent.h>
+#      include <fcntl.h>
 struct __sanitizer::linux_dirent {
-  long           d_ino;
-  off_t          d_off;
+  long d_ino;
+  off_t d_off;
   unsigned short d_reclen;
-  char           d_name[];
+  char d_name[];
 };
-#endif
-#endif
+#    endif
+#  endif
 
-#if !SANITIZER_ANDROID
-#include <elf.h>
-#include <unistd.h>
-#endif
+#  if !SANITIZER_ANDROID
+#    include <elf.h>
+#    include <unistd.h>
+#  endif
 
 namespace __sanitizer {
 
-SANITIZER_WEAK_ATTRIBUTE int
-real_sigaction(int signum, const void *act, void *oldact);
+SANITIZER_WEAK_ATTRIBUTE int real_sigaction(int signum, const void *act,
+                                            void *oldact);
 
 int internal_sigaction(int signum, const void *act, void *oldact) {
-#if !SANITIZER_GO
+  // Sets and/or queries the action for `signum`. `act` and `oldact` are passed
+  // untyped and are cast to `struct sigaction` pointers wherever the platform
+  // sigaction entry point is called directly. The return value is whatever the
+  // selected implementation returns.
+#  if SANITIZER_FREEBSD
+  // On FreeBSD, call the sigaction syscall directly (part of libsys in FreeBSD
+  // 15) since the libc version goes via a global interposing table. Due to
+  // library initialization order the table can be relocated after the call to
+  // InitializeDeadlySignals() which then crashes when dereferencing the
+  // uninitialized pointer in libc.
+  return __sys_sigaction(signum, (const struct sigaction *)act,
+                         (struct sigaction *)oldact);
+#  else
+#    if !SANITIZER_GO
+  // real_sigaction is declared with SANITIZER_WEAK_ATTRIBUTE, so its address
+  // is tested first; it is used only when a definition was actually linked in.
   if (&real_sigaction)
     return real_sigaction(signum, act, oldact);
-#endif
+#    endif
+  // Otherwise (or always, in Go builds) fall back to plain sigaction().
   return sigaction(signum, (const struct sigaction *)act,
                    (struct sigaction *)oldact);
+#  endif
 }
 
 void GetThreadStackTopAndBottom(bool at_initialization, uptr *stack_top,
@@ -111,7 +123,7 @@ void GetThreadStackTopAndBottom(bool at_initialization, uptr *stack_top,
     CHECK_EQ(getrlimit(RLIMIT_STACK, &rl), 0);
 
     // Find the mapping that contains a stack variable.
-    MemoryMappingLayout proc_maps(/*cache_enabled*/true);
+    MemoryMappingLayout proc_maps(/*cache_enabled*/ true);
     if (proc_maps.Error()) {
       *stack_top = *stack_bottom = 0;
       return;
@@ -119,7 +131,8 @@ void GetThreadStackTopAndBottom(bool at_initialization, uptr *stack_top,
     MemoryMappedSegment segment;
     uptr prev_end = 0;
     while (proc_maps.Next(&segment)) {
-      if ((uptr)&rl < segment.end) break;
+      if ((uptr)&rl < segment.end)
+        break;
       prev_end = segment.end;
     }
     CHECK((uptr)&rl >= segment.start && (uptr)&rl < segment.end);
@@ -127,7 +140,8 @@ void GetThreadStackTopAndBottom(bool at_initialization, uptr *stack_top,
     // Get stacksize from rlimit, but clip it so that it does not overlap
     // with other mappings.
     uptr stacksize = rl.rlim_cur;
-    if (stacksize > segment.end - prev_end) stacksize = segment.end - prev_end;
+    if (stacksize > segment.end - prev_end)
+      stacksize = segment.end - prev_end;
     // When running with unlimited stack size, we still want to set some limit.
     // The unlimited stack size is caused by 'ulimit -s unlimited'.
     // Also, for some reason, GNU make spawns subprocesses with unlimited stack.
@@ -135,43 +149,56 @@ void GetThreadStackTopAndBottom(bool at_initialization, uptr *stack_top,
       stacksize = kMaxThreadStackSize;
     *stack_top = segment.end;
     *stack_bottom = segment.end - stacksize;
+
+    uptr maxAddr = GetMaxUserVirtualAddress();
+    // Edge case: the stack mapping on some systems may be off-by-one e.g.,
+    //     fffffffdf000-1000000000000 rw-p 00000000 00:00 0 [stack]
+    // instead of:
+    //     fffffffdf000- ffffffffffff
+    // The out-of-range stack_top can result in an invalid shadow address
+    // calculation, since those usually assume the parameters are in range.
+    if (*stack_top == maxAddr + 1)
+      *stack_top = maxAddr;
+    else
+      CHECK_LE(*stack_top, maxAddr);
+
     return;
   }
   uptr stacksize = 0;
   void *stackaddr = nullptr;
-#if SANITIZER_SOLARIS
+#  if SANITIZER_SOLARIS
   stack_t ss;
   CHECK_EQ(thr_stksegment(&ss), 0);
   stacksize = ss.ss_size;
   stackaddr = (char *)ss.ss_sp - stacksize;
-#else  // !SANITIZER_SOLARIS
+#  else   // !SANITIZER_SOLARIS
   pthread_attr_t attr;
   pthread_attr_init(&attr);
   CHECK_EQ(pthread_getattr_np(pthread_self(), &attr), 0);
   internal_pthread_attr_getstack(&attr, &stackaddr, &stacksize);
   pthread_attr_destroy(&attr);
-#endif  // SANITIZER_SOLARIS
+#  endif  // SANITIZER_SOLARIS
 
   *stack_top = (uptr)stackaddr + stacksize;
   *stack_bottom = (uptr)stackaddr;
 }
 
-#if !SANITIZER_GO
+#  if !SANITIZER_GO
 bool SetEnv(const char *name, const char *value) {
+  // Sets environment variable `name` to `value`, overwriting any existing
+  // value. Returns true on success; returns false if no "setenv" symbol could
+  // be resolved or if the resolved function reports failure (non-zero).
+  //
+  // The function is looked up with dlsym(RTLD_NEXT, ...) instead of being
+  // called by name.
   void *f = dlsym(RTLD_NEXT, "setenv");
   if (!f)
     return false;
-  typedef int(*setenv_ft)(const char *name, const char *value, int overwrite);
+  typedef int (*setenv_ft)(const char *name, const char *value, int overwrite);
   setenv_ft setenv_f;
+  // Copy the pointer bits rather than casting void * to a function pointer;
+  // the size check guards the byte-wise copy.
   CHECK_EQ(sizeof(setenv_f), sizeof(f));
   internal_memcpy(&setenv_f, &f, sizeof(f));
+  // Third argument 1 == overwrite an existing variable.
   return setenv_f(name, value, 1) == 0;
 }
-#endif
+#  endif
 
 __attribute__((unused)) static bool GetLibcVersion(int *major, int *minor,
                                                    int *patch) {
-#ifdef _CS_GNU_LIBC_VERSION
+#  ifdef _CS_GNU_LIBC_VERSION
   char buf[64];
   uptr len = confstr(_CS_GNU_LIBC_VERSION, buf, sizeof(buf));
   if (len >= sizeof(buf))
@@ -185,9 +212,9 @@ __attribute__((unused)) static bool GetLibcVersion(int *major, int *minor,
   *minor = (*p == '.') ? internal_simple_strtoll(p + 1, &p, 10) : 0;
   *patch = (*p == '.') ? internal_simple_strtoll(p + 1, &p, 10) : 0;
   return true;
-#else
+#  else
   return false;
-#endif
+#  endif
 }
 
 // True if we can use dlpi_tls_data. glibc before 2.25 may leave NULL (BZ
@@ -198,42 +225,42 @@ __attribute__((unused)) static bool GetLibcVersion(int *major, int *minor,
 // https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=254774
 __attribute__((unused)) static int g_use_dlpi_tls_data;
 
-#if SANITIZER_GLIBC && !SANITIZER_GO
+#  if SANITIZER_GLIBC && !SANITIZER_GO
 __attribute__((unused)) static size_t g_tls_size;
 void InitTlsSize() {
   int major, minor, patch;
   g_use_dlpi_tls_data =
       GetLibcVersion(&major, &minor, &patch) && major == 2 && minor >= 25;
 
-#if defined(__aarch64__) || defined(__x86_64__) || defined(__powerpc64__) || \
-    defined(__loongarch__)
+#    if defined(__aarch64__) || defined(__x86_64__) || \
+        defined(__powerpc64__) || defined(__loongarch__)
   void *get_tls_static_info = dlsym(RTLD_NEXT, "_dl_get_tls_static_info");
   size_t tls_align;
   ((void (*)(size_t *, size_t *))get_tls_static_info)(&g_tls_size, &tls_align);
-#endif
+#    endif
 }
-#else
-void InitTlsSize() { }
-#endif  // SANITIZER_GLIBC && !SANITIZER_GO
+#  else
+void InitTlsSize() {}
+#  endif  // SANITIZER_GLIBC && !SANITIZER_GO
 
 // On glibc x86_64, ThreadDescriptorSize() needs to be precise due to the usage
 // of g_tls_size. On other targets, ThreadDescriptorSize() is only used by lsan
 // to get the pointer to thread-specific data keys in the thread control block.
-#if (SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_SOLARIS) && \
-    !SANITIZER_ANDROID && !SANITIZER_GO
+#  if (SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_SOLARIS) && \
+      !SANITIZER_ANDROID && !SANITIZER_GO
 // sizeof(struct pthread) from glibc.
 static atomic_uintptr_t thread_descriptor_size;
 
 static uptr ThreadDescriptorSizeFallback() {
   uptr val = 0;
-#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
+#    if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
   int major;
   int minor;
   int patch;
   if (GetLibcVersion(&major, &minor, &patch) && major == 2) {
     /* sizeof(struct pthread) values from various glibc versions.  */
     if (SANITIZER_X32)
-      val = 1728; // Assume only one particular version for x32.
+      val = 1728;  // Assume only one particular version for x32.
     // For ARM sizeof(struct pthread) changed in Glibc 2.23.
     else if (SANITIZER_ARM)
       val = minor <= 22 ? 1120 : 1216;
@@ -256,19 +283,19 @@ static uptr ThreadDescriptorSizeFallback() {
     else  // minor == 32
       val = FIRST_32_SECOND_64(1344, 2496);
   }
-#elif defined(__s390__) || defined(__sparc__)
+#    elif defined(__s390__) || defined(__sparc__)
   // The size of a prefix of TCB including pthread::{specific_1stblock,specific}
   // suffices. Just return offsetof(struct pthread, specific_used), which hasn't
   // changed since 2007-05. Technically this applies to i386/x86_64 as well but
   // we call _dl_get_tls_static_info and need the precise size of struct
   // pthread.
   return FIRST_32_SECOND_64(524, 1552);
-#elif defined(__mips__)
+#    elif defined(__mips__)
   // TODO(sagarthakur): add more values as per different glibc versions.
   val = FIRST_32_SECOND_64(1152, 1776);
-#elif SANITIZER_LOONGARCH64
-  val = 1856; // from glibc 2.36
-#elif SANITIZER_RISCV64
+#    elif SANITIZER_LOONGARCH64
+  val = 1856;  // from glibc 2.36
+#    elif SANITIZER_RISCV64
   int major;
   int minor;
   int patch;
@@ -283,12 +310,12 @@ static uptr ThreadDescriptorSizeFallback() {
       val = 1936;  // tested against glibc 2.32
   }
 
-#elif defined(__aarch64__)
+#    elif defined(__aarch64__)
   // The sizeof (struct pthread) is the same from GLIBC 2.17 to 2.22.
   val = 1776;
-#elif defined(__powerpc64__)
-  val = 1776; // from glibc.ppc64le 2.20-8.fc21
-#endif
+#    elif defined(__powerpc64__)
+  val = 1776;  // from glibc.ppc64le 2.20-8.fc21
+#    endif
   return val;
 }
 
@@ -307,26 +334,26 @@ uptr ThreadDescriptorSize() {
   return val;
 }
 
-#if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64 || \
-    SANITIZER_LOONGARCH64
+#    if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64 || \
+        SANITIZER_LOONGARCH64
 // TlsPreTcbSize includes size of struct pthread_descr and size of tcb
 // head structure. It lies before the static tls blocks.
 static uptr TlsPreTcbSize() {
   // kTcbHead is chosen per architecture: 88 on powerpc64, 16 on mips, riscv64
   // and loongarch64.
-#if defined(__mips__)
-  const uptr kTcbHead = 16; // sizeof (tcbhead_t)
-#elif defined(__powerpc64__)
-  const uptr kTcbHead = 88; // sizeof (tcbhead_t)
-#elif SANITIZER_RISCV64
+#      if defined(__mips__)
   const uptr kTcbHead = 16;  // sizeof (tcbhead_t)
-#elif SANITIZER_LOONGARCH64
+#      elif defined(__powerpc64__)
+  const uptr kTcbHead = 88;  // sizeof (tcbhead_t)
+#      elif SANITIZER_RISCV64
   const uptr kTcbHead = 16;  // sizeof (tcbhead_t)
-#endif
+#      elif SANITIZER_LOONGARCH64
+  const uptr kTcbHead = 16;  // sizeof (tcbhead_t)
+#      endif
   // Thread descriptor plus TCB head, rounded up to a kTlsAlign (16) multiple.
   const uptr kTlsAlign = 16;
   const uptr kTlsPreTcbSize =
       RoundUpTo(ThreadDescriptorSize() + kTcbHead, kTlsAlign);
   return kTlsPreTcbSize;
 }
-#endif
+#    endif
 
 namespace {
 struct TlsBlock {
@@ -336,7 +363,7 @@ struct TlsBlock {
 };
 }  // namespace
 
-#ifdef __s390__
+#    ifdef __s390__
 extern "C" uptr __tls_get_offset(void *arg);
 
 static uptr TlsGetOffset(uptr ti_module, uptr ti_offset) {
@@ -354,16 +381,16 @@ static uptr TlsGetOffset(uptr ti_module, uptr ti_offset) {
       : "memory", "cc", "0", "1", "3", "4", "5", "14");
   return r2;
 }
-#else
+#    else
 extern "C" void *__tls_get_addr(size_t *);
-#endif
+#    endif
 
 static size_t main_tls_modid;
 
 static int CollectStaticTlsBlocks(struct dl_phdr_info *info, size_t size,
                                   void *data) {
   size_t tls_modid;
-#if SANITIZER_SOLARIS
+#    if SANITIZER_SOLARIS
   // dlpi_tls_modid is only available since Solaris 11.4 SRU 10.  Use
   // dlinfo(RTLD_DI_LINKMAP) instead which works on all of Solaris 11.3,
   // 11.4, and Illumos.  The tlsmodid of the executable was changed to 1 in
@@ -376,27 +403,26 @@ static int CollectStaticTlsBlocks(struct dl_phdr_info *info, size_t size,
   Rt_map *map;
   dlinfo(RTLD_SELF, RTLD_DI_LINKMAP, &map);
   tls_modid = map->rt_tlsmodid;
-#else
+#    else
   main_tls_modid = 1;
   tls_modid = info->dlpi_tls_modid;
-#endif
+#    endif
 
   if (tls_modid < main_tls_modid)
     return 0;
   uptr begin;
-#if !SANITIZER_SOLARIS
+#    if !SANITIZER_SOLARIS
   begin = (uptr)info->dlpi_tls_data;
-#endif
+#    endif
   if (!g_use_dlpi_tls_data) {
     // Call __tls_get_addr as a fallback. This forces TLS allocation on glibc
     // and FreeBSD.
-#ifdef __s390__
-    begin = (uptr)__builtin_thread_pointer() +
-            TlsGetOffset(tls_modid, 0);
-#else
+#    ifdef __s390__
+    begin = (uptr)__builtin_thread_pointer() + TlsGetOffset(tls_modid, 0);
+#    else
     size_t mod_and_off[2] = {tls_modid, 0};
     begin = (uptr)__tls_get_addr(mod_and_off);
-#endif
+#    endif
   }
   for (unsigned i = 0; i != info->dlpi_phnum; ++i)
     if (info->dlpi_phdr[i].p_type == PT_TLS) {
@@ -439,23 +465,21 @@ __attribute__((unused)) static void GetStaticTlsBoundary(uptr *addr, uptr *size,
   *addr = ranges[l].begin;
   *size = ranges[r - 1].end - ranges[l].begin;
 }
-#endif  // (x86_64 || i386 || mips || ...) && (SANITIZER_FREEBSD ||
-        // SANITIZER_LINUX) && !SANITIZER_ANDROID && !SANITIZER_GO
+#  endif  // (x86_64 || i386 || mips || ...) && (SANITIZER_FREEBSD ||
+          // SANITIZER_LINUX) && !SANITIZER_ANDROID && !SANITIZER_GO
 
-#if SANITIZER_NETBSD
-static struct tls_tcb * ThreadSelfTlsTcb() {
+#  if SANITIZER_NETBSD
+static struct tls_tcb *ThreadSelfTlsTcb() {
   // Returns the pointer obtained from __lwp_gettcb_fast() or, failing that,
   // __lwp_getprivate_fast(), depending on which __HAVE_* macro is defined.
   // The result stays null when neither macro is defined.
   // NOTE(review): ThreadSelf() dereferences the result unconditionally, while
   // GetTls() compares it against 0 first.
   struct tls_tcb *tcb = nullptr;
-#ifdef __HAVE___LWP_GETTCB_FAST
+#    ifdef __HAVE___LWP_GETTCB_FAST
   tcb = (struct tls_tcb *)__lwp_gettcb_fast();
-#elif defined(__HAVE___LWP_GETPRIVATE_FAST)
+#    elif defined(__HAVE___LWP_GETPRIVATE_FAST)
   tcb = (struct tls_tcb *)__lwp_getprivate_fast();
-#endif
+#    endif
   return tcb;
 }
 
-uptr ThreadSelf() {
-  return (uptr)ThreadSelfTlsTcb()->tcb_pthread;
-}
+uptr ThreadSelf() { return (uptr)ThreadSelfTlsTcb()->tcb_pthread; }
 
 int GetSizeFromHdr(struct dl_phdr_info *info, size_t size, void *data) {
   const Elf_Phdr *hdr = info->dlpi_phdr;
@@ -463,23 +487,23 @@ int GetSizeFromHdr(struct dl_phdr_info *info, size_t size, void *data) {
 
   for (; hdr != last_hdr; ++hdr) {
     if (hdr->p_type == PT_TLS && info->dlpi_tls_modid == 1) {
-      *(uptr*)data = hdr->p_memsz;
+      *(uptr *)data = hdr->p_memsz;
       break;
     }
   }
   return 0;
 }
-#endif  // SANITIZER_NETBSD
+#  endif  // SANITIZER_NETBSD
 
-#if SANITIZER_ANDROID
+#  if SANITIZER_ANDROID
 // Bionic provides this API since S.
 extern "C" SANITIZER_WEAK_ATTRIBUTE void __libc_get_static_tls_bounds(void **,
                                                                       void **);
-#endif
+#  endif
 
-#if !SANITIZER_GO
+#  if !SANITIZER_GO
 static void GetTls(uptr *addr, uptr *size) {
-#if SANITIZER_ANDROID
+#    if SANITIZER_ANDROID
   if (&__libc_get_static_tls_bounds) {
     void *start_addr;
     void *end_addr;
@@ -491,48 +515,48 @@ static void GetTls(uptr *addr, uptr *size) {
     *addr = 0;
     *size = 0;
   }
-#elif SANITIZER_GLIBC && defined(__x86_64__)
+#    elif SANITIZER_GLIBC && defined(__x86_64__)
   // For aarch64 and x86-64, use an O(1) approach which requires relatively
   // precise ThreadDescriptorSize. g_tls_size was initialized in InitTlsSize.
-#  if SANITIZER_X32
+#      if SANITIZER_X32
   asm("mov %%fs:8,%0" : "=r"(*addr));
-#  else
+#      else
   asm("mov %%fs:16,%0" : "=r"(*addr));
-#  endif
+#      endif
   *size = g_tls_size;
   *addr -= *size;
   *addr += ThreadDescriptorSize();
-#elif SANITIZER_GLIBC && defined(__aarch64__)
+#    elif SANITIZER_GLIBC && defined(__aarch64__)
   *addr = reinterpret_cast<uptr>(__builtin_thread_pointer()) -
           ThreadDescriptorSize();
   *size = g_tls_size + ThreadDescriptorSize();
-#elif SANITIZER_GLIBC && defined(__loongarch__)
-#  ifdef __clang__
+#    elif SANITIZER_GLIBC && defined(__loongarch__)
+#      ifdef __clang__
   *addr = reinterpret_cast<uptr>(__builtin_thread_pointer()) -
           ThreadDescriptorSize();
-#  else
+#      else
   asm("or %0,$tp,$zero" : "=r"(*addr));
   *addr -= ThreadDescriptorSize();
-#  endif
+#      endif
   *size = g_tls_size + ThreadDescriptorSize();
-#elif SANITIZER_GLIBC && defined(__powerpc64__)
+#    elif SANITIZER_GLIBC && defined(__powerpc64__)
   // Workaround for glibc<2.25(?). 2.27 is known to not need this.
   uptr tp;
   asm("addi %0,13,-0x7000" : "=r"(tp));
   const uptr pre_tcb_size = TlsPreTcbSize();
   *addr = tp - pre_tcb_size;
   *size = g_tls_size + pre_tcb_size;
-#elif SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_SOLARIS
+#    elif SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_SOLARIS
   uptr align;
   GetStaticTlsBoundary(addr, size, &align);
-#if defined(__x86_64__) || defined(__i386__) || defined(__s390__) || \
-    defined(__sparc__)
+#      if defined(__x86_64__) || defined(__i386__) || defined(__s390__) || \
+          defined(__sparc__)
   if (SANITIZER_GLIBC) {
-#if defined(__x86_64__) || defined(__i386__)
+#        if defined(__x86_64__) || defined(__i386__)
     align = Max<uptr>(align, 64);
-#else
+#        else
     align = Max<uptr>(align, 16);
-#endif
+#        endif
   }
   const uptr tp = RoundUpTo(*addr + *size, align);
 
@@ -551,26 +575,26 @@ static void GetTls(uptr *addr, uptr *size) {
   // because the number of bytes after pthread::specific is larger.
   *addr = tp - RoundUpTo(*size, align);
   *size = tp - *addr + ThreadDescriptorSize();
-#else
+#      else
   if (SANITIZER_GLIBC)
     *size += 1664;
   else if (SANITIZER_FREEBSD)
     *size += 128;  // RTLD_STATIC_TLS_EXTRA
-#if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64
+#        if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64
   const uptr pre_tcb_size = TlsPreTcbSize();
   *addr -= pre_tcb_size;
   *size += pre_tcb_size;
-#else
+#        else
   // arm and aarch64 reserve two words at TP, so this underestimates the range.
   // However, this is sufficient for the purpose of finding the pointers to
   // thread-specific data keys.
   const uptr tcb_size = ThreadDescriptorSize();
   *addr -= tcb_size;
   *size += tcb_size;
-#endif
-#endif
-#elif SANITIZER_NETBSD
-  struct tls_tcb * const tcb = ThreadSelfTlsTcb();
+#        endif
+#      endif
+#    elif SANITIZER_NETBSD
+  struct tls_tcb *const tcb = ThreadSelfTlsTcb();
   *addr = 0;
   *size = 0;
   if (tcb != 0) {
@@ -583,31 +607,31 @@ static void GetTls(uptr *addr, uptr *size) {
       *addr = (uptr)tcb->tcb_dtv[1];
     }
   }
-#else
-#error "Unknown OS"
-#endif
+#    else
+#      error "Unknown OS"
+#    endif
 }
-#endif
+#  endif
 
-#if !SANITIZER_GO
+#  if !SANITIZER_GO
 // Returns the TLS size reported by GetTls() on FreeBSD, Linux, NetBSD and
 // Solaris (the address it also reports is ignored); returns 0 on every other
 // platform.
 uptr GetTlsSize() {
-#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD || \
-    SANITIZER_SOLARIS
+#    if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD || \
+        SANITIZER_SOLARIS
   uptr addr, size;
   GetTls(&addr, &size);
   return size;
-#else
+#    else
   return 0;
-#endif
+#    endif
 }
-#endif
+#  endif
 
 void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size,
                           uptr *tls_addr, uptr *tls_size) {
-#if SANITIZER_GO
+#  if SANITIZER_GO
   // Stub implementation for Go.
   *stk_addr = *stk_size = *tls_addr = *tls_size = 0;
-#else
+#  else
   GetTls(tls_addr, tls_size);
 
   uptr stack_top, stack_bottom;
@@ -623,16 +647,12 @@ void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size,
       *stk_size = *tls_addr - *stk_addr;
     }
   }
-#endif
+#  endif
 }
 
-#if !SANITIZER_FREEBSD
+#  if !SANITIZER_FREEBSD
 typedef ElfW(Phdr) Elf_Phdr;
-#elif SANITIZER_WORDSIZE == 32 && __FreeBSD_version <= 902001  // v9.2
-#define Elf_Phdr XElf32_Phdr
-#define dl_phdr_info xdl_phdr_info
-#define dl_iterate_phdr(c, b) xdl_iterate_phdr((c), (b))
-#endif  // !SANITIZER_FREEBSD
+#  endif
 
 struct DlIteratePhdrData {
   InternalMmapVectorNoCtor<LoadedModule> *modules;
@@ -652,8 +672,7 @@ static int AddModuleSegments(const char *module_name, dl_phdr_info *info,
       uptr cur_end = cur_beg + phdr->p_memsz;
       bool executable = phdr->p_flags & PF_X;
       bool writable = phdr->p_flags & PF_W;
-      cur_module.addAddressRange(cur_beg, cur_end, executable,
-                                 writable);
+      cur_module.addAddressRange(cur_beg, cur_end, executable, writable);
     } else if (phdr->p_type == PT_NOTE) {
 #  ifdef NT_GNU_BUILD_ID
       uptr off = 0;
@@ -698,33 +717,30 @@ static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *arg) {
     return AddModuleSegments(module_name.data(), info, data->modules);
   }
 
-  if (info->dlpi_name) {
-    InternalScopedString module_name;
-    module_name.append("%s", info->dlpi_name);
-    return AddModuleSegments(module_name.data(), info, data->modules);
-  }
+  if (info->dlpi_name)
+    return AddModuleSegments(info->dlpi_name, info, data->modules);
 
   return 0;
 }
 
-#if SANITIZER_ANDROID && __ANDROID_API__ < 21
+#  if SANITIZER_ANDROID && __ANDROID_API__ < 21
 extern "C" __attribute__((weak)) int dl_iterate_phdr(
     int (*)(struct dl_phdr_info *, size_t, void *), void *);
-#endif
+#  endif
 
 // Returns true only in Android builds compiled with __ANDROID_API__ <= 22 and
 // running on a device whose AndroidGetApiLevel() is at most
 // ANDROID_LOLLIPOP_MR1; returns false in every other configuration.
 static bool requiresProcmaps() {
-#if SANITIZER_ANDROID && __ANDROID_API__ <= 22
+#  if SANITIZER_ANDROID && __ANDROID_API__ <= 22
   // Fall back to /proc/maps if dl_iterate_phdr is unavailable or broken.
   // The runtime check allows the same library to work with
   // both K and L (and future) Android releases.
   return AndroidGetApiLevel() <= ANDROID_LOLLIPOP_MR1;
-#else
+#  else
   return false;
-#endif
+#  endif
 }
 
 // Fills `modules` by constructing a MemoryMappingLayout with caching enabled
 // and asking it to dump its list of modules.
 static void procmapsInit(InternalMmapVectorNoCtor<LoadedModule> *modules) {
-  MemoryMappingLayout memory_mapping(/*cache_enabled*/true);
+  MemoryMappingLayout memory_mapping(/*cache_enabled*/ true);
   memory_mapping.DumpListOfModules(modules);
 }
 
@@ -776,22 +792,19 @@ uptr GetRSS() {
   // We need the second number which is RSS in pages.
   char *pos = buf;
   // Skip the first number.
-  while (*pos >= '0' && *pos <= '9')
-    pos++;
+  while (*pos >= '0' && *pos <= '9') pos++;
   // Skip whitespaces.
-  while (!(*pos >= '0' && *pos <= '9') && *pos != 0)
-    pos++;
+  while (!(*pos >= '0' && *pos <= '9') && *pos != 0) pos++;
   // Read the number.
   uptr rss = 0;
-  while (*pos >= '0' && *pos <= '9')
-    rss = rss * 10 + *pos++ - '0';
+  while (*pos >= '0' && *pos <= '9') rss = rss * 10 + *pos++ - '0';
   return rss * GetPageSizeCached();
 }
 
 // sysconf(_SC_NPROCESSORS_{CONF,ONLN}) cannot be used on most platforms as
 // they allocate memory.
 u32 GetNumberOfCPUs() {
-#if SANITIZER_FREEBSD || SANITIZER_NETBSD
+#  if SANITIZER_FREEBSD || SANITIZER_NETBSD
   u32 ncpu;
   int req[2];
   uptr len = sizeof(ncpu);
@@ -799,7 +812,7 @@ u32 GetNumberOfCPUs() {
   req[1] = HW_NCPU;
   CHECK_EQ(internal_sysctl(req, 2, &ncpu, &len, NULL, 0), 0);
   return ncpu;
-#elif SANITIZER_ANDROID && !defined(CPU_COUNT) && !defined(__aarch64__)
+#  elif SANITIZER_ANDROID && !defined(CPU_COUNT) && !defined(__aarch64__)
   // Fall back to /sys/devices/system/cpu on Android when cpu_set_t doesn't
   // exist in sched.h. That is the case for toolchains generated with older
   // NDKs.
@@ -827,26 +840,26 @@ u32 GetNumberOfCPUs() {
       break;
     if (entry->d_ino != 0 && *d_type == DT_DIR) {
       if (entry->d_name[0] == 'c' && entry->d_name[1] == 'p' &&
-          entry->d_name[2] == 'u' &&
-          entry->d_name[3] >= '0' && entry->d_name[3] <= '9')
+          entry->d_name[2] == 'u' && entry->d_name[3] >= '0' &&
+          entry->d_name[3] <= '9')
         n_cpus++;
     }
     entry = (struct linux_dirent *)(((u8 *)entry) + entry->d_reclen);
   }
   internal_close(fd);
   return n_cpus;
-#elif SANITIZER_SOLARIS
+#  elif SANITIZER_SOLARIS
   return sysconf(_SC_NPROCESSORS_ONLN);
-#else
+#  else
   cpu_set_t CPUs;
   CHECK_EQ(sched_getaffinity(0, sizeof(cpu_set_t), &CPUs), 0);
   return CPU_COUNT(&CPUs);
-#endif
+#  endif
 }
 
-#if SANITIZER_LINUX
+#  if SANITIZER_LINUX
 
-#if SANITIZER_ANDROID
+#    if SANITIZER_ANDROID
 static atomic_uint8_t android_log_initialized;
 
 void AndroidLogInit() {
@@ -858,13 +871,15 @@ static bool ShouldLogAfterPrintf() {
   return atomic_load(&android_log_initialized, memory_order_acquire);
 }
 
-extern "C" SANITIZER_WEAK_ATTRIBUTE
-int async_safe_write_log(int pri, const char* tag, const char* msg);
-extern "C" SANITIZER_WEAK_ATTRIBUTE
-int __android_log_write(int prio, const char* tag, const char* msg);
+extern "C" SANITIZER_WEAK_ATTRIBUTE int async_safe_write_log(int pri,
+                                                             const char *tag,
+                                                             const char *msg);
+extern "C" SANITIZER_WEAK_ATTRIBUTE int __android_log_write(int prio,
+                                                            const char *tag,
+                                                            const char *msg);
 
 // ANDROID_LOG_INFO is 4, but can't be resolved at runtime.
-#define SANITIZER_ANDROID_LOG_INFO 4
+#      define SANITIZER_ANDROID_LOG_INFO 4
 
 // async_safe_write_log is a new public version of __libc_write_log that is
 // used behind syslog. It is preferable to syslog as it will not do any dynamic
@@ -883,14 +898,14 @@ void WriteOneLineToSyslog(const char *s) {
   }
 }
 
-extern "C" SANITIZER_WEAK_ATTRIBUTE
-void android_set_abort_message(const char *);
+extern "C" SANITIZER_WEAK_ATTRIBUTE void android_set_abort_message(
+    const char *);
 
 // Android variant: forwards `str` to android_set_abort_message(), which is
 // declared with SANITIZER_WEAK_ATTRIBUTE, so its address is tested first and
 // the call is skipped when the symbol is absent.
 void SetAbortMessage(const char *str) {
   if (&android_set_abort_message)
     android_set_abort_message(str);
 }
-#else
+#    else
 void AndroidLogInit() {}
 
 static bool ShouldLogAfterPrintf() { return true; }
@@ -898,16 +913,16 @@ static bool ShouldLogAfterPrintf() { return true; }
 void WriteOneLineToSyslog(const char *s) { syslog(LOG_INFO, "%s", s); }
 
 void SetAbortMessage(const char *str) {}
-#endif  // SANITIZER_ANDROID
+#    endif  // SANITIZER_ANDROID
 
 // Passes `str` to WriteToSyslog() when the log_to_syslog common flag is set
 // and ShouldLogAfterPrintf() returns true; otherwise does nothing.
 void LogMessageOnPrintf(const char *str) {
   if (common_flags()->log_to_syslog && ShouldLogAfterPrintf())
     WriteToSyslog(str);
 }
 
-#endif  // SANITIZER_LINUX
+#  endif  // SANITIZER_LINUX
 
-#if SANITIZER_GLIBC && !SANITIZER_GO
+#  if SANITIZER_GLIBC && !SANITIZER_GO
 // glibc crashes when using clock_gettime from a preinit_array function as the
 // vDSO function pointers haven't been initialized yet. __progname is
 // initialized after the vDSO function pointers, so if it exists, is not null
@@ -918,8 +933,8 @@ inline bool CanUseVDSO() { return &__progname && __progname && *__progname; }
 // MonotonicNanoTime is a timing function that can leverage the vDSO by calling
 // clock_gettime. real_clock_gettime only exists if clock_gettime is
 // intercepted, so define it weakly and use it if available.
-extern "C" SANITIZER_WEAK_ATTRIBUTE
-int real_clock_gettime(u32 clk_id, void *tp);
+extern "C" SANITIZER_WEAK_ATTRIBUTE int real_clock_gettime(u32 clk_id,
+                                                           void *tp);
 u64 MonotonicNanoTime() {
   timespec ts;
   if (CanUseVDSO()) {
@@ -932,19 +947,26 @@ u64 MonotonicNanoTime() {
   }
   return (u64)ts.tv_sec * (1000ULL * 1000 * 1000) + ts.tv_nsec;
 }
-#else
+#  else
 // Non-glibc & Go always use the regular function.
 u64 MonotonicNanoTime() {
   timespec ts;
   // NOTE(review): the return value of clock_gettime() is not checked.
   clock_gettime(CLOCK_MONOTONIC, &ts);
   // Whole seconds scaled to nanoseconds, plus the sub-second nanoseconds.
   return (u64)ts.tv_sec * (1000ULL * 1000 * 1000) + ts.tv_nsec;
 }
-#endif  // SANITIZER_GLIBC && !SANITIZER_GO
+#  endif  // SANITIZER_GLIBC && !SANITIZER_GO
 
 void ReExec() {
   const char *pathname = "/proc/self/exe";
 
-#if SANITIZER_NETBSD
+#  if SANITIZER_FREEBSD
+  for (const auto *aux = __elf_aux_vector; aux->a_type != AT_NULL; aux++) {
+    if (aux->a_type == AT_EXECPATH) {
+      pathname = static_cast<const char *>(aux->a_un.a_ptr);
+      break;
+    }
+  }
+#  elif SANITIZER_NETBSD
   static const int name[] = {
       CTL_KERN,
       KERN_PROC_ARGS,
@@ -957,14 +979,14 @@ void ReExec() {
   len = sizeof(path);
   if (internal_sysctl(name, ARRAY_SIZE(name), path, &len, NULL, 0) != -1)
     pathname = path;
-#elif SANITIZER_SOLARIS
+#  elif SANITIZER_SOLARIS
   pathname = getexecname();
   CHECK_NE(pathname, NULL);
-#elif SANITIZER_USE_GETAUXVAL
+#  elif SANITIZER_USE_GETAUXVAL
   // Calling execve with /proc/self/exe sets that as $EXEC_ORIGIN. Binaries that
   // rely on that will fail to load shared libraries. Query AT_EXECFN instead.
   pathname = reinterpret_cast<const char *>(getauxval(AT_EXECFN));
-#endif
+#  endif
 
   uptr rv = internal_execve(pathname, GetArgv(), GetEnviron());
   int rverrno;
@@ -986,9 +1008,8 @@ void UnmapFromTo(uptr from, uptr to) {
 }
 
 uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale,
-                      uptr min_shadow_base_alignment,
-                      UNUSED uptr &high_mem_end) {
-  const uptr granularity = GetMmapGranularity();
+                      uptr min_shadow_base_alignment, UNUSED uptr &high_mem_end,
+                      uptr granularity) {
   const uptr alignment =
       Max<uptr>(granularity << shadow_scale, 1ULL << min_shadow_base_alignment);
   const uptr left_padding =
@@ -1016,14 +1037,14 @@ static uptr MmapSharedNoReserve(uptr addr, uptr size) {
 
 // On Linux, calls internal_mremap() on base_addr with an old size of 0, a new
 // size of alias_size and MREMAP_MAYMOVE | MREMAP_FIXED targeting alias_addr,
 // and returns its result.  On other platforms it trips a CHECK and returns 0.
 // NOTE(review): per mremap(2), old_size == 0 on a shareable mapping creates an
 // additional mapping of the same pages instead of moving it -- confirm that
 // base_addr always refers to such a mapping.
 static uptr MremapCreateAlias(uptr base_addr, uptr alias_addr,
                               uptr alias_size) {
-#if SANITIZER_LINUX
+#  if SANITIZER_LINUX
   return internal_mremap(reinterpret_cast<void *>(base_addr), 0, alias_size,
                          MREMAP_MAYMOVE | MREMAP_FIXED,
                          reinterpret_cast<void *>(alias_addr));
-#else
+#  else
   CHECK(false && "mremap is not supported outside of Linux");
   return 0;
-#endif
+#  endif
 }
 
 static void CreateAliases(uptr start_addr, uptr alias_size, uptr num_aliases) {
@@ -1068,12 +1089,12 @@ uptr MapDynamicShadowAndAliases(uptr shadow_size, uptr alias_size,
 }
 
 // Applies platform-specific adjustments to `cf`.  The only one here is for
 // Android: detect_leaks is turned off when the weakly declared
 // __libc_get_static_tls_bounds symbol is not present at run time.  On other
 // platforms the function body is empty.
 void InitializePlatformCommonFlags(CommonFlags *cf) {
-#if SANITIZER_ANDROID
+#  if SANITIZER_ANDROID
   if (&__libc_get_static_tls_bounds == nullptr)
     cf->detect_leaks = false;
-#endif
+#  endif
 }
 
-} // namespace __sanitizer
+}  // namespace __sanitizer
 
 #endif
lib/tsan/sanitizer_common/sanitizer_linux_s390.cpp
@@ -15,14 +15,14 @@
 
 #if SANITIZER_LINUX && SANITIZER_S390
 
-#include <dlfcn.h>
-#include <errno.h>
-#include <sys/syscall.h>
-#include <sys/utsname.h>
-#include <unistd.h>
+#  include <dlfcn.h>
+#  include <errno.h>
+#  include <sys/syscall.h>
+#  include <sys/utsname.h>
+#  include <unistd.h>
 
-#include "sanitizer_libc.h"
-#include "sanitizer_linux.h"
+#  include "sanitizer_libc.h"
+#  include "sanitizer_linux.h"
 
 namespace __sanitizer {
 
@@ -37,22 +37,19 @@ uptr internal_mmap(void *addr, uptr length, int prot, int flags, int fd,
     unsigned long fd;
     unsigned long offset;
   } params = {
-    (unsigned long)addr,
-    (unsigned long)length,
-    (unsigned long)prot,
-    (unsigned long)flags,
-    (unsigned long)fd,
-# ifdef __s390x__
-    (unsigned long)offset,
-# else
+      (unsigned long)addr,   (unsigned long)length, (unsigned long)prot,
+      (unsigned long)flags,  (unsigned long)fd,
+#  ifdef __s390x__
+      (unsigned long)offset,
+#  else
     (unsigned long)(offset / 4096),
-# endif
+#  endif
   };
-# ifdef __s390x__
+#  ifdef __s390x__
   return syscall(__NR_mmap, &params);
-# else
+#  else
   return syscall(__NR_mmap2, &params);
-# endif
+#  endif
 }
 
 uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
@@ -63,58 +60,54 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
   }
   CHECK_EQ(0, (uptr)child_stack % 16);
   // Minimum frame size.
-#ifdef __s390x__
+#  ifdef __s390x__
   child_stack = (char *)child_stack - 160;
-#else
+#  else
   child_stack = (char *)child_stack - 96;
-#endif
+#  endif
   // Terminate unwind chain.
   ((unsigned long *)child_stack)[0] = 0;
   // And pass parameters.
   ((unsigned long *)child_stack)[1] = (uptr)fn;
   ((unsigned long *)child_stack)[2] = (uptr)arg;
   register uptr res __asm__("r2");
-  register void *__cstack      __asm__("r2") = child_stack;
-  register long __flags        __asm__("r3") = flags;
-  register int * __ptidptr     __asm__("r4") = parent_tidptr;
-  register int * __ctidptr     __asm__("r5") = child_tidptr;
-  register void * __newtls     __asm__("r6") = newtls;
+  register void *__cstack __asm__("r2") = child_stack;
+  register long __flags __asm__("r3") = flags;
+  register int *__ptidptr __asm__("r4") = parent_tidptr;
+  register int *__ctidptr __asm__("r5") = child_tidptr;
+  register void *__newtls __asm__("r6") = newtls;
 
   __asm__ __volatile__(
-                       /* Clone. */
-                       "svc    %1\n"
-
-                       /* if (%r2 != 0)
-                        *   return;
-                        */
-#ifdef __s390x__
-                       "cghi   %%r2, 0\n"
-#else
-                       "chi    %%r2, 0\n"
-#endif
-                       "jne    1f\n"
-
-                       /* Call "fn(arg)". */
-#ifdef __s390x__
-                       "lmg    %%r1, %%r2, 8(%%r15)\n"
-#else
-                       "lm     %%r1, %%r2, 4(%%r15)\n"
-#endif
-                       "basr   %%r14, %%r1\n"
-
-                       /* Call _exit(%r2). */
-                       "svc %2\n"
-
-                       /* Return to parent. */
-                     "1:\n"
-                       : "=r" (res)
-                       : "i"(__NR_clone), "i"(__NR_exit),
-                         "r"(__cstack),
-                         "r"(__flags),
-                         "r"(__ptidptr),
-                         "r"(__ctidptr),
-                         "r"(__newtls)
-                       : "memory", "cc");
+      /* Clone. */
+      "svc    %1\n"
+
+  /* if (%r2 != 0)
+   *   return;
+   */
+#  ifdef __s390x__
+      "cghi   %%r2, 0\n"
+#  else
+      "chi    %%r2, 0\n"
+#  endif
+      "jne    1f\n"
+
+  /* Call "fn(arg)". */
+#  ifdef __s390x__
+      "lmg    %%r1, %%r2, 8(%%r15)\n"
+#  else
+      "lm     %%r1, %%r2, 4(%%r15)\n"
+#  endif
+      "basr   %%r14, %%r1\n"
+
+      /* Call _exit(%r2). */
+      "svc %2\n"
+
+      /* Return to parent. */
+      "1:\n"
+      : "=r"(res)
+      : "i"(__NR_clone), "i"(__NR_exit), "r"(__cstack), "r"(__flags),
+        "r"(__ptidptr), "r"(__ctidptr), "r"(__newtls)
+      : "memory", "cc");
   if (res >= (uptr)-4095) {
     errno = -res;
     return -1;
@@ -122,7 +115,7 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg,
   return res;
 }
 
-#if SANITIZER_S390_64
+#  if SANITIZER_S390_64
 static bool FixedCVE_2016_2143() {
   // Try to determine if the running kernel has a fix for CVE-2016-2143,
   // return false if in doubt (better safe than sorry).  Distros may want to
@@ -137,20 +130,20 @@ static bool FixedCVE_2016_2143() {
   // At least first 2 should be matched.
   if (ptr[0] != '.')
     return false;
-  minor = internal_simple_strtoll(ptr+1, &ptr, 10);
+  minor = internal_simple_strtoll(ptr + 1, &ptr, 10);
   // Third is optional.
   if (ptr[0] == '.')
-    patch = internal_simple_strtoll(ptr+1, &ptr, 10);
+    patch = internal_simple_strtoll(ptr + 1, &ptr, 10);
   if (major < 3) {
     if (major == 2 && minor == 6 && patch == 32 && ptr[0] == '-' &&
         internal_strstr(ptr, ".el6")) {
       // Check RHEL6
-      int r1 = internal_simple_strtoll(ptr+1, &ptr, 10);
-      if (r1 >= 657) // 2.6.32-657.el6 or later
+      int r1 = internal_simple_strtoll(ptr + 1, &ptr, 10);
+      if (r1 >= 657)  // 2.6.32-657.el6 or later
         return true;
       if (r1 == 642 && ptr[0] == '.') {
-        int r2 = internal_simple_strtoll(ptr+1, &ptr, 10);
-        if (r2 >= 9) // 2.6.32-642.9.1.el6 or later
+        int r2 = internal_simple_strtoll(ptr + 1, &ptr, 10);
+        if (r2 >= 9)  // 2.6.32-642.9.1.el6 or later
           return true;
       }
     }
@@ -166,12 +159,12 @@ static bool FixedCVE_2016_2143() {
     if (minor == 10 && patch == 0 && ptr[0] == '-' &&
         internal_strstr(ptr, ".el7")) {
       // Check RHEL7
-      int r1 = internal_simple_strtoll(ptr+1, &ptr, 10);
-      if (r1 >= 426) // 3.10.0-426.el7 or later
+      int r1 = internal_simple_strtoll(ptr + 1, &ptr, 10);
+      if (r1 >= 426)  // 3.10.0-426.el7 or later
         return true;
       if (r1 == 327 && ptr[0] == '.') {
-        int r2 = internal_simple_strtoll(ptr+1, &ptr, 10);
-        if (r2 >= 27) // 3.10.0-327.27.1.el7 or later
+        int r2 = internal_simple_strtoll(ptr + 1, &ptr, 10);
+        if (r2 >= 27)  // 3.10.0-327.27.1.el7 or later
           return true;
       }
     }
@@ -187,8 +180,8 @@ static bool FixedCVE_2016_2143() {
     if (minor == 4 && patch == 0 && ptr[0] == '-' &&
         internal_strstr(buf.version, "Ubuntu")) {
       // Check Ubuntu 16.04
-      int r1 = internal_simple_strtoll(ptr+1, &ptr, 10);
-      if (r1 >= 13) // 4.4.0-13 or later
+      int r1 = internal_simple_strtoll(ptr + 1, &ptr, 10);
+      if (r1 >= 13)  // 4.4.0-13 or later
         return true;
     }
     // Otherwise, OK if 4.5+.
@@ -211,18 +204,19 @@ void AvoidCVE_2016_2143() {
   if (GetEnv("SANITIZER_IGNORE_CVE_2016_2143"))
     return;
   Report(
-    "ERROR: Your kernel seems to be vulnerable to CVE-2016-2143.  Using ASan,\n"
-    "MSan, TSan, DFSan or LSan with such kernel can and will crash your\n"
-    "machine, or worse.\n"
-    "\n"
-    "If you are certain your kernel is not vulnerable (you have compiled it\n"
-    "yourself, or are using an unrecognized distribution kernel), you can\n"
-    "override this safety check by exporting SANITIZER_IGNORE_CVE_2016_2143\n"
-    "with any value.\n");
+      "ERROR: Your kernel seems to be vulnerable to CVE-2016-2143.  Using "
+      "ASan,\n"
+      "MSan, TSan, DFSan or LSan with such kernel can and will crash your\n"
+      "machine, or worse.\n"
+      "\n"
+      "If you are certain your kernel is not vulnerable (you have compiled it\n"
+      "yourself, or are using an unrecognized distribution kernel), you can\n"
+      "override this safety check by exporting SANITIZER_IGNORE_CVE_2016_2143\n"
+      "with any value.\n");
   Die();
 }
-#endif
+#  endif
 
-} // namespace __sanitizer
+}  // namespace __sanitizer
 
-#endif // SANITIZER_LINUX && SANITIZER_S390
+#endif  // SANITIZER_LINUX && SANITIZER_S390
lib/tsan/sanitizer_common/sanitizer_mac.cpp
@@ -1188,8 +1188,8 @@ uptr GetMaxVirtualAddress() {
 }
 
 uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale,
-                      uptr min_shadow_base_alignment, uptr &high_mem_end) {
-  const uptr granularity = GetMmapGranularity();
+                      uptr min_shadow_base_alignment, uptr &high_mem_end,
+                      uptr granularity) {
   const uptr alignment =
       Max<uptr>(granularity << shadow_scale, 1ULL << min_shadow_base_alignment);
   const uptr left_padding =
@@ -1372,8 +1372,8 @@ void DumpProcessMap() {
   for (uptr i = 0; i < modules.size(); ++i) {
     char uuid_str[128];
     FormatUUID(uuid_str, sizeof(uuid_str), modules[i].uuid());
-    Printf("0x%zx-0x%zx %s (%s) %s\n", modules[i].base_address(),
-           modules[i].max_address(), modules[i].full_name(),
+    Printf("%p-%p %s (%s) %s\n", (void *)modules[i].base_address(),
+           (void *)modules[i].max_address(), modules[i].full_name(),
            ModuleArchToString(modules[i].arch()), uuid_str);
   }
   Printf("End of module map.\n");
lib/tsan/sanitizer_common/sanitizer_mallinfo.h
@@ -31,6 +31,10 @@ struct __sanitizer_struct_mallinfo {
   int v[10];
 };
 
+struct __sanitizer_struct_mallinfo2 {
+  uptr v[10];  // ten uptr slots; presumably mirrors the fields of libc's struct mallinfo2 -- TODO confirm
+};
+
 #endif
 
 }  // namespace __sanitizer
lib/tsan/sanitizer_common/sanitizer_malloc_mac.inc
@@ -123,7 +123,7 @@ INTERCEPTOR(void, malloc_set_zone_name, malloc_zone_t *zone, const char *name) {
   COMMON_MALLOC_ENTER();
   InternalScopedString new_name;
   if (name && zone->introspect == sanitizer_zone.introspect) {
-    new_name.append(COMMON_MALLOC_ZONE_NAME "-%s", name);
+    new_name.AppendF(COMMON_MALLOC_ZONE_NAME "-%s", name);
     name = new_name.data();
   }
 
lib/tsan/sanitizer_common/sanitizer_mutex.cpp
@@ -212,8 +212,10 @@ struct InternalDeadlockDetector {
     return initialized > 0;
   }
 };
-
-static THREADLOCAL InternalDeadlockDetector deadlock_detector;
+// This variable is used by the __tls_get_addr interceptor, so cannot use the
+// global-dynamic TLS model, as that would result in crashes.
+__attribute__((tls_model("initial-exec"))) static THREADLOCAL
+    InternalDeadlockDetector deadlock_detector;
 
 void CheckedMutex::LockImpl(uptr pc) { deadlock_detector.Lock(type_, pc); }
 
lib/tsan/sanitizer_common/sanitizer_placement_new.h
@@ -17,8 +17,6 @@
 
 #include "sanitizer_internal_defs.h"
 
-inline void *operator new(__sanitizer::operator_new_size_type sz, void *p) {
-  return p;
-}
+inline void *operator new(__sanitizer::usize sz, void *p) { return p; }
 
 #endif  // SANITIZER_PLACEMENT_NEW_H
lib/tsan/sanitizer_common/sanitizer_platform.h
@@ -260,6 +260,17 @@
 #  define SANITIZER_ARM64 0
 #endif
 
+#if SANITIZER_WINDOWS64 && SANITIZER_ARM64
+#  define SANITIZER_WINDOWS_ARM64 1
+#  define SANITIZER_WINDOWS_x64 0
+#elif SANITIZER_WINDOWS64 && !SANITIZER_ARM64
+#  define SANITIZER_WINDOWS_ARM64 0
+#  define SANITIZER_WINDOWS_x64 1
+#else
+#  define SANITIZER_WINDOWS_ARM64 0
+#  define SANITIZER_WINDOWS_x64 0
+#endif
+
 #if SANITIZER_SOLARIS && SANITIZER_WORDSIZE == 32
 #  define SANITIZER_SOLARIS32 1
 #else
@@ -284,7 +295,8 @@
 // For such platforms build this code with -DSANITIZER_CAN_USE_ALLOCATOR64=0 or
 // change the definition of SANITIZER_CAN_USE_ALLOCATOR64 here.
 #ifndef SANITIZER_CAN_USE_ALLOCATOR64
-#  if SANITIZER_RISCV64 || SANITIZER_IOS
+#  if (SANITIZER_RISCV64 && !SANITIZER_FUCHSIA && !SANITIZER_LINUX) || \
+      SANITIZER_IOS || SANITIZER_DRIVERKIT
 #    define SANITIZER_CAN_USE_ALLOCATOR64 0
 #  elif defined(__mips64) || defined(__hexagon__)
 #    define SANITIZER_CAN_USE_ALLOCATOR64 0
@@ -303,7 +315,15 @@
 #    define SANITIZER_MMAP_RANGE_SIZE FIRST_32_SECOND_64(1ULL << 32, 1ULL << 40)
 #  endif
 #elif SANITIZER_RISCV64
-#  define SANITIZER_MMAP_RANGE_SIZE FIRST_32_SECOND_64(1ULL << 32, 1ULL << 38)
+// FIXME: Rather than hardcoding the VMA here, we should rely on
+// GetMaxUserVirtualAddress(). This will require some refactoring though since
+// many places either hardcode some value or SANITIZER_MMAP_RANGE_SIZE is
+// assumed to be some constant integer.
+#  if SANITIZER_FUCHSIA
+#    define SANITIZER_MMAP_RANGE_SIZE (1ULL << 38)
+#  else
+#    define SANITIZER_MMAP_RANGE_SIZE FIRST_32_SECOND_64(1ULL << 32, 1ULL << 56)
+#  endif
 #elif defined(__aarch64__)
 #  if SANITIZER_APPLE
 #    if SANITIZER_OSX || SANITIZER_IOSSIM
lib/tsan/sanitizer_common/sanitizer_platform_interceptors.h
@@ -191,7 +191,8 @@
 
 #define SANITIZER_INTERCEPT_PREADV \
   (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID)
-#define SANITIZER_INTERCEPT_PWRITEV SI_LINUX_NOT_ANDROID
+#define SANITIZER_INTERCEPT_PWRITEV \
+  (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID)
 #define SANITIZER_INTERCEPT_PREADV64 SI_GLIBC
 #define SANITIZER_INTERCEPT_PWRITEV64 SI_GLIBC
 
@@ -301,7 +302,8 @@
 #define SANITIZER_INTERCEPT_CANONICALIZE_FILE_NAME (SI_GLIBC || SI_SOLARIS)
 #define SANITIZER_INTERCEPT_CONFSTR \
   (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID || SI_SOLARIS)
-#define SANITIZER_INTERCEPT_SCHED_GETAFFINITY SI_LINUX_NOT_ANDROID
+#define SANITIZER_INTERCEPT_SCHED_GETAFFINITY \
+  (SI_LINUX_NOT_ANDROID || SI_FREEBSD)
 #define SANITIZER_INTERCEPT_SCHED_GETPARAM SI_LINUX_NOT_ANDROID || SI_SOLARIS
 #define SANITIZER_INTERCEPT_STRERROR SI_POSIX
 #define SANITIZER_INTERCEPT_STRERROR_R SI_POSIX
@@ -462,7 +464,7 @@
   (SI_LINUX || SI_MAC || SI_WINDOWS || SI_FREEBSD || SI_NETBSD || SI_SOLARIS)
 #define SANITIZER_INTERCEPT_RECV_RECVFROM SI_POSIX
 #define SANITIZER_INTERCEPT_SEND_SENDTO SI_POSIX
-#define SANITIZER_INTERCEPT_EVENTFD_READ_WRITE SI_LINUX
+#define SANITIZER_INTERCEPT_EVENTFD_READ_WRITE (SI_LINUX || SI_FREEBSD)
 
 #define SI_STAT_LINUX (SI_LINUX && __GLIBC_PREREQ(2, 33))
 #define SANITIZER_INTERCEPT_STAT                                        \
@@ -575,12 +577,12 @@
 #define SANITIZER_INTERCEPT_SL_INIT (SI_FREEBSD || SI_NETBSD)
 
 #define SANITIZER_INTERCEPT_GETRANDOM \
-  ((SI_LINUX && __GLIBC_PREREQ(2, 25)) || SI_FREEBSD)
+  ((SI_LINUX && __GLIBC_PREREQ(2, 25)) || SI_FREEBSD || SI_SOLARIS)
 #define SANITIZER_INTERCEPT___CXA_ATEXIT SI_NETBSD
 #define SANITIZER_INTERCEPT_ATEXIT SI_NETBSD
 #define SANITIZER_INTERCEPT_PTHREAD_ATFORK SI_NETBSD
 #define SANITIZER_INTERCEPT_GETENTROPY \
-  ((SI_LINUX && __GLIBC_PREREQ(2, 25)) || SI_FREEBSD)
+  ((SI_LINUX && __GLIBC_PREREQ(2, 25)) || SI_FREEBSD || SI_SOLARIS)
 #define SANITIZER_INTERCEPT_QSORT \
   (SI_POSIX && !SI_IOSSIM && !SI_WATCHOS && !SI_TVOS && !SI_ANDROID)
 #define SANITIZER_INTERCEPT_QSORT_R SI_GLIBC
@@ -594,9 +596,11 @@
 #define SANITIZER_INTERCEPT___XUNAME SI_FREEBSD
 #define SANITIZER_INTERCEPT_FLOPEN SI_FREEBSD
 #define SANITIZER_INTERCEPT_PROCCTL SI_FREEBSD
-#define SANITIZER_INTERCEPT_HEXDUMP SI_FREEBSD
 #define SANITIZER_INTERCEPT_ARGP_PARSE SI_GLIBC
 #define SANITIZER_INTERCEPT_CPUSET_GETAFFINITY SI_FREEBSD
+// FIXME: also available from musl 1.2.5
+#define SANITIZER_INTERCEPT_PREADV2 (SI_LINUX && __GLIBC_PREREQ(2, 26))
+#define SANITIZER_INTERCEPT_PWRITEV2 (SI_LINUX && __GLIBC_PREREQ(2, 26))
 
 // This macro gives a way for downstream users to override the above
 // interceptor macros irrespective of the platform they are on. They have
lib/tsan/sanitizer_common/sanitizer_platform_limits_freebsd.cpp
@@ -475,6 +475,8 @@ CHECK_TYPE_SIZE(nfds_t);
 CHECK_TYPE_SIZE(sigset_t);
 
 COMPILER_CHECK(sizeof(__sanitizer_sigaction) == sizeof(struct sigaction));
+COMPILER_CHECK(sizeof(__sanitizer_siginfo) == sizeof(siginfo_t));
+CHECK_SIZE_AND_OFFSET(siginfo_t, si_value);
 // Can't write checks for sa_handler and sa_sigaction due to them being
 // preprocessor macros.
 CHECK_STRUCT_SIZE_AND_OFFSET(sigaction, sa_mask);
lib/tsan/sanitizer_common/sanitizer_platform_limits_freebsd.h
@@ -301,11 +301,29 @@ struct __sanitizer_sigset_t {
 
 typedef __sanitizer_sigset_t __sanitizer_kernel_sigset_t;
 
+union __sanitizer_sigval {
+  int sival_int;
+  void *sival_ptr;
+};
+
 struct __sanitizer_siginfo {
-  // The size is determined by looking at sizeof of real siginfo_t on linux.
-  u64 opaque[128 / sizeof(u64)];
+  int si_signo;
+  int si_errno;
+  int si_code;
+  pid_t si_pid;
+  u32 si_uid;
+  int si_status;
+  void *si_addr;
+  union __sanitizer_sigval si_value;
+#  if SANITIZER_WORDSIZE == 64
+  char data[40];
+#  else
+  char data[32];
+#  endif
 };
 
+typedef __sanitizer_siginfo __sanitizer_siginfo_t;
+
 using __sanitizer_sighandler_ptr = void (*)(int sig);
 using __sanitizer_sigactionhandler_ptr = void (*)(int sig,
                                                   __sanitizer_siginfo *siginfo,
@@ -726,6 +744,8 @@ struct __sanitizer_cpuset {
 
 typedef struct __sanitizer_cpuset __sanitizer_cpuset_t;
 extern unsigned struct_cpuset_sz;
+
+typedef unsigned long long __sanitizer_eventfd_t;
 }  // namespace __sanitizer
 
 #  define CHECK_TYPE_SIZE(TYPE) \
lib/tsan/sanitizer_common/sanitizer_platform_limits_openbsd.cpp
lib/tsan/sanitizer_common/sanitizer_platform_limits_openbsd.h
lib/tsan/sanitizer_common/sanitizer_platform_limits_posix.h
@@ -523,6 +523,7 @@ typedef long __sanitizer_clock_t;
 
 #if SANITIZER_LINUX
 typedef int __sanitizer_clockid_t;
+typedef unsigned long long __sanitizer_eventfd_t;
 #endif
 
 #if SANITIZER_LINUX
lib/tsan/sanitizer_common/sanitizer_posix.cpp
@@ -54,12 +54,12 @@ void *MmapOrDie(uptr size, const char *mem_type, bool raw_report) {
   return (void *)res;
 }
 
-void UnmapOrDie(void *addr, uptr size) {
+void UnmapOrDie(void *addr, uptr size, bool raw_report) {
   if (!addr || !size) return;
   uptr res = internal_munmap(addr, size);
   int reserrno;
   if (UNLIKELY(internal_iserror(res, &reserrno)))
-    ReportMunmapFailureAndDie(addr, size, reserrno);
+    ReportMunmapFailureAndDie(addr, size, reserrno, raw_report);
   DecreaseTotalMmap(size);
 }
 
@@ -85,8 +85,8 @@ void *MmapAlignedOrDieOnFatalError(uptr size, uptr alignment,
   CHECK(IsPowerOfTwo(size));
   CHECK(IsPowerOfTwo(alignment));
   uptr map_size = size + alignment;
-  // mmap maps entire pages and rounds up map_size needs to be a an integral 
-  // number of pages. 
+  // mmap maps entire pages, so map_size needs to be rounded up to an integral
+  // number of pages.
   // We need to be aware of this size for calculating end and for unmapping
   // fragments before and after the alignment region.
   map_size = RoundUpTo(map_size, GetPageSizeCached());
@@ -130,8 +130,8 @@ static void *MmapFixedImpl(uptr fixed_addr, uptr size, bool tolerate_enomem,
     if (tolerate_enomem && reserrno == ENOMEM)
       return nullptr;
     char mem_type[40];
-    internal_snprintf(mem_type, sizeof(mem_type), "memory at address 0x%zx",
-                      fixed_addr);
+    internal_snprintf(mem_type, sizeof(mem_type), "memory at address %p",
+                      (void *)fixed_addr);
     ReportMmapFailureAndDie(size, mem_type, "allocate", reserrno);
   }
   IncreaseTotalMmap(size);
lib/tsan/sanitizer_common/sanitizer_posix.h
@@ -74,21 +74,21 @@ int internal_sysctlbyname(const char *sname, void *oldp, uptr *oldlenp,
 // These functions call appropriate pthread_ functions directly, bypassing
 // the interceptor. They are weak and may not be present in some tools.
 SANITIZER_WEAK_ATTRIBUTE
-int real_pthread_create(void *th, void *attr, void *(*callback)(void *),
-                        void *param);
+int internal_pthread_create(void *th, void *attr, void *(*callback)(void *),
+                            void *param);
 SANITIZER_WEAK_ATTRIBUTE
-int real_pthread_join(void *th, void **ret);
-
-#define DEFINE_REAL_PTHREAD_FUNCTIONS                                          \
-  namespace __sanitizer {                                                      \
-  int real_pthread_create(void *th, void *attr, void *(*callback)(void *),     \
-                          void *param) {                                       \
-    return REAL(pthread_create)(th, attr, callback, param);                    \
-  }                                                                            \
-  int real_pthread_join(void *th, void **ret) {                                \
-    return REAL(pthread_join(th, ret));                                        \
-  }                                                                            \
-  }  // namespace __sanitizer
+int internal_pthread_join(void *th, void **ret);
+
+#  define DEFINE_INTERNAL_PTHREAD_FUNCTIONS                               \
+    namespace __sanitizer {                                               \
+    int internal_pthread_create(void *th, void *attr,                     \
+                                void *(*callback)(void *), void *param) { \
+      return REAL(pthread_create)(th, attr, callback, param);             \
+    }                                                                     \
+    int internal_pthread_join(void *th, void **ret) {                     \
+      return REAL(pthread_join(th, ret));                                 \
+    }                                                                     \
+    }  // namespace __sanitizer
 
 int internal_pthread_attr_getstack(void *attr, void **addr, uptr *size);
 
lib/tsan/sanitizer_common/sanitizer_posix_libcdep.cpp
@@ -91,12 +91,12 @@ static rlim_t getlim(int res) {
 
 static void setlim(int res, rlim_t lim) {
   struct rlimit rlim;
-  if (getrlimit(res, const_cast<struct rlimit *>(&rlim))) {
+  if (getrlimit(res, &rlim)) {
     Report("ERROR: %s getrlimit() failed %d\n", SanitizerToolName, errno);
     Die();
   }
   rlim.rlim_cur = lim;
-  if (setrlimit(res, const_cast<struct rlimit *>(&rlim))) {
+  if (setrlimit(res, &rlim)) {
     Report("ERROR: %s setrlimit() failed %d\n", SanitizerToolName, errno);
     Die();
   }
@@ -104,7 +104,27 @@ static void setlim(int res, rlim_t lim) {
 
 void DisableCoreDumperIfNecessary() {
   if (common_flags()->disable_coredump) {
-    setlim(RLIMIT_CORE, 0);
+    rlimit rlim;
+    CHECK_EQ(0, getrlimit(RLIMIT_CORE, &rlim));
+    // On Linux, if the kernel.core_pattern sysctl starts with a '|' (i.e. it
+    // is being piped to a coredump handler such as systemd-coredumpd), the
+    // kernel ignores RLIMIT_CORE (since we aren't creating a file in the file
+    // system) except for the magic value of 1, which disables coredumps when
+    // piping. 1 byte is too small for any kind of valid core dump, so it
+    // also disables coredumps if kernel.core_pattern creates files directly.
+    // While most piped coredump handlers do respect the crashing process's
+    // RLIMIT_CORE, this is notably not the case for Debian's systemd-coredump
+    // due to a local patch that changes sysctl.d/50-coredump.conf to ignore
+    // the specified limit and instead use RLIM_INFINITY.
+    //
+    // The alternative to using RLIMIT_CORE=1 would be to use prctl() with the
+    // PR_SET_DUMPABLE flag, however that also prevents ptrace(), so makes it
+    // impossible to attach a debugger.
+    //
+    // Note: we use rlim_max in the Min() call here since that is the upper
+    // limit for what can be set without getting an EINVAL error.
+    rlim.rlim_cur = Min<rlim_t>(SANITIZER_LINUX ? 1 : 0, rlim.rlim_max);
+    CHECK_EQ(0, setrlimit(RLIMIT_CORE, &rlim));
   }
 }
 
@@ -307,9 +327,10 @@ static bool MmapFixed(uptr fixed_addr, uptr size, int additional_flags,
                 MAP_PRIVATE | MAP_FIXED | additional_flags | MAP_ANON, name);
   int reserrno;
   if (internal_iserror(p, &reserrno)) {
-    Report("ERROR: %s failed to "
-           "allocate 0x%zx (%zd) bytes at address %zx (errno: %d)\n",
-           SanitizerToolName, size, size, fixed_addr, reserrno);
+    Report(
+        "ERROR: %s failed to "
+        "allocate 0x%zx (%zd) bytes at address %p (errno: %d)\n",
+        SanitizerToolName, size, size, (void *)fixed_addr, reserrno);
     return false;
   }
   IncreaseTotalMmap(size);
lib/tsan/sanitizer_common/sanitizer_printf.cpp
@@ -54,7 +54,7 @@ static int AppendNumber(char **buff, const char *buff_end, u64 absolute_value,
   uptr num_buffer[kMaxLen];
   int pos = 0;
   do {
-    RAW_CHECK_MSG((uptr)pos < kMaxLen, "AppendNumber buffer overflow");
+    RAW_CHECK_MSG((uptr)pos < kMaxLen, "AppendNumber buffer overflow",);
     num_buffer[pos++] = absolute_value % base;
     absolute_value /= base;
   } while (absolute_value > 0);
@@ -337,7 +337,14 @@ int internal_snprintf(char *buffer, uptr length, const char *format, ...) {
   return needed_length;
 }
 
-void InternalScopedString::append(const char *format, ...) {
+void InternalScopedString::Append(const char *str) {
+  uptr prev_len = length();
+  uptr str_len = internal_strlen(str);
+  buffer_.resize(prev_len + str_len + 1);
+  internal_memcpy(buffer_.data() + prev_len, str, str_len + 1);
+}
+
+void InternalScopedString::AppendF(const char *format, ...) {
   uptr prev_len = length();
 
   while (true) {
lib/tsan/sanitizer_common/sanitizer_procmaps_bsd.cpp
@@ -13,9 +13,6 @@
 #include "sanitizer_platform.h"
 #if SANITIZER_FREEBSD || SANITIZER_NETBSD
 #include "sanitizer_common.h"
-#if SANITIZER_FREEBSD
-#include "sanitizer_freebsd.h"
-#endif
 #include "sanitizer_procmaps.h"
 
 // clang-format off
@@ -29,29 +26,35 @@
 
 #include <limits.h>
 
-// Fix 'kinfo_vmentry' definition on FreeBSD prior v9.2 in 32-bit mode.
-#if SANITIZER_FREEBSD && (SANITIZER_WORDSIZE == 32)
-#include <osreldate.h>
-#if __FreeBSD_version <= 902001 // v9.2
-#define kinfo_vmentry xkinfo_vmentry
-#endif
-#endif
-
 namespace __sanitizer {
 
 #if SANITIZER_FREEBSD
 void GetMemoryProfile(fill_profile_f cb, uptr *stats) {
-  const int Mib[] = {
-    CTL_KERN,
-    KERN_PROC,
-    KERN_PROC_PID,
-    getpid()
-  }; 
-
-  struct kinfo_proc InfoProc;
-  uptr Len = sizeof(InfoProc);
-  CHECK_EQ(internal_sysctl(Mib, ARRAY_SIZE(Mib), nullptr, (uptr *)&InfoProc, &Len, 0), 0);
-  cb(0, InfoProc.ki_rssize * GetPageSizeCached(), false, stats);
+  const int Mib[] = {CTL_KERN, KERN_PROC, KERN_PROC_PID, getpid()};
+
+  struct kinfo_proc *InfoProc;
+  uptr Len = sizeof(*InfoProc);
+  uptr Size = Len;
+  InfoProc = (struct kinfo_proc *)MmapOrDie(Size, "GetMemoryProfile()");
+  CHECK_EQ(
+      internal_sysctl(Mib, ARRAY_SIZE(Mib), nullptr, (uptr *)InfoProc, &Len, 0),
+      0);
+  cb(0, InfoProc->ki_rssize * GetPageSizeCached(), false, stats);
+  UnmapOrDie(InfoProc, Size, true);
+}
+#elif SANITIZER_NETBSD
+void GetMemoryProfile(fill_profile_f cb, uptr *stats) {
+  struct kinfo_proc2 *InfoProc;
+  uptr Len = sizeof(*InfoProc);
+  uptr Size = Len;
+  const int Mib[] = {CTL_KERN, KERN_PROC2, KERN_PROC_PID,
+                     getpid(), (int)Size,  1};
+  InfoProc = (struct kinfo_proc2 *)MmapOrDie(Size, "GetMemoryProfile()");
+  CHECK_EQ(
+      internal_sysctl(Mib, ARRAY_SIZE(Mib), nullptr, (uptr *)InfoProc, &Len, 0),
+      0);
+  cb(0, InfoProc->p_vm_rssize * GetPageSizeCached(), false, stats);
+  UnmapOrDie(InfoProc, Size, true);
 }
 #endif
 
lib/tsan/sanitizer_common/sanitizer_procmaps_common.cpp
@@ -145,7 +145,7 @@ void MemoryMappingLayout::DumpListOfModules(
   }
 }
 
-#if SANITIZER_LINUX || SANITIZER_ANDROID || SANITIZER_SOLARIS || SANITIZER_NETBSD
+#if SANITIZER_LINUX || SANITIZER_ANDROID || SANITIZER_SOLARIS
 void GetMemoryProfile(fill_profile_f cb, uptr *stats) {
   char *smaps = nullptr;
   uptr smaps_cap = 0;
lib/tsan/sanitizer_common/sanitizer_ptrauth.h
@@ -9,31 +9,33 @@
 #ifndef SANITIZER_PTRAUTH_H
 #define SANITIZER_PTRAUTH_H
 
-#if __has_feature(ptrauth_calls)
-#include <ptrauth.h>
+#if __has_feature(ptrauth_intrinsics)
+#  include <ptrauth.h>
 #elif defined(__ARM_FEATURE_PAC_DEFAULT) && !defined(__APPLE__)
-inline unsigned long ptrauth_strip(void* __value, unsigned int __key) {
-  // On the stack the link register is protected with Pointer
-  // Authentication Code when compiled with -mbranch-protection.
-  // Let's stripping the PAC unconditionally because xpaclri is in
-  // the NOP space so will do nothing when it is not enabled or not available.
-  unsigned long ret;
-  asm volatile(
-      "mov x30, %1\n\t"
-      "hint #7\n\t"  // xpaclri
-      "mov %0, x30\n\t"
-      : "=r"(ret)
-      : "r"(__value)
-      : "x30");
-  return ret;
-}
-#define ptrauth_auth_data(__value, __old_key, __old_data) __value
-#define ptrauth_string_discriminator(__string) ((int)0)
+// On the stack the link register is protected with a Pointer
+// Authentication Code when compiled with -mbranch-protection.
+// Strip the PAC unconditionally: xpaclri is in the NOP space, so it
+// does nothing when the feature is not enabled or not available.
+#  define ptrauth_strip(__value, __key) \
+    ({                                  \
+      __typeof(__value) ret;            \
+      asm volatile(                     \
+          "mov x30, %1\n\t"             \
+          "hint #7\n\t"                 \
+          "mov %0, x30\n\t"             \
+          "mov x30, xzr\n\t"            \
+          : "=r"(ret)                   \
+          : "r"(__value)                \
+          : "x30");                     \
+      ret;                              \
+    })
+#  define ptrauth_auth_data(__value, __old_key, __old_data) __value
+#  define ptrauth_string_discriminator(__string) ((int)0)
 #else
 // Copied from <ptrauth.h>
-#define ptrauth_strip(__value, __key) __value
-#define ptrauth_auth_data(__value, __old_key, __old_data) __value
-#define ptrauth_string_discriminator(__string) ((int)0)
+#  define ptrauth_strip(__value, __key) __value
+#  define ptrauth_auth_data(__value, __old_key, __old_data) __value
+#  define ptrauth_string_discriminator(__string) ((int)0)
 #endif
 
 #define STRIP_PAC_PC(pc) ((uptr)ptrauth_strip(pc, 0))
lib/tsan/sanitizer_common/sanitizer_redefine_builtins.h
@@ -11,16 +11,19 @@
 //
 //===----------------------------------------------------------------------===//
 #ifndef SANITIZER_COMMON_NO_REDEFINE_BUILTINS
-#ifndef SANITIZER_REDEFINE_BUILTINS_H
-#define SANITIZER_REDEFINE_BUILTINS_H
+#  ifndef SANITIZER_REDEFINE_BUILTINS_H
+#    define SANITIZER_REDEFINE_BUILTINS_H
 
 // The asm hack only works with GCC and Clang.
-#if !defined(_WIN32)
+#    if !defined(_WIN32)
 
 asm("memcpy = __sanitizer_internal_memcpy");
 asm("memmove = __sanitizer_internal_memmove");
 asm("memset = __sanitizer_internal_memset");
 
+#      if defined(__cplusplus) && \
+          !defined(SANITIZER_COMMON_REDEFINE_BUILTINS_IN_STD)
+
 // The builtins should not be redefined in source files that make use of C++
 // standard libraries, in particular where C++STL headers with inline functions
 // are used. The redefinition in such cases would lead to ODR violations.
@@ -46,7 +49,8 @@ using unordered_set = Define_SANITIZER_COMMON_NO_REDEFINE_BUILTINS_in_cpp_file;
 using vector = Define_SANITIZER_COMMON_NO_REDEFINE_BUILTINS_in_cpp_file;
 }  // namespace std
 
-#endif  // !_WIN32
+#      endif  // __cplusplus
+#    endif    // !_WIN32
 
-#endif  // SANITIZER_REDEFINE_BUILTINS_H
-#endif  // SANITIZER_COMMON_NO_REDEFINE_BUILTINS
+#  endif  // SANITIZER_REDEFINE_BUILTINS_H
+#endif    // SANITIZER_COMMON_NO_REDEFINE_BUILTINS
lib/tsan/sanitizer_common/sanitizer_ring_buffer.h
@@ -47,7 +47,9 @@ class RingBuffer {
   void push(T t) {
     *next_ = t;
     next_--;
-    // The condition below works only if sizeof(T) is divisible by sizeof(T*).
+    static_assert((sizeof(T) % sizeof(T *)) == 0,
+                  "The condition below works only if sizeof(T) is divisible by "
+                  "sizeof(T*).");
     if (next_ <= reinterpret_cast<T*>(&next_))
       next_ = last_;
   }
lib/tsan/sanitizer_common/sanitizer_stack_store.cpp
@@ -44,6 +44,9 @@ StackStore::Id StackStore::Store(const StackTrace &trace, uptr *pack) {
   uptr idx = 0;
   *pack = 0;
   uptr *stack_trace = Alloc(h.size + 1, &idx, pack);
+  // No more space.
+  if (stack_trace == nullptr)
+    return 0;
   *stack_trace = h.ToUptr();
   internal_memcpy(stack_trace + 1, trace.trace, h.size * sizeof(uptr));
   *pack += blocks_[GetBlockIdx(idx)].Stored(h.size + 1);
@@ -76,8 +79,10 @@ uptr *StackStore::Alloc(uptr count, uptr *idx, uptr *pack) {
     uptr block_idx = GetBlockIdx(start);
     uptr last_idx = GetBlockIdx(start + count - 1);
     if (LIKELY(block_idx == last_idx)) {
-      // Fits into the a single block.
-      CHECK_LT(block_idx, ARRAY_SIZE(blocks_));
+      // Fits into a single block.
+      // No more available blocks.  Indicate inability to allocate more memory.
+      if (block_idx >= ARRAY_SIZE(blocks_))
+        return nullptr;
       *idx = start;
       return blocks_[block_idx].GetOrCreate(this) + GetInBlockIdx(start);
     }
lib/tsan/sanitizer_common/sanitizer_stackdepot.cpp
@@ -215,16 +215,16 @@ StackTrace StackDepotGet(u32 id) {
   return theDepot.Get(id);
 }
 
-void StackDepotLockAll() {
-  theDepot.LockAll();
+void StackDepotLockBeforeFork() {
+  theDepot.LockBeforeFork();
   compress_thread.LockAndStop();
   stackStore.LockAll();
 }
 
-void StackDepotUnlockAll() {
+void StackDepotUnlockAfterFork(bool fork_child) {
   stackStore.UnlockAll();
   compress_thread.Unlock();
-  theDepot.UnlockAll();
+  theDepot.UnlockAfterFork(fork_child);
 }
 
 void StackDepotPrintAll() {
lib/tsan/sanitizer_common/sanitizer_stackdepot.h
@@ -39,8 +39,8 @@ StackDepotHandle StackDepotPut_WithHandle(StackTrace stack);
 // Retrieves a stored stack trace by the id.
 StackTrace StackDepotGet(u32 id);
 
-void StackDepotLockAll();
-void StackDepotUnlockAll();
+void StackDepotLockBeforeFork();
+void StackDepotUnlockAfterFork(bool fork_child);
 void StackDepotPrintAll();
 void StackDepotStopBackgroundThread();
 
lib/tsan/sanitizer_common/sanitizer_stackdepotbase.h
@@ -52,8 +52,8 @@ class StackDepotBase {
     };
   }
 
-  void LockAll();
-  void UnlockAll();
+  void LockBeforeFork();
+  void UnlockAfterFork(bool fork_child);
   void PrintAll();
 
   void TestOnlyUnmap() {
@@ -160,18 +160,33 @@ StackDepotBase<Node, kReservedBits, kTabSizeLog>::Get(u32 id) {
 }
 
 template <class Node, int kReservedBits, int kTabSizeLog>
-void StackDepotBase<Node, kReservedBits, kTabSizeLog>::LockAll() {
-  for (int i = 0; i < kTabSize; ++i) {
-    lock(&tab[i]);
-  }
+void StackDepotBase<Node, kReservedBits, kTabSizeLog>::LockBeforeFork() {
+  // Do not lock the hash table. It's very expensive and not really needed.
+  // The parent process will neither lock nor unlock. The child process risks
+  // deadlocking on already locked buckets. To avoid deadlock we will unlock
+  // every locked bucket in `UnlockAfterFork`. This may affect consistency of
+  // the hash table, but the only issue is that a few items inserted by the
+  // parent process will not be found by the child, and the child may insert
+  // them again, wasting some space in `stackStore`.
+
+  // We still need to lock nodes.
+  nodes.Lock();
 }
 
 template <class Node, int kReservedBits, int kTabSizeLog>
-void StackDepotBase<Node, kReservedBits, kTabSizeLog>::UnlockAll() {
+void StackDepotBase<Node, kReservedBits, kTabSizeLog>::UnlockAfterFork(
+    bool fork_child) {
+  nodes.Unlock();
+
+  // Only unlock in child process to avoid deadlock. See `LockBeforeFork`.
+  if (!fork_child)
+    return;
+
   for (int i = 0; i < kTabSize; ++i) {
     atomic_uint32_t *p = &tab[i];
     uptr s = atomic_load(p, memory_order_relaxed);
-    unlock(p, s & kUnlockMask);
+    if (s & kLockMask)
+      unlock(p, s & kUnlockMask);
   }
 }
 
lib/tsan/sanitizer_common/sanitizer_stacktrace_libcdep.cpp
@@ -29,42 +29,43 @@ class StackTraceTextPrinter {
         frame_delimiter_(frame_delimiter),
         output_(output),
         dedup_token_(dedup_token),
-        symbolize_(RenderNeedsSymbolization(stack_trace_fmt)) {}
+        symbolize_(StackTracePrinter::GetOrInit()->RenderNeedsSymbolization(
+            stack_trace_fmt)) {}
 
   bool ProcessAddressFrames(uptr pc) {
-    SymbolizedStack *frames = symbolize_
-                                  ? Symbolizer::GetOrInit()->SymbolizePC(pc)
-                                  : SymbolizedStack::New(pc);
+    SymbolizedStackHolder symbolized_stack(
+        symbolize_ ? Symbolizer::GetOrInit()->SymbolizePC(pc)
+                   : SymbolizedStack::New(pc));
+    const SymbolizedStack *frames = symbolized_stack.get();
     if (!frames)
       return false;
 
-    for (SymbolizedStack *cur = frames; cur; cur = cur->next) {
+    for (const SymbolizedStack *cur = frames; cur; cur = cur->next) {
       uptr prev_len = output_->length();
-      RenderFrame(output_, stack_trace_fmt_, frame_num_++, cur->info.address,
-                  symbolize_ ? &cur->info : nullptr,
-                  common_flags()->symbolize_vs_style,
-                  common_flags()->strip_path_prefix);
+      StackTracePrinter::GetOrInit()->RenderFrame(
+          output_, stack_trace_fmt_, frame_num_++, cur->info.address,
+          symbolize_ ? &cur->info : nullptr, common_flags()->symbolize_vs_style,
+          common_flags()->strip_path_prefix);
 
       if (prev_len != output_->length())
-        output_->append("%c", frame_delimiter_);
+        output_->AppendF("%c", frame_delimiter_);
 
       ExtendDedupToken(cur);
     }
-    frames->ClearAll();
     return true;
   }
 
  private:
   // Extend the dedup token by appending a new frame.
-  void ExtendDedupToken(SymbolizedStack *stack) {
+  void ExtendDedupToken(const SymbolizedStack *stack) {
     if (!dedup_token_)
       return;
 
     if (dedup_frames_-- > 0) {
       if (dedup_token_->length())
-        dedup_token_->append("--");
-      if (stack->info.function != nullptr)
-        dedup_token_->append("%s", stack->info.function);
+        dedup_token_->Append("--");
+      if (stack->info.function)
+        dedup_token_->Append(stack->info.function);
     }
   }
 
@@ -98,7 +99,7 @@ void StackTrace::PrintTo(InternalScopedString *output) const {
                                 output, &dedup_token);
 
   if (trace == nullptr || size == 0) {
-    output->append("    <empty stack>\n\n");
+    output->Append("    <empty stack>\n\n");
     return;
   }
 
@@ -110,11 +111,11 @@ void StackTrace::PrintTo(InternalScopedString *output) const {
   }
 
   // Always add a trailing empty line after stack trace.
-  output->append("\n");
+  output->Append("\n");
 
   // Append deduplication token, if non-empty.
   if (dedup_token.length())
-    output->append("DEDUP_TOKEN: %s\n", dedup_token.data());
+    output->AppendF("DEDUP_TOKEN: %s\n", dedup_token.data());
 }
 
 uptr StackTrace::PrintTo(char *out_buf, uptr out_buf_size) const {
@@ -197,7 +198,7 @@ void __sanitizer_symbolize_pc(uptr pc, const char *fmt, char *out_buf,
   StackTraceTextPrinter printer(fmt, '\0', &output, nullptr);
   if (!printer.ProcessAddressFrames(pc)) {
     output.clear();
-    output.append("<can't symbolize>");
+    output.Append("<can't symbolize>");
   }
   CopyStringToBuffer(output, out_buf, out_buf_size);
 }
@@ -210,7 +211,8 @@ void __sanitizer_symbolize_global(uptr data_addr, const char *fmt,
   DataInfo DI;
   if (!Symbolizer::GetOrInit()->SymbolizeData(data_addr, &DI)) return;
   InternalScopedString data_desc;
-  RenderData(&data_desc, fmt, &DI, common_flags()->strip_path_prefix);
+  StackTracePrinter::GetOrInit()->RenderData(&data_desc, fmt, &DI,
+                                             common_flags()->strip_path_prefix);
   internal_strncpy(out_buf, data_desc.data(), out_buf_size);
   out_buf[out_buf_size - 1] = 0;
 }
lib/tsan/sanitizer_common/sanitizer_stacktrace_printer.cpp
@@ -12,13 +12,28 @@
 
 #include "sanitizer_stacktrace_printer.h"
 
+#include "sanitizer_common.h"
 #include "sanitizer_file.h"
 #include "sanitizer_flags.h"
 #include "sanitizer_fuchsia.h"
+#include "sanitizer_symbolizer_markup.h"
 
 namespace __sanitizer {
 
-const char *StripFunctionName(const char *function) {
+StackTracePrinter *StackTracePrinter::GetOrInit() {
+  static StackTracePrinter *stacktrace_printer;
+  static StaticSpinMutex init_mu;
+  SpinMutexLock l(&init_mu);
+  if (stacktrace_printer)
+    return stacktrace_printer;
+
+  stacktrace_printer = StackTracePrinter::NewStackTracePrinter();
+
+  CHECK(stacktrace_printer);
+  return stacktrace_printer;
+}
+
+const char *StackTracePrinter::StripFunctionName(const char *function) {
   if (!common_flags()->demangle)
     return function;
   if (!function)
@@ -47,6 +62,13 @@ const char *StripFunctionName(const char *function) {
 // sanitizer_symbolizer_markup.cpp implements these differently.
 #if !SANITIZER_SYMBOLIZER_MARKUP
 
+StackTracePrinter *StackTracePrinter::NewStackTracePrinter() {
+  if (common_flags()->enable_symbolizer_markup)
+    return new (GetGlobalLowLevelAllocator()) MarkupStackTracePrinter();
+
+  return new (GetGlobalLowLevelAllocator()) FormattedStackTracePrinter();
+}
+
 static const char *DemangleFunctionName(const char *function) {
   if (!common_flags()->demangle)
     return function;
@@ -130,20 +152,23 @@ static void MaybeBuildIdToBuffer(const AddressInfo &info, bool PrefixSpace,
                                  InternalScopedString *buffer) {
   if (info.uuid_size) {
     if (PrefixSpace)
-      buffer->append(" ");
-    buffer->append("(BuildId: ");
+      buffer->Append(" ");
+    buffer->Append("(BuildId: ");
     for (uptr i = 0; i < info.uuid_size; ++i) {
-      buffer->append("%02x", info.uuid[i]);
+      buffer->AppendF("%02x", info.uuid[i]);
     }
-    buffer->append(")");
+    buffer->Append(")");
   }
 }
 
 static const char kDefaultFormat[] = "    #%n %p %F %L";
 
-void RenderFrame(InternalScopedString *buffer, const char *format, int frame_no,
-                 uptr address, const AddressInfo *info, bool vs_style,
-                 const char *strip_path_prefix) {
+void FormattedStackTracePrinter::RenderFrame(InternalScopedString *buffer,
+                                             const char *format, int frame_no,
+                                             uptr address,
+                                             const AddressInfo *info,
+                                             bool vs_style,
+                                             const char *strip_path_prefix) {
   // info will be null in the case where symbolization is not needed for the
   // given format. This ensures that the code below will get a hard failure
   // rather than print incorrect information in case RenderNeedsSymbolization
@@ -154,56 +179,56 @@ void RenderFrame(InternalScopedString *buffer, const char *format, int frame_no,
     format = kDefaultFormat;
   for (const char *p = format; *p != '\0'; p++) {
     if (*p != '%') {
-      buffer->append("%c", *p);
+      buffer->AppendF("%c", *p);
       continue;
     }
     p++;
     switch (*p) {
     case '%':
-      buffer->append("%%");
+      buffer->Append("%");
       break;
     // Frame number and all fields of AddressInfo structure.
     case 'n':
-      buffer->append("%u", frame_no);
+      buffer->AppendF("%u", frame_no);
       break;
     case 'p':
-      buffer->append("0x%zx", address);
+      buffer->AppendF("%p", (void *)address);
       break;
     case 'm':
-      buffer->append("%s", StripPathPrefix(info->module, strip_path_prefix));
+      buffer->AppendF("%s", StripPathPrefix(info->module, strip_path_prefix));
       break;
     case 'o':
-      buffer->append("0x%zx", info->module_offset);
+      buffer->AppendF("0x%zx", info->module_offset);
       break;
     case 'b':
       MaybeBuildIdToBuffer(*info, /*PrefixSpace=*/false, buffer);
       break;
     case 'f':
-      buffer->append("%s",
-                     DemangleFunctionName(StripFunctionName(info->function)));
+      buffer->AppendF("%s",
+                      DemangleFunctionName(StripFunctionName(info->function)));
       break;
     case 'q':
-      buffer->append("0x%zx", info->function_offset != AddressInfo::kUnknown
-                                  ? info->function_offset
-                                  : 0x0);
+      buffer->AppendF("0x%zx", info->function_offset != AddressInfo::kUnknown
+                                   ? info->function_offset
+                                   : 0x0);
       break;
     case 's':
-      buffer->append("%s", StripPathPrefix(info->file, strip_path_prefix));
+      buffer->AppendF("%s", StripPathPrefix(info->file, strip_path_prefix));
       break;
     case 'l':
-      buffer->append("%d", info->line);
+      buffer->AppendF("%d", info->line);
       break;
     case 'c':
-      buffer->append("%d", info->column);
+      buffer->AppendF("%d", info->column);
       break;
     // Smarter special cases.
     case 'F':
       // Function name and offset, if file is unknown.
       if (info->function) {
-        buffer->append("in %s",
-                       DemangleFunctionName(StripFunctionName(info->function)));
+        buffer->AppendF(
+            "in %s", DemangleFunctionName(StripFunctionName(info->function)));
         if (!info->file && info->function_offset != AddressInfo::kUnknown)
-          buffer->append("+0x%zx", info->function_offset);
+          buffer->AppendF("+0x%zx", info->function_offset);
       }
       break;
     case 'S':
@@ -224,7 +249,7 @@ void RenderFrame(InternalScopedString *buffer, const char *format, int frame_no,
         MaybeBuildIdToBuffer(*info, /*PrefixSpace=*/true, buffer);
 #endif
       } else {
-        buffer->append("(<unknown module>)");
+        buffer->Append("(<unknown module>)");
       }
       break;
     case 'M':
@@ -239,18 +264,18 @@ void RenderFrame(InternalScopedString *buffer, const char *format, int frame_no,
         MaybeBuildIdToBuffer(*info, /*PrefixSpace=*/true, buffer);
 #endif
       } else {
-        buffer->append("(%p)", (void *)address);
+        buffer->AppendF("(%p)", (void *)address);
       }
       break;
     default:
       Report("Unsupported specifier in stack frame format: %c (%p)!\n", *p,
-             (void *)p);
+             (const void *)p);
       Die();
     }
   }
 }
 
-bool RenderNeedsSymbolization(const char *format) {
+bool FormattedStackTracePrinter::RenderNeedsSymbolization(const char *format) {
   if (0 == internal_strcmp(format, "DEFAULT"))
     format = kDefaultFormat;
   for (const char *p = format; *p != '\0'; p++) {
@@ -273,30 +298,32 @@ bool RenderNeedsSymbolization(const char *format) {
   return false;
 }
 
-void RenderData(InternalScopedString *buffer, const char *format,
-                const DataInfo *DI, const char *strip_path_prefix) {
+void FormattedStackTracePrinter::RenderData(InternalScopedString *buffer,
+                                            const char *format,
+                                            const DataInfo *DI,
+                                            const char *strip_path_prefix) {
   for (const char *p = format; *p != '\0'; p++) {
     if (*p != '%') {
-      buffer->append("%c", *p);
+      buffer->AppendF("%c", *p);
       continue;
     }
     p++;
     switch (*p) {
       case '%':
-        buffer->append("%%");
+        buffer->Append("%");
         break;
       case 's':
-        buffer->append("%s", StripPathPrefix(DI->file, strip_path_prefix));
+        buffer->AppendF("%s", StripPathPrefix(DI->file, strip_path_prefix));
         break;
       case 'l':
-        buffer->append("%zu", DI->line);
+        buffer->AppendF("%zu", DI->line);
         break;
       case 'g':
-        buffer->append("%s", DI->name);
+        buffer->AppendF("%s", DI->name);
         break;
       default:
         Report("Unsupported specifier in stack frame format: %c (%p)!\n", *p,
-               (void *)p);
+               (const void *)p);
         Die();
     }
   }
@@ -304,33 +331,35 @@ void RenderData(InternalScopedString *buffer, const char *format,
 
 #endif  // !SANITIZER_SYMBOLIZER_MARKUP
 
-void RenderSourceLocation(InternalScopedString *buffer, const char *file,
-                          int line, int column, bool vs_style,
-                          const char *strip_path_prefix) {
+void StackTracePrinter::RenderSourceLocation(InternalScopedString *buffer,
+                                             const char *file, int line,
+                                             int column, bool vs_style,
+                                             const char *strip_path_prefix) {
   if (vs_style && line > 0) {
-    buffer->append("%s(%d", StripPathPrefix(file, strip_path_prefix), line);
+    buffer->AppendF("%s(%d", StripPathPrefix(file, strip_path_prefix), line);
     if (column > 0)
-      buffer->append(",%d", column);
-    buffer->append(")");
+      buffer->AppendF(",%d", column);
+    buffer->Append(")");
     return;
   }
 
-  buffer->append("%s", StripPathPrefix(file, strip_path_prefix));
+  buffer->AppendF("%s", StripPathPrefix(file, strip_path_prefix));
   if (line > 0) {
-    buffer->append(":%d", line);
+    buffer->AppendF(":%d", line);
     if (column > 0)
-      buffer->append(":%d", column);
+      buffer->AppendF(":%d", column);
   }
 }
 
-void RenderModuleLocation(InternalScopedString *buffer, const char *module,
-                          uptr offset, ModuleArch arch,
-                          const char *strip_path_prefix) {
-  buffer->append("(%s", StripPathPrefix(module, strip_path_prefix));
+void StackTracePrinter::RenderModuleLocation(InternalScopedString *buffer,
+                                             const char *module, uptr offset,
+                                             ModuleArch arch,
+                                             const char *strip_path_prefix) {
+  buffer->AppendF("(%s", StripPathPrefix(module, strip_path_prefix));
   if (arch != kModuleArchUnknown) {
-    buffer->append(":%s", ModuleArchToString(arch));
+    buffer->AppendF(":%s", ModuleArchToString(arch));
   }
-  buffer->append("+0x%zx)", offset);
+  buffer->AppendF("+0x%zx)", offset);
 }
 
 } // namespace __sanitizer
lib/tsan/sanitizer_common/sanitizer_stacktrace_printer.h
@@ -13,61 +13,102 @@
 #define SANITIZER_STACKTRACE_PRINTER_H
 
 #include "sanitizer_common.h"
+#include "sanitizer_internal_defs.h"
 #include "sanitizer_symbolizer.h"
 
 namespace __sanitizer {
 
-// Strip interceptor prefixes from function name.
-const char *StripFunctionName(const char *function);
-
-// Render the contents of "info" structure, which represents the contents of
-// stack frame "frame_no" and appends it to the "buffer". "format" is a
-// string with placeholders, which is copied to the output with
-// placeholders substituted with the contents of "info". For example,
-// format string
-//   "  frame %n: function %F at %S"
-// will be turned into
-//   "  frame 10: function foo::bar() at my/file.cc:10"
-// You may additionally pass "strip_path_prefix" to strip prefixes of paths to
-// source files and modules.
-// Here's the full list of available placeholders:
-//   %% - represents a '%' character;
-//   %n - frame number (copy of frame_no);
-//   %p - PC in hex format;
-//   %m - path to module (binary or shared object);
-//   %o - offset in the module in hex format;
-//   %f - function name;
-//   %q - offset in the function in hex format (*if available*);
-//   %s - path to source file;
-//   %l - line in the source file;
-//   %c - column in the source file;
-//   %F - if function is known to be <foo>, prints "in <foo>", possibly
-//        followed by the offset in this function, but only if source file
-//        is unknown;
-//   %S - prints file/line/column information;
-//   %L - prints location information: file/line/column, if it is known, or
-//        module+offset if it is known, or (<unknown module>) string.
-//   %M - prints module basename and offset, if it is known, or PC.
-void RenderFrame(InternalScopedString *buffer, const char *format, int frame_no,
-                 uptr address, const AddressInfo *info, bool vs_style,
-                 const char *strip_path_prefix = "");
-
-bool RenderNeedsSymbolization(const char *format);
-
-void RenderSourceLocation(InternalScopedString *buffer, const char *file,
-                          int line, int column, bool vs_style,
-                          const char *strip_path_prefix);
-
-void RenderModuleLocation(InternalScopedString *buffer, const char *module,
-                          uptr offset, ModuleArch arch,
-                          const char *strip_path_prefix);
-
-// Same as RenderFrame, but for data section (global variables).
-// Accepts %s, %l from above.
-// Also accepts:
-//   %g - name of the global variable.
-void RenderData(InternalScopedString *buffer, const char *format,
-                const DataInfo *DI, const char *strip_path_prefix = "");
+// StackTracePrinter is an interface that is implemented by
+// classes that can perform rendering of the different parts
+// of a stacktrace.
+class StackTracePrinter {
+ public:
+  static StackTracePrinter *GetOrInit();
+
+  // Strip interceptor prefixes from function name.
+  const char *StripFunctionName(const char *function);
+
+  virtual void RenderFrame(InternalScopedString *buffer, const char *format,
+                           int frame_no, uptr address, const AddressInfo *info,
+                           bool vs_style, const char *strip_path_prefix = "") {
+    // Should be pure virtual, but we can't depend on __cxa_pure_virtual.
+    UNIMPLEMENTED();
+  }
+
+  virtual bool RenderNeedsSymbolization(const char *format) {
+    // Should be pure virtual, but we can't depend on __cxa_pure_virtual.
+    UNIMPLEMENTED();
+  }
+
+  void RenderSourceLocation(InternalScopedString *buffer, const char *file,
+                            int line, int column, bool vs_style,
+                            const char *strip_path_prefix);
+
+  void RenderModuleLocation(InternalScopedString *buffer, const char *module,
+                            uptr offset, ModuleArch arch,
+                            const char *strip_path_prefix);
+  virtual void RenderData(InternalScopedString *buffer, const char *format,
+                          const DataInfo *DI,
+                          const char *strip_path_prefix = "") {
+    // Should be pure virtual, but we can't depend on __cxa_pure_virtual.
+    UNIMPLEMENTED();
+  }
+
+ private:
+  // To be called from StackTracePrinter::GetOrInit
+  static StackTracePrinter *NewStackTracePrinter();
+
+ protected:
+  ~StackTracePrinter() {}
+};
+
+class FormattedStackTracePrinter : public StackTracePrinter {
+ public:
+  // Render the contents of "info" structure, which represents the contents of
+  // stack frame "frame_no" and appends it to the "buffer". "format" is a
+  // string with placeholders, which is copied to the output with
+  // placeholders substituted with the contents of "info". For example,
+  // format string
+  //   "  frame %n: function %F at %S"
+  // will be turned into
+  //   "  frame 10: function foo::bar() at my/file.cc:10"
+  // You may additionally pass "strip_path_prefix" to strip prefixes of paths to
+  // source files and modules.
+  // Here's the full list of available placeholders:
+  //   %% - represents a '%' character;
+  //   %n - frame number (copy of frame_no);
+  //   %p - PC in hex format;
+  //   %m - path to module (binary or shared object);
+  //   %o - offset in the module in hex format;
+  //   %f - function name;
+  //   %q - offset in the function in hex format (*if available*);
+  //   %s - path to source file;
+  //   %l - line in the source file;
+  //   %c - column in the source file;
+  //   %F - if function is known to be <foo>, prints "in <foo>", possibly
+  //        followed by the offset in this function, but only if source file
+  //        is unknown;
+  //   %S - prints file/line/column information;
+  //   %L - prints location information: file/line/column, if it is known, or
+  //        module+offset if it is known, or (<unknown module>) string.
+  //   %M - prints module basename and offset, if it is known, or PC.
+  void RenderFrame(InternalScopedString *buffer, const char *format,
+                   int frame_no, uptr address, const AddressInfo *info,
+                   bool vs_style, const char *strip_path_prefix = "") override;
+
+  bool RenderNeedsSymbolization(const char *format) override;
+
+  // Same as RenderFrame, but for data section (global variables).
+  // Accepts %s, %l from above.
+  // Also accepts:
+  //   %g - name of the global variable.
+  void RenderData(InternalScopedString *buffer, const char *format,
+                  const DataInfo *DI,
+                  const char *strip_path_prefix = "") override;
+
+ protected:
+  ~FormattedStackTracePrinter() {}
+};
 
 }  // namespace __sanitizer
 
lib/tsan/sanitizer_common/sanitizer_stacktrace_sparc.cpp
@@ -58,17 +58,16 @@ void BufferedStackTrace::UnwindFast(uptr pc, uptr bp, uptr stack_top,
   // Avoid infinite loop when frame == frame[0] by using frame > prev_frame.
   while (IsValidFrame(bp, stack_top, bottom) && IsAligned(bp, sizeof(uhwptr)) &&
          size < max_depth) {
-    uhwptr pc1 = ((uhwptr *)bp)[15];
+    // %o7 contains the address of the call instruction and not the
+    // return address, so we need to compensate.
+    uhwptr pc1 = GetNextInstructionPc(((uhwptr *)bp)[15]);
     // Let's assume that any pointer in the 0th page is invalid and
     // stop unwinding here.  If we're adding support for a platform
     // where this isn't true, we need to reconsider this check.
     if (pc1 < kPageSize)
       break;
-    if (pc1 != pc) {
-      // %o7 contains the address of the call instruction and not the
-      // return address, so we need to compensate.
-      trace_buffer[size++] = GetNextInstructionPc((uptr)pc1);
-    }
+    if (pc1 != pc)
+      trace_buffer[size++] = pc1;
     bottom = bp;
     bp = (uptr)((uhwptr *)bp)[14] + STACK_BIAS;
   }
lib/tsan/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp
@@ -257,8 +257,8 @@ static void TracerThreadDieCallback() {
 static void TracerThreadSignalHandler(int signum, __sanitizer_siginfo *siginfo,
                                       void *uctx) {
   SignalContext ctx(siginfo, uctx);
-  Printf("Tracer caught signal %d: addr=0x%zx pc=0x%zx sp=0x%zx\n", signum,
-         ctx.addr, ctx.pc, ctx.sp);
+  Printf("Tracer caught signal %d: addr=%p pc=%p sp=%p\n", signum,
+         (void *)ctx.addr, (void *)ctx.pc, (void *)ctx.sp);
   ThreadSuspender *inst = thread_suspender_instance;
   if (inst) {
     if (signum == SIGABRT)
@@ -565,7 +565,7 @@ PtraceRegistersStatus SuspendedThreadsListLinux::GetRegistersAndSP(
   constexpr uptr uptr_sz = sizeof(uptr);
   int pterrno;
 #ifdef ARCH_IOVEC_FOR_GETREGSET
-  auto append = [&](uptr regset) {
+  auto AppendF = [&](uptr regset) {
     uptr size = buffer->size();
     // NT_X86_XSTATE requires 64bit alignment.
     uptr size_up = RoundUpTo(size, 8 / uptr_sz);
@@ -596,11 +596,11 @@ PtraceRegistersStatus SuspendedThreadsListLinux::GetRegistersAndSP(
   };
 
   buffer->clear();
-  bool fail = !append(NT_PRSTATUS);
+  bool fail = !AppendF(NT_PRSTATUS);
   if (!fail) {
     // Accept the first available and do not report errors.
     for (uptr regs : kExtraRegs)
-      if (regs && append(regs))
+      if (regs && AppendF(regs))
         break;
   }
 #else
lib/tsan/sanitizer_common/sanitizer_stoptheworld_netbsd_libcdep.cpp
@@ -158,8 +158,8 @@ static void TracerThreadDieCallback() {
 static void TracerThreadSignalHandler(int signum, __sanitizer_siginfo *siginfo,
                                       void *uctx) {
   SignalContext ctx(siginfo, uctx);
-  Printf("Tracer caught signal %d: addr=0x%zx pc=0x%zx sp=0x%zx\n", signum,
-         ctx.addr, ctx.pc, ctx.sp);
+  Printf("Tracer caught signal %d: addr=%p pc=%p sp=%p\n", signum,
+         (void *)ctx.addr, (void *)ctx.pc, (void *)ctx.sp);
   ThreadSuspender *inst = thread_suspender_instance;
   if (inst) {
     if (signum == SIGABRT)
lib/tsan/sanitizer_common/sanitizer_suppressions.cpp
@@ -86,7 +86,7 @@ void SuppressionContext::ParseFromFile(const char *filename) {
   }
 
   Parse(file_contents);
-  UnmapOrDie(file_contents, contents_size);
+  UnmapOrDie(file_contents, buffer_size);
 }
 
 bool SuppressionContext::Match(const char *str, const char *type,
@@ -138,7 +138,10 @@ void SuppressionContext::Parse(const char *str) {
         }
       }
       if (type == suppression_types_num_) {
-        Printf("%s: failed to parse suppressions\n", SanitizerToolName);
+        Printf("%s: failed to parse suppressions.\n", SanitizerToolName);
+        Printf("Supported suppression types are:\n");
+        for (type = 0; type < suppression_types_num_; type++)
+          Printf("- %s\n", suppression_types_[type]);
         Die();
       }
       Suppression s;
lib/tsan/sanitizer_common/sanitizer_symbolizer.cpp
@@ -10,6 +10,8 @@
 // run-time libraries.
 //===----------------------------------------------------------------------===//
 
+#include <errno.h>
+
 #include "sanitizer_allocator_internal.h"
 #include "sanitizer_common.h"
 #include "sanitizer_internal_defs.h"
@@ -128,7 +130,7 @@ Symbolizer::Symbolizer(IntrusiveList<SymbolizerTool> tools)
       start_hook_(0), end_hook_(0) {}
 
 Symbolizer::SymbolizerScope::SymbolizerScope(const Symbolizer *sym)
-    : sym_(sym) {
+    : sym_(sym), errno_(errno) {
   if (sym_->start_hook_)
     sym_->start_hook_();
 }
@@ -136,6 +138,7 @@ Symbolizer::SymbolizerScope::SymbolizerScope(const Symbolizer *sym)
 Symbolizer::SymbolizerScope::~SymbolizerScope() {
   if (sym_->end_hook_)
     sym_->end_hook_();
+  errno = errno_;
 }
 
 }  // namespace __sanitizer
lib/tsan/sanitizer_common/sanitizer_symbolizer.h
@@ -64,6 +64,26 @@ struct SymbolizedStack {
   SymbolizedStack();
 };
 
+class SymbolizedStackHolder {
+  SymbolizedStack *Stack;
+
+  void clear() {
+    if (Stack)
+      Stack->ClearAll();
+  }
+
+ public:
+  explicit SymbolizedStackHolder(SymbolizedStack *Stack = nullptr)
+      : Stack(Stack) {}
+  ~SymbolizedStackHolder() { clear(); }
+  void reset(SymbolizedStack *S = nullptr) {
+    if (Stack != S)
+      clear();
+    Stack = S;
+  }
+  const SymbolizedStack *get() const { return Stack; }
+};
+
 // For now, DataInfo is used to describe global variable.
 struct DataInfo {
   // Owns all the string members. Storage for them is
@@ -136,7 +156,7 @@ class Symbolizer final {
 
   // Release internal caches (if any).
   void Flush();
-  // Attempts to demangle the provided C++ mangled name.
+  // Attempts to demangle the provided C++ mangled name. Never returns nullptr.
   const char *Demangle(const char *name);
 
   // Allow user to install hooks that would be called before/after Symbolizer
@@ -154,6 +174,8 @@ class Symbolizer final {
 
   void InvalidateModuleList();
 
+  const ListOfModules &GetRefreshedListOfModules();
+
  private:
   // GetModuleNameAndOffsetForPC has to return a string to the caller.
   // Since the corresponding module might get unloaded later, we should create
@@ -187,7 +209,7 @@ class Symbolizer final {
   // If stale, need to reload the modules before looking up addresses.
   bool modules_fresh_;
 
-  // Platform-specific default demangler, must not return nullptr.
+  // Platform-specific default demangler, returns nullptr on failure.
   const char *PlatformDemangle(const char *name);
 
   static Symbolizer *symbolizer_;
@@ -212,6 +234,7 @@ class Symbolizer final {
     ~SymbolizerScope();
    private:
     const Symbolizer *sym_;
+    int errno_;  // Backup errno in case symbolizer change the value.
   };
 };
 
lib/tsan/sanitizer_common/sanitizer_symbolizer_internal.h
@@ -160,6 +160,15 @@ void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res);
 // Used by LLVMSymbolizer and InternalSymbolizer.
 void ParseSymbolizeDataOutput(const char *str, DataInfo *info);
 
+// Parses repeated strings in the following format:
+//   <function_name>
+//   <var_name>
+//   <file_name>:<line_number>[:<column_number>]
+//   [<frame_offset>|??] [<size>|??] [<tag_offset>|??]
+// Used by LLVMSymbolizer and InternalSymbolizer.
+void ParseSymbolizeFrameOutput(const char *str,
+                               InternalMmapVector<LocalInfo> *locals);
+
 }  // namespace __sanitizer
 
 #endif  // SANITIZER_SYMBOLIZER_INTERNAL_H
lib/tsan/sanitizer_common/sanitizer_symbolizer_libbacktrace.cpp
@@ -199,7 +199,7 @@ static char *DemangleAlloc(const char *name, bool always_alloc) {
 #endif
   if (always_alloc)
     return internal_strdup(name);
-  return 0;
+  return nullptr;
 }
 
 const char *LibbacktraceSymbolizer::Demangle(const char *name) {
lib/tsan/sanitizer_common/sanitizer_symbolizer_libcdep.cpp
@@ -117,7 +117,7 @@ bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) {
       return true;
     }
   }
-  return true;
+  return false;
 }
 
 bool Symbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) {
@@ -133,7 +133,7 @@ bool Symbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) {
       return true;
     }
   }
-  return true;
+  return false;
 }
 
 bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
@@ -159,13 +159,16 @@ void Symbolizer::Flush() {
 }
 
 const char *Symbolizer::Demangle(const char *name) {
+  CHECK(name);
   Lock l(&mu_);
   for (auto &tool : tools_) {
     SymbolizerScope sym_scope(this);
     if (const char *demangled = tool.Demangle(name))
       return demangled;
   }
-  return PlatformDemangle(name);
+  if (const char *demangled = PlatformDemangle(name))
+    return demangled;
+  return name;
 }
 
 bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address,
@@ -188,6 +191,13 @@ void Symbolizer::RefreshModules() {
   modules_fresh_ = true;
 }
 
+const ListOfModules &Symbolizer::GetRefreshedListOfModules() {
+  if (!modules_fresh_)
+    RefreshModules();
+
+  return modules_;
+}
+
 static const LoadedModule *SearchForModule(const ListOfModules &modules,
                                            uptr address) {
   for (uptr i = 0; i < modules.size(); i++) {
@@ -382,8 +392,8 @@ void ParseSymbolizeDataOutput(const char *str, DataInfo *info) {
   str = ExtractUptr(str, "\n", &info->line);
 }
 
-static void ParseSymbolizeFrameOutput(const char *str,
-                                      InternalMmapVector<LocalInfo> *locals) {
+void ParseSymbolizeFrameOutput(const char *str,
+                               InternalMmapVector<LocalInfo> *locals) {
   if (internal_strncmp(str, "??", 2) == 0)
     return;
 
lib/tsan/sanitizer_common/sanitizer_symbolizer_mac.cpp
@@ -42,7 +42,8 @@ bool DlAddrSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
   }
 
   const char *demangled = DemangleSwiftAndCXX(info.dli_sname);
-  if (!demangled) return false;
+  if (!demangled)
+    demangled = info.dli_sname;
   stack->info.function = internal_strdup(demangled);
   return true;
 }
@@ -52,6 +53,8 @@ bool DlAddrSymbolizer::SymbolizeData(uptr addr, DataInfo *datainfo) {
   int result = dladdr((const void *)addr, &info);
   if (!result) return false;
   const char *demangled = DemangleSwiftAndCXX(info.dli_sname);
+  if (!demangled)
+    demangled = info.dli_sname;
   datainfo->name = internal_strdup(demangled);
   datainfo->start = (uptr)info.dli_saddr;
   return true;
lib/tsan/sanitizer_common/sanitizer_symbolizer_markup.cpp
@@ -8,143 +8,155 @@
 //
 // This file is shared between various sanitizers' runtime libraries.
 //
-// Implementation of offline markup symbolizer.
+// This generic support for offline symbolizing is based on the
+// Fuchsia port.  We don't do any actual symbolization per se.
+// Instead, we emit text containing raw addresses and raw linkage
+// symbol names, embedded in Fuchsia's symbolization markup format.
+// See the spec at:
+// https://llvm.org/docs/SymbolizerMarkupFormat.html
 //===----------------------------------------------------------------------===//
 
-#include "sanitizer_platform.h"
-#if SANITIZER_SYMBOLIZER_MARKUP
-
-#if SANITIZER_FUCHSIA
-#include "sanitizer_symbolizer_fuchsia.h"
-#  endif
+#include "sanitizer_symbolizer_markup.h"
 
-#  include <limits.h>
-#  include <unwind.h>
-
-#  include "sanitizer_stacktrace.h"
-#  include "sanitizer_symbolizer.h"
+#include "sanitizer_common.h"
+#include "sanitizer_symbolizer.h"
+#include "sanitizer_symbolizer_markup_constants.h"
 
 namespace __sanitizer {
 
-// This generic support for offline symbolizing is based on the
-// Fuchsia port.  We don't do any actual symbolization per se.
-// Instead, we emit text containing raw addresses and raw linkage
-// symbol names, embedded in Fuchsia's symbolization markup format.
-// Fuchsia's logging infrastructure emits enough information about
-// process memory layout that a post-processing filter can do the
-// symbolization and pretty-print the markup.  See the spec at:
-// https://fuchsia.googlesource.com/zircon/+/master/docs/symbolizer_markup.md
-
-// This is used by UBSan for type names, and by ASan for global variable names.
-// It's expected to return a static buffer that will be reused on each call.
-const char *Symbolizer::Demangle(const char *name) {
-  static char buffer[kFormatDemangleMax];
-  internal_snprintf(buffer, sizeof(buffer), kFormatDemangle, name);
-  return buffer;
+void MarkupStackTracePrinter::RenderData(InternalScopedString *buffer,
+                                         const char *format, const DataInfo *DI,
+                                         const char *strip_path_prefix) {
+  RenderContext(buffer);
+  buffer->AppendF(kFormatData, reinterpret_cast<void *>(DI->start));
 }
 
-// This is used mostly for suppression matching.  Making it work
-// would enable "interceptor_via_lib" suppressions.  It's also used
-// once in UBSan to say "in module ..." in a message that also
-// includes an address in the module, so post-processing can already
-// pretty-print that so as to indicate the module.
-bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
-                                             uptr *module_address) {
+bool MarkupStackTracePrinter::RenderNeedsSymbolization(const char *format) {
   return false;
 }
 
-// This is mainly used by hwasan for online symbolization. This isn't needed
-// since hwasan can always just dump stack frames for offline symbolization.
-bool Symbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) { return false; }
-
-// This is used in some places for suppression checking, which we
-// don't really support for Fuchsia.  It's also used in UBSan to
-// identify a PC location to a function name, so we always fill in
-// the function member with a string containing markup around the PC
-// value.
-// TODO(mcgrathr): Under SANITIZER_GO, it's currently used by TSan
-// to render stack frames, but that should be changed to use
-// RenderStackFrame.
-SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) {
-  SymbolizedStack *s = SymbolizedStack::New(addr);
+// We don't support the stack_trace_format flag at all.
+void MarkupStackTracePrinter::RenderFrame(InternalScopedString *buffer,
+                                          const char *format, int frame_no,
+                                          uptr address, const AddressInfo *info,
+                                          bool vs_style,
+                                          const char *strip_path_prefix) {
+  CHECK(!RenderNeedsSymbolization(format));
+  RenderContext(buffer);
+  buffer->AppendF(kFormatFrame, frame_no, reinterpret_cast<void *>(address));
+}
+
+bool MarkupSymbolizerTool::SymbolizePC(uptr addr, SymbolizedStack *stack) {
   char buffer[kFormatFunctionMax];
-  internal_snprintf(buffer, sizeof(buffer), kFormatFunction, addr);
-  s->info.function = internal_strdup(buffer);
-  return s;
+  internal_snprintf(buffer, sizeof(buffer), kFormatFunction,
+                    reinterpret_cast<void *>(addr));
+  stack->info.function = internal_strdup(buffer);
+  return true;
 }
 
-// Always claim we succeeded, so that RenderDataInfo will be called.
-bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) {
+bool MarkupSymbolizerTool::SymbolizeData(uptr addr, DataInfo *info) {
   info->Clear();
   info->start = addr;
   return true;
 }
 
-// We ignore the format argument to __sanitizer_symbolize_global.
-void RenderData(InternalScopedString *buffer, const char *format,
-                const DataInfo *DI, const char *strip_path_prefix) {
-  buffer->append(kFormatData, DI->start);
+const char *MarkupSymbolizerTool::Demangle(const char *name) {
+  static char buffer[kFormatDemangleMax];
+  internal_snprintf(buffer, sizeof(buffer), kFormatDemangle, name);
+  return buffer;
 }
 
-bool RenderNeedsSymbolization(const char *format) { return false; }
-
-// We don't support the stack_trace_format flag at all.
-void RenderFrame(InternalScopedString *buffer, const char *format, int frame_no,
-                 uptr address, const AddressInfo *info, bool vs_style,
-                 const char *strip_path_prefix) {
-  CHECK(!RenderNeedsSymbolization(format));
-  buffer->append(kFormatFrame, frame_no, address);
+// Fuchsia's implementation of symbolizer markup doesn't need to emit contextual
+// elements at this point.
+// Fuchsia's logging infrastructure emits enough information about
+// process memory layout that a post-processing filter can do the
+// symbolization and pretty-print the markup.
+#if !SANITIZER_FUCHSIA
+
+static bool ModulesEq(const LoadedModule &module,
+                      const RenderedModule &renderedModule) {
+  return module.base_address() == renderedModule.base_address &&
+         internal_memcmp(module.uuid(), renderedModule.uuid,
+                         module.uuid_size()) == 0 &&
+         internal_strcmp(module.full_name(), renderedModule.full_name) == 0;
 }
 
-Symbolizer *Symbolizer::PlatformInit() {
-  return new (symbolizer_allocator_) Symbolizer({});
+static bool ModuleHasBeenRendered(
+    const LoadedModule &module,
+    const InternalMmapVectorNoCtor<RenderedModule> &renderedModules) {
+  for (const auto &renderedModule : renderedModules)
+    if (ModulesEq(module, renderedModule))
+      return true;
+
+  return false;
 }
 
-void Symbolizer::LateInitialize() { Symbolizer::GetOrInit(); }
-
-void StartReportDeadlySignal() {}
-void ReportDeadlySignal(const SignalContext &sig, u32 tid,
-                        UnwindSignalStackCallbackType unwind,
-                        const void *unwind_context) {}
-
-#if SANITIZER_CAN_SLOW_UNWIND
-struct UnwindTraceArg {
-  BufferedStackTrace *stack;
-  u32 max_depth;
-};
-
-_Unwind_Reason_Code Unwind_Trace(struct _Unwind_Context *ctx, void *param) {
-  UnwindTraceArg *arg = static_cast<UnwindTraceArg *>(param);
-  CHECK_LT(arg->stack->size, arg->max_depth);
-  uptr pc = _Unwind_GetIP(ctx);
-  if (pc < PAGE_SIZE) return _URC_NORMAL_STOP;
-  arg->stack->trace_buffer[arg->stack->size++] = pc;
-  return (arg->stack->size == arg->max_depth ? _URC_NORMAL_STOP
-                                             : _URC_NO_REASON);
+static void RenderModule(InternalScopedString *buffer,
+                         const LoadedModule &module, uptr moduleId) {
+  InternalScopedString buildIdBuffer;
+  for (uptr i = 0; i < module.uuid_size(); i++)
+    buildIdBuffer.AppendF("%02x", module.uuid()[i]);
+
+  buffer->AppendF(kFormatModule, moduleId, module.full_name(),
+                  buildIdBuffer.data());
+  buffer->Append("\n");
 }
 
-void BufferedStackTrace::UnwindSlow(uptr pc, u32 max_depth) {
-  CHECK_GE(max_depth, 2);
-  size = 0;
-  UnwindTraceArg arg = {this, Min(max_depth + 1, kStackTraceMax)};
-  _Unwind_Backtrace(Unwind_Trace, &arg);
-  CHECK_GT(size, 0);
-  // We need to pop a few frames so that pc is on top.
-  uptr to_pop = LocatePcInTrace(pc);
-  // trace_buffer[0] belongs to the current function so we always pop it,
-  // unless there is only 1 frame in the stack trace (1 frame is always better
-  // than 0!).
-  PopStackFrames(Min(to_pop, static_cast<uptr>(1)));
-  trace_buffer[0] = pc;
+static void RenderMmaps(InternalScopedString *buffer,
+                        const LoadedModule &module, uptr moduleId) {
+  InternalScopedString accessBuffer;
+
+  // All module mmaps are readable at least
+  for (const auto &range : module.ranges()) {
+    accessBuffer.Append("r");
+    if (range.writable)
+      accessBuffer.Append("w");
+    if (range.executable)
+      accessBuffer.Append("x");
+
+    //{{{mmap:%starting_addr:%size_in_hex:load:%moduleId:r%(w|x):%relative_addr}}}
+
+    // module.base_address == dlpi_addr
+    // range.beg == dlpi_addr + p_vaddr
+    // relative address == p_vaddr == range.beg - module.base_address
+    buffer->AppendF(kFormatMmap, reinterpret_cast<void *>(range.beg),
+                    range.end - range.beg, static_cast<int>(moduleId),
+                    accessBuffer.data(), range.beg - module.base_address());
+
+    buffer->Append("\n");
+    accessBuffer.clear();
+  }
 }
 
-void BufferedStackTrace::UnwindSlow(uptr pc, void *context, u32 max_depth) {
-  CHECK(context);
-  CHECK_GE(max_depth, 2);
-  UNREACHABLE("signal context doesn't exist");
+void MarkupStackTracePrinter::RenderContext(InternalScopedString *buffer) {
+  if (renderedModules_.size() == 0)
+    buffer->Append("{{{reset}}}\n");
+
+  const auto &modules = Symbolizer::GetOrInit()->GetRefreshedListOfModules();
+
+  for (const auto &module : modules) {
+    if (ModuleHasBeenRendered(module, renderedModules_))
+      continue;
+
+    // symbolizer markup id, used to refer to this module from other contextual
+    // elements
+    uptr moduleId = renderedModules_.size();
+
+    RenderModule(buffer, module, moduleId);
+    RenderMmaps(buffer, module, moduleId);
+
+    renderedModules_.push_back({
+        internal_strdup(module.full_name()),
+        module.base_address(),
+        {},
+    });
+
+    // kModuleUUIDSize is the size of RenderedModule::uuid
+    CHECK_GE(kModuleUUIDSize, module.uuid_size());
+    internal_memcpy(renderedModules_.back().uuid, module.uuid(),
+                    module.uuid_size());
+  }
 }
-#endif  // SANITIZER_CAN_SLOW_UNWIND
+#endif  // !SANITIZER_FUCHSIA
 
 }  // namespace __sanitizer
-
-#endif  // SANITIZER_SYMBOLIZER_MARKUP
lib/tsan/sanitizer_common/sanitizer_symbolizer_markup.h
@@ -0,0 +1,79 @@
+//===-- sanitizer_symbolizer_markup.h -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file is shared between various sanitizers' runtime libraries.
+//
+//  Header for the offline markup symbolizer.
+//===----------------------------------------------------------------------===//
+#ifndef SANITIZER_SYMBOLIZER_MARKUP_H
+#define SANITIZER_SYMBOLIZER_MARKUP_H
+
+#include "sanitizer_common.h"
+#include "sanitizer_stacktrace_printer.h"
+#include "sanitizer_symbolizer.h"
+#include "sanitizer_symbolizer_internal.h"
+
+namespace __sanitizer {
+
+// Simpler view of a LoadedModule. It only holds information necessary to
+// identify unique modules.
+struct RenderedModule {
+  char *full_name;           // Name string of the module.
+  uptr base_address;         // Base address of the module.
+  u8 uuid[kModuleUUIDSize];  // BuildId
+};
+
+class MarkupStackTracePrinter : public StackTracePrinter {
+ public:
+  // We don't support the stack_trace_format flag at all.
+  void RenderFrame(InternalScopedString *buffer, const char *format,
+                   int frame_no, uptr address, const AddressInfo *info,
+                   bool vs_style, const char *strip_path_prefix = "") override;
+
+  bool RenderNeedsSymbolization(const char *format) override;
+
+  // We ignore the format argument to __sanitizer_symbolize_global.
+  void RenderData(InternalScopedString *buffer, const char *format,
+                  const DataInfo *DI,
+                  const char *strip_path_prefix = "") override;
+
+ private:
+  // Keeps track of the modules that have been rendered to avoid re-rendering
+  // them.
+  InternalMmapVector<RenderedModule> renderedModules_;
+  // Has separate Fuchsia and non-Fuchsia definitions.
+  void RenderContext(InternalScopedString *buffer);
+
+ protected:
+  ~MarkupStackTracePrinter() {}  // Protected: not destructible by outside code.
+};
+
+// SymbolizerTool backend for the offline markup symbolizer.
+class MarkupSymbolizerTool final : public SymbolizerTool {
+ public:
+  // This is used in some places for suppression checking, which we
+  // don't really support for Fuchsia.  It's also used in UBSan to
+  // identify a PC location to a function name, so we always fill in
+  // the function member with a string containing markup around the PC
+  // value.
+  // TODO(mcgrathr): Under SANITIZER_GO, it's currently used by TSan
+  // to render stack frames, but that should be changed to use
+  // RenderStackFrame.
+  bool SymbolizePC(uptr addr, SymbolizedStack *stack) override;
+
+  // Always claim we succeeded, so that RenderDataInfo will be called.
+  bool SymbolizeData(uptr addr, DataInfo *info) override;
+
+  // May return NULL if demangling failed.
+  // This is used by UBSan for type names, and by ASan for global variable
+  // names. It's expected to return a static buffer that will be reused on each
+  // call.
+  const char *Demangle(const char *name) override;
+};
+
+}  // namespace __sanitizer
+
+#endif  // SANITIZER_SYMBOLIZER_MARKUP_H
lib/tsan/sanitizer_common/sanitizer_symbolizer_fuchsia.h → lib/tsan/sanitizer_common/sanitizer_symbolizer_markup_constants.h
@@ -1,4 +1,5 @@
-//===-- sanitizer_symbolizer_fuchsia.h -----------------------------------===//
+//===-- sanitizer_symbolizer_markup_constants.h
+//-----------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -8,10 +9,10 @@
 //
 // This file is shared between various sanitizers' runtime libraries.
 //
-// Define Fuchsia's string formats and limits for the markup symbolizer.
+// Define string formats and limits for the markup symbolizer.
 //===----------------------------------------------------------------------===//
-#ifndef SANITIZER_SYMBOLIZER_FUCHSIA_H
-#define SANITIZER_SYMBOLIZER_FUCHSIA_H
+#ifndef SANITIZER_SYMBOLIZER_MARKUP_CONSTANTS_H
+#define SANITIZER_SYMBOLIZER_MARKUP_CONSTANTS_H
 
 #include "sanitizer_internal_defs.h"
 
@@ -32,11 +33,17 @@ constexpr uptr kFormatFunctionMax = 64;  // More than big enough for 64-bit hex.
 constexpr const char *kFormatData = "{{{data:%p}}}";
 
 // One frame in a backtrace (printed on a line by itself).
-constexpr const char *kFormatFrame = "{{{bt:%u:%p}}}";
+constexpr const char *kFormatFrame = "{{{bt:%d:%p}}}";
+
+// Module contextual element.
+constexpr const char *kFormatModule = "{{{module:%zu:%s:elf:%s}}}";
+
+// mmap for a module segment.
+constexpr const char *kFormatMmap = "{{{mmap:%p:0x%zx:load:%d:%s:0x%zx}}}";
 
 // Dump trigger element.
 #define FORMAT_DUMPFILE "{{{dumpfile:%s:%s}}}"
 
 }  // namespace __sanitizer
 
-#endif  // SANITIZER_SYMBOLIZER_FUCHSIA_H
+#endif  // SANITIZER_SYMBOLIZER_MARKUP_CONSTANTS_H
lib/tsan/sanitizer_common/sanitizer_symbolizer_markup_fuchsia.cpp
@@ -0,0 +1,85 @@
+//===-- sanitizer_symbolizer_markup_fuchsia.cpp ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is shared between various sanitizers' runtime libraries.
+//
+// Fuchsia specific implementation of offline markup symbolizer.
+//===----------------------------------------------------------------------===//
+#include "sanitizer_platform.h"
+
+#if SANITIZER_SYMBOLIZER_MARKUP
+
+#  include "sanitizer_common.h"
+#  include "sanitizer_stacktrace_printer.h"
+#  include "sanitizer_symbolizer.h"
+#  include "sanitizer_symbolizer_markup.h"
+#  include "sanitizer_symbolizer_markup_constants.h"
+
+namespace __sanitizer {
+
+// Formats `name` with kFormatDemangle into a function-local static array and
+// returns that array. UBSan uses this for type names and ASan for global
+// variable names; callers expect a static buffer that is overwritten by the
+// next call.
+const char *Symbolizer::Demangle(const char *name) {
+  static char markup[kFormatDemangleMax];
+  internal_snprintf(markup, kFormatDemangleMax, kFormatDemangle, name);
+  return markup;
+}
+
+// This is used mostly for suppression matching.  Making it work
+// would enable "interceptor_via_lib" suppressions.  It's also used
+// once in UBSan to say "in module ..." in a message that also
+// includes an address in the module, so post-processing can already
+// pretty-print that so as to indicate the module.
+bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
+                                             uptr *module_address) {
+  return false;  // Never resolves a module; the out-parameters are untouched.
+}
+
+// hwasan mainly uses this for online symbolization. It isn't needed here
+// because hwasan can always just dump stack frames for offline symbolization,
+// so every request is reported as failed and `info` is left untouched.
+bool Symbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) {
+  return false;
+}
+
+// Some callers use this for suppression checking, which isn't really
+// supported for Fuchsia. UBSan also uses it to map a PC location to a
+// function name, so the function member is always filled in with a string
+// containing markup around the PC value.
+// TODO(mcgrathr): Under SANITIZER_GO, it's currently used by TSan
+// to render stack frames, but that should be changed to use
+// RenderStackFrame.
+SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) {
+  SymbolizedStack *frame = SymbolizedStack::New(addr);
+  char markup[kFormatFunctionMax];
+  internal_snprintf(markup, kFormatFunctionMax, kFormatFunction, addr);
+  // The frame stores its own duplicate; the local array is only scratch space.
+  frame->info.function = internal_strdup(markup);
+  return frame;
+}
+
+// Always claim we succeeded, so that RenderDataInfo will be called.
+bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) {
+  info->Clear();
+  info->start = addr;  // The only field set after clearing.
+  return true;
+}
+
+// Fuchsia only uses MarkupStackTracePrinter, so that is the printer created
+// here, placed in storage from the global low-level allocator.
+StackTracePrinter *StackTracePrinter::NewStackTracePrinter() {
+  MarkupStackTracePrinter *printer =
+      new (GetGlobalLowLevelAllocator()) MarkupStackTracePrinter();
+  return printer;
+}
+
+// The Fuchsia definition of RenderContext appends nothing to the buffer.
+void MarkupStackTracePrinter::RenderContext(InternalScopedString *) {
+}
+
+// Constructs the Symbolizer in storage from symbolizer_allocator_, passing an
+// empty braced initializer as the constructor argument.
+Symbolizer *Symbolizer::PlatformInit() {
+  return new (symbolizer_allocator_) Symbolizer({});
+}
+
+// Calls GetOrInit() for its effect only; the returned pointer is discarded.
+void Symbolizer::LateInitialize() {
+  Symbolizer::GetOrInit();
+}
+
+}  // namespace __sanitizer
+
+#endif  // SANITIZER_SYMBOLIZER_MARKUP
lib/tsan/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "sanitizer_platform.h"
+#include "sanitizer_symbolizer_markup.h"
 #if SANITIZER_POSIX
 #  include <dlfcn.h>  // for dlsym()
 #  include <errno.h>
@@ -56,7 +57,7 @@ const char *DemangleCXXABI(const char *name) {
           __cxxabiv1::__cxa_demangle(name, 0, 0, 0))
       return demangled_name;
 
-  return name;
+  return nullptr;
 }
 
 // As of now, there are no headers for the Swift runtime. Once they are
@@ -324,9 +325,12 @@ __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset,
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool
 __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset,
                            char *Buffer, int MaxLength);
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool
+__sanitizer_symbolize_frame(const char *ModuleName, u64 ModuleOffset,
+                            char *Buffer, int MaxLength);
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void
 __sanitizer_symbolize_flush();
-SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE int
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool
 __sanitizer_symbolize_demangle(const char *Name, char *Buffer, int MaxLength);
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool
 __sanitizer_symbolize_set_demangle(bool Demangle);
@@ -337,19 +341,19 @@ __sanitizer_symbolize_set_inline_frames(bool InlineFrames);
 class InternalSymbolizer final : public SymbolizerTool {
  public:
   static InternalSymbolizer *get(LowLevelAllocator *alloc) {
-    if (__sanitizer_symbolize_set_demangle)
-      CHECK(__sanitizer_symbolize_set_demangle(common_flags()->demangle));
-    if (__sanitizer_symbolize_set_inline_frames)
-      CHECK(__sanitizer_symbolize_set_inline_frames(
-          common_flags()->symbolize_inline_frames));
-    if (__sanitizer_symbolize_code && __sanitizer_symbolize_data)
-      return new (*alloc) InternalSymbolizer();
-    return 0;
+    // This one is the most used, so we use it to detect the presence of the
+    // internal symbolizer.
+    if (&__sanitizer_symbolize_code == nullptr)
+      return nullptr;
+    CHECK(__sanitizer_symbolize_set_demangle(common_flags()->demangle));
+    CHECK(__sanitizer_symbolize_set_inline_frames(
+        common_flags()->symbolize_inline_frames));
+    return new (*alloc) InternalSymbolizer();
   }
 
   bool SymbolizePC(uptr addr, SymbolizedStack *stack) override {
     bool result = __sanitizer_symbolize_code(
-        stack->info.module, stack->info.module_offset, buffer_, kBufferSize);
+        stack->info.module, stack->info.module_offset, buffer_, sizeof(buffer_));
     if (result)
       ParseSymbolizePCOutput(buffer_, stack);
     return result;
@@ -357,7 +361,7 @@ class InternalSymbolizer final : public SymbolizerTool {
 
   bool SymbolizeData(uptr addr, DataInfo *info) override {
     bool result = __sanitizer_symbolize_data(info->module, info->module_offset,
-                                             buffer_, kBufferSize);
+                                             buffer_, sizeof(buffer_));
     if (result) {
       ParseSymbolizeDataOutput(buffer_, info);
       info->start += (addr - info->module_offset);  // Add the base address.
@@ -365,34 +369,29 @@ class InternalSymbolizer final : public SymbolizerTool {
     return result;
   }
 
-  void Flush() override {
-    if (__sanitizer_symbolize_flush)
-      __sanitizer_symbolize_flush();
+  bool SymbolizeFrame(uptr addr, FrameInfo *info) override {
+    bool result = __sanitizer_symbolize_frame(info->module, info->module_offset,
+                                              buffer_, sizeof(buffer_));
+    if (result)
+      ParseSymbolizeFrameOutput(buffer_, &info->locals);
+    return result;
   }
 
+  void Flush() override { __sanitizer_symbolize_flush(); }
+
   const char *Demangle(const char *name) override {
-    if (__sanitizer_symbolize_demangle) {
-      for (uptr res_length = 1024;
-           res_length <= InternalSizeClassMap::kMaxSize;) {
-        char *res_buff = static_cast<char *>(InternalAlloc(res_length));
-        uptr req_length =
-            __sanitizer_symbolize_demangle(name, res_buff, res_length);
-        if (req_length > res_length) {
-          res_length = req_length + 1;
-          InternalFree(res_buff);
-          continue;
-        }
-        return res_buff;
-      }
+    if (__sanitizer_symbolize_demangle(name, buffer_, sizeof(buffer_))) {
+      char *res_buff = nullptr;
+      ExtractToken(buffer_, "", &res_buff);
+      return res_buff;
     }
-    return name;
+    return nullptr;
   }
 
  private:
   InternalSymbolizer() {}
 
-  static const int kBufferSize = 16 * 1024;
-  char buffer_[kBufferSize];
+  char buffer_[16 * 1024];
 };
 #  else  // SANITIZER_SUPPORTS_WEAK_HOOKS
 
@@ -470,6 +469,12 @@ static void ChooseSymbolizerTools(IntrusiveList<SymbolizerTool> *list,
     VReport(2, "Symbolizer is disabled.\n");
     return;
   }
+  if (common_flags()->enable_symbolizer_markup) {
+    VReport(2, "Using symbolizer markup");
+    SymbolizerTool *tool = new (*allocator) MarkupSymbolizerTool();
+    CHECK(tool);
+    list->push_back(tool);
+  }
   if (IsAllocatorOutOfMemory()) {
     VReport(2, "Cannot use internal symbolizer: out of memory\n");
   } else if (SymbolizerTool *tool = InternalSymbolizer::get(allocator)) {
lib/tsan/sanitizer_common/sanitizer_symbolizer_report.cpp
@@ -28,14 +28,41 @@
 namespace __sanitizer {
 
 #if !SANITIZER_GO
+
+// Returns true when `frame` is null, or when its source file path or module
+// name contains one of the substrings checked below (compiler-rt sources, C++
+// standard library include directories, clang_rt libraries).
+static bool FrameIsInternal(const SymbolizedStack *frame) {
+  if (frame == nullptr)
+    return true;
+
+  if (const char *file = frame->info.file) {
+    // On Gentoo, the path is g++-*, so there's *not* a missing /.
+    const bool slash_path_match =
+        internal_strstr(file, "/compiler-rt/lib/") ||
+        internal_strstr(file, "/include/c++/") ||
+        internal_strstr(file, "/include/g++");
+    // Backslash-separated spelling of the compiler-rt path.
+    if (slash_path_match || internal_strstr(file, "\\compiler-rt\\lib\\"))
+      return true;
+  }
+
+  if (const char *module = frame->info.module) {
+    if (internal_strstr(module, "libclang_rt.") ||
+        internal_strstr(module, "clang_rt."))
+      return true;
+  }
+
+  return false;
+}
+
+// Walks the `next` chain starting at `frames` and returns the first frame for
+// which FrameIsInternal() is false, or nullptr when every frame is internal
+// (or the list is empty).
+const SymbolizedStack *SkipInternalFrames(const SymbolizedStack *frames) {
+  const SymbolizedStack *cur = frames;
+  while (cur && FrameIsInternal(cur))
+    cur = cur->next;
+  return cur;
+}
+
 void ReportErrorSummary(const char *error_type, const AddressInfo &info,
                         const char *alt_tool_name) {
   if (!common_flags()->print_summary) return;
   InternalScopedString buff;
-  buff.append("%s ", error_type);
-  RenderFrame(&buff, "%L %F", 0, info.address, &info,
-              common_flags()->symbolize_vs_style,
-              common_flags()->strip_path_prefix);
+  buff.AppendF("%s ", error_type);
+  StackTracePrinter::GetOrInit()->RenderFrame(
+      &buff, "%L %F", 0, info.address, &info,
+      common_flags()->symbolize_vs_style, common_flags()->strip_path_prefix);
   ReportErrorSummary(buff.data(), alt_tool_name);
 }
 #endif
@@ -75,16 +102,33 @@ void ReportErrorSummary(const char *error_type, const StackTrace *stack,
 #if !SANITIZER_GO
   if (!common_flags()->print_summary)
     return;
-  if (stack->size == 0) {
-    ReportErrorSummary(error_type);
-    return;
+
+  // Find first non-internal stack frame.
+  for (uptr i = 0; i < stack->size; ++i) {
+    uptr pc = StackTrace::GetPreviousInstructionPc(stack->trace[i]);
+    SymbolizedStackHolder symbolized_stack(
+        Symbolizer::GetOrInit()->SymbolizePC(pc));
+    if (const SymbolizedStack *frame = symbolized_stack.get()) {
+      if (const SymbolizedStack *summary_frame = SkipInternalFrames(frame)) {
+        ReportErrorSummary(error_type, summary_frame->info, alt_tool_name);
+        return;
+      }
+    }
+  }
+
+  // Fallback to the top one.
+  if (stack->size) {
+    uptr pc = StackTrace::GetPreviousInstructionPc(stack->trace[0]);
+    SymbolizedStackHolder symbolized_stack(
+        Symbolizer::GetOrInit()->SymbolizePC(pc));
+    if (const SymbolizedStack *frame = symbolized_stack.get()) {
+      ReportErrorSummary(error_type, frame->info, alt_tool_name);
+      return;
+    }
   }
-  // Currently, we include the first stack frame into the report summary.
-  // Maybe sometimes we need to choose another frame (e.g. skip memcpy/etc).
-  uptr pc = StackTrace::GetPreviousInstructionPc(stack->trace[0]);
-  SymbolizedStack *frame = Symbolizer::GetOrInit()->SymbolizePC(pc);
-  ReportErrorSummary(error_type, frame->info, alt_tool_name);
-  frame->ClearAll();
+
+  // Fallback to a summary without location.
+  ReportErrorSummary(error_type);
 #endif
 }
 
@@ -148,22 +192,22 @@ static void MaybeReportNonExecRegion(uptr pc) {
 static void PrintMemoryByte(InternalScopedString *str, const char *before,
                             u8 byte) {
   SanitizerCommonDecorator d;
-  str->append("%s%s%x%x%s ", before, d.MemoryByte(), byte >> 4, byte & 15,
-              d.Default());
+  str->AppendF("%s%s%x%x%s ", before, d.MemoryByte(), byte >> 4, byte & 15,
+               d.Default());
 }
 
 static void MaybeDumpInstructionBytes(uptr pc) {
   if (!common_flags()->dump_instruction_bytes || (pc < GetPageSizeCached()))
     return;
   InternalScopedString str;
-  str.append("First 16 instruction bytes at pc: ");
+  str.AppendF("First 16 instruction bytes at pc: ");
   if (IsAccessibleMemoryRange(pc, 16)) {
     for (int i = 0; i < 16; ++i) {
       PrintMemoryByte(&str, "", ((u8 *)pc)[i]);
     }
-    str.append("\n");
+    str.AppendF("\n");
   } else {
-    str.append("unaccessible\n");
+    str.AppendF("unaccessible\n");
   }
   Report("%s", str.data());
 }
lib/tsan/sanitizer_common/sanitizer_symbolizer_report_fuchsia.cpp
@@ -0,0 +1,33 @@
+//===-- sanitizer_symbolizer_report_fuchsia.cpp
+//-----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the report functions for fuchsia.
+//
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_platform.h"
+
+#if SANITIZER_SYMBOLIZER_MARKUP
+
+#  include "sanitizer_common.h"
+
+// Deadly-signal reporting entry points for builds where
+// SANITIZER_SYMBOLIZER_MARKUP is set; all three have empty bodies.
+namespace __sanitizer {
+void StartReportDeadlySignal() {}  // No-op.
+
+void ReportDeadlySignal(const SignalContext &sig, u32 tid,
+                        UnwindSignalStackCallbackType unwind,
+                        const void *unwind_context) {}  // No-op.
+
+void HandleDeadlySignal(void *siginfo, void *context, u32 tid,
+                        UnwindSignalStackCallbackType unwind,
+                        const void *unwind_context) {}  // No-op.
+
+}  // namespace __sanitizer
+
+#endif  // SANITIZER_SYMBOLIZER_MARKUP
lib/tsan/sanitizer_common/sanitizer_symbolizer_win.cpp
@@ -175,9 +175,7 @@ const char *WinSymbolizerTool::Demangle(const char *name) {
     return name;
 }
 
-const char *Symbolizer::PlatformDemangle(const char *name) {
-  return name;
-}
+const char *Symbolizer::PlatformDemangle(const char *name) { return nullptr; }
 
 namespace {
 struct ScopedHandle {
@@ -233,7 +231,7 @@ bool SymbolizerProcess::StartSymbolizerSubprocess() {
     CHECK(!internal_strchr(arg, '"') && "quotes in args unsupported");
     CHECK(arglen > 0 && arg[arglen - 1] != '\\' &&
           "args ending in backslash and empty args unsupported");
-    command_line.append("\"%s\" ", arg);
+    command_line.AppendF("\"%s\" ", arg);
   }
   VReport(3, "Launching symbolizer command: %s\n", command_line.data());
 
lib/tsan/sanitizer_common/sanitizer_syscall_linux_hexagon.inc
@@ -0,0 +1,131 @@
+//===-- sanitizer_syscall_linux_hexagon.inc ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementations of internal_syscall and internal_iserror for Linux/hexagon.
+//
+//===----------------------------------------------------------------------===//
+
+#define SYSCALL(name) __NR_##name
+
+// Expands to two comma-separated `long` expressions: elements 0 and 1 of a
+// union that overlays the `long long` value x with a two-element long array.
+#define __internal_syscall_LL_E(x) \
+  ((union {                        \
+    long long ll;                  \
+    long l[2];                     \
+  }){.ll = x})                     \
+      .l[0],                       \
+      ((union {                    \
+        long long ll;              \
+        long l[2];                 \
+      }){.ll = x})                 \
+          .l[1]
+// Same pair as __internal_syscall_LL_E, preceded by a literal 0 argument.
+// This used to expand to __SYSCALL_LL_E, a name this file never defines; it
+// now refers to the helper defined just above.
+#define __internal_syscall_LL_O(x) 0, __internal_syscall_LL_E((x))
+
+// Executes `trap0(#1)` with the given input operands, binds the asm output to
+// the caller's local variable `r0`, and then returns `r0` from the enclosing
+// function. The enclosing function must declare `r0`, and nothing placed after
+// this macro in that function runs.
+#define __asm_syscall(...)                                                 \
+  do {                                                                     \
+    __asm__ __volatile__("trap0(#1)" : "=r"(r0) : __VA_ARGS__ : "memory"); \
+    return r0;                                                             \
+  } while (0)
+
+// Arity-0 entry; the parenthesized name calls the function overload below.
+#define __internal_syscall0(n) (__internal_syscall)(n)
+
+// Performs syscall number n with no arguments; returns the value left in r0.
+static uptr __internal_syscall(long n) {
+  register u32 r6 __asm__("r6") = n;  // Syscall number goes in r6.
+  register u32 r0 __asm__("r0");      // Output only; no initial value.
+  __asm_syscall("r"(r6));
+}
+
+// Arity-1 entry; the argument is cast to long before the call.
+#define __internal_syscall1(n, a1) (__internal_syscall)(n, (long)(a1))
+
+// Performs syscall number n with one argument; returns the value left in r0.
+static uptr __internal_syscall(long n, long a) {
+  register u32 r6 __asm__("r6") = n;  // Syscall number goes in r6.
+  register u32 r0 __asm__("r0") = a;  // First argument; also holds the result.
+  // The "0" constraint ties the r0 input to output operand 0.
+  __asm_syscall("r"(r6), "0"(r0));
+}
+
+// Arity-2 entry; every argument is cast to long before the call.
+#define __internal_syscall2(n, a1, a2) \
+  (__internal_syscall)(n, (long)(a1), (long)(a2))
+
+// Performs syscall number n with two arguments (r0, r1); returns r0.
+static uptr __internal_syscall(long n, long a, long b) {
+  register u32 r6 __asm__("r6") = n;  // Syscall number goes in r6.
+  register u32 r0 __asm__("r0") = a;  // First argument; also holds the result.
+  register u32 r1 __asm__("r1") = b;
+  __asm_syscall("r"(r6), "0"(r0), "r"(r1));
+}
+
+// Arity-3 entry; every argument is cast to long before the call.
+#define __internal_syscall3(n, a1, a2, a3) \
+  (__internal_syscall)(n, (long)(a1), (long)(a2), (long)(a3))
+
+// Performs syscall number n with three arguments (r0-r2); returns r0.
+static uptr __internal_syscall(long n, long a, long b, long c) {
+  register u32 r6 __asm__("r6") = n;  // Syscall number goes in r6.
+  register u32 r0 __asm__("r0") = a;  // First argument; also holds the result.
+  register u32 r1 __asm__("r1") = b;
+  register u32 r2 __asm__("r2") = c;
+  __asm_syscall("r"(r6), "0"(r0), "r"(r1), "r"(r2));
+}
+
+// Arity-4 entry; every argument is cast to long before the call.
+#define __internal_syscall4(n, a1, a2, a3, a4) \
+  (__internal_syscall)(n, (long)(a1), (long)(a2), (long)(a3), (long)(a4))
+
+// Performs syscall number n with four arguments (r0-r3); returns r0.
+static uptr __internal_syscall(long n, long a, long b, long c, long d) {
+  register u32 r6 __asm__("r6") = n;  // Syscall number goes in r6.
+  register u32 r0 __asm__("r0") = a;  // First argument; also holds the result.
+  register u32 r1 __asm__("r1") = b;
+  register u32 r2 __asm__("r2") = c;
+  register u32 r3 __asm__("r3") = d;
+  __asm_syscall("r"(r6), "0"(r0), "r"(r1), "r"(r2), "r"(r3));
+}
+
+// Arity-5 entry; every argument is cast to long before the call.
+#define __internal_syscall5(n, a1, a2, a3, a4, a5)                        \
+  (__internal_syscall)(n, (long)(a1), (long)(a2), (long)(a3), (long)(a4), \
+                       (long)(a5))
+
+// Performs syscall number n with five arguments (r0-r4); returns r0.
+static uptr __internal_syscall(long n, long a, long b, long c, long d, long e) {
+  register u32 r6 __asm__("r6") = n;  // Syscall number goes in r6.
+  register u32 r0 __asm__("r0") = a;  // First argument; also holds the result.
+  register u32 r1 __asm__("r1") = b;
+  register u32 r2 __asm__("r2") = c;
+  register u32 r3 __asm__("r3") = d;
+  register u32 r4 __asm__("r4") = e;
+  __asm_syscall("r"(r6), "0"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4));
+}
+
+// Arity-6 entry; every argument is cast to long before the call.
+#define __internal_syscall6(n, a1, a2, a3, a4, a5, a6)                    \
+  (__internal_syscall)(n, (long)(a1), (long)(a2), (long)(a3), (long)(a4), \
+                       (long)(a5), (long)(a6))
+
+// Performs syscall number n with six arguments (r0-r5); returns r0.
+static uptr __internal_syscall(long n, long a, long b, long c, long d, long e,
+                               long f) {
+  register u32 r6 __asm__("r6") = n;  // Syscall number goes in r6.
+  register u32 r0 __asm__("r0") = a;  // First argument; also holds the result.
+  register u32 r1 __asm__("r1") = b;
+  register u32 r2 __asm__("r2") = c;
+  register u32 r3 __asm__("r3") = d;
+  register u32 r4 __asm__("r4") = e;
+  register u32 r5 __asm__("r5") = f;
+  __asm_syscall("r"(r6), "0"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4), "r"(r5));
+}
+
+// Argument-count dispatch. __SYSCALL_NARGS yields the number of arguments that
+// follow the syscall number (0-7) by selecting the ninth entry of the padded
+// list; __SYSCALL_DISP pastes that count onto the prefix b and invokes the
+// resulting macro with the original arguments.
+#define __SYSCALL_NARGS_X(a1, a2, a3, a4, a5, a6, a7, a8, n, ...) n
+#define __SYSCALL_NARGS(...) \
+  __SYSCALL_NARGS_X(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1, 0, )
+#define __SYSCALL_CONCAT_X(a, b) a##b
+#define __SYSCALL_CONCAT(a, b) __SYSCALL_CONCAT_X(a, b)
+#define __SYSCALL_DISP(b, ...) \
+  __SYSCALL_CONCAT(b, __SYSCALL_NARGS(__VA_ARGS__))(__VA_ARGS__)
+
+// Public entry: internal_syscall(nr, args...) expands to __internal_syscallN.
+#define internal_syscall(...) __SYSCALL_DISP(__internal_syscall, __VA_ARGS__)
+
+// Helper function used to avoid clobbering of errno.
+// A raw syscall result in the top 4095 values of the uptr range is treated as
+// a failure: the negated value is stored through rverrno (when it is non-null)
+// and true is returned. Any other value returns false and writes nothing.
+bool internal_iserror(uptr retval, int *rverrno) {
+  const bool failed = retval >= (uptr)-4095;
+  if (failed && rverrno)
+    *rverrno = -retval;
+  return failed;
+}
lib/tsan/sanitizer_common/sanitizer_syscall_linux_loongarch64.inc
@@ -0,0 +1,171 @@
+//===-- sanitizer_syscall_linux_loongarch64.inc -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementations of internal_syscall and internal_iserror for
+// Linux/loongarch64.
+//
+//===----------------------------------------------------------------------===//
+
+// About local register variables:
+// https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables
+//
+// Kernel ABI:
+// https://lore.kernel.org/loongarch/1f353678-3398-e30b-1c87-6edb278f74db@xen0n.name/T/#m1613bc86c2d7bf5f6da92bd62984302bfd699a2f
+//  syscall number is placed in a7
+//  parameters, if present, are placed in a0-a6
+//  upon return:
+//    the return value is placed in a0
+//    t0-t8 should be considered clobbered
+//    all other registers are preserved
+#define SYSCALL(name) __NR_##name
+
+#define INTERNAL_SYSCALL_CLOBBERS \
+  "memory", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8"
+
+// Issues `syscall 0` with the syscall number in $a7 and no arguments; returns
+// the value found in $a0 afterwards.
+static uptr __internal_syscall(u64 nr) {
+  register u64 a7 asm("$a7") = nr;
+  register u64 a0 asm("$a0");  // Output only ("=r"); no initial value.
+  __asm__ volatile("syscall 0\n\t"
+                   : "=r"(a0)
+                   : "r"(a7)
+                   : INTERNAL_SYSCALL_CLOBBERS);
+  return a0;
+}
+#define __internal_syscall0(n) (__internal_syscall)(n)
+
+// One-argument syscall: number in $a7, argument in $a0; returns $a0.
+static uptr __internal_syscall(u64 nr, u64 arg1) {
+  register u64 a7 asm("$a7") = nr;
+  register u64 a0 asm("$a0") = arg1;  // "+r": argument in, result out.
+  __asm__ volatile("syscall 0\n\t"
+                   : "+r"(a0)
+                   : "r"(a7)
+                   : INTERNAL_SYSCALL_CLOBBERS);
+  return a0;
+}
+#define __internal_syscall1(n, a1) (__internal_syscall)(n, (u64)(a1))
+
+// Two-argument syscall: number in $a7, arguments in $a0-$a1; returns $a0.
+static uptr __internal_syscall(u64 nr, u64 arg1, long arg2) {
+  register u64 a7 asm("$a7") = nr;
+  register u64 a0 asm("$a0") = arg1;  // "+r": argument in, result out.
+  register u64 a1 asm("$a1") = arg2;
+  __asm__ volatile("syscall 0\n\t"
+                   : "+r"(a0)
+                   : "r"(a7), "r"(a1)
+                   : INTERNAL_SYSCALL_CLOBBERS);
+  return a0;
+}
+#define __internal_syscall2(n, a1, a2) \
+  (__internal_syscall)(n, (u64)(a1), (long)(a2))
+
+// Three-argument syscall: number in $a7, arguments in $a0-$a2; returns $a0.
+static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3) {
+  register u64 a7 asm("$a7") = nr;
+  register u64 a0 asm("$a0") = arg1;  // "+r": argument in, result out.
+  register u64 a1 asm("$a1") = arg2;
+  register u64 a2 asm("$a2") = arg3;
+  __asm__ volatile("syscall 0\n\t"
+                   : "+r"(a0)
+                   : "r"(a7), "r"(a1), "r"(a2)
+                   : INTERNAL_SYSCALL_CLOBBERS);
+  return a0;
+}
+#define __internal_syscall3(n, a1, a2, a3) \
+  (__internal_syscall)(n, (u64)(a1), (long)(a2), (long)(a3))
+
+// Four-argument syscall: number in $a7, arguments in $a0-$a3; returns $a0.
+static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3,
+                               u64 arg4) {
+  register u64 a7 asm("$a7") = nr;
+  register u64 a0 asm("$a0") = arg1;  // "+r": argument in, result out.
+  register u64 a1 asm("$a1") = arg2;
+  register u64 a2 asm("$a2") = arg3;
+  register u64 a3 asm("$a3") = arg4;
+  __asm__ volatile("syscall 0\n\t"
+                   : "+r"(a0)
+                   : "r"(a7), "r"(a1), "r"(a2), "r"(a3)
+                   : INTERNAL_SYSCALL_CLOBBERS);
+  return a0;
+}
+// NOTE(review): a4 is cast to long here while the parameter arg4 is u64; the
+// value is converted implicitly at the call.
+#define __internal_syscall4(n, a1, a2, a3, a4) \
+  (__internal_syscall)(n, (u64)(a1), (long)(a2), (long)(a3), (long)(a4))
+
+// Five-argument syscall: number in $a7, arguments in $a0-$a4; returns $a0.
+static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3, u64 arg4,
+                               long arg5) {
+  register u64 a7 asm("$a7") = nr;
+  register u64 a0 asm("$a0") = arg1;  // "+r": argument in, result out.
+  register u64 a1 asm("$a1") = arg2;
+  register u64 a2 asm("$a2") = arg3;
+  register u64 a3 asm("$a3") = arg4;
+  register u64 a4 asm("$a4") = arg5;
+  __asm__ volatile("syscall 0\n\t"
+                   : "+r"(a0)
+                   : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4)
+                   : INTERNAL_SYSCALL_CLOBBERS);
+  return a0;
+}
+// NOTE(review): the casts for a4 (long) and a5 (u64) are swapped relative to
+// the parameter types (u64 arg4, long arg5); the values convert implicitly.
+#define __internal_syscall5(n, a1, a2, a3, a4, a5)                       \
+  (__internal_syscall)(n, (u64)(a1), (long)(a2), (long)(a3), (long)(a4), \
+                       (u64)(a5))
+
+// Six-argument syscall: number in $a7, arguments in $a0-$a5; returns $a0.
+static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3, u64 arg4,
+                               long arg5, long arg6) {
+  register u64 a7 asm("$a7") = nr;
+  register u64 a0 asm("$a0") = arg1;  // "+r": argument in, result out.
+  register u64 a1 asm("$a1") = arg2;
+  register u64 a2 asm("$a2") = arg3;
+  register u64 a3 asm("$a3") = arg4;
+  register u64 a4 asm("$a4") = arg5;
+  register u64 a5 asm("$a5") = arg6;
+  __asm__ volatile("syscall 0\n\t"
+                   : "+r"(a0)
+                   : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5)
+                   : INTERNAL_SYSCALL_CLOBBERS);
+  return a0;
+}
+// NOTE(review): the casts for a4 (long) and a5 (u64) are swapped relative to
+// the parameter types (u64 arg4, long arg5); the values convert implicitly.
+#define __internal_syscall6(n, a1, a2, a3, a4, a5, a6)                   \
+  (__internal_syscall)(n, (u64)(a1), (long)(a2), (long)(a3), (long)(a4), \
+                       (u64)(a5), (long)(a6))
+
+// Seven-argument syscall: number in $a7, arguments in $a0-$a6; returns $a0.
+static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3, u64 arg4,
+                               long arg5, long arg6, long arg7) {
+  register u64 a7 asm("$a7") = nr;
+  register u64 a0 asm("$a0") = arg1;  // "+r": argument in, result out.
+  register u64 a1 asm("$a1") = arg2;
+  register u64 a2 asm("$a2") = arg3;
+  register u64 a3 asm("$a3") = arg4;
+  register u64 a4 asm("$a4") = arg5;
+  register u64 a5 asm("$a5") = arg6;
+  register u64 a6 asm("$a6") = arg7;
+  __asm__ volatile("syscall 0\n\t"
+                   : "+r"(a0)
+                   : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5),
+                     "r"(a6)
+                   : INTERNAL_SYSCALL_CLOBBERS);
+  return a0;
+}
+// NOTE(review): the casts for a4 (long) and a5 (u64) are swapped relative to
+// the parameter types (u64 arg4, long arg5); the values convert implicitly.
+#define __internal_syscall7(n, a1, a2, a3, a4, a5, a6, a7)               \
+  (__internal_syscall)(n, (u64)(a1), (long)(a2), (long)(a3), (long)(a4), \
+                       (u64)(a5), (long)(a6), (long)(a7))
+
+// Argument-count dispatch. __SYSCALL_NARGS yields the number of arguments that
+// follow the syscall number (0-7) by selecting the ninth entry of the padded
+// list; __SYSCALL_DISP pastes that count onto the prefix b and invokes the
+// resulting macro with the original arguments.
+#define __SYSCALL_NARGS_X(a1, a2, a3, a4, a5, a6, a7, a8, n, ...) n
+#define __SYSCALL_NARGS(...) \
+  __SYSCALL_NARGS_X(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1, 0, )
+#define __SYSCALL_CONCAT_X(a, b) a##b
+#define __SYSCALL_CONCAT(a, b) __SYSCALL_CONCAT_X(a, b)
+#define __SYSCALL_DISP(b, ...) \
+  __SYSCALL_CONCAT(b, __SYSCALL_NARGS(__VA_ARGS__))(__VA_ARGS__)
+
+// Public entry: internal_syscall(nr, args...) expands to __internal_syscallN.
+#define internal_syscall(...) __SYSCALL_DISP(__internal_syscall, __VA_ARGS__)
+
+// Helper function used to avoid clobbering of errno.
+// Values below (uptr)-4095 are successes and return false. Anything at or
+// above that threshold is an error: its negation is written to
+// *internal_errno when the pointer is non-null, and true is returned.
+bool internal_iserror(uptr retval, int *internal_errno) {
+  if (retval < (uptr)-4095)
+    return false;
+  if (internal_errno != nullptr)
+    *internal_errno = -retval;
+  return true;
+}
lib/tsan/sanitizer_common/sanitizer_syscall_linux_riscv64.inc
@@ -0,0 +1,174 @@
+//===-- sanitizer_syscall_linux_riscv64.inc ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementations of internal_syscall and internal_iserror for Linux/riscv64.
+//
+//===----------------------------------------------------------------------===//
+
+// About local register variables:
+// https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables
+//
+// Kernel ABI...
+// To my surprise I haven't found much information regarding it.
+// Kernel source and internet browsing shows that:
+//  - the syscall number is passed in a7
+//    (http://man7.org/linux/man-pages/man2/syscall.2.html);
+//  - results are returned in a0 and a1 (see the same man page);
+//  - arguments are passed in a0-a7 (see below).
+//
+//  Regarding the arguments: the only "documentation" I could find is
+//  this comment (!!!) by Bruce Hoult on Google Groups (!!!):
+//    https://groups.google.com/a/groups.riscv.org/forum/#!topic/sw-dev/exbrzM3GZDQ
+//    Confirmed by inspecting glibc sources.
+//  Great way to document things.
+#define SYSCALL(name) __NR_##name
+
+#define INTERNAL_SYSCALL_CLOBBERS "memory"
+
+static uptr __internal_syscall(u64 nr) {
+  register u64 a7 asm("a7") = nr;  // syscall number
+  register u64 a0 asm("a0");       // output only: receives the result
+  __asm__ volatile("ecall\n\t"
+                   : "=r"(a0)
+                   : "r"(a7)
+                   : INTERNAL_SYSCALL_CLOBBERS);
+  return a0;  // raw result; decode with internal_iserror()
+}
+#define __internal_syscall0(n) (__internal_syscall)(n)
+
+static uptr __internal_syscall(u64 nr, u64 arg1) {
+  register u64 a7 asm("a7") = nr;    // syscall number
+  register u64 a0 asm("a0") = arg1;  // first argument; also receives the result
+  __asm__ volatile("ecall\n\t"
+                   : "+r"(a0)  // read-write: argument in, return value out
+                   : "r"(a7)
+                   : INTERNAL_SYSCALL_CLOBBERS);
+  return a0;  // raw result; decode with internal_iserror()
+}
+#define __internal_syscall1(n, a1) (__internal_syscall)(n, (u64)(a1))
+
+static uptr __internal_syscall(u64 nr, u64 arg1, long arg2) {
+  register u64 a7 asm("a7") = nr;    // syscall number
+  register u64 a0 asm("a0") = arg1;  // first argument; also receives the result
+  register u64 a1 asm("a1") = arg2;
+  __asm__ volatile("ecall\n\t"
+                   : "+r"(a0)  // read-write: argument in, return value out
+                   : "r"(a7), "r"(a1)
+                   : INTERNAL_SYSCALL_CLOBBERS);
+  return a0;  // raw result; decode with internal_iserror()
+}
+#define __internal_syscall2(n, a1, a2) \
+  (__internal_syscall)(n, (u64)(a1), (long)(a2))
+
+static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3) {
+  register u64 a7 asm("a7") = nr;    // syscall number
+  register u64 a0 asm("a0") = arg1;  // first argument; also receives the result
+  register u64 a1 asm("a1") = arg2;
+  register u64 a2 asm("a2") = arg3;
+  __asm__ volatile("ecall\n\t"
+                   : "+r"(a0)  // read-write: argument in, return value out
+                   : "r"(a7), "r"(a1), "r"(a2)
+                   : INTERNAL_SYSCALL_CLOBBERS);
+  return a0;  // raw result; decode with internal_iserror()
+}
+#define __internal_syscall3(n, a1, a2, a3) \
+  (__internal_syscall)(n, (u64)(a1), (long)(a2), (long)(a3))
+
+static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3,
+                               u64 arg4) {
+  register u64 a7 asm("a7") = nr;    // syscall number
+  register u64 a0 asm("a0") = arg1;  // first argument; also receives the result
+  register u64 a1 asm("a1") = arg2;
+  register u64 a2 asm("a2") = arg3;
+  register u64 a3 asm("a3") = arg4;
+  __asm__ volatile("ecall\n\t"
+                   : "+r"(a0)  // read-write: argument in, return value out
+                   : "r"(a7), "r"(a1), "r"(a2), "r"(a3)
+                   : INTERNAL_SYSCALL_CLOBBERS);
+  return a0;  // raw result; decode with internal_iserror()
+}
+#define __internal_syscall4(n, a1, a2, a3, a4) \
+  (__internal_syscall)(n, (u64)(a1), (long)(a2), (long)(a3), (long)(a4))  // NOTE(review): a4 is cast to long but the parameter is u64 arg4
+
+static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3, u64 arg4,
+                               long arg5) {
+  register u64 a7 asm("a7") = nr;    // syscall number
+  register u64 a0 asm("a0") = arg1;  // first argument; also receives the result
+  register u64 a1 asm("a1") = arg2;
+  register u64 a2 asm("a2") = arg3;
+  register u64 a3 asm("a3") = arg4;
+  register u64 a4 asm("a4") = arg5;
+  __asm__ volatile("ecall\n\t"
+                   : "+r"(a0)  // read-write: argument in, return value out
+                   : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4)
+                   : INTERNAL_SYSCALL_CLOBBERS);
+  return a0;  // raw result; decode with internal_iserror()
+}
+#define __internal_syscall5(n, a1, a2, a3, a4, a5)                       \
+  (__internal_syscall)(n, (u64)(a1), (long)(a2), (long)(a3), (long)(a4), \
+                       (u64)(a5))  // NOTE(review): a4/a5 casts are swapped relative to the parameter types (u64 arg4, long arg5)
+
+static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3, u64 arg4,
+                               long arg5, long arg6) {
+  register u64 a7 asm("a7") = nr;    // syscall number
+  register u64 a0 asm("a0") = arg1;  // first argument; also receives the result
+  register u64 a1 asm("a1") = arg2;
+  register u64 a2 asm("a2") = arg3;
+  register u64 a3 asm("a3") = arg4;
+  register u64 a4 asm("a4") = arg5;
+  register u64 a5 asm("a5") = arg6;
+  __asm__ volatile("ecall\n\t"
+                   : "+r"(a0)  // read-write: argument in, return value out
+                   : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5)
+                   : INTERNAL_SYSCALL_CLOBBERS);
+  return a0;  // raw result; decode with internal_iserror()
+}
+#define __internal_syscall6(n, a1, a2, a3, a4, a5, a6)                   \
+  (__internal_syscall)(n, (u64)(a1), (long)(a2), (long)(a3), (long)(a4), \
+                       (u64)(a5), (long)(a6))  // NOTE(review): a4/a5 casts are swapped relative to the parameter types (u64 arg4, long arg5)
+
+static uptr __internal_syscall(u64 nr, u64 arg1, long arg2, long arg3, u64 arg4,
+                               long arg5, long arg6, long arg7) {
+  register u64 a7 asm("a7") = nr;    // syscall number
+  register u64 a0 asm("a0") = arg1;  // first argument; also receives the result
+  register u64 a1 asm("a1") = arg2;
+  register u64 a2 asm("a2") = arg3;
+  register u64 a3 asm("a3") = arg4;
+  register u64 a4 asm("a4") = arg5;
+  register u64 a5 asm("a5") = arg6;
+  register u64 a6 asm("a6") = arg7;
+  __asm__ volatile("ecall\n\t"
+                   : "+r"(a0)  // read-write: argument in, return value out
+                   : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5),
+                     "r"(a6)
+                   : INTERNAL_SYSCALL_CLOBBERS);
+  return a0;  // raw result; decode with internal_iserror()
+}
+#define __internal_syscall7(n, a1, a2, a3, a4, a5, a6, a7)               \
+  (__internal_syscall)(n, (u64)(a1), (long)(a2), (long)(a3), (long)(a4), \
+                       (u64)(a5), (long)(a6), (long)(a7))  // NOTE(review): a4/a5 casts are swapped relative to the parameter types (u64 arg4, long arg5)
+
+#define __SYSCALL_NARGS_X(a1, a2, a3, a4, a5, a6, a7, a8, n, ...) n  // selects the 9th argument
+#define __SYSCALL_NARGS(...) \
+  __SYSCALL_NARGS_X(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1, 0, )  // yields the argument count, not counting the leading syscall number
+#define __SYSCALL_CONCAT_X(a, b) a##b
+#define __SYSCALL_CONCAT(a, b) __SYSCALL_CONCAT_X(a, b)  // extra level so that `b` is macro-expanded before pasting
+#define __SYSCALL_DISP(b, ...) \
+  __SYSCALL_CONCAT(b, __SYSCALL_NARGS(__VA_ARGS__))(__VA_ARGS__)
+
+#define internal_syscall(...) __SYSCALL_DISP(__internal_syscall, __VA_ARGS__)  // expands to __internal_syscallN(...) for N extra arguments
+
+// Helper that decodes a raw syscall result without clobbering errno: returns
+// true on failure and, if rverrno is non-null, stores the error code there.
+bool internal_iserror(uptr retval, int *rverrno) {
+  // Results in the top 4095 values of the uptr range are negated error codes.
+  const bool failed = retval >= (uptr)-4095;
+  if (failed && rverrno)
+    *rverrno = -retval;
+  return failed;
+}
lib/tsan/sanitizer_common/sanitizer_thread_arg_retval.cpp
@@ -23,6 +23,9 @@ void ThreadArgRetval::CreateLocked(uptr thread, bool detached,
   Data& t = data_[thread];
   t = {};
   t.gen = gen_++;
+  static_assert(sizeof(gen_) == sizeof(u32) && kInvalidGen == UINT32_MAX);
+  if (gen_ == kInvalidGen)
+    gen_ = 0;
   t.detached = detached;
   t.args = args;
 }
@@ -53,16 +56,28 @@ void ThreadArgRetval::Finish(uptr thread, void* retval) {
 u32 ThreadArgRetval::BeforeJoin(uptr thread) const {
   __sanitizer::Lock lock(&mtx_);
   auto t = data_.find(thread);
-  CHECK(t);
-  CHECK(!t->second.detached);
-  return t->second.gen;
+  if (t && !t->second.detached) {
+    return t->second.gen;
+  }
+  if (!common_flags()->detect_invalid_join)
+    return kInvalidGen;
+  const char* reason = "unknown";
+  if (!t) {
+    reason = "already joined";
+  } else if (t->second.detached) {
+    reason = "detached";
+  }
+  Report("ERROR: %s: Joining %s thread, aborting.\n", SanitizerToolName,
+         reason);
+  Die();
 }
 
 void ThreadArgRetval::AfterJoin(uptr thread, u32 gen) {
   __sanitizer::Lock lock(&mtx_);
   auto t = data_.find(thread);
   if (!t || gen != t->second.gen) {
-    // Thread was reused and erased by any other event.
+    // Thread was reused and erased by any other event, or we had an invalid
+    // join.
     return;
   }
   CHECK(!t->second.detached);
lib/tsan/sanitizer_common/sanitizer_thread_arg_retval.h
@@ -93,6 +93,7 @@ class SANITIZER_MUTEX ThreadArgRetval {
   // will keep pointers alive forever, missing leaks caused by cancelation.
 
  private:
+  static const u32 kInvalidGen = UINT32_MAX;
   struct Data {
     Args args;
     u32 gen;  // Avoid collision if thread id re-used.
lib/tsan/sanitizer_common/sanitizer_tls_get_addr.cpp
@@ -121,25 +121,26 @@ DTLS::DTV *DTLS_on_tls_get_addr(void *arg_void, void *res,
   uptr tls_size = 0;
   uptr tls_beg = reinterpret_cast<uptr>(res) - arg->offset - kDtvOffset;
   VReport(2,
-          "__tls_get_addr: %p {0x%zx,0x%zx} => %p; tls_beg: 0x%zx; sp: %p "
+          "__tls_get_addr: %p {0x%zx,0x%zx} => %p; tls_beg: %p; sp: %p "
           "num_live_dtls %zd\n",
-          (void *)arg, arg->dso_id, arg->offset, res, tls_beg, (void *)&tls_beg,
+          (void *)arg, arg->dso_id, arg->offset, res, (void *)tls_beg,
+          (void *)&tls_beg,
           atomic_load(&number_of_live_dtls, memory_order_relaxed));
   if (dtls.last_memalign_ptr == tls_beg) {
     tls_size = dtls.last_memalign_size;
-    VReport(2, "__tls_get_addr: glibc <=2.24 suspected; tls={0x%zx,0x%zx}\n",
-            tls_beg, tls_size);
+    VReport(2, "__tls_get_addr: glibc <=2.24 suspected; tls={%p,0x%zx}\n",
+            (void *)tls_beg, tls_size);
   } else if (tls_beg >= static_tls_begin && tls_beg < static_tls_end) {
     // This is the static TLS block which was initialized / unpoisoned at thread
     // creation.
-    VReport(2, "__tls_get_addr: static tls: 0x%zx\n", tls_beg);
+    VReport(2, "__tls_get_addr: static tls: %p\n", (void *)tls_beg);
     tls_size = 0;
   } else if (const void *start =
                  __sanitizer_get_allocated_begin((void *)tls_beg)) {
     tls_beg = (uptr)start;
     tls_size = __sanitizer_get_allocated_size(start);
-    VReport(2, "__tls_get_addr: glibc >=2.25 suspected; tls={0x%zx,0x%zx}\n",
-            tls_beg, tls_size);
+    VReport(2, "__tls_get_addr: glibc >=2.25 suspected; tls={%p,0x%zx}\n",
+            (void *)tls_beg, tls_size);
   } else {
     VReport(2, "__tls_get_addr: Can't guess glibc version\n");
     // This may happen inside the DTOR of main thread, so just ignore it.
lib/tsan/sanitizer_common/sanitizer_unwind_fuchsia.cpp
@@ -0,0 +1,66 @@
+//===-- sanitizer_unwind_fuchsia.cpp --------------------------------------===//
+//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// Sanitizer unwind Fuchsia specific functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_platform.h"
+#if SANITIZER_FUCHSIA
+
+#  include <limits.h>
+#  include <unwind.h>
+
+#  include "sanitizer_common.h"
+#  include "sanitizer_stacktrace.h"
+
+namespace __sanitizer {
+
+#  if SANITIZER_CAN_SLOW_UNWIND
+struct UnwindTraceArg {  // state handed to Unwind_Trace through its void* parameter
+  BufferedStackTrace *stack;  // trace that Unwind_Trace appends PCs to
+  u32 max_depth;              // Unwind_Trace stops once stack->size reaches this
+};
+
+_Unwind_Reason_Code Unwind_Trace(struct _Unwind_Context *ctx, void *param) {  // per-frame callback given to _Unwind_Backtrace
+  UnwindTraceArg *arg = static_cast<UnwindTraceArg *>(param);
+  CHECK_LT(arg->stack->size, arg->max_depth);  // there must still be room for this frame
+  uptr pc = _Unwind_GetIP(ctx);
+  if (pc < GetPageSizeCached())  // a PC below the page size ends the walk without being recorded
+    return _URC_NORMAL_STOP;
+  arg->stack->trace_buffer[arg->stack->size++] = pc;
+  return (arg->stack->size == arg->max_depth ? _URC_NORMAL_STOP  // stop once max_depth frames are stored
+                                             : _URC_NO_REASON);
+}
+
+void BufferedStackTrace::UnwindSlow(uptr pc, u32 max_depth) {
+  CHECK_GE(max_depth, 2);
+  size = 0;
+  UnwindTraceArg arg = {this, Min(max_depth + 1, kStackTraceMax)};  // one slot more than requested, capped at kStackTraceMax
+  _Unwind_Backtrace(Unwind_Trace, &arg);  // Unwind_Trace stores one PC per visited frame
+  CHECK_GT(size, 0);
+  // We need to pop a few frames so that pc is on top.
+  uptr to_pop = LocatePcInTrace(pc);
+  // trace_buffer[0] belongs to the current function so we always pop it,
+  // unless there is only 1 frame in the stack trace (1 frame is always better
+  // than 0!).
+  PopStackFrames(Min(to_pop, static_cast<uptr>(1)));  // NOTE(review): Min() pops at most one frame, and none when to_pop == 0, which does not match the comments above -- confirm intent
+  trace_buffer[0] = pc;  // the top entry is always overwritten with the requested pc
+}
+
+void BufferedStackTrace::UnwindSlow(uptr pc, void *context, u32 max_depth) {  // overload taking a signal context
+  CHECK(context);
+  CHECK_GE(max_depth, 2);
+  UNREACHABLE("signal context doesn't exist");  // reached unconditionally after the checks above
+}
+#  endif  //  SANITIZER_CAN_SLOW_UNWIND
+
+}  // namespace __sanitizer
+
+#endif  // SANITIZER_FUCHSIA
lib/tsan/sanitizer_common/sanitizer_unwind_win.cpp
@@ -70,10 +70,17 @@ void BufferedStackTrace::UnwindSlow(uptr pc, void *context, u32 max_depth) {
   stack_frame.AddrStack.Offset = ctx.Rsp;
 #      endif
 #    else
+#      if SANITIZER_ARM
+  int machine_type = IMAGE_FILE_MACHINE_ARM;
+  stack_frame.AddrPC.Offset = ctx.Pc;
+  stack_frame.AddrFrame.Offset = ctx.R11;
+  stack_frame.AddrStack.Offset = ctx.Sp;
+#      else
   int machine_type = IMAGE_FILE_MACHINE_I386;
   stack_frame.AddrPC.Offset = ctx.Eip;
   stack_frame.AddrFrame.Offset = ctx.Ebp;
   stack_frame.AddrStack.Offset = ctx.Esp;
+#      endif
 #    endif
   stack_frame.AddrPC.Mode = AddrModeFlat;
   stack_frame.AddrFrame.Mode = AddrModeFlat;
lib/tsan/sanitizer_common/sanitizer_win.cpp
@@ -144,7 +144,7 @@ void *MmapOrDie(uptr size, const char *mem_type, bool raw_report) {
   return rv;
 }
 
-void UnmapOrDie(void *addr, uptr size) {
+void UnmapOrDie(void *addr, uptr size, bool raw_report) {
   if (!size || !addr)
     return;
 
@@ -156,10 +156,7 @@ void UnmapOrDie(void *addr, uptr size) {
   // fails try MEM_DECOMMIT.
   if (VirtualFree(addr, 0, MEM_RELEASE) == 0) {
     if (VirtualFree(addr, size, MEM_DECOMMIT) == 0) {
-      Report("ERROR: %s failed to "
-             "deallocate 0x%zx (%zd) bytes at address %p (error code: %d)\n",
-             SanitizerToolName, size, size, addr, GetLastError());
-      CHECK("unable to unmap" && 0);
+      ReportMunmapFailureAndDie(addr, size, GetLastError(), raw_report);
     }
   }
 }
@@ -279,8 +276,8 @@ void *MmapFixedOrDie(uptr fixed_addr, uptr size, const char *name) {
       MEM_COMMIT, PAGE_READWRITE);
   if (p == 0) {
     char mem_type[30];
-    internal_snprintf(mem_type, sizeof(mem_type), "memory at address 0x%zx",
-                      fixed_addr);
+    internal_snprintf(mem_type, sizeof(mem_type), "memory at address %p",
+                      (void *)fixed_addr);
     ReportMmapFailureAndDie(size, mem_type, "allocate", GetLastError());
   }
   return p;
@@ -311,8 +308,8 @@ void *MmapFixedOrDieOnFatalError(uptr fixed_addr, uptr size, const char *name) {
       MEM_COMMIT, PAGE_READWRITE);
   if (p == 0) {
     char mem_type[30];
-    internal_snprintf(mem_type, sizeof(mem_type), "memory at address 0x%zx",
-                      fixed_addr);
+    internal_snprintf(mem_type, sizeof(mem_type), "memory at address %p",
+                      (void *)fixed_addr);
     return ReturnNullptrOnOOMOrDie(size, mem_type, "allocate");
   }
   return p;
@@ -387,9 +384,8 @@ bool DontDumpShadowMemory(uptr addr, uptr length) {
 }
 
 uptr MapDynamicShadow(uptr shadow_size_bytes, uptr shadow_scale,
-                      uptr min_shadow_base_alignment,
-                      UNUSED uptr &high_mem_end) {
-  const uptr granularity = GetMmapGranularity();
+                      uptr min_shadow_base_alignment, UNUSED uptr &high_mem_end,
+                      uptr granularity) {
   const uptr alignment =
       Max<uptr>(granularity << shadow_scale, 1ULL << min_shadow_base_alignment);
   const uptr left_padding =
@@ -996,8 +992,13 @@ void SignalContext::InitPcSpBp() {
   sp = (uptr)context_record->Rsp;
 #    endif
 #  else
+#    if SANITIZER_ARM
+  bp = (uptr)context_record->R11;
+  sp = (uptr)context_record->Sp;
+#    else
   bp = (uptr)context_record->Ebp;
   sp = (uptr)context_record->Esp;
+#    endif
 #  endif
 }
 
lib/tsan/tsan_debugging.cpp
@@ -35,7 +35,9 @@ static const char *ReportTypeDescription(ReportType typ) {
     case ReportTypeSignalUnsafe: return "signal-unsafe-call";
     case ReportTypeErrnoInSignal: return "errno-in-signal-handler";
     case ReportTypeDeadlock: return "lock-order-inversion";
-    // No default case so compiler warns us if we miss one
+    case ReportTypeMutexHeldWrongContext:
+      return "mutex-held-in-wrong-context";
+      // No default case so compiler warns us if we miss one
   }
   UNREACHABLE("missing case");
 }
lib/tsan/tsan_defs.h
@@ -30,7 +30,7 @@
 #  define __MM_MALLOC_H
 #  include <emmintrin.h>
 #  include <smmintrin.h>
-#  define VECTOR_ALIGNED ALIGNED(16)
+#  define VECTOR_ALIGNED alignas(16)
 typedef __m128i m128;
 #else
 #  define VECTOR_ALIGNED
lib/tsan/tsan_dispatch_defs.h
@@ -56,13 +56,6 @@ extern const dispatch_block_t _dispatch_data_destructor_munmap;
 # define DISPATCH_NOESCAPE
 #endif
 
-#if SANITIZER_APPLE
-# define SANITIZER_WEAK_IMPORT extern "C" __attribute((weak_import))
-#else
-# define SANITIZER_WEAK_IMPORT extern "C" __attribute((weak))
-#endif
-
-
 // Data types used in dispatch APIs
 typedef unsigned long size_t;
 typedef unsigned long uintptr_t;
lib/tsan/tsan_interceptors_posix.cpp
@@ -14,6 +14,7 @@
 
 #include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_errno.h"
+#include "sanitizer_common/sanitizer_glibc_version.h"
 #include "sanitizer_common/sanitizer_libc.h"
 #include "sanitizer_common/sanitizer_linux.h"
 #include "sanitizer_common/sanitizer_platform_limits_netbsd.h"
@@ -81,6 +82,8 @@ struct ucontext_t {
 #define PTHREAD_ABI_BASE  "GLIBC_2.17"
 #elif SANITIZER_LOONGARCH64
 #define PTHREAD_ABI_BASE  "GLIBC_2.36"
+#elif SANITIZER_RISCV64
+#  define PTHREAD_ABI_BASE "GLIBC_2.27"
 #endif
 
 extern "C" int pthread_attr_init(void *attr);
@@ -205,7 +208,7 @@ struct AtExitCtx {
 struct InterceptorContext {
   // The object is 64-byte aligned, because we want hot data to be located
   // in a single cache line if possible (it's accessed in every interceptor).
-  ALIGNED(64) LibIgnore libignore;
+  alignas(64) LibIgnore libignore;
   __sanitizer_sigaction sigactions[kSigCount];
 #if !SANITIZER_APPLE && !SANITIZER_NETBSD
   unsigned finalize_key;
@@ -217,7 +220,7 @@ struct InterceptorContext {
   InterceptorContext() : libignore(LINKER_INITIALIZED), atexit_mu(MutexTypeAtExit), AtExitStack() {}
 };
 
-static ALIGNED(64) char interceptor_placeholder[sizeof(InterceptorContext)];
+alignas(64) static char interceptor_placeholder[sizeof(InterceptorContext)];
 InterceptorContext *interceptor_ctx() {
   return reinterpret_cast<InterceptorContext*>(&interceptor_placeholder[0]);
 }
@@ -1085,7 +1088,18 @@ TSAN_INTERCEPTOR(int, pthread_join, void *th, void **ret) {
   return res;
 }
 
-DEFINE_REAL_PTHREAD_FUNCTIONS
+// DEFINE_INTERNAL_PTHREAD_FUNCTIONS
+namespace __sanitizer {
+int internal_pthread_create(void *th, void *attr, void *(*callback)(void *),
+                            void *param) {
+  ScopedIgnoreInterceptors ignore;  // scoped guard, alive for the duration of the real call
+  return REAL(pthread_create)(th, attr, callback, param);
+}
+int internal_pthread_join(void *th, void **ret) {
+  ScopedIgnoreInterceptors ignore;  // scoped guard, alive for the duration of the real call
+  return REAL(pthread_join)(th, ret);  // name only inside REAL(), arguments outside, as in internal_pthread_create
+}
+}  // namespace __sanitizer
 
 TSAN_INTERCEPTOR(int, pthread_detach, void *th) {
   SCOPED_INTERCEPTOR_RAW(pthread_detach, th);
@@ -1338,7 +1352,7 @@ TSAN_INTERCEPTOR(int, pthread_mutex_destroy, void *m) {
 TSAN_INTERCEPTOR(int, pthread_mutex_lock, void *m) {
   SCOPED_TSAN_INTERCEPTOR(pthread_mutex_lock, m);
   MutexPreLock(thr, pc, (uptr)m);
-  int res = REAL(pthread_mutex_lock)(m);
+  int res = BLOCK_REAL(pthread_mutex_lock)(m);
   if (res == errno_EOWNERDEAD)
     MutexRepair(thr, pc, (uptr)m);
   if (res == 0 || res == errno_EOWNERDEAD)
@@ -1378,6 +1392,22 @@ TSAN_INTERCEPTOR(int, pthread_mutex_unlock, void *m) {
   return res;
 }
 
+#if SANITIZER_LINUX
+TSAN_INTERCEPTOR(int, pthread_mutex_clocklock, void *m,
+                 __sanitizer_clockid_t clock, void *abstime) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_mutex_clocklock, m, clock, abstime);
+  MutexPreLock(thr, pc, (uptr)m);
+  int res = BLOCK_REAL(pthread_mutex_clocklock)(m, clock, abstime);  // BLOCK_REAL presumably runs the real call as a blocking region -- confirm
+  if (res == errno_EOWNERDEAD)
+    MutexRepair(thr, pc, (uptr)m);
+  if (res == 0 || res == errno_EOWNERDEAD)  // both results are treated as a successful acquisition
+    MutexPostLock(thr, pc, (uptr)m);
+  if (res == errno_EINVAL)
+    MutexInvalidAccess(thr, pc, (uptr)m);
+  return res;  // the real function's result is returned unchanged
+}
+#endif
+
 #if SANITIZER_GLIBC
 #  if !__GLIBC_PREREQ(2, 34)
 // glibc 2.34 applies a non-default version for the two functions. They are no
@@ -1385,7 +1415,7 @@ TSAN_INTERCEPTOR(int, pthread_mutex_unlock, void *m) {
 TSAN_INTERCEPTOR(int, __pthread_mutex_lock, void *m) {
   SCOPED_TSAN_INTERCEPTOR(__pthread_mutex_lock, m);
   MutexPreLock(thr, pc, (uptr)m);
-  int res = REAL(__pthread_mutex_lock)(m);
+  int res = BLOCK_REAL(__pthread_mutex_lock)(m);
   if (res == errno_EOWNERDEAD)
     MutexRepair(thr, pc, (uptr)m);
   if (res == 0 || res == errno_EOWNERDEAD)
@@ -1428,7 +1458,7 @@ TSAN_INTERCEPTOR(int, pthread_spin_destroy, void *m) {
 TSAN_INTERCEPTOR(int, pthread_spin_lock, void *m) {
   SCOPED_TSAN_INTERCEPTOR(pthread_spin_lock, m);
   MutexPreLock(thr, pc, (uptr)m);
-  int res = REAL(pthread_spin_lock)(m);
+  int res = BLOCK_REAL(pthread_spin_lock)(m);
   if (res == 0) {
     MutexPostLock(thr, pc, (uptr)m);
   }
@@ -1503,7 +1533,7 @@ TSAN_INTERCEPTOR(int, pthread_rwlock_timedrdlock, void *m, void *abstime) {
 TSAN_INTERCEPTOR(int, pthread_rwlock_wrlock, void *m) {
   SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_wrlock, m);
   MutexPreLock(thr, pc, (uptr)m);
-  int res = REAL(pthread_rwlock_wrlock)(m);
+  int res = BLOCK_REAL(pthread_rwlock_wrlock)(m);
   if (res == 0) {
     MutexPostLock(thr, pc, (uptr)m);
   }
@@ -1595,47 +1625,40 @@ TSAN_INTERCEPTOR(int, __fxstat, int version, int fd, void *buf) {
     FdAccess(thr, pc, fd);
   return REAL(__fxstat)(version, fd, buf);
 }
-#define TSAN_MAYBE_INTERCEPT___FXSTAT TSAN_INTERCEPT(__fxstat)
+
+TSAN_INTERCEPTOR(int, __fxstat64, int version, int fd, void *buf) {
+  SCOPED_TSAN_INTERCEPTOR(__fxstat64, version, fd, buf);
+  if (fd > 0)  // NOTE(review): fd 0 is not recorded; confirm that `>= 0` is not what was intended
+    FdAccess(thr, pc, fd);
+  return REAL(__fxstat64)(version, fd, buf);  // forward to the real __fxstat64 with the same arguments
+}
+#define TSAN_MAYBE_INTERCEPT___FXSTAT TSAN_INTERCEPT(__fxstat); TSAN_INTERCEPT(__fxstat64)
 #else
 #define TSAN_MAYBE_INTERCEPT___FXSTAT
 #endif
 
+#if !SANITIZER_GLIBC || __GLIBC_PREREQ(2, 33)
 TSAN_INTERCEPTOR(int, fstat, int fd, void *buf) {
-#if SANITIZER_GLIBC
-  SCOPED_TSAN_INTERCEPTOR(__fxstat, 0, fd, buf);
-  if (fd > 0)
-    FdAccess(thr, pc, fd);
-  return REAL(__fxstat)(0, fd, buf);
-#else
   SCOPED_TSAN_INTERCEPTOR(fstat, fd, buf);
   if (fd > 0)
     FdAccess(thr, pc, fd);
   return REAL(fstat)(fd, buf);
-#endif
-}
-
-#if SANITIZER_GLIBC
-TSAN_INTERCEPTOR(int, __fxstat64, int version, int fd, void *buf) {
-  SCOPED_TSAN_INTERCEPTOR(__fxstat64, version, fd, buf);
-  if (fd > 0)
-    FdAccess(thr, pc, fd);
-  return REAL(__fxstat64)(version, fd, buf);
 }
-#define TSAN_MAYBE_INTERCEPT___FXSTAT64 TSAN_INTERCEPT(__fxstat64)
+#  define TSAN_MAYBE_INTERCEPT_FSTAT TSAN_INTERCEPT(fstat)
 #else
-#define TSAN_MAYBE_INTERCEPT___FXSTAT64
+#  define TSAN_MAYBE_INTERCEPT_FSTAT
 #endif
 
-#if SANITIZER_GLIBC
+#if __GLIBC_PREREQ(2, 33)
 TSAN_INTERCEPTOR(int, fstat64, int fd, void *buf) {
-  SCOPED_TSAN_INTERCEPTOR(__fxstat64, 0, fd, buf);
+  SCOPED_TSAN_INTERCEPTOR(fstat64, fd, buf);
   if (fd > 0)
     FdAccess(thr, pc, fd);
-  return REAL(__fxstat64)(0, fd, buf);
+  return REAL(fstat64)(fd, buf);
 }
-#define TSAN_MAYBE_INTERCEPT_FSTAT64 TSAN_INTERCEPT(fstat64)
+#  define TSAN_MAYBE_INTERCEPT_FSTAT64 TSAN_INTERCEPT(fstat64)
 #else
-#define TSAN_MAYBE_INTERCEPT_FSTAT64
+#  define TSAN_MAYBE_INTERCEPT_FSTAT64
 #endif
 
 TSAN_INTERCEPTOR(int, open, const char *name, int oflag, ...) {
@@ -2565,7 +2588,7 @@ int sigaction_impl(int sig, const __sanitizer_sigaction *act,
     // Copy act into sigactions[sig].
     // Can't use struct copy, because compiler can emit call to memcpy.
     // Can't use internal_memcpy, because it copies byte-by-byte,
-    // and signal handler reads the handler concurrently. It it can read
+    // and signal handler reads the handler concurrently. It can read
     // some bytes from old value and some bytes from new value.
     // Use volatile to prevent insertion of memcpy.
     sigactions[sig].handler =
@@ -2655,6 +2678,25 @@ static USED void syscall_fd_release(uptr pc, int fd) {
   FdRelease(thr, pc, fd);
 }
 
+static USED void sycall_blocking_start() {  // NOTE(review): "sycall" looks like a typo for "syscall"; the name is also used by COMMON_SYSCALL_BLOCKING_START
+  DPrintf("sycall_blocking_start()\n");
+  ThreadState *thr = cur_thread();
+  EnterBlockingFunc(thr);
+  // When we are in a "blocking call", we process signals asynchronously
+  // (right when they arrive). In this context we do not expect to be
+  // executing any user/runtime code. The known interceptor sequence when
+  // this is not true is: pthread_join -> munmap(stack). It's fine
+  // to ignore munmap in this case -- we handle stack shadow separately.
+  thr->ignore_interceptors++;  // balanced by the decrement in sycall_blocking_end()
+}
+
+static USED void sycall_blocking_end() {  // counterpart of sycall_blocking_start()
+  DPrintf("sycall_blocking_end()\n");
+  ThreadState *thr = cur_thread();
+  thr->ignore_interceptors--;  // balances the increment in sycall_blocking_start()
+  atomic_store(&thr->in_blocking_func, 0, memory_order_relaxed);  // clear the in_blocking_func flag
+}
+
 static void syscall_pre_fork(uptr pc) { ForkBefore(cur_thread(), pc); }
 
 static void syscall_post_fork(uptr pc, int pid) {
@@ -2709,6 +2751,9 @@ static void syscall_post_fork(uptr pc, int pid) {
 #define COMMON_SYSCALL_POST_FORK(res) \
   syscall_post_fork(GET_CALLER_PC(), res)
 
+#define COMMON_SYSCALL_BLOCKING_START() sycall_blocking_start()
+#define COMMON_SYSCALL_BLOCKING_END() sycall_blocking_end()
+
 #include "sanitizer_common/sanitizer_common_syscalls.inc"
 #include "sanitizer_common/sanitizer_syscalls_netbsd.inc"
 
@@ -2843,8 +2888,21 @@ void InitializeInterceptors() {
   REAL(memcpy) = internal_memcpy;
 #endif
 
+  __interception::DoesNotSupportStaticLinking();
+
   new(interceptor_ctx()) InterceptorContext();
 
+  // Interpose __tls_get_addr before the common interposers. This is needed
+  // because dlsym() may call malloc on failure which could result in other
+  // interposed functions being called that could eventually make use of TLS.
+#ifdef NEED_TLS_GET_ADDR
+#  if !SANITIZER_S390
+  TSAN_INTERCEPT(__tls_get_addr);
+#  else
+  TSAN_INTERCEPT(__tls_get_addr_internal);
+  TSAN_INTERCEPT(__tls_get_offset);
+#  endif
+#endif
   InitializeCommonInterceptors();
   InitializeSignalInterceptors();
   InitializeLibdispatchInterceptors();
@@ -2900,6 +2958,9 @@ void InitializeInterceptors() {
   TSAN_INTERCEPT(pthread_mutex_trylock);
   TSAN_INTERCEPT(pthread_mutex_timedlock);
   TSAN_INTERCEPT(pthread_mutex_unlock);
+#if SANITIZER_LINUX
+  TSAN_INTERCEPT(pthread_mutex_clocklock);
+#endif
 #if SANITIZER_GLIBC
 #  if !__GLIBC_PREREQ(2, 34)
   TSAN_INTERCEPT(__pthread_mutex_lock);
@@ -2929,10 +2990,9 @@ void InitializeInterceptors() {
 
   TSAN_INTERCEPT(pthread_once);
 
-  TSAN_INTERCEPT(fstat);
   TSAN_MAYBE_INTERCEPT___FXSTAT;
+  TSAN_MAYBE_INTERCEPT_FSTAT;
   TSAN_MAYBE_INTERCEPT_FSTAT64;
-  TSAN_MAYBE_INTERCEPT___FXSTAT64;
   TSAN_INTERCEPT(open);
   TSAN_MAYBE_INTERCEPT_OPEN64;
   TSAN_INTERCEPT(creat);
@@ -2989,15 +3049,6 @@ void InitializeInterceptors() {
   TSAN_INTERCEPT(__cxa_atexit);
   TSAN_INTERCEPT(_exit);
 
-#ifdef NEED_TLS_GET_ADDR
-#if !SANITIZER_S390
-  TSAN_INTERCEPT(__tls_get_addr);
-#else
-  TSAN_INTERCEPT(__tls_get_addr_internal);
-  TSAN_INTERCEPT(__tls_get_offset);
-#endif
-#endif
-
   TSAN_MAYBE_INTERCEPT__LWP_EXIT;
   TSAN_MAYBE_INTERCEPT_THR_EXIT;
 
lib/tsan/tsan_interface.h
@@ -419,6 +419,14 @@ void __tsan_go_atomic32_fetch_add(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
 SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_go_atomic64_fetch_add(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
 SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic32_fetch_and(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic64_fetch_and(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic32_fetch_or(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic64_fetch_or(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
+SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_go_atomic32_exchange(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
 SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_go_atomic64_exchange(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
lib/tsan/tsan_interface_ann.cpp
@@ -76,7 +76,7 @@ struct DynamicAnnContext {
 };
 
 static DynamicAnnContext *dyn_ann_ctx;
-static char dyn_ann_ctx_placeholder[sizeof(DynamicAnnContext)] ALIGNED(64);
+alignas(64) static char dyn_ann_ctx_placeholder[sizeof(DynamicAnnContext)];
 
 static void AddExpectRace(ExpectRace *list,
     char *f, int l, uptr addr, uptr size, char *desc) {
@@ -435,4 +435,26 @@ void __tsan_mutex_post_divert(void *addr, unsigned flagz) {
   ThreadIgnoreBegin(thr, 0);
   ThreadIgnoreSyncBegin(thr, 0);
 }
+
+static void ReportMutexHeldWrongContext(ThreadState *thr, uptr pc) {
+  ThreadRegistryLock registry_lock(&ctx->thread_registry);
+  ScopedReport report(ReportTypeMutexHeldWrongContext);
+  for (uptr idx = 0; idx < thr->mset.Size(); ++idx) {  // every mutex in thr's set
+    const MutexSet::Desc held = thr->mset.Get(idx);
+    report.AddMutex(held.addr, held.stack_id);
+  }
+  VarSizeStackTrace stack;  // stack for `pc`, filled in by ObtainCurrentStack
+  ObtainCurrentStack(thr, pc, &stack);
+  report.AddStack(stack, true);
+  OutputReport(thr, report);
+}
+
+// Annotation entry point: emits a report when the thread's mutex set is not
+// empty. `thr` and `pc` are presumably introduced by SCOPED_ANNOTATION.
+INTERFACE_ATTRIBUTE
+void __tsan_check_no_mutexes_held() {
+  SCOPED_ANNOTATION(__tsan_check_no_mutexes_held);
+  if (thr->mset.Size() != 0)
+    ReportMutexHeldWrongContext(thr, pc);
+}
 }  // extern "C"
lib/tsan/tsan_interface_atomic.cpp
@@ -894,6 +894,30 @@ void __tsan_go_atomic64_fetch_add(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
   ATOMIC_RET(FetchAdd, *(a64*)(a+16), *(a64**)a, *(a64*)(a+8), mo_acq_rel);
 }
 
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic32_fetch_and(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
+  ATOMIC_RET(FetchAnd, *(a32 *)(a + 16), *(a32 **)a, *(a32 *)(a + 8),  // `a` holds an a32* at +0 and a32 values at +8 and +16
+             mo_acq_rel);  // presumably +8 is the operand and +16 receives the result -- confirm against ATOMIC_RET
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic64_fetch_and(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
+  ATOMIC_RET(FetchAnd, *(a64 *)(a + 16), *(a64 **)a, *(a64 *)(a + 8),  // `a` holds an a64* at +0 and a64 values at +8 and +16
+             mo_acq_rel);  // presumably +8 is the operand and +16 receives the result -- confirm against ATOMIC_RET
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic32_fetch_or(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
+  ATOMIC_RET(FetchOr, *(a32 *)(a + 16), *(a32 **)a, *(a32 *)(a + 8),  // `a` holds an a32* at +0 and a32 values at +8 and +16
+             mo_acq_rel);  // presumably +8 is the operand and +16 receives the result -- confirm against ATOMIC_RET
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic64_fetch_or(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
+  ATOMIC_RET(FetchOr, *(a64 *)(a + 16), *(a64 **)a, *(a64 *)(a + 8),  // `a` holds an a64* at +0 and a64 values at +8 and +16
+             mo_acq_rel);  // presumably +8 is the operand and +16 receives the result -- confirm against ATOMIC_RET
+}
+
 SANITIZER_INTERFACE_ATTRIBUTE
 void __tsan_go_atomic32_exchange(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
   ATOMIC_RET(Exchange, *(a32*)(a+16), *(a32**)a, *(a32*)(a+8), mo_acq_rel);
lib/tsan/tsan_mman.cpp
@@ -9,17 +9,19 @@
 // This file is a part of ThreadSanitizer (TSan), a race detector.
 //
 //===----------------------------------------------------------------------===//
+#include "tsan_mman.h"
+
 #include "sanitizer_common/sanitizer_allocator_checks.h"
 #include "sanitizer_common/sanitizer_allocator_interface.h"
 #include "sanitizer_common/sanitizer_allocator_report.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_errno.h"
 #include "sanitizer_common/sanitizer_placement_new.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
+#include "tsan_flags.h"
 #include "tsan_interface.h"
-#include "tsan_mman.h"
-#include "tsan_rtl.h"
 #include "tsan_report.h"
-#include "tsan_flags.h"
+#include "tsan_rtl.h"
 
 namespace __tsan {
 
@@ -52,7 +54,7 @@ struct MapUnmapCallback {
   }
 };
 
-static char allocator_placeholder[sizeof(Allocator)] ALIGNED(64);
+alignas(64) static char allocator_placeholder[sizeof(Allocator)];
 Allocator *allocator() {
   return reinterpret_cast<Allocator*>(&allocator_placeholder);
 }
@@ -73,7 +75,7 @@ struct GlobalProc {
         internal_alloc_mtx(MutexTypeInternalAlloc) {}
 };
 
-static char global_proc_placeholder[sizeof(GlobalProc)] ALIGNED(64);
+alignas(64) static char global_proc_placeholder[sizeof(GlobalProc)];
 GlobalProc *global_proc() {
   return reinterpret_cast<GlobalProc*>(&global_proc_placeholder);
 }
@@ -115,12 +117,21 @@ ScopedGlobalProcessor::~ScopedGlobalProcessor() {
   gp->mtx.Unlock();
 }
 
-void AllocatorLock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+void AllocatorLockBeforeFork() SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
   global_proc()->internal_alloc_mtx.Lock();
   InternalAllocatorLock();
-}
-
-void AllocatorUnlock() SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+#if !SANITIZER_APPLE
+  // OS X allocates from hooks, see 6a3958247a.
+  allocator()->ForceLock();
+  StackDepotLockBeforeFork();
+#endif
+}
+
+void AllocatorUnlockAfterFork(bool child) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+#if !SANITIZER_APPLE
+  StackDepotUnlockAfterFork(child);
+  allocator()->ForceUnlock();
+#endif
   InternalAllocatorUnlock();
   global_proc()->internal_alloc_mtx.Unlock();
 }
lib/tsan/tsan_mman.h
@@ -24,8 +24,8 @@ void ReplaceSystemMalloc();
 void AllocatorProcStart(Processor *proc);
 void AllocatorProcFinish(Processor *proc);
 void AllocatorPrintStats();
-void AllocatorLock();
-void AllocatorUnlock();
+void AllocatorLockBeforeFork();
+void AllocatorUnlockAfterFork(bool child);
 void GlobalProcessorLock();
 void GlobalProcessorUnlock();
 
lib/tsan/tsan_platform.h
@@ -46,17 +46,16 @@ enum {
 
 /*
 C/C++ on linux/x86_64 and freebsd/x86_64
-0000 0000 1000 - 0080 0000 0000: main binary and/or MAP_32BIT mappings (512GB)
-0040 0000 0000 - 0100 0000 0000: -
-0100 0000 0000 - 1000 0000 0000: shadow
-1000 0000 0000 - 3000 0000 0000: -
-3000 0000 0000 - 3400 0000 0000: metainfo (memory blocks and sync objects)
-3400 0000 0000 - 5500 0000 0000: -
-5500 0000 0000 - 5680 0000 0000: pie binaries without ASLR or on 4.1+ kernels
-5680 0000 0000 - 7d00 0000 0000: -
-7b00 0000 0000 - 7c00 0000 0000: heap
-7c00 0000 0000 - 7e80 0000 0000: -
-7e80 0000 0000 - 8000 0000 0000: modules and main thread stack
+0000 0000 1000 - 0200 0000 0000: main binary and/or MAP_32BIT mappings (2TB)
+0200 0000 0000 - 1000 0000 0000: -
+1000 0000 0000 - 3000 0000 0000: shadow (32TB)
+3000 0000 0000 - 3800 0000 0000: metainfo (memory blocks and sync objects; 8TB)
+3800 0000 0000 - 5500 0000 0000: -
+5500 0000 0000 - 5a00 0000 0000: pie binaries without ASLR or on 4.1+ kernels
+5a00 0000 0000 - 7200 0000 0000: -
+7200 0000 0000 - 7300 0000 0000: heap (1TB)
+7300 0000 0000 - 7a00 0000 0000: -
+7a00 0000 0000 - 8000 0000 0000: modules and main thread stack (6TB)
 
 C/C++ on netbsd/amd64 can reuse the same mapping:
  * The address space starts from 0x1000 (option with 0x0) and ends with
@@ -72,20 +71,20 @@ C/C++ on netbsd/amd64 can reuse the same mapping:
 */
 struct Mapping48AddressSpace {
   static const uptr kMetaShadowBeg = 0x300000000000ull;
-  static const uptr kMetaShadowEnd = 0x340000000000ull;
-  static const uptr kShadowBeg     = 0x010000000000ull;
-  static const uptr kShadowEnd = 0x100000000000ull;
-  static const uptr kHeapMemBeg    = 0x7b0000000000ull;
-  static const uptr kHeapMemEnd    = 0x7c0000000000ull;
+  static const uptr kMetaShadowEnd = 0x380000000000ull;
+  static const uptr kShadowBeg = 0x100000000000ull;
+  static const uptr kShadowEnd = 0x300000000000ull;
+  static const uptr kHeapMemBeg = 0x720000000000ull;
+  static const uptr kHeapMemEnd = 0x730000000000ull;
   static const uptr kLoAppMemBeg   = 0x000000001000ull;
-  static const uptr kLoAppMemEnd   = 0x008000000000ull;
+  static const uptr kLoAppMemEnd = 0x020000000000ull;
   static const uptr kMidAppMemBeg  = 0x550000000000ull;
-  static const uptr kMidAppMemEnd  = 0x568000000000ull;
-  static const uptr kHiAppMemBeg   = 0x7e8000000000ull;
+  static const uptr kMidAppMemEnd = 0x5a0000000000ull;
+  static const uptr kHiAppMemBeg = 0x7a0000000000ull;
   static const uptr kHiAppMemEnd   = 0x800000000000ull;
-  static const uptr kShadowMsk = 0x780000000000ull;
-  static const uptr kShadowXor = 0x040000000000ull;
-  static const uptr kShadowAdd = 0x000000000000ull;
+  static const uptr kShadowMsk = 0x700000000000ull;
+  static const uptr kShadowXor = 0x000000000000ull;
+  static const uptr kShadowAdd = 0x100000000000ull;
   static const uptr kVdsoBeg       = 0xf000000000000000ull;
 };
 
@@ -377,6 +376,71 @@ struct MappingPPC64_47 {
   static const uptr kMidAppMemEnd = 0;
 };
 
+/*
+C/C++ on linux/riscv64 (39-bit VMA)
+0000 0010 00 - 0200 0000 00: main binary                      ( 8 GB)
+0200 0000 00 - 1000 0000 00: -
+1000 0000 00 - 2000 0000 00: shadow memory                    (64 GB)
+2000 0000 00 - 2400 0000 00: metainfo                         (16 GB)
+2400 0000 00 - 2aaa aaa0 00: -
+2aaa aaa0 00 - 2c00 0000 00: main binary (PIE)                (~5 GB)
+2c00 0000 00 - 3c00 0000 00: -
+3c00 0000 00 - 3fff ffff ff: libraries and main thread stack  (16 GB)
+
+mmap by default allocates from top downwards
+VDSO sits below loader and above dynamic libraries, within HiApp region.
+Heap starts after program region whose position depends on pie or non-pie.
+Disable tracking them since their locations are not fixed.
+*/
+struct MappingRiscv64_39 {
+  static const uptr kLoAppMemBeg = 0x0000001000ull;
+  static const uptr kLoAppMemEnd = 0x0200000000ull;
+  static const uptr kShadowBeg = 0x1000000000ull;
+  static const uptr kShadowEnd = 0x2000000000ull;
+  static const uptr kMetaShadowBeg = 0x2000000000ull;
+  static const uptr kMetaShadowEnd = 0x2400000000ull;
+  static const uptr kMidAppMemBeg = 0x2aaaaaa000ull;
+  static const uptr kMidAppMemEnd = 0x2c00000000ull;
+  static const uptr kHeapMemBeg = 0x2c00000000ull;
+  static const uptr kHeapMemEnd = 0x2c00000000ull;
+  static const uptr kHiAppMemBeg = 0x3c00000000ull;
+  static const uptr kHiAppMemEnd = 0x3fffffffffull;
+  static const uptr kShadowMsk = 0x3800000000ull;
+  static const uptr kShadowXor = 0x0800000000ull;
+  static const uptr kShadowAdd = 0x0000000000ull;
+  static const uptr kVdsoBeg = 0x4000000000ull;
+};
+
+/*
+C/C++ on linux/riscv64 (48-bit VMA)
+0000 0000 1000 - 0400 0000 0000: main binary                      ( 4 TB)
+0400 0000 0000 - 2000 0000 0000: -
+2000 0000 0000 - 4000 0000 0000: shadow memory                    (32 TB)
+4000 0000 0000 - 4800 0000 0000: metainfo                         ( 8 TB)
+4800 0000 0000 - 5555 5555 5000: -
+5555 5555 5000 - 5a00 0000 0000: main binary (PIE)                (~5 TB)
+5a00 0000 0000 - 7a00 0000 0000: -
+7a00 0000 0000 - 7fff ffff ffff: libraries and main thread stack  ( 6 TB)
+*/
+struct MappingRiscv64_48 {
+  static const uptr kLoAppMemBeg = 0x000000001000ull;
+  static const uptr kLoAppMemEnd = 0x040000000000ull;
+  static const uptr kShadowBeg = 0x200000000000ull;
+  static const uptr kShadowEnd = 0x400000000000ull;
+  static const uptr kMetaShadowBeg = 0x400000000000ull;
+  static const uptr kMetaShadowEnd = 0x480000000000ull;
+  static const uptr kMidAppMemBeg = 0x555555555000ull;
+  static const uptr kMidAppMemEnd = 0x5a0000000000ull;
+  static const uptr kHeapMemBeg = 0x5a0000000000ull;
+  static const uptr kHeapMemEnd = 0x5a0000000000ull;
+  static const uptr kHiAppMemBeg = 0x7a0000000000ull;
+  static const uptr kHiAppMemEnd = 0x7fffffffffffull;
+  static const uptr kShadowMsk = 0x700000000000ull;
+  static const uptr kShadowXor = 0x100000000000ull;
+  static const uptr kShadowAdd = 0x000000000000ull;
+  static const uptr kVdsoBeg = 0x800000000000ull;
+};
+
 /*
 C/C++ on linux/s390x
 While the kernel provides a 64-bit address space, we have to restrict ourselves
@@ -558,6 +622,35 @@ struct MappingGoAarch64 {
   static const uptr kShadowAdd = 0x200000000000ull;
 };
 
+/* Go on linux/loongarch64 (47-bit VMA)
+0000 0000 1000 - 0000 1000 0000: executable
+0000 1000 0000 - 00c0 0000 0000: -
+00c0 0000 0000 - 00e0 0000 0000: heap
+00e0 0000 0000 - 2000 0000 0000: -
+2000 0000 0000 - 2800 0000 0000: shadow
+2800 0000 0000 - 3000 0000 0000: -
+3000 0000 0000 - 3200 0000 0000: metainfo (memory blocks and sync objects)
+3200 0000 0000 - 8000 0000 0000: -
+*/
+struct MappingGoLoongArch64_47 {
+  static const uptr kMetaShadowBeg = 0x300000000000ull;
+  static const uptr kMetaShadowEnd = 0x320000000000ull;
+  static const uptr kShadowBeg = 0x200000000000ull;
+  static const uptr kShadowEnd = 0x280000000000ull;
+  static const uptr kLoAppMemBeg = 0x000000001000ull;
+  static const uptr kLoAppMemEnd = 0x00e000000000ull;
+  static const uptr kMidAppMemBeg = 0;
+  static const uptr kMidAppMemEnd = 0;
+  static const uptr kHiAppMemBeg = 0;
+  static const uptr kHiAppMemEnd = 0;
+  static const uptr kHeapMemBeg = 0;
+  static const uptr kHeapMemEnd = 0;
+  static const uptr kVdsoBeg = 0;
+  static const uptr kShadowMsk = 0;
+  static const uptr kShadowXor = 0;
+  static const uptr kShadowAdd = 0x200000000000ull;
+};
+
 /*
 Go on linux/mips64 (47-bit VMA)
 0000 0000 1000 - 0000 1000 0000: executable
@@ -633,6 +726,8 @@ ALWAYS_INLINE auto SelectMapping(Arg arg) {
   return Func::template Apply<MappingGoS390x>(arg);
 #  elif defined(__aarch64__)
   return Func::template Apply<MappingGoAarch64>(arg);
+#  elif defined(__loongarch_lp64)
+  return Func::template Apply<MappingGoLoongArch64_47>(arg);
 #  elif SANITIZER_WINDOWS
   return Func::template Apply<MappingGoWindows>(arg);
 #  else
@@ -665,6 +760,13 @@ ALWAYS_INLINE auto SelectMapping(Arg arg) {
   }
 #  elif defined(__mips64)
   return Func::template Apply<MappingMips64_40>(arg);
+#  elif SANITIZER_RISCV64
+  switch (vmaSize) {
+    case 39:
+      return Func::template Apply<MappingRiscv64_39>(arg);
+    case 48:
+      return Func::template Apply<MappingRiscv64_48>(arg);
+  }
 #  elif defined(__s390x__)
   return Func::template Apply<MappingS390x>(arg);
 #  else
@@ -686,12 +788,15 @@ void ForEachMapping() {
   Func::template Apply<MappingPPC64_44>();
   Func::template Apply<MappingPPC64_46>();
   Func::template Apply<MappingPPC64_47>();
+  Func::template Apply<MappingRiscv64_39>();
+  Func::template Apply<MappingRiscv64_48>();
   Func::template Apply<MappingS390x>();
   Func::template Apply<MappingGo48>();
   Func::template Apply<MappingGoWindows>();
   Func::template Apply<MappingGoPPC64_46>();
   Func::template Apply<MappingGoPPC64_47>();
   Func::template Apply<MappingGoAarch64>();
+  Func::template Apply<MappingGoLoongArch64_47>();
   Func::template Apply<MappingGoMips64_47>();
   Func::template Apply<MappingGoS390x>();
 }
@@ -919,7 +1024,7 @@ inline uptr RestoreAddr(uptr addr) {
 
 void InitializePlatform();
 void InitializePlatformEarly();
-void CheckAndProtect();
+bool CheckAndProtect(bool protect, bool ignore_heap, bool print_warnings);
 void InitializeShadowMemoryPlatform();
 void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns);
 int ExtractResolvFDs(void *state, int *fds, int nfd);
lib/tsan/tsan_platform_linux.cpp
@@ -152,7 +152,7 @@ void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {
 #if !SANITIZER_GO
 // Mark shadow for .rodata sections with the special Shadow::kRodata marker.
 // Accesses to .rodata can't race, so this saves time, memory and trace space.
-static void MapRodata() {
+static NOINLINE void MapRodata(char* buffer, uptr size) {
   // First create temp file.
   const char *tmpdir = GetEnv("TMPDIR");
   if (tmpdir == 0)
@@ -163,13 +163,12 @@ static void MapRodata() {
 #endif
   if (tmpdir == 0)
     return;
-  char name[256];
-  internal_snprintf(name, sizeof(name), "%s/tsan.rodata.%d",
+  internal_snprintf(buffer, size, "%s/tsan.rodata.%d",
                     tmpdir, (int)internal_getpid());
-  uptr openrv = internal_open(name, O_RDWR | O_CREAT | O_EXCL, 0600);
+  uptr openrv = internal_open(buffer, O_RDWR | O_CREAT | O_EXCL, 0600);
   if (internal_iserror(openrv))
     return;
-  internal_unlink(name);  // Unlink it now, so that we can reuse the buffer.
+  internal_unlink(buffer);  // Unlink it now, so that we can reuse the buffer.
   fd_t fd = openrv;
   // Fill the file with Shadow::kRodata.
   const uptr kMarkerSize = 512 * 1024 / sizeof(RawShadow);
@@ -188,8 +187,8 @@ static void MapRodata() {
   }
   // Map the file into shadow of .rodata sections.
   MemoryMappingLayout proc_maps(/*cache_enabled*/true);
-  // Reusing the buffer 'name'.
-  MemoryMappedSegment segment(name, ARRAY_SIZE(name));
+  // Reusing the buffer 'buffer'.
+  MemoryMappedSegment segment(buffer, size);
   while (proc_maps.Next(&segment)) {
     if (segment.filename[0] != 0 && segment.filename[0] != '[' &&
         segment.IsReadable() && segment.IsExecutable() &&
@@ -209,11 +208,103 @@ static void MapRodata() {
 }
 
 void InitializeShadowMemoryPlatform() {
-  MapRodata();
+  char buffer[256];  // Keep in a different frame.
+  MapRodata(buffer, sizeof(buffer));
 }
 
 #endif  // #if !SANITIZER_GO
 
+#  if !SANITIZER_GO
+static void ReExecIfNeeded(bool ignore_heap) {
+  // Go maps shadow memory lazily and works fine with limited address space.
+  // Unlimited stack is not a problem as well, because the executable
+  // is not compiled with -pie.
+  bool reexec = false;
+  // TSan doesn't play well with unlimited stack size (as stack
+  // overlaps with shadow memory). If we detect unlimited stack size,
+  // we re-exec the program with limited stack size as a best effort.
+  if (StackSizeIsUnlimited()) {
+    const uptr kMaxStackSize = 32 * 1024 * 1024;
+    VReport(1,
+            "Program is run with unlimited stack size, which wouldn't "
+            "work with ThreadSanitizer.\n"
+            "Re-execing with stack size limited to %zd bytes.\n",
+            kMaxStackSize);
+    SetStackSizeLimitInBytes(kMaxStackSize);
+    reexec = true;
+  }
+
+  if (!AddressSpaceIsUnlimited()) {
+    Report(
+        "WARNING: Program is run with limited virtual address space,"
+        " which wouldn't work with ThreadSanitizer.\n");
+    Report("Re-execing with unlimited virtual address space.\n");
+    SetAddressSpaceUnlimited();
+    reexec = true;
+  }
+
+#    if SANITIZER_LINUX
+#      if SANITIZER_ANDROID && (defined(__aarch64__) || defined(__x86_64__))
+  // ASLR personality check.
+  int old_personality = personality(0xffffffff);
+  bool aslr_on =
+      (old_personality != -1) && ((old_personality & ADDR_NO_RANDOMIZE) == 0);
+
+  // After patch "arm64: mm: support ARCH_MMAP_RND_BITS." is introduced in
+  // linux kernel, the random gap between stack and mapped area is increased
+  // from 128M to 36G on 39-bit aarch64. As it is almost impossible to cover
+  // this big range, we should disable randomized virtual space on aarch64.
+  if (aslr_on) {
+    VReport(1,
+            "WARNING: Program is run with randomized virtual address "
+            "space, which wouldn't work with ThreadSanitizer on Android.\n"
+            "Re-execing with fixed virtual address space.\n");
+    CHECK_NE(personality(old_personality | ADDR_NO_RANDOMIZE), -1);
+    reexec = true;
+  }
+#      endif
+
+  if (reexec) {
+    // Don't check the address space since we're going to re-exec anyway.
+  } else if (!CheckAndProtect(false, ignore_heap, false)) {
+    // ASLR personality check.
+    // N.B. 'personality' is sometimes forbidden by sandboxes, so we only call
+    // this as a last resort (when the memory mapping is incompatible and TSan
+    // would fail anyway).
+    int old_personality = personality(0xffffffff);
+    bool aslr_on =
+        (old_personality != -1) && ((old_personality & ADDR_NO_RANDOMIZE) == 0);
+
+    if (aslr_on) {
+      // Disable ASLR if the memory layout was incompatible.
+      // Alternatively, we could just keep re-execing until we get lucky
+      // with a compatible randomized layout, but the risk is that if it's
+      // not an ASLR-related issue, we will be stuck in an infinite loop of
+      // re-execing (unless we change ReExec to pass a parameter of the
+      // number of retries allowed.)
+      VReport(1,
+              "WARNING: ThreadSanitizer: memory layout is incompatible, "
+              "possibly due to high-entropy ASLR.\n"
+              "Re-execing with fixed virtual address space.\n"
+              "N.B. reducing ASLR entropy is preferable.\n");
+      CHECK_NE(personality(old_personality | ADDR_NO_RANDOMIZE), -1);
+      reexec = true;
+    } else {
+      Printf(
+          "FATAL: ThreadSanitizer: memory layout is incompatible, "
+          "even though ASLR is disabled.\n"
+          "Please file a bug.\n");
+      DumpProcessMap();
+      Die();
+    }
+  }
+#    endif  // SANITIZER_LINUX
+
+  if (reexec)
+    ReExec();
+}
+#  endif
+
 void InitializePlatformEarly() {
   vmaSize =
     (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1);
@@ -238,7 +329,13 @@ void InitializePlatformEarly() {
     Printf("FATAL: Found %zd - Supported 47\n", vmaSize);
     Die();
   }
-# endif
+#    else
+  if (vmaSize != 47) {
+    Printf("FATAL: ThreadSanitizer: unsupported VMA range\n");
+    Printf("FATAL: Found %zd - Supported 47\n", vmaSize);
+    Die();
+  }
+#    endif
 #elif defined(__powerpc64__)
 # if !SANITIZER_GO
   if (vmaSize != 44 && vmaSize != 46 && vmaSize != 47) {
@@ -267,7 +364,22 @@ void InitializePlatformEarly() {
     Die();
   }
 # endif
-#endif
+#  elif SANITIZER_RISCV64
+  // The bottom half of the VMA is allocated for userspace, so add one bit.
+  vmaSize = vmaSize + 1;
+#    if !SANITIZER_GO
+  if (vmaSize != 39 && vmaSize != 48) {
+    Printf("FATAL: ThreadSanitizer: unsupported VMA range\n");
+    Printf("FATAL: Found %zd - Supported 39 and 48\n", vmaSize);
+    Die();
+  }
+#    endif
+#  endif
+
+#  if !SANITIZER_GO
+  // Heap has not been allocated yet
+  ReExecIfNeeded(false);
+#  endif
 }
 
 void InitializePlatform() {
@@ -278,52 +390,34 @@ void InitializePlatform() {
   // is not compiled with -pie.
 #if !SANITIZER_GO
   {
-    bool reexec = false;
-    // TSan doesn't play well with unlimited stack size (as stack
-    // overlaps with shadow memory). If we detect unlimited stack size,
-    // we re-exec the program with limited stack size as a best effort.
-    if (StackSizeIsUnlimited()) {
-      const uptr kMaxStackSize = 32 * 1024 * 1024;
-      VReport(1, "Program is run with unlimited stack size, which wouldn't "
-                 "work with ThreadSanitizer.\n"
-                 "Re-execing with stack size limited to %zd bytes.\n",
-              kMaxStackSize);
-      SetStackSizeLimitInBytes(kMaxStackSize);
-      reexec = true;
-    }
-
-    if (!AddressSpaceIsUnlimited()) {
-      Report("WARNING: Program is run with limited virtual address space,"
-             " which wouldn't work with ThreadSanitizer.\n");
-      Report("Re-execing with unlimited virtual address space.\n");
-      SetAddressSpaceUnlimited();
-      reexec = true;
-    }
-#if SANITIZER_ANDROID && (defined(__aarch64__) || defined(__x86_64__))
-    // After patch "arm64: mm: support ARCH_MMAP_RND_BITS." is introduced in
-    // linux kernel, the random gap between stack and mapped area is increased
-    // from 128M to 36G on 39-bit aarch64. As it is almost impossible to cover
-    // this big range, we should disable randomized virtual space on aarch64.
-    // ASLR personality check.
-    int old_personality = personality(0xffffffff);
-    if (old_personality != -1 && (old_personality & ADDR_NO_RANDOMIZE) == 0) {
-      VReport(1, "WARNING: Program is run with randomized virtual address "
-              "space, which wouldn't work with ThreadSanitizer.\n"
-              "Re-execing with fixed virtual address space.\n");
-      CHECK_NE(personality(old_personality | ADDR_NO_RANDOMIZE), -1);
-      reexec = true;
-    }
-
-#endif
-#if SANITIZER_LINUX && (defined(__aarch64__) || defined(__loongarch_lp64))
+#    if SANITIZER_LINUX && (defined(__aarch64__) || defined(__loongarch_lp64))
     // Initialize the xor key used in {sig}{set,long}jump.
     InitializeLongjmpXorKey();
-#endif
-    if (reexec)
-      ReExec();
+#    endif
+  }
+
+  // We called ReExecIfNeeded() in InitializePlatformEarly(), but there are
+  // intervening allocations that result in an edge case:
+  // 1) InitializePlatformEarly(): memory layout is compatible
+  // 2) Intervening allocations happen
+  // 3) InitializePlatform(): memory layout is incompatible and fails
+  //    CheckAndProtect()
+#    if !SANITIZER_GO
+  // Heap has already been allocated
+  ReExecIfNeeded(true);
+#    endif
+
+  // Earlier initialization steps already re-exec'ed until we got a compatible
+  // memory layout, so we don't expect any more issues here.
+  if (!CheckAndProtect(true, true, true)) {
+    Printf(
+        "FATAL: ThreadSanitizer: unexpectedly found incompatible memory "
+        "layout.\n");
+    Printf("FATAL: Please file a bug.\n");
+    DumpProcessMap();
+    Die();
   }
 
-  CheckAndProtect();
   InitTlsSize();
 #endif  // !SANITIZER_GO
 }
@@ -399,13 +493,15 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) {
   return mangled_sp ^ xor_key;
 #elif defined(__mips__)
   return mangled_sp;
-#elif defined(__s390x__)
+#    elif SANITIZER_RISCV64
+  return mangled_sp;
+#    elif defined(__s390x__)
   // tcbhead_t.stack_guard
   uptr xor_key = ((uptr *)__builtin_thread_pointer())[5];
   return mangled_sp ^ xor_key;
-#else
-  #error "Unknown platform"
-#endif
+#    else
+#      error "Unknown platform"
+#    endif
 }
 
 #if SANITIZER_NETBSD
@@ -429,11 +525,13 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) {
 #  define LONG_JMP_SP_ENV_SLOT 1
 # elif defined(__mips64)
 #  define LONG_JMP_SP_ENV_SLOT 1
-# elif defined(__s390x__)
-#  define LONG_JMP_SP_ENV_SLOT 9
-# else
-#  define LONG_JMP_SP_ENV_SLOT 6
-# endif
+#      elif SANITIZER_RISCV64
+#        define LONG_JMP_SP_ENV_SLOT 13
+#      elif defined(__s390x__)
+#        define LONG_JMP_SP_ENV_SLOT 9
+#      else
+#        define LONG_JMP_SP_ENV_SLOT 6
+#      endif
 #endif
 
 uptr ExtractLongJmpSp(uptr *env) {
lib/tsan/tsan_platform_mac.cpp
@@ -46,8 +46,8 @@
 namespace __tsan {
 
 #if !SANITIZER_GO
-static char main_thread_state[sizeof(ThreadState)] ALIGNED(
-    SANITIZER_CACHE_LINE_SIZE);
+alignas(SANITIZER_CACHE_LINE_SIZE) static char main_thread_state[sizeof(
+    ThreadState)];
 static ThreadState *dead_thread_state;
 static pthread_key_t thread_state_key;
 
@@ -239,7 +239,10 @@ static uptr longjmp_xor_key = 0;
 void InitializePlatform() {
   DisableCoreDumperIfNecessary();
 #if !SANITIZER_GO
-  CheckAndProtect();
+  if (!CheckAndProtect(true, true, true)) {
+    Printf("FATAL: ThreadSanitizer: found incompatible memory layout.\n");
+    Die();
+  }
 
   InitializeThreadStateStorage();
 
lib/tsan/tsan_platform_posix.cpp
@@ -94,22 +94,51 @@ static void ProtectRange(uptr beg, uptr end) {
   }
 }
 
-void CheckAndProtect() {
+// CheckAndProtect will check if the memory layout is compatible with TSan.
+// Optionally (if 'protect' is true), it will set the memory regions between
+// app memory to be inaccessible.
+// 'ignore_heap' means it will not consider heap memory allocations to be a
+// conflict. Set this based on whether we are calling CheckAndProtect before
+// or after the allocator has initialized the heap.
+bool CheckAndProtect(bool protect, bool ignore_heap, bool print_warnings) {
   // Ensure that the binary is indeed compiled with -pie.
   MemoryMappingLayout proc_maps(true);
   MemoryMappedSegment segment;
   while (proc_maps.Next(&segment)) {
-    if (IsAppMem(segment.start)) continue;
+    if (segment.start >= HeapMemBeg() && segment.end <= HeapEnd()) {
+      if (ignore_heap) {
+        continue;
+      } else {
+        return false;
+      }
+    }
+
+    // Note: IsAppMem also returns true for heap memory, hence this check
+    // must come after the heap bounds check above.
+    if (IsAppMem(segment.start) && IsAppMem(segment.end - 1))
+      continue;
+
+    // Guard page after the heap end
     if (segment.start >= HeapMemEnd() && segment.start < HeapEnd()) continue;
+
     if (segment.protection == 0)  // Zero page or mprotected.
       continue;
+
     if (segment.start >= VdsoBeg())  // vdso
       break;
-    Printf("FATAL: ThreadSanitizer: unexpected memory mapping 0x%zx-0x%zx\n",
-           segment.start, segment.end);
-    Die();
+
+    // Debug output can break tests. Suppress this message in most cases.
+    if (print_warnings)
+      Printf(
+          "WARNING: ThreadSanitizer: unexpected memory mapping 0x%zx-0x%zx\n",
+          segment.start, segment.end);
+
+    return false;
   }
 
+  if (!protect)
+    return true;
+
 #    if SANITIZER_IOS && !SANITIZER_IOSSIM
   ProtectRange(HeapMemEnd(), ShadowBeg());
   ProtectRange(ShadowEnd(), MetaShadowBeg());
@@ -135,8 +164,10 @@ void CheckAndProtect() {
   // Older s390x kernels may not support 5-level page tables.
   TryProtectRange(user_addr_max_l4, user_addr_max_l5);
 #endif
+
+  return true;
 }
-#endif
+#  endif
 
 }  // namespace __tsan
 
lib/tsan/tsan_preinit.cpp
@@ -16,11 +16,9 @@
 
 #if SANITIZER_CAN_USE_PREINIT_ARRAY
 
-// The symbol is called __local_tsan_preinit, because it's not intended to be
-// exported.
-// This code linked into the main executable when -fsanitize=thread is in
-// the link flags. It can only use exported interface functions.
-__attribute__((section(".preinit_array"), used))
-void (*__local_tsan_preinit)(void) = __tsan_init;
+// This section is linked into the main executable when -fsanitize=thread is
+// specified to perform initialization at a very early stage.
+__attribute__((section(".preinit_array"), used)) static auto preinit =
+    __tsan_init;
 
 #endif
lib/tsan/tsan_report.cpp
@@ -93,7 +93,9 @@ static const char *ReportTypeString(ReportType typ, uptr tag) {
       return "signal handler spoils errno";
     case ReportTypeDeadlock:
       return "lock-order-inversion (potential deadlock)";
-    // No default case so compiler warns us if we miss one
+    case ReportTypeMutexHeldWrongContext:
+      return "mutex held in the wrong context";
+      // No default case so compiler warns us if we miss one
   }
   UNREACHABLE("missing case");
 }
@@ -106,10 +108,10 @@ void PrintStack(const ReportStack *ent) {
   SymbolizedStack *frame = ent->frames;
   for (int i = 0; frame && frame->info.address; frame = frame->next, i++) {
     InternalScopedString res;
-    RenderFrame(&res, common_flags()->stack_trace_format, i,
-                frame->info.address, &frame->info,
-                common_flags()->symbolize_vs_style,
-                common_flags()->strip_path_prefix);
+    StackTracePrinter::GetOrInit()->RenderFrame(
+        &res, common_flags()->stack_trace_format, i, frame->info.address,
+        &frame->info, common_flags()->symbolize_vs_style,
+        common_flags()->strip_path_prefix);
     Printf("%s\n", res.data());
   }
   Printf("\n");
@@ -271,26 +273,10 @@ static ReportStack *ChooseSummaryStack(const ReportDesc *rep) {
   return 0;
 }
 
-static bool FrameIsInternal(const SymbolizedStack *frame) {
-  if (frame == 0)
-    return false;
-  const char *file = frame->info.file;
-  const char *module = frame->info.module;
-  if (file != 0 &&
-      (internal_strstr(file, "tsan_interceptors_posix.cpp") ||
-       internal_strstr(file, "tsan_interceptors_memintrinsics.cpp") ||
-       internal_strstr(file, "sanitizer_common_interceptors.inc") ||
-       internal_strstr(file, "tsan_interface_")))
-    return true;
-  if (module != 0 && (internal_strstr(module, "libclang_rt.tsan_")))
-    return true;
-  return false;
-}
-
-static SymbolizedStack *SkipTsanInternalFrames(SymbolizedStack *frames) {
-  while (FrameIsInternal(frames) && frames->next)
-    frames = frames->next;
-  return frames;
+static const SymbolizedStack *SkipTsanInternalFrames(SymbolizedStack *frames) {
+  if (const SymbolizedStack *f = SkipInternalFrames(frames))
+    return f;
+  return frames;  // Fallback to the top frame.
 }
 
 void PrintReport(const ReportDesc *rep) {
@@ -364,7 +350,7 @@ void PrintReport(const ReportDesc *rep) {
     Printf("  And %d more similar thread leaks.\n\n", rep->count - 1);
 
   if (ReportStack *stack = ChooseSummaryStack(rep)) {
-    if (SymbolizedStack *frame = SkipTsanInternalFrames(stack->frames))
+    if (const SymbolizedStack *frame = SkipTsanInternalFrames(stack->frames))
       ReportErrorSummary(rep_typ_str, frame->info);
   }
 
lib/tsan/tsan_report.h
@@ -34,7 +34,8 @@ enum ReportType {
   ReportTypeMutexBadReadUnlock,
   ReportTypeSignalUnsafe,
   ReportTypeErrnoInSignal,
-  ReportTypeDeadlock
+  ReportTypeDeadlock,
+  ReportTypeMutexHeldWrongContext
 };
 
 struct ReportStack {
lib/tsan/tsan_rtl.cpp
@@ -35,8 +35,10 @@ extern "C" void __tsan_resume() {
   __tsan_resumed = 1;
 }
 
+#if SANITIZER_APPLE
 SANITIZER_WEAK_DEFAULT_IMPL
 void __tsan_test_only_on_fork() {}
+#endif
 
 namespace __tsan {
 
@@ -46,11 +48,10 @@ int (*on_finalize)(int);
 #endif
 
 #if !SANITIZER_GO && !SANITIZER_APPLE
-__attribute__((tls_model("initial-exec")))
-THREADLOCAL char cur_thread_placeholder[sizeof(ThreadState)] ALIGNED(
-    SANITIZER_CACHE_LINE_SIZE);
+alignas(SANITIZER_CACHE_LINE_SIZE) THREADLOCAL __attribute__((tls_model(
+    "initial-exec"))) char cur_thread_placeholder[sizeof(ThreadState)];
 #endif
-static char ctx_placeholder[sizeof(Context)] ALIGNED(SANITIZER_CACHE_LINE_SIZE);
+alignas(SANITIZER_CACHE_LINE_SIZE) static char ctx_placeholder[sizeof(Context)];
 Context *ctx;
 
 // Can be overriden by a front-end.
@@ -446,7 +447,7 @@ static bool InitializeMemoryProfiler() {
     ctx->memprof_fd = 2;
   } else {
     InternalScopedString filename;
-    filename.append("%s.%d", fname, (int)internal_getpid());
+    filename.AppendF("%s.%d", fname, (int)internal_getpid());
     ctx->memprof_fd = OpenFile(filename.data(), WrOnly);
     if (ctx->memprof_fd == kInvalidFd) {
       Printf("ThreadSanitizer: failed to open memory profile file '%s'\n",
@@ -813,7 +814,7 @@ void ForkBefore(ThreadState* thr, uptr pc) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
   ctx->thread_registry.Lock();
   ctx->slot_mtx.Lock();
   ScopedErrorReportLock::Lock();
-  AllocatorLock();
+  AllocatorLockBeforeFork();
   // Suppress all reports in the pthread_atfork callbacks.
   // Reports will deadlock on the report_mtx.
   // We could ignore sync operations as well,
@@ -828,14 +829,17 @@ void ForkBefore(ThreadState* thr, uptr pc) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
   // Disables memory write in OnUserAlloc/Free.
   thr->ignore_reads_and_writes++;
 
+#  if SANITIZER_APPLE
   __tsan_test_only_on_fork();
+#  endif
 }
 
-static void ForkAfter(ThreadState* thr) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+static void ForkAfter(ThreadState* thr,
+                      bool child) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
   thr->suppress_reports--;  // Enabled in ForkBefore.
   thr->ignore_interceptors--;
   thr->ignore_reads_and_writes--;
-  AllocatorUnlock();
+  AllocatorUnlockAfterFork(child);
   ScopedErrorReportLock::Unlock();
   ctx->slot_mtx.Unlock();
   ctx->thread_registry.Unlock();
@@ -845,10 +849,10 @@ static void ForkAfter(ThreadState* thr) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
   GlobalProcessorUnlock();
 }
 
-void ForkParentAfter(ThreadState* thr, uptr pc) { ForkAfter(thr); }
+void ForkParentAfter(ThreadState* thr, uptr pc) { ForkAfter(thr, false); }
 
 void ForkChildAfter(ThreadState* thr, uptr pc, bool start_thread) {
-  ForkAfter(thr);
+  ForkAfter(thr, true);
   u32 nthread = ctx->thread_registry.OnFork(thr->tid);
   VPrintf(1,
           "ThreadSanitizer: forked new process with pid %d,"
lib/tsan/tsan_rtl.h
@@ -56,8 +56,8 @@ namespace __tsan {
 
 #if !SANITIZER_GO
 struct MapUnmapCallback;
-#if defined(__mips64) || defined(__aarch64__) || defined(__loongarch__) || \
-    defined(__powerpc__)
+#  if defined(__mips64) || defined(__aarch64__) || defined(__loongarch__) || \
+      defined(__powerpc__) || SANITIZER_RISCV64
 
 struct AP32 {
   static const uptr kSpaceBeg = 0;
@@ -136,7 +136,7 @@ struct TidEpoch {
   Epoch epoch;
 };
 
-struct TidSlot {
+struct alignas(SANITIZER_CACHE_LINE_SIZE) TidSlot {
   Mutex mtx;
   Sid sid;
   atomic_uint32_t raw_epoch;
@@ -153,10 +153,10 @@ struct TidSlot {
   }
 
   TidSlot();
-} ALIGNED(SANITIZER_CACHE_LINE_SIZE);
+};
 
 // This struct is stored in TLS.
-struct ThreadState {
+struct alignas(SANITIZER_CACHE_LINE_SIZE) ThreadState {
   FastState fast_state;
   int ignore_sync;
 #if !SANITIZER_GO
@@ -234,7 +234,7 @@ struct ThreadState {
   const ReportDesc *current_report;
 
   explicit ThreadState(Tid tid);
-} ALIGNED(SANITIZER_CACHE_LINE_SIZE);
+};
 
 #if !SANITIZER_GO
 #if SANITIZER_APPLE || SANITIZER_ANDROID
lib/tsan/tsan_rtl_aarch64.S
@@ -2,6 +2,7 @@
 #if defined(__aarch64__)
 
 #include "sanitizer_common/sanitizer_asm.h"
+#include "builtins/assembly.h"
 
 #if !defined(__APPLE__)
 .section .text
@@ -16,6 +17,7 @@ ASM_HIDDEN(__tsan_setjmp)
 ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(setjmp))
 ASM_SYMBOL_INTERCEPTOR(setjmp):
   CFI_STARTPROC
+  BTI_C
 
   // Save frame/link register
   stp     x29, x30, [sp, -32]!
@@ -66,6 +68,7 @@ ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(setjmp))
 ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(_setjmp))
 ASM_SYMBOL_INTERCEPTOR(_setjmp):
   CFI_STARTPROC
+  BTI_C
 
   // Save frame/link register
   stp     x29, x30, [sp, -32]!
@@ -116,6 +119,7 @@ ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(_setjmp))
 ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(sigsetjmp))
 ASM_SYMBOL_INTERCEPTOR(sigsetjmp):
   CFI_STARTPROC
+  BTI_C
 
   // Save frame/link register
   stp     x29, x30, [sp, -32]!
@@ -168,6 +172,7 @@ ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(sigsetjmp))
 ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp))
 ASM_SYMBOL_INTERCEPTOR(__sigsetjmp):
   CFI_STARTPROC
+  BTI_C
 
   // Save frame/link register
   stp     x29, x30, [sp, -32]!
@@ -217,4 +222,6 @@ ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp))
 
 NO_EXEC_STACK_DIRECTIVE
 
+GNU_PROPERTY_BTI_PAC
+
 #endif
lib/tsan/tsan_rtl_access.cpp
@@ -672,22 +672,28 @@ void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) {
 
 #if SANITIZER_DEBUG
   if (!IsAppMem(addr)) {
-    Printf("Access to non app mem %zx\n", addr);
+    Printf("Access to non app mem start: %p\n", (void*)addr);
     DCHECK(IsAppMem(addr));
   }
   if (!IsAppMem(addr + size - 1)) {
-    Printf("Access to non app mem %zx\n", addr + size - 1);
+    Printf("Access to non app mem end: %p\n", (void*)(addr + size - 1));
     DCHECK(IsAppMem(addr + size - 1));
   }
   if (!IsShadowMem(shadow_mem)) {
-    Printf("Bad shadow addr %p (%zx)\n", static_cast<void*>(shadow_mem), addr);
+    Printf("Bad shadow start addr: %p (%p)\n", shadow_mem, (void*)addr);
     DCHECK(IsShadowMem(shadow_mem));
   }
-  if (!IsShadowMem(shadow_mem + size * kShadowCnt - 1)) {
-    Printf("Bad shadow addr %p (%zx)\n",
-           static_cast<void*>(shadow_mem + size * kShadowCnt - 1),
-           addr + size - 1);
-    DCHECK(IsShadowMem(shadow_mem + size * kShadowCnt - 1));
+
+  RawShadow* shadow_mem_end = reinterpret_cast<RawShadow*>(
+      reinterpret_cast<uptr>(shadow_mem) + size * kShadowMultiplier - 1);
+  if (!IsShadowMem(shadow_mem_end)) {
+    Printf("Bad shadow end addr: %p (%p)\n", shadow_mem_end,
+           (void*)(addr + size - 1));
+    Printf(
+        "Shadow start addr (ok): %p (%p); size: 0x%zx; kShadowMultiplier: "
+        "%zx\n",
+        shadow_mem, (void*)addr, size, kShadowMultiplier);
+    DCHECK(IsShadowMem(shadow_mem_end));
   }
 #endif
 
lib/tsan/tsan_rtl_loongarch64.S
@@ -0,0 +1,196 @@
+#include "sanitizer_common/sanitizer_asm.h"
+
+.section .text
+
+ASM_HIDDEN(__tsan_setjmp)
+.comm _ZN14__interception11real_setjmpE,8,8  // __interception::real_setjmp: 8-byte, 8-aligned slot read below as the jump target
+.globl ASM_SYMBOL_INTERCEPTOR(setjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(setjmp))
+ASM_SYMBOL_INTERCEPTOR(setjmp):
+  CFI_STARTPROC
+  // setjmp interceptor: passes the SP of the caller to __tsan_setjmp, then tail-jumps to the real setjmp.
+  // Push a 32-byte frame and save the return address ($ra) and frame pointer ($fp)
+  addi.d $sp, $sp, -32
+  st.d $ra, $sp, 24
+  st.d $fp, $sp, 16
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (1, -8)  // DWARF reg 1 = $ra, stored at CFA-8
+  CFI_OFFSET (22, -16)  // DWARF reg 22 = $fp, stored at CFA-16
+
+  // Set $fp to the value $sp had on entry (SP of the caller) and make it the CFA base
+  addi.d $fp, $sp, 32
+  CFI_DEF_CFA_REGISTER (22)
+
+  // Save the env parameter ($a0): it is overwritten below to pass the argument of __tsan_setjmp
+  st.d $a0, $sp, 8
+  CFI_OFFSET (4, -24)  // DWARF reg 4 = $a0, stored at CFA-24
+
+  // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
+  addi.d  $a0, $fp, 0
+
+  // Call the tsan hook
+  bl      ASM_SYMBOL(__tsan_setjmp)
+
+  // Restore env parameter
+  ld.d $a0, $sp, 8
+  CFI_RESTORE (4)
+
+  // Restore frame pointer and return address, then pop the frame
+  ld.d $fp, $sp, 16
+  ld.d $ra, $sp, 24
+  addi.d $sp, $sp, 32
+  CFI_RESTORE (22)
+  CFI_RESTORE (1)
+  CFI_DEF_CFA (3, 0)  // CFA is $sp (DWARF reg 3) again
+
+  // Tail jump to libc setjmp through the pointer stored in real_setjmp; $a1 is used as scratch
+  la.local $a1, _ZN14__interception11real_setjmpE
+  ld.d $a1, $a1, 0
+  jr $a1
+
+  CFI_ENDPROC
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(setjmp))
+
+.comm _ZN14__interception12real__setjmpE,8,8  // __interception::real__setjmp: 8-byte, 8-aligned slot read below as the jump target
+.globl ASM_SYMBOL_INTERCEPTOR(_setjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(_setjmp))
+ASM_SYMBOL_INTERCEPTOR(_setjmp):
+  CFI_STARTPROC
+  // _setjmp interceptor: passes the SP of the caller to __tsan_setjmp, then tail-jumps to the real _setjmp.
+  // Push a 32-byte frame and save the return address ($ra) and frame pointer ($fp)
+  addi.d $sp, $sp, -32
+  st.d $ra, $sp, 24
+  st.d $fp, $sp, 16
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (1, -8)  // DWARF reg 1 = $ra, stored at CFA-8
+  CFI_OFFSET (22, -16)  // DWARF reg 22 = $fp, stored at CFA-16
+
+  // Set $fp to the value $sp had on entry (SP of the caller) and make it the CFA base
+  addi.d $fp, $sp, 32
+  CFI_DEF_CFA_REGISTER (22)
+
+  // Save the env parameter ($a0): it is overwritten below to pass the argument of __tsan_setjmp
+  st.d $a0, $sp, 8
+  CFI_OFFSET (4, -24)  // DWARF reg 4 = $a0, stored at CFA-24
+
+  // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
+  addi.d  $a0, $fp, 0
+
+  // Call the tsan hook
+  bl      ASM_SYMBOL(__tsan_setjmp)
+
+  // Restore env parameter
+  ld.d $a0, $sp, 8
+  CFI_RESTORE (4)
+
+  // Restore frame pointer and return address, then pop the frame
+  ld.d $fp, $sp, 16
+  ld.d $ra, $sp, 24
+  addi.d $sp, $sp, 32
+  CFI_RESTORE (22)
+  CFI_RESTORE (1)
+  CFI_DEF_CFA (3, 0)  // CFA is $sp (DWARF reg 3) again
+
+  // Tail jump to libc _setjmp through the pointer stored in real__setjmp; $a1 is used as scratch
+  la.local $a1, _ZN14__interception12real__setjmpE
+  ld.d $a1, $a1, 0
+  jr $a1
+
+  CFI_ENDPROC
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(_setjmp))
+
+.comm _ZN14__interception14real_sigsetjmpE,8,8
+.globl ASM_SYMBOL_INTERCEPTOR(sigsetjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(sigsetjmp))
+ASM_SYMBOL_INTERCEPTOR(sigsetjmp):
+  CFI_STARTPROC
+
+  // sigsetjmp interceptor: passes the SP of the caller to __tsan_setjmp, then
+  // tail-jumps to the real sigsetjmp with both incoming arguments ($a0 = env,
+  // $a1 = second argument) unchanged.
+
+  // Push a 32-byte frame and save the return address ($ra) and frame pointer ($fp)
+  addi.d $sp, $sp, -32
+  st.d $ra, $sp, 24
+  st.d $fp, $sp, 16
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (1, -8)
+  CFI_OFFSET (22, -16)
+
+  // Set $fp to the value $sp had on entry (SP of the caller) and make it the CFA base
+  addi.d $fp, $sp, 32
+  CFI_DEF_CFA_REGISTER (22)
+
+  // Save both incoming arguments. $a0 is overwritten below, and $a1 is an
+  // argument register that the callee is free to clobber, so it must be
+  // spilled too (the slot at sp+0 of the 32-byte frame is otherwise unused).
+  st.d $a0, $sp, 8
+  st.d $a1, $sp, 0
+  CFI_OFFSET (4, -24)
+  CFI_OFFSET (5, -32)
+
+  // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
+  addi.d  $a0, $fp, 0
+
+  // Call the tsan hook
+  bl      ASM_SYMBOL(__tsan_setjmp)
+
+  // Restore both incoming arguments
+  ld.d $a0, $sp, 8
+  ld.d $a1, $sp, 0
+  CFI_RESTORE (4)
+  CFI_RESTORE (5)
+
+  // Restore frame pointer and return address, then pop the frame
+  ld.d $fp, $sp, 16
+  ld.d $ra, $sp, 24
+  addi.d $sp, $sp, 32
+  CFI_RESTORE (22)
+  CFI_RESTORE (1)
+  CFI_DEF_CFA (3, 0)
+
+  // Tail jump to libc sigsetjmp. The target is loaded into the temporary $t0
+  // rather than $a1, so the second argument reaches the callee intact.
+  la.local $t0, _ZN14__interception14real_sigsetjmpE
+  ld.d $t0, $t0, 0
+  jr $t0
+
+  CFI_ENDPROC
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(sigsetjmp))
+
+.comm _ZN14__interception16real___sigsetjmpE,8,8
+.globl ASM_SYMBOL_INTERCEPTOR(__sigsetjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp))
+ASM_SYMBOL_INTERCEPTOR(__sigsetjmp):
+  CFI_STARTPROC
+
+  // __sigsetjmp interceptor: passes the SP of the caller to __tsan_setjmp,
+  // then tail-jumps to the real __sigsetjmp with both incoming arguments
+  // ($a0 = env, $a1 = second argument) unchanged.
+
+  // Push a 32-byte frame and save the return address ($ra) and frame pointer ($fp)
+  addi.d $sp, $sp, -32
+  st.d $ra, $sp, 24
+  st.d $fp, $sp, 16
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (1, -8)
+  CFI_OFFSET (22, -16)
+
+  // Set $fp to the value $sp had on entry (SP of the caller) and make it the CFA base
+  addi.d $fp, $sp, 32
+  CFI_DEF_CFA_REGISTER (22)
+
+  // Save both incoming arguments. $a0 is overwritten below, and $a1 is an
+  // argument register that the callee is free to clobber, so it must be
+  // spilled too (the slot at sp+0 of the 32-byte frame is otherwise unused).
+  st.d $a0, $sp, 8
+  st.d $a1, $sp, 0
+  CFI_OFFSET (4, -24)
+  CFI_OFFSET (5, -32)
+
+  // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
+  addi.d  $a0, $fp, 0
+
+  // Call the tsan hook
+  bl      ASM_SYMBOL(__tsan_setjmp)
+
+  // Restore both incoming arguments
+  ld.d $a0, $sp, 8
+  ld.d $a1, $sp, 0
+  CFI_RESTORE (4)
+  CFI_RESTORE (5)
+
+  // Restore frame pointer and return address, then pop the frame
+  ld.d $fp, $sp, 16
+  ld.d $ra, $sp, 24
+  addi.d $sp, $sp, 32
+  CFI_RESTORE (22)
+  CFI_RESTORE (1)
+  CFI_DEF_CFA (3, 0)
+
+  // Tail jump to libc __sigsetjmp. The target is loaded into the temporary $t0
+  // rather than $a1, so the second argument reaches the callee intact.
+  la.local $t0, _ZN14__interception16real___sigsetjmpE
+  ld.d $t0, $t0, 0
+  jr $t0
+
+  CFI_ENDPROC
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp))
lib/tsan/tsan_rtl_mutex.cpp
@@ -446,9 +446,9 @@ void Acquire(ThreadState *thr, uptr pc, uptr addr) {
   if (!s)
     return;
   SlotLocker locker(thr);
+  ReadLock lock(&s->mtx);
   if (!s->clock)
     return;
-  ReadLock lock(&s->mtx);
   thr->clock.Acquire(s->clock);
 }
 
lib/tsan/tsan_rtl_riscv64.S
@@ -0,0 +1,203 @@
+#include "sanitizer_common/sanitizer_asm.h"
+
+.section .text
+
+.comm _ZN14__interception11real_setjmpE,8,8  // __interception::real_setjmp: 8-byte, 8-aligned slot read below as the jump target
+.globl ASM_SYMBOL_INTERCEPTOR(setjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(setjmp))
+ASM_SYMBOL_INTERCEPTOR(setjmp):
+  CFI_STARTPROC
+  // setjmp interceptor: passes the SP of the caller to __tsan_setjmp, then tail-jumps to the real setjmp.
+  // Push a 32-byte frame and save the return address (ra) and frame pointer (s0)
+  addi sp, sp, -32
+  sd ra, 24(sp)
+  sd s0, 16(sp)
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (1, -8)  // DWARF reg 1 = ra, stored at CFA-8
+  CFI_OFFSET (8, -16)  // DWARF reg 8 = s0, stored at CFA-16
+
+  // Set s0 to the value sp had on entry (SP of the caller) and make it the CFA base
+  addi s0, sp, 32
+  CFI_DEF_CFA_REGISTER (8)
+
+  // Save the env parameter (a0): it is overwritten below to pass the argument of __tsan_setjmp
+  sd a0, 8(sp)
+  CFI_OFFSET (10, -24)  // DWARF reg 10 = a0, stored at CFA-24
+
+  // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
+  addi  a0, s0, 0
+
+  // Call the tsan hook
+  call ASM_SYMBOL(__tsan_setjmp)
+
+  // Restore env parameter
+  ld a0, 8(sp)
+  CFI_RESTORE (10)
+
+  // Restore frame pointer and return address, then pop the frame
+  ld s0, 16(sp)
+  ld ra, 24(sp)
+  addi sp, sp, 32
+  CFI_RESTORE (8)
+  CFI_RESTORE (1)
+  CFI_DEF_CFA (2, 0)  // CFA is sp (DWARF reg 2) again
+
+  // Tail jump to libc setjmp through the pointer stored in real_setjmp; t1 is used as scratch
+  la t1, _ZN14__interception11real_setjmpE
+  ld t1, 0(t1)
+  jr t1
+
+  CFI_ENDPROC
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(setjmp))
+
+.comm _ZN14__interception12real__setjmpE,8,8  // __interception::real__setjmp: 8-byte, 8-aligned slot read below as the jump target
+.globl ASM_SYMBOL_INTERCEPTOR(_setjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(_setjmp))
+ASM_SYMBOL_INTERCEPTOR(_setjmp):
+  CFI_STARTPROC
+  // _setjmp interceptor: passes the SP of the caller to __tsan_setjmp, then tail-jumps to the real _setjmp.
+  // Push a 32-byte frame and save the return address (ra) and frame pointer (s0)
+  addi sp, sp, -32
+  sd ra, 24(sp)
+  sd s0, 16(sp)
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (1, -8)  // DWARF reg 1 = ra, stored at CFA-8
+  CFI_OFFSET (8, -16)  // DWARF reg 8 = s0, stored at CFA-16
+
+  // Set s0 to the value sp had on entry (SP of the caller) and make it the CFA base
+  addi s0, sp, 32
+  CFI_DEF_CFA_REGISTER (8)
+
+  // Save the env parameter (a0): it is overwritten below to pass the argument of __tsan_setjmp
+  sd a0, 8(sp)
+  CFI_OFFSET (10, -24)  // DWARF reg 10 = a0, stored at CFA-24
+
+  // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
+  addi  a0, s0, 0
+
+  // Call the tsan hook
+  call ASM_SYMBOL(__tsan_setjmp)
+
+  // Restore env parameter
+  ld a0, 8(sp)
+  CFI_RESTORE (10)
+
+  // Restore frame pointer and return address, then pop the frame
+  ld s0, 16(sp)
+  ld ra, 24(sp)
+  addi sp, sp, 32
+  CFI_RESTORE (8)
+  CFI_RESTORE (1)
+  CFI_DEF_CFA (2, 0)  // CFA is sp (DWARF reg 2) again
+
+  // Tail jump to libc _setjmp through the pointer stored in real__setjmp; t1 is used as scratch
+  la t1, _ZN14__interception12real__setjmpE
+  ld t1, 0(t1)
+  jr t1
+
+  CFI_ENDPROC
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(_setjmp))
+
+.comm _ZN14__interception14real_sigsetjmpE,8,8  // __interception::real_sigsetjmp: 8-byte, 8-aligned slot read below as the jump target
+.globl ASM_SYMBOL_INTERCEPTOR(sigsetjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(sigsetjmp))
+ASM_SYMBOL_INTERCEPTOR(sigsetjmp):
+  CFI_STARTPROC
+  // sigsetjmp interceptor: passes the SP of the caller to __tsan_setjmp, then tail-jumps to the real sigsetjmp with a0/a1 preserved.
+  // Push a 32-byte frame and save the return address (ra) and frame pointer (s0)
+  addi sp, sp, -32
+  sd ra, 24(sp)
+  sd s0, 16(sp)
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (1, -8)  // DWARF reg 1 = ra, stored at CFA-8
+  CFI_OFFSET (8, -16)  // DWARF reg 8 = s0, stored at CFA-16
+
+  // Set s0 to the value sp had on entry (SP of the caller) and make it the CFA base
+  addi s0, sp, 32
+  CFI_DEF_CFA_REGISTER (8)
+
+  // Save both incoming arguments (a0 and a1) across the call below
+  sd a0, 8(sp)
+  sd a1, 0(sp)
+  CFI_OFFSET (10, -24)  // DWARF reg 10 = a0, stored at CFA-24
+  CFI_OFFSET (11, -32)  // DWARF reg 11 = a1, stored at CFA-32
+
+  // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
+  addi  a0, s0, 0
+
+  // Call the tsan hook
+  call      ASM_SYMBOL(__tsan_setjmp)
+
+  // Restore both incoming arguments
+  ld a0, 8(sp)
+  ld a1, 0(sp)
+  CFI_RESTORE (10)
+  CFI_RESTORE (11)
+
+  // Restore frame pointer and return address, then pop the frame
+  ld s0, 16(sp)
+  ld ra, 24(sp)
+  addi sp, sp, 32
+  CFI_RESTORE (8)
+  CFI_RESTORE (1)
+  CFI_DEF_CFA (2, 0)  // CFA is sp (DWARF reg 2) again
+
+  // Tail jump to libc sigsetjmp through the pointer stored in real_sigsetjmp; t1 is scratch, so a0/a1 stay intact
+  la t1, _ZN14__interception14real_sigsetjmpE
+  ld t1, 0(t1)
+  jr t1
+
+  CFI_ENDPROC
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(sigsetjmp))
+
+.comm _ZN14__interception16real___sigsetjmpE,8,8  // __interception::real___sigsetjmp: 8-byte, 8-aligned slot read below as the jump target
+.globl ASM_SYMBOL_INTERCEPTOR(__sigsetjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp))
+ASM_SYMBOL_INTERCEPTOR(__sigsetjmp):
+  CFI_STARTPROC
+  // __sigsetjmp interceptor: passes the SP of the caller to __tsan_setjmp, then tail-jumps to the real __sigsetjmp with a0/a1 preserved.
+  // Push a 32-byte frame and save the return address (ra) and frame pointer (s0)
+  addi sp, sp, -32
+  sd ra, 24(sp)
+  sd s0, 16(sp)
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (1, -8)  // DWARF reg 1 = ra, stored at CFA-8
+  CFI_OFFSET (8, -16)  // DWARF reg 8 = s0, stored at CFA-16
+
+  // Set s0 to the value sp had on entry (SP of the caller) and make it the CFA base
+  addi s0, sp, 32
+  CFI_DEF_CFA_REGISTER (8)
+
+  // Save both incoming arguments (a0 and a1) across the call below
+  sd a0, 8(sp)
+  sd a1, 0(sp)
+  CFI_OFFSET (10, -24)  // DWARF reg 10 = a0, stored at CFA-24
+  CFI_OFFSET (11, -32)  // DWARF reg 11 = a1, stored at CFA-32
+
+  // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
+  addi  a0, s0, 0
+
+  // Call the tsan hook
+  call      ASM_SYMBOL(__tsan_setjmp)
+
+  // Restore both incoming arguments
+  ld a0, 8(sp)
+  ld a1, 0(sp)
+  CFI_RESTORE (10)
+  CFI_RESTORE (11)
+
+  // Restore frame pointer and return address, then pop the frame
+  ld s0, 16(sp)
+  ld ra, 24(sp)
+  addi sp, sp, 32
+  CFI_RESTORE (8)
+  CFI_RESTORE (1)
+  CFI_DEF_CFA (2, 0)  // CFA is sp (DWARF reg 2) again
+
+  // Tail jump to libc __sigsetjmp through the pointer stored in real___sigsetjmp; t1 is scratch, so a0/a1 stay intact
+  la t1, _ZN14__interception16real___sigsetjmpE
+  ld t1, 0(t1)
+  jr t1
+
+  CFI_ENDPROC
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp))
lib/tsan/tsan_rtl_s390x.S
@@ -0,0 +1,49 @@
+#include "sanitizer_common/sanitizer_asm.h"
+
+#define CFA_OFFSET 160
+#define R2_REL_OFFSET 16
+#define R3_REL_OFFSET 24
+#define R14_REL_OFFSET 112
+#define R15_REL_OFFSET 120
+#define FRAME_SIZE 160
+
+.text
+
+ASM_HIDDEN(__tsan_setjmp)
+
+.macro intercept symbol, real  // Emits an interceptor for \symbol: calls __tsan_setjmp, then branches to the address stored in \real
+.comm \real, 8, 8  // 8-byte, 8-aligned common symbol read below as the branch target
+.globl ASM_SYMBOL_INTERCEPTOR(\symbol)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(\symbol))
+ASM_SYMBOL_INTERCEPTOR(\symbol):
+  CFI_STARTPROC
+  stmg %r2, %r3, R2_REL_OFFSET(%r15)  // save %r2 and %r3 at offset 16 from the incoming %r15
+  CFI_REL_OFFSET(%r2, R2_REL_OFFSET)
+  CFI_REL_OFFSET(%r3, R3_REL_OFFSET)
+  stmg %r14, %r15, R14_REL_OFFSET(%r15)  // save %r14 and %r15 at offset 112 from the incoming %r15
+  CFI_REL_OFFSET(%r14, R14_REL_OFFSET)
+  CFI_REL_OFFSET(%r15, R15_REL_OFFSET)
+  aghi %r15, -FRAME_SIZE  // move %r15 down by FRAME_SIZE (160) bytes for the call below
+  CFI_ADJUST_CFA_OFFSET(FRAME_SIZE)
+  la %r2, FRAME_SIZE(%r15)  // %r2 = value %r15 had on entry, passed as the argument of __tsan_setjmp
+  brasl %r14, ASM_SYMBOL(__tsan_setjmp)
+  lmg %r14, %r15, FRAME_SIZE + R14_REL_OFFSET(%r15)  // reload saved %r14/%r15; %r15 is back at its entry value
+  CFI_RESTORE(%r14)
+  CFI_RESTORE(%r15)
+  CFI_DEF_CFA_OFFSET(CFA_OFFSET)
+  lmg %r2, %r3, R2_REL_OFFSET(%r15)  // reload the saved %r2/%r3 so the real function sees them unchanged
+  CFI_RESTORE(%r2)
+  CFI_RESTORE(%r3)
+  larl %r1, \real  // %r1 = address of the \real slot
+  lg %r1, 0(%r1)  // %r1 = pointer stored in that slot
+  br %r1  // branch to it without touching %r14, so the target returns to our caller
+  CFI_ENDPROC
+  ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(\symbol))
+.endm
+
+intercept setjmp, _ZN14__interception11real_setjmpE
+intercept _setjmp, _ZN14__interception12real__setjmpE
+intercept sigsetjmp, _ZN14__interception14real_sigsetjmpE
+intercept __sigsetjmp, _ZN14__interception16real___sigsetjmpE
+
+NO_EXEC_STACK_DIRECTIVE
lib/tsan/tsan_rtl_thread.cpp
@@ -160,6 +160,10 @@ void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id,
   }
   Free(thr->tctx->sync);
 
+#if !SANITIZER_GO
+  thr->is_inited = true;
+#endif
+
   uptr stk_addr = 0;
   uptr stk_size = 0;
   uptr tls_addr = 0;
@@ -200,15 +204,11 @@ void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id,
 }
 
 void ThreadContext::OnStarted(void *arg) {
-  thr = static_cast<ThreadState *>(arg);
   DPrintf("#%d: ThreadStart\n", tid);
-  new (thr) ThreadState(tid);
+  thr = new (arg) ThreadState(tid);
   if (common_flags()->detect_deadlocks)
     thr->dd_lt = ctx->dd->CreateLogicalThread(tid);
   thr->tctx = this;
-#if !SANITIZER_GO
-  thr->is_inited = true;
-#endif
 }
 
 void ThreadFinish(ThreadState *thr) {
lib/tsan/tsan_suppressions.cpp
@@ -42,7 +42,7 @@ const char *__tsan_default_suppressions() {
 
 namespace __tsan {
 
-ALIGNED(64) static char suppression_placeholder[sizeof(SuppressionContext)];
+alignas(64) static char suppression_placeholder[sizeof(SuppressionContext)];
 static SuppressionContext *suppression_ctx = nullptr;
 static const char *kSuppressionTypes[] = {
     kSuppressionRace,   kSuppressionRaceTop, kSuppressionMutex,
@@ -81,6 +81,7 @@ static const char *conv(ReportType typ) {
     case ReportTypeMutexBadUnlock:
     case ReportTypeMutexBadReadLock:
     case ReportTypeMutexBadReadUnlock:
+    case ReportTypeMutexHeldWrongContext:
       return kSuppressionMutex;
     case ReportTypeSignalUnsafe:
     case ReportTypeErrnoInSignal:
lib/tsan/tsan_vector_clock.h
@@ -34,7 +34,7 @@ class VectorClock {
   VectorClock& operator=(const VectorClock& other);
 
  private:
-  Epoch clk_[kThreadSlotCount] VECTOR_ALIGNED;
+  VECTOR_ALIGNED Epoch clk_[kThreadSlotCount];
 };
 
 ALWAYS_INLINE Epoch VectorClock::Get(Sid sid) const {
src/libtsan.zig
@@ -160,10 +160,13 @@ pub fn buildTsan(comp: *Compilation, prog_node: std.Progress.Node) BuildError!vo
     }
     {
         const asm_source = switch (target.cpu.arch) {
-            .aarch64 => "tsan_rtl_aarch64.S",
+            .aarch64, .aarch64_be => "tsan_rtl_aarch64.S",
+            .loongarch64 => "tsan_rtl_loongarch64.S",
+            .mips64, .mips64el => "tsan_rtl_mips64.S",
+            .powerpc64, .powerpc64le => "tsan_rtl_ppc64.S",
+            .riscv64 => "tsan_rtl_riscv64.S",
+            .s390x => "tsan_rtl_s390x.S",
             .x86_64 => "tsan_rtl_amd64.S",
-            .mips64 => "tsan_rtl_mips64.S",
-            .powerpc64 => "tsan_rtl_ppc64.S",
             else => return error.TSANUnsupportedCPUArchitecture,
         };
         var cflags = std.ArrayList([]const u8).init(arena);
@@ -416,7 +419,6 @@ const sanitizer_common_sources = [_][]const u8{
     "sanitizer_platform_limits_freebsd.cpp",
     "sanitizer_platform_limits_linux.cpp",
     "sanitizer_platform_limits_netbsd.cpp",
-    "sanitizer_platform_limits_openbsd.cpp",
     "sanitizer_platform_limits_posix.cpp",
     "sanitizer_platform_limits_solaris.cpp",
     "sanitizer_posix.cpp",
@@ -429,7 +431,6 @@ const sanitizer_common_sources = [_][]const u8{
     "sanitizer_procmaps_solaris.cpp",
     "sanitizer_range.cpp",
     "sanitizer_solaris.cpp",
-    "sanitizer_stack_store.cpp",
     "sanitizer_stoptheworld_fuchsia.cpp",
     "sanitizer_stoptheworld_mac.cpp",
     "sanitizer_stoptheworld_win.cpp",
@@ -452,6 +453,7 @@ const sanitizer_nolibc_sources = [_][]const u8{
 const sanitizer_libcdep_sources = [_][]const u8{
     "sanitizer_common_libcdep.cpp",
     "sanitizer_allocator_checks.cpp",
+    "sanitizer_dl.cpp",
     "sanitizer_linux_libcdep.cpp",
     "sanitizer_mac_libcdep.cpp",
     "sanitizer_posix_libcdep.cpp",
@@ -461,6 +463,7 @@ const sanitizer_libcdep_sources = [_][]const u8{
 
 const sanitizer_symbolizer_sources = [_][]const u8{
     "sanitizer_allocator_report.cpp",
+    "sanitizer_stack_store.cpp",
     "sanitizer_stackdepot.cpp",
     "sanitizer_stacktrace.cpp",
     "sanitizer_stacktrace_libcdep.cpp",
@@ -471,10 +474,13 @@ const sanitizer_symbolizer_sources = [_][]const u8{
     "sanitizer_symbolizer_libcdep.cpp",
     "sanitizer_symbolizer_mac.cpp",
     "sanitizer_symbolizer_markup.cpp",
+    "sanitizer_symbolizer_markup_fuchsia.cpp",
     "sanitizer_symbolizer_posix_libcdep.cpp",
     "sanitizer_symbolizer_report.cpp",
+    "sanitizer_symbolizer_report_fuchsia.cpp",
     "sanitizer_symbolizer_win.cpp",
     "sanitizer_unwind_linux_libcdep.cpp",
+    "sanitizer_unwind_fuchsia.cpp",
     "sanitizer_unwind_win.cpp",
 };