Commit dd8d59686a

mlugg <mlugg@mlugg.co.uk>
2025-09-18 00:03:45
std.debug: miscellaneous fixes
Mostly on macOS, since Loris showed me a not-great stack trace, and I spent 8 hours trying to make it better. The dyld shared cache is designed in a way which makes this really hard to do right, and documentation is non-existent, but this *seems* to work pretty well. I'll leave the ruling on whether I did a good job to CI and our users.
1 parent a18fd41
lib/std/c/darwin.zig
@@ -354,6 +354,14 @@ pub extern "c" fn _dyld_image_count() u32;
 pub extern "c" fn _dyld_get_image_header(image_index: u32) ?*mach_header;
 pub extern "c" fn _dyld_get_image_vmaddr_slide(image_index: u32) usize;
 pub extern "c" fn _dyld_get_image_name(image_index: u32) [*:0]const u8;
+pub extern "c" fn dladdr(addr: *const anyopaque, info: *dl_info) c_int;
+
+pub const dl_info = extern struct {
+    fname: [*:0]const u8,
+    fbase: *anyopaque,
+    sname: ?[*:0]const u8,
+    saddr: ?*anyopaque,
+};
 
 pub const COPYFILE = packed struct(u32) {
     ACL: bool = false,
lib/std/debug/SelfInfo/DarwinModule.zig
@@ -1,6 +1,5 @@
 /// The runtime address where __TEXT is loaded.
 text_base: usize,
-load_offset: usize,
 name: []const u8,
 
 pub fn key(m: *const DarwinModule) usize {
@@ -12,38 +11,14 @@ pub const LookupCache = void;
 pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) Error!DarwinModule {
     _ = cache;
     _ = gpa;
-    const image_count = std.c._dyld_image_count();
-    for (0..image_count) |image_idx| {
-        const header = std.c._dyld_get_image_header(@intCast(image_idx)) orelse continue;
-        const text_base = @intFromPtr(header);
-        if (address < text_base) continue;
-        const load_offset = std.c._dyld_get_image_vmaddr_slide(@intCast(image_idx));
-
-        // Find the __TEXT segment
-        var it: macho.LoadCommandIterator = .{
-            .ncmds = header.ncmds,
-            .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds],
-        };
-        const text_segment_cmd = while (it.next()) |load_cmd| {
-            if (load_cmd.cmd() != .SEGMENT_64) continue;
-            const segment_cmd = load_cmd.cast(macho.segment_command_64).?;
-            if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue;
-            break segment_cmd;
-        } else continue;
-
-        const seg_start = load_offset + text_segment_cmd.vmaddr;
-        assert(seg_start == text_base);
-        const seg_end = seg_start + text_segment_cmd.vmsize;
-        if (address < seg_start or address >= seg_end) continue;
-
-        // We've found the matching __TEXT segment. This is the image we need.
-        return .{
-            .text_base = text_base,
-            .load_offset = load_offset,
-            .name = mem.span(std.c._dyld_get_image_name(@intCast(image_idx))),
-        };
+    var info: std.c.dl_info = undefined;
+    switch (std.c.dladdr(@ptrFromInt(address), &info)) {
+        0 => return error.MissingDebugInfo,
+        else => return .{
+            .name = std.mem.span(info.fname),
+            .text_base = @intFromPtr(info.fbase),
+        },
     }
-    return error.MissingDebugInfo;
 }
 fn loadUnwindInfo(module: *const DarwinModule) DebugInfo.Unwind {
     const header: *std.macho.mach_header = @ptrFromInt(module.text_base);
@@ -52,56 +27,115 @@ fn loadUnwindInfo(module: *const DarwinModule) DebugInfo.Unwind {
         .ncmds = header.ncmds,
         .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds],
     };
-    const sections = while (it.next()) |load_cmd| {
+    const sections, const text_vmaddr = while (it.next()) |load_cmd| {
         if (load_cmd.cmd() != .SEGMENT_64) continue;
         const segment_cmd = load_cmd.cast(macho.segment_command_64).?;
         if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue;
-        break load_cmd.getSections();
+        break .{ load_cmd.getSections(), segment_cmd.vmaddr };
     } else unreachable;
 
+    const vmaddr_slide = module.text_base - text_vmaddr;
+
     var unwind_info: ?[]const u8 = null;
     var eh_frame: ?[]const u8 = null;
     for (sections) |sect| {
         if (mem.eql(u8, sect.sectName(), "__unwind_info")) {
-            const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr)));
+            const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(vmaddr_slide + sect.addr)));
             unwind_info = sect_ptr[0..@intCast(sect.size)];
         } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) {
-            const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr)));
+            const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(vmaddr_slide + sect.addr)));
             eh_frame = sect_ptr[0..@intCast(sect.size)];
         }
     }
     return .{
+        .vmaddr_slide = vmaddr_slide,
         .unwind_info = unwind_info,
         .eh_frame = eh_frame,
     };
 }
 fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO {
-    const mapped_mem = try mapDebugInfoFile(module.name);
-    errdefer posix.munmap(mapped_mem);
+    const all_mapped_memory = try mapDebugInfoFile(module.name);
+    errdefer posix.munmap(all_mapped_memory);
+
+    // In most cases, the file we just mapped is a Mach-O binary. However, it could be a "universal
+    // binary": a simple file format which contains Mach-O binaries for multiple targets. For
+    // instance, `/usr/lib/dyld` is currently distributed as a universal binary containing images
+    // for both ARM64 Macs and x86_64 Macs.
+    if (all_mapped_memory.len < 4) return error.InvalidDebugInfo;
+    const magic = @as(*const u32, @ptrCast(all_mapped_memory.ptr)).*;
+    // The contents of a Mach-O file, which may or may not be the whole of `all_mapped_memory`.
+    const mapped_macho = switch (magic) {
+        macho.MH_MAGIC_64 => all_mapped_memory,
+
+        macho.FAT_CIGAM => mapped_macho: {
+            // This is the universal binary format (aka a "fat binary"). Annoyingly, the whole thing
+            // is big-endian, so we'll be swapping some bytes.
+            if (all_mapped_memory.len < @sizeOf(macho.fat_header)) return error.InvalidDebugInfo;
+            const hdr: *const macho.fat_header = @ptrCast(all_mapped_memory.ptr);
+            const archs_ptr: [*]const macho.fat_arch = @ptrCast(all_mapped_memory.ptr + @sizeOf(macho.fat_header));
+            const archs: []const macho.fat_arch = archs_ptr[0..@byteSwap(hdr.nfat_arch)];
+            const native_cpu_type = switch (builtin.cpu.arch) {
+                .x86_64 => macho.CPU_TYPE_X86_64,
+                .aarch64 => macho.CPU_TYPE_ARM64,
+                else => comptime unreachable,
+            };
+            for (archs) |*arch| {
+                if (@byteSwap(arch.cputype) != native_cpu_type) continue;
+                const offset = @byteSwap(arch.offset);
+                const size = @byteSwap(arch.size);
+                break :mapped_macho all_mapped_memory[offset..][0..size];
+            }
+            // Our native architecture was not present in the fat binary.
+            return error.MissingDebugInfo;
+        },
+
+        // Even on modern 64-bit targets, this format doesn't seem to be too extensively used. It
+        // will be fairly easy to add support here if necessary; it's very similar to above.
+        macho.FAT_CIGAM_64 => return error.UnsupportedDebugInfo,
 
-    const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr));
+        else => return error.InvalidDebugInfo,
+    };
+
+    const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_macho.ptr));
     if (hdr.magic != macho.MH_MAGIC_64)
         return error.InvalidDebugInfo;
 
-    const symtab: macho.symtab_command = symtab: {
+    const symtab: macho.symtab_command, const text_vmaddr: u64 = lc_iter: {
         var it: macho.LoadCommandIterator = .{
             .ncmds = hdr.ncmds,
-            .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds],
+            .buffer = mapped_macho[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds],
         };
+        var symtab: ?macho.symtab_command = null;
+        var text_vmaddr: ?u64 = null;
         while (it.next()) |cmd| switch (cmd.cmd()) {
-            .SYMTAB => break :symtab cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo,
+            .SYMTAB => symtab = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo,
+            .SEGMENT_64 => if (cmd.cast(macho.segment_command_64)) |seg_cmd| {
+                if (!mem.eql(u8, seg_cmd.segName(), "__TEXT")) continue;
+                text_vmaddr = seg_cmd.vmaddr;
+            },
             else => {},
         };
-        return error.MissingDebugInfo;
+        break :lc_iter .{
+            symtab orelse return error.MissingDebugInfo,
+            text_vmaddr orelse return error.MissingDebugInfo,
+        };
     };
 
-    const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab.symoff..]);
+    const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_macho[symtab.symoff..]);
     const syms = syms_ptr[0..symtab.nsyms];
-    const strings = mapped_mem[symtab.stroff..][0 .. symtab.strsize - 1 :0];
+    const strings = mapped_macho[symtab.stroff..][0 .. symtab.strsize - 1];
 
     var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len);
     defer symbols.deinit(gpa);
 
+    // This map is temporary; it is used only to detect duplicates here. This is
+    // necessary because we prefer to use STAB ("symbolic debugging table") symbols,
+    // but they might not be present, so we track normal symbols too.
+    // Indices match 1-1 with those of `symbols`.
+    var symbol_names: std.StringArrayHashMapUnmanaged(void) = .empty;
+    defer symbol_names.deinit(gpa);
+    try symbol_names.ensureUnusedCapacity(gpa, syms.len);
+
     var ofile: u32 = undefined;
     var last_sym: MachoSymbol = undefined;
     var state: enum {
@@ -115,7 +149,25 @@ fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO
     } = .init;
 
     for (syms) |*sym| {
-        if (sym.n_type.bits.is_stab == 0) continue;
+        if (sym.n_type.bits.is_stab == 0) {
+            if (sym.n_strx == 0) continue;
+            switch (sym.n_type.bits.type) {
+                .undf, .pbud, .indr, .abs, _ => continue,
+                .sect => {
+                    const name = std.mem.sliceTo(strings[sym.n_strx..], 0);
+                    const gop = symbol_names.getOrPutAssumeCapacity(name);
+                    if (!gop.found_existing) {
+                        assert(gop.index == symbols.items.len);
+                        symbols.appendAssumeCapacity(.{
+                            .strx = sym.n_strx,
+                            .addr = sym.n_value,
+                            .ofile = MachoSymbol.unknown_ofile,
+                        });
+                    }
+                },
+            }
+            continue;
+        }
 
         // TODO handle globals N_GSYM, and statics N_STSYM
         switch (sym.n_type.stab) {
@@ -132,7 +184,6 @@ fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO
                     last_sym = .{
                         .strx = 0,
                         .addr = sym.n_value,
-                        .size = 0,
                         .ofile = ofile,
                     };
                 },
@@ -145,14 +196,22 @@ fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO
                 },
                 .fun_strx => {
                     state = .fun_size;
-                    last_sym.size = @intCast(sym.n_value);
                 },
                 else => return error.InvalidDebugInfo,
             },
             .ensym => switch (state) {
                 .fun_size => {
                     state = .ensym;
-                    symbols.appendAssumeCapacity(last_sym);
+                    if (last_sym.strx != 0) {
+                        const name = std.mem.sliceTo(strings[last_sym.strx..], 0); // name of the symbol being recorded
+                        const gop = symbol_names.getOrPutAssumeCapacity(name);
+                        if (!gop.found_existing) {
+                            assert(gop.index == symbols.items.len);
+                            symbols.appendAssumeCapacity(last_sym);
+                        } else {
+                            symbols.items[gop.index] = last_sym; // STAB entry replaces the plain symbol of the same name
+                        }
+                    }
                 },
                 else => return error.InvalidDebugInfo,
             },
@@ -168,9 +227,12 @@ fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO
     }
 
     switch (state) {
-        .init => return error.MissingDebugInfo,
+        .init => {
+            // Missing STAB symtab entries is still okay, unless there were also no normal symbols.
+            if (symbols.items.len == 0) return error.MissingDebugInfo;
+        },
         .oso_close => {},
-        else => return error.InvalidDebugInfo,
+        else => return error.InvalidDebugInfo, // corrupted STAB entries in symtab
     }
 
     const symbols_slice = try symbols.toOwnedSlice(gpa);
@@ -182,10 +244,11 @@ fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO
     mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan);
 
     return .{
-        .mapped_memory = mapped_mem,
+        .mapped_memory = all_mapped_memory,
         .symbols = symbols_slice,
         .strings = strings,
         .ofiles = .empty,
+        .vaddr_offset = module.text_base - text_vmaddr,
     };
 }
 pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, address: usize) Error!std.debug.Symbol {
@@ -195,7 +258,7 @@ pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *Debu
     };
     const loaded_macho = &di.loaded_macho.?;
 
-    const vaddr = address - module.load_offset;
+    const vaddr = address - loaded_macho.vaddr_offset;
     const symbol = MachoSymbol.find(loaded_macho.symbols, vaddr) orelse return .unknown;
 
     // offset of `address` from start of `symbol`
@@ -212,6 +275,11 @@ pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *Debu
         .source_location = null,
     };
 
+    if (symbol.ofile == MachoSymbol.unknown_ofile) {
+        // We don't have STAB info, so can't track down the object file; all we can do is the symbol name.
+        return sym_only_result;
+    }
+
     const o_file: *DebugInfo.OFile = of: {
         const gop = try loaded_macho.ofiles.getOrPut(gpa, symbol.ofile);
         if (!gop.found_existing) {
@@ -233,7 +301,7 @@ pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *Debu
     const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch return sym_only_result;
 
     return .{
-        .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr) orelse stab_symbol,
+        .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr + address_symbol_offset) orelse stab_symbol,
         .compile_unit_name = compile_unit.die.getAttrString(
             &o_file.dwarf,
             native_endian,
@@ -256,7 +324,7 @@ pub const UnwindContext = std.debug.SelfInfo.DwarfUnwindContext;
 /// Unwind a frame using MachO compact unwind info (from __unwind_info).
 /// If the compact encoding can't encode a way to unwind a frame, it will
 /// defer unwinding to DWARF, in which case `.eh_frame` will be used if available.
-pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!usize {
+pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!void {
     return unwindFrameInner(module, gpa, di, context) catch |err| switch (err) {
         error.InvalidDebugInfo,
         error.MissingDebugInfo,
@@ -272,7 +340,7 @@ pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo,
         => return error.InvalidDebugInfo,
     };
 }
-fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize {
+fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !void {
     if (di.unwind == null) di.unwind = module.loadUnwindInfo();
     const unwind = &di.unwind.?;
 
@@ -500,11 +568,11 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo,
             },
             .DWARF => {
                 const eh_frame = unwind.eh_frame orelse return error.MissingDebugInfo;
-                const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - module.load_offset;
+                const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - unwind.vmaddr_slide;
                 return context.unwindFrame(
                     gpa,
                     &.initSection(.eh_frame, eh_frame_vaddr, eh_frame),
-                    module.load_offset,
+                    unwind.vmaddr_slide,
                     @intCast(encoding.value.x86_64.dwarf),
                 );
             },
@@ -520,11 +588,11 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo,
             },
             .DWARF => {
                 const eh_frame = unwind.eh_frame orelse return error.MissingDebugInfo;
-                const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - module.load_offset;
+                const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - unwind.vmaddr_slide;
                 return context.unwindFrame(
                     gpa,
                     &.initSection(.eh_frame, eh_frame_vaddr, eh_frame),
-                    module.load_offset,
+                    unwind.vmaddr_slide,
                     @intCast(encoding.value.arm64.dwarf),
                 );
             },
@@ -572,9 +640,7 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo,
         else => comptime unreachable, // unimplemented
     };
 
-    context.pc = UnwindContext.stripInstructionPtrAuthCode(new_ip);
-    if (context.pc > 0) context.pc -= 1;
-    return new_ip;
+    context.pc = std.debug.stripInstructionPtrAuthCode(new_ip) -| 1;
 }
 pub const DebugInfo = struct {
     unwind: ?Unwind,
@@ -590,6 +656,7 @@ pub const DebugInfo = struct {
             for (loaded_macho.ofiles.values()) |*ofile| {
                 ofile.dwarf.deinit(gpa);
                 ofile.symbols_by_name.deinit(gpa);
+                posix.munmap(ofile.mapped_memory);
             }
             loaded_macho.ofiles.deinit(gpa);
             gpa.free(loaded_macho.symbols);
@@ -598,6 +665,9 @@ pub const DebugInfo = struct {
     }
 
     const Unwind = struct {
+        /// The slide applied to the following sections. So, `unwind_info.ptr` is this many bytes
+        /// higher than the vmaddr of `__unwind_info`, and likewise for `__eh_frame`.
+        vmaddr_slide: u64,
         // Backed by the in-memory sections mapped by the loader
         unwind_info: ?[]const u8,
         eh_frame: ?[]const u8,
@@ -606,21 +676,31 @@ pub const DebugInfo = struct {
     const LoadedMachO = struct {
         mapped_memory: []align(std.heap.page_size_min) const u8,
         symbols: []const MachoSymbol,
-        strings: [:0]const u8,
+        strings: []const u8,
         /// Key is index into `strings` of the file path.
         ofiles: std.AutoArrayHashMapUnmanaged(u32, OFile),
+        /// This is not necessarily the same as the vmaddr_slide that dyld would report. This is
+        /// because the segments in the file on disk might differ from the ones in memory. Normally
+        /// we wouldn't necessarily expect that to work, but /usr/lib/dyld is incredibly annoying:
+        /// it exists on disk (necessarily, because the kernel needs to load it!), but is also in
+        /// the dyld cache (dyld actually restarts itself from cache after loading it), and the two
+        /// versions have (very) different segment base addresses. It's sort of like a large slide
+        /// has been applied to all addresses in memory. For an optimal experience, we consider the
+        /// on-disk vmaddr instead of the in-memory one.
+        vaddr_offset: usize,
     };
 
     const OFile = struct {
+        mapped_memory: []align(std.heap.page_size_min) const u8,
         dwarf: Dwarf,
-        strtab: [:0]const u8,
+        strtab: []const u8,
         symtab: []align(1) const macho.nlist_64,
         /// All named symbols in `symtab`. Stored `u32` key is the index into `symtab`. Accessed
         /// through `SymbolAdapter`, so that the symbol name is used as the logical key.
         symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true),
 
         const SymbolAdapter = struct {
-            strtab: [:0]const u8,
+            strtab: []const u8,
             symtab: []align(1) const macho.nlist_64,
             pub fn hash(ctx: SymbolAdapter, sym_name: []const u8) u32 {
                 _ = ctx;
@@ -663,7 +743,7 @@ pub const DebugInfo = struct {
 
         if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidDebugInfo;
         if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidDebugInfo;
-        const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1 :0];
+        const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1];
 
         const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64);
         if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidDebugInfo;
@@ -717,6 +797,7 @@ pub const DebugInfo = struct {
         try dwarf.open(gpa, native_endian);
 
         return .{
+            .mapped_memory = mapped_mem,
             .dwarf = dwarf,
             .strtab = strtab,
             .symtab = symtab,
@@ -728,8 +809,9 @@ pub const DebugInfo = struct {
 const MachoSymbol = struct {
     strx: u32,
     addr: u64,
-    size: u32,
+    /// Value may be `unknown_ofile`.
     ofile: u32,
+    const unknown_ofile = std.math.maxInt(u32);
     fn addressLessThan(context: void, lhs: MachoSymbol, rhs: MachoSymbol) bool {
         _ = context;
         return lhs.addr < rhs.addr;
@@ -754,9 +836,9 @@ const MachoSymbol = struct {
 
     test find {
         const symbols: []const MachoSymbol = &.{
-            .{ .addr = 100, .strx = undefined, .size = undefined, .ofile = undefined },
-            .{ .addr = 200, .strx = undefined, .size = undefined, .ofile = undefined },
-            .{ .addr = 300, .strx = undefined, .size = undefined, .ofile = undefined },
+            .{ .addr = 100, .strx = undefined, .ofile = undefined },
+            .{ .addr = 200, .strx = undefined, .ofile = undefined },
+            .{ .addr = 300, .strx = undefined, .ofile = undefined },
         };
 
         try testing.expectEqual(null, find(symbols, 0));
lib/std/debug/SelfInfo/ElfModule.zig
@@ -230,7 +230,7 @@ fn loadUnwindInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Erro
         else => unreachable,
     }
 }
-pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!usize {
+pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!void {
     if (di.unwind[0] == null) try module.loadUnwindInfo(gpa, di);
     std.debug.assert(di.unwind[0] != null);
     for (&di.unwind) |*opt_unwind| {
lib/std/debug/SelfInfo/WindowsModule.zig
@@ -332,6 +332,34 @@ pub const UnwindContext = struct {
                     .Wcr = @splat(0),
                     .Wvr = @splat(0),
                 },
+                .thumb => .{
+                    .ContextFlags = 0,
+                    .R0 = ctx.r[0],
+                    .R1 = ctx.r[1],
+                    .R2 = ctx.r[2],
+                    .R3 = ctx.r[3],
+                    .R4 = ctx.r[4],
+                    .R5 = ctx.r[5],
+                    .R6 = ctx.r[6],
+                    .R7 = ctx.r[7],
+                    .R8 = ctx.r[8],
+                    .R9 = ctx.r[9],
+                    .R10 = ctx.r[10],
+                    .R11 = ctx.r[11],
+                    .R12 = ctx.r[12],
+                    .Sp = ctx.r[13],
+                    .Lr = ctx.r[14],
+                    .Pc = ctx.r[15],
+                    .Cpsr = 0,
+                    .Fpcsr = 0,
+                    .Padding = 0,
+                    .DUMMYUNIONNAME = .{ .S = @splat(0) },
+                    .Bvr = @splat(0),
+                    .Bcr = @splat(0),
+                    .Wvr = @splat(0),
+                    .Wcr = @splat(0),
+                    .Padding2 = @splat(0),
+                },
                 else => comptime unreachable,
             },
             .history_table = std.mem.zeroes(windows.UNWIND_HISTORY_TABLE),
@@ -345,7 +373,7 @@ pub const UnwindContext = struct {
         return ctx.cur.getRegs().bp;
     }
 };
-pub fn unwindFrame(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize {
+pub fn unwindFrame(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !void {
     _ = module;
     _ = gpa;
     _ = di;
@@ -374,10 +402,10 @@ pub fn unwindFrame(module: *const WindowsModule, gpa: Allocator, di: *DebugInfo,
     const next_regs = context.cur.getRegs();
     const tib = &windows.teb().NtTib;
     if (next_regs.sp < @intFromPtr(tib.StackLimit) or next_regs.sp > @intFromPtr(tib.StackBase)) {
-        return 0;
+        context.pc = 0;
+    } else {
+        context.pc = next_regs.ip -| 1;
     }
-    context.pc = next_regs.ip -| 1;
-    return next_regs.ip;
 }
 
 const WindowsModule = @This();
lib/std/debug/cpu_context.zig
@@ -214,6 +214,12 @@ pub fn fromWindowsContext(ctx: *const std.os.windows.CONTEXT) Native {
             .sp = ctx.Sp,
             .pc = ctx.Pc,
         },
+        .thumb => .{ .r = .{
+            ctx.R0,  ctx.R1, ctx.R2,  ctx.R3,
+            ctx.R4,  ctx.R5, ctx.R6,  ctx.R7,
+            ctx.R8,  ctx.R9, ctx.R10, ctx.R11,
+            ctx.R12, ctx.Sp, ctx.Lr,  ctx.Pc,
+        } },
         else => comptime unreachable,
     };
 }
lib/std/debug/SelfInfo.zig
@@ -2,7 +2,6 @@
 //! goal of minimal code bloat and compilation speed penalty.
 
 const builtin = @import("builtin");
-const native_os = builtin.os.tag;
 const native_endian = native_arch.endian();
 const native_arch = builtin.cpu.arch;
 
@@ -13,6 +12,8 @@ const assert = std.debug.assert;
 const Dwarf = std.debug.Dwarf;
 const CpuContext = std.debug.cpu_context.Native;
 
+const stripInstructionPtrAuthCode = std.debug.stripInstructionPtrAuthCode;
+
 const root = @import("root");
 
 const SelfInfo = @This();
@@ -52,7 +53,7 @@ pub fn deinit(self: *SelfInfo, gpa: Allocator) void {
     if (Module.LookupCache != void) self.lookup_cache.deinit(gpa);
 }
 
-pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) Error!usize {
+pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) Error!void {
     comptime assert(supports_unwinding);
     const module: Module = try .lookup(&self.lookup_cache, gpa, context.pc);
     const gop = try self.modules.getOrPut(gpa, module.key());
@@ -115,7 +116,7 @@ pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize)
 /// pub const supports_unwinding: bool;
 /// /// Only required if `supports_unwinding == true`.
 /// pub const UnwindContext = struct {
-///     /// A PC value inside the function of the last unwound frame.
+///     /// A PC value representing the location in the last frame.
 ///     pc: usize,
 ///     pub fn init(ctx: *std.debug.cpu_context.Native, gpa: Allocator) Allocator.Error!UnwindContext;
 ///     pub fn deinit(uc: *UnwindContext, gpa: Allocator) void;
@@ -123,21 +124,22 @@ pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize)
 ///     /// pointer is unknown, 0 may be returned instead.
 ///     pub fn getFp(uc: *UnwindContext) usize;
 /// };
-/// /// Only required if `supports_unwinding == true`. Unwinds a single stack frame and returns
-/// /// the next return address (which may be 0 indicating end of stack).
+/// /// Only required if `supports_unwinding == true`. Unwinds a single stack frame.
+/// /// The caller will read the new instruction pointer from the `pc` field.
+/// /// `pc = 0` indicates end of stack / no more frames.
 /// pub fn unwindFrame(
 ///     mod: *const Module,
 ///     gpa: Allocator,
 ///     di: *DebugInfo,
 ///     ctx: *UnwindContext,
-/// ) SelfInfo.Error!usize;
+/// ) SelfInfo.Error!void;
 /// ```
 const Module: type = Module: {
     // Allow overriding the target-specific `SelfInfo` implementation by exposing `root.debug.Module`.
     if (@hasDecl(root, "debug") and @hasDecl(root.debug, "Module")) {
         break :Module root.debug.Module;
     }
-    break :Module switch (native_os) {
+    break :Module switch (builtin.os.tag) {
         .linux,
         .netbsd,
         .freebsd,
@@ -222,7 +224,7 @@ pub const DwarfUnwindContext = struct {
                 const register = col.register orelse return error.InvalidRegister;
                 // The default type is usually undefined, but can be overridden by ABI authors.
                 // See the doc comment on `Dwarf.Unwind.VirtualMachine.RegisterRule.default`.
-                if (builtin.cpu.arch.isAARCH64() and register >= 19 and register <= 18) {
+                if (builtin.cpu.arch.isAARCH64() and register >= 19 and register <= 28) {
                     // Callee-saved registers are initialized as if they had the .same_value rule
                     const src = try context.cpu_context.dwarfRegisterBytes(register);
                     if (src.len != out.len) return error.RegisterSizeMismatch;
@@ -310,7 +312,7 @@ pub const DwarfUnwindContext = struct {
         unwind: *const Dwarf.Unwind,
         load_offset: usize,
         explicit_fde_offset: ?usize,
-    ) Error!usize {
+    ) Error!void {
         return unwindFrameInner(context, gpa, unwind, load_offset, explicit_fde_offset) catch |err| switch (err) {
             error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory => |e| return e,
 
@@ -358,9 +360,10 @@ pub const DwarfUnwindContext = struct {
         unwind: *const Dwarf.Unwind,
         load_offset: usize,
         explicit_fde_offset: ?usize,
-    ) !usize {
-        if (!supports_unwinding) return error.UnsupportedCpuArchitecture;
-        if (context.pc == 0) return 0;
+    ) !void {
+        comptime assert(supports_unwinding);
+
+        if (context.pc == 0) return;
 
         const pc_vaddr = context.pc - load_offset;
 
@@ -430,12 +433,12 @@ pub const DwarfUnwindContext = struct {
             }
         }
 
-        const return_address: u64 = if (has_return_address) pc: {
+        const return_address: usize = if (has_return_address) pc: {
             const raw_ptr = try regNative(&new_cpu_context, cie.return_address_register);
             break :pc stripInstructionPtrAuthCode(raw_ptr.*);
         } else 0;
 
-        (try regNative(new_cpu_context, ip_reg_num)).* = return_address;
+        (try regNative(&new_cpu_context, ip_reg_num)).* = return_address;
 
         // The new CPU context is complete; flush changes.
         context.cpu_context = new_cpu_context;
@@ -444,11 +447,9 @@ pub const DwarfUnwindContext = struct {
         // *after* the call, it could (in the case of noreturn functions) actually point outside of
         // the caller's address range, meaning an FDE lookup would fail. We can handle this by
         // subtracting 1 from `return_address` so that the next lookup is guaranteed to land inside
-        // the `call` instruction`. The exception to this rule is signal frames, where the return
+        // the `call` instruction. The exception to this rule is signal frames, where the return
         // address is the same instruction that triggered the handler.
         context.pc = if (cie.is_signal_frame) return_address else return_address -| 1;
-
-        return return_address;
     }
     /// Since register rules are applied (usually) during a panic,
     /// checked addition / subtraction is used so that we can return
@@ -459,25 +460,6 @@ pub const DwarfUnwindContext = struct {
         else
             try std.math.sub(usize, base, @as(usize, @intCast(-offset)));
     }
-    /// Some platforms use pointer authentication - the upper bits of instruction pointers contain a signature.
-    /// This function clears these signature bits to make the pointer usable.
-    pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize {
-        if (native_arch.isAARCH64()) {
-            // `hint 0x07` maps to `xpaclri` (or `nop` if the hardware doesn't support it)
-            // The save / restore is because `xpaclri` operates on x30 (LR)
-            return asm (
-                \\mov x16, x30
-                \\mov x30, x15
-                \\hint 0x07
-                \\mov x15, x30
-                \\mov x30, x16
-                : [ret] "={x15}" (-> usize),
-                : [ptr] "{x15}" (ptr),
-                : .{ .x16 = true });
-        }
-
-        return ptr;
-    }
 
     pub fn regNative(ctx: *CpuContext, num: u16) error{
         InvalidRegister,
lib/std/c.zig
@@ -10994,6 +10994,9 @@ pub extern "c" fn dlclose(handle: *anyopaque) c_int;
 pub extern "c" fn dlsym(handle: ?*anyopaque, symbol: [*:0]const u8) ?*anyopaque;
 pub extern "c" fn dlerror() ?[*:0]u8;
 
+pub const dladdr = if (native_os.isDarwin()) darwin.dladdr else {};
+pub const dl_info = if (native_os.isDarwin()) darwin.dl_info else {};
+
 pub extern "c" fn sync() void;
 pub extern "c" fn syncfs(fd: c_int) c_int;
 pub extern "c" fn fsync(fd: c_int) c_int;
lib/std/debug.zig
@@ -585,12 +585,14 @@ pub fn captureCurrentStackTrace(options: StackUnwindOptions, addr_buf: []usize)
     while (true) switch (it.next()) {
         .switch_to_fp => if (!it.stratOk(options.allow_unsafe_unwind)) break,
         .end => break,
-        .frame => |return_address| {
+        .frame => |pc_addr| {
             if (wait_for) |target| {
-                if (return_address != target) continue;
+                // Possible off-by-one error: `pc_addr` might be one less than the return address (so
+                // that it falls *inside* the function call), while `target` *is* a return address.
+                if (pc_addr != target and pc_addr + 1 != target) continue;
                 wait_for = null;
             }
-            if (frame_idx < addr_buf.len) addr_buf[frame_idx] = return_address;
+            if (frame_idx < addr_buf.len) addr_buf[frame_idx] = pc_addr;
             frame_idx += 1;
         },
     };
@@ -631,6 +633,7 @@ pub fn writeCurrentStackTrace(options: StackUnwindOptions, writer: *Writer, tty_
     var printed_any_frame = false;
     while (true) switch (it.next()) {
         .switch_to_fp => |unwind_error| {
+            if (StackIterator.fp_unwind_is_safe) continue; // no need to even warn
             const module_name = di.getModuleNameForAddress(di_gpa, unwind_error.address) catch "???";
             const caption: []const u8 = switch (unwind_error.err) {
                 error.MissingDebugInfo => "unwind info unavailable",
@@ -658,12 +661,14 @@ pub fn writeCurrentStackTrace(options: StackUnwindOptions, writer: *Writer, tty_
             }
         },
         .end => break,
-        .frame => |return_address| {
+        .frame => |pc_addr| {
             if (wait_for) |target| {
-                if (return_address != target) continue;
+                // Possible off-by-one error: `pc_addr` might be one less than the return address (so
+                // that it falls *inside* the function call), while `target` *is* a return address.
+                if (pc_addr != target and pc_addr + 1 != target) continue;
                 wait_for = null;
             }
-            try printSourceAtAddress(di_gpa, di, writer, return_address -| 1, tty_config);
+            try printSourceAtAddress(di_gpa, di, writer, pc_addr, tty_config);
             printed_any_frame = true;
         },
     };
@@ -703,8 +708,8 @@ pub fn writeStackTrace(st: *const std.builtin.StackTrace, writer: *Writer, tty_c
         },
     };
     const captured_frames = @min(n_frames, st.instruction_addresses.len);
-    for (st.instruction_addresses[0..captured_frames]) |return_address| {
-        try printSourceAtAddress(di_gpa, di, writer, return_address -| 1, tty_config);
+    for (st.instruction_addresses[0..captured_frames]) |pc_addr| {
+        try printSourceAtAddress(di_gpa, di, writer, pc_addr, tty_config);
     }
     if (n_frames > captured_frames) {
         tty_config.setColor(writer, .bold) catch {};
@@ -725,6 +730,8 @@ pub fn dumpStackTrace(st: *const std.builtin.StackTrace) void {
 const StackIterator = union(enum) {
     /// Unwinding using debug info (e.g. DWARF CFI).
     di: if (SelfInfo.supports_unwinding) SelfInfo.UnwindContext else noreturn,
+    /// We will first report the *current* PC of this `UnwindContext`, then we will switch to `di`.
+    di_first: if (SelfInfo.supports_unwinding) SelfInfo.UnwindContext else noreturn,
     /// Naive frame-pointer-based unwinding. Very simple, but typically unreliable.
     fp: usize,
 
@@ -742,9 +749,12 @@ const StackIterator = union(enum) {
         }
         if (opt_context_ptr) |context_ptr| {
             if (!SelfInfo.supports_unwinding) return error.CannotUnwindFromContext;
-            return .{ .di = .init(context_ptr) };
+            // Use `di_first` here so we report the PC in the context before unwinding any further.
+            return .{ .di_first = .init(context_ptr) };
         }
         if (SelfInfo.supports_unwinding and cpu_context.Native != noreturn) {
+            // We don't need `di_first` here, because our PC is in `std.debug`; we're only interested
+            // in our caller's frame and above.
             return .{ .di = .init(&.current()) };
         }
         return .{ .fp = @frameAddress() };
@@ -752,7 +762,7 @@ const StackIterator = union(enum) {
     fn deinit(si: *StackIterator) void {
         switch (si.*) {
             .fp => {},
-            .di => |*unwind_context| unwind_context.deinit(getDebugInfoAllocator()),
+            .di, .di_first => |*unwind_context| unwind_context.deinit(getDebugInfoAllocator()),
         }
     }
 
@@ -763,7 +773,7 @@ const StackIterator = union(enum) {
     /// Whether the current unwind strategy is allowed given `allow_unsafe`.
     fn stratOk(it: *const StackIterator, allow_unsafe: bool) bool {
         return switch (it.*) {
-            .di => true,
+            .di, .di_first => true,
             // If we omitted frame pointers from *this* compilation, FP unwinding would crash
             // immediately regardless of anything. But FPs could also be omitted from a different
             // linked object, so it's not guaranteed to be safe, unless the target specifically
@@ -773,11 +783,11 @@ const StackIterator = union(enum) {
     }
 
     const Result = union(enum) {
-        /// A stack frame has been found; this is the corresponding return address.
+        /// A stack frame has been found; this is the corresponding program counter address.
         frame: usize,
         /// The end of the stack has been reached.
         end,
-        /// We were using the `.di` strategy, but are now switching to `.fp` due to this error.
+        /// We were using `SelfInfo.UnwindContext`, but are now switching to FP unwinding due to this error.
         switch_to_fp: struct {
             address: usize,
             err: SelfInfo.Error,
@@ -785,20 +795,25 @@ const StackIterator = union(enum) {
     };
     fn next(it: *StackIterator) Result {
         switch (it.*) {
+            .di_first => |unwind_context| {
+                const first_pc = unwind_context.pc;
+                if (first_pc == 0) return .end;
+                it.* = .{ .di = unwind_context };
+                return .{ .frame = first_pc };
+            },
             .di => |*unwind_context| {
                 const di = getSelfDebugInfo() catch unreachable;
                 const di_gpa = getDebugInfoAllocator();
-                if (di.unwindFrame(di_gpa, unwind_context)) |ra| {
-                    if (ra <= 1) return .end;
-                    return .{ .frame = ra };
-                } else |err| {
+                di.unwindFrame(di_gpa, unwind_context) catch |err| {
                     const pc = unwind_context.pc;
                     it.* = .{ .fp = unwind_context.getFp() };
                     return .{ .switch_to_fp = .{
                         .address = pc,
                         .err = err,
                     } };
-                }
+                };
+                const pc = unwind_context.pc;
+                return if (pc == 0) .end else .{ .frame = pc };
             },
             .fp => |fp| {
                 if (fp == 0) return .end; // we reached the "sentinel" base pointer
@@ -824,9 +839,9 @@ const StackIterator = union(enum) {
                 if (bp != 0 and bp <= fp) return .end;
 
                 it.fp = bp;
-                const ra = ra_ptr.*;
+                const ra = stripInstructionPtrAuthCode(ra_ptr.*);
                 if (ra <= 1) return .end;
-                return .{ .frame = ra };
+                return .{ .frame = ra - 1 };
             },
         }
     }
@@ -860,6 +875,26 @@ const StackIterator = union(enum) {
     }
 };
 
+/// Some platforms use pointer authentication: the upper bits of instruction pointers contain a
+/// signature. This function clears those signature bits to make the pointer directly usable.
+pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize {
+    if (native_arch.isAARCH64()) {
+        // `hint 0x07` maps to `xpaclri` (or `nop` if the hardware doesn't support it)
+        // The save / restore is because `xpaclri` operates on x30 (LR)
+        return asm (
+            \\mov x16, x30
+            \\mov x30, x15
+            \\hint 0x07
+            \\mov x15, x30
+            \\mov x30, x16
+            : [ret] "={x15}" (-> usize),
+            : [ptr] "{x15}" (ptr),
+            : .{ .x16 = true });
+    }
+
+    return ptr;
+}
+
 fn printSourceAtAddress(gpa: Allocator, debug_info: *SelfInfo, writer: *Writer, address: usize, tty_config: tty.Config) Writer.Error!void {
     const symbol: Symbol = debug_info.getSymbolAtAddress(gpa, address) catch |err| switch (err) {
         error.MissingDebugInfo,