Commit 4b47a37717

mlugg <mlugg@mlugg.co.uk>
2025-09-03 00:17:09
stash? more like no
1 parent 665f13b
Changed files (4)
lib/std/debug/Dwarf/Unwind.zig
@@ -553,8 +553,8 @@ fn readEhPointerAbs(r: *Reader, enc_ty: EH.PE.Type, addr_size_bytes: u8, endian:
     };
 }
 /// Returns `error.InvalidDebugInfo` if the encoding is `EH.PE.omit`.
-fn readEhPointer(fbr: *Reader, enc: EH.PE, addr_size_bytes: u8, ctx: EhPointerContext, endian: Endian) !u64 {
-    const offset = try readEhPointerAbs(fbr, enc.type, addr_size_bytes, endian);
+fn readEhPointer(r: *Reader, enc: EH.PE, addr_size_bytes: u8, ctx: EhPointerContext, endian: Endian) !u64 {
+    const offset = try readEhPointerAbs(r, enc.type, addr_size_bytes, endian);
     const base = switch (enc.rel) {
         .abs, .aligned => 0,
         .pcrel => ctx.pc_rel_base,
lib/std/debug/SelfInfo/DarwinModule.zig
@@ -43,7 +43,37 @@ pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !DarwinModule
     }
     return error.MissingDebugInfo;
 }
-fn loadLocationInfo(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo) !void {
+fn loadUnwindInfo(module: *const DarwinModule) DebugInfo.Unwind {
+    const header: *std.macho.mach_header = @ptrFromInt(module.text_base);
+
+    var it: macho.LoadCommandIterator = .{
+        .ncmds = header.ncmds,
+        .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds],
+    };
+    const sections = while (it.next()) |load_cmd| {
+        if (load_cmd.cmd() != .SEGMENT_64) continue;
+        const segment_cmd = load_cmd.cast(macho.segment_command_64).?;
+        if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue;
+        break load_cmd.getSections();
+    } else unreachable;
+
+    var unwind_info: ?[]const u8 = null;
+    var eh_frame: ?[]const u8 = null;
+    for (sections) |sect| {
+        if (mem.eql(u8, sect.sectName(), "__unwind_info")) {
+            const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr)));
+            unwind_info = sect_ptr[0..@intCast(sect.size)];
+        } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) {
+            const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr)));
+            eh_frame = sect_ptr[0..@intCast(sect.size)];
+        }
+    }
+    return .{
+        .unwind_info = unwind_info,
+        .eh_frame = eh_frame,
+    };
+}
+fn loadFullInfo(module: *const DarwinModule, gpa: Allocator) !DebugInfo.Full {
     const mapped_mem = try mapDebugInfoFile(module.name);
     errdefer posix.munmap(mapped_mem);
 
@@ -149,49 +179,19 @@ fn loadLocationInfo(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo)
     // This sort is so that we can binary search later.
     mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan);
 
-    di.full = .{
+    return .{
         .mapped_memory = mapped_mem,
         .symbols = symbols_slice,
         .strings = strings,
         .ofiles = .empty,
     };
 }
-fn loadUnwindInfo(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo) !void {
-    _ = gpa;
-
-    const header: *std.macho.mach_header = @ptrFromInt(module.text_base);
-
-    var it: macho.LoadCommandIterator = .{
-        .ncmds = header.ncmds,
-        .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds],
-    };
-    const sections = while (it.next()) |load_cmd| {
-        if (load_cmd.cmd() != .SEGMENT_64) continue;
-        const segment_cmd = load_cmd.cast(macho.segment_command_64).?;
-        if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue;
-        break load_cmd.getSections();
-    } else unreachable;
-
-    var unwind_info: ?[]const u8 = null;
-    var eh_frame: ?[]const u8 = null;
-    for (sections) |sect| {
-        if (mem.eql(u8, sect.sectName(), "__unwind_info")) {
-            const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr)));
-            unwind_info = sect_ptr[0..@intCast(sect.size)];
-        } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) {
-            const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(module.load_offset + sect.addr)));
-            eh_frame = sect_ptr[0..@intCast(sect.size)];
-        }
-    }
-    di.unwind = .{
-        .unwind_info = unwind_info,
-        .eh_frame = eh_frame,
-    };
-}
 pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol {
-    if (di.full == null) try module.loadLocationInfo(gpa, di);
+    if (di.full == null) di.full = try module.loadFullInfo(gpa);
+    const full = &di.full.?;
+
     const vaddr = address - module.load_offset;
-    const symbol = MachoSymbol.find(di.full.?.symbols, vaddr) orelse return .{
+    const symbol = MachoSymbol.find(full.symbols, vaddr) orelse return .{
         .name = null,
         .compile_unit_name = null,
         .source_location = null,
@@ -202,8 +202,7 @@ pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *Debu
 
     // Take the symbol name from the N_FUN STAB entry, we're going to
     // use it if we fail to find the DWARF infos
-    const stab_symbol = mem.sliceTo(di.full.?.strings[symbol.strx..], 0);
-    const o_file_path = mem.sliceTo(di.full.?.strings[symbol.ofile..], 0);
+    const stab_symbol = mem.sliceTo(full.strings[symbol.strx..], 0);
 
     // If any information is missing, we can at least return this from now on.
     const sym_only_result: std.debug.Symbol = .{
@@ -213,10 +212,11 @@ pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *Debu
     };
 
     const o_file: *DebugInfo.OFile = of: {
-        const gop = try di.full.?.ofiles.getOrPut(gpa, o_file_path);
+        const gop = try full.ofiles.getOrPut(gpa, symbol.ofile);
         if (!gop.found_existing) {
+            const o_file_path = mem.sliceTo(full.strings[symbol.ofile..], 0);
             gop.value_ptr.* = DebugInfo.loadOFile(gpa, o_file_path) catch |err| {
-                defer _ = di.full.?.ofiles.pop().?;
+                defer _ = full.ofiles.pop().?;
                 switch (err) {
                     error.MissingDebugInfo,
                     error.InvalidDebugInfo,
@@ -228,7 +228,11 @@ pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *Debu
         break :of gop.value_ptr;
     };
 
-    const symbol_ofile_vaddr = o_file.addr_table.get(stab_symbol) orelse return sym_only_result;
+    const symbol_index = o_file.symbols_by_name.getKeyAdapted(
+        @as([]const u8, stab_symbol),
+        @as(DebugInfo.OFile.SymbolAdapter, .{ .strtab = o_file.strtab, .symtab = o_file.symtab }),
+    ) orelse return sym_only_result;
+    const symbol_ofile_vaddr = o_file.symtab[symbol_index].n_value;
 
     const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch |err| switch (err) {
         error.MissingDebugInfo, error.InvalidDebugInfo => return sym_only_result,
@@ -257,28 +261,15 @@ pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *Debu
         },
     };
 }
-pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize {
-    if (di.unwind == null) try module.loadUnwindInfo(gpa, di);
-    const unwind_info = di.unwind.?.unwind_info orelse return error.MissingUnwindInfo;
-    // MLUGG TODO: inline?
-    return unwindFrameMachO(
-        module.text_base,
-        module.load_offset,
-        context,
-        unwind_info,
-        di.unwind.?.eh_frame,
-    );
-}
 /// Unwind a frame using MachO compact unwind info (from __unwind_info).
 /// If the compact encoding can't encode a way to unwind a frame, it will
 /// defer unwinding to DWARF, in which case `.eh_frame` will be used if available.
-fn unwindFrameMachO(
-    text_base: usize,
-    load_offset: usize,
-    context: *UnwindContext,
-    unwind_info: []const u8,
-    opt_eh_frame: ?[]const u8,
-) !usize {
+pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize {
+    _ = gpa;
+    if (di.unwind == null) di.unwind = module.loadUnwindInfo();
+    const unwind = &di.unwind.?;
+
+    const unwind_info = unwind.unwind_info orelse return error.MissingUnwindInfo;
     if (unwind_info.len < @sizeOf(macho.unwind_info_section_header)) return error.InvalidUnwindInfo;
     const header: *align(1) const macho.unwind_info_section_header = @ptrCast(unwind_info);
 
@@ -288,7 +279,7 @@ fn unwindFrameMachO(
     if (indices.len == 0) return error.MissingUnwindInfo;
 
     // offset of the PC into the `__TEXT` segment
-    const pc_text_offset = context.pc - text_base;
+    const pc_text_offset = context.pc - module.text_base;
 
     const start_offset: u32, const first_level_offset: u32 = index: {
         var left: usize = 0;
@@ -443,7 +434,7 @@ fn unwindFrameMachO(
                     }
                     // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function.
                     const sub_offset_addr =
-                        text_base +
+                        module.text_base +
                         entry.function_offset +
                         frameless.stack.indirect.sub_offset;
                     // `sub_offset_addr` points to the offset of the literal within the instruction
@@ -502,11 +493,11 @@ fn unwindFrameMachO(
                 break :ip new_ip;
             },
             .DWARF => {
-                const eh_frame = opt_eh_frame orelse return error.MissingEhFrame;
-                const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - load_offset;
+                const eh_frame = unwind.eh_frame orelse return error.MissingEhFrame;
+                const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - module.load_offset;
                 return context.unwindFrameDwarf(
                     &.initSection(.eh_frame, eh_frame_vaddr, eh_frame),
-                    load_offset,
+                    module.load_offset,
                     @intCast(encoding.value.x86_64.dwarf),
                 );
             },
@@ -521,11 +512,11 @@ fn unwindFrameMachO(
                 break :ip new_ip;
             },
             .DWARF => {
-                const eh_frame = opt_eh_frame orelse return error.MissingEhFrame;
-                const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - load_offset;
+                const eh_frame = unwind.eh_frame orelse return error.MissingEhFrame;
+                const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - module.load_offset;
                 return context.unwindFrameDwarf(
                     &.initSection(.eh_frame, eh_frame_vaddr, eh_frame),
-                    load_offset,
+                    module.load_offset,
                     @intCast(encoding.value.x86_64.dwarf),
                 );
             },
@@ -580,19 +571,9 @@ fn unwindFrameMachO(
 /// No cache needed, because `_dyld_get_image_header` etc are already fast.
 pub const LookupCache = void;
 pub const DebugInfo = struct {
-    unwind: ?struct {
-        // Backed by the in-memory sections mapped by the loader
-        unwind_info: ?[]const u8,
-        eh_frame: ?[]const u8,
-    },
+    unwind: ?Unwind,
     // MLUGG TODO: awful field name
-    full: ?struct {
-        mapped_memory: []align(std.heap.page_size_min) const u8,
-        symbols: []const MachoSymbol,
-        strings: [:0]const u8,
-        // MLUGG TODO: this could use an adapter to just index straight into `strings`!
-        ofiles: std.StringArrayHashMapUnmanaged(OFile),
-    },
+    full: ?Full,
 
     pub const init: DebugInfo = .{
         .unwind = null,
@@ -603,7 +584,7 @@ pub const DebugInfo = struct {
         if (di.full) |*full| {
             for (full.ofiles.values()) |*ofile| {
                 ofile.dwarf.deinit(gpa);
-                ofile.addr_table.deinit(gpa);
+                ofile.symbols_by_name.deinit(gpa);
             }
             full.ofiles.deinit(gpa);
             gpa.free(full.symbols);
@@ -611,10 +592,42 @@ pub const DebugInfo = struct {
         }
     }
 
+    const Unwind = struct {
+        // Backed by the in-memory sections mapped by the loader
+        unwind_info: ?[]const u8,
+        eh_frame: ?[]const u8,
+    };
+
+    const Full = struct {
+        mapped_memory: []align(std.heap.page_size_min) const u8,
+        symbols: []const MachoSymbol,
+        strings: [:0]const u8,
+        /// Key is index into `strings` of the file path.
+        ofiles: std.AutoArrayHashMapUnmanaged(u32, OFile),
+    };
+
     const OFile = struct {
         dwarf: Dwarf,
-        // MLUGG TODO: this could use an adapter to just index straight into the strtab!
-        addr_table: std.StringArrayHashMapUnmanaged(u64),
+        strtab: [:0]const u8,
+        symtab: []align(1) const macho.nlist_64,
+        /// All named symbols in `symtab`. Stored `u32` key is the index into `symtab`. Accessed
+        /// through `SymbolAdapter`, so that the symbol name is used as the logical key.
+        symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true),
+
+        const SymbolAdapter = struct {
+            strtab: [:0]const u8,
+            symtab: []align(1) const macho.nlist_64,
+            pub fn hash(ctx: SymbolAdapter, sym_name: []const u8) u32 {
+                _ = ctx;
+                return @truncate(std.hash.Wyhash.hash(0, sym_name));
+            }
+            pub fn eql(ctx: SymbolAdapter, a_sym_name: []const u8, b_sym_index: u32, b_index: usize) bool {
+                _ = b_index;
+                const b_sym = ctx.symtab[b_sym_index];
+                const b_sym_name = std.mem.sliceTo(ctx.strtab[b_sym.n_strx..], 0);
+                return mem.eql(u8, a_sym_name, b_sym_name);
+            }
+        };
     };
 
     fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile {
@@ -645,17 +658,17 @@ pub const DebugInfo = struct {
 
         if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidDebugInfo;
         if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidDebugInfo;
-        const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1];
+        const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1 :0];
 
         const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64);
         if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidDebugInfo;
         const symtab: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]);
 
         // TODO handle tentative (common) symbols
-        var addr_table: std.StringArrayHashMapUnmanaged(u64) = .empty;
-        defer addr_table.deinit(gpa);
-        try addr_table.ensureUnusedCapacity(gpa, @intCast(symtab.len));
-        for (symtab) |sym| {
+        var symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true) = .empty;
+        defer symbols_by_name.deinit(gpa);
+        try symbols_by_name.ensureUnusedCapacity(gpa, @intCast(symtab.len));
+        for (symtab, 0..) |sym, sym_index| {
             if (sym.n_strx == 0) continue;
             switch (sym.n_type.bits.type) {
                 .undf => continue, // includes tentative symbols
@@ -663,9 +676,12 @@ pub const DebugInfo = struct {
                 else => {},
             }
             const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0);
-            const gop = addr_table.getOrPutAssumeCapacity(sym_name);
+            const gop = symbols_by_name.getOrPutAssumeCapacityAdapted(
+                @as([]const u8, sym_name),
+                @as(DebugInfo.OFile.SymbolAdapter, .{ .strtab = strtab, .symtab = symtab }),
+            );
             if (gop.found_existing) return error.InvalidDebugInfo;
-            gop.value_ptr.* = sym.n_value;
+            gop.key_ptr.* = @intCast(sym_index);
         }
 
         var sections: Dwarf.SectionArray = @splat(null);
@@ -697,7 +713,9 @@ pub const DebugInfo = struct {
 
         return .{
             .dwarf = dwarf,
-            .addr_table = addr_table.move(),
+            .strtab = strtab,
+            .symtab = symtab,
+            .symbols_by_name = symbols_by_name.move(),
         };
     }
 };
lib/std/debug/SelfInfo/ElfModule.zig
@@ -92,7 +92,7 @@ pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !ElfModule {
     };
     return error.MissingDebugInfo;
 }
-fn loadLocationInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) !void {
+fn loadDwarf(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) !void {
     if (module.name.len > 0) {
         di.loaded_elf = Dwarf.ElfModule.load(gpa, .{
             .root_dir = .cwd(),
@@ -116,7 +116,7 @@ fn loadLocationInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) !v
     }
 }
 pub fn getSymbolAtAddress(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol {
-    if (di.loaded_elf == null) try module.loadLocationInfo(gpa, di);
+    if (di.loaded_elf == null) try module.loadDwarf(gpa, di);
     const vaddr = address - module.load_offset;
     return di.loaded_elf.?.dwarf.getSymbol(gpa, native_endian, vaddr);
 }
lib/std/debug/SelfInfo.zig
@@ -76,7 +76,7 @@ pub fn deinit(self: *SelfInfo, gpa: Allocator) void {
     self.modules.deinit(gpa);
     if (Module.LookupCache != void) self.lookup_cache.deinit(gpa);
 }
-comptime {
+test {
     // `std.debug` does not currently utilize `deinit`, as it keeps hold of debug info for the
     // whole lifetime of the program. Let's try to avoid it bitrotting.
     _ = &deinit;
@@ -85,7 +85,7 @@ comptime {
 pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) !usize {
     comptime assert(supports_unwinding);
     const module: Module = try .lookup(&self.lookup_cache, gpa, context.pc);
-    const gop = try self.modules.getOrPut(gpa, module.load_offset);
+    const gop = try self.modules.getOrPut(gpa, module.key());
     self.modules.lockPointers();
     defer self.modules.unlockPointers();
     if (!gop.found_existing) gop.value_ptr.* = .init;
@@ -128,9 +128,7 @@ const Module = switch (native_os) {
     .macos, .ios, .watchos, .tvos, .visionos => @import("SelfInfo/DarwinModule.zig"),
     .uefi, .windows => @import("SelfInfo/WindowsModule.zig"),
     .wasi, .emscripten => struct {
-        const LookupCache = struct {
-            const init: LookupCache = .{};
-        };
+        const LookupCache = void;
         fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) !Module {
             _ = cache;
             _ = gpa;