Commit 0caca625eb

Matthew Lugg <mlugg@mlugg.co.uk>
2025-11-19 13:01:49
std.debug: split up Mach-O debug info handling
Like ELF, we now have `std.debug.MachOFile` for the host-independent parts, and `std.debug.SelfInfo.MachO` for logic requiring the file to correspond to the running program.
1 parent 7b325e0
Changed files (8)
lib/std/Build/Step/CheckObject.zig
@@ -729,10 +729,10 @@ const MachODumper = struct {
         imports: std.ArrayListUnmanaged([]const u8) = .empty,
 
         fn parse(ctx: *ObjectContext) !void {
-            var it = ctx.getLoadCommandIterator();
+            var it = try ctx.getLoadCommandIterator();
             var i: usize = 0;
-            while (it.next()) |cmd| {
-                switch (cmd.cmd()) {
+            while (try it.next()) |cmd| {
+                switch (cmd.hdr.cmd) {
                     .SEGMENT_64 => {
                         const seg = cmd.cast(macho.segment_command_64).?;
                         try ctx.segments.append(ctx.gpa, seg);
@@ -771,14 +771,13 @@ const MachODumper = struct {
             return mem.sliceTo(@as([*:0]const u8, @ptrCast(ctx.strtab.items.ptr + off)), 0);
         }
 
-        fn getLoadCommandIterator(ctx: ObjectContext) macho.LoadCommandIterator {
-            const data = ctx.data[@sizeOf(macho.mach_header_64)..][0..ctx.header.sizeofcmds];
-            return .{ .ncmds = ctx.header.ncmds, .buffer = data };
+        fn getLoadCommandIterator(ctx: ObjectContext) !macho.LoadCommandIterator {
+            return .init(&ctx.header, ctx.data[@sizeOf(macho.mach_header_64)..]);
         }
 
-        fn getLoadCommand(ctx: ObjectContext, cmd: macho.LC) ?macho.LoadCommandIterator.LoadCommand {
-            var it = ctx.getLoadCommandIterator();
-            while (it.next()) |lc| if (lc.cmd() == cmd) {
+        fn getLoadCommand(ctx: ObjectContext, cmd: macho.LC) !?macho.LoadCommandIterator.LoadCommand {
+            var it = try ctx.getLoadCommandIterator();
+            while (try it.next()) |lc| if (lc.hdr.cmd == cmd) {
                 return lc;
             };
             return null;
@@ -872,9 +871,9 @@ const MachODumper = struct {
                 \\LC {d}
                 \\cmd {s}
                 \\cmdsize {d}
-            , .{ index, @tagName(lc.cmd()), lc.cmdsize() });
+            , .{ index, @tagName(lc.hdr.cmd), lc.hdr.cmdsize });
 
-            switch (lc.cmd()) {
+            switch (lc.hdr.cmd) {
                 .SEGMENT_64 => {
                     const seg = lc.cast(macho.segment_command_64).?;
                     try writer.writeByte('\n');
@@ -1592,9 +1591,9 @@ const MachODumper = struct {
             .headers => {
                 try ObjectContext.dumpHeader(ctx.header, writer);
 
-                var it = ctx.getLoadCommandIterator();
+                var it = try ctx.getLoadCommandIterator();
                 var i: usize = 0;
-                while (it.next()) |cmd| {
+                while (try it.next()) |cmd| {
                     try ObjectContext.dumpLoadCommand(cmd, i, writer);
                     try writer.writeByte('\n');
 
@@ -1615,7 +1614,7 @@ const MachODumper = struct {
             .dyld_weak_bind,
             .dyld_lazy_bind,
             => {
-                const cmd = ctx.getLoadCommand(.DYLD_INFO_ONLY) orelse
+                const cmd = try ctx.getLoadCommand(.DYLD_INFO_ONLY) orelse
                     return step.fail("no dyld info found", .{});
                 const lc = cmd.cast(macho.dyld_info_command).?;
 
@@ -1649,7 +1648,7 @@ const MachODumper = struct {
             },
 
             .exports => blk: {
-                if (ctx.getLoadCommand(.DYLD_INFO_ONLY)) |cmd| {
+                if (try ctx.getLoadCommand(.DYLD_INFO_ONLY)) |cmd| {
                     const lc = cmd.cast(macho.dyld_info_command).?;
                     if (lc.export_size > 0) {
                         const data = ctx.data[lc.export_off..][0..lc.export_size];
lib/std/debug/SelfInfo/MachO.zig
@@ -1,12 +1,10 @@
 mutex: std.Thread.Mutex,
 /// Accessed through `Module.Adapter`.
 modules: std.ArrayHashMapUnmanaged(Module, void, Module.Context, false),
-ofiles: std.StringArrayHashMapUnmanaged(?OFile),
 
 pub const init: SelfInfo = .{
     .mutex = .{},
     .modules = .empty,
-    .ofiles = .empty,
 };
 pub fn deinit(si: *SelfInfo, gpa: Allocator) void {
     for (si.modules.keys()) |*module| {
@@ -14,20 +12,12 @@ pub fn deinit(si: *SelfInfo, gpa: Allocator) void {
             const u = &(module.unwind orelse break :unwind catch break :unwind);
             if (u.dwarf) |*dwarf| dwarf.deinit(gpa);
         }
-        loaded: {
-            const l = &(module.loaded_macho orelse break :loaded catch break :loaded);
-            gpa.free(l.symbols);
-            posix.munmap(l.mapped_memory);
+        file: {
+            const f = &(module.file orelse break :file catch break :file);
+            f.deinit(gpa);
         }
     }
-    for (si.ofiles.values()) |*opt_ofile| {
-        const ofile = &(opt_ofile.* orelse continue);
-        ofile.dwarf.deinit(gpa);
-        ofile.symbols_by_name.deinit(gpa);
-        posix.munmap(ofile.mapped_memory);
-    }
     si.modules.deinit(gpa);
-    si.ofiles.deinit(gpa);
 }
 
 pub fn getSymbol(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!std.debug.Symbol {
@@ -35,67 +25,55 @@ pub fn getSymbol(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!st
     const module = try si.findModule(gpa, address);
     defer si.mutex.unlock();
 
-    const loaded_macho = try module.getLoadedMachO(gpa);
-
-    const vaddr = address - loaded_macho.vaddr_offset;
-    const symbol = MachoSymbol.find(loaded_macho.symbols, vaddr) orelse return .unknown;
+    const file = try module.getFile(gpa);
 
-    // offset of `address` from start of `symbol`
-    const address_symbol_offset = vaddr - symbol.addr;
+    // This is not necessarily the same as the vmaddr_slide that dyld would report. This is
+    // because the segments in the file on disk might differ from the ones in memory. Normally
+    // we wouldn't necessarily expect that to work, but /usr/lib/dyld is incredibly annoying:
+    // it exists on disk (necessarily, because the kernel needs to load it!), but is also in
+    // the dyld cache (dyld actually restarts itself from cache after loading it), and the two
+    // versions have (very) different segment base addresses. It's sort of like a large slide
+    // has been applied to all addresses in memory. For an optimal experience, we consider the
+    // on-disk vmaddr instead of the in-memory one.
+    const vaddr_offset = module.text_base - file.text_vmaddr;
 
-    // Take the symbol name from the N_FUN STAB entry; we're going to
-    // use it if we fail to find the DWARF info.
-    const stab_symbol = mem.sliceTo(loaded_macho.strings[symbol.strx..], 0);
+    const vaddr = address - vaddr_offset;
 
-    // If any information is missing, we can at least return this from now on.
-    const sym_only_result: std.debug.Symbol = .{
-        .name = stab_symbol,
-        .compile_unit_name = null,
-        .source_location = null,
+    const ofile_dwarf, const ofile_vaddr = file.getDwarfForAddress(gpa, vaddr) catch {
+        // Return at least the symbol name if available.
+        return .{
+            .name = try file.lookupSymbolName(vaddr),
+            .compile_unit_name = null,
+            .source_location = null,
+        };
     };
 
-    if (symbol.ofile == MachoSymbol.unknown_ofile) {
-        // We don't have STAB info, so can't track down the object file; all we can do is the symbol name.
-        return sym_only_result;
-    }
-
-    const o_file: *OFile = of: {
-        const path = mem.sliceTo(loaded_macho.strings[symbol.ofile..], 0);
-        const gop = try si.ofiles.getOrPut(gpa, path);
-        if (!gop.found_existing) {
-            gop.value_ptr.* = loadOFile(gpa, path) catch null;
-        }
-        if (gop.value_ptr.*) |*o_file| {
-            break :of o_file;
-        } else {
-            return sym_only_result;
-        }
+    const compile_unit = ofile_dwarf.findCompileUnit(native_endian, ofile_vaddr) catch {
+        // Return at least the symbol name if available.
+        return .{
+            .name = try file.lookupSymbolName(vaddr),
+            .compile_unit_name = null,
+            .source_location = null,
+        };
     };
 
-    const symbol_index = o_file.symbols_by_name.getKeyAdapted(
-        @as([]const u8, stab_symbol),
-        @as(OFile.SymbolAdapter, .{ .strtab = o_file.strtab, .symtab = o_file.symtab }),
-    ) orelse return sym_only_result;
-    const symbol_ofile_vaddr = o_file.symtab[symbol_index].n_value;
-
-    const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch return sym_only_result;
-
     return .{
-        .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr + address_symbol_offset) orelse stab_symbol,
+        .name = ofile_dwarf.getSymbolName(ofile_vaddr) orelse
+            try file.lookupSymbolName(vaddr),
         .compile_unit_name = compile_unit.die.getAttrString(
-            &o_file.dwarf,
+            ofile_dwarf,
             native_endian,
             std.dwarf.AT.name,
-            o_file.dwarf.section(.debug_str),
+            ofile_dwarf.section(.debug_str),
             compile_unit,
         ) catch |err| switch (err) {
             error.MissingDebugInfo, error.InvalidDebugInfo => null,
         },
-        .source_location = o_file.dwarf.getLineNumberInfo(
+        .source_location = ofile_dwarf.getLineNumberInfo(
             gpa,
             native_endian,
             compile_unit,
-            symbol_ofile_vaddr + address_symbol_offset,
+            ofile_vaddr,
         ) catch null,
     };
 }
@@ -447,7 +425,7 @@ fn findModule(si: *SelfInfo, gpa: Allocator, address: usize) Error!*Module {
             .text_base = @intFromPtr(info.fbase),
             .name = std.mem.span(info.fname),
             .unwind = null,
-            .loaded_macho = null,
+            .file = null,
         };
     }
     return gop.key_ptr;
@@ -457,7 +435,7 @@ const Module = struct {
     text_base: usize,
     name: []const u8,
     unwind: ?(Error!Unwind),
-    loaded_macho: ?(Error!LoadedMachO),
+    file: ?(Error!MachOFile),
 
     const Adapter = struct {
         pub fn hash(_: Adapter, text_base: usize) u32 {
@@ -488,34 +466,17 @@ const Module = struct {
         dwarf: ?Dwarf.Unwind,
     };
 
-    const LoadedMachO = struct {
-        mapped_memory: []align(std.heap.page_size_min) const u8,
-        symbols: []const MachoSymbol,
-        strings: []const u8,
-        /// This is not necessarily the same as the vmaddr_slide that dyld would report. This is
-        /// because the segments in the file on disk might differ from the ones in memory. Normally
-        /// we wouldn't necessarily expect that to work, but /usr/lib/dyld is incredibly annoying:
-        /// it exists on disk (necessarily, because the kernel needs to load it!), but is also in
-        /// the dyld cache (dyld actually restarts itself from cache after loading it), and the two
-        /// versions have (very) different segment base addresses. It's sort of like a large slide
-        /// has been applied to all addresses in memory. For an optimal experience, we consider the
-        /// on-disk vmaddr instead of the in-memory one.
-        vaddr_offset: usize,
-    };
-
     fn getUnwindInfo(module: *Module, gpa: Allocator) Error!*Unwind {
         if (module.unwind == null) module.unwind = loadUnwindInfo(module, gpa);
         return if (module.unwind.?) |*unwind| unwind else |err| err;
     }
     fn loadUnwindInfo(module: *const Module, gpa: Allocator) Error!Unwind {
-        const header: *std.macho.mach_header = @ptrFromInt(module.text_base);
+        const header: *std.macho.mach_header_64 = @ptrFromInt(module.text_base);
 
-        var it: macho.LoadCommandIterator = .{
-            .ncmds = header.ncmds,
-            .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds],
-        };
-        const sections, const text_vmaddr = while (it.next()) |load_cmd| {
-            if (load_cmd.cmd() != .SEGMENT_64) continue;
+        const raw_macho: [*]u8 = @ptrCast(header);
+        var it = macho.LoadCommandIterator.init(header, raw_macho[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds]) catch unreachable;
+        const sections, const text_vmaddr = while (it.next() catch unreachable) |load_cmd| {
+            if (load_cmd.hdr.cmd != .SEGMENT_64) continue;
             const segment_cmd = load_cmd.cast(macho.segment_command_64).?;
             if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue;
             break .{ load_cmd.getSections(), segment_cmd.vmaddr };
@@ -568,237 +529,15 @@ const Module = struct {
         };
     }
 
-    fn getLoadedMachO(module: *Module, gpa: Allocator) Error!*LoadedMachO {
-        if (module.loaded_macho == null) module.loaded_macho = loadMachO(module, gpa) catch |err| switch (err) {
-            error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory, error.Unexpected => |e| e,
-            else => error.ReadFailed,
-        };
-        return if (module.loaded_macho.?) |*lm| lm else |err| err;
-    }
-    fn loadMachO(module: *const Module, gpa: Allocator) Error!LoadedMachO {
-        const all_mapped_memory = try mapDebugInfoFile(module.name);
-        errdefer posix.munmap(all_mapped_memory);
-
-        // In most cases, the file we just mapped is a Mach-O binary. However, it could be a "universal
-        // binary": a simple file format which contains Mach-O binaries for multiple targets. For
-        // instance, `/usr/lib/dyld` is currently distributed as a universal binary containing images
-        // for both ARM64 macOS and x86_64 macOS.
-        if (all_mapped_memory.len < 4) return error.InvalidDebugInfo;
-        const magic = @as(*const u32, @ptrCast(all_mapped_memory.ptr)).*;
-        // The contents of a Mach-O file, which may or may not be the whole of `all_mapped_memory`.
-        const mapped_macho = switch (magic) {
-            macho.MH_MAGIC_64 => all_mapped_memory,
-
-            macho.FAT_CIGAM => mapped_macho: {
-                // This is the universal binary format (aka a "fat binary"). Annoyingly, the whole thing
-                // is big-endian, so we'll be swapping some bytes.
-                if (all_mapped_memory.len < @sizeOf(macho.fat_header)) return error.InvalidDebugInfo;
-                const hdr: *const macho.fat_header = @ptrCast(all_mapped_memory.ptr);
-                const archs_ptr: [*]const macho.fat_arch = @ptrCast(all_mapped_memory.ptr + @sizeOf(macho.fat_header));
-                const archs: []const macho.fat_arch = archs_ptr[0..@byteSwap(hdr.nfat_arch)];
-                const native_cpu_type = switch (builtin.cpu.arch) {
-                    .x86_64 => macho.CPU_TYPE_X86_64,
-                    .aarch64 => macho.CPU_TYPE_ARM64,
-                    else => comptime unreachable,
-                };
-                for (archs) |*arch| {
-                    if (@byteSwap(arch.cputype) != native_cpu_type) continue;
-                    const offset = @byteSwap(arch.offset);
-                    const size = @byteSwap(arch.size);
-                    break :mapped_macho all_mapped_memory[offset..][0..size];
-                }
-                // Our native architecture was not present in the fat binary.
-                return error.MissingDebugInfo;
-            },
-
-            // Even on modern 64-bit targets, this format doesn't seem to be too extensively used. It
-            // will be fairly easy to add support here if necessary; it's very similar to above.
-            macho.FAT_CIGAM_64 => return error.UnsupportedDebugInfo,
-
-            else => return error.InvalidDebugInfo,
-        };
-
-        const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_macho.ptr));
-        if (hdr.magic != macho.MH_MAGIC_64)
-            return error.InvalidDebugInfo;
-
-        const symtab: macho.symtab_command, const text_vmaddr: u64 = lc_iter: {
-            var it: macho.LoadCommandIterator = .{
-                .ncmds = hdr.ncmds,
-                .buffer = mapped_macho[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds],
-            };
-            var symtab: ?macho.symtab_command = null;
-            var text_vmaddr: ?u64 = null;
-            while (it.next()) |cmd| switch (cmd.cmd()) {
-                .SYMTAB => symtab = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo,
-                .SEGMENT_64 => if (cmd.cast(macho.segment_command_64)) |seg_cmd| {
-                    if (!mem.eql(u8, seg_cmd.segName(), "__TEXT")) continue;
-                    text_vmaddr = seg_cmd.vmaddr;
-                },
-                else => {},
-            };
-            break :lc_iter .{
-                symtab orelse return error.MissingDebugInfo,
-                text_vmaddr orelse return error.MissingDebugInfo,
-            };
-        };
-
-        const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_macho[symtab.symoff..]);
-        const syms = syms_ptr[0..symtab.nsyms];
-        const strings = mapped_macho[symtab.stroff..][0 .. symtab.strsize - 1];
-
-        var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len);
-        defer symbols.deinit(gpa);
-
-        // This map is temporary; it is used only to detect duplicates here. This is
-        // necessary because we prefer to use STAB ("symbolic debugging table") symbols,
-        // but they might not be present, so we track normal symbols too.
-        // Indices match 1-1 with those of `symbols`.
-        var symbol_names: std.StringArrayHashMapUnmanaged(void) = .empty;
-        defer symbol_names.deinit(gpa);
-        try symbol_names.ensureUnusedCapacity(gpa, syms.len);
-
-        var ofile: u32 = undefined;
-        var last_sym: MachoSymbol = undefined;
-        var state: enum {
-            init,
-            oso_open,
-            oso_close,
-            bnsym,
-            fun_strx,
-            fun_size,
-            ensym,
-        } = .init;
-
-        for (syms) |*sym| {
-            if (sym.n_type.bits.is_stab == 0) {
-                if (sym.n_strx == 0) continue;
-                switch (sym.n_type.bits.type) {
-                    .undf, .pbud, .indr, .abs, _ => continue,
-                    .sect => {
-                        const name = std.mem.sliceTo(strings[sym.n_strx..], 0);
-                        const gop = symbol_names.getOrPutAssumeCapacity(name);
-                        if (!gop.found_existing) {
-                            assert(gop.index == symbols.items.len);
-                            symbols.appendAssumeCapacity(.{
-                                .strx = sym.n_strx,
-                                .addr = sym.n_value,
-                                .ofile = MachoSymbol.unknown_ofile,
-                            });
-                        }
-                    },
-                }
-                continue;
-            }
-
-            // TODO handle globals N_GSYM, and statics N_STSYM
-            switch (sym.n_type.stab) {
-                .oso => switch (state) {
-                    .init, .oso_close => {
-                        state = .oso_open;
-                        ofile = sym.n_strx;
-                    },
-                    else => return error.InvalidDebugInfo,
-                },
-                .bnsym => switch (state) {
-                    .oso_open, .ensym => {
-                        state = .bnsym;
-                        last_sym = .{
-                            .strx = 0,
-                            .addr = sym.n_value,
-                            .ofile = ofile,
-                        };
-                    },
-                    else => return error.InvalidDebugInfo,
-                },
-                .fun => switch (state) {
-                    .bnsym => {
-                        state = .fun_strx;
-                        last_sym.strx = sym.n_strx;
-                    },
-                    .fun_strx => {
-                        state = .fun_size;
-                    },
-                    else => return error.InvalidDebugInfo,
-                },
-                .ensym => switch (state) {
-                    .fun_size => {
-                        state = .ensym;
-                        if (last_sym.strx != 0) {
-                            const name = std.mem.sliceTo(strings[last_sym.strx..], 0);
-                            const gop = symbol_names.getOrPutAssumeCapacity(name);
-                            if (!gop.found_existing) {
-                                assert(gop.index == symbols.items.len);
-                                symbols.appendAssumeCapacity(last_sym);
-                            } else {
-                                symbols.items[gop.index] = last_sym;
-                            }
-                        }
-                    },
-                    else => return error.InvalidDebugInfo,
-                },
-                .so => switch (state) {
-                    .init, .oso_close => {},
-                    .oso_open, .ensym => {
-                        state = .oso_close;
-                    },
-                    else => return error.InvalidDebugInfo,
-                },
-                else => {},
-            }
-        }
-
-        switch (state) {
-            .init => {
-                // Missing STAB symtab entries is still okay, unless there were also no normal symbols.
-                if (symbols.items.len == 0) return error.MissingDebugInfo;
-            },
-            .oso_close => {},
-            else => return error.InvalidDebugInfo, // corrupted STAB entries in symtab
-        }
-
-        const symbols_slice = try symbols.toOwnedSlice(gpa);
-        errdefer gpa.free(symbols_slice);
-
-        // Even though lld emits symbols in ascending order, this debug code
-        // should work for programs linked in any valid way.
-        // This sort is so that we can binary search later.
-        mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan);
-
-        return .{
-            .mapped_memory = all_mapped_memory,
-            .symbols = symbols_slice,
-            .strings = strings,
-            .vaddr_offset = module.text_base - text_vmaddr,
+    fn getFile(module: *Module, gpa: Allocator) Error!*MachOFile {
+        if (module.file == null) module.file = MachOFile.load(gpa, module.name, builtin.cpu.arch) catch |err| switch (err) {
+            error.InvalidMachO, error.InvalidDwarf => error.InvalidDebugInfo,
+            error.MissingDebugInfo, error.OutOfMemory, error.UnsupportedDebugInfo, error.ReadFailed => |e| e,
         };
+        return if (module.file.?) |*f| f else |err| err;
     }
 };
 
-const OFile = struct {
-    mapped_memory: []align(std.heap.page_size_min) const u8,
-    dwarf: Dwarf,
-    strtab: []const u8,
-    symtab: []align(1) const macho.nlist_64,
-    /// All named symbols in `symtab`. Stored `u32` key is the index into `symtab`. Accessed
-    /// through `SymbolAdapter`, so that the symbol name is used as the logical key.
-    symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true),
-
-    const SymbolAdapter = struct {
-        strtab: []const u8,
-        symtab: []align(1) const macho.nlist_64,
-        pub fn hash(ctx: SymbolAdapter, sym_name: []const u8) u32 {
-            _ = ctx;
-            return @truncate(std.hash.Wyhash.hash(0, sym_name));
-        }
-        pub fn eql(ctx: SymbolAdapter, a_sym_name: []const u8, b_sym_index: u32, b_index: usize) bool {
-            _ = b_index;
-            const b_sym = ctx.symtab[b_sym_index];
-            const b_sym_name = std.mem.sliceTo(ctx.strtab[b_sym.n_strx..], 0);
-            return mem.eql(u8, a_sym_name, b_sym_name);
-        }
-    };
-};
-
 const MachoSymbol = struct {
     strx: u32,
     addr: u64,
@@ -880,101 +619,12 @@ fn mapDebugInfoFile(path: []const u8) ![]align(std.heap.page_size_min) const u8
     };
 }
 
-fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile {
-    const mapped_mem = try mapDebugInfoFile(o_file_path);
-    errdefer posix.munmap(mapped_mem);
-
-    if (mapped_mem.len < @sizeOf(macho.mach_header_64)) return error.InvalidDebugInfo;
-    const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr));
-    if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidDebugInfo;
-
-    const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: {
-        var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null;
-        var symtab_cmd: ?macho.symtab_command = null;
-        var it: macho.LoadCommandIterator = .{
-            .ncmds = hdr.ncmds,
-            .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds],
-        };
-        while (it.next()) |cmd| switch (cmd.cmd()) {
-            .SEGMENT_64 => seg_cmd = cmd,
-            .SYMTAB => symtab_cmd = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo,
-            else => {},
-        };
-        break :cmds .{
-            seg_cmd orelse return error.MissingDebugInfo,
-            symtab_cmd orelse return error.MissingDebugInfo,
-        };
-    };
-
-    if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidDebugInfo;
-    if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidDebugInfo;
-    const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1];
-
-    const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64);
-    if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidDebugInfo;
-    const symtab: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]);
-
-    // TODO handle tentative (common) symbols
-    var symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true) = .empty;
-    defer symbols_by_name.deinit(gpa);
-    try symbols_by_name.ensureUnusedCapacity(gpa, @intCast(symtab.len));
-    for (symtab, 0..) |sym, sym_index| {
-        if (sym.n_strx == 0) continue;
-        switch (sym.n_type.bits.type) {
-            .undf => continue, // includes tentative symbols
-            .abs => continue,
-            else => {},
-        }
-        const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0);
-        const gop = symbols_by_name.getOrPutAssumeCapacityAdapted(
-            @as([]const u8, sym_name),
-            @as(OFile.SymbolAdapter, .{ .strtab = strtab, .symtab = symtab }),
-        );
-        if (gop.found_existing) return error.InvalidDebugInfo;
-        gop.key_ptr.* = @intCast(sym_index);
-    }
-
-    var sections: Dwarf.SectionArray = @splat(null);
-    for (seg_cmd.getSections()) |sect| {
-        if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue;
-
-        const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| {
-            if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i;
-        } else continue;
-
-        if (mapped_mem.len < sect.offset + sect.size) return error.InvalidDebugInfo;
-        const section_bytes = mapped_mem[sect.offset..][0..sect.size];
-        sections[section_index] = .{
-            .data = section_bytes,
-            .owned = false,
-        };
-    }
-
-    const missing_debug_info =
-        sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or
-        sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or
-        sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or
-        sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null;
-    if (missing_debug_info) return error.MissingDebugInfo;
-
-    var dwarf: Dwarf = .{ .sections = sections };
-    errdefer dwarf.deinit(gpa);
-    try dwarf.open(gpa, native_endian);
-
-    return .{
-        .mapped_memory = mapped_mem,
-        .dwarf = dwarf,
-        .strtab = strtab,
-        .symtab = symtab,
-        .symbols_by_name = symbols_by_name.move(),
-    };
-}
-
 const std = @import("std");
 const Io = std.Io;
 const Allocator = std.mem.Allocator;
 const Dwarf = std.debug.Dwarf;
 const Error = std.debug.SelfInfoError;
+const MachOFile = std.debug.MachOFile;
 const assert = std.debug.assert;
 const posix = std.posix;
 const macho = std.macho;
lib/std/debug/MachOFile.zig
@@ -0,0 +1,501 @@
+/// The whole mapped file as returned by `mapDebugInfoFile`; unmapped by `deinit`.
+mapped_memory: []align(std.heap.page_size_min) const u8,
+/// Symbols collected by `load`, sorted by ascending `addr`. Freed by `deinit`.
+symbols: []const Symbol,
+/// String table referenced by the `SYMTAB` load command, without its final byte.
+/// Points into `mapped_memory`.
+strings: []const u8,
+/// `vmaddr` of the `__TEXT` segment.
+text_vmaddr: u64,
+
+/// Key is index into `strings` of the file path.
+ofiles: std.AutoArrayHashMapUnmanaged(u32, Error!OFile),
+
+/// Errors returned by `load`, and the error type cached per object file in `ofiles`.
+pub const Error = error{
+    InvalidMachO,
+    InvalidDwarf,
+    MissingDebugInfo,
+    UnsupportedDebugInfo,
+    ReadFailed,
+    OutOfMemory,
+};
+
+/// Releases everything owned by `mf`: each successfully loaded object file (its mapping, DWARF
+/// data and symbol index), the `ofiles` map itself, the `symbols` slice, and the file mapping.
+/// `gpa` must be the allocator that was passed to `load` and `getDwarfForAddress`.
+pub fn deinit(mf: *MachOFile, gpa: Allocator) void {
+    for (mf.ofiles.values()) |*entry| {
+        if (entry.*) |*of| {
+            posix.munmap(of.mapped_memory);
+            of.dwarf.deinit(gpa);
+            of.symbols_by_name.deinit(gpa);
+        } else |_| {
+            // A cached load failure holds no resources.
+        }
+    }
+    mf.ofiles.deinit(gpa);
+    gpa.free(mf.symbols);
+    posix.munmap(mf.mapped_memory);
+}
+
+/// Maps the file at `path` and builds the address-sorted symbol list used for lookups.
+///
+/// `path` may be a 64-bit Mach-O file or a universal ("fat") binary, in which case the image
+/// matching `arch` is used. The 64-bit fat format is reported as `error.UnsupportedDebugInfo`.
+/// Asserts that `arch` is `.x86_64` or `.aarch64`.
+///
+/// Offsets, sizes and string-table indices read from the file are validated against the mapping,
+/// so a malformed file yields `error.InvalidMachO` rather than an out-of-bounds access.
+///
+/// On success, the returned value owns the file mapping and the symbol slice; release them with
+/// `deinit`, passing the same `gpa`.
+pub fn load(gpa: Allocator, path: []const u8, arch: std.Target.Cpu.Arch) Error!MachOFile {
+    switch (arch) {
+        .x86_64, .aarch64 => {},
+        else => unreachable,
+    }
+
+    const all_mapped_memory = try mapDebugInfoFile(path);
+    errdefer posix.munmap(all_mapped_memory);
+
+    // In most cases, the file we just mapped is a Mach-O binary. However, it could be a "universal
+    // binary": a simple file format which contains Mach-O binaries for multiple targets. For
+    // instance, `/usr/lib/dyld` is currently distributed as a universal binary containing images
+    // for both ARM64 macOS and x86_64 macOS.
+    if (all_mapped_memory.len < 4) return error.InvalidMachO;
+    const magic = std.mem.readInt(u32, all_mapped_memory.ptr[0..4], .little);
+
+    // The contents of a Mach-O file, which may or may not be the whole of `all_mapped_memory`.
+    const mapped_macho = switch (magic) {
+        macho.MH_MAGIC_64 => all_mapped_memory,
+
+        macho.FAT_CIGAM => mapped_macho: {
+            // This is the universal binary format (aka a "fat binary").
+            var fat_r: Io.Reader = .fixed(all_mapped_memory);
+            const hdr = fat_r.takeStruct(macho.fat_header, .big) catch |err| switch (err) {
+                error.ReadFailed => unreachable,
+                error.EndOfStream => return error.InvalidMachO,
+            };
+            const want_cpu_type = switch (arch) {
+                .x86_64 => macho.CPU_TYPE_X86_64,
+                .aarch64 => macho.CPU_TYPE_ARM64,
+                else => unreachable,
+            };
+            for (0..hdr.nfat_arch) |_| {
+                const fat_arch = fat_r.takeStruct(macho.fat_arch, .big) catch |err| switch (err) {
+                    error.ReadFailed => unreachable,
+                    error.EndOfStream => return error.InvalidMachO,
+                };
+                if (fat_arch.cputype != want_cpu_type) continue;
+                // Widen before adding so that a large `offset`/`size` pair cannot overflow the sum.
+                if (@as(u64, fat_arch.offset) + fat_arch.size > all_mapped_memory.len) return error.InvalidMachO;
+                break :mapped_macho all_mapped_memory[fat_arch.offset..][0..fat_arch.size];
+            }
+            // `arch` was not present in the fat binary.
+            return error.MissingDebugInfo;
+        },
+
+        // Even on modern 64-bit targets, this format doesn't seem to be too extensively used. It
+        // will be fairly easy to add support here if necessary; it's very similar to above.
+        macho.FAT_CIGAM_64 => return error.UnsupportedDebugInfo,
+
+        else => return error.InvalidMachO,
+    };
+
+    var r: Io.Reader = .fixed(mapped_macho);
+    const hdr = r.takeStruct(macho.mach_header_64, .little) catch |err| switch (err) {
+        error.ReadFailed => unreachable,
+        error.EndOfStream => return error.InvalidMachO,
+    };
+
+    if (hdr.magic != macho.MH_MAGIC_64)
+        return error.InvalidMachO;
+
+    const symtab: macho.symtab_command, const text_vmaddr: u64 = lcs: {
+        var it: macho.LoadCommandIterator = try .init(&hdr, mapped_macho[@sizeOf(macho.mach_header_64)..]);
+        var symtab: ?macho.symtab_command = null;
+        var text_vmaddr: ?u64 = null;
+        while (try it.next()) |cmd| switch (cmd.hdr.cmd) {
+            .SYMTAB => symtab = cmd.cast(macho.symtab_command) orelse return error.InvalidMachO,
+            .SEGMENT_64 => if (cmd.cast(macho.segment_command_64)) |seg_cmd| {
+                if (!mem.eql(u8, seg_cmd.segName(), "__TEXT")) continue;
+                text_vmaddr = seg_cmd.vmaddr;
+            },
+            else => {},
+        };
+        break :lcs .{
+            symtab orelse return error.MissingDebugInfo,
+            text_vmaddr orelse return error.MissingDebugInfo,
+        };
+    };
+
+    // Validate the string table range before slicing it. As in `loadOFile`, the final byte must be
+    // a NUL terminator; it is excluded from `strings`.
+    if (symtab.strsize == 0) return error.InvalidMachO;
+    if (symtab.stroff > mapped_macho.len or
+        symtab.strsize > mapped_macho.len - symtab.stroff)
+    {
+        return error.InvalidMachO;
+    }
+    if (mapped_macho[symtab.stroff..][symtab.strsize - 1] != 0) return error.InvalidMachO;
+    const strings = mapped_macho[symtab.stroff..][0 .. symtab.strsize - 1];
+
+    // Validate the symbol table range before allocating anything sized by `nsyms`.
+    const symtab_bytes_len = @as(u64, symtab.nsyms) * @sizeOf(macho.nlist_64);
+    if (symtab.symoff > mapped_macho.len or
+        symtab_bytes_len > mapped_macho.len - symtab.symoff)
+    {
+        return error.InvalidMachO;
+    }
+
+    var symbols: std.ArrayList(Symbol) = try .initCapacity(gpa, symtab.nsyms);
+    defer symbols.deinit(gpa);
+
+    // This map is temporary; it is used only to detect duplicates here. This is
+    // necessary because we prefer to use STAB ("symbolic debugging table") symbols,
+    // but they might not be present, so we track normal symbols too.
+    // Indices match 1-1 with those of `symbols`.
+    var symbol_names: std.StringArrayHashMapUnmanaged(void) = .empty;
+    defer symbol_names.deinit(gpa);
+    try symbol_names.ensureUnusedCapacity(gpa, symtab.nsyms);
+
+    var ofile: u32 = undefined;
+    var last_sym: Symbol = undefined;
+    var state: enum {
+        init,
+        oso_open,
+        oso_close,
+        bnsym,
+        fun_strx,
+        fun_size,
+        ensym,
+    } = .init;
+
+    var sym_r: Io.Reader = .fixed(mapped_macho[symtab.symoff..][0..@intCast(symtab_bytes_len)]);
+    for (0..symtab.nsyms) |_| {
+        const sym = sym_r.takeStruct(macho.nlist_64, .little) catch |err| switch (err) {
+            error.ReadFailed => unreachable,
+            error.EndOfStream => return error.InvalidMachO,
+        };
+        if (sym.n_type.bits.is_stab == 0) {
+            if (sym.n_strx == 0) continue;
+            switch (sym.n_type.bits.type) {
+                .undf, .pbud, .indr, .abs, _ => continue,
+                .sect => {
+                    if (sym.n_strx > strings.len) return error.InvalidMachO;
+                    const name = std.mem.sliceTo(strings[sym.n_strx..], 0);
+                    const gop = symbol_names.getOrPutAssumeCapacity(name);
+                    if (!gop.found_existing) {
+                        assert(gop.index == symbols.items.len);
+                        symbols.appendAssumeCapacity(.{
+                            .strx = sym.n_strx,
+                            .addr = sym.n_value,
+                            .ofile = Symbol.unknown_ofile,
+                        });
+                    }
+                },
+            }
+            continue;
+        }
+
+        // TODO handle globals N_GSYM, and statics N_STSYM
+        switch (sym.n_type.stab) {
+            .oso => switch (state) {
+                .init, .oso_close => {
+                    // `ofile` is later used as an index into `strings`, so validate it here.
+                    if (sym.n_strx > strings.len) return error.InvalidMachO;
+                    state = .oso_open;
+                    ofile = sym.n_strx;
+                },
+                else => return error.InvalidMachO,
+            },
+            .bnsym => switch (state) {
+                .oso_open, .ensym => {
+                    state = .bnsym;
+                    last_sym = .{
+                        .strx = 0,
+                        .addr = sym.n_value,
+                        .ofile = ofile,
+                    };
+                },
+                else => return error.InvalidMachO,
+            },
+            .fun => switch (state) {
+                .bnsym => {
+                    if (sym.n_strx > strings.len) return error.InvalidMachO;
+                    state = .fun_strx;
+                    last_sym.strx = sym.n_strx;
+                },
+                .fun_strx => {
+                    state = .fun_size;
+                },
+                else => return error.InvalidMachO,
+            },
+            .ensym => switch (state) {
+                .fun_size => {
+                    state = .ensym;
+                    if (last_sym.strx != 0) {
+                        const name = std.mem.sliceTo(strings[last_sym.strx..], 0);
+                        const gop = symbol_names.getOrPutAssumeCapacity(name);
+                        if (!gop.found_existing) {
+                            assert(gop.index == symbols.items.len);
+                            symbols.appendAssumeCapacity(last_sym);
+                        } else {
+                            // Prefer the STAB entry over a previously seen normal symbol.
+                            symbols.items[gop.index] = last_sym;
+                        }
+                    }
+                },
+                else => return error.InvalidMachO,
+            },
+            .so => switch (state) {
+                .init, .oso_close => {},
+                .oso_open, .ensym => {
+                    state = .oso_close;
+                },
+                else => return error.InvalidMachO,
+            },
+            else => {},
+        }
+    }
+
+    switch (state) {
+        .init => {
+            // Missing STAB symtab entries is still okay, unless there were also no normal symbols.
+            if (symbols.items.len == 0) return error.MissingDebugInfo;
+        },
+        .oso_close => {},
+        else => return error.InvalidMachO, // corrupted STAB entries in symtab
+    }
+
+    const symbols_slice = try symbols.toOwnedSlice(gpa);
+    errdefer gpa.free(symbols_slice);
+
+    // Even though lld emits symbols in ascending order, this debug code
+    // should work for programs linked in any valid way.
+    // This sort is so that we can binary search later.
+    mem.sort(Symbol, symbols_slice, {}, Symbol.addressLessThan);
+
+    return .{
+        .mapped_memory = all_mapped_memory,
+        .symbols = symbols_slice,
+        .strings = strings,
+        .ofiles = .empty,
+        .text_vmaddr = text_vmaddr,
+    };
+}
+/// Finds the DWARF info covering `vaddr`, loading the containing object file on first use.
+///
+/// Returns the object file's `Dwarf` together with `vaddr` translated to the matching address
+/// inside that object file. The outcome of loading each object file, success or failure, is
+/// cached in `mf.ofiles`, so a failed load is reported again on later calls without retrying.
+///
+/// Returns `error.MissingDebugInfo` if `Symbol.find` finds no symbol for `vaddr`, if the symbol
+/// has no associated object file, or if the object file does not define a symbol of that name.
+///
+/// NOTE(review): the returned `*Dwarf` points into the storage of `mf.ofiles`; presumably it is
+/// invalidated when a later call inserts a new entry — confirm before holding it across calls.
+pub fn getDwarfForAddress(mf: *MachOFile, gpa: Allocator, vaddr: u64) !struct { *Dwarf, u64 } {
+    const symbol = Symbol.find(mf.symbols, vaddr) orelse return error.MissingDebugInfo;
+
+    if (symbol.ofile == Symbol.unknown_ofile) return error.MissingDebugInfo;
+
+    // Both values index the string table and originate from the file; reject any that point
+    // outside it instead of slicing out of bounds.
+    if (symbol.strx > mf.strings.len or symbol.ofile > mf.strings.len) return error.InvalidMachO;
+
+    // offset of `vaddr` from start of `symbol`
+    const address_symbol_offset = vaddr - symbol.addr;
+
+    // Take the symbol name from the N_FUN STAB entry; it is the key used to find the same
+    // symbol in the object file's own symbol table.
+    const stab_symbol = mem.sliceTo(mf.strings[symbol.strx..], 0);
+
+    const gop = try mf.ofiles.getOrPut(gpa, symbol.ofile);
+    if (!gop.found_existing) {
+        const name = mem.sliceTo(mf.strings[symbol.ofile..], 0);
+        gop.value_ptr.* = loadOFile(gpa, name);
+    }
+    const of = &(gop.value_ptr.* catch |err| return err);
+
+    const symbol_index = of.symbols_by_name.getKeyAdapted(
+        @as([]const u8, stab_symbol),
+        @as(OFile.SymbolAdapter, .{ .strtab = of.strtab, .symtab_raw = of.symtab_raw }),
+    ) orelse return error.MissingDebugInfo;
+
+    const symbol_ofile_vaddr = vaddr: {
+        // `symtab_raw` holds the entries exactly as stored in the file (little-endian).
+        var sym = of.symtab_raw[symbol_index];
+        if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.nlist_64, &sym);
+        break :vaddr sym.n_value;
+    };
+
+    return .{ &of.dwarf, symbol_ofile_vaddr + address_symbol_offset };
+}
+/// Returns the name of the symbol that `Symbol.find` selects for `vaddr`, as a slice of
+/// `mf.strings`. Returns `error.MissingDebugInfo` when no symbol is found.
+pub fn lookupSymbolName(mf: *MachOFile, vaddr: u64) error{MissingDebugInfo}![]const u8 {
+    if (Symbol.find(mf.symbols, vaddr)) |found| {
+        const name_and_rest = mf.strings[found.strx..];
+        return mem.sliceTo(name_and_rest, 0);
+    }
+    return error.MissingDebugInfo;
+}
+
+/// A loaded object file, as produced by `loadOFile`.
+const OFile = struct {
+    /// The mapped object file; `strtab`, `symtab_raw` and the DWARF sections point into it.
+    mapped_memory: []align(std.heap.page_size_min) const u8,
+    dwarf: Dwarf,
+    /// The object file's string table, without its final NUL byte.
+    strtab: []const u8,
+    /// The symbol table entries exactly as stored in the file; they are byte-swapped on read
+    /// when the host is not little-endian.
+    symtab_raw: []align(1) const macho.nlist_64,
+    /// All named symbols in `symtab_raw`. Stored `u32` key is the index into `symtab_raw`. Accessed
+    /// through `SymbolAdapter`, so that the symbol name is used as the logical key.
+    symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true),
+
+    /// Hash map adapter that looks entries up by symbol name while the map stores only indices
+    /// into `symtab_raw`.
+    const SymbolAdapter = struct {
+        strtab: []const u8,
+        symtab_raw: []align(1) const macho.nlist_64,
+
+        /// Hashes the name itself; the adapter state is not needed for this.
+        pub fn hash(ctx: SymbolAdapter, sym_name: []const u8) u32 {
+            _ = ctx;
+            const full_hash = std.hash.Wyhash.hash(0, sym_name);
+            return @truncate(full_hash);
+        }
+
+        /// Compares `a_sym_name` against the name of the stored symbol at `b_sym_index`.
+        pub fn eql(ctx: SymbolAdapter, a_sym_name: []const u8, b_sym_index: u32, b_index: usize) bool {
+            _ = b_index;
+            var stored_sym = ctx.symtab_raw[b_sym_index];
+            if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.nlist_64, &stored_sym);
+            return mem.eql(u8, a_sym_name, std.mem.sliceTo(ctx.strtab[stored_sym.n_strx..], 0));
+        }
+    };
+};
+
+/// One entry of the address-sorted symbol list built by `load`.
+const Symbol = struct {
+    /// Index of the symbol's name in the string table.
+    strx: u32,
+    /// Address of the symbol.
+    addr: u64,
+    /// Value may be `unknown_ofile`.
+    ofile: u32,
+    /// Marks a symbol which has no associated object file.
+    const unknown_ofile = std.math.maxInt(u32);
+
+    /// Ordering used to sort symbols by ascending `addr`.
+    fn addressLessThan(context: void, lhs: Symbol, rhs: Symbol) bool {
+        _ = context;
+        return lhs.addr < rhs.addr;
+    }
+
+    /// Returns the last symbol whose `addr` is less than or equal to `address`, or `null` if
+    /// `symbols` is empty or `address` lies below the first symbol.
+    ///
+    /// `address` is a `u64` to match `addr`, so lookups do not depend on the host's pointer width.
+    /// Assumes that `symbols` is sorted in order of ascending `addr`.
+    fn find(symbols: []const Symbol, address: u64) ?*const Symbol {
+        if (symbols.len == 0) return null; // no potential match
+        if (address < symbols[0].addr) return null; // address is before the lowest-address symbol
+        // Invariant: `symbols[left].addr <= address`, and the answer lies in `symbols[left..][0..len]`.
+        var left: usize = 0;
+        var len: usize = symbols.len;
+        while (len > 1) {
+            const mid = left + len / 2;
+            if (address < symbols[mid].addr) {
+                len /= 2;
+            } else {
+                left = mid;
+                len -= len / 2;
+            }
+        }
+        return &symbols[left];
+    }
+
+    test find {
+        const symbols: []const Symbol = &.{
+            .{ .addr = 100, .strx = undefined, .ofile = undefined },
+            .{ .addr = 200, .strx = undefined, .ofile = undefined },
+            .{ .addr = 300, .strx = undefined, .ofile = undefined },
+        };
+
+        try testing.expectEqual(null, find(&.{}, 0));
+
+        try testing.expectEqual(null, find(symbols, 0));
+        try testing.expectEqual(null, find(symbols, 99));
+        try testing.expectEqual(&symbols[0], find(symbols, 100).?);
+        try testing.expectEqual(&symbols[0], find(symbols, 150).?);
+        try testing.expectEqual(&symbols[0], find(symbols, 199).?);
+
+        try testing.expectEqual(&symbols[1], find(symbols, 200).?);
+        try testing.expectEqual(&symbols[1], find(symbols, 250).?);
+        try testing.expectEqual(&symbols[1], find(symbols, 299).?);
+
+        try testing.expectEqual(&symbols[2], find(symbols, 300).?);
+        try testing.expectEqual(&symbols[2], find(symbols, 301).?);
+        try testing.expectEqual(&symbols[2], find(symbols, 5000).?);
+
+        // Addresses that do not fit in 32 bits must be accepted regardless of the host.
+        const high_address: u64 = 1 << 40;
+        try testing.expectEqual(&symbols[2], find(symbols, high_address).?);
+    }
+};
+// Reference `Symbol` so that the `test` declared inside it is included when this file is tested.
+test {
+    _ = Symbol;
+}
+
+/// Maps the object file at `o_file_path` and prepares it for DWARF queries.
+///
+/// Builds a by-name index of the file's named symbols (undefined and absolute symbols are
+/// skipped), locates the sections of the `__DWARF` segment, and opens them with `Dwarf.open`.
+/// Every offset, size and string-table index read from the file is validated against the mapping
+/// before use, with the arithmetic widened so it cannot overflow; a malformed file yields
+/// `error.InvalidMachO`.
+///
+/// On success the returned `OFile` owns the mapping, the `Dwarf` state and the symbol index.
+fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile {
+    const mapped_mem = try mapDebugInfoFile(o_file_path);
+    errdefer posix.munmap(mapped_mem);
+
+    var r: Io.Reader = .fixed(mapped_mem);
+    const hdr = r.takeStruct(macho.mach_header_64, .little) catch |err| switch (err) {
+        error.ReadFailed => unreachable,
+        error.EndOfStream => return error.InvalidMachO,
+    };
+    if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidMachO;
+
+    const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: {
+        var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null;
+        var symtab_cmd: ?macho.symtab_command = null;
+        var it: macho.LoadCommandIterator = try .init(&hdr, mapped_mem[@sizeOf(macho.mach_header_64)..]);
+        while (try it.next()) |lc| switch (lc.hdr.cmd) {
+            .SEGMENT_64 => seg_cmd = lc,
+            .SYMTAB => symtab_cmd = lc.cast(macho.symtab_command) orelse return error.InvalidMachO,
+            else => {},
+        };
+        break :cmds .{
+            seg_cmd orelse return error.MissingDebugInfo,
+            symtab_cmd orelse return error.MissingDebugInfo,
+        };
+    };
+
+    // The string table must be non-empty, lie within the file, and end with a NUL byte. The range
+    // check is written with a subtraction so that `stroff + strsize` cannot overflow.
+    if (symtab_cmd.strsize == 0) return error.InvalidMachO;
+    if (symtab_cmd.stroff > mapped_mem.len or
+        symtab_cmd.strsize > mapped_mem.len - symtab_cmd.stroff)
+    {
+        return error.InvalidMachO;
+    }
+    if (mapped_mem[symtab_cmd.stroff..][symtab_cmd.strsize - 1] != 0) return error.InvalidMachO;
+    const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1];
+
+    // Widen before multiplying so that a large `nsyms` cannot overflow the byte count.
+    const n_sym_bytes = @as(u64, symtab_cmd.nsyms) * @sizeOf(macho.nlist_64);
+    if (symtab_cmd.symoff > mapped_mem.len or
+        n_sym_bytes > mapped_mem.len - symtab_cmd.symoff)
+    {
+        return error.InvalidMachO;
+    }
+    const symtab_raw: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..@intCast(n_sym_bytes)]);
+
+    // TODO handle tentative (common) symbols
+    var symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true) = .empty;
+    defer symbols_by_name.deinit(gpa);
+    try symbols_by_name.ensureUnusedCapacity(gpa, @intCast(symtab_raw.len));
+    for (symtab_raw, 0..) |sym_raw, sym_index| {
+        var sym = sym_raw;
+        if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.nlist_64, &sym);
+        if (sym.n_strx == 0) continue;
+        switch (sym.n_type.bits.type) {
+            .undf => continue, // includes tentative symbols
+            .abs => continue,
+            else => {},
+        }
+        // The name index comes from the file; reject one that points outside the string table.
+        if (sym.n_strx > strtab.len) return error.InvalidMachO;
+        const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0);
+        const gop = symbols_by_name.getOrPutAssumeCapacityAdapted(
+            @as([]const u8, sym_name),
+            @as(OFile.SymbolAdapter, .{ .strtab = strtab, .symtab_raw = symtab_raw }),
+        );
+        if (gop.found_existing) return error.InvalidMachO;
+        gop.key_ptr.* = @intCast(sym_index);
+    }
+
+    // `getSections` asserts that the command is a `segment_command_64` and slices `nsects`
+    // entries without a bounds check, so verify both against the command's size first.
+    const seg = seg_cmd.cast(macho.segment_command_64) orelse return error.InvalidMachO;
+    const sections_bytes_len = @as(u64, seg.nsects) * @sizeOf(macho.section_64);
+    if (seg_cmd.data.len - @sizeOf(macho.segment_command_64) < sections_bytes_len) return error.InvalidMachO;
+
+    var sections: Dwarf.SectionArray = @splat(null);
+    for (seg_cmd.getSections()) |sect_raw| {
+        var sect = sect_raw;
+        if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.section_64, &sect);
+
+        if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue;
+
+        const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| {
+            if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i;
+        } else continue;
+
+        // Written with a subtraction so that `offset + size` cannot overflow.
+        if (sect.offset > mapped_mem.len or
+            sect.size > mapped_mem.len - sect.offset)
+        {
+            return error.InvalidMachO;
+        }
+        const section_bytes = mapped_mem[sect.offset..][0..@intCast(sect.size)];
+        sections[section_index] = .{
+            .data = section_bytes,
+            .owned = false,
+        };
+    }
+
+    if (sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or
+        sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or
+        sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or
+        sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null)
+    {
+        return error.MissingDebugInfo;
+    }
+
+    var dwarf: Dwarf = .{ .sections = sections };
+    errdefer dwarf.deinit(gpa);
+    dwarf.open(gpa, .little) catch |err| switch (err) {
+        error.InvalidDebugInfo,
+        error.EndOfStream,
+        error.Overflow,
+        error.StreamTooLong,
+        => return error.InvalidDwarf,
+
+        error.MissingDebugInfo,
+        error.ReadFailed,
+        error.OutOfMemory,
+        => |e| return e,
+    };
+
+    return .{
+        .mapped_memory = mapped_mem,
+        .dwarf = dwarf,
+        .strtab = strtab,
+        .symtab_raw = symtab_raw,
+        // `move` leaves the local map empty, so the deferred `deinit` above frees nothing.
+        .symbols_by_name = symbols_by_name.move(),
+    };
+}
+
+/// Opens the file at `path` (relative to the current working directory) and maps its full length
+/// read-only with `mmap`. The file handle is closed before returning.
+///
+/// Returns `error.MissingDebugInfo` if the file does not exist, and `error.ReadFailed` for any
+/// other failure to open, measure or map it.
+fn mapDebugInfoFile(path: []const u8) ![]align(std.heap.page_size_min) const u8 {
+    const file = std.fs.cwd().openFile(path, .{}) catch |err| {
+        if (err == error.FileNotFound) return error.MissingDebugInfo;
+        return error.ReadFailed;
+    };
+    defer file.close();
+
+    const end_pos = file.getEndPos() catch return error.ReadFailed;
+    // The length must be representable as `usize` to be mapped.
+    const file_len = std.math.cast(usize, end_pos) orelse return error.ReadFailed;
+
+    const mapped = posix.mmap(
+        null,
+        file_len,
+        posix.PROT.READ,
+        .{ .TYPE = .SHARED },
+        file.handle,
+        0,
+    ) catch return error.ReadFailed;
+    return mapped;
+}
+
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const Dwarf = std.debug.Dwarf;
+const Io = std.Io;
+const assert = std.debug.assert;
+const posix = std.posix;
+const macho = std.macho;
+const mem = std.mem;
+const testing = std.testing;
+
+const builtin = @import("builtin");
+
+const MachOFile = @This();
lib/std/debug.zig
@@ -21,6 +21,7 @@ const root = @import("root");
 pub const Dwarf = @import("debug/Dwarf.zig");
 pub const Pdb = @import("debug/Pdb.zig");
 pub const ElfFile = @import("debug/ElfFile.zig");
+pub const MachOFile = @import("debug/MachOFile.zig");
 pub const Info = @import("debug/Info.zig");
 pub const Coverage = @import("debug/Coverage.zig");
 pub const cpu_context = @import("debug/cpu_context.zig");
lib/std/macho.zig
@@ -1902,74 +1902,76 @@ pub const data_in_code_entry = extern struct {
 };
 
 pub const LoadCommandIterator = struct {
+    next_index: usize,
     ncmds: usize,
-    buffer: []const u8,
-    index: usize = 0,
+    r: std.Io.Reader,
 
     pub const LoadCommand = struct {
         hdr: load_command,
         data: []const u8,
 
-        pub fn cmd(lc: LoadCommand) LC {
-            return lc.hdr.cmd;
-        }
-
-        pub fn cmdsize(lc: LoadCommand) u32 {
-            return lc.hdr.cmdsize;
-        }
-
         pub fn cast(lc: LoadCommand, comptime Cmd: type) ?Cmd {
             if (lc.data.len < @sizeOf(Cmd)) return null;
-            return @as(*align(1) const Cmd, @ptrCast(lc.data.ptr)).*;
+            const ptr: *align(1) const Cmd = @ptrCast(lc.data.ptr);
+            var cmd = ptr.*;
+            if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(Cmd, &cmd);
+            return cmd;
         }
 
         /// Asserts LoadCommand is of type segment_command_64.
+        /// If the native endian is not `.little`, the `section_64` values must be byte-swapped by the caller.
         pub fn getSections(lc: LoadCommand) []align(1) const section_64 {
             const segment_lc = lc.cast(segment_command_64).?;
-            if (segment_lc.nsects == 0) return &[0]section_64{};
-            const data = lc.data[@sizeOf(segment_command_64)..];
-            const sections = @as([*]align(1) const section_64, @ptrCast(data.ptr))[0..segment_lc.nsects];
-            return sections;
+            const sects_ptr: [*]align(1) const section_64 = @ptrCast(lc.data[@sizeOf(segment_command_64)..]);
+            return sects_ptr[0..segment_lc.nsects];
         }
 
         /// Asserts LoadCommand is of type dylib_command.
         pub fn getDylibPathName(lc: LoadCommand) []const u8 {
             const dylib_lc = lc.cast(dylib_command).?;
-            const data = lc.data[dylib_lc.dylib.name..];
-            return mem.sliceTo(data, 0);
+            return mem.sliceTo(lc.data[dylib_lc.dylib.name..], 0);
         }
 
         /// Asserts LoadCommand is of type rpath_command.
         pub fn getRpathPathName(lc: LoadCommand) []const u8 {
             const rpath_lc = lc.cast(rpath_command).?;
-            const data = lc.data[rpath_lc.path..];
-            return mem.sliceTo(data, 0);
+            return mem.sliceTo(lc.data[rpath_lc.path..], 0);
         }
 
         /// Asserts LoadCommand is of type build_version_command.
+        /// If the native endian is not `.little`, the `build_tool_version` values must be byte-swapped by the caller.
         pub fn getBuildVersionTools(lc: LoadCommand) []align(1) const build_tool_version {
             const build_lc = lc.cast(build_version_command).?;
-            const ntools = build_lc.ntools;
-            if (ntools == 0) return &[0]build_tool_version{};
-            const data = lc.data[@sizeOf(build_version_command)..];
-            const tools = @as([*]align(1) const build_tool_version, @ptrCast(data.ptr))[0..ntools];
-            return tools;
+            const tools_ptr: [*]align(1) const build_tool_version = @ptrCast(lc.data[@sizeOf(build_version_command)..]);
+            return tools_ptr[0..build_lc.ntools];
         }
     };
 
-    pub fn next(it: *LoadCommandIterator) ?LoadCommand {
-        if (it.index >= it.ncmds) return null;
+    pub fn next(it: *LoadCommandIterator) error{InvalidMachO}!?LoadCommand {
+        if (it.next_index >= it.ncmds) return null;
 
-        const hdr = @as(*align(1) const load_command, @ptrCast(it.buffer.ptr)).*;
-        const cmd = LoadCommand{
-            .hdr = hdr,
-            .data = it.buffer[0..hdr.cmdsize],
+        const hdr = it.r.peekStruct(load_command, .little) catch |err| switch (err) {
+            error.ReadFailed => unreachable,
+            error.EndOfStream => return error.InvalidMachO,
+        };
+        const data = it.r.take(hdr.cmdsize) catch |err| switch (err) {
+            error.ReadFailed => unreachable,
+            error.EndOfStream => return error.InvalidMachO,
         };
 
-        it.buffer = it.buffer[hdr.cmdsize..];
-        it.index += 1;
+        it.next_index += 1;
+        return .{ .hdr = hdr, .data = data };
+    }
 
-        return cmd;
+    pub fn init(hdr: *const mach_header_64, cmds_buf_overlong: []const u8) error{InvalidMachO}!LoadCommandIterator {
+        if (cmds_buf_overlong.len < hdr.sizeofcmds) return error.InvalidMachO;
+        if (hdr.ncmds > 0 and hdr.sizeofcmds < @sizeOf(load_command)) return error.InvalidMachO;
+        const cmds_buf = cmds_buf_overlong[0..hdr.sizeofcmds];
+        return .{
+            .next_index = 0,
+            .ncmds = hdr.ncmds,
+            .r = .fixed(cmds_buf),
+        };
     }
 };
 
src/link/MachO/Dylib.zig
@@ -90,11 +90,8 @@ fn parseBinary(self: *Dylib, macho_file: *MachO) !void {
         if (amt != lc_buffer.len) return error.InputOutput;
     }
 
-    var it = LoadCommandIterator{
-        .ncmds = header.ncmds,
-        .buffer = lc_buffer,
-    };
-    while (it.next()) |cmd| switch (cmd.cmd()) {
+    var it = LoadCommandIterator.init(&header, lc_buffer) catch |err| std.debug.panic("bad dylib: {t}", .{err});
+    while (it.next() catch |err| std.debug.panic("bad dylib: {t}", .{err})) |cmd| switch (cmd.hdr.cmd) {
         .ID_DYLIB => {
             self.id = try Id.fromLoadCommand(gpa, cmd.cast(macho.dylib_command).?, cmd.getDylibPathName());
         },
src/link/MachO/Object.zig
@@ -109,11 +109,8 @@ pub fn parse(self: *Object, macho_file: *MachO) !void {
         if (amt != self.header.?.sizeofcmds) return error.InputOutput;
     }
 
-    var it = LoadCommandIterator{
-        .ncmds = self.header.?.ncmds,
-        .buffer = lc_buffer,
-    };
-    while (it.next()) |lc| switch (lc.cmd()) {
+    var it = LoadCommandIterator.init(&self.header.?, lc_buffer) catch |err| std.debug.panic("bad object: {t}", .{err});
+    while (it.next() catch |err| std.debug.panic("bad object: {t}", .{err})) |lc| switch (lc.hdr.cmd) {
         .SEGMENT_64 => {
             const sections = lc.getSections();
             try self.sections.ensureUnusedCapacity(gpa, sections.len);
@@ -1644,11 +1641,8 @@ pub fn parseAr(self: *Object, macho_file: *MachO) !void {
         if (amt != self.header.?.sizeofcmds) return error.InputOutput;
     }
 
-    var it = LoadCommandIterator{
-        .ncmds = self.header.?.ncmds,
-        .buffer = lc_buffer,
-    };
-    while (it.next()) |lc| switch (lc.cmd()) {
+    var it = LoadCommandIterator.init(&self.header.?, lc_buffer) catch |err| std.debug.panic("bad object: {t}", .{err});
+    while (it.next() catch |err| std.debug.panic("bad object: {t}", .{err})) |lc| switch (lc.hdr.cmd) {
         .SYMTAB => {
             const cmd = lc.cast(macho.symtab_command).?;
             try self.strtab.resize(gpa, cmd.strsize);
src/link/MachO.zig
@@ -4167,7 +4167,7 @@ pub const Platform = struct {
     /// Using Apple's ld64 as our blueprint, `min_version` as well as `sdk_version` are set to
     /// the extracted minimum platform version.
     pub fn fromLoadCommand(lc: macho.LoadCommandIterator.LoadCommand) Platform {
-        switch (lc.cmd()) {
+        switch (lc.hdr.cmd) {
             .BUILD_VERSION => {
                 const cmd = lc.cast(macho.build_version_command).?;
                 return .{
@@ -4200,7 +4200,7 @@ pub const Platform = struct {
                 // We can't distinguish Mac Catalyst here, but this is legacy stuff anyway.
                 const cmd = lc.cast(macho.version_min_command).?;
                 return .{
-                    .os_tag = switch (lc.cmd()) {
+                    .os_tag = switch (lc.hdr.cmd) {
                         .VERSION_MIN_IPHONEOS => .ios,
                         .VERSION_MIN_MACOSX => .macos,
                         .VERSION_MIN_TVOS => .tvos,