Commit 8e81500051

Jakub Konka <kubkon@jakubkonka.com>
2024-10-16 12:28:13
macho: handle DWARFv5 when parsing debug info in objects
1 parent 808306f
Changed files (2)
src
src/link/MachO/Dwarf.zig
@@ -1,25 +1,73 @@
+debug_info: []u8 = &[0]u8{},
+debug_abbrev: []u8 = &[0]u8{},
+debug_str: []u8 = &[0]u8{},
+debug_str_offsets: []u8 = &[0]u8{},
+
+/// Hands the four section buffers stored in `dwarf` back to `allocator`.
+pub fn deinit(dwarf: *Dwarf, allocator: Allocator) void {
+    const sections = [_][]u8{
+        dwarf.debug_info,
+        dwarf.debug_abbrev,
+        dwarf.debug_str,
+        dwarf.debug_str_offsets,
+    };
+    for (sections) |section| allocator.free(section);
+}
+
+/// Fetches entry `index` from a string-offsets table and returns the `__debug_str` offset stored in it.
+/// `debug_str_offsets` is the content of the `__debug_str_offs` section and `base` is the byte position
+/// of the table's first entry inside it. In DWARFv5 the producer supplies `index` through a
+/// DW_FORM_strx* attribute and `base` through DW_AT_str_offsets_base (DW_FORM_sec_offset) in the
+/// opening header of a "referencing entity" such as DW_TAG_compile_unit.
+/// Entries are `u32` for `.dwarf32` and `u64` for `.dwarf64`. No bounds checking is done here.
+fn getOffset(debug_str_offsets: []const u8, base: u64, index: u64, dw_fmt: DwarfFormat) u64 {
+    const table = debug_str_offsets.ptr + base;
+    switch (dw_fmt) {
+        .dwarf32 => {
+            const entry: *align(1) const u32 = @ptrCast(table + index * @sizeOf(u32));
+            return entry.*;
+        },
+        .dwarf64 => {
+            const entry: *align(1) const u64 = @ptrCast(table + index * @sizeOf(u64));
+            return entry.*;
+        },
+    }
+}
+
 pub const InfoReader = struct {
-    bytes: []const u8,
-    strtab: []const u8,
+    ctx: Dwarf,
     pos: usize = 0,
 
-    pub fn readCompileUnitHeader(p: *InfoReader) !CompileUnitHeader {
+    /// The `debug_info` buffer of the attached context; this is what the reader's `pos` indexes into.
+    fn bytes(p: InfoReader) []const u8 {
+        const section: []const u8 = p.ctx.debug_info;
+        return section;
+    }
+
+    /// Decodes a unit header at the current position and advances past it.
+    /// A 32-bit length of 0xffffffff switches to the 64-bit format, in which case the real length
+    /// follows as a `u64`. Versions 4 and 5 are understood; anything else yields
+    /// `error.InvalidVersion`. `macho_file` is currently unused.
+    pub fn readCompileUnitHeader(p: *InfoReader, macho_file: *MachO) !CompileUnitHeader {
+        _ = macho_file;
         var length: u64 = try p.readInt(u32);
         const is_64bit = length == 0xffffffff;
         if (is_64bit) {
             length = try p.readInt(u64);
         }
         const dw_fmt: DwarfFormat = if (is_64bit) .dwarf64 else .dwarf32;
+        const version = try p.readInt(Version);
+        // Version 4 headers carry no unit type, so it stays 0 for them.
+        var unit_type: u8 = 0;
+        var address_size: u8 = undefined;
+        var debug_abbrev_offset: u64 = undefined;
+        switch (version) {
+            4 => {
+                debug_abbrev_offset = try p.readOffset(dw_fmt);
+                address_size = try p.readByte();
+            },
+            5 => {
+                // According to the spec, version 5 introduced the unit type field and moved the
+                // address size in front of the abbreviation table offset.
+                unit_type = try p.readByte();
+                address_size = try p.readByte();
+                debug_abbrev_offset = try p.readOffset(dw_fmt);
+            },
+            else => return error.InvalidVersion,
+        }
         return .{
             .format = dw_fmt,
             .length = length,
-            .version = try p.readInt(u16),
-            .debug_abbrev_offset = try p.readOffset(dw_fmt),
-            .address_size = try p.readByte(),
+            .version = version,
+            .debug_abbrev_offset = debug_abbrev_offset,
+            .address_size = address_size,
+            .unit_type = unit_type,
         };
     }
 
-    pub fn seekToDie(p: *InfoReader, code: Code, cuh: CompileUnitHeader, abbrev_reader: *AbbrevReader) !void {
+    pub fn seekToDie(p: *InfoReader, code: Code, cuh: CompileUnitHeader, abbrev_reader: *AbbrevReader, macho_file: *MachO) !void {
         const cuh_length = math.cast(usize, cuh.length) orelse return error.Overflow;
         const end_pos = p.pos + switch (cuh.format) {
             .dwarf32 => @as(usize, 4),
@@ -27,72 +75,100 @@ pub const InfoReader = struct {
         } + cuh_length;
         while (p.pos < end_pos) {
             const di_code = try p.readUleb128(u64);
-            if (di_code == 0) return error.Eof;
+            if (di_code == 0) return error.UnexpectedEndOfFile;
             if (di_code == code) return;
 
-            while (try abbrev_reader.readAttr()) |attr| switch (attr.at) {
-                dwarf.FORM.sec_offset,
-                dwarf.FORM.ref_addr,
-                => {
-                    _ = try p.readOffset(cuh.format);
-                },
+            while (try abbrev_reader.readAttr()) |attr| {
+                try p.skip(attr.form, cuh, macho_file);
+            }
+        }
+        return error.UnexpectedEndOfFile;
+    }
 
-                dwarf.FORM.addr => {
-                    _ = try p.readNBytes(cuh.address_size);
-                },
+    /// Advances the reader past one attribute value encoded as `form`.
+    /// When skipping attributes, we don't really need to be able to handle them all
+    /// since we only ever care about the DW_TAG_compile_unit.
+    /// `cuh` supplies the offset width (`format`), the address size and the DWARF version; the
+    /// DW_FORM_strx*/DW_FORM_addrx* forms are only accepted when `cuh.version >= 5`.
+    /// Returns `error.UnknownForm` for any form not listed below. `macho_file` is currently unused.
+    pub fn skip(p: *InfoReader, form: Form, cuh: CompileUnitHeader, macho_file: *MachO) !void {
+        _ = macho_file;
+        switch (form) {
+            dw.FORM.sec_offset,
+            dw.FORM.ref_addr,
+            => {
+                _ = try p.readOffset(cuh.format);
+            },
 
-                dwarf.FORM.block1,
-                dwarf.FORM.block2,
-                dwarf.FORM.block4,
-                dwarf.FORM.block,
-                => {
-                    _ = try p.readBlock(attr.form);
-                },
+            dw.FORM.addr => {
+                _ = try p.readNBytes(cuh.address_size);
+            },
 
-                dwarf.FORM.exprloc => {
-                    _ = try p.readExprLoc();
-                },
+            dw.FORM.block1,
+            dw.FORM.block2,
+            dw.FORM.block4,
+            dw.FORM.block,
+            => {
+                _ = try p.readBlock(form);
+            },
+
+            dw.FORM.exprloc => {
+                _ = try p.readExprLoc();
+            },
 
-                dwarf.FORM.flag_present => {},
-
-                dwarf.FORM.data1,
-                dwarf.FORM.ref1,
-                dwarf.FORM.flag,
-                dwarf.FORM.data2,
-                dwarf.FORM.ref2,
-                dwarf.FORM.data4,
-                dwarf.FORM.ref4,
-                dwarf.FORM.data8,
-                dwarf.FORM.ref8,
-                dwarf.FORM.ref_sig8,
-                dwarf.FORM.udata,
-                dwarf.FORM.ref_udata,
-                dwarf.FORM.sdata,
+            dw.FORM.flag_present => {},
+
+            dw.FORM.data1,
+            dw.FORM.ref1,
+            dw.FORM.flag,
+            dw.FORM.data2,
+            dw.FORM.ref2,
+            dw.FORM.data4,
+            dw.FORM.ref4,
+            dw.FORM.data8,
+            dw.FORM.ref8,
+            dw.FORM.ref_sig8,
+            dw.FORM.udata,
+            dw.FORM.ref_udata,
+            dw.FORM.sdata,
+            => {
+                _ = try p.readConstant(form);
+            },
+
+            dw.FORM.strp,
+            dw.FORM.string,
+            => {
+                _ = try p.readString(form, cuh);
+            },
+
+            else => if (cuh.version >= 5) switch (form) {
+                dw.FORM.strx,
+                dw.FORM.strx1,
+                dw.FORM.strx2,
+                dw.FORM.strx3,
+                dw.FORM.strx4,
                 => {
-                    _ = try p.readConstant(attr.form);
+                    // We are only stepping over the attribute, so consume the index and never
+                    // dereference it: the real string offsets base is not known here, and resolving
+                    // the index against a made-up base would read arbitrary bytes of
+                    // `debug_str_offsets` and `debug_str`, possibly past their ends.
+                    _ = try p.readIndex(form);
                 },
 
-                dwarf.FORM.strp,
-                dwarf.FORM.string,
+                dw.FORM.addrx,
+                dw.FORM.addrx1,
+                dw.FORM.addrx2,
+                dw.FORM.addrx3,
+                dw.FORM.addrx4,
                 => {
-                    _ = try p.readString(attr.form, cuh);
+                    _ = try p.readIndex(form);
                 },
 
-                else => {
-                    // TODO better errors
-                    log.err("unhandled DW_FORM_* value with identifier {x}", .{attr.form});
-                    return error.UnhandledDwFormValue;
-                },
-            };
+                else => return error.UnknownForm,
+            } else return error.UnknownForm,
         }
     }
 
     pub fn readBlock(p: *InfoReader, form: Form) ![]const u8 {
         const len: u64 = switch (form) {
-            dwarf.FORM.block1 => try p.readByte(),
-            dwarf.FORM.block2 => try p.readInt(u16),
-            dwarf.FORM.block4 => try p.readInt(u32),
-            dwarf.FORM.block => try p.readUleb128(u64),
+            dw.FORM.block1 => try p.readByte(),
+            dw.FORM.block2 => try p.readInt(u16),
+            dw.FORM.block4 => try p.readInt(u32),
+            dw.FORM.block => try p.readUleb128(u64),
             else => unreachable,
         };
         return p.readNBytes(len);
@@ -105,52 +181,79 @@ pub const InfoReader = struct {
 
     pub fn readConstant(p: *InfoReader, form: Form) !u64 {
         return switch (form) {
-            dwarf.FORM.data1, dwarf.FORM.ref1, dwarf.FORM.flag => try p.readByte(),
-            dwarf.FORM.data2, dwarf.FORM.ref2 => try p.readInt(u16),
-            dwarf.FORM.data4, dwarf.FORM.ref4 => try p.readInt(u32),
-            dwarf.FORM.data8, dwarf.FORM.ref8, dwarf.FORM.ref_sig8 => try p.readInt(u64),
-            dwarf.FORM.udata, dwarf.FORM.ref_udata => try p.readUleb128(u64),
-            dwarf.FORM.sdata => @bitCast(try p.readIleb128(i64)),
+            dw.FORM.data1, dw.FORM.ref1, dw.FORM.flag => try p.readByte(),
+            dw.FORM.data2, dw.FORM.ref2 => try p.readInt(u16),
+            dw.FORM.data4, dw.FORM.ref4 => try p.readInt(u32),
+            dw.FORM.data8, dw.FORM.ref8, dw.FORM.ref_sig8 => try p.readInt(u64),
+            dw.FORM.udata, dw.FORM.ref_udata => try p.readUleb128(u64),
+            dw.FORM.sdata => @bitCast(try p.readIleb128(i64)),
             else => return error.UnhandledConstantForm,
         };
     }
 
+    /// Reads the index operand of a DW_FORM_strx*/DW_FORM_addrx* attribute and widens it to `u64`.
+    /// The numbered forms occupy 1, 2, 3 or 4 bytes (little-endian); the unsuffixed forms are
+    /// ULEB128-encoded. Any other form yields `error.UnhandledIndexForm`.
+    pub fn readIndex(p: *InfoReader, form: Form) !u64 {
+        return switch (form) {
+            dw.FORM.strx1, dw.FORM.addrx1 => try p.readByte(),
+            dw.FORM.strx2, dw.FORM.addrx2 => try p.readInt(u16),
+            dw.FORM.strx3, dw.FORM.addrx3 => blk: {
+                // `readInt` sizes its read with `@sizeOf`, which is larger than three bytes for
+                // `u24`, so take exactly three bytes and decode them as a 24-bit integer.
+                const raw = try p.readNBytes(3);
+                break :blk mem.readInt(u24, raw[0..3], .little);
+            },
+            dw.FORM.strx4, dw.FORM.addrx4 => try p.readInt(u32),
+            dw.FORM.strx, dw.FORM.addrx => try p.readUleb128(u64),
+            else => return error.UnhandledIndexForm,
+        };
+    }
+
     pub fn readString(p: *InfoReader, form: Form, cuh: CompileUnitHeader) ![:0]const u8 {
         switch (form) {
-            dwarf.FORM.strp => {
+            dw.FORM.strp => {
                 const off = try p.readOffset(cuh.format);
                 const off_u = math.cast(usize, off) orelse return error.Overflow;
-                return mem.sliceTo(@as([*:0]const u8, @ptrCast(p.strtab.ptr + off_u)), 0);
+                return mem.sliceTo(@as([*:0]const u8, @ptrCast(p.ctx.debug_str.ptr + off_u)), 0);
             },
-            dwarf.FORM.string => {
+            dw.FORM.string => {
                 const start = p.pos;
-                while (p.pos < p.bytes.len) : (p.pos += 1) {
-                    if (p.bytes[p.pos] == 0) break;
+                while (p.pos < p.bytes().len) : (p.pos += 1) {
+                    if (p.bytes()[p.pos] == 0) break;
                 }
-                if (p.bytes[p.pos] != 0) return error.Eof;
-                return p.bytes[start..p.pos :0];
+                if (p.bytes()[p.pos] != 0) return error.UnexpectedEndOfFile;
+                return p.bytes()[start..p.pos :0];
+            },
+            else => unreachable,
+        }
+    }
+
+    /// Reads a DW_FORM_strx* index from `debug_info`, looks it up in `ctx.debug_str_offsets`
+    /// starting at byte `base`, and returns the NUL-terminated string found at the resulting
+    /// offset in `ctx.debug_str`. Any other form is a programming error (`unreachable`).
+    pub fn readStringIndexed(p: *InfoReader, form: Form, cuh: CompileUnitHeader, base: u64) ![:0]const u8 {
+        switch (form) {
+            dw.FORM.strx, dw.FORM.strx1, dw.FORM.strx2, dw.FORM.strx3, dw.FORM.strx4 => {
+                const str_index = try p.readIndex(form);
+                const str_off = getOffset(p.ctx.debug_str_offsets, base, str_index, cuh.format);
+                const str_start: [*:0]const u8 = @ptrCast(p.ctx.debug_str.ptr + str_off);
+                return mem.sliceTo(str_start, 0);
             },
             else => unreachable,
         }
     }
 
     pub fn readByte(p: *InfoReader) !u8 {
-        if (p.pos + 1 > p.bytes.len) return error.Eof;
+        if (p.pos + 1 > p.bytes().len) return error.UnexpectedEndOfFile;
         defer p.pos += 1;
-        return p.bytes[p.pos];
+        return p.bytes()[p.pos];
     }
 
     pub fn readNBytes(p: *InfoReader, num: u64) ![]const u8 {
         const num_usize = math.cast(usize, num) orelse return error.Overflow;
-        if (p.pos + num_usize > p.bytes.len) return error.Eof;
+        if (p.pos + num_usize > p.bytes().len) return error.UnexpectedEndOfFile;
         defer p.pos += num_usize;
-        return p.bytes[p.pos..][0..num_usize];
+        return p.bytes()[p.pos..][0..num_usize];
     }
 
     pub fn readInt(p: *InfoReader, comptime Int: type) !Int {
-        if (p.pos + @sizeOf(Int) > p.bytes.len) return error.Eof;
+        if (p.pos + @sizeOf(Int) > p.bytes().len) return error.UnexpectedEndOfFile;
         defer p.pos += @sizeOf(Int);
-        return mem.readInt(Int, p.bytes[p.pos..][0..@sizeOf(Int)], .little);
+        return mem.readInt(Int, p.bytes()[p.pos..][0..@sizeOf(Int)], .little);
     }
 
     pub fn readOffset(p: *InfoReader, dw_fmt: DwarfFormat) !u64 {
@@ -161,7 +264,7 @@ pub const InfoReader = struct {
     }
 
     pub fn readUleb128(p: *InfoReader, comptime Type: type) !Type {
-        var stream = std.io.fixedBufferStream(p.bytes[p.pos..]);
+        var stream = std.io.fixedBufferStream(p.bytes()[p.pos..]);
         var creader = std.io.countingReader(stream.reader());
         const value: Type = try leb.readUleb128(Type, creader.reader());
         p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow;
@@ -169,7 +272,7 @@ pub const InfoReader = struct {
     }
 
     pub fn readIleb128(p: *InfoReader, comptime Type: type) !Type {
-        var stream = std.io.fixedBufferStream(p.bytes[p.pos..]);
+        var stream = std.io.fixedBufferStream(p.bytes()[p.pos..]);
         var creader = std.io.countingReader(stream.reader());
         const value: Type = try leb.readIleb128(Type, creader.reader());
         p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow;
@@ -182,11 +285,15 @@ pub const InfoReader = struct {
 };
 
 pub const AbbrevReader = struct {
-    bytes: []const u8,
+    ctx: Dwarf,
     pos: usize = 0,
 
+    /// The `debug_abbrev` buffer of the attached context; this is what the reader's `pos` indexes into.
+    fn bytes(p: AbbrevReader) []const u8 {
+        const section: []const u8 = p.ctx.debug_abbrev;
+        return section;
+    }
+
     pub fn hasMore(p: AbbrevReader) bool {
-        return p.pos < p.bytes.len;
+        return p.pos < p.bytes().len;
     }
 
     pub fn readDecl(p: *AbbrevReader) !?AbbrevDecl {
@@ -218,13 +325,13 @@ pub const AbbrevReader = struct {
     }
 
     pub fn readByte(p: *AbbrevReader) !u8 {
-        if (p.pos + 1 > p.bytes.len) return error.Eof;
+        if (p.pos + 1 > p.bytes().len) return error.Eof;
         defer p.pos += 1;
-        return p.bytes[p.pos];
+        return p.bytes()[p.pos];
     }
 
     pub fn readUleb128(p: *AbbrevReader, comptime Type: type) !Type {
-        var stream = std.io.fixedBufferStream(p.bytes[p.pos..]);
+        var stream = std.io.fixedBufferStream(p.bytes()[p.pos..]);
         var creader = std.io.countingReader(stream.reader());
         const value: Type = try leb.readUleb128(Type, creader.reader());
         p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow;
@@ -254,9 +361,10 @@ const AbbrevAttr = struct {
+/// Unit header fields as decoded by `InfoReader.readCompileUnitHeader`.
 const CompileUnitHeader = struct {
+    /// `.dwarf64` when the initial 32-bit length was 0xffffffff, `.dwarf32` otherwise.
     format: DwarfFormat,
+    /// Unit length from the header; in `.dwarf64` this is the `u64` that follows the 0xffffffff marker.
     length: u64,
-    version: u16,
+    /// DWARF version read from the header; the header reader accepts only 4 and 5.
+    version: Version,
     debug_abbrev_offset: u64,
     address_size: u8,
+    /// Unit type byte read from a version 5 header; set to 0 for version 4 headers.
+    unit_type: u8,
 };
 
 const Die = struct {
@@ -269,18 +377,24 @@ const DwarfFormat = enum {
     dwarf64,
 };
 
-const dwarf = std.dwarf;
+const dw = std.dwarf;
 const leb = std.leb;
 const log = std.log.scoped(.link);
 const math = std.math;
 const mem = std.mem;
 const std = @import("std");
-
-const At = u64;
-const Code = u64;
-const Form = u64;
-const Tag = u64;
-
-pub const AT = dwarf.AT;
-pub const FORM = dwarf.FORM;
-pub const TAG = dwarf.TAG;
+const Allocator = mem.Allocator;
+const Dwarf = @This();
+const File = @import("file.zig").File;
+const MachO = @import("../MachO.zig");
+const Object = @import("Object.zig");
+
+pub const At = u64;
+pub const Code = u64;
+pub const Form = u64;
+pub const Tag = u64;
+pub const Version = u16;
+
+pub const AT = dw.AT;
+pub const FORM = dw.FORM;
+pub const TAG = dw.TAG;
src/link/MachO/Object.zig
@@ -1359,151 +1359,102 @@ fn parseDebugInfo(self: *Object, macho_file: *MachO) !void {
     defer tracy.end();
 
     const gpa = macho_file.base.comp.gpa;
+    const file = macho_file.getFileHandle(self.file_handle);
 
-    var debug_info_index: ?usize = null;
-    var debug_abbrev_index: ?usize = null;
-    var debug_str_index: ?usize = null;
+    var dwarf: Dwarf = .{};
+    defer dwarf.deinit(gpa);
 
     for (self.sections.items(.header), 0..) |sect, index| {
+        const n_sect: u8 = @intCast(index);
         if (sect.attrs() & macho.S_ATTR_DEBUG == 0) continue;
-        if (mem.eql(u8, sect.sectName(), "__debug_info")) debug_info_index = index;
-        if (mem.eql(u8, sect.sectName(), "__debug_abbrev")) debug_abbrev_index = index;
-        if (mem.eql(u8, sect.sectName(), "__debug_str")) debug_str_index = index;
+        if (mem.eql(u8, sect.sectName(), "__debug_info")) {
+            dwarf.debug_info = try self.readSectionData(gpa, file, n_sect);
+        }
+        if (mem.eql(u8, sect.sectName(), "__debug_abbrev")) {
+            dwarf.debug_abbrev = try self.readSectionData(gpa, file, n_sect);
+        }
+        if (mem.eql(u8, sect.sectName(), "__debug_str")) {
+            dwarf.debug_str = try self.readSectionData(gpa, file, n_sect);
+        }
+        if (mem.eql(u8, sect.sectName(), "__debug_str_offs")) {
+            dwarf.debug_str_offsets = try self.readSectionData(gpa, file, n_sect);
+        }
     }
 
-    if (debug_info_index == null or debug_abbrev_index == null) return;
-
-    const slice = self.sections.slice();
-    const file = macho_file.getFileHandle(self.file_handle);
-    const debug_info = blk: {
-        const sect = slice.items(.header)[debug_info_index.?];
-        const size = math.cast(usize, sect.size) orelse return error.Overflow;
-        const data = try gpa.alloc(u8, size);
-        const amt = try file.preadAll(data, sect.offset + self.offset);
-        if (amt != data.len) return error.InputOutput;
-        break :blk data;
-    };
-    defer gpa.free(debug_info);
-    const debug_abbrev = blk: {
-        const sect = slice.items(.header)[debug_abbrev_index.?];
-        const size = math.cast(usize, sect.size) orelse return error.Overflow;
-        const data = try gpa.alloc(u8, size);
-        const amt = try file.preadAll(data, sect.offset + self.offset);
-        if (amt != data.len) return error.InputOutput;
-        break :blk data;
-    };
-    defer gpa.free(debug_abbrev);
-    const debug_str = if (debug_str_index) |sid| blk: {
-        const sect = slice.items(.header)[sid];
-        const size = math.cast(usize, sect.size) orelse return error.Overflow;
-        const data = try gpa.alloc(u8, size);
-        const amt = try file.preadAll(data, sect.offset + self.offset);
-        if (amt != data.len) return error.InputOutput;
-        break :blk data;
-    } else &[0]u8{};
-    defer gpa.free(debug_str);
-
-    self.compile_unit = self.findCompileUnit(.{
-        .gpa = gpa,
-        .debug_info = debug_info,
-        .debug_abbrev = debug_abbrev,
-        .debug_str = debug_str,
-    }) catch null; // TODO figure out what errors are fatal, and when we silently fail
-}
-
-fn findCompileUnit(self: *Object, args: struct {
-    gpa: Allocator,
-    debug_info: []const u8,
-    debug_abbrev: []const u8,
-    debug_str: []const u8,
-}) !CompileUnit {
-    var cu_wip: struct {
-        comp_dir: ?[:0]const u8 = null,
-        tu_name: ?[:0]const u8 = null,
-    } = .{};
-
-    const gpa = args.gpa;
-    var info_reader = dwarf.InfoReader{ .bytes = args.debug_info, .strtab = args.debug_str };
-    var abbrev_reader = dwarf.AbbrevReader{ .bytes = args.debug_abbrev };
-
-    const cuh = try info_reader.readCompileUnitHeader();
-    try abbrev_reader.seekTo(cuh.debug_abbrev_offset);
-
-    const cu_decl = (try abbrev_reader.readDecl()) orelse return error.Eof;
-    if (cu_decl.tag != dwarf.TAG.compile_unit) return error.UnexpectedTag;
-
-    try info_reader.seekToDie(cu_decl.code, cuh, &abbrev_reader);
-
-    while (try abbrev_reader.readAttr()) |attr| switch (attr.at) {
-        dwarf.AT.name => {
-            cu_wip.tu_name = try info_reader.readString(attr.form, cuh);
-        },
-        dwarf.AT.comp_dir => {
-            cu_wip.comp_dir = try info_reader.readString(attr.form, cuh);
-        },
-        else => switch (attr.form) {
-            dwarf.FORM.sec_offset,
-            dwarf.FORM.ref_addr,
-            => {
-                _ = try info_reader.readOffset(cuh.format);
-            },
+    if (dwarf.debug_info.len == 0) return;
 
-            dwarf.FORM.addr => {
-                _ = try info_reader.readNBytes(cuh.address_size);
-            },
+    self.compile_unit = try self.findCompileUnit(gpa, dwarf, macho_file);
+}
 
-            dwarf.FORM.block1,
-            dwarf.FORM.block2,
-            dwarf.FORM.block4,
-            dwarf.FORM.block,
-            => {
-                _ = try info_reader.readBlock(attr.form);
-            },
+/// Extracts DW_AT_comp_dir and DW_AT_name from the first unit in `ctx.debug_info` and returns them
+/// as strings added through `self.addString`.
+/// The first abbreviation declaration at the unit's `debug_abbrev_offset` must be a
+/// DW_TAG_compile_unit, otherwise `error.UnexpectedTag` is returned.
+/// Attributes are handled in two passes: the first skips every attribute while recording the
+/// position and form of the ones of interest, the second seeks back and decodes them. This lets
+/// DW_FORM_strx* strings be resolved regardless of where DW_AT_str_offsets_base sits in the
+/// attribute list.
+/// Returns `error.MissingCompDir` / `error.MissingTuName` when the respective attribute is absent,
+/// `error.MalformedDwarf` when a DW_FORM_strx* string is used without DW_AT_str_offsets_base, and
+/// `error.InvalidForm` when either string uses any other form.
+fn findCompileUnit(self: *Object, gpa: Allocator, ctx: Dwarf, macho_file: *MachO) !CompileUnit {
+    var info_reader = Dwarf.InfoReader{ .ctx = ctx };
+    var abbrev_reader = Dwarf.AbbrevReader{ .ctx = ctx };
 
-            dwarf.FORM.exprloc => {
-                _ = try info_reader.readExprLoc();
-            },
+    const cuh = try info_reader.readCompileUnitHeader(macho_file);
+    try abbrev_reader.seekTo(cuh.debug_abbrev_offset);
 
-            dwarf.FORM.flag_present => {},
-
-            dwarf.FORM.data1,
-            dwarf.FORM.ref1,
-            dwarf.FORM.flag,
-            dwarf.FORM.data2,
-            dwarf.FORM.ref2,
-            dwarf.FORM.data4,
-            dwarf.FORM.ref4,
-            dwarf.FORM.data8,
-            dwarf.FORM.ref8,
-            dwarf.FORM.ref_sig8,
-            dwarf.FORM.udata,
-            dwarf.FORM.ref_udata,
-            dwarf.FORM.sdata,
-            => {
-                _ = try info_reader.readConstant(attr.form);
-            },
+    const cu_decl = (try abbrev_reader.readDecl()) orelse return error.UnexpectedEndOfFile;
+    if (cu_decl.tag != Dwarf.TAG.compile_unit) return error.UnexpectedTag;
 
-            dwarf.FORM.strp,
-            dwarf.FORM.string,
-            => {
-                _ = try info_reader.readString(attr.form, cuh);
-            },
+    try info_reader.seekToDie(cu_decl.code, cuh, &abbrev_reader, macho_file);
 
-            else => {
-                // TODO actual errors?
-                log.err("unhandled DW_FORM_* value with identifier {x}", .{attr.form});
-                return error.UnhandledForm;
-            },
-        },
+    // Where an attribute's value starts in `debug_info`, plus the form needed to decode it later.
+    const Pos = struct {
+        pos: usize,
+        form: Dwarf.Form,
     };
-
-    if (cu_wip.comp_dir == null) return error.MissingCompDir;
-    if (cu_wip.tu_name == null) return error.MissingTuName;
-
-    return .{
-        .comp_dir = try self.addString(gpa, cu_wip.comp_dir.?),
-        .tu_name = try self.addString(gpa, cu_wip.tu_name.?),
+    var saved: struct {
+        tu_name: ?Pos,
+        comp_dir: ?Pos,
+        str_offsets_base: ?Pos,
+    } = .{
+        .tu_name = null,
+        .comp_dir = null,
+        .str_offsets_base = null,
     };
+    // First pass: remember where the interesting attributes live, then step over every value.
+    while (try abbrev_reader.readAttr()) |attr| {
+        const pos: Pos = .{ .pos = info_reader.pos, .form = attr.form };
+        switch (attr.at) {
+            Dwarf.AT.name => saved.tu_name = pos,
+            Dwarf.AT.comp_dir => saved.comp_dir = pos,
+            Dwarf.AT.str_offsets_base => saved.str_offsets_base = pos,
+            else => {},
+        }
+        try info_reader.skip(attr.form, cuh, macho_file);
+    }
+
+    if (saved.comp_dir == null) return error.MissingCompDir;
+    if (saved.tu_name == null) return error.MissingTuName;
+
+    // The base is decoded with `readOffset` no matter which form was recorded for it.
+    const str_offsets_base: ?u64 = if (saved.str_offsets_base) |str_offsets_base| str_offsets_base: {
+        try info_reader.seekTo(str_offsets_base.pos);
+        break :str_offsets_base try info_reader.readOffset(cuh.format);
+    } else null;
+
+    // Second pass: seek back to each saved attribute and decode it into the matching `cu` field.
+    var cu: CompileUnit = .{ .comp_dir = .{}, .tu_name = .{} };
+    for (&[_]struct { Pos, *MachO.String }{
+        .{ saved.comp_dir.?, &cu.comp_dir },
+        .{ saved.tu_name.?, &cu.tu_name },
+    }) |tuple| {
+        const pos, const str_offset_ptr = tuple;
+        try info_reader.seekTo(pos.pos);
+        str_offset_ptr.* = switch (pos.form) {
+            Dwarf.FORM.strp,
+            Dwarf.FORM.string,
+            => try self.addString(gpa, try info_reader.readString(pos.form, cuh)),
+            Dwarf.FORM.strx,
+            Dwarf.FORM.strx1,
+            Dwarf.FORM.strx2,
+            Dwarf.FORM.strx3,
+            Dwarf.FORM.strx4,
+            => blk: {
+                // Indexed strings cannot be resolved without a string offsets base.
+                const base = str_offsets_base orelse return error.MalformedDwarf;
+                break :blk try self.addString(gpa, try info_reader.readStringIndexed(pos.form, cuh, base));
+            },
+            else => return error.InvalidForm,
+        };
+    }
+
+    return cu;
 }
 
 pub fn resolveSymbols(self: *Object, macho_file: *MachO) !void {
@@ -2561,6 +2512,17 @@ pub fn getUnwindRecord(self: *Object, index: UnwindInfo.Record.Index) *UnwindInf
     return &self.unwind_records.items[index];
 }
 
+/// Reads the contents of section `n_sect` from `file` into a buffer allocated with `allocator`.
+/// The read starts at the section's file offset plus `self.offset` and covers `header.size` bytes.
+/// Fails with `error.Overflow` when the size does not fit in `usize` and with `error.InputOutput`
+/// on a short read; the buffer is released on every error path.
+/// Caller owns the memory.
+pub fn readSectionData(self: Object, allocator: Allocator, file: File.Handle, n_sect: u8) ![]u8 {
+    const header = self.sections.items(.header)[n_sect];
+    const size = math.cast(usize, header.size) orelse return error.Overflow;
+    const data = try allocator.alloc(u8, size);
+    // Registered before the read so the buffer is also freed when `preadAll` itself fails,
+    // not only when it comes up short.
+    errdefer allocator.free(data);
+    const amt = try file.preadAll(data, header.offset + self.offset);
+    if (amt != data.len) return error.InputOutput;
+    return data;
+}
+
 pub fn format(
     self: *Object,
     comptime unused_fmt_string: []const u8,
@@ -3219,7 +3181,6 @@ const aarch64 = struct {
 };
 
 const assert = std.debug.assert;
-const dwarf = @import("dwarf.zig");
 const eh_frame = @import("eh_frame.zig");
 const log = std.log.scoped(.link);
 const macho = std.macho;
@@ -3233,6 +3194,7 @@ const Allocator = mem.Allocator;
 const Archive = @import("Archive.zig");
 const Atom = @import("Atom.zig");
 const Cie = eh_frame.Cie;
+const Dwarf = @import("Dwarf.zig");
 const Fde = eh_frame.Fde;
 const File = @import("file.zig").File;
 const LoadCommandIterator = macho.LoadCommandIterator;