Commit `400faec10b`

Jakub Konka <kubkon@jakubkonka.com>

2023-10-18 23:14:23

dwarf: introduce Dwarf.Format to be able to select 32/64bit format at whim

master

1 parent 4f66efd

Changed files (4)

src

link

@@ -1,32 +1,7 @@
-const Dwarf = @This();
-
-const std = @import("std");
-const builtin = @import("builtin");
-const assert = std.debug.assert;
-const fs = std.fs;
-const leb128 = std.leb;
-const log = std.log.scoped(.dwarf);
-const mem = std.mem;
-
-const link = @import("../link.zig");
-const trace = @import("../tracy.zig").trace;
-
-const Allocator = mem.Allocator;
-const DW = std.dwarf;
-const File = link.File;
-const LinkBlock = File.LinkBlock;
-const LinkFn = File.LinkFn;
-const LinkerLoad = @import("../codegen.zig").LinkerLoad;
-const Module = @import("../Module.zig");
-const InternPool = @import("../InternPool.zig");
-const StringTable = @import("strtab.zig").StringTable;
-const Type = @import("../type.zig").Type;
-const Value = @import("../value.zig").Value;
-
 allocator: Allocator,
 bin_file: *File,
+format: Format,
 ptr_width: PtrWidth,
-target: std.Target,
 
 /// A list of `Atom`s whose Line Number Programs have surplus capacity.
 /// This is the same concept as `Section.free_list` in Elf; see those doc comments.
@@ -983,17 +958,17 @@ const min_nop_size = 2;
 /// actual_capacity + (actual_capacity / ideal_factor)
 const ideal_factor = 3;
 
-pub fn init(allocator: Allocator, bin_file: *File, target: std.Target) Dwarf {
-    const ptr_width: PtrWidth = switch (target.ptrBitWidth()) {
+pub fn init(allocator: Allocator, bin_file: *File, format: Format) Dwarf {
+    const ptr_width: PtrWidth = switch (bin_file.options.target.ptrBitWidth()) {
         0...32 => .p32,
         33...64 => .p64,
         else => unreachable,
     };
-    return Dwarf{
+    return .{
         .allocator = allocator,
         .bin_file = bin_file,
+        .format = format,
         .ptr_width = ptr_width,
-        .target = target,
     };
 }
 
@@ -1129,7 +1104,7 @@ pub fn commitDeclState(
     const decl = mod.declPtr(decl_index);
     const ip = &mod.intern_pool;
 
-    const target_endian = self.target.cpu.arch.endian();
+    const target_endian = self.bin_file.options.target.cpu.arch.endian();
 
     assert(decl.has_tv);
     switch (decl.ty.zigTypeTag(mod)) {
@@ -1837,12 +1812,10 @@ pub fn writeDbgInfoHeader(self: *Dwarf, module: *Module, low_pc: u64, high_pc: u
     var di_buf = try std.ArrayList(u8).initCapacity(self.allocator, needed_bytes);
     defer di_buf.deinit();
 
-    const target_endian = self.target.cpu.arch.endian();
-    const init_len_size: usize = if (self.bin_file.tag == .macho)
-        4
-    else switch (self.ptr_width) {
-        .p32 => @as(usize, 4),
-        .p64 => 12,
+    const target_endian = self.bin_file.options.target.cpu.arch.endian();
+    const init_len_size: usize = switch (self.format) {
+        .dwarf32 => 4,
+        .dwarf64 => 12,
     };
 
     // initial length - length of the .debug_info contribution for this compilation unit,
@@ -1851,32 +1824,16 @@ pub fn writeDbgInfoHeader(self: *Dwarf, module: *Module, low_pc: u64, high_pc: u
     const after_init_len = di_buf.items.len + init_len_size;
     const dbg_info_end = self.getDebugInfoEnd().?;
     const init_len = dbg_info_end - after_init_len;
-    if (self.bin_file.tag == .macho) {
-        mem.writeIntLittle(u32, di_buf.addManyAsArrayAssumeCapacity(4), @as(u32, @intCast(init_len)));
-    } else switch (self.ptr_width) {
-        .p32 => {
-            mem.writeInt(u32, di_buf.addManyAsArrayAssumeCapacity(4), @as(u32, @intCast(init_len)), target_endian);
-        },
-        .p64 => {
-            di_buf.appendNTimesAssumeCapacity(0xff, 4);
-            mem.writeInt(u64, di_buf.addManyAsArrayAssumeCapacity(8), init_len, target_endian);
-        },
-    }
+
+    if (self.format == .dwarf64) di_buf.appendNTimesAssumeCapacity(0xff, 4);
+    self.writeOffsetAssumeCapacity(&di_buf, init_len);
+
     mem.writeInt(u16, di_buf.addManyAsArrayAssumeCapacity(2), 4, target_endian); // DWARF version
     const abbrev_offset = self.abbrev_table_offset.?;
-    if (self.bin_file.tag == .macho) {
-        mem.writeIntLittle(u32, di_buf.addManyAsArrayAssumeCapacity(4), @as(u32, @intCast(abbrev_offset)));
-        di_buf.appendAssumeCapacity(8); // address size
-    } else switch (self.ptr_width) {
-        .p32 => {
-            mem.writeInt(u32, di_buf.addManyAsArrayAssumeCapacity(4), @as(u32, @intCast(abbrev_offset)), target_endian);
-            di_buf.appendAssumeCapacity(4); // address size
-        },
-        .p64 => {
-            mem.writeInt(u64, di_buf.addManyAsArrayAssumeCapacity(8), abbrev_offset, target_endian);
-            di_buf.appendAssumeCapacity(8); // address size
-        },
-    }
+
+    self.writeOffsetAssumeCapacity(&di_buf, abbrev_offset);
+    di_buf.appendAssumeCapacity(self.ptrWidthBytes()); // address size
+
     // Write the form for the compile unit, which must match the abbrev table above.
     const name_strp = try self.strtab.insert(self.allocator, module.root_mod.root_src_path);
     var compile_unit_dir_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
@@ -1885,21 +1842,13 @@ pub fn writeDbgInfoHeader(self: *Dwarf, module: *Module, low_pc: u64, high_pc: u
     const producer_strp = try self.strtab.insert(self.allocator, link.producer_string);
 
     di_buf.appendAssumeCapacity(@intFromEnum(AbbrevKind.compile_unit));
-    if (self.bin_file.tag == .macho) {
-        mem.writeIntLittle(u32, di_buf.addManyAsArrayAssumeCapacity(4), 0); // DW.AT.stmt_list, DW.FORM.sec_offset
-        mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), low_pc);
-        mem.writeIntLittle(u64, di_buf.addManyAsArrayAssumeCapacity(8), high_pc);
-        mem.writeIntLittle(u32, di_buf.addManyAsArrayAssumeCapacity(4), @as(u32, @intCast(name_strp)));
-        mem.writeIntLittle(u32, di_buf.addManyAsArrayAssumeCapacity(4), @as(u32, @intCast(comp_dir_strp)));
-        mem.writeIntLittle(u32, di_buf.addManyAsArrayAssumeCapacity(4), @as(u32, @intCast(producer_strp)));
-    } else {
-        self.writeAddrAssumeCapacity(&di_buf, 0); // DW.AT.stmt_list, DW.FORM.sec_offset
-        self.writeAddrAssumeCapacity(&di_buf, low_pc);
-        self.writeAddrAssumeCapacity(&di_buf, high_pc);
-        self.writeAddrAssumeCapacity(&di_buf, name_strp);
-        self.writeAddrAssumeCapacity(&di_buf, comp_dir_strp);
-        self.writeAddrAssumeCapacity(&di_buf, producer_strp);
-    }
+    self.writeOffsetAssumeCapacity(&di_buf, 0); // DW.AT.stmt_list, DW.FORM.sec_offset
+    self.writeAddrAssumeCapacity(&di_buf, low_pc);
+    self.writeAddrAssumeCapacity(&di_buf, high_pc);
+    self.writeOffsetAssumeCapacity(&di_buf, name_strp);
+    self.writeOffsetAssumeCapacity(&di_buf, comp_dir_strp);
+    self.writeOffsetAssumeCapacity(&di_buf, producer_strp);
+
     // We are still waiting on dwarf-std.org to assign DW_LANG_Zig a number:
     // http://dwarfstd.org/ShowIssue.php?issue=171115.1
     // Until then we say it is C99.
@@ -1952,13 +1901,26 @@ fn resolveCompilationDir(module: *Module, buffer: *[std.fs.MAX_PATH_BYTES]u8) []
 }
 
 fn writeAddrAssumeCapacity(self: *Dwarf, buf: *std.ArrayList(u8), addr: u64) void {
-    const target_endian = self.target.cpu.arch.endian();
+    const target_endian = self.bin_file.options.target.cpu.arch.endian();
     switch (self.ptr_width) {
         .p32 => mem.writeInt(u32, buf.addManyAsArrayAssumeCapacity(4), @as(u32, @intCast(addr)), target_endian),
         .p64 => mem.writeInt(u64, buf.addManyAsArrayAssumeCapacity(8), addr, target_endian),
     }
 }
 
+fn writeOffsetAssumeCapacity(self: *Dwarf, buf: *std.ArrayList(u8), off: u64) void {
+    const target_endian = self.bin_file.options.target.cpu.arch.endian();
+    switch (self.format) {
+        .dwarf32 => mem.writeInt(
+            u32,
+            buf.addManyAsArrayAssumeCapacity(4),
+            @as(u32, @intCast(off)),
+            target_endian,
+        ),
+        .dwarf64 => mem.writeInt(u64, buf.addManyAsArrayAssumeCapacity(8), off, target_endian),
+    }
+}
+
 /// Writes to the file a buffer, prefixed and suffixed by the specified number of
 /// bytes of NOPs. Asserts each padding size is at least `min_nop_size` and total padding bytes
 /// are less than 1044480 bytes (if this limit is ever reached, this function can be
@@ -2174,13 +2136,7 @@ fn writeDbgInfoNopsToArrayList(
 }
 
 pub fn writeDbgAranges(self: *Dwarf, addr: u64, size: u64) !void {
-    const target_endian = self.target.cpu.arch.endian();
-    const init_len_size: usize = if (self.bin_file.tag == .macho)
-        4
-    else switch (self.ptr_width) {
-        .p32 => @as(usize, 4),
-        .p64 => 12,
-    };
+    const target_endian = self.bin_file.options.target.cpu.arch.endian();
     const ptr_width_bytes = self.ptrWidthBytes();
 
     // Enough for all the data without resizing. When support for more compilation units
@@ -2191,17 +2147,15 @@ pub fn writeDbgAranges(self: *Dwarf, addr: u64, size: u64) !void {
     // initial length - length of the .debug_aranges contribution for this compilation unit,
     // not including the initial length itself.
     // We have to come back and write it later after we know the size.
+    if (self.format == .dwarf64) di_buf.appendNTimesAssumeCapacity(0xff, 4);
     const init_len_index = di_buf.items.len;
-    di_buf.items.len += init_len_size;
+    self.writeOffsetAssumeCapacity(&di_buf, 0);
     const after_init_len = di_buf.items.len;
     mem.writeInt(u16, di_buf.addManyAsArrayAssumeCapacity(2), 2, target_endian); // version
+
     // When more than one compilation unit is supported, this will be the offset to it.
     // For now it is always at offset 0 in .debug_info.
-    if (self.bin_file.tag == .macho) {
-        mem.writeIntLittle(u32, di_buf.addManyAsArrayAssumeCapacity(4), 0); // __debug_info offset
-    } else {
-        self.writeAddrAssumeCapacity(&di_buf, 0); // .debug_info offset
-    }
+    self.writeOffsetAssumeCapacity(&di_buf, 0); // .debug_info offset
     di_buf.appendAssumeCapacity(ptr_width_bytes); // address_size
     di_buf.appendAssumeCapacity(0); // segment_selector_size
 
@@ -2220,18 +2174,14 @@ pub fn writeDbgAranges(self: *Dwarf, addr: u64, size: u64) !void {
 
     // Go back and populate the initial length.
     const init_len = di_buf.items.len - after_init_len;
-    if (self.bin_file.tag == .macho) {
-        mem.writeIntLittle(u32, di_buf.items[init_len_index..][0..4], @as(u32, @intCast(init_len)));
-    } else switch (self.ptr_width) {
-        .p32 => {
-            mem.writeInt(u32, di_buf.items[init_len_index..][0..4], @as(u32, @intCast(init_len)), target_endian);
-        },
-        .p64 => {
-            // initial length - length of the .debug_aranges contribution for this compilation unit,
-            // not including the initial length itself.
-            di_buf.items[init_len_index..][0..4].* = [_]u8{ 0xff, 0xff, 0xff, 0xff };
-            mem.writeInt(u64, di_buf.items[init_len_index + 4 ..][0..8], init_len, target_endian);
-        },
+    switch (self.format) {
+        .dwarf32 => mem.writeInt(
+            u32,
+            di_buf.items[init_len_index..][0..4],
+            @as(u32, @intCast(init_len)),
+            target_endian,
+        ),
+        .dwarf64 => mem.writeInt(u64, di_buf.items[init_len_index..][0..8], init_len, target_endian),
     }
 
     const needed_size = @as(u32, @intCast(di_buf.items.len));
@@ -2265,12 +2215,10 @@ pub fn writeDbgAranges(self: *Dwarf, addr: u64, size: u64) !void {
 pub fn writeDbgLineHeader(self: *Dwarf) !void {
     const gpa = self.allocator;
 
-    const target_endian = self.target.cpu.arch.endian();
-    const init_len_size: usize = if (self.bin_file.tag == .macho)
-        4
-    else switch (self.ptr_width) {
-        .p32 => @as(usize, 4),
-        .p64 => 12,
+    const target_endian = self.bin_file.options.target.cpu.arch.endian();
+    const init_len_size: usize = switch (self.format) {
+        .dwarf32 => 4,
+        .dwarf64 => 12,
     };
 
     const dbg_line_prg_off = self.getDebugLineProgramOff() orelse return;
@@ -2288,20 +2236,8 @@ pub fn writeDbgLineHeader(self: *Dwarf) !void {
     var di_buf = try std.ArrayList(u8).initCapacity(gpa, needed_bytes);
     defer di_buf.deinit();
 
-    switch (self.bin_file.tag) {
-        .macho => {
-            mem.writeIntLittle(u32, di_buf.addManyAsArrayAssumeCapacity(4), @as(u32, 0));
-        },
-        else => switch (self.ptr_width) {
-            .p32 => {
-                mem.writeInt(u32, di_buf.addManyAsArrayAssumeCapacity(4), @as(u32, 0), target_endian);
-            },
-            .p64 => {
-                di_buf.appendNTimesAssumeCapacity(0xff, 4);
-                mem.writeInt(u64, di_buf.addManyAsArrayAssumeCapacity(8), @as(u64, 0), target_endian);
-            },
-        },
-    }
+    if (self.format == .dwarf64) di_buf.appendNTimesAssumeCapacity(0xff, 4);
+    self.writeOffsetAssumeCapacity(&di_buf, 0);
 
     mem.writeInt(u16, di_buf.addManyAsArrayAssumeCapacity(2), 4, target_endian); // version
 
@@ -2310,16 +2246,7 @@ pub fn writeDbgLineHeader(self: *Dwarf) !void {
     // Therefore we rely on the NOP jump at the beginning of the Line Number Program for
     // padding rather than this field.
     const before_header_len = di_buf.items.len;
-
-    // We will come back and write this.
-    switch (self.bin_file.tag) {
-        .macho => di_buf.appendNTimesAssumeCapacity(0, 4),
-        else => switch (self.ptr_width) {
-            .p32 => di_buf.appendNTimesAssumeCapacity(0, 4),
-            .p64 => di_buf.appendNTimesAssumeCapacity(0, 8),
-        },
-    }
-
+    self.writeOffsetAssumeCapacity(&di_buf, 0); // We will come back and write this.
     const after_header_len = di_buf.items.len;
 
     const opcode_base = DW.LNS.set_isa + 1;
@@ -2372,19 +2299,14 @@ pub fn writeDbgLineHeader(self: *Dwarf) !void {
     di_buf.appendAssumeCapacity(0); // file names sentinel
 
     const header_len = di_buf.items.len - after_header_len;
-
-    switch (self.bin_file.tag) {
-        .macho => {
-            mem.writeIntLittle(u32, di_buf.items[before_header_len..][0..4], @as(u32, @intCast(header_len)));
-        },
-        else => switch (self.ptr_width) {
-            .p32 => {
-                mem.writeInt(u32, di_buf.items[before_header_len..][0..4], @as(u32, @intCast(header_len)), target_endian);
-            },
-            .p64 => {
-                mem.writeInt(u64, di_buf.items[before_header_len..][0..8], header_len, target_endian);
-            },
-        },
+    switch (self.format) {
+        .dwarf32 => mem.writeInt(
+            u32,
+            di_buf.items[before_header_len..][0..4],
+            @as(u32, @intCast(header_len)),
+            target_endian,
+        ),
+        .dwarf64 => mem.writeInt(u64, di_buf.items[before_header_len..][0..8], header_len, target_endian),
     }
 
     assert(needed_bytes == di_buf.items.len);
@@ -2453,17 +2375,12 @@ pub fn writeDbgLineHeader(self: *Dwarf) !void {
 
     // Backpatch actual length of the debug line program
     const init_len = self.getDebugLineProgramEnd().? - init_len_size;
-    switch (self.bin_file.tag) {
-        .macho => {
-            mem.writeIntLittle(u32, di_buf.items[0..4], @as(u32, @intCast(init_len)));
+    switch (self.format) {
+        .dwarf32 => {
+            mem.writeInt(u32, di_buf.items[0..4], @as(u32, @intCast(init_len)), target_endian);
         },
-        else => switch (self.ptr_width) {
-            .p32 => {
-                mem.writeInt(u32, di_buf.items[0..4], @as(u32, @intCast(init_len)), target_endian);
-            },
-            .p64 => {
-                mem.writeInt(u64, di_buf.items[4..][0..8], init_len, target_endian);
-            },
+        .dwarf64 => {
+            mem.writeInt(u64, di_buf.items[4..][0..8], init_len, target_endian);
         },
     }
 
@@ -2524,17 +2441,14 @@ fn ptrWidthBytes(self: Dwarf) u8 {
 }
 
 fn dbgLineNeededHeaderBytes(self: Dwarf, dirs: []const []const u8, files: []const []const u8) u32 {
-    var size = switch (self.bin_file.tag) { // length field
-        .macho => @sizeOf(u32),
-        else => switch (self.ptr_width) {
-            .p32 => @as(usize, @sizeOf(u32)),
-            .p64 => @sizeOf(u32) + @sizeOf(u64),
-        },
+    var size: usize = switch (self.format) { // length field
+        .dwarf32 => @as(usize, 4),
+        .dwarf64 => 12,
     };
     size += @sizeOf(u16); // version field
-    size += switch (self.bin_file.tag) { // offset to end-of-header
-        .macho => @sizeOf(u32),
-        else => self.ptrWidthBytes(),
+    size += switch (self.format) { // offset to end-of-header
+        .dwarf32 => @as(usize, 4),
+        .dwarf64 => 8,
     };
     size += 18; // opcodes
 
@@ -2570,6 +2484,8 @@ fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) {
 }
 
 pub fn flushModule(self: *Dwarf, module: *Module) !void {
+    const target = self.bin_file.options.target;
+
     if (self.global_abbrev_relocs.items.len > 0) {
         const gpa = self.allocator;
         var arena_alloc = std.heap.ArenaAllocator.init(gpa);
@@ -2581,7 +2497,7 @@ pub fn flushModule(self: *Dwarf, module: *Module) !void {
             module,
             Type.anyerror,
             module.global_error_set.keys(),
-            self.target,
+            target,
             &dbg_info_buffer,
         );
 
@@ -2610,7 +2526,7 @@ pub fn flushModule(self: *Dwarf, module: *Module) !void {
         };
 
         var buf: [@sizeOf(u32)]u8 = undefined;
-        mem.writeInt(u32, &buf, self.getAtom(.di_atom, di_atom_index).off, self.target.cpu.arch.endian());
+        mem.writeInt(u32, &buf, self.getAtom(.di_atom, di_atom_index).off, target.cpu.arch.endian());
 
         while (self.global_abbrev_relocs.popOrNull()) |reloc| {
             const atom = self.getAtom(.di_atom, reloc.atom_index);
@@ -2805,3 +2721,33 @@ fn getAtomPtr(self: *Dwarf, comptime kind: Kind, index: Atom.Index) *Atom {
         .di_atom => &self.di_atoms.items[index],
     };
 }
+
+pub const Format = enum {
+    dwarf32,
+    dwarf64,
+};
+
+const Dwarf = @This();
+
+const std = @import("std");
+const builtin = @import("builtin");
+const assert = std.debug.assert;
+const fs = std.fs;
+const leb128 = std.leb;
+const log = std.log.scoped(.dwarf);
+const mem = std.mem;
+
+const link = @import("../link.zig");
+const trace = @import("../tracy.zig").trace;
+
+const Allocator = mem.Allocator;
+const DW = std.dwarf;
+const File = link.File;
+const LinkBlock = File.LinkBlock;
+const LinkFn = File.LinkFn;
+const LinkerLoad = @import("../codegen.zig").LinkerLoad;
+const Module = @import("../Module.zig");
+const InternPool = @import("../InternPool.zig");
+const StringTable = @import("strtab.zig").StringTable;
+const Type = @import("../type.zig").Type;
+const Value = @import("../value.zig").Value;

@@ -310,7 +310,7 @@ pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Option
 
     if (options.module != null and !options.use_llvm) {
         if (!options.strip) {
-            self.dwarf = Dwarf.init(allocator, &self.base, options.target);
+            self.dwarf = Dwarf.init(allocator, &self.base, .dwarf32);
         }
 
         const index = @as(File.Index, @intCast(try self.files.addOne(allocator)));

@@ -206,7 +206,7 @@ pub fn openPath(allocator: Allocator, options: link.Options) !*MachO {
 
         self.d_sym = .{
             .allocator = allocator,
-            .dwarf = link.File.Dwarf.init(allocator, &self.base, options.target),
+            .dwarf = link.File.Dwarf.init(allocator, &self.base, .dwarf32),
             .file = d_sym_file,
         };
     }

@@ -507,7 +507,7 @@ pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Option
     }
 
     // if (!options.strip and options.module != null) {
-    //     wasm_bin.dwarf = Dwarf.init(allocator, &wasm_bin.base, options.target);
+    //     wasm_bin.dwarf = Dwarf.init(allocator, &wasm_bin.base, .dwarf32);
     //     try wasm_bin.initDebugSections();
     // }

Commit 400faec10b

Commit `400faec10b`