Commit a8d137d05a

Luuk de Gram <luuk@degram.dev>
2022-09-06 20:21:33
wasm-linker: support incremental debug info
The wasm-linker already supported debug information in incremental mode, but this stopped working once support for debug information parsed from object files was added. This commit reimplements the Zig-created debug information structure from scratch; the new implementation is a lot more robust and also allows the Zig-created debug information to be linked together with debug information from other object files.
1 parent 971327d
Changed files (4)
src/link/Wasm/Atom.zig
@@ -190,7 +190,7 @@ fn relocationValue(self: Atom, relocation: types.Relocation, wasm_bin: *const Wa
             const target_atom = wasm_bin.symbol_atom.get(target_loc).?;
             const segment_info = if (target_atom.file) |object_index| blk: {
                 break :blk wasm_bin.objects.items[object_index].segment_info;
-            } else wasm_bin.segment_info.items;
+            } else wasm_bin.segment_info.values();
             const segment_name = segment_info[symbol.index].outputName(merge_segment);
             const segment_index = wasm_bin.data_segments.get(segment_name).?;
             const segment = wasm_bin.segments.items[segment_index];
src/link/Wasm/Object.zig
@@ -953,13 +953,7 @@ pub fn parseIntoAtoms(self: *Object, gpa: Allocator, object_index: u16, wasm_bin
             segment.alignment = std.math.max(segment.alignment, atom.alignment);
         }
 
-        if (wasm_bin.atoms.getPtr(final_index)) |last| {
-            last.*.next = atom;
-            atom.prev = last.*;
-            last.* = atom;
-        } else {
-            try wasm_bin.atoms.putNoClobber(gpa, final_index, atom);
-        }
+        try wasm_bin.appendAtomAtIndex(final_index, atom);
         log.debug("Parsed into atom: '{s}' at segment index {d}", .{ self.string_table.get(self.symtable[atom.sym_index].name), final_index });
     }
 }
src/link/Dwarf.zig
@@ -861,9 +861,7 @@ pub fn commitDeclState(
                             },
                             .wasm => {
                                 const wasm_file = file.cast(File.Wasm).?;
-                                const segment_index = wasm_file.debug_line_index.?;
-                                const atom = wasm_file.atoms.get(segment_index).?;
-                                const debug_line = atom.getFirstZigAtom().code;
+                                const debug_line = wasm_file.debug_line_atom.?.code;
                                 writeDbgLineNopsBuffered(debug_line.items, src_fn.off, 0, &.{}, src_fn.len);
                             },
                             else => unreachable,
@@ -975,24 +973,21 @@ pub fn commitDeclState(
                 },
                 .wasm => {
                     const wasm_file = file.cast(File.Wasm).?;
-                    const segment_index = wasm_file.debug_line_index.?;
-                    const segment = &wasm_file.segments.items[segment_index];
-                    const atom = wasm_file.atoms.get(segment_index).?;
-                    const debug_line = &atom.getFirstZigAtom().code;
-                    if (needed_size != segment.size) {
+                    const atom = wasm_file.debug_line_atom.?;
+                    const debug_line = &atom.code;
+                    const segment_size = debug_line.items.len;
+                    if (needed_size != segment_size) {
                         log.debug(" needed size does not equal allocated size: {d}", .{needed_size});
-                        if (needed_size > segment.size) {
-                            log.debug("  allocating {d} bytes for 'debug line' information", .{needed_size - segment.size});
+                        if (needed_size > segment_size) {
+                            log.debug("  allocating {d} bytes for 'debug line' information", .{needed_size - segment_size});
                             try debug_line.resize(self.allocator, needed_size);
-                            mem.set(u8, debug_line.items[segment.size..], 0);
+                            mem.set(u8, debug_line.items[segment_size..], 0);
                         }
-                        segment.size = needed_size;
                         debug_line.items.len = needed_size;
                     }
-                    const offset = segment.offset + src_fn.off;
                     writeDbgLineNopsBuffered(
                         debug_line.items,
-                        offset,
+                        src_fn.off,
                         prev_padding_size,
                         dbg_line_buffer.items,
                         next_padding_size,
@@ -1150,12 +1145,8 @@ fn updateDeclDebugInfoAllocation(self: *Dwarf, file: *File, atom: *Atom, len: u3
                     },
                     .wasm => {
                         const wasm_file = file.cast(File.Wasm).?;
-                        const segment_index = wasm_file.debug_info_index.?;
-                        const segment = &wasm_file.segments.items[segment_index];
-                        const info_atom = wasm_file.atoms.get(segment_index).?;
-                        const debug_info = &info_atom.getFirstZigAtom().code;
-                        const offset = segment.offset + atom.off;
-                        try writeDbgInfoNopsToArrayList(gpa, debug_info, offset, 0, &.{0}, atom.len, false);
+                        const debug_info = &wasm_file.debug_info_atom.?.code;
+                        try writeDbgInfoNopsToArrayList(gpa, debug_info, atom.off, 0, &.{0}, atom.len, false);
                     },
                     else => unreachable,
                 }
@@ -1282,28 +1273,25 @@ fn writeDeclDebugInfo(self: *Dwarf, file: *File, atom: *Atom, dbg_info_buf: []co
         },
         .wasm => {
             const wasm_file = file.cast(File.Wasm).?;
-            const segment_index = wasm_file.debug_info_index.?;
-            const segment = &wasm_file.segments.items[segment_index];
-            const info_atom = wasm_file.atoms.get(segment_index).?;
-            const debug_info = &info_atom.getFirstZigAtom().code;
-            if (needed_size != segment.size) {
+            const info_atom = wasm_file.debug_info_atom.?;
+            const debug_info = &info_atom.code;
+            const segment_size = debug_info.items.len;
+            if (needed_size != segment_size) {
                 log.debug(" needed size does not equal allocated size: {d}", .{needed_size});
-                if (needed_size > segment.size) {
-                    log.debug("  allocating {d} bytes for 'debug info' information", .{needed_size - segment.size});
+                if (needed_size > segment_size) {
+                    log.debug("  allocating {d} bytes for 'debug info' information", .{needed_size - segment_size});
                     try debug_info.resize(self.allocator, needed_size);
-                    mem.set(u8, debug_info.items[segment.size..], 0);
+                    mem.set(u8, debug_info.items[segment_size..], 0);
                 }
-                segment.size = needed_size;
                 debug_info.items.len = needed_size;
             }
-            const offset = segment.offset + atom.off;
             log.debug(" writeDbgInfoNopsToArrayList debug_info_len={d} offset={d} content_len={d} next_padding_size={d}", .{
-                debug_info.items.len, offset, dbg_info_buf.len, next_padding_size,
+                debug_info.items.len, atom.off, dbg_info_buf.len, next_padding_size,
             });
             try writeDbgInfoNopsToArrayList(
                 gpa,
                 debug_info,
-                offset,
+                atom.off,
                 prev_padding_size,
                 dbg_info_buf,
                 next_padding_size,
@@ -1344,10 +1332,8 @@ pub fn updateDeclLineNumber(self: *Dwarf, file: *File, decl: *const Module.Decl)
         },
         .wasm => {
             const wasm_file = file.cast(File.Wasm).?;
-            const segment_index = wasm_file.debug_line_index.?;
-            const segment = wasm_file.segments.items[segment_index];
-            const offset = segment.offset + decl.fn_link.wasm.src_fn.off + self.getRelocDbgLineOff();
-            const atom = wasm_file.atoms.get(segment_index).?.getFirstZigAtom();
+            const offset = decl.fn_link.wasm.src_fn.off + self.getRelocDbgLineOff();
+            const atom = wasm_file.debug_line_atom.?;
             mem.copy(u8, atom.code.items[offset..], &data);
         },
         else => unreachable,
@@ -1584,8 +1570,7 @@ pub fn writeDbgAbbrev(self: *Dwarf, file: *File) !void {
         },
         .wasm => {
             const wasm_file = file.cast(File.Wasm).?;
-            const segment_index = wasm_file.debug_abbrev_index.?;
-            const debug_abbrev = &wasm_file.atoms.get(segment_index).?.getFirstZigAtom().code;
+            const debug_abbrev = &wasm_file.debug_abbrev_atom.?.code;
             try debug_abbrev.resize(wasm_file.base.allocator, needed_size);
             mem.copy(u8, debug_abbrev.items, &abbrev_buf);
         },
@@ -1697,8 +1682,7 @@ pub fn writeDbgInfoHeader(self: *Dwarf, file: *File, module: *Module, low_pc: u6
         },
         .wasm => {
             const wasm_file = file.cast(File.Wasm).?;
-            const segment_index = wasm_file.debug_info_index.?;
-            const debug_info = &wasm_file.atoms.get(segment_index).?.getFirstZigAtom().code;
+            const debug_info = &wasm_file.debug_info_atom.?.code;
             try writeDbgInfoNopsToArrayList(self.allocator, debug_info, 0, 0, di_buf.items, jmp_amt, false);
         },
         else => unreachable,
@@ -2028,8 +2012,7 @@ pub fn writeDbgAranges(self: *Dwarf, file: *File, addr: u64, size: u64) !void {
         },
         .wasm => {
             const wasm_file = file.cast(File.Wasm).?;
-            const segment_index = wasm_file.debug_ranges_index.?;
-            const debug_ranges = &wasm_file.atoms.get(segment_index).?.getFirstZigAtom().code;
+            const debug_ranges = &wasm_file.debug_ranges_atom.?.code;
             try debug_ranges.resize(wasm_file.base.allocator, needed_size);
             mem.copy(u8, debug_ranges.items, di_buf.items);
         },
@@ -2153,8 +2136,7 @@ pub fn writeDbgLineHeader(self: *Dwarf, file: *File, module: *Module) !void {
         },
         .wasm => {
             const wasm_file = file.cast(File.Wasm).?;
-            const segment_index = wasm_file.debug_line_index.?;
-            const debug_line = wasm_file.atoms.get(segment_index).?.getFirstZigAtom().code;
+            const debug_line = wasm_file.debug_line_atom.?.code;
             writeDbgLineNopsBuffered(debug_line.items, 0, 0, di_buf.items, jmp_amt);
         },
         else => unreachable,
@@ -2303,8 +2285,7 @@ pub fn flushModule(self: *Dwarf, file: *File, module: *Module) !void {
                 },
                 .wasm => {
                     const wasm_file = file.cast(File.Wasm).?;
-                    const segment_index = wasm_file.debug_info_index.?;
-                    const debug_info = wasm_file.atoms.get(segment_index).?.getFirstZigAtom().code;
+                    const debug_info = wasm_file.debug_info_atom.?.code;
                     mem.copy(u8, debug_info.items[reloc.atom.off + reloc.offset ..], &buf);
                 },
                 else => unreachable,
src/link/Wasm.zig
@@ -95,9 +95,10 @@ imports: std.AutoHashMapUnmanaged(SymbolLoc, types.Import) = .{},
 segments: std.ArrayListUnmanaged(Segment) = .{},
 /// Maps a data segment key (such as .rodata) to the index into `segments`.
 data_segments: std.StringArrayHashMapUnmanaged(u32) = .{},
-/// A list of `types.Segment` which provide meta data
-/// about a data symbol such as its name
-segment_info: std.ArrayListUnmanaged(types.Segment) = .{},
+/// A table of `types.Segment` which provides metadata
+/// about a data symbol, such as its name. The key is
+/// the segment index, which can be found from `data_segments`.
+segment_info: std.AutoArrayHashMapUnmanaged(u32, types.Segment) = .{},
 /// Deduplicated string table for strings used by symbols, imports and exports.
 string_table: StringTable = .{},
 /// Debug information for wasm
@@ -158,6 +159,19 @@ export_names: std.AutoHashMapUnmanaged(SymbolLoc, u32) = .{},
 /// The actual table is populated during `flush`.
 error_table_symbol: ?u32 = null,
 
+// Debug section atoms. These are only set when the current compilation
+// unit contains Zig code. The lifetime of these atoms is extended
+// until the end of the compiler's lifetime, meaning they are not freed
+// during `flush()` in incremental-mode.
+debug_info_atom: ?*Atom = null,
+debug_line_atom: ?*Atom = null,
+debug_loc_atom: ?*Atom = null,
+debug_ranges_atom: ?*Atom = null,
+debug_abbrev_atom: ?*Atom = null,
+debug_str_atom: ?*Atom = null,
+debug_pubnames_atom: ?*Atom = null,
+debug_pubtypes_atom: ?*Atom = null,
+
 pub const Segment = struct {
     alignment: u32,
     size: u32,
@@ -384,15 +398,16 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*Wasm {
 /// and symbols come from the object files instead.
 pub fn initDebugSections(self: *Wasm) !void {
     if (self.dwarf == null) return; // not compiling Zig code, so no need to pre-initialize debug sections
+    assert(self.debug_info_index == null);
     // this will create an Atom and set the index for us.
-    try self.createDebugSectionForIndex(&self.debug_info_index);
-    try self.createDebugSectionForIndex(&self.debug_line_index);
-    try self.createDebugSectionForIndex(&self.debug_loc_index);
-    try self.createDebugSectionForIndex(&self.debug_abbrev_index);
-    try self.createDebugSectionForIndex(&self.debug_ranges_index);
-    try self.createDebugSectionForIndex(&self.debug_str_index);
-    try self.createDebugSectionForIndex(&self.debug_pubnames_index);
-    try self.createDebugSectionForIndex(&self.debug_pubtypes_index);
+    self.debug_info_atom = try self.createDebugSectionForIndex(&self.debug_info_index, ".debug_info");
+    self.debug_line_atom = try self.createDebugSectionForIndex(&self.debug_line_index, ".debug_line");
+    self.debug_loc_atom = try self.createDebugSectionForIndex(&self.debug_loc_index, ".debug_loc");
+    self.debug_abbrev_atom = try self.createDebugSectionForIndex(&self.debug_abbrev_index, ".debug_abbrev");
+    self.debug_ranges_atom = try self.createDebugSectionForIndex(&self.debug_ranges_index, ".debug_ranges");
+    self.debug_str_atom = try self.createDebugSectionForIndex(&self.debug_str_index, ".debug_str");
+    self.debug_pubnames_atom = try self.createDebugSectionForIndex(&self.debug_pubnames_index, ".debug_pubnames");
+    self.debug_pubtypes_atom = try self.createDebugSectionForIndex(&self.debug_pubtypes_index, ".debug_pubtypes");
 }
 
 fn parseInputFiles(self: *Wasm, files: []const []const u8) !void {
@@ -676,7 +691,7 @@ pub fn deinit(self: *Wasm) void {
     for (self.func_types.items) |*func_type| {
         func_type.deinit(gpa);
     }
-    for (self.segment_info.items) |segment_info| {
+    for (self.segment_info.values()) |segment_info| {
         gpa.free(segment_info.name);
     }
     for (self.objects.items) |*object| {
@@ -1364,16 +1379,7 @@ fn parseAtom(self: *Wasm, atom: *Atom, kind: Kind) !void {
                 const index = gop.value_ptr.*;
                 self.segments.items[index].size += atom.size;
 
-                // segment indexes can be off by 1 due to also containing a segment
-                // for the code section, so we must check if the existing segment
-                // is larger than that of the code section, and substract the index by 1 in such case.
-                var info_add = if (self.code_section_index) |idx| blk: {
-                    if (idx < index) break :blk @as(u32, 1);
-                    break :blk 0;
-                } else @as(u32, 0);
-                if (self.debug_info_index != null) info_add += 1;
-                if (self.debug_line_index != null) info_add += 1;
-                symbol.index = index - info_add;
+                symbol.index = @intCast(u32, self.segment_info.getIndex(index).?);
                 // segment info already exists, so free its memory
                 self.base.allocator.free(segment_name);
                 break :result index;
@@ -1386,8 +1392,8 @@ fn parseAtom(self: *Wasm, atom: *Atom, kind: Kind) !void {
                 });
                 gop.value_ptr.* = index;
 
-                const info_index = @intCast(u32, self.segment_info.items.len);
-                try self.segment_info.append(self.base.allocator, segment_info);
+                const info_index = @intCast(u32, self.segment_info.count());
+                try self.segment_info.put(self.base.allocator, index, segment_info);
                 symbol.index = info_index;
                 break :result index;
             }
@@ -1397,18 +1403,54 @@ fn parseAtom(self: *Wasm, atom: *Atom, kind: Kind) !void {
     const segment: *Segment = &self.segments.items[final_index];
     segment.alignment = std.math.max(segment.alignment, atom.alignment);
 
-    if (self.atoms.getPtr(final_index)) |last| {
+    try self.appendAtomAtIndex(final_index, atom);
+}
+
+/// From a given index, append the given `Atom` at the back of the linked list.
+/// Simply inserts it into the map of atoms when it doesn't exist yet.
+pub fn appendAtomAtIndex(self: *Wasm, index: u32, atom: *Atom) !void {
+    if (self.atoms.getPtr(index)) |last| {
         last.*.next = atom;
         atom.prev = last.*;
         last.* = atom;
     } else {
-        try self.atoms.putNoClobber(self.base.allocator, final_index, atom);
+        try self.atoms.putNoClobber(self.base.allocator, index, atom);
     }
 }
 
+/// Allocates debug atoms into their respective debug sections
+/// to merge them with maybe-existing debug atoms from object files.
+fn allocateDebugAtoms(self: *Wasm) !void {
+    if (self.dwarf == null) return;
+
+    const allocAtom = struct {
+        fn f(bin: *Wasm, maybe_index: *?u32, atom: *Atom) !void {
+            const index = maybe_index.* orelse idx: {
+                const index = @intCast(u32, bin.segments.items.len);
+                try bin.appendDummySegment();
+                maybe_index.* = index;
+                break :idx index;
+            };
+            atom.size = @intCast(u32, atom.code.items.len);
+            bin.symbols.items[atom.sym_index].index = index;
+            try bin.appendAtomAtIndex(index, atom);
+        }
+    }.f;
+
+    try allocAtom(self, &self.debug_info_index, self.debug_info_atom.?);
+    try allocAtom(self, &self.debug_line_index, self.debug_line_atom.?);
+    try allocAtom(self, &self.debug_loc_index, self.debug_loc_atom.?);
+    try allocAtom(self, &self.debug_str_index, self.debug_str_atom.?);
+    try allocAtom(self, &self.debug_ranges_index, self.debug_ranges_atom.?);
+    try allocAtom(self, &self.debug_abbrev_index, self.debug_abbrev_atom.?);
+    try allocAtom(self, &self.debug_pubnames_index, self.debug_pubnames_atom.?);
+    try allocAtom(self, &self.debug_pubtypes_index, self.debug_pubtypes_atom.?);
+}
+
 fn allocateAtoms(self: *Wasm) !void {
     // first sort the data segments
     try sortDataSegments(self);
+    try allocateDebugAtoms(self);
 
     var it = self.atoms.iterator();
     while (it.next()) |entry| {
@@ -1426,7 +1468,7 @@ fn allocateAtoms(self: *Wasm) !void {
                 atom.size,
             });
             offset += atom.size;
-            self.symbol_atom.putAssumeCapacity(atom.symbolLoc(), atom); // Update atom pointers
+            try self.symbol_atom.put(self.base.allocator, atom.symbolLoc(), atom); // Update atom pointers
             atom = atom.next orelse break;
         }
         segment.size = std.mem.alignForwardGeneric(u32, offset, segment.alignment);
@@ -1989,20 +2031,35 @@ fn populateErrorNameTable(self: *Wasm) !void {
 /// From a given index variable, creates a new debug section.
 /// This initializes the index, appends a new segment,
 /// and finally, creates a managed `Atom`.
-pub fn createDebugSectionForIndex(self: *Wasm, index: *?u32) !void {
+pub fn createDebugSectionForIndex(self: *Wasm, index: *?u32, name: []const u8) !*Atom {
     const new_index = @intCast(u32, self.segments.items.len);
     index.* = new_index;
     try self.appendDummySegment();
+    // _ = index;
+
+    const sym_index = self.symbols_free_list.popOrNull() orelse idx: {
+        const tmp_index = @intCast(u32, self.symbols.items.len);
+        _ = try self.symbols.addOne(self.base.allocator);
+        break :idx tmp_index;
+    };
+    self.symbols.items[sym_index] = .{
+        .tag = .section,
+        .name = try self.string_table.put(self.base.allocator, name),
+        .index = 0,
+        .flags = @enumToInt(Symbol.Flag.WASM_SYM_BINDING_LOCAL),
+    };
 
     const atom = try self.base.allocator.create(Atom);
     atom.* = Atom.empty;
     atom.alignment = 1; // debug sections are always 1-byte-aligned
+    atom.sym_index = sym_index;
     try self.managed_atoms.append(self.base.allocator, atom);
-    try self.atoms.put(self.base.allocator, new_index, atom);
+    try self.symbol_atom.put(self.base.allocator, atom.symbolLoc(), atom);
+    return atom;
 }
 
 fn resetState(self: *Wasm) void {
-    for (self.segment_info.items) |*segment_info| {
+    for (self.segment_info.values()) |segment_info| {
         self.base.allocator.free(segment_info.name);
     }
     if (self.base.options.module) |mod| {
@@ -2029,6 +2086,12 @@ fn resetState(self: *Wasm) void {
     self.code_section_index = null;
     self.debug_info_index = null;
     self.debug_line_index = null;
+    self.debug_loc_index = null;
+    self.debug_str_index = null;
+    self.debug_ranges_index = null;
+    self.debug_abbrev_index = null;
+    self.debug_pubnames_index = null;
+    self.debug_pubtypes_index = null;
 }
 
 pub fn flush(self: *Wasm, comp: *Compilation, prog_node: *std.Progress.Node) !void {
@@ -2508,26 +2571,31 @@ pub fn flushModule(self: *Wasm, comp: *Compilation, prog_node: *std.Progress.Nod
         var debug_bytes = std.ArrayList(u8).init(self.base.allocator);
         defer debug_bytes.deinit();
 
-        const debug_sections = .{
-            .{ ".debug_info", self.debug_info_index },
-            .{ ".debug_pubtypes", self.debug_pubtypes_index },
-            .{ ".debug_abbrev", self.debug_abbrev_index },
-            .{ ".debug_line", self.debug_line_index },
-            .{ ".debug_str", self.debug_str_index },
-            .{ ".debug_pubnames", self.debug_pubnames_index },
-            .{ ".debug_loc", self.debug_loc_index },
-            .{ ".debug_ranges", self.debug_ranges_index },
+        const DebugSection = struct {
+            name: []const u8,
+            index: ?u32,
+        };
+
+        const debug_sections: []const DebugSection = &.{
+            .{ .name = ".debug_info", .index = self.debug_info_index },
+            .{ .name = ".debug_pubtypes", .index = self.debug_pubtypes_index },
+            .{ .name = ".debug_abbrev", .index = self.debug_abbrev_index },
+            .{ .name = ".debug_line", .index = self.debug_line_index },
+            .{ .name = ".debug_str", .index = self.debug_str_index },
+            .{ .name = ".debug_pubnames", .index = self.debug_pubnames_index },
+            .{ .name = ".debug_loc", .index = self.debug_loc_index },
+            .{ .name = ".debug_ranges", .index = self.debug_ranges_index },
         };
 
-        inline for (debug_sections) |item| {
-            if (item[1]) |index| {
+        for (debug_sections) |item| {
+            if (item.index) |index| {
                 var atom = self.atoms.get(index).?.getFirst();
                 while (true) {
                     atom.resolveRelocs(self);
                     try debug_bytes.appendSlice(atom.code.items);
                     atom = atom.next orelse break;
                 }
-                try emitDebugSection(file, debug_bytes.items, item[0]);
+                try emitDebugSection(file, debug_bytes.items, item.name);
                 debug_bytes.clearRetainingCapacity();
             }
         }
@@ -3242,8 +3310,8 @@ fn emitSegmentInfo(self: *Wasm, file: fs.File, arena: Allocator) !void {
     var payload = std.ArrayList(u8).init(arena);
     const writer = payload.writer();
     try leb.writeULEB128(file.writer(), @enumToInt(types.SubsectionType.WASM_SEGMENT_INFO));
-    try leb.writeULEB128(writer, @intCast(u32, self.segment_info.items.len));
-    for (self.segment_info.items) |segment_info| {
+    try leb.writeULEB128(writer, @intCast(u32, self.segment_info.count()));
+    for (self.segment_info.values()) |segment_info| {
         log.debug("Emit segment: {s} align({d}) flags({b})", .{
             segment_info.name,
             @ctz(segment_info.alignment),