Commit 7fe629a812

Luuk de Gram <luuk@degram.dev>
2024-01-10 06:35:50
wasm-linker: delay code atom allocation till write
We delay atom allocation for the code section until we write the actual atoms. We do this to ensure the offset of the atom also accounts for the 'size' field, which is leb128-encoded and therefore variable in length. We need this correct offset to ensure debug info works correctly. The ordering of the code section is now automatic, because we iterate the function section and look up the atom corresponding to each function. This also ensures each function corresponds to the right atom, so they do not go out of sync. Lastly, we removed the `next` field as it is no longer required, and also removed manually setting the offset in synthetic functions. This means atoms use less memory and synthetic functions are less error-prone. They will also be placed correctly according to function order.
1 parent 2b3e6f6
Changed files (2)
src
src/link/Wasm/Atom.zig
@@ -26,18 +26,12 @@ offset: u32,
 /// The original offset within the object file. This value is subtracted from
 /// relocation offsets to determine where in the `data` to rewrite the value
 original_offset: u32,
-
 /// Represents the index of the file this atom was generated from.
 /// This is 'null' when the atom was generated by a Decl from Zig code.
 file: ?u16,
-
-/// Next atom in relation to this atom.
-/// When null, this atom is the last atom
-next: ?Atom.Index,
 /// Previous atom in relation to this atom.
 /// is null when this atom is the first in its order
 prev: ?Atom.Index,
-
 /// Contains atoms local to a decl, all managed by this `Atom`.
 /// When the parent atom is being freed, it will also do so for all local atoms.
 locals: std.ArrayListUnmanaged(Atom.Index) = .{},
@@ -49,7 +43,6 @@ pub const Index = u32;
 pub const empty: Atom = .{
     .alignment = .@"1",
     .file = null,
-    .next = null,
     .offset = 0,
     .prev = null,
     .size = 0,
src/link/Wasm.zig
@@ -2054,6 +2054,7 @@ pub fn freeDecl(wasm: *Wasm, decl_index: InternPool.DeclIndex) void {
     const decl = mod.declPtr(decl_index);
     const atom_index = wasm.decls.get(decl_index).?;
     const atom = wasm.getAtomPtr(atom_index);
+    atom.prev = null;
     wasm.symbols_free_list.append(gpa, atom.sym_index) catch {};
     _ = wasm.decls.remove(decl_index);
     wasm.symbols.items[atom.sym_index].tag = .dead;
@@ -2076,16 +2077,6 @@ pub fn freeDecl(wasm: *Wasm, decl_index: InternPool.DeclIndex) void {
     //     dwarf.freeDecl(decl_index);
     // }
 
-    if (atom.next) |next_atom_index| {
-        const next_atom = wasm.getAtomPtr(next_atom_index);
-        next_atom.prev = atom.prev;
-        atom.next = null;
-    }
-    if (atom.prev) |prev_index| {
-        const prev_atom = wasm.getAtomPtr(prev_index);
-        prev_atom.next = atom.next;
-        atom.prev = null;
-    }
 }
 
 /// Appends a new entry to the indirect function table
@@ -2327,8 +2318,6 @@ pub fn appendAtomAtIndex(wasm: *Wasm, index: u32, atom_index: Atom.Index) !void
     const gpa = wasm.base.comp.gpa;
     const atom = wasm.getAtomPtr(atom_index);
     if (wasm.atoms.getPtr(index)) |last_index_ptr| {
-        const last = wasm.getAtomPtr(last_index_ptr.*);
-        last.*.next = atom_index;
         atom.prev = last_index_ptr.*;
         last_index_ptr.* = atom_index;
     } else {
@@ -2375,6 +2364,11 @@ fn allocateAtoms(wasm: *Wasm) !void {
     while (it.next()) |entry| {
         const segment = &wasm.segments.items[entry.key_ptr.*];
         var atom_index = entry.value_ptr.*;
+        if (entry.key_ptr.* == wasm.code_section_index) {
+            // Code section atoms are allocated upon writing, as they are required to be
+            // ordered in sync with the function section.
+            continue;
+        }
         var offset: u32 = 0;
         while (true) {
             const atom = wasm.getAtomPtr(atom_index);
@@ -2387,28 +2381,17 @@ fn allocateAtoms(wasm: *Wasm) !void {
                 break :sym object.symtable[symbol_loc.index];
             } else wasm.symbols.items[symbol_loc.index];
 
+            // Dead symbols must be unlinked from the linked-list to prevent them
+            // from being emitted into the binary.
             if (sym.isDead()) {
-                // Dead symbols must be unlinked from the linked-list to prevent them
-                // from being emit into the binary.
-                if (atom.next) |next_index| {
-                    const next = wasm.getAtomPtr(next_index);
-                    next.prev = atom.prev;
-                } else if (entry.value_ptr.* == atom_index) {
+                if (entry.value_ptr.* == atom_index and atom.prev != null) {
                     // When the atom is dead and is also the first atom retrieved from wasm.atoms(index) we update
                     // the entry to point it to the previous atom to ensure we do not start with a dead symbol that
                     // was removed and therefore do not emit any code at all.
-                    if (atom.prev) |prev| {
-                        entry.value_ptr.* = prev;
-                    }
+                    entry.value_ptr.* = atom.prev.?;
                 }
-                atom_index = atom.prev orelse {
-                    atom.next = null;
-                    break;
-                };
-                const prev = wasm.getAtomPtr(atom_index);
-                prev.next = atom.next;
+                atom_index = atom.prev orelse break;
                 atom.prev = null;
-                atom.next = null;
                 continue;
             }
             offset = @intCast(atom.alignment.forward(offset));
@@ -2546,16 +2529,6 @@ fn setupErrorsLen(wasm: *Wasm) !void {
     // if not, allocate a new atom.
     const atom_index = if (wasm.symbol_atom.get(loc)) |index| blk: {
         const atom = wasm.getAtomPtr(index);
-        if (atom.next) |next_atom_index| {
-            const next_atom = wasm.getAtomPtr(next_atom_index);
-            next_atom.prev = atom.prev;
-            atom.next = null;
-        }
-        if (atom.prev) |prev_index| {
-            const prev_atom = wasm.getAtomPtr(prev_index);
-            prev_atom.next = atom.next;
-            atom.prev = null;
-        }
         atom.deinit(gpa);
         break :blk index;
     } else new_atom: {
@@ -2658,18 +2631,12 @@ fn createSyntheticFunction(
         .sym_index = loc.index,
         .file = null,
         .alignment = .@"1",
-        .next = null,
         .prev = null,
         .code = function_body.moveToUnmanaged(),
         .original_offset = 0,
     };
     try wasm.appendAtomAtIndex(wasm.code_section_index.?, atom_index);
     try wasm.symbol_atom.putNoClobber(gpa, loc, atom_index);
-
-    // `allocateAtoms` has already been called, set the atom's offset manually.
-    // This is fine to do manually as we insert the atom at the very end.
-    const prev_atom = wasm.getAtom(atom.prev.?);
-    atom.offset = prev_atom.offset + prev_atom.size;
 }
 
 /// Unlike `createSyntheticFunction` this function is to be called by
@@ -2695,7 +2662,6 @@ pub fn createFunction(
         .sym_index = loc.index,
         .file = null,
         .alignment = .@"1",
-        .next = null,
         .prev = null,
         .code = function_body.moveToUnmanaged(),
         .relocs = relocations.moveToUnmanaged(),
@@ -3452,12 +3418,10 @@ fn resetState(wasm: *Wasm) void {
     var atom_it = wasm.decls.valueIterator();
     while (atom_it.next()) |atom_index| {
         const atom = wasm.getAtomPtr(atom_index.*);
-        atom.next = null;
         atom.prev = null;
 
         for (atom.locals.items) |local_atom_index| {
             const local_atom = wasm.getAtomPtr(local_atom_index);
-            local_atom.next = null;
             local_atom.prev = null;
         }
     }
@@ -4085,46 +4049,29 @@ fn writeToFile(
     }
 
     // Code section
-    var code_section_size: u32 = 0;
-    if (wasm.code_section_index) |code_index| {
+    if (wasm.code_section_index != null) {
         const header_offset = try reserveVecSectionHeader(&binary_bytes);
-        var atom_index = wasm.atoms.get(code_index).?;
+        const start_offset = binary_bytes.items.len - 5; // minus 5 so start offset is 5 to include entry count
 
-        // The code section must be sorted in line with the function order.
-        var sorted_atoms = try std.ArrayList(*const Atom).initCapacity(gpa, wasm.functions.count());
-        defer sorted_atoms.deinit();
-
-        while (true) {
+        var func_it = wasm.functions.iterator();
+        while (func_it.next()) |entry| {
+            const sym_loc: SymbolLoc = .{ .index = entry.value_ptr.sym_index, .file = entry.key_ptr.file };
+            const atom_index = wasm.symbol_atom.get(sym_loc).?;
             const atom = wasm.getAtomPtr(atom_index);
+
             if (!is_obj) {
                 atom.resolveRelocs(wasm);
             }
-            sorted_atoms.appendAssumeCapacity(atom); // found more code atoms than functions
-            atom_index = atom.prev orelse break;
-        }
-        assert(wasm.functions.count() == sorted_atoms.items.len);
-
-        const atom_sort_fn = struct {
-            fn sort(ctx: *const Wasm, lhs: *const Atom, rhs: *const Atom) bool {
-                const lhs_sym = lhs.symbolLoc().getSymbol(ctx);
-                const rhs_sym = rhs.symbolLoc().getSymbol(ctx);
-                return lhs_sym.index < rhs_sym.index;
-            }
-        }.sort;
-
-        mem.sort(*const Atom, sorted_atoms.items, wasm, atom_sort_fn);
-
-        for (sorted_atoms.items) |sorted_atom| {
-            try leb.writeULEB128(binary_writer, sorted_atom.size);
-            try binary_writer.writeAll(sorted_atom.code.items);
+            atom.offset = @intCast(binary_bytes.items.len - start_offset);
+            try leb.writeULEB128(binary_writer, atom.size);
+            try binary_writer.writeAll(atom.code.items);
         }
 
-        code_section_size = @as(u32, @intCast(binary_bytes.items.len - header_offset - header_size));
         try writeVecSectionHeader(
             binary_bytes.items,
             header_offset,
             .code,
-            code_section_size,
+            @intCast(binary_bytes.items.len - header_offset - header_size),
             @intCast(wasm.functions.count()),
         );
         code_section_index = section_count;