Commit 766284fec8

Andrew Kelley <andrew@ziglang.org>
2024-12-19 03:28:23
wasm linker: fix data segments memory flow
Recognize three distinct phases:
* before prelink ("object phase")
* after prelink, before flush ("zcu phase")
* during flush ("flush phase")

With this setup, we create data structures during the object phase, then mutate them during the zcu phase, and then further mutate them during the flush phase. In order to make the flush phase repeatable, the data structures are copied just before starting the flush phase. Further Zcu updates occur against the non-copied data structures.

What's not implemented is frontend garbage collection, in which case some more changes will be needed in this linker logic to achieve a valid state with data invariants intact.
1 parent 3cb00c5
Changed files (2)
src
src/link/Wasm/Flush.zig
@@ -19,12 +19,15 @@ const leb = std.leb;
 const log = std.log.scoped(.link);
 const assert = std.debug.assert;
 
+data_segments: std.AutoArrayHashMapUnmanaged(Wasm.DataSegment.Id, u32) = .empty,
 /// Each time a `data_segment` offset equals zero it indicates a new group, and
 /// the next element in this array will contain the total merged segment size.
 data_segment_groups: std.ArrayListUnmanaged(u32) = .empty,
 
 binary_bytes: std.ArrayListUnmanaged(u8) = .empty,
 missing_exports: std.AutoArrayHashMapUnmanaged(String, void) = .empty,
+function_imports: std.AutoArrayHashMapUnmanaged(String, Wasm.FunctionImportId) = .empty,
+global_imports: std.AutoArrayHashMapUnmanaged(String, Wasm.GlobalImportId) = .empty,
 
 indirect_function_table: std.AutoArrayHashMapUnmanaged(Wasm.OutputFunctionIndex, u32) = .empty,
 
@@ -39,8 +42,12 @@ pub fn clear(f: *Flush) void {
 }
 
 pub fn deinit(f: *Flush, gpa: Allocator) void {
-    f.binary_bytes.deinit(gpa);
+    f.data_segments.deinit(gpa);
     f.data_segment_groups.deinit(gpa);
+    f.binary_bytes.deinit(gpa);
+    f.missing_exports.deinit(gpa);
+    f.function_imports.deinit(gpa);
+    f.global_imports.deinit(gpa);
     f.indirect_function_table.deinit(gpa);
     f.* = undefined;
 }
@@ -58,18 +65,9 @@ pub fn finish(f: *Flush, wasm: *Wasm) !void {
     const zcu = wasm.base.comp.zcu.?;
     const ip: *const InternPool = &zcu.intern_pool; // No mutations allowed!
 
-    if (wasm.any_exports_updated) {
-        wasm.any_exports_updated = false;
-
-        wasm.function_exports.shrinkRetainingCapacity(wasm.function_exports_len);
-        wasm.global_exports.shrinkRetainingCapacity(wasm.global_exports_len);
-
+    {
         const entry_name = if (wasm.entry_resolution.isNavOrUnresolved(wasm)) wasm.entry_name else .none;
 
-        try f.missing_exports.reinit(gpa, wasm.missing_exports_init, &.{});
-        try wasm.function_imports.reinit(gpa, wasm.function_imports_init_keys, wasm.function_imports_init_vals);
-        try wasm.global_imports.reinit(gpa, wasm.global_imports_init_keys, wasm.global_imports_init_vals);
-
         for (wasm.nav_exports.keys()) |*nav_export| {
             if (ip.isFunctionType(ip.getNav(nav_export.nav_index).typeOf(ip))) {
                 log.debug("flush export '{s}' nav={d}", .{ nav_export.name.slice(wasm), nav_export.nav_index });
@@ -134,17 +132,17 @@ pub fn finish(f: *Flush, wasm: *Wasm) !void {
 
     // Merge and order the data segments. Depends on garbage collection so that
     // unused segments can be omitted.
-    try wasm.data_segments.ensureUnusedCapacity(gpa, wasm.object_data_segments.items.len + 1);
+    try f.data_segments.ensureUnusedCapacity(gpa, wasm.object_data_segments.items.len + 1);
     for (wasm.object_data_segments.items, 0..) |*ds, i| {
         if (!ds.flags.alive) continue;
         const data_segment_index: Wasm.ObjectDataSegmentIndex = @enumFromInt(i);
         any_passive_inits = any_passive_inits or ds.flags.is_passive or (import_memory and !wasm.isBss(ds.name));
-        wasm.data_segments.putAssumeCapacityNoClobber(.pack(wasm, .{
+        f.data_segments.putAssumeCapacityNoClobber(.pack(wasm, .{
             .object = data_segment_index,
         }), @as(u32, undefined));
     }
     if (wasm.error_name_table_ref_count > 0) {
-        wasm.data_segments.putAssumeCapacity(.__zig_error_name_table, @as(u32, undefined));
+        f.data_segments.putAssumeCapacity(.__zig_error_name_table, @as(u32, undefined));
     }
 
     try wasm.functions.ensureUnusedCapacity(gpa, 3);
@@ -223,9 +221,9 @@ pub fn finish(f: *Flush, wasm: *Wasm) !void {
             return @intFromEnum(lhs_segment) < @intFromEnum(rhs_segment);
         }
     };
-    wasm.data_segments.sortUnstable(@as(Sort, .{
+    f.data_segments.sortUnstable(@as(Sort, .{
         .wasm = wasm,
-        .segments = wasm.data_segments.keys(),
+        .segments = f.data_segments.keys(),
     }));
 
     const page_size = std.wasm.page_size; // 64kb
@@ -260,8 +258,8 @@ pub fn finish(f: *Flush, wasm: *Wasm) !void {
         virtual_addrs.stack_pointer = @intCast(memory_ptr);
     }
 
-    const segment_ids = wasm.data_segments.keys();
-    const segment_offsets = wasm.data_segments.values();
+    const segment_ids = f.data_segments.keys();
+    const segment_offsets = f.data_segments.values();
     assert(f.data_segment_groups.items.len == 0);
     {
         var seen_tls: enum { before, during, after } = .before;
@@ -703,11 +701,11 @@ pub fn finish(f: *Flush, wasm: *Wasm) !void {
         //    try wasm.emitCodeRelocations(binary_bytes, code_index, symbol_table);
         //}
         //if (data_section_index) |data_index| {
-        //    if (wasm.data_segments.count() > 0)
+        //    if (f.data_segments.count() > 0)
         //        try wasm.emitDataRelocations(binary_bytes, data_index, symbol_table);
         //}
     } else if (comp.config.debug_format != .strip) {
-        try emitNameSection(wasm, &wasm.data_segments, binary_bytes);
+        try emitNameSection(wasm, &f.data_segments, binary_bytes);
     }
 
     if (comp.config.debug_format != .strip) {
@@ -993,7 +991,7 @@ fn emitProducerSection(gpa: Allocator, binary_bytes: *std.ArrayListUnmanaged(u8)
 //    var count: u32 = 0;
 //    // for each atom, we calculate the uleb size and append that
 //    var size_offset: u32 = 5; // account for code section size leb128
-//    for (wasm.data_segments.values()) |segment_index| {
+//    for (f.data_segments.values()) |segment_index| {
 //        var atom: *Atom = wasm.atoms.get(segment_index).?.ptr(wasm);
 //        while (true) {
 //            size_offset += getUleb128Size(atom.code.len);
src/link/Wasm.zig
@@ -189,7 +189,10 @@ debug_sections: DebugSections = .{},
 
 flush_buffer: Flush = .{},
 
-missing_exports_init: []String = &.{},
+/// Empty until `prelink`. There it is populated based on object files.
+/// Next, it is copied into `Flush.missing_exports` just before `flush`
+/// and that data is used during `flush`.
+missing_exports: std.AutoArrayHashMapUnmanaged(String, void) = .empty,
 entry_resolution: FunctionImport.Resolution = .unresolved,
 
 /// Empty when outputting an object.
@@ -206,13 +209,7 @@ functions: std.AutoArrayHashMapUnmanaged(FunctionImport.Resolution, void) = .emp
 /// Tracks the value at the end of prelink, at which point `functions`
 /// contains only object file functions, and nothing from the Zcu yet.
 functions_end_prelink: u32 = 0,
-/// Immutable after prelink. The undefined functions coming only from all object files.
-/// The Zcu must satisfy these.
-function_imports_init_keys: []String = &.{},
-function_imports_init_vals: []FunctionImportId = &.{},
-/// Initialized as copy of `function_imports_init_keys` and
-/// `function_import_init_vals`; entries are deleted as they are satisfied by
-/// the Zcu.
+/// Entries are deleted as they are satisfied by the Zcu.
 function_imports: std.AutoArrayHashMapUnmanaged(String, FunctionImportId) = .empty,
 
 /// Ordered list of non-import globals that will appear in the final binary.
@@ -221,8 +218,6 @@ globals: std.AutoArrayHashMapUnmanaged(GlobalImport.Resolution, void) = .empty,
 /// Tracks the value at the end of prelink, at which point `globals`
 /// contains only object file globals, and nothing from the Zcu yet.
 globals_end_prelink: u32 = 0,
-global_imports_init_keys: []String = &.{},
-global_imports_init_vals: []GlobalImportId = &.{},
 global_imports: std.AutoArrayHashMapUnmanaged(String, GlobalImportId) = .empty,
 
 /// Ordered list of non-import tables that will appear in the final binary.
@@ -233,11 +228,10 @@ table_imports: std.AutoArrayHashMapUnmanaged(String, TableImport.Index) = .empty
 /// Ordered list of data segments that will appear in the final binary.
 /// When sorted, to-be-merged segments will be made adjacent.
 /// Values are offset relative to segment start.
-data_segments: std.AutoArrayHashMapUnmanaged(Wasm.DataSegment.Id, u32) = .empty,
+data_segments: std.AutoArrayHashMapUnmanaged(Wasm.DataSegment.Id, void) = .empty,
 
 error_name_table_ref_count: u32 = 0,
 
-any_exports_updated: bool = true,
 /// Set to true if any `GLOBAL_INDEX` relocation is encountered with
 /// `SymbolFlags.tls` set to true. This is for objects only; final
 /// value must be this OR'd with the same logic for zig functions
@@ -313,7 +307,7 @@ pub const GlobalExport = extern struct {
     global_index: GlobalIndex,
 };
 
-/// 0. Index into `function_imports`
+/// 0. Index into `Flush.function_imports`
 /// 1. Index into `functions`.
 ///
 /// Note that function_imports indexes are subject to swap removals during
@@ -529,13 +523,6 @@ pub const SymbolFlags = packed struct(u32) {
         return flags.exported;
     }
 
-    pub fn requiresImport(flags: SymbolFlags, is_data: bool) bool {
-        if (is_data) return false;
-        if (!flags.undefined) return false;
-        if (flags.binding == .weak) return false;
-        return true;
-    }
-
     /// Returns the name as how it will be output into the final object
     /// file or binary. When `merge` is true, this will return the
     /// short name. i.e. ".rodata". When false, it returns the entire name instead.
@@ -2268,6 +2255,8 @@ pub fn deinit(wasm: *Wasm) void {
 
     wasm.params_scratch.deinit(gpa);
     wasm.returns_scratch.deinit(gpa);
+
+    wasm.missing_exports.deinit(gpa);
 }
 
 pub fn updateFunc(wasm: *Wasm, pt: Zcu.PerThread, func_index: InternPool.Index, air: Air, liveness: Liveness) !void {
@@ -2306,28 +2295,27 @@ pub fn updateNav(wasm: *Wasm, pt: Zcu.PerThread, nav_index: InternPool.Nav.Index
     const gpa = comp.gpa;
     const is_obj = comp.config.output_mode == .Obj;
 
-    const is_extern, const nav_init = switch (ip.indexToKey(nav.status.resolved.val)) {
-        .func => return,
-        .@"extern" => .{ true, .none },
-        .variable => |variable| .{ false, variable.init },
-        else => .{ false, nav.status.resolved.val },
+    const nav_init = switch (ip.indexToKey(nav.status.resolved.val)) {
+        .func => return, // global const which is a function alias
+        .@"extern" => {
+            if (is_obj) {
+                assert(!wasm.navs_obj.contains(nav_index));
+            } else {
+                assert(!wasm.navs_exe.contains(nav_index));
+            }
+            try wasm.imports.put(gpa, nav_index, {});
+            return;
+        },
+        .variable => |variable| variable.init,
+        else => nav.status.resolved.val,
     };
-    if (is_extern) {
-        try wasm.imports.put(gpa, nav_index, {});
-        if (is_obj) {
-            if (wasm.navs_obj.swapRemove(nav_index)) @panic("TODO reclaim resources");
-        } else {
-            if (wasm.navs_exe.swapRemove(nav_index)) @panic("TODO reclaim resources");
-        }
-        return;
-    }
-    _ = wasm.imports.swapRemove(nav_index);
+    assert(!wasm.imports.contains(nav_index));
 
     if (nav_init != .none and !Value.fromInterned(nav_init).typeOf(zcu).hasRuntimeBits(zcu)) {
         if (is_obj) {
-            if (wasm.navs_obj.swapRemove(nav_index)) @panic("TODO reclaim resources");
+            assert(!wasm.navs_obj.contains(nav_index));
         } else {
-            if (wasm.navs_exe.swapRemove(nav_index)) @panic("TODO reclaim resources");
+            assert(!wasm.navs_exe.contains(nav_index));
         }
         return;
     }
@@ -2339,9 +2327,7 @@ pub fn updateNav(wasm: *Wasm, pt: Zcu.PerThread, nav_index: InternPool.Nav.Index
     if (is_obj) {
         const gop = try wasm.navs_obj.getOrPut(gpa, nav_index);
         gop.value_ptr.* = zcu_data;
-        wasm.data_segments.putAssumeCapacity(.pack(wasm, .{
-            .nav_obj = @enumFromInt(gop.index),
-        }), @as(u32, undefined));
+        wasm.data_segments.putAssumeCapacity(.pack(wasm, .{ .nav_obj = @enumFromInt(gop.index) }), {});
     }
 
     assert(zcu_data.relocs.len == 0);
@@ -2351,9 +2337,7 @@ pub fn updateNav(wasm: *Wasm, pt: Zcu.PerThread, nav_index: InternPool.Nav.Index
         .code = zcu_data.code,
         .count = 0,
     };
-    wasm.data_segments.putAssumeCapacity(.pack(wasm, .{
-        .nav_exe = @enumFromInt(gop.index),
-    }), @as(u32, undefined));
+    wasm.data_segments.putAssumeCapacity(.pack(wasm, .{ .nav_exe = @enumFromInt(gop.index) }), {});
 }
 
 pub fn updateLineNumber(wasm: *Wasm, pt: Zcu.PerThread, ti_id: InternPool.TrackedInst.Index) !void {
@@ -2380,7 +2364,6 @@ pub fn deleteExport(
         },
         .uav => |uav_index| assert(wasm.uav_exports.swapRemove(.{ .uav_index = uav_index, .name = export_name })),
     }
-    wasm.any_exports_updated = true;
 }
 
 pub fn updateExports(
@@ -2409,7 +2392,6 @@ pub fn updateExports(
             .uav => |uav_index| try wasm.uav_exports.put(gpa, .{ .uav_index = uav_index, .name = name }, export_idx),
         }
     }
-    wasm.any_exports_updated = true;
 }
 
 pub fn loadInput(wasm: *Wasm, input: link.Input) !void {
@@ -2464,32 +2446,28 @@ pub fn prelink(wasm: *Wasm, prog_node: std.Progress.Node) link.File.FlushError!v
     const gpa = comp.gpa;
     const rdynamic = comp.config.rdynamic;
 
-    {
-        var missing_exports: std.AutoArrayHashMapUnmanaged(String, void) = .empty;
-        defer missing_exports.deinit(gpa);
-        for (wasm.export_symbol_names) |exp_name| {
-            const exp_name_interned = try wasm.internString(exp_name);
-            if (wasm.object_function_imports.getPtr(exp_name_interned)) |import| {
-                if (import.resolution != .unresolved) {
-                    import.flags.exported = true;
-                    continue;
-                }
+    assert(wasm.missing_exports.entries.len == 0);
+    for (wasm.export_symbol_names) |exp_name| {
+        const exp_name_interned = try wasm.internString(exp_name);
+        if (wasm.object_function_imports.getPtr(exp_name_interned)) |import| {
+            if (import.resolution != .unresolved) {
+                import.flags.exported = true;
+                continue;
             }
-            if (wasm.object_global_imports.getPtr(exp_name_interned)) |import| {
-                if (import.resolution != .unresolved) {
-                    import.flags.exported = true;
-                    continue;
-                }
+        }
+        if (wasm.object_global_imports.getPtr(exp_name_interned)) |import| {
+            if (import.resolution != .unresolved) {
+                import.flags.exported = true;
+                continue;
             }
-            if (wasm.object_table_imports.getPtr(exp_name_interned)) |import| {
-                if (import.resolution != .unresolved) {
-                    import.flags.exported = true;
-                    continue;
-                }
+        }
+        if (wasm.object_table_imports.getPtr(exp_name_interned)) |import| {
+            if (import.resolution != .unresolved) {
+                import.flags.exported = true;
+                continue;
             }
-            try missing_exports.put(gpa, exp_name_interned, {});
         }
-        wasm.missing_exports_init = try gpa.dupe(String, missing_exports.keys());
+        try wasm.missing_exports.put(gpa, exp_name_interned, {});
     }
 
     if (wasm.entry_name.unwrap()) |entry_name| {
@@ -2515,8 +2493,6 @@ pub fn prelink(wasm: *Wasm, prog_node: std.Progress.Node) link.File.FlushError!v
         }
     }
     wasm.functions_end_prelink = @intCast(wasm.functions.entries.len);
-    wasm.function_imports_init_keys = try gpa.dupe(String, wasm.function_imports.keys());
-    wasm.function_imports_init_vals = try gpa.dupe(FunctionImportId, wasm.function_imports.values());
     wasm.function_exports_len = @intCast(wasm.function_exports.items.len);
 
     for (wasm.object_global_imports.keys(), wasm.object_global_imports.values(), 0..) |name, *import, i| {
@@ -2525,8 +2501,6 @@ pub fn prelink(wasm: *Wasm, prog_node: std.Progress.Node) link.File.FlushError!v
         }
     }
     wasm.globals_end_prelink = @intCast(wasm.globals.entries.len);
-    wasm.global_imports_init_keys = try gpa.dupe(String, wasm.global_imports.keys());
-    wasm.global_imports_init_vals = try gpa.dupe(GlobalImportId, wasm.global_imports.values());
     wasm.global_exports_len = @intCast(wasm.global_exports.items.len);
 
     for (wasm.object_table_imports.keys(), wasm.object_table_imports.values(), 0..) |name, *import, i| {
@@ -2692,6 +2666,7 @@ pub fn flushModule(
     const comp = wasm.base.comp;
     const use_lld = build_options.have_llvm and comp.config.use_lld;
     const diags = &comp.link_diags;
+    const gpa = comp.gpa;
 
     if (wasm.llvm_object) |llvm_object| {
         try wasm.base.emitLlvmObject(arena, llvm_object, prog_node);
@@ -2728,6 +2703,10 @@ pub fn flushModule(
     defer wasm.data_segments.shrinkRetainingCapacity(data_segments_end_zcu);
 
     wasm.flush_buffer.clear();
+    try wasm.flush_buffer.missing_exports.reinit(gpa, wasm.missing_exports.keys(), &.{});
+    try wasm.flush_buffer.data_segments.reinit(gpa, wasm.data_segments.keys(), &.{});
+    try wasm.flush_buffer.function_imports.reinit(gpa, wasm.function_imports.keys(), wasm.function_imports.values());
+    try wasm.flush_buffer.global_imports.reinit(gpa, wasm.global_imports.keys(), wasm.global_imports.values());
 
     return wasm.flush_buffer.finish(wasm) catch |err| switch (err) {
         error.OutOfMemory => return error.OutOfMemory,
@@ -3330,7 +3309,7 @@ pub fn refUavObj(wasm: *Wasm, pt: Zcu.PerThread, ip_index: InternPool.Index) !Ua
     const gop = try wasm.uavs_obj.getOrPut(gpa, ip_index);
     if (!gop.found_existing) gop.value_ptr.* = try lowerZcuData(wasm, pt, ip_index);
     const uav_index: UavsObjIndex = @enumFromInt(gop.index);
-    try wasm.data_segments.put(gpa, .pack(wasm, .{ .uav_obj = uav_index }), @as(u32, undefined));
+    try wasm.data_segments.put(gpa, .pack(wasm, .{ .uav_obj = uav_index }), {});
     return uav_index;
 }
 
@@ -3349,34 +3328,34 @@ pub fn refUavExe(wasm: *Wasm, pt: Zcu.PerThread, ip_index: InternPool.Index) !Ua
         };
     }
     const uav_index: UavsExeIndex = @enumFromInt(gop.index);
-    try wasm.data_segments.put(gpa, .pack(wasm, .{ .uav_exe = uav_index }), @as(u32, undefined));
+    try wasm.data_segments.put(gpa, .pack(wasm, .{ .uav_exe = uav_index }), {});
     return uav_index;
 }
 
-/// Asserts it is called after `Wasm.data_segments` is fully populated and sorted.
+/// Asserts it is called after `Flush.data_segments` is fully populated and sorted.
 pub fn uavAddr(wasm: *Wasm, uav_index: UavsExeIndex) u32 {
     assert(wasm.flush_buffer.memory_layout_finished);
     const comp = wasm.base.comp;
     assert(comp.config.output_mode != .Obj);
     const ds_id: DataSegment.Id = .pack(wasm, .{ .uav_exe = uav_index });
-    return wasm.data_segments.get(ds_id).?;
+    return wasm.flush_buffer.data_segments.get(ds_id).?;
 }
 
-/// Asserts it is called after `Wasm.data_segments` is fully populated and sorted.
+/// Asserts it is called after `Flush.data_segments` is fully populated and sorted.
 pub fn navAddr(wasm: *Wasm, nav_index: InternPool.Nav.Index) u32 {
     assert(wasm.flush_buffer.memory_layout_finished);
     const comp = wasm.base.comp;
     assert(comp.config.output_mode != .Obj);
     const ds_id: DataSegment.Id = .pack(wasm, .{ .nav_exe = @enumFromInt(wasm.navs_exe.getIndex(nav_index).?) });
-    return wasm.data_segments.get(ds_id).?;
+    return wasm.flush_buffer.data_segments.get(ds_id).?;
 }
 
-/// Asserts it is called after `Wasm.data_segments` is fully populated and sorted.
+/// Asserts it is called after `Flush.data_segments` is fully populated and sorted.
 pub fn errorNameTableAddr(wasm: *Wasm) u32 {
     assert(wasm.flush_buffer.memory_layout_finished);
     const comp = wasm.base.comp;
     assert(comp.config.output_mode != .Obj);
-    return wasm.data_segments.get(.__zig_error_name_table).?;
+    return wasm.flush_buffer.data_segments.get(.__zig_error_name_table).?;
 }
 
 fn convertZcuFnType(