Commit 1fd708b1bc

Andrew Kelley <andrew@ziglang.org>
2025-01-05 03:13:25
wasm linker: implement data relocs
1 parent abdbc38
Changed files (2)
src
src/link/Wasm/Flush.zig
@@ -22,7 +22,7 @@ const assert = std.debug.assert;
 /// Ordered list of data segments that will appear in the final binary.
 /// When sorted, to-be-merged segments will be made adjacent.
 /// Values are virtual address.
-data_segments: std.AutoArrayHashMapUnmanaged(Wasm.DataId, u32) = .empty,
+data_segments: std.AutoArrayHashMapUnmanaged(Wasm.DataSegmentId, u32) = .empty,
 /// Each time a `data_segment` offset equals zero it indicates a new group, and
 /// the next element in this array will contain the total merged segment size.
 /// Value is the virtual memory address of the end of the segment.
@@ -228,7 +228,7 @@ pub fn finish(f: *Flush, wasm: *Wasm) !void {
     // For the purposes of sorting, they are implicitly all named ".data".
     const Sort = struct {
         wasm: *const Wasm,
-        segments: []const Wasm.DataId,
+        segments: []const Wasm.DataSegmentId,
         pub fn lessThan(ctx: @This(), lhs: usize, rhs: usize) bool {
             const lhs_segment = ctx.segments[lhs];
             const rhs_segment = ctx.segments[rhs];
@@ -311,7 +311,7 @@ pub fn finish(f: *Flush, wasm: *Wasm) !void {
     const data_vaddr: u32 = @intCast(memory_ptr);
     {
         var seen_tls: enum { before, during, after } = .before;
-        var category: Wasm.DataId.Category = undefined;
+        var category: Wasm.DataSegmentId.Category = undefined;
         for (segment_ids, segment_vaddrs, 0..) |segment_id, *segment_vaddr, i| {
             const alignment = segment_id.alignment(wasm);
             category = segment_id.category(wasm);
@@ -710,7 +710,7 @@ pub fn finish(f: *Flush, wasm: *Wasm) !void {
 
     if (!is_obj) {
         for (wasm.uav_fixups.items) |uav_fixup| {
-            const ds_id: Wasm.DataId = .pack(wasm, .{ .uav_exe = uav_fixup.uavs_exe_index });
+            const ds_id: Wasm.DataSegmentId = .pack(wasm, .{ .uav_exe = uav_fixup.uavs_exe_index });
             const vaddr = f.data_segments.get(ds_id).?;
             if (!is64) {
                 mem.writeInt(u32, wasm.string_bytes.items[uav_fixup.offset..][0..4], vaddr, .little);
@@ -719,7 +719,7 @@ pub fn finish(f: *Flush, wasm: *Wasm) !void {
             }
         }
         for (wasm.nav_fixups.items) |nav_fixup| {
-            const ds_id: Wasm.DataId = .pack(wasm, .{ .nav_exe = nav_fixup.navs_exe_index });
+            const ds_id: Wasm.DataSegmentId = .pack(wasm, .{ .nav_exe = nav_fixup.navs_exe_index });
             const vaddr = f.data_segments.get(ds_id).?;
             if (!is64) {
                 mem.writeInt(u32, wasm.string_bytes.items[nav_fixup.offset..][0..4], vaddr, .little);
@@ -867,7 +867,7 @@ pub fn finish(f: *Flush, wasm: *Wasm) !void {
 
 fn emitNameSection(
     wasm: *Wasm,
-    data_segments: *const std.AutoArrayHashMapUnmanaged(Wasm.DataId, u32),
+    data_segments: *const std.AutoArrayHashMapUnmanaged(Wasm.DataSegmentId, u32),
     binary_bytes: *std.ArrayListUnmanaged(u8),
 ) !void {
     const f = &wasm.flush_buffer;
@@ -1142,9 +1142,9 @@ fn splitSegmentName(name: []const u8) struct { []const u8, []const u8 } {
 
 fn wantSegmentMerge(
     wasm: *const Wasm,
-    a_id: Wasm.DataId,
-    b_id: Wasm.DataId,
-    b_category: Wasm.DataId.Category,
+    a_id: Wasm.DataSegmentId,
+    b_id: Wasm.DataSegmentId,
+    b_category: Wasm.DataSegmentId.Category,
 ) bool {
     const a_category = a_id.category(wasm);
     if (a_category != b_category) return false;
@@ -1519,17 +1519,23 @@ const RelocAddr = struct {
     addr: u32,
 
     fn fromObjectData(wasm: *const Wasm, i: Wasm.ObjectData.Index, addend: i32) RelocAddr {
-        const ptr = i.ptr(wasm);
-        const f = &wasm.flush_buffer;
-        const addr = f.data_segments.get(.fromObjectDataSegment(wasm, ptr.segment)).?;
-        return .{ .addr = @intCast(@as(i64, addr) + addend) };
+        // Resolve the object data symbol to a (segment, offset) location and let
+        // `fromDataLoc` combine the segment's virtual address, the offset, and
+        // `addend`. Unlike the removed code, the symbol's offset within its
+        // segment is now part of the result.
+        return fromDataLoc(&wasm.flush_buffer, .fromObjectDataIndex(wasm, i), addend);
     }
 
     fn fromSymbolName(wasm: *const Wasm, name: String, addend: i32) RelocAddr {
-        _ = wasm;
-        _ = name;
-        _ = addend;
-        @panic("TODO implement data symbol resolution");
+        // Resolve a data symbol by name. `object_data_imports` is consulted
+        // first, then `data_imports`; the name is asserted to be in one of them.
+        const flush = &wasm.flush_buffer;
+        if (wasm.object_data_imports.getPtr(name)) |import| {
+            // `dataLoc` treats an `.unresolved` resolution as unreachable.
+            return fromDataLoc(flush, import.resolution.dataLoc(wasm), addend);
+        } else if (wasm.data_imports.get(name)) |id| {
+            return fromDataLoc(flush, .fromDataImportId(wasm, id), addend);
+        } else {
+            // A relocation against a name found in neither table is considered impossible.
+            unreachable;
+        }
+    }
+
+    /// Computes the relocation address for `data_loc`: the virtual address
+    /// stored in `flush.data_segments` for its segment, plus the location's
+    /// offset, plus the relocation `addend`.
+    /// Asserts that the segment is present in `flush.data_segments` and that
+    /// the sum fits in the `addr` field.
+    fn fromDataLoc(flush: *const Flush, data_loc: Wasm.DataLoc, addend: i32) RelocAddr {
+        // Widen to i64 so the signed addend can be applied before narrowing back.
+        const base_addr: i64 = flush.data_segments.get(data_loc.segment).?;
+        return .{ .addr = @intCast(base_addr + data_loc.offset + addend) };
     }
 };
 
src/link/Wasm.zig
@@ -242,7 +242,7 @@ function_imports: std.AutoArrayHashMapUnmanaged(String, FunctionImportId) = .emp
 data_imports: std.AutoArrayHashMapUnmanaged(String, DataImportId) = .empty,
 /// Set of data symbols that will appear in the final binary. Used to populate
 /// `Flush.data_segments` before sorting.
-data_segments: std.AutoArrayHashMapUnmanaged(DataId, void) = .empty,
+data_segments: std.AutoArrayHashMapUnmanaged(DataSegmentId, void) = .empty,
 
 /// Ordered list of non-import globals that will appear in the final binary.
 /// Empty until prelink.
@@ -1523,27 +1523,120 @@ pub const ObjectDataImport = extern struct {
     source_location: SourceLocation,
 
     pub const Resolution = enum(u32) {
+        unresolved,
         __zig_error_names,
         __zig_error_name_table,
         __heap_base,
         __heap_end,
-        unresolved = std.math.maxInt(u32),
+        /// Next, an `ObjectData.Index`.
+        /// Next, index into `uavs_obj` or `uavs_exe` depending on whether emitting an object.
+        /// Next, index into `navs_obj` or `navs_exe` depending on whether emitting an object.
         _,
 
-        comptime {
-            assert(@intFromEnum(Resolution.__zig_error_names) == @intFromEnum(DataId.__zig_error_names));
-            assert(@intFromEnum(Resolution.__zig_error_name_table) == @intFromEnum(DataId.__zig_error_name_table));
-            assert(@intFromEnum(Resolution.__heap_base) == @intFromEnum(DataId.__heap_base));
-            assert(@intFromEnum(Resolution.__heap_end) == @intFromEnum(DataId.__heap_end));
+        const first_object = @intFromEnum(Resolution.__heap_end) + 1;
+
+        /// Decoded form of a `Resolution`, produced by `unpack` and accepted by
+        /// `pack`. The first five tags mirror the named enum values; the
+        /// remaining tags carry the index recovered from an unnamed value.
+        pub const Unpacked = union(enum) {
+            unresolved,
+            __zig_error_names,
+            __zig_error_name_table,
+            __heap_base,
+            __heap_end,
+            object: ObjectData.Index,
+            uav_exe: UavsExeIndex,
+            uav_obj: UavsObjIndex,
+            nav_exe: NavsExeIndex,
+            nav_obj: NavsObjIndex,
+        };
+
+        /// Decodes `r` into its `Unpacked` form.
+        ///
+        /// Named values map to the tag of the same name. Any other value is an
+        /// index relative to `first_object`: the first
+        /// `wasm.object_datas.items.len` indexes are `ObjectData.Index` values,
+        /// the following ones index `uavs_obj`/`uavs_exe`, and everything after
+        /// that is treated as a `navs_obj`/`navs_exe` index. The obj or exe
+        /// variant is selected by `comp.config.output_mode`.
+        ///
+        /// The result depends on the lengths of those tables at the time of the call.
+        pub fn unpack(r: Resolution, wasm: *const Wasm) Unpacked {
+            return switch (r) {
+                .unresolved => .unresolved,
+                .__zig_error_names => .__zig_error_names,
+                .__zig_error_name_table => .__zig_error_name_table,
+                .__heap_base => .__heap_base,
+                .__heap_end => .__heap_end,
+                _ => {
+                    const object_index = @intFromEnum(r) - first_object;
+
+                    // Either return the object data index, or rebase into the uav range.
+                    const uav_index = if (object_index < wasm.object_datas.items.len)
+                        return .{ .object = @enumFromInt(object_index) }
+                    else
+                        object_index - wasm.object_datas.items.len;
+
+                    const comp = wasm.base.comp;
+                    const is_obj = comp.config.output_mode == .Obj;
+                    if (is_obj) {
+                        // Either return the uav index, or rebase into the nav range.
+                        const nav_index = if (uav_index < wasm.uavs_obj.entries.len)
+                            return .{ .uav_obj = @enumFromInt(uav_index) }
+                        else
+                            uav_index - wasm.uavs_obj.entries.len;
+
+                        return .{ .nav_obj = @enumFromInt(nav_index) };
+                    } else {
+                        // Same decoding as above, against the exe tables.
+                        const nav_index = if (uav_index < wasm.uavs_exe.entries.len)
+                            return .{ .uav_exe = @enumFromInt(uav_index) }
+                        else
+                            uav_index - wasm.uavs_exe.entries.len;
+
+                        return .{ .nav_exe = @enumFromInt(nav_index) };
+                    }
+                },
+            };
         }
 
-        pub fn toDataId(r: Resolution) ?DataId {
-            if (r == .unresolved) return null;
-            return @enumFromInt(@intFromEnum(r));
+        /// Encodes `unpacked` as a `Resolution`.
+        ///
+        /// Indexed tags are placed after the named values, starting at
+        /// `first_object`, in the order object data, uavs, navs. Uav encodings
+        /// are therefore offset by `wasm.object_datas.items.len`, and nav
+        /// encodings additionally by the length of the matching uavs table.
+        pub fn pack(wasm: *const Wasm, unpacked: Unpacked) Resolution {
+            return switch (unpacked) {
+                .unresolved => .unresolved,
+                .__zig_error_names => .__zig_error_names,
+                .__zig_error_name_table => .__zig_error_name_table,
+                .__heap_base => .__heap_base,
+                .__heap_end => .__heap_end,
+                .object => |i| @enumFromInt(first_object + @intFromEnum(i)),
+                // `inline` because the two payloads have different index types.
+                inline .uav_exe, .uav_obj => |i| @enumFromInt(first_object + wasm.object_datas.items.len + @intFromEnum(i)),
+                .nav_exe => |i| @enumFromInt(first_object + wasm.object_datas.items.len + wasm.uavs_exe.entries.len + @intFromEnum(i)),
+                .nav_obj => |i| @enumFromInt(first_object + wasm.object_datas.items.len + wasm.uavs_obj.entries.len + @intFromEnum(i)),
+            };
         }
 
         pub fn fromObjectDataIndex(wasm: *const Wasm, object_data_index: ObjectData.Index) Resolution {
-            return @enumFromInt(@intFromEnum(DataId.pack(wasm, .{ .object = object_data_index.ptr(wasm).segment })));
+            // The resolution now records the `ObjectData.Index` itself rather than
+            // the id of the segment that contains it.
+            return pack(wasm, .{ .object = object_data_index });
+        }
+
+        /// Returns the object data segment containing the data that `r` refers
+        /// to, or `null` when `r` does not refer to object data.
+        /// Asserts that `r` is not `.unresolved`.
+        pub fn objectDataSegment(r: Resolution, wasm: *const Wasm) ?ObjectDataSegment.Index {
+            return switch (unpack(r, wasm)) {
+                .unresolved => unreachable,
+                .object => |i| i.ptr(wasm).segment,
+                .__zig_error_names,
+                .__zig_error_name_table,
+                .__heap_base,
+                .__heap_end,
+                .uav_exe,
+                .uav_obj,
+                .nav_exe,
+                .nav_obj,
+                => null,
+            };
+        }
+
+        /// Converts `r` into a `DataLoc` (data segment id plus offset).
+        ///
+        /// Object data maps to its containing segment and its own offset; the
+        /// named values map to the segment of the same name at offset 0.
+        /// Asserts that `r` is not `.unresolved`. The uav and nav cases are not
+        /// implemented yet and panic.
+        pub fn dataLoc(r: Resolution, wasm: *const Wasm) DataLoc {
+            return switch (unpack(r, wasm)) {
+                .unresolved => unreachable,
+                .object => |i| {
+                    const ptr = i.ptr(wasm);
+                    return .{
+                        .segment = .fromObjectDataSegment(wasm, ptr.segment),
+                        .offset = ptr.offset,
+                    };
+                },
+                .__zig_error_names => .{ .segment = .__zig_error_names, .offset = 0 },
+                .__zig_error_name_table => .{ .segment = .__zig_error_name_table, .offset = 0 },
+                .__heap_base => .{ .segment = .__heap_base, .offset = 0 },
+                .__heap_end => .{ .segment = .__heap_end, .offset = 0 },
+                .uav_exe => @panic("TODO"),
+                .uav_obj => @panic("TODO"),
+                .nav_exe => @panic("TODO"),
+                .nav_obj => @panic("TODO"),
+            };
         }
     };
 
@@ -1583,7 +1676,7 @@ pub const DataPayload = extern struct {
 };
 
 /// A reference to a local or exported global const.
-pub const DataId = enum(u32) {
+pub const DataSegmentId = enum(u32) {
     __zig_error_names,
     __zig_error_name_table,
     /// This and `__heap_end` are better retrieved via a global, but there is
@@ -1596,7 +1689,7 @@ pub const DataId = enum(u32) {
     /// Next, index into `navs_obj` or `navs_exe` depending on whether emitting an object.
     _,
 
-    const first_object = @intFromEnum(DataId.__heap_end) + 1;
+    const first_object = @intFromEnum(DataSegmentId.__heap_end) + 1;
 
     pub const Category = enum {
         /// Thread-local variables.
@@ -1620,7 +1713,7 @@ pub const DataId = enum(u32) {
         nav_obj: NavsObjIndex,
     };
 
-    pub fn pack(wasm: *const Wasm, unpacked: Unpacked) DataId {
+    pub fn pack(wasm: *const Wasm, unpacked: Unpacked) DataSegmentId {
         return switch (unpacked) {
             .__zig_error_names => .__zig_error_names,
             .__zig_error_name_table => .__zig_error_name_table,
@@ -1633,7 +1726,7 @@ pub const DataId = enum(u32) {
         };
     }
 
-    pub fn unpack(id: DataId, wasm: *const Wasm) Unpacked {
+    pub fn unpack(id: DataSegmentId, wasm: *const Wasm) Unpacked {
         return switch (id) {
             .__zig_error_names => .__zig_error_names,
             .__zig_error_name_table => .__zig_error_name_table,
@@ -1668,11 +1761,21 @@ pub const DataId = enum(u32) {
         };
     }
 
-    pub fn fromObjectDataSegment(wasm: *const Wasm, object_data_segment: ObjectDataSegment.Index) DataId {
+    /// Returns the data segment id for `nav_index`, looking it up in `navs_obj`
+    /// when the output mode is `.Obj` and in `navs_exe` otherwise.
+    /// Asserts that `nav_index` is present in the selected table.
+    pub fn fromNav(wasm: *const Wasm, nav_index: InternPool.Nav.Index) DataSegmentId {
+        const comp = wasm.base.comp;
+        const is_obj = comp.config.output_mode == .Obj;
+        return pack(wasm, if (is_obj) .{
+            .nav_obj = @enumFromInt(wasm.navs_obj.getIndex(nav_index).?),
+        } else .{
+            .nav_exe = @enumFromInt(wasm.navs_exe.getIndex(nav_index).?),
+        });
+    }
+
+    /// Returns the data segment id that refers to `object_data_segment`.
+    pub fn fromObjectDataSegment(wasm: *const Wasm, object_data_segment: ObjectDataSegment.Index) DataSegmentId {
         return pack(wasm, .{ .object = object_data_segment });
     }
 
-    pub fn category(id: DataId, wasm: *const Wasm) Category {
+    pub fn category(id: DataSegmentId, wasm: *const Wasm) Category {
         return switch (unpack(id, wasm)) {
             .__zig_error_names, .__zig_error_name_table, .__heap_base, .__heap_end => .data,
             .object => |i| {
@@ -1693,7 +1796,7 @@ pub const DataId = enum(u32) {
         };
     }
 
-    pub fn isTls(id: DataId, wasm: *const Wasm) bool {
+    pub fn isTls(id: DataSegmentId, wasm: *const Wasm) bool {
         return switch (unpack(id, wasm)) {
             .__zig_error_names, .__zig_error_name_table, .__heap_base, .__heap_end => false,
             .object => |i| i.ptr(wasm).flags.tls,
@@ -1707,11 +1810,11 @@ pub const DataId = enum(u32) {
         };
     }
 
-    pub fn isBss(id: DataId, wasm: *const Wasm) bool {
+    /// Returns whether `id` belongs to the `.zero` category.
+    pub fn isBss(id: DataSegmentId, wasm: *const Wasm) bool {
         return id.category(wasm) == .zero;
     }
 
-    pub fn name(id: DataId, wasm: *const Wasm) []const u8 {
+    pub fn name(id: DataSegmentId, wasm: *const Wasm) []const u8 {
         return switch (unpack(id, wasm)) {
             .__zig_error_names, .__zig_error_name_table, .uav_exe, .uav_obj, .__heap_base, .__heap_end => ".data",
             .object => |i| i.ptr(wasm).name.unwrap().?.slice(wasm),
@@ -1724,7 +1827,7 @@ pub const DataId = enum(u32) {
         };
     }
 
-    pub fn alignment(id: DataId, wasm: *const Wasm) Alignment {
+    pub fn alignment(id: DataSegmentId, wasm: *const Wasm) Alignment {
         return switch (unpack(id, wasm)) {
             .__zig_error_names => .@"1",
             .__zig_error_name_table, .__heap_base, .__heap_end => wasm.pointerAlignment(),
@@ -1752,7 +1855,7 @@ pub const DataId = enum(u32) {
         };
     }
 
-    pub fn refCount(id: DataId, wasm: *const Wasm) u32 {
+    pub fn refCount(id: DataSegmentId, wasm: *const Wasm) u32 {
         return switch (unpack(id, wasm)) {
             .__zig_error_names => @intCast(wasm.error_name_offs.items.len),
             .__zig_error_name_table => wasm.error_name_table_ref_count,
@@ -1761,7 +1864,7 @@ pub const DataId = enum(u32) {
         };
     }
 
-    pub fn isPassive(id: DataId, wasm: *const Wasm) bool {
+    pub fn isPassive(id: DataSegmentId, wasm: *const Wasm) bool {
         const comp = wasm.base.comp;
         if (comp.config.import_memory and !id.isBss(wasm)) return true;
         return switch (unpack(id, wasm)) {
@@ -1771,7 +1874,7 @@ pub const DataId = enum(u32) {
         };
     }
 
-    pub fn isEmpty(id: DataId, wasm: *const Wasm) bool {
+    pub fn isEmpty(id: DataSegmentId, wasm: *const Wasm) bool {
         return switch (unpack(id, wasm)) {
             .__zig_error_names, .__zig_error_name_table, .__heap_base, .__heap_end => false,
             .object => |i| i.ptr(wasm).payload.off == .none,
@@ -1779,7 +1882,7 @@ pub const DataId = enum(u32) {
         };
     }
 
-    pub fn size(id: DataId, wasm: *const Wasm) u32 {
+    pub fn size(id: DataSegmentId, wasm: *const Wasm) u32 {
         return switch (unpack(id, wasm)) {
             .__zig_error_names => @intCast(wasm.error_name_bytes.items.len),
             .__zig_error_name_table => {
@@ -1796,6 +1899,38 @@ pub const DataId = enum(u32) {
     }
 };
 
+/// A location inside a data segment: the segment's id together with an offset
+/// relative to that segment.
+pub const DataLoc = struct {
+    /// The data segment that contains the location.
+    segment: Wasm.DataSegmentId,
+    /// Offset of the location within `segment`.
+    offset: u32,
+
+    /// Returns the location of the object data `i`: its containing segment and
+    /// its offset within that segment.
+    pub fn fromObjectDataIndex(wasm: *const Wasm, i: Wasm.ObjectData.Index) DataLoc {
+        const ptr = i.ptr(wasm);
+        return .{
+            .segment = .fromObjectDataSegment(wasm, ptr.segment),
+            .offset = ptr.offset,
+        };
+    }
+
+    /// Returns the location that the data import `id` refers to, dispatching on
+    /// whether it is an object data import or a Zcu import.
+    pub fn fromDataImportId(wasm: *const Wasm, id: Wasm.DataImportId) DataLoc {
+        return switch (id.unpack(wasm)) {
+            .object_data_import => |i| .fromObjectDataImportIndex(wasm, i),
+            .zcu_import => |i| .fromZcuImport(wasm, i),
+        };
+    }
+
+    /// Returns the location given by the resolution of the object data import `i`.
+    /// `Resolution.dataLoc` asserts that the import has been resolved.
+    pub fn fromObjectDataImportIndex(wasm: *const Wasm, i: Wasm.ObjectDataImport.Index) DataLoc {
+        return i.value(wasm).resolution.dataLoc(wasm);
+    }
+
+    /// Returns the location of the nav behind `zcu_import`: the nav's own data
+    /// segment at offset 0.
+    pub fn fromZcuImport(wasm: *const Wasm, zcu_import: ZcuImportIndex) DataLoc {
+        const nav_index = zcu_import.ptr(wasm).*;
+        return .{
+            .segment = .fromNav(wasm, nav_index),
+            .offset = 0,
+        };
+    }
+};
+
 /// Index into `Wasm.uavs`.
 pub const UavIndex = enum(u32) {
     _,
@@ -3330,8 +3465,8 @@ fn markDataImport(
         } else {
             try wasm.data_imports.put(gpa, name, .fromObject(data_index, wasm));
         }
-    } else {
-        try markDataSegment(wasm, import.resolution.toDataId().?.unpack(wasm).object);
+    } else if (import.resolution.objectDataSegment(wasm)) |segment_index| {
+        try markDataSegment(wasm, segment_index);
     }
 }
 
@@ -4144,7 +4279,7 @@ pub fn uavAddr(wasm: *Wasm, uav_index: UavsExeIndex) u32 {
     assert(wasm.flush_buffer.memory_layout_finished);
     const comp = wasm.base.comp;
     assert(comp.config.output_mode != .Obj);
-    const ds_id: DataId = .pack(wasm, .{ .uav_exe = uav_index });
+    const ds_id: DataSegmentId = .pack(wasm, .{ .uav_exe = uav_index });
     return wasm.flush_buffer.data_segments.get(ds_id).?;
 }
 
@@ -4155,7 +4290,7 @@ pub fn navAddr(wasm: *Wasm, nav_index: InternPool.Nav.Index) u32 {
     assert(comp.config.output_mode != .Obj);
     const navs_exe_index: NavsExeIndex = @enumFromInt(wasm.navs_exe.getIndex(nav_index).?);
     log.debug("navAddr {s} {}", .{ navs_exe_index.name(wasm), nav_index });
-    const ds_id: DataId = .pack(wasm, .{ .nav_exe = navs_exe_index });
+    const ds_id: DataSegmentId = .pack(wasm, .{ .nav_exe = navs_exe_index });
     return wasm.flush_buffer.data_segments.get(ds_id).?;
 }