Commit 0a030d6598

Luuk de Gram <luuk@degram.dev>
2024-01-21 12:06:33
wasm: Use `File.Index` for symbol locations
Rather than using the optional, we now directly use `File.Index`, which can already represent an unknown file through its `.null` value. This means we do not pay the extra memory cost of the optional. This type of index is now used for: `SymbolLoc`, the key of the `functions` map, and `InitFunc`. Now we can simply pass things like `atom.file`, `object.file`, `loc.file`, etc. whenever we need to access the object file they represent, which makes it a lot easier.
1 parent 94f3a18
Changed files (4)
src/link/Wasm/Atom.zig
@@ -59,10 +59,7 @@ pub fn format(atom: Atom, comptime fmt: []const u8, options: std.fmt.FormatOptio
 
 /// Returns the location of the symbol that represents this `Atom`
 pub fn symbolLoc(atom: Atom) Wasm.SymbolLoc {
-    if (atom.file == .null) {
-        return .{ .file = null, .index = atom.sym_index };
-    }
-    return .{ .file = @intFromEnum(atom.file), .index = atom.sym_index };
+    return .{ .file = atom.file, .index = atom.sym_index };
 }
 
 pub fn getSymbolIndex(atom: Atom) ?u32 {
@@ -83,7 +80,7 @@ pub fn resolveRelocs(atom: *Atom, wasm_bin: *const Wasm) void {
     for (atom.relocs.items) |reloc| {
         const value = atom.relocationValue(reloc, wasm_bin);
         log.debug("Relocating '{s}' referenced in '{s}' offset=0x{x:0>8} value={d}", .{
-            (Wasm.SymbolLoc{ .file = @intFromEnum(atom.file), .index = reloc.index }).getName(wasm_bin),
+            (Wasm.SymbolLoc{ .file = atom.file, .index = reloc.index }).getName(wasm_bin),
             symbol_name,
             reloc.offset,
             value,
@@ -122,11 +119,7 @@ pub fn resolveRelocs(atom: *Atom, wasm_bin: *const Wasm) void {
 /// All values will be represented as a `u64` as all values can fit within it.
 /// The final value must be casted to the correct size.
 fn relocationValue(atom: Atom, relocation: types.Relocation, wasm_bin: *const Wasm) u64 {
-    const target_loc = if (atom.file == .null)
-        (Wasm.SymbolLoc{ .file = null, .index = relocation.index }).finalLoc(wasm_bin)
-    else
-        (Wasm.SymbolLoc{ .file = @intFromEnum(atom.file), .index = relocation.index }).finalLoc(wasm_bin);
-
+    const target_loc = (Wasm.SymbolLoc{ .file = atom.file, .index = relocation.index }).finalLoc(wasm_bin);
     const symbol = target_loc.getSymbol(wasm_bin);
     if (relocation.relocation_type != .R_WASM_TYPE_INDEX_LEB and
         symbol.tag != .section and
@@ -142,7 +135,7 @@ fn relocationValue(atom: Atom, relocation: types.Relocation, wasm_bin: *const Wa
         .R_WASM_TABLE_INDEX_I64,
         .R_WASM_TABLE_INDEX_SLEB,
         .R_WASM_TABLE_INDEX_SLEB64,
-        => return wasm_bin.function_table.get(.{ .file = @intFromEnum(atom.file), .index = relocation.index }) orelse 0,
+        => return wasm_bin.function_table.get(.{ .file = atom.file, .index = relocation.index }) orelse 0,
         .R_WASM_TYPE_INDEX_LEB => {
             const obj_file = wasm_bin.file(atom.file) orelse return relocation.index;
             const original_type = obj_file.funcTypes()[relocation.index];
src/link/Wasm/Object.zig
@@ -952,7 +952,7 @@ pub fn parseSymbolIntoAtom(object: *Object, wasm: *Wasm, symbol_index: u32) !Ato
                 .R_WASM_TABLE_INDEX_SLEB64,
                 => {
                     try wasm.function_table.put(gpa, .{
-                        .file = @intFromEnum(object.index),
+                        .file = object.index,
                         .index = reloc.index,
                     }, 0);
                 },
@@ -961,10 +961,7 @@ pub fn parseSymbolIntoAtom(object: *Object, wasm: *Wasm, symbol_index: u32) !Ato
                 => {
                     const sym = object.symtable[reloc.index];
                     if (sym.tag != .global) {
-                        try wasm.got_symbols.append(
-                            gpa,
-                            .{ .file = @intFromEnum(object.index), .index = reloc.index },
-                        );
+                        try wasm.got_symbols.append(gpa, .{ .file = object.index, .index = reloc.index });
                     }
                 },
                 else => {},
src/link/Wasm/ZigObject.zig
@@ -468,7 +468,7 @@ pub fn getErrorTableSymbol(zig_object: *ZigObject, wasm_file: *Wasm) !u32 {
 fn populateErrorNameTable(zig_object: *ZigObject, wasm_file: *Wasm) !void {
     const symbol_index = zig_object.error_table_symbol orelse return;
     const gpa = wasm_file.base.comp.gpa;
-    const atom_index = wasm_file.symbol_atom.get(.{ .file = null, .index = symbol_index }).?;
+    const atom_index = wasm_file.symbol_atom.get(.{ .file = zig_object.index, .index = symbol_index }).?;
 
     // Rather than creating a symbol for each individual error name,
     // we create a symbol for the entire region of error names. We then calculate
@@ -633,7 +633,7 @@ pub fn getDeclVAddr(
     const target_symbol_index = wasm_file.getAtom(target_atom_index).sym_index;
 
     std.debug.assert(reloc_info.parent_atom_index != 0);
-    const atom_index = wasm_file.symbol_atom.get(.{ .file = null, .index = reloc_info.parent_atom_index }).?;
+    const atom_index = wasm_file.symbol_atom.get(.{ .file = zig_object.index, .index = reloc_info.parent_atom_index }).?;
     const atom = wasm_file.getAtomPtr(atom_index);
     const is_wasm32 = target.cpu.arch == .wasm32;
     if (decl.ty.zigTypeTag(mod) == .Fn) {
@@ -670,7 +670,7 @@ pub fn getAnonDeclVAddr(
     const atom_index = zig_object.anon_decls.get(decl_val).?;
     const target_symbol_index = wasm_file.getAtom(atom_index).getSymbolIndex().?;
 
-    const parent_atom_index = wasm_file.symbol_atom.get(.{ .file = null, .index = reloc_info.parent_atom_index }).?;
+    const parent_atom_index = wasm_file.symbol_atom.get(.{ .file = zig_object.index, .index = reloc_info.parent_atom_index }).?;
     const parent_atom = wasm_file.getAtomPtr(parent_atom_index);
     const is_wasm32 = target.cpu.arch == .wasm32;
     const mod = wasm_file.base.comp.module.?;
@@ -705,7 +705,7 @@ pub fn deleteDeclExport(
 ) void {
     const atom_index = zig_object.decls.get(decl_index) orelse return;
     const sym_index = wasm_file.getAtom(atom_index).sym_index;
-    const loc: Wasm.SymbolLoc = .{ .file = null, .index = sym_index };
+    const loc: Wasm.SymbolLoc = .{ .file = zig_object.index, .index = sym_index };
     const sym = loc.getSymbol(wasm_file);
     std.debug.assert(zig_object.global_syms.remove(sym.name));
 }
@@ -1161,7 +1161,7 @@ pub fn storeDeclType(zig_object: *ZigObject, gpa: std.mem.Allocator, decl_index:
 /// its relocations and create any GOT symbols or function table indexes it may require.
 pub fn parseSymbolIntoAtom(zig_object: *ZigObject, wasm_file: *Wasm, index: u32) !Atom.Index {
     const gpa = wasm_file.base.comp.gpa;
-    const loc: Wasm.SymbolLoc = .{ .file = @intFromEnum(zig_object.index), .index = index };
+    const loc: Wasm.SymbolLoc = .{ .file = zig_object.index, .index = index };
     const final_index = try wasm_file.getMatchingSegment(zig_object.index, index);
     const atom_index = wasm_file.symbol_atom.get(loc).?;
     try wasm_file.appendAtomAtIndex(final_index, atom_index);
src/link/Wasm.zig
@@ -125,7 +125,10 @@ func_types: std.ArrayListUnmanaged(std.wasm.Type) = .{},
 /// Output function section where the key is the original
 /// function index and the value is function.
 /// This allows us to map multiple symbols to the same function.
-functions: std.AutoArrayHashMapUnmanaged(struct { file: ?u16, index: u32 }, struct { func: std.wasm.Func, sym_index: u32 }) = .{},
+functions: std.AutoArrayHashMapUnmanaged(
+    struct { file: File.Index, index: u32 },
+    struct { func: std.wasm.Func, sym_index: u32 },
+) = .{},
 /// Output global section
 wasm_globals: std.ArrayListUnmanaged(std.wasm.Global) = .{},
 /// Memory section
@@ -217,16 +220,14 @@ pub const SymbolLoc = struct {
     /// The index of the symbol within the specified file
     index: u32,
     /// The index of the object file where the symbol resides.
-    /// When this is `null` the symbol comes from a non-object file.
-    file: ?u16,
+    file: File.Index,
 
     /// From a given location, returns the corresponding symbol in the wasm binary
     pub fn getSymbol(loc: SymbolLoc, wasm_file: *const Wasm) *Symbol {
         if (wasm_file.discarded.get(loc)) |new_loc| {
             return new_loc.getSymbol(wasm_file);
         }
-        if (loc.file) |object_index| {
-            const obj_file = wasm_file.file(@enumFromInt(object_index)).?;
+        if (wasm_file.file(loc.file)) |obj_file| {
             return obj_file.symbol(loc.index);
         }
         return &wasm_file.synthetic_symbols.items[loc.index];
@@ -237,8 +238,7 @@ pub const SymbolLoc = struct {
         if (wasm_file.discarded.get(loc)) |new_loc| {
             return new_loc.getName(wasm_file);
         }
-        if (loc.file) |object_index| {
-            const obj_file = wasm_file.file(@enumFromInt(object_index)).?;
+        if (wasm_file.file(loc.file)) |obj_file| {
             return obj_file.symbolName(loc.index);
         }
         return wasm_file.string_table.get(wasm_file.synthetic_symbols.items[loc.index].name);
@@ -263,7 +263,7 @@ pub const InitFuncLoc = struct {
     /// object file index in the list of objects.
     /// Unlike `SymbolLoc` this cannot be `null` as we never define
     /// our own ctors.
-    file: u16,
+    file: File.Index,
     /// Symbol index within the corresponding object file.
     index: u32,
     /// The priority in which the constructor must be called.
@@ -633,7 +633,7 @@ fn createSyntheticSymbol(wasm: *Wasm, name: []const u8, tag: Symbol.Tag) !Symbol
 
 fn createSyntheticSymbolOffset(wasm: *Wasm, name_offset: u32, tag: Symbol.Tag) !SymbolLoc {
     const sym_index = @as(u32, @intCast(wasm.synthetic_symbols.items.len));
-    const loc: SymbolLoc = .{ .index = sym_index, .file = null };
+    const loc: SymbolLoc = .{ .index = sym_index, .file = .null };
     const gpa = wasm.base.comp.gpa;
     try wasm.synthetic_symbols.append(gpa, .{
         .name = name_offset,
@@ -680,7 +680,7 @@ pub fn createAtom(wasm: *Wasm, sym_index: u32, file_index: File.Index) !Atom.Ind
     const index: Atom.Index = @intCast(wasm.managed_atoms.items.len);
     const atom = try wasm.managed_atoms.addOne(gpa);
     atom.* = .{ .file = file_index, .sym_index = sym_index };
-    try wasm.symbol_atom.putNoClobber(gpa, .{ .file = null, .index = sym_index }, index);
+    try wasm.symbol_atom.putNoClobber(gpa, atom.symbolLoc(), index);
 
     return index;
 }
@@ -763,10 +763,7 @@ fn resolveSymbolsInObject(wasm: *Wasm, file_index: File.Index) !void {
 
     for (obj_file.symbols(), 0..) |symbol, i| {
         const sym_index: u32 = @intCast(i);
-        const location: SymbolLoc = .{
-            .file = @intFromEnum(file_index),
-            .index = sym_index,
-        };
+        const location: SymbolLoc = .{ .file = file_index, .index = sym_index };
         const sym_name = obj_file.string(symbol.name);
         if (mem.eql(u8, sym_name, "__indirect_function_table")) {
             continue;
@@ -796,9 +793,10 @@ fn resolveSymbolsInObject(wasm: *Wasm, file_index: File.Index) !void {
 
         const existing_loc = maybe_existing.value_ptr.*;
         const existing_sym: *Symbol = existing_loc.getSymbol(wasm);
+        const existing_file = wasm.file(existing_loc.file);
 
-        const existing_file_path = if (existing_loc.file) |existing_file_index|
-            wasm.file(@enumFromInt(existing_file_index)).?.path()
+        const existing_file_path = if (existing_file) |existing_obj_file|
+            existing_obj_file.path()
         else
             wasm.name;
 
@@ -831,8 +829,7 @@ fn resolveSymbolsInObject(wasm: *Wasm, file_index: File.Index) !void {
         if (existing_sym.isUndefined() and symbol.isUndefined()) {
             // only verify module/import name for function symbols
             if (symbol.tag == .function) {
-                const existing_name = if (existing_loc.file) |existing_file_index| blk: {
-                    const existing_obj = wasm.file(@enumFromInt(existing_file_index)).?;
+                const existing_name = if (existing_file) |existing_obj| blk: {
                     const imp = existing_obj.import(existing_loc.index);
                     break :blk existing_obj.string(imp.module_name);
                 } else blk: {
@@ -1363,8 +1360,8 @@ fn checkUndefinedSymbols(wasm: *const Wasm) !void {
         const symbol = undef.getSymbol(wasm);
         if (symbol.tag == .data) {
             found_undefined_symbols = true;
-            const file_name = if (undef.file) |file_index|
-                wasm.file(@enumFromInt(file_index)).?.path()
+            const file_name = if (wasm.file(undef.file)) |obj_file|
+                obj_file.path()
             else
                 wasm.name;
             const symbol_name = undef.getName(wasm);
@@ -1461,8 +1458,7 @@ fn getGlobalType(wasm: *const Wasm, loc: SymbolLoc) std.wasm.GlobalType {
     const symbol = loc.getSymbol(wasm);
     assert(symbol.tag == .global);
     const is_undefined = symbol.isUndefined();
-    if (loc.file) |file_index| {
-        const obj_file = wasm.file(@enumFromInt(file_index)).?;
+    if (wasm.file(loc.file)) |obj_file| {
         if (is_undefined) {
             return obj_file.import(loc.index).kind.global;
         }
@@ -1480,8 +1476,7 @@ fn getFunctionSignature(wasm: *const Wasm, loc: SymbolLoc) std.wasm.Type {
     const symbol = loc.getSymbol(wasm);
     assert(symbol.tag == .function);
     const is_undefined = symbol.isUndefined();
-    if (loc.file) |file_index| {
-        const obj_file = wasm.file(@enumFromInt(file_index)).?;
+    if (wasm.file(loc.file)) |obj_file| {
         if (is_undefined) {
             const ty_index = obj_file.import(loc.index).kind.function;
             return obj_file.funcTypes()[ty_index];
@@ -1625,8 +1620,8 @@ fn allocateAtoms(wasm: *Wasm) !void {
             // Ensure we get the original symbol, so we verify the correct symbol on whether
             // it is dead or not and ensure an atom is removed when dead.
             // This is required as we may have parsed aliases into atoms.
-            const sym = if (symbol_loc.file) |file_index|
-                wasm.file(@enumFromInt(file_index)).?.symbol(symbol_loc.index).*
+            const sym = if (wasm.file(symbol_loc.file)) |obj_file|
+                obj_file.symbol(symbol_loc.index).*
             else
                 wasm.synthetic_symbols.items[symbol_loc.index];
 
@@ -1754,10 +1749,10 @@ fn setupInitFunctions(wasm: *Wasm) !void {
             log.debug("appended init func '{s}'\n", .{object.string_table.get(symbol.name)});
             wasm.init_funcs.appendAssumeCapacity(.{
                 .index = init_func.symbol_index,
-                .file = @intFromEnum(file_index),
+                .file = file_index,
                 .priority = init_func.priority,
             });
-            try wasm.mark(.{ .index = init_func.symbol_index, .file = @intFromEnum(file_index) });
+            try wasm.mark(.{ .index = init_func.symbol_index, .file = file_index });
         }
     }
 
@@ -1841,7 +1836,7 @@ fn createSyntheticFunction(
     const func_index = wasm.imported_functions_count + @as(u32, @intCast(wasm.functions.count()));
     try wasm.functions.putNoClobber(
         gpa,
-        .{ .file = null, .index = func_index },
+        .{ .file = .null, .index = func_index },
         .{ .func = .{ .type_index = ty_index }, .sym_index = loc.index },
     );
     symbol.index = func_index;
@@ -1849,8 +1844,8 @@ fn createSyntheticFunction(
     // create the atom that will be output into the final binary
     const atom_index = try wasm.createAtom(loc.index, .null);
     const atom = wasm.getAtomPtr(atom_index);
-    atom.code = function_body.moveToUnmanaged();
     atom.size = @intCast(function_body.items.len);
+    atom.code = function_body.moveToUnmanaged();
     try wasm.appendAtomAtIndex(wasm.code_section_index.?, atom_index);
 }
 
@@ -1969,20 +1964,8 @@ fn initializeTLSFunction(wasm: *Wasm) !void {
 fn setupImports(wasm: *Wasm) !void {
     const gpa = wasm.base.comp.gpa;
     log.debug("Merging imports", .{});
-    var discarded_it = wasm.discarded.keyIterator();
-    while (discarded_it.next()) |discarded| {
-        if (discarded.file == null) {
-            // remove an import if it was resolved
-            if (wasm.imports.remove(discarded.*)) {
-                log.debug("Removed symbol '{s}' as an import", .{
-                    discarded.getName(wasm),
-                });
-            }
-        }
-    }
-
     for (wasm.resolved_symbols.keys()) |symbol_loc| {
-        const file_index = symbol_loc.file orelse {
+        const obj_file = wasm.file(symbol_loc.file) orelse {
             // Synthetic symbols will already exist in the `import` section
             continue;
         };
@@ -1996,7 +1979,6 @@ fn setupImports(wasm: *Wasm) !void {
         }
 
         log.debug("Symbol '{s}' will be imported from the host", .{symbol_loc.getName(wasm)});
-        const obj_file = wasm.file(@enumFromInt(file_index)).?;
         const import = obj_file.import(symbol_loc.index);
 
         // We copy the import to a new import to ensure the names contain references
@@ -2054,15 +2036,13 @@ fn mergeSections(wasm: *Wasm) !void {
     defer removed_duplicates.deinit();
 
     for (wasm.resolved_symbols.keys()) |sym_loc| {
-        const file_index = sym_loc.file orelse {
+        const obj_file = wasm.file(sym_loc.file) orelse {
             // Zig code-generated symbols are already within the sections and do not
             // require to be merged
             continue;
         };
 
-        const obj_file = wasm.file(@enumFromInt(file_index)).?;
         const symbol = obj_file.symbol(sym_loc.index);
-
         if (symbol.isDead() or symbol.isUndefined()) {
             // Skip undefined symbols as they go in the `import` section
             continue;
@@ -2105,7 +2085,7 @@ fn mergeSections(wasm: *Wasm) !void {
                 symbol.index = @as(u32, @intCast(wasm.tables.items.len)) + wasm.imported_tables_count;
                 try wasm.tables.append(gpa, original_table);
             },
-            else => continue,
+            else => {},
         }
     }
 
@@ -2132,12 +2112,11 @@ fn mergeTypes(wasm: *Wasm) !void {
     defer dirty.deinit();
 
     for (wasm.resolved_symbols.keys()) |sym_loc| {
-        const file_index = sym_loc.file orelse {
+        const obj_file = wasm.file(sym_loc.file) orelse {
             // zig code-generated symbols are already present in final type section
             continue;
         };
 
-        const obj_file = wasm.file(@enumFromInt(file_index)).?;
         const symbol = obj_file.symbol(sym_loc.index);
         if (symbol.tag != .function or symbol.isDead()) {
             // Only functions have types. Only retrieve the type of referenced functions.
@@ -2191,7 +2170,7 @@ fn setupExports(wasm: *Wasm) !void {
 
         const sym_name = sym_loc.getName(wasm);
         const export_name = if (wasm.export_names.get(sym_loc)) |name| name else blk: {
-            if (sym_loc.file == null) break :blk symbol.name;
+            if (sym_loc.file == .null) break :blk symbol.name;
             break :blk try wasm.string_table.put(gpa, sym_name);
         };
         const exp: types.Export = if (symbol.tag == .data) exp: {
@@ -2425,7 +2404,7 @@ pub fn getMatchingSegment(wasm: *Wasm, file_index: File.Index, symbol_index: u32
             break :blk index;
         },
         .section => {
-            const section_name = obj_file.symbolName(symbol.index);
+            const section_name = obj_file.symbolName(symbol_index);
             if (mem.eql(u8, section_name, ".debug_info")) {
                 return wasm.debug_info_index orelse blk: {
                     wasm.debug_info_index = index;
@@ -2475,7 +2454,7 @@ pub fn getMatchingSegment(wasm: *Wasm, file_index: File.Index, symbol_index: u32
                     break :blk index;
                 };
             } else {
-                log.warn("found unknown section '{s}'", .{section_name});
+                log.err("found unknown section '{s}'", .{section_name});
                 return error.UnexpectedValue;
             }
         },
@@ -4221,10 +4200,7 @@ fn emitDataRelocations(
             size_offset += getULEB128Size(atom.size);
             for (atom.relocs.items) |relocation| {
                 count += 1;
-                const sym_loc: SymbolLoc = .{
-                    .file = atom.file,
-                    .index = relocation.index,
-                };
+                const sym_loc: SymbolLoc = .{ .file = atom.file, .index = relocation.index };
                 const symbol_index = symbol_table.get(sym_loc).?;
                 try leb.writeULEB128(writer, @intFromEnum(relocation.relocation_type));
                 const offset = atom.offset + relocation.offset + size_offset;
@@ -4322,8 +4298,7 @@ fn markReferences(wasm: *Wasm) !void {
         // Debug sections may require to be parsed and marked when it contains
         // relocations to alive symbols.
         if (sym.tag == .section and comp.config.debug_format != .strip) {
-            const file_index = sym_loc.file orelse continue; // Incremental debug info is done independently
-            const obj_file = wasm.file(@enumFromInt(file_index)).?;
+            const obj_file = wasm.file(sym_loc.file) orelse continue; // Incremental debug info is done independently
             _ = try obj_file.parseSymbolIntoAtom(wasm, sym_loc.index);
             sym.mark();
         }
@@ -4347,10 +4322,10 @@ fn mark(wasm: *Wasm, loc: SymbolLoc) !void {
         return;
     }
 
-    const atom_index = if (loc.file) |file_index| idx: {
-        const obj_file = wasm.file(@enumFromInt(file_index)).?;
-        break :idx try obj_file.parseSymbolIntoAtom(wasm, loc.index);
-    } else wasm.symbol_atom.get(loc) orelse return;
+    const atom_index = if (wasm.file(loc.file)) |obj_file|
+        try obj_file.parseSymbolIntoAtom(wasm, loc.index)
+    else
+        wasm.symbol_atom.get(loc) orelse return;
 
     const atom = wasm.getAtom(atom_index);
     for (atom.relocs.items) |reloc| {