Commit f5a31cb0d6
Changed files (3)
src
link
src/link/Wasm/Object.zig
@@ -24,7 +24,7 @@ name: []const u8,
/// Parsed type section
func_types: []const std.wasm.Type = &.{},
/// A list of all imports for this module
-imports: []const std.wasm.Import = &.{},
+imports: []const types.Import = &.{},
/// Parsed function section
functions: []const std.wasm.Func = &.{},
/// Parsed table section
@@ -34,7 +34,7 @@ memories: []const std.wasm.Memory = &.{},
/// Parsed global section
globals: []const std.wasm.Global = &.{},
/// Parsed export section
-exports: []const std.wasm.Export = &.{},
+exports: []const types.Export = &.{},
/// Parsed element section
elements: []const std.wasm.Element = &.{},
/// Represents the function ID that must be called on startup.
@@ -127,18 +127,11 @@ pub fn deinit(self: *Object, gpa: Allocator) void {
gpa.free(func_ty.returns);
}
gpa.free(self.func_types);
- for (self.imports) |imp| {
- gpa.free(imp.name);
- gpa.free(imp.module_name);
- }
gpa.free(self.functions);
gpa.free(self.imports);
gpa.free(self.tables);
gpa.free(self.memories);
gpa.free(self.globals);
- for (self.exports) |exp| {
- gpa.free(exp.name);
- }
gpa.free(self.exports);
gpa.free(self.elements);
gpa.free(self.features);
@@ -163,7 +156,7 @@ pub fn deinit(self: *Object, gpa: Allocator) void {
/// Finds the import within the list of imports from a given kind and index of that kind.
/// Asserts the import exists
-pub fn findImport(self: *const Object, import_kind: std.wasm.ExternalKind, index: u32) std.wasm.Import {
+pub fn findImport(self: *const Object, import_kind: std.wasm.ExternalKind, index: u32) types.Import {
var i: u32 = 0;
return for (self.imports) |import| {
if (std.meta.activeTag(import.kind) == import_kind) {
@@ -187,7 +180,7 @@ pub fn importedCountByKind(self: *const Object, kind: std.wasm.ExternalKind) u32
/// we initialize a new table symbol that corresponds to that import and return that symbol.
///
/// When the object file is *NOT* MVP, we return `null`.
-fn checkLegacyIndirectFunctionTable(self: *Object, gpa: Allocator) !?Symbol {
+fn checkLegacyIndirectFunctionTable(self: *Object) !?Symbol {
var table_count: usize = 0;
for (self.symtable) |sym| {
if (sym.tag == .table) table_count += 1;
@@ -217,20 +210,20 @@ fn checkLegacyIndirectFunctionTable(self: *Object, gpa: Allocator) !?Symbol {
return error.MissingTableSymbols;
}
- var table_import: std.wasm.Import = for (self.imports) |imp| {
+ var table_import: types.Import = for (self.imports) |imp| {
if (imp.kind == .table) {
break imp;
}
} else unreachable;
- if (!std.mem.eql(u8, table_import.name, "__indirect_function_table")) {
- log.err("Non-indirect function table import '{s}' is missing a corresponding symbol", .{table_import.name});
+ if (!std.mem.eql(u8, self.string_table.get(table_import.name), "__indirect_function_table")) {
+ log.err("Non-indirect function table import '{s}' is missing a corresponding symbol", .{self.string_table.get(table_import.name)});
return error.MissingTableSymbols;
}
var table_symbol: Symbol = .{
.flags = 0,
- .name = try self.string_table.put(gpa, table_import.name),
+ .name = table_import.name,
.tag = .table,
.index = 0,
};
@@ -353,12 +346,12 @@ fn Parser(comptime ReaderType: type) type {
for (try readVec(&self.object.imports, reader, gpa)) |*import| {
const module_len = try readLeb(u32, reader);
const module_name = try gpa.alloc(u8, module_len);
- errdefer gpa.free(module_name);
+ defer gpa.free(module_name);
try reader.readNoEof(module_name);
const name_len = try readLeb(u32, reader);
const name = try gpa.alloc(u8, name_len);
- errdefer gpa.free(name);
+ defer gpa.free(name);
try reader.readNoEof(name);
const kind = try readEnum(std.wasm.ExternalKind, reader);
@@ -376,8 +369,8 @@ fn Parser(comptime ReaderType: type) type {
};
import.* = .{
- .module_name = module_name,
- .name = name,
+ .module_name = try self.object.string_table.put(gpa, module_name),
+ .name = try self.object.string_table.put(gpa, name),
.kind = kind_value,
};
}
@@ -420,10 +413,10 @@ fn Parser(comptime ReaderType: type) type {
for (try readVec(&self.object.exports, reader, gpa)) |*exp| {
const name_len = try readLeb(u32, reader);
const name = try gpa.alloc(u8, name_len);
- errdefer gpa.free(name);
+ defer gpa.free(name);
try reader.readNoEof(name);
exp.* = .{
- .name = name,
+ .name = try self.object.string_table.put(gpa, name),
.kind = try readEnum(std.wasm.ExternalKind, reader),
.index = try readLeb(u32, reader),
};
@@ -675,7 +668,7 @@ fn Parser(comptime ReaderType: type) type {
// we found all symbols, check for indirect function table
// in case of an MVP object file
- if (try self.object.checkLegacyIndirectFunctionTable(gpa)) |symbol| {
+ if (try self.object.checkLegacyIndirectFunctionTable()) |symbol| {
try symbols.append(symbol);
log.debug("Found legacy indirect function table. Created symbol", .{});
}
@@ -720,7 +713,7 @@ fn Parser(comptime ReaderType: type) type {
},
else => {
symbol.index = try leb.readULEB128(u32, reader);
- var maybe_import: ?std.wasm.Import = null;
+ var maybe_import: ?types.Import = null;
const is_undefined = symbol.isUndefined();
if (is_undefined) {
@@ -734,7 +727,7 @@ fn Parser(comptime ReaderType: type) type {
try reader.readNoEof(name);
symbol.name = try self.object.string_table.put(gpa, name);
} else {
- symbol.name = try self.object.string_table.put(gpa, maybe_import.?.name);
+ symbol.name = maybe_import.?.name;
}
},
}
src/link/Wasm/types.zig
@@ -78,6 +78,26 @@ pub const Relocation = struct {
}
};
+/// Unlike the `Import` object defined by the wasm spec, and existing
+/// in the std.wasm namespace, this construct stores the 'module name' and 'name'
+/// of the import as offsets into a string table, rather than as the slices themselves.
+/// This saves us (potentially) 24 bytes per import on 64-bit machines.
+pub const Import = struct {
+ module_name: u32,
+ name: u32,
+ kind: std.wasm.Import.Kind,
+};
+
+/// Unlike the `Export` object defined by the wasm spec, and existing
+/// in the std.wasm namespace, this construct stores the 'name'
+/// of the export as an offset into a string table, rather than as the slice itself.
+/// This saves us (potentially) 12 bytes per export on 64-bit machines.
+pub const Export = struct {
+ name: u32,
+ index: u32,
+ kind: std.wasm.ExternalKind,
+};
+
pub const SubsectionType = enum(u8) {
WASM_SEGMENT_INFO = 5,
WASM_INIT_FUNCS = 6,
src/link/Wasm.zig
@@ -69,8 +69,8 @@ imported_globals_count: u32 = 0,
/// The count of imported tables. This number will be appended
/// to the table indexes when sections are merged.
imported_tables_count: u32 = 0,
-/// Map of symbol locations, represented by its `wasm.Import`
-imports: std.AutoHashMapUnmanaged(SymbolLoc, wasm.Import) = .{},
+/// Maps each symbol location to its corresponding `types.Import`
+imports: std.AutoHashMapUnmanaged(SymbolLoc, types.Import) = .{},
/// Represents non-synthetic section entries.
/// Used for code, data and custom sections.
segments: std.ArrayListUnmanaged(Segment) = .{},
@@ -94,7 +94,7 @@ memories: wasm.Memory = .{ .limits = .{ .min = 0, .max = null } },
/// Output table section
tables: std.ArrayListUnmanaged(wasm.Table) = .{},
/// Output export section
-exports: std.ArrayListUnmanaged(wasm.Export) = .{},
+exports: std.ArrayListUnmanaged(types.Export) = .{},
/// Indirect function table, used to call function pointers
/// When this is non-zero, we must emit a table entry,
@@ -105,8 +105,8 @@ function_table: std.AutoHashMapUnmanaged(u32, u32) = .{},
/// All object files and their data which are linked into the final binary
objects: std.ArrayListUnmanaged(Object) = .{},
-/// A map of global names to their symbol location
-globals: std.StringHashMapUnmanaged(SymbolLoc) = .{},
+/// A map of global names (read: offset into string table) to their symbol location
+globals: std.AutoHashMapUnmanaged(u32, SymbolLoc) = .{},
/// Maps discarded symbols and their positions to the location of the symbol
/// it was resolved to
discarded: std.AutoHashMapUnmanaged(SymbolLoc, SymbolLoc) = .{},
@@ -119,7 +119,8 @@ resolved_symbols: std.AutoArrayHashMapUnmanaged(SymbolLoc, void) = .{},
symbol_atom: std.AutoHashMapUnmanaged(SymbolLoc, *Atom) = .{},
/// Maps a symbol's location to its export name, which may differ from the decl's name
/// which does the exporting.
-export_names: std.AutoHashMapUnmanaged(SymbolLoc, []const u8) = .{},
+/// Note: The value represents the offset into the string table, rather than the actual string.
+export_names: std.AutoHashMapUnmanaged(SymbolLoc, u32) = .{},
pub const Segment = struct {
alignment: u32,
@@ -223,6 +224,15 @@ pub const StringTable = struct {
return mem.sliceTo(@ptrCast([*:0]const u8, self.string_data.items.ptr + off), 0);
}
+ /// Returns the offset of a given string when it exists.
+ /// Will return null if the given string does not yet exist within the string table.
+ pub fn getOffset(self: *StringTable, string: []const u8) ?u32 {
+ return self.string_table.getKeyAdapted(
+ string,
+ std.hash_map.StringIndexAdapter{ .bytes = &self.string_data },
+ );
+ }
+
/// Frees all resources of the string table. Any references pointing
/// to the strings will be invalid.
pub fn deinit(self: *StringTable, allocator: Allocator) void {
@@ -250,16 +260,17 @@ pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Option
try file.writeAll(&(wasm.magic ++ wasm.version));
// As sym_index '0' is reserved, we use it for our stack pointer symbol
+ const sym_name = try wasm_bin.string_table.put(allocator, "__stack_pointer");
const symbol = try wasm_bin.symbols.addOne(allocator);
symbol.* = .{
- .name = try wasm_bin.string_table.put(allocator, "__stack_pointer"),
+ .name = sym_name,
.tag = .global,
.flags = 0,
.index = 0,
};
const loc: SymbolLoc = .{ .file = null, .index = 0 };
try wasm_bin.resolved_symbols.putNoClobber(allocator, loc, {});
- try wasm_bin.globals.putNoClobber(allocator, "__stack_pointer", loc);
+ try wasm_bin.globals.putNoClobber(allocator, sym_name, loc);
// For object files we will import the stack pointer symbol
if (options.output_mode == .Obj) {
@@ -268,8 +279,8 @@ pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Option
allocator,
.{ .file = null, .index = 0 },
.{
- .module_name = wasm_bin.host_name,
- .name = "__stack_pointer",
+ .module_name = try wasm_bin.string_table.put(allocator, wasm_bin.host_name),
+ .name = sym_name,
.kind = .{ .global = .{ .valtype = .i32, .mutable = true } },
},
);
@@ -344,6 +355,7 @@ fn resolveSymbolsInObject(self: *Wasm, object_index: u16) !void {
.index = sym_index,
};
const sym_name = object.string_table.get(symbol.name);
+ const sym_name_index = try self.string_table.put(self.base.allocator, sym_name);
if (symbol.isLocal()) {
if (symbol.isUndefined()) {
@@ -358,7 +370,7 @@ fn resolveSymbolsInObject(self: *Wasm, object_index: u16) !void {
// TODO: locals are allowed to have duplicate symbol names
// TODO: Store undefined symbols so we can verify at the end if they've all been found
// if not, emit an error (unless --allow-undefined is enabled).
- const maybe_existing = try self.globals.getOrPut(self.base.allocator, sym_name);
+ const maybe_existing = try self.globals.getOrPut(self.base.allocator, sym_name_index);
if (!maybe_existing.found_existing) {
maybe_existing.value_ptr.* = location;
try self.resolved_symbols.putNoClobber(self.base.allocator, location, {});
@@ -383,13 +395,18 @@ fn resolveSymbolsInObject(self: *Wasm, object_index: u16) !void {
continue; // Do not overwrite defined symbols with undefined symbols
}
+ // when both symbols are weak, we skip overwriting
+ if (existing_sym.isWeak() and symbol.isWeak()) {
+ continue;
+ }
+
// simply overwrite with the new symbol
log.debug("Overwriting symbol '{s}'", .{sym_name});
log.debug(" old definition in '{s}'", .{existing_file_path});
log.debug(" new definition in '{s}'", .{object.name});
try self.discarded.putNoClobber(self.base.allocator, maybe_existing.value_ptr.*, location);
maybe_existing.value_ptr.* = location;
- try self.globals.put(self.base.allocator, sym_name, location);
+ try self.globals.put(self.base.allocator, sym_name_index, location);
try self.resolved_symbols.put(self.base.allocator, location, {});
assert(self.resolved_symbols.swapRemove(existing_loc));
}
@@ -696,7 +713,7 @@ pub fn deleteExport(self: *Wasm, exp: Export) void {
if (self.export_names.fetchRemove(loc)) |kv| {
assert(self.globals.remove(kv.value));
} else {
- assert(self.globals.remove(symbol_name));
+ assert(self.globals.remove(symbol.name));
}
}
@@ -723,7 +740,9 @@ pub fn updateDeclExports(
));
continue;
}
- if (self.globals.getPtr(exp.options.name)) |existing_loc| {
+
+ const export_name = try self.string_table.put(self.base.allocator, exp.options.name);
+ if (self.globals.getPtr(export_name)) |existing_loc| {
if (existing_loc.index == decl.link.wasm.sym_index) continue;
const existing_sym: Symbol = existing_loc.getSymbol(self).*;
@@ -775,13 +794,13 @@ pub fn updateDeclExports(
}
// Ensure the symbol will be exported using the given name
if (!mem.eql(u8, exp.options.name, sym_loc.getName(self))) {
- try self.export_names.put(self.base.allocator, sym_loc, exp.options.name);
+ try self.export_names.put(self.base.allocator, sym_loc, export_name);
}
symbol.setGlobal(true);
try self.globals.put(
self.base.allocator,
- exp.options.name,
+ export_name,
sym_loc,
);
@@ -832,14 +851,14 @@ fn mapFunctionTable(self: *Wasm) void {
fn addOrUpdateImport(self: *Wasm, decl: *Module.Decl) !void {
// For the import name itself, we use the decl's name, rather than the fully qualified name
- const decl_name = mem.sliceTo(decl.name, 0);
+ const decl_name_index = try self.string_table.put(self.base.allocator, mem.sliceTo(decl.name, 0));
const symbol_index = decl.link.wasm.sym_index;
const symbol: *Symbol = &self.symbols.items[symbol_index];
symbol.setUndefined(true);
symbol.setGlobal(true);
try self.globals.putNoClobber(
self.base.allocator,
- decl_name,
+ decl_name_index,
.{ .file = null, .index = symbol_index },
);
try self.resolved_symbols.put(self.base.allocator, .{ .file = null, .index = symbol_index }, {});
@@ -852,8 +871,8 @@ fn addOrUpdateImport(self: *Wasm, decl: *Module.Decl) !void {
} else self.host_name;
if (!gop.found_existing) {
gop.value_ptr.* = .{
- .module_name = module_name,
- .name = decl_name,
+ .module_name = try self.string_table.put(self.base.allocator, module_name),
+ .name = decl_name_index,
.kind = .{ .function = decl.fn_link.wasm.type_index },
};
}
@@ -1001,9 +1020,18 @@ fn setupImports(self: *Wasm) !void {
}
log.debug("Symbol '{s}' will be imported from the host", .{symbol_loc.getName(self)});
- const import = self.objects.items[symbol_loc.file.?].findImport(symbol.tag.externalType(), symbol.index);
- // TODO: De-duplicate imports
- try self.imports.putNoClobber(self.base.allocator, symbol_loc, import);
+ const object = self.objects.items[symbol_loc.file.?];
+ const import = object.findImport(symbol.tag.externalType(), symbol.index);
+
+ // We copy the import to a new import to ensure the names contain references
+ // to the internal string table, rather than to that of the object file.
+ var new_imp: types.Import = .{
+ .module_name = try self.string_table.put(self.base.allocator, object.string_table.get(import.module_name)),
+ .name = try self.string_table.put(self.base.allocator, object.string_table.get(import.name)),
+ .kind = import.kind,
+ };
+ // TODO: De-duplicate imports when they contain the same names and type
+ try self.imports.putNoClobber(self.base.allocator, symbol_loc, new_imp);
}
// Assign all indexes of the imports to their representing symbols
@@ -1013,7 +1041,7 @@ fn setupImports(self: *Wasm) !void {
var it = self.imports.iterator();
while (it.next()) |entry| {
const symbol = entry.key_ptr.*.getSymbol(self);
- const import: wasm.Import = entry.value_ptr.*;
+ const import: types.Import = entry.value_ptr.*;
switch (import.kind) {
.function => {
symbol.index = function_index;
@@ -1045,7 +1073,8 @@ fn setupImports(self: *Wasm) !void {
/// and merges it into a single section for each.
fn mergeSections(self: *Wasm) !void {
// append the indirect function table if initialized
- if (self.globals.get("__indirect_function_table")) |sym_loc| {
+ if (self.string_table.getOffset("__indirect_function_table")) |offset| {
+ const sym_loc = self.globals.get(offset).?;
const table: wasm.Table = .{
.limits = .{ .min = @intCast(u32, self.function_table.count()), .max = null },
.reftype = .funcref,
@@ -1114,7 +1143,7 @@ fn mergeTypes(self: *Wasm) !void {
if (symbol.isUndefined()) {
log.debug("Adding type from extern function '{s}'", .{sym_loc.getName(self)});
- const import: *wasm.Import = self.imports.getPtr(sym_loc).?;
+ const import: *types.Import = self.imports.getPtr(sym_loc).?;
const original_type = object.func_types[import.kind.function];
import.kind.function = try self.putOrGetFuncType(original_type);
} else {
@@ -1135,13 +1164,13 @@ fn setupExports(self: *Wasm) !void {
if (!symbol.isExported()) continue;
const sym_name = sym_loc.getName(self);
- const export_name = if (self.export_names.get(sym_loc)) |name| name else sym_name;
- const exp: wasm.Export = .{
+ const export_name = if (self.export_names.get(sym_loc)) |name| name else symbol.name;
+ const exp: types.Export = .{
.name = export_name,
.kind = symbol.tag.externalType(),
.index = symbol.index,
};
- log.debug("Exporting symbol '{s}' as '{s}' at index: ({d})", .{ sym_name, exp.name, exp.index });
+ log.debug("Exporting symbol '{s}' as '{s}' at index: ({d})", .{ sym_name, self.string_table.get(exp.name), exp.index });
try self.exports.append(self.base.allocator, exp);
}
@@ -1151,7 +1180,7 @@ fn setupExports(self: *Wasm) !void {
fn setupStart(self: *Wasm) !void {
const entry_name = self.base.options.entry orelse "_start";
- const symbol_loc = self.globals.get(entry_name) orelse {
+ const symbol_name_offset = self.string_table.getOffset(entry_name) orelse {
if (self.base.options.output_mode == .Exe) {
if (self.base.options.wasi_exec_model == .reactor) return; // Not required for reactors
} else {
@@ -1161,6 +1190,7 @@ fn setupStart(self: *Wasm) !void {
return error.MissingSymbol;
};
+ const symbol_loc = self.globals.get(symbol_name_offset).?;
const symbol = symbol_loc.getSymbol(self);
if (symbol.tag != .function) {
log.err("Entry symbol '{s}' is not a function", .{entry_name});
@@ -1443,9 +1473,9 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
// import table is always first table so emit that first
if (import_table) {
- const table_imp: wasm.Import = .{
- .module_name = self.host_name,
- .name = "__indirect_function_table",
+ const table_imp: types.Import = .{
+ .module_name = try self.string_table.put(self.base.allocator, self.host_name),
+ .name = try self.string_table.put(self.base.allocator, "__indirect_function_table"),
.kind = .{
.table = .{
.limits = .{
@@ -1456,23 +1486,23 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
},
},
};
- try emitImport(writer, table_imp);
+ try self.emitImport(writer, table_imp);
}
var it = self.imports.iterator();
while (it.next()) |entry| {
assert(entry.key_ptr.*.getSymbol(self).isUndefined());
const import = entry.value_ptr.*;
- try emitImport(writer, import);
+ try self.emitImport(writer, import);
}
if (import_memory) {
- const mem_imp: wasm.Import = .{
- .module_name = self.host_name,
- .name = "__linear_memory",
+ const mem_imp: types.Import = .{
+ .module_name = try self.string_table.put(self.base.allocator, self.host_name),
+ .name = try self.string_table.put(self.base.allocator, "__linear_memory"),
.kind = .{ .memory = self.memories.limits },
};
- try emitImport(writer, mem_imp);
+ try self.emitImport(writer, mem_imp);
}
try writeVecSectionHeader(
@@ -1567,8 +1597,9 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
const header_offset = try reserveVecSectionHeader(file);
const writer = file.writer();
for (self.exports.items) |exp| {
- try leb.writeULEB128(writer, @intCast(u32, exp.name.len));
- try writer.writeAll(exp.name);
+ const name = self.string_table.get(exp.name);
+ try leb.writeULEB128(writer, @intCast(u32, name.len));
+ try writer.writeAll(name);
try leb.writeULEB128(writer, @enumToInt(exp.kind));
try leb.writeULEB128(writer, exp.index);
}
@@ -1747,9 +1778,12 @@ fn emitNameSection(self: *Wasm, file: fs.File, arena: Allocator) !void {
for (self.resolved_symbols.keys()) |sym_loc| {
const symbol = sym_loc.getSymbol(self).*;
+ const name = if (symbol.isUndefined()) blk: {
+ break :blk self.string_table.get(self.imports.get(sym_loc).?.name);
+ } else sym_loc.getName(self);
switch (symbol.tag) {
- .function => funcs.appendAssumeCapacity(.{ .index = symbol.index, .name = sym_loc.getName(self) }),
- .global => globals.appendAssumeCapacity(.{ .index = symbol.index, .name = sym_loc.getName(self) }),
+ .function => funcs.appendAssumeCapacity(.{ .index = symbol.index, .name = name }),
+ .global => globals.appendAssumeCapacity(.{ .index = symbol.index, .name = name }),
else => {},
}
}
@@ -1831,12 +1865,14 @@ fn emitInit(writer: anytype, init_expr: wasm.InitExpression) !void {
try writer.writeByte(wasm.opcode(.end));
}
-fn emitImport(writer: anytype, import: wasm.Import) !void {
- try leb.writeULEB128(writer, @intCast(u32, import.module_name.len));
- try writer.writeAll(import.module_name);
+fn emitImport(self: *Wasm, writer: anytype, import: types.Import) !void {
+ const module_name = self.string_table.get(import.module_name);
+ try leb.writeULEB128(writer, @intCast(u32, module_name.len));
+ try writer.writeAll(module_name);
- try leb.writeULEB128(writer, @intCast(u32, import.name.len));
- try writer.writeAll(import.name);
+ const name = self.string_table.get(import.name);
+ try leb.writeULEB128(writer, @intCast(u32, name.len));
+ try writer.writeAll(name);
try writer.writeByte(@enumToInt(import.kind));
switch (import.kind) {
@@ -2353,7 +2389,7 @@ fn emitSymbolTable(self: *Wasm, file: fs.File, arena: Allocator, symbol_table: *
try leb.writeULEB128(writer, @enumToInt(symbol.tag));
try leb.writeULEB128(writer, symbol.flags);
- const sym_name = if (self.export_names.get(sym_loc)) |exp_name| exp_name else sym_loc.getName(self);
+ const sym_name = if (self.export_names.get(sym_loc)) |exp_name| self.string_table.get(exp_name) else sym_loc.getName(self);
switch (symbol.tag) {
.data => {
try leb.writeULEB128(writer, @intCast(u32, sym_name.len));