Commit b1159ab7ae
Changed files (4)
src
link
src/link/Wasm/Atom.zig
@@ -103,17 +103,17 @@ pub fn symbolLoc(self: Atom) Wasm.SymbolLoc {
/// at the calculated offset.
pub fn resolveRelocs(self: *Atom, wasm_bin: *const Wasm) !void {
if (self.relocs.items.len == 0) return;
- const symbol = self.symbolLoc().getSymbol(wasm_bin).*;
+ const symbol_name = self.symbolLoc().getName(wasm_bin);
log.debug("Resolving relocs in atom '{s}' count({d})", .{
- symbol.name,
+ symbol_name,
self.relocs.items.len,
});
for (self.relocs.items) |reloc| {
const value = try self.relocationValue(reloc, wasm_bin);
log.debug("Relocating '{s}' referenced in '{s}' offset=0x{x:0>8} value={d}", .{
- (Wasm.SymbolLoc{ .file = self.file, .index = reloc.index }).getSymbol(wasm_bin).name,
- symbol.name,
+ (Wasm.SymbolLoc{ .file = self.file, .index = reloc.index }).getName(wasm_bin),
+ symbol_name,
reloc.offset,
value,
});
src/link/Wasm/Object.zig
@@ -59,6 +59,10 @@ comdat_info: []const types.Comdat = &.{},
/// Represents non-synthetic sections that can essentially be mem-cpy'd into place
/// after performing relocations.
relocatable_data: []const RelocatableData = &.{},
+/// String table for all strings required by the object file, such as symbol names,
+/// import names, module names and export names. Each string is deduplicated
+/// and is referred to by its offset into the table.
+string_table: Wasm.StringTable = .{},
/// Represents a single item within a section (depending on its `type`)
const RelocatableData = struct {
@@ -142,9 +146,6 @@ pub fn deinit(self: *Object, gpa: Allocator) void {
gpa.free(val);
}
self.relocations.deinit(gpa);
- for (self.symtable) |symbol| {
- gpa.free(std.mem.sliceTo(symbol.name, 0));
- }
gpa.free(self.symtable);
gpa.free(self.comdat_info);
gpa.free(self.init_funcs);
@@ -156,6 +157,7 @@ pub fn deinit(self: *Object, gpa: Allocator) void {
gpa.free(rel_data.data[0..rel_data.size]);
}
gpa.free(self.relocatable_data);
+ self.string_table.deinit(gpa);
self.* = undefined;
}
@@ -228,7 +230,7 @@ fn checkLegacyIndirectFunctionTable(self: *Object, gpa: Allocator) !?Symbol {
var table_symbol: Symbol = .{
.flags = 0,
- .name = try gpa.dupeZ(u8, table_import.name),
+ .name = try self.string_table.put(gpa, table_import.name),
.tag = .table,
.index = 0,
};
@@ -666,7 +668,7 @@ fn Parser(comptime ReaderType: type) type {
symbol.* = try self.parseSymbol(gpa, reader);
log.debug("Found symbol: type({s}) name({s}) flags(0b{b:0>8})", .{
@tagName(symbol.tag),
- symbol.name,
+ self.object.string_table.get(symbol.name),
symbol.flags,
});
}
@@ -699,10 +701,10 @@ fn Parser(comptime ReaderType: type) type {
switch (tag) {
.data => {
const name_len = try leb.readULEB128(u32, reader);
- const name = try gpa.allocSentinel(u8, name_len, 0);
- errdefer gpa.free(name);
+ const name = try gpa.alloc(u8, name_len);
+ defer gpa.free(name);
try reader.readNoEof(name);
- symbol.name = name;
+ symbol.name = try self.object.string_table.put(gpa, name);
// Data symbols only have the following fields if the symbol is defined
if (symbol.isDefined()) {
@@ -714,7 +716,7 @@ fn Parser(comptime ReaderType: type) type {
},
.section => {
symbol.index = try leb.readULEB128(u32, reader);
- symbol.name = @tagName(symbol.tag);
+ symbol.name = try self.object.string_table.put(gpa, @tagName(symbol.tag));
},
else => {
symbol.index = try leb.readULEB128(u32, reader);
@@ -727,12 +729,12 @@ fn Parser(comptime ReaderType: type) type {
const explicit_name = symbol.hasFlag(.WASM_SYM_EXPLICIT_NAME);
if (!(is_undefined and !explicit_name)) {
const name_len = try leb.readULEB128(u32, reader);
- const name = try gpa.allocSentinel(u8, name_len, 0);
- errdefer gpa.free(name);
+ const name = try gpa.alloc(u8, name_len);
+ defer gpa.free(name);
try reader.readNoEof(name);
- symbol.name = name;
+ symbol.name = try self.object.string_table.put(gpa, name);
} else {
- symbol.name = try gpa.dupeZ(u8, maybe_import.?.name);
+ symbol.name = try self.object.string_table.put(gpa, maybe_import.?.name);
}
},
}
@@ -882,7 +884,7 @@ pub fn parseIntoAtoms(self: *Object, gpa: Allocator, object_index: u16, wasm_bin
} else {
try wasm_bin.atoms.putNoClobber(gpa, final_index, atom);
}
- log.debug("Parsed into atom: '{s}'", .{self.symtable[atom.sym_index].name});
+ log.debug("Parsed into atom: '{s}'", .{self.string_table.get(self.symtable[atom.sym_index].name)});
}
}
src/link/Wasm/Symbol.zig
@@ -1,5 +1,8 @@
-//! Wasm symbols describing its kind,
-//! name and its properties.
+//! Represents a wasm symbol, containing all of its properties
+//! as well as helper methods to determine its functionality
+//! and how it will/must be linked.
+//! The name of the symbol can be found by providing the offset, stored
+//! in the `name` field, to a string table in the wasm binary or object file.
const Symbol = @This();
const std = @import("std");
@@ -8,15 +11,15 @@ const types = @import("types.zig");
/// Bitfield containings flags for a symbol
/// Can contain any of the flags defined in `Flag`
flags: u32,
-/// Symbol name, when undefined this will be taken from the import.
-name: [*:0]const u8,
-/// An union that represents both the type of symbol
-/// as well as the data it holds.
-tag: Tag,
+/// Symbol name, when the symbol is undefined the name will be taken from the import.
+/// Note: This is an index into the string table.
+name: u32,
/// Index into the list of objects based on set `tag`
/// NOTE: This will be set to `undefined` when `tag` is `data`
/// and the symbol is undefined.
index: u32,
+/// Represents the kind of the symbol, such as a function or global.
+tag: Tag,
pub const Tag = enum {
function,
@@ -164,7 +167,7 @@ pub fn format(self: Symbol, comptime fmt: []const u8, options: std.fmt.FormatOpt
const binding: []const u8 = if (self.isLocal()) "local" else "global";
try writer.print(
- "{c} binding={s} visible={s} id={d} name={s}",
+ "{c} binding={s} visible={s} id={d} name_offset={d}",
.{ kind_fmt, binding, visible, self.index, self.name },
);
}
src/link/Wasm.zig
@@ -79,6 +79,8 @@ data_segments: std.StringArrayHashMapUnmanaged(u32) = .{},
/// A list of `types.Segment` which provide meta data
/// about a data symbol such as its name
segment_info: std.ArrayListUnmanaged(types.Segment) = .{},
+/// Deduplicated string table for strings used by symbols, imports and exports.
+string_table: StringTable = .{},
// Output sections
/// Output type section
@@ -155,6 +157,79 @@ pub const SymbolLoc = struct {
}
return &wasm_bin.symbols.items[self.index];
}
+
+    /// Returns the name of the symbol found at this location.
+    /// When the location has an entry in `wasm_bin.discarded`, the name of the
+    /// location that replaced it is returned instead (resolved recursively).
+    /// Symbols that belong to an object file are looked up in that object's
+    /// string table; all other symbols are looked up in `wasm_bin.string_table`.
+    /// The returned slice points into the string table's buffer and is not a copy.
+    pub fn getName(self: SymbolLoc, wasm_bin: *const Wasm) []const u8 {
+        if (wasm_bin.discarded.get(self)) |new_loc| {
+            return new_loc.getName(wasm_bin);
+        }
+        if (self.file) |object_index| {
+            // Take a pointer: copying the whole object just to read two of its
+            // fields is unnecessary work.
+            const object = &wasm_bin.objects.items[object_index];
+            return object.string_table.get(object.symtable[self.index].name);
+        }
+        return wasm_bin.string_table.get(wasm_bin.symbols.items[self.index].name);
+    }
+};
+
+/// Generic string table that deduplicates strings
+/// and converts them into offsets instead.
+pub const StringTable = struct {
+    /// Set of offsets into `string_data`, used to de-duplicate strings.
+    /// The keys are offsets rather than the string bytes themselves; the
+    /// context/adapter passed on each lookup is given a pointer to `string_data`
+    /// so hashing and equality can be performed on the bytes stored at an offset.
+    string_table: std.HashMapUnmanaged(
+        u32,
+        void,
+        std.hash_map.StringIndexContext,
+        std.hash_map.default_max_load_percentage,
+    ) = .{},
+    /// Holds the actual data of the string table.
+    /// The strings are stored as a contiguous array where each string is zero-terminated.
+    string_data: std.ArrayListUnmanaged(u8) = .{},
+
+    /// Accepts a string and searches for a corresponding string.
+    /// When found, de-duplicates the string and returns the existing offset instead.
+    /// When the string is not found in the `string_table`, a new entry will be inserted
+    /// and the new offset to its data will be returned.
+    /// Returns an error when allocating memory fails; in that case the table
+    /// is left without a partially inserted entry.
+    /// Note: strings are stored zero-terminated, so `get` only returns the bytes
+    /// up to the first zero byte of a string that itself contains one.
+    pub fn put(self: *StringTable, allocator: Allocator, string: []const u8) !u32 {
+        // Reserve room for the bytes (plus terminator) *before* touching the map.
+        // `getOrPutContextAdapted` creates a slot whose key is still undefined when the
+        // string is new; if an allocation failed after that point we would return
+        // with a garbage key left behind in the map.
+        try self.string_data.ensureUnusedCapacity(allocator, string.len + 1);
+
+        const gop = try self.string_table.getOrPutContextAdapted(
+            allocator,
+            string,
+            std.hash_map.StringIndexAdapter{ .bytes = &self.string_data },
+            .{ .bytes = &self.string_data },
+        );
+        if (gop.found_existing) {
+            const off = gop.key_ptr.*;
+            log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off });
+            return off;
+        }
+
+        // Nothing below this line can fail, so the new slot always receives its key.
+        const offset = @intCast(u32, self.string_data.items.len);
+
+        log.debug("writing new string '{s}' at offset 0x{x}", .{ string, offset });
+
+        self.string_data.appendSliceAssumeCapacity(string);
+        self.string_data.appendAssumeCapacity(0);
+
+        gop.key_ptr.* = offset;
+
+        return offset;
+    }
+
+    /// From a given offset, returns its corresponding string value.
+    /// Asserts offset does not exceed bounds.
+    /// The returned slice points directly into `string_data` and runs up to
+    /// (excluding) the first zero byte found at or after `off`.
+    pub fn get(self: StringTable, off: u32) []const u8 {
+        assert(off < self.string_data.items.len);
+        return mem.sliceTo(@ptrCast([*:0]const u8, self.string_data.items.ptr + off), 0);
+    }
+
+    /// Frees all resources of the string table. Any references pointing
+    /// to the strings will be invalid.
+    pub fn deinit(self: *StringTable, allocator: Allocator) void {
+        self.string_data.deinit(allocator);
+        self.string_table.deinit(allocator);
+        self.* = undefined;
+    }
};
pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Options) !*Wasm {
@@ -177,7 +252,7 @@ pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Option
// As sym_index '0' is reserved, we use it for our stack pointer symbol
const symbol = try wasm_bin.symbols.addOne(allocator);
symbol.* = .{
- .name = "__stack_pointer",
+ .name = try wasm_bin.string_table.put(allocator, "__stack_pointer"),
.tag = .global,
.flags = 0,
.index = 0,
@@ -268,12 +343,12 @@ fn resolveSymbolsInObject(self: *Wasm, object_index: u16) !void {
.file = object_index,
.index = sym_index,
};
- const sym_name = std.mem.sliceTo(symbol.name, 0);
+ const sym_name = object.string_table.get(symbol.name);
if (symbol.isLocal()) {
if (symbol.isUndefined()) {
log.err("Local symbols are not allowed to reference imports", .{});
- log.err(" symbol '{s}' defined in '{s}'", .{ symbol.name, object.name });
+ log.err(" symbol '{s}' defined in '{s}'", .{ sym_name, object.name });
return error.undefinedLocal;
}
try self.resolved_symbols.putNoClobber(self.base.allocator, location, {});
@@ -299,7 +374,7 @@ fn resolveSymbolsInObject(self: *Wasm, object_index: u16) !void {
if (!existing_sym.isUndefined()) {
if (!symbol.isUndefined()) {
- log.err("symbol '{s}' defined multiple times", .{existing_sym.name});
+ log.err("symbol '{s}' defined multiple times", .{sym_name});
log.err(" first definition in '{s}'", .{existing_file_path});
log.err(" next definition in '{s}'", .{object.name});
return error.SymbolCollision;
@@ -309,7 +384,7 @@ fn resolveSymbolsInObject(self: *Wasm, object_index: u16) !void {
}
// simply overwrite with the new symbol
- log.debug("Overwriting symbol '{s}'", .{symbol.name});
+ log.debug("Overwriting symbol '{s}'", .{sym_name});
log.debug(" old definition in '{s}'", .{existing_file_path});
log.debug(" new definition in '{s}'", .{object.name});
try self.discarded.putNoClobber(self.base.allocator, maybe_existing.value_ptr.*, location);
@@ -328,12 +403,7 @@ pub fn deinit(self: *Wasm) void {
var decl_it = self.decls.keyIterator();
while (decl_it.next()) |decl_ptr| {
- const decl = decl_ptr.*;
- const atom: *Atom = &decl.link.wasm;
- for (atom.locals.items) |local| {
- gpa.free(mem.sliceTo(self.symbols.items[local.sym_index].name, 0));
- }
- decl.link.wasm.deinit(gpa);
+ decl_ptr.*.link.wasm.deinit(gpa);
}
for (self.func_types.items) |*func_type| {
@@ -374,6 +444,8 @@ pub fn deinit(self: *Wasm) void {
self.function_table.deinit(gpa);
self.tables.deinit(gpa);
self.exports.deinit(gpa);
+
+ self.string_table.deinit(gpa);
}
pub fn allocateDeclIndexes(self: *Wasm, decl: *Module.Decl) !void {
@@ -498,7 +570,10 @@ fn finishUpdateDecl(self: *Wasm, decl: *Module.Decl, code: []const u8) !void {
atom.size = @intCast(u32, code.len);
atom.alignment = decl.ty.abiAlignment(self.base.options.target);
const symbol = &self.symbols.items[atom.sym_index];
- symbol.name = decl.name;
+
+ const full_name = try decl.getFullyQualifiedName(self.base.allocator);
+ defer self.base.allocator.free(full_name);
+ symbol.name = try self.string_table.put(self.base.allocator, full_name);
try atom.code.appendSlice(self.base.allocator, code);
}
@@ -511,8 +586,9 @@ pub fn lowerUnnamedConst(self: *Wasm, decl: *Module.Decl, tv: TypedValue) !u32 {
// Create and initialize a new local symbol and atom
const local_index = decl.link.wasm.locals.items.len;
const name = try std.fmt.allocPrintZ(self.base.allocator, "__unnamed_{s}_{d}", .{ decl.name, local_index });
+ defer self.base.allocator.free(name);
var symbol: Symbol = .{
- .name = name,
+ .name = try self.string_table.put(self.base.allocator, name),
.flags = 0,
.tag = .data,
.index = undefined,
@@ -615,7 +691,7 @@ pub fn deleteExport(self: *Wasm, exp: Export) void {
const sym_index = exp.sym_index orelse return;
const loc: SymbolLoc = .{ .file = null, .index = sym_index };
const symbol = loc.getSymbol(self);
- const symbol_name = mem.sliceTo(symbol.name, 0);
+ const symbol_name = self.string_table.get(symbol.name);
log.debug("Deleting export for decl '{s}'", .{symbol_name});
if (self.export_names.fetchRemove(loc)) |kv| {
assert(self.globals.remove(kv.value));
@@ -656,7 +732,7 @@ pub fn updateDeclExports(
// are strong symbols, we have a linker error.
// In the other case we replace one with the other.
if (!exp_is_weak and !existing_sym.isWeak()) {
- try module.failed_exports.putNoClobber(module.gpa, exp, try Module.ErrorMsg.create(
+ try module.failed_exports.put(module.gpa, exp, try Module.ErrorMsg.create(
module.gpa,
decl.srcLoc(),
\\LinkError: symbol '{s}' defined multiple times
@@ -665,6 +741,7 @@ pub fn updateDeclExports(
,
.{ exp.options.name, self.name, self.name },
));
+ continue;
} else if (exp_is_weak) {
continue; // to-be-exported symbol is weak, so we keep the existing symbol
} else {
@@ -697,7 +774,7 @@ pub fn updateDeclExports(
},
}
// Ensure the symbol will be exported using the given name
- if (!mem.eql(u8, exp.options.name, mem.sliceTo(exp.exported_decl.name, 0))) {
+ if (!mem.eql(u8, exp.options.name, sym_loc.getName(self))) {
try self.export_names.put(self.base.allocator, sym_loc, exp.options.name);
}
@@ -725,7 +802,6 @@ pub fn freeDecl(self: *Wasm, decl: *Module.Decl) void {
for (atom.locals.items) |local_atom| {
const local_symbol = &self.symbols.items[local_atom.sym_index];
local_symbol.tag = .dead; // also for any local symbol
- self.base.allocator.free(mem.sliceTo(local_symbol.name, 0));
self.symbols_free_list.append(self.base.allocator, local_atom.sym_index) catch {};
assert(self.resolved_symbols.swapRemove(local_atom.symbolLoc()));
}
@@ -755,14 +831,15 @@ fn mapFunctionTable(self: *Wasm) void {
}
fn addOrUpdateImport(self: *Wasm, decl: *Module.Decl) !void {
+ // For the import name itself, we use the decl's name, rather than the fully qualified name
+ const decl_name = mem.sliceTo(decl.name, 0);
const symbol_index = decl.link.wasm.sym_index;
const symbol: *Symbol = &self.symbols.items[symbol_index];
- symbol.name = decl.name;
symbol.setUndefined(true);
symbol.setGlobal(true);
try self.globals.putNoClobber(
self.base.allocator,
- mem.sliceTo(symbol.name, 0),
+ decl_name,
.{ .file = null, .index = symbol_index },
);
try self.resolved_symbols.put(self.base.allocator, .{ .file = null, .index = symbol_index }, {});
@@ -776,7 +853,7 @@ fn addOrUpdateImport(self: *Wasm, decl: *Module.Decl) !void {
if (!gop.found_existing) {
gop.value_ptr.* = .{
.module_name = module_name,
- .name = mem.sliceTo(symbol.name, 0),
+ .name = decl_name,
.kind = .{ .function = decl.fn_link.wasm.type_index },
};
}
@@ -815,7 +892,7 @@ fn parseAtom(self: *Wasm, atom: *Atom, kind: Kind) !void {
// TODO: Add mutables global decls to .bss section instead
const segment_name = try std.mem.concat(self.base.allocator, u8, &.{
".rodata.",
- std.mem.span(symbol.name),
+ self.string_table.get(symbol.name),
});
errdefer self.base.allocator.free(segment_name);
const segment_info: types.Segment = .{
@@ -886,7 +963,7 @@ fn allocateAtoms(self: *Wasm) !void {
atom.offset = offset;
const symbol_loc = atom.symbolLoc();
log.debug("Atom '{s}' allocated from 0x{x:0>8} to 0x{x:0>8} size={d}", .{
- symbol_loc.getSymbol(self).name,
+ symbol_loc.getName(self),
offset,
offset + atom.size,
atom.size,
@@ -906,7 +983,7 @@ fn setupImports(self: *Wasm) !void {
// remove an import if it was resolved
if (self.imports.remove(discarded.*)) {
log.debug("Removed symbol '{s}' as an import", .{
- discarded.getSymbol(self).name,
+ discarded.getName(self),
});
}
}
@@ -923,7 +1000,7 @@ fn setupImports(self: *Wasm) !void {
continue;
}
- log.debug("Symbol '{s}' will be imported from the host", .{symbol.name});
+ log.debug("Symbol '{s}' will be imported from the host", .{symbol_loc.getName(self)});
const import = self.objects.items[symbol_loc.file.?].findImport(symbol.tag.externalType(), symbol.index);
// TODO: De-duplicate imports
try self.imports.putNoClobber(self.base.allocator, symbol_loc, import);
@@ -1036,12 +1113,12 @@ fn mergeTypes(self: *Wasm) !void {
}
if (symbol.isUndefined()) {
- log.debug("Adding type from extern function '{s}'", .{symbol.name});
+ log.debug("Adding type from extern function '{s}'", .{sym_loc.getName(self)});
const import: *wasm.Import = self.imports.getPtr(sym_loc).?;
const original_type = object.func_types[import.kind.function];
import.kind.function = try self.putOrGetFuncType(original_type);
} else {
- log.debug("Adding type from function '{s}'", .{symbol.name});
+ log.debug("Adding type from function '{s}'", .{sym_loc.getName(self)});
const func = &self.functions.items[symbol.index - self.imported_functions_count];
func.type_index = try self.putOrGetFuncType(object.func_types[func.type_index]);
}
@@ -1057,13 +1134,14 @@ fn setupExports(self: *Wasm) !void {
const symbol = sym_loc.getSymbol(self);
if (!symbol.isExported()) continue;
- const export_name = if (self.export_names.get(sym_loc)) |name| name else mem.sliceTo(symbol.name, 0);
+ const sym_name = sym_loc.getName(self);
+ const export_name = if (self.export_names.get(sym_loc)) |name| name else sym_name;
const exp: wasm.Export = .{
.name = export_name,
.kind = symbol.tag.externalType(),
.index = symbol.index,
};
- log.debug("Exporting symbol '{s}' as '{s}' at index: ({d})", .{ symbol.name, exp.name, exp.index });
+ log.debug("Exporting symbol '{s}' as '{s}' at index: ({d})", .{ sym_name, exp.name, exp.index });
try self.exports.append(self.base.allocator, exp);
}
@@ -1670,8 +1748,8 @@ fn emitNameSection(self: *Wasm, file: fs.File, arena: Allocator) !void {
for (self.resolved_symbols.keys()) |sym_loc| {
const symbol = sym_loc.getSymbol(self).*;
switch (symbol.tag) {
- .function => funcs.appendAssumeCapacity(.{ .index = symbol.index, .name = mem.sliceTo(symbol.name, 0) }),
- .global => globals.appendAssumeCapacity(.{ .index = symbol.index, .name = mem.sliceTo(symbol.name, 0) }),
+ .function => funcs.appendAssumeCapacity(.{ .index = symbol.index, .name = sym_loc.getName(self) }),
+ .global => globals.appendAssumeCapacity(.{ .index = symbol.index, .name = sym_loc.getName(self) }),
else => {},
}
}
@@ -2275,11 +2353,11 @@ fn emitSymbolTable(self: *Wasm, file: fs.File, arena: Allocator, symbol_table: *
try leb.writeULEB128(writer, @enumToInt(symbol.tag));
try leb.writeULEB128(writer, symbol.flags);
+ const sym_name = if (self.export_names.get(sym_loc)) |exp_name| exp_name else sym_loc.getName(self);
switch (symbol.tag) {
.data => {
- const name = mem.sliceTo(symbol.name, 0);
- try leb.writeULEB128(writer, @intCast(u32, name.len));
- try writer.writeAll(name);
+ try leb.writeULEB128(writer, @intCast(u32, sym_name.len));
+ try writer.writeAll(sym_name);
if (symbol.isDefined()) {
try leb.writeULEB128(writer, symbol.index);
@@ -2294,9 +2372,8 @@ fn emitSymbolTable(self: *Wasm, file: fs.File, arena: Allocator, symbol_table: *
else => {
try leb.writeULEB128(writer, symbol.index);
if (symbol.isDefined()) {
- const name = mem.sliceTo(symbol.name, 0);
- try leb.writeULEB128(writer, @intCast(u32, name.len));
- try writer.writeAll(name);
+ try leb.writeULEB128(writer, @intCast(u32, sym_name.len));
+ try writer.writeAll(sym_name);
}
},
}