Commit 0135b46659

Jakub Konka <kubkon@jakubkonka.com>
2021-07-14 22:13:21
zld: remove StringTable abstraction
1 parent f87424a
src/link/MachO/DebugSymbols.zig
@@ -814,7 +814,7 @@ fn writeStringTable(self: *DebugSymbols) !void {
 
     const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
     const allocated_size = self.allocatedSizeLinkedit(symtab.stroff);
-    const needed_size = mem.alignForwardGeneric(u64, self.base.strtab.size(), @alignOf(u64));
+    const needed_size = mem.alignForwardGeneric(u64, self.base.strtab.items.len, @alignOf(u64));
 
     if (needed_size > allocated_size) {
         symtab.strsize = 0;
@@ -823,7 +823,7 @@ fn writeStringTable(self: *DebugSymbols) !void {
     symtab.strsize = @intCast(u32, needed_size);
     log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize });
 
-    try self.file.pwriteAll(self.base.strtab.asSlice(), symtab.stroff);
+    try self.file.pwriteAll(self.base.strtab.items, symtab.stroff);
     self.load_commands_dirty = true;
     self.strtab_dirty = false;
 }
src/link/MachO/StringTable.zig
@@ -1,64 +0,0 @@
-const StringTable = @This();
-
-const std = @import("std");
-const log = std.log.scoped(.strtab);
-const mem = std.mem;
-
-const Allocator = mem.Allocator;
-
-allocator: *Allocator,
-buffer: std.ArrayListUnmanaged(u8) = .{},
-cache: std.StringHashMapUnmanaged(u32) = .{},
-
-pub const Error = error{OutOfMemory};
-
-pub fn init(allocator: *Allocator) Error!StringTable {
-    var strtab = StringTable{
-        .allocator = allocator,
-    };
-    try strtab.buffer.append(allocator, 0);
-    return strtab;
-}
-
-pub fn deinit(self: *StringTable) void {
-    {
-        var it = self.cache.keyIterator();
-        while (it.next()) |key| {
-            self.allocator.free(key.*);
-        }
-    }
-    self.cache.deinit(self.allocator);
-    self.buffer.deinit(self.allocator);
-}
-
-pub fn getOrPut(self: *StringTable, string: []const u8) Error!u32 {
-    if (self.cache.get(string)) |off| {
-        log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off });
-        return off;
-    }
-
-    try self.buffer.ensureUnusedCapacity(self.allocator, string.len + 1);
-    const new_off = @intCast(u32, self.buffer.items.len);
-
-    log.debug("writing new string '{s}' at offset 0x{x}", .{ string, new_off });
-
-    self.buffer.appendSliceAssumeCapacity(string);
-    self.buffer.appendAssumeCapacity(0);
-
-    try self.cache.putNoClobber(self.allocator, try self.allocator.dupe(u8, string), new_off);
-
-    return new_off;
-}
-
-pub fn get(self: StringTable, off: u32) ?[]const u8 {
-    if (off >= self.buffer.items.len) return null;
-    return mem.spanZ(@ptrCast([*:0]const u8, self.buffer.items.ptr + off));
-}
-
-pub fn asSlice(self: StringTable) []const u8 {
-    return self.buffer.items;
-}
-
-pub fn size(self: StringTable) u64 {
-    return self.buffer.items.len;
-}
src/link/MachO/Symbol.zig
@@ -9,7 +9,6 @@ const mem = std.mem;
 const Allocator = mem.Allocator;
 const Dylib = @import("Dylib.zig");
 const Object = @import("Object.zig");
-const StringTable = @import("StringTable.zig");
 const Zld = @import("Zld.zig");
 
 /// Symbol name. Owned slice.
@@ -226,8 +225,8 @@ pub fn needsTlvOffset(self: Symbol, zld: *Zld) bool {
     return sect_type == macho.S_THREAD_LOCAL_VARIABLES;
 }
 
-pub fn asNlist(symbol: *Symbol, zld: *Zld, strtab: *StringTable) !macho.nlist_64 {
-    const n_strx = try strtab.getOrPut(symbol.name);
+pub fn asNlist(symbol: *Symbol, zld: *Zld) !macho.nlist_64 {
+    const n_strx = try zld.makeString(symbol.name);
     const nlist = nlist: {
         switch (symbol.payload) {
             .regular => |regular| {
src/link/MachO/Zld.zig
@@ -18,7 +18,6 @@ const CodeSignature = @import("CodeSignature.zig");
 const Dylib = @import("Dylib.zig");
 const Object = @import("Object.zig");
 const Relocation = reloc.Relocation;
-const StringTable = @import("StringTable.zig");
 const Symbol = @import("Symbol.zig");
 const Trie = @import("Trie.zig");
 
@@ -26,7 +25,6 @@ usingnamespace @import("commands.zig");
 usingnamespace @import("bind.zig");
 
 allocator: *Allocator,
-strtab: StringTable,
 
 target: ?std.Target = null,
 page_size: ?u16 = null,
@@ -114,6 +112,9 @@ stub_helper_stubs_start_off: ?u64 = null,
 
 blocks: std.AutoHashMapUnmanaged(MatchingSection, *TextBlock) = .{},
 
+strtab: std.ArrayListUnmanaged(u8) = .{},
+strtab_cache: std.StringHashMapUnmanaged(u32) = .{},
+
 has_dices: bool = false,
 has_stabs: bool = false,
 
@@ -169,7 +170,7 @@ pub const TextBlock = struct {
                         .n_value = reg.address,
                     });
                     nlists.appendAssumeCapacity(.{
-                        .n_strx = try zld.strtab.getOrPut(sym.name),
+                        .n_strx = try zld.makeString(sym.name),
                         .n_type = macho.N_FUN,
                         .n_sect = section_id,
                         .n_desc = 0,
@@ -192,7 +193,7 @@ pub const TextBlock = struct {
                 },
                 .global => {
                     try nlists.append(.{
-                        .n_strx = try zld.strtab.getOrPut(sym.name),
+                        .n_strx = try zld.makeString(sym.name),
                         .n_type = macho.N_GSYM,
                         .n_sect = 0,
                         .n_desc = 0,
@@ -201,7 +202,7 @@ pub const TextBlock = struct {
                 },
                 .static => {
                     try nlists.append(.{
-                        .n_strx = try zld.strtab.getOrPut(sym.name),
+                        .n_strx = try zld.makeString(sym.name),
                         .n_type = macho.N_STSYM,
                         .n_sect = reg.sectionId(zld),
                         .n_desc = 0,
@@ -311,10 +312,15 @@ pub const TextBlock = struct {
 const DEFAULT_DYLD_PATH: [*:0]const u8 = "/usr/lib/dyld";
 
+/// Creates a linker instance that allocates from `allocator`; every other
+/// field starts at its default. Release it with `deinit`.
 pub fn init(allocator: *Allocator) !Zld {
-    return Zld{
-        .allocator = allocator,
-        .strtab = try StringTable.init(allocator),
-    };
+    var zld = Zld{ .allocator = allocator };
+    // Keep offset 0 of the string table occupied by a NUL byte, exactly as the
+    // removed `StringTable.init` did. `makeString` hands out the current table
+    // length as the offset, so without this the first interned name would get
+    // `n_strx == 0`, which Mach-O defines to mean a null (empty) name.
+    try zld.strtab.append(allocator, 0);
+    return zld;
 }
 
 pub fn deinit(self: *Zld) void {
@@ -357,7 +355,15 @@ pub fn deinit(self: *Zld) void {
     self.locals.deinit(self.allocator);
 
     self.globals.deinit(self.allocator);
-    self.strtab.deinit();
+
+    {
+        var it = self.strtab_cache.keyIterator();
+        while (it.next()) |key| {
+            self.allocator.free(key.*);
+        }
+    }
+    self.strtab_cache.deinit(self.allocator);
+    self.strtab.deinit(self.allocator);
 
     // TODO dealloc all blocks
     self.blocks.deinit(self.allocator);
@@ -2572,7 +2578,7 @@ fn writeSymbolTable(self: *Zld) !void {
         if (symbol.isTemp()) continue; // TODO when merging codepaths, this should go into freelist
 
         const reg = symbol.payload.regular;
-        const nlist = try symbol.asNlist(self, &self.strtab);
+        const nlist = try symbol.asNlist(self);
 
         if (reg.linkage == .translation_unit) {
             try locals.append(nlist);
@@ -2588,21 +2594,21 @@ fn writeSymbolTable(self: *Zld) !void {
             // Open scope
             try locals.ensureUnusedCapacity(4);
             locals.appendAssumeCapacity(.{
-                .n_strx = try self.strtab.getOrPut(object.tu_comp_dir.?),
+                .n_strx = try self.makeString(object.tu_comp_dir.?),
                 .n_type = macho.N_SO,
                 .n_sect = 0,
                 .n_desc = 0,
                 .n_value = 0,
             });
             locals.appendAssumeCapacity(.{
-                .n_strx = try self.strtab.getOrPut(object.tu_name.?),
+                .n_strx = try self.makeString(object.tu_name.?),
                 .n_type = macho.N_SO,
                 .n_sect = 0,
                 .n_desc = 0,
                 .n_value = 0,
             });
             locals.appendAssumeCapacity(.{
-                .n_strx = try self.strtab.getOrPut(object.name.?),
+                .n_strx = try self.makeString(object.name.?),
                 .n_type = macho.N_OSO,
                 .n_sect = 0,
                 .n_desc = 1,
@@ -2642,7 +2648,7 @@ fn writeSymbolTable(self: *Zld) !void {
     defer undef_dir.deinit();
 
     for (self.imports.items) |sym| {
-        const nlist = try sym.asNlist(self, &self.strtab);
+        const nlist = try sym.asNlist(self);
         const id = @intCast(u32, undefs.items.len);
         try undefs.append(nlist);
         try undef_dir.putNoClobber(sym.name, id);
@@ -2737,14 +2743,14 @@ fn writeStringTable(self: *Zld) !void {
     const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
     const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
     symtab.stroff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize);
-    symtab.strsize = @intCast(u32, mem.alignForwardGeneric(u64, self.strtab.size(), @alignOf(u64)));
+    symtab.strsize = @intCast(u32, mem.alignForwardGeneric(u64, self.strtab.items.len, @alignOf(u64)));
     seg.inner.filesize += symtab.strsize;
 
     log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize });
 
-    try self.file.?.pwriteAll(self.strtab.asSlice(), symtab.stroff);
+    try self.file.?.pwriteAll(self.strtab.items, symtab.stroff);
 
-    if (symtab.strsize > self.strtab.size() and self.target.?.cpu.arch == .x86_64) {
+    if (symtab.strsize > self.strtab.items.len and self.target.?.cpu.arch == .x86_64) {
         // This is the last section, so we need to pad it out.
         try self.file.?.pwriteAll(&[_]u8{0}, seg.inner.fileoff + seg.inner.filesize - 1);
     }
@@ -2910,3 +2916,46 @@ fn writeHeader(self: *Zld) !void {
 
     try self.file.?.pwriteAll(mem.asBytes(&header), 0);
 }
+
+/// Interns `string` in the string table and returns its byte offset, the
+/// value callers store in `n_strx`.
+///
+/// A string interned earlier yields its cached offset; otherwise its bytes
+/// plus a terminating NUL are appended to `strtab`. `string` is copied, so
+/// the caller keeps ownership of the slice it passed in.
+///
+/// Fails only if an allocation fails; the table contents and the cache are
+/// then left as they were and no key copy is leaked.
+pub fn makeString(self: *Zld, string: []const u8) !u32 {
+    if (self.strtab_cache.get(string)) |off| {
+        log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off });
+        return off;
+    }
+
+    try self.strtab.ensureUnusedCapacity(self.allocator, string.len + 1);
+    const new_off = @intCast(u32, self.strtab.items.len);
+
+    // The cache owns a private copy of every key (released in `deinit`). Free
+    // the copy again if the insertion below fails, otherwise it would leak.
+    const key = try self.allocator.dupe(u8, string);
+    errdefer self.allocator.free(key);
+    try self.strtab_cache.putNoClobber(self.allocator, key, new_off);
+
+    // Nothing below can fail: the capacity was reserved above.
+    log.debug("writing new string '{s}' at offset 0x{x}", .{ string, new_off });
+    self.strtab.appendSliceAssumeCapacity(string);
+    self.strtab.appendAssumeCapacity(0);
+
+    return new_off;
+}
+
+/// Returns the NUL-terminated string starting at byte offset `off` of the
+/// string table, without the terminator. The slice aliases `strtab` and is
+/// only valid until the table grows.
+///
+/// `off` must lie inside the table: this is asserted, not reported, so the
+/// result is never actually `null` despite the optional return type.
+pub fn getString(self: *Zld, off: u32) ?[]const u8 {
+    assert(off < self.strtab.items.len);
+    return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + off));
+}
src/link/MachO.zig
@@ -26,7 +26,6 @@ const target_util = @import("../target.zig");
 const DebugSymbols = @import("MachO/DebugSymbols.zig");
 const Trie = @import("MachO/Trie.zig");
 const CodeSignature = @import("MachO/CodeSignature.zig");
-const StringTable = @import("MachO/StringTable.zig");
 const Zld = @import("MachO/Zld.zig");
 
 usingnamespace @import("MachO/commands.zig");
@@ -117,7 +116,8 @@ offset_table_free_list: std.ArrayListUnmanaged(u32) = .{},
 
 stub_helper_stubs_start_off: ?u64 = null,
 
-strtab: StringTable = undefined,
+strtab: std.ArrayListUnmanaged(u8) = .{},
+strtab_cache: std.StringHashMapUnmanaged(u32) = .{},
 
 /// Table of GOT entries.
 offset_table: std.ArrayListUnmanaged(GOTEntry) = .{},
@@ -418,7 +418,6 @@ pub fn createEmpty(gpa: *Allocator, options: link.Options) !*MachO {
             .file = null,
         },
         .page_size = if (options.target.cpu.arch == .aarch64) 0x4000 else 0x1000,
-        .strtab = try StringTable.init(gpa),
     };
 
     return self;
@@ -985,7 +984,14 @@ pub fn deinit(self: *MachO) void {
     self.text_block_free_list.deinit(self.base.allocator);
     self.offset_table.deinit(self.base.allocator);
     self.offset_table_free_list.deinit(self.base.allocator);
-    self.strtab.deinit();
+    {
+        var it = self.strtab_cache.keyIterator();
+        while (it.next()) |key| {
+            self.base.allocator.free(key.*);
+        }
+    }
+    self.strtab_cache.deinit(self.base.allocator);
+    self.strtab.deinit(self.base.allocator);
     self.globals.deinit(self.base.allocator);
     self.globals_free_list.deinit(self.base.allocator);
     self.locals.deinit(self.base.allocator);
@@ -1203,7 +1209,7 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {
         const new_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{mem.spanZ(decl.name)});
         defer self.base.allocator.free(new_name);
 
-        symbol.n_strx = try self.strtab.getOrPut(new_name);
+        symbol.n_strx = try self.makeString(new_name);
         symbol.n_type = macho.N_SECT;
         symbol.n_sect = @intCast(u8, self.text_section_index.?) + 1;
         symbol.n_desc = 0;
@@ -1215,7 +1221,7 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {
         const decl_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{mem.spanZ(decl.name)});
         defer self.base.allocator.free(decl_name);
 
-        const name_str_index = try self.strtab.getOrPut(decl_name);
+        const name_str_index = try self.makeString(decl_name);
         const addr = try self.allocateTextBlock(&decl.link.macho, code.len, required_alignment);
 
         log.debug("allocated text block for {s} at 0x{x}", .{ decl_name, addr });
@@ -1405,14 +1411,14 @@ pub fn updateDeclExports(
         if (exp.link.macho.sym_index) |i| {
             const sym = &self.globals.items[i];
             sym.* = .{
-                .n_strx = try self.strtab.getOrPut(exp_name),
+                .n_strx = sym.n_strx,
                 .n_type = n_type,
                 .n_sect = @intCast(u8, self.text_section_index.?) + 1,
                 .n_desc = n_desc,
                 .n_value = decl_sym.n_value,
             };
         } else {
-            const name_str_index = try self.strtab.getOrPut(exp_name);
+            const name_str_index = try self.makeString(exp_name);
             const i = if (self.globals_free_list.popOrNull()) |i| i else blk: {
                 _ = self.globals.addOneAssumeCapacity();
                 self.export_info_dirty = true;
@@ -1788,7 +1794,8 @@ pub fn populateMissingMetadata(self: *MachO) !void {
         symtab.symoff = @intCast(u32, symtab_off);
         symtab.nsyms = @intCast(u32, self.base.options.symbol_count_hint);
 
-        const strtab_size = self.strtab.size();
+        try self.strtab.append(self.base.allocator, 0);
+        const strtab_size = self.strtab.items.len;
         const strtab_off = self.findFreeSpaceLinkedit(strtab_size, 1, symtab_off);
         log.debug("found string table free space 0x{x} to 0x{x}", .{ strtab_off, strtab_off + strtab_size });
         symtab.stroff = @intCast(u32, strtab_off);
@@ -1930,7 +1937,7 @@ pub fn populateMissingMetadata(self: *MachO) !void {
     if (!self.nonlazy_imports.contains("dyld_stub_binder")) {
         const index = @intCast(u32, self.nonlazy_imports.count());
         const name = try self.base.allocator.dupe(u8, "dyld_stub_binder");
-        const offset = try self.strtab.getOrPut("dyld_stub_binder");
+        const offset = try self.makeString("dyld_stub_binder");
         try self.nonlazy_imports.putNoClobber(self.base.allocator, name, .{
             .symbol = .{
                 .n_strx = offset,
@@ -2063,7 +2070,7 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64,
 
 pub fn addExternSymbol(self: *MachO, name: []const u8) !u32 {
     const index = @intCast(u32, self.lazy_imports.count());
-    const offset = try self.strtab.getOrPut(name);
+    const offset = try self.makeString(name);
     const sym_name = try self.base.allocator.dupe(u8, name);
     const dylib_ordinal = 1; // TODO this is now hardcoded, since we only support libSystem.
     try self.lazy_imports.putNoClobber(self.base.allocator, sym_name, .{
@@ -2253,7 +2260,7 @@ fn writeOffsetTableEntry(self: *MachO, index: usize) !void {
             },
         }
     };
-    const sym_name = self.strtab.get(sym.n_strx) orelse unreachable;
+    const sym_name = self.getString(sym.n_strx) orelse unreachable;
     log.debug("writing offset table entry [ 0x{x} => 0x{x} ({s}) ]", .{ off, sym.n_value, sym_name });
     try self.base.file.?.pwriteAll(mem.asBytes(&sym.n_value), off);
 }
@@ -2751,7 +2758,7 @@ fn writeExportTrie(self: *MachO) !void {
     const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
     for (self.globals.items) |symbol| {
         // TODO figure out if we should put all global symbols into the export trie
-        const name = self.strtab.get(symbol.n_strx) orelse unreachable;
+        const name = self.getString(symbol.n_strx) orelse unreachable;
         assert(symbol.n_value >= text_segment.inner.vmaddr);
         try trie.put(.{
             .name = name,
@@ -3032,7 +3039,7 @@ fn writeStringTable(self: *MachO) !void {
 
     const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
     const allocated_size = self.allocatedSizeLinkedit(symtab.stroff);
-    const needed_size = mem.alignForwardGeneric(u64, self.strtab.size(), @alignOf(u64));
+    const needed_size = mem.alignForwardGeneric(u64, self.strtab.items.len, @alignOf(u64));
 
     if (needed_size > allocated_size or self.strtab_needs_relocation) {
         symtab.strsize = 0;
@@ -3042,7 +3049,7 @@ fn writeStringTable(self: *MachO) !void {
     symtab.strsize = @intCast(u32, needed_size);
     log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize });
 
-    try self.base.file.?.pwriteAll(self.strtab.asSlice(), symtab.stroff);
+    try self.base.file.?.pwriteAll(self.strtab.items, symtab.stroff);
     self.load_commands_dirty = true;
     self.strtab_dirty = false;
 }
@@ -3173,3 +3180,46 @@ pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) {
 fn hasTlvDescriptors(_: *MachO) bool {
     return false;
 }
+
+/// Interns `string` in the string table and returns its byte offset, the
+/// value callers store in `n_strx`.
+///
+/// A string interned earlier yields its cached offset; otherwise its bytes
+/// plus a terminating NUL are appended to `strtab`. `string` is copied, so
+/// the caller keeps ownership of the slice it passed in.
+///
+/// Fails only if an allocation fails; the table contents and the cache are
+/// then left as they were and no key copy is leaked.
+pub fn makeString(self: *MachO, string: []const u8) !u32 {
+    if (self.strtab_cache.get(string)) |off| {
+        log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off });
+        return off;
+    }
+
+    try self.strtab.ensureUnusedCapacity(self.base.allocator, string.len + 1);
+    const new_off = @intCast(u32, self.strtab.items.len);
+
+    // The cache owns a private copy of every key (released in `deinit`). Free
+    // the copy again if the insertion below fails, otherwise it would leak.
+    const key = try self.base.allocator.dupe(u8, string);
+    errdefer self.base.allocator.free(key);
+    try self.strtab_cache.putNoClobber(self.base.allocator, key, new_off);
+
+    // Nothing below can fail: the capacity was reserved above.
+    log.debug("writing new string '{s}' at offset 0x{x}", .{ string, new_off });
+    self.strtab.appendSliceAssumeCapacity(string);
+    self.strtab.appendAssumeCapacity(0);
+
+    return new_off;
+}
+
+/// Returns the NUL-terminated string starting at byte offset `off` of the
+/// string table, without the terminator. The slice aliases `strtab` and is
+/// only valid until the table grows.
+///
+/// `off` must lie inside the table: this is asserted, not reported, so the
+/// result is never actually `null` despite the optional return type.
+pub fn getString(self: *MachO, off: u32) ?[]const u8 {
+    assert(off < self.strtab.items.len);
+    return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + off));
+}
CMakeLists.txt
@@ -581,7 +581,6 @@ set(ZIG_STAGE2_SOURCES
     "${CMAKE_SOURCE_DIR}/src/link/MachO/DebugSymbols.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/Dylib.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/Object.zig"
-    "${CMAKE_SOURCE_DIR}/src/link/MachO/StringTable.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/Symbol.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/Trie.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/Zld.zig"