Commit 8118336585

Jakub Konka <kubkon@jakubkonka.com>
2021-01-17 17:20:55
macho: refactor undef symbol handling
Now, we don't erroneously write to the string table on every write of global and undef symbols.
1 parent 3562edf
Changed files (3)
src/link/MachO/imports.zig
@@ -7,12 +7,8 @@ const assert = std.debug.assert;
 const Allocator = mem.Allocator;
 
 pub const ExternSymbol = struct {
-    /// Symbol name.
-    /// We own the memory, therefore we'll need to free it by calling `deinit`.
-    /// In self-hosted, we don't expect it to be null ever.
-    /// However, this is for backwards compatibility with LLD when
-    /// we'll be patching things up post mortem.
-    name: ?[]u8 = null,
+    /// MachO symbol table entry.
+    inner: macho.nlist_64,
 
     /// Id of the dynamic library where the specified entries can be found.
     /// Id of 0 means self.
@@ -26,22 +22,16 @@ pub const ExternSymbol = struct {
 
     /// Offset relative to the start address of the `segment`.
     offset: u32 = 0,
-
-    pub fn deinit(self: *ExternSymbol, allocator: *Allocator) void {
-        if (self.name) |name| {
-            allocator.free(name);
-        }
-    }
 };
 
-pub fn rebaseInfoSize(symbols: []*const ExternSymbol) !u64 {
+pub fn rebaseInfoSize(symbols: anytype) !u64 {
     var stream = std.io.countingWriter(std.io.null_writer);
     var writer = stream.writer();
     var size: u64 = 0;
 
-    for (symbols) |symbol| {
+    for (symbols) |entry| {
         size += 2;
-        try leb.writeILEB128(writer, symbol.offset);
+        try leb.writeILEB128(writer, entry.value.offset);
         size += 1;
     }
 
@@ -49,8 +39,9 @@ pub fn rebaseInfoSize(symbols: []*const ExternSymbol) !u64 {
     return size;
 }
 
-pub fn writeRebaseInfo(symbols: []*const ExternSymbol, writer: anytype) !void {
-    for (symbols) |symbol| {
+pub fn writeRebaseInfo(symbols: anytype, writer: anytype) !void {
+    for (symbols) |entry| {
+        const symbol = entry.value;
         try writer.writeByte(macho.REBASE_OPCODE_SET_TYPE_IMM | @truncate(u4, macho.REBASE_TYPE_POINTER));
         try writer.writeByte(macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment));
         try leb.writeILEB128(writer, symbol.offset);
@@ -59,23 +50,23 @@ pub fn writeRebaseInfo(symbols: []*const ExternSymbol, writer: anytype) !void {
     try writer.writeByte(macho.REBASE_OPCODE_DONE);
 }
 
-pub fn bindInfoSize(symbols: []*const ExternSymbol) !u64 {
+pub fn bindInfoSize(symbols: anytype) !u64 {
     var stream = std.io.countingWriter(std.io.null_writer);
     var writer = stream.writer();
     var size: u64 = 0;
 
-    for (symbols) |symbol| {
+    for (symbols) |entry| {
+        const symbol = entry.value;
+
         size += 1;
         if (symbol.dylib_ordinal > 15) {
             try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal));
         }
         size += 1;
 
-        if (symbol.name) |name| {
-            size += 1;
-            size += name.len;
-            size += 1;
-        }
+        size += 1;
+        size += entry.key.len;
+        size += 1;
 
         size += 1;
         try leb.writeILEB128(writer, symbol.offset);
@@ -86,8 +77,10 @@ pub fn bindInfoSize(symbols: []*const ExternSymbol) !u64 {
     return size;
 }
 
-pub fn writeBindInfo(symbols: []*const ExternSymbol, writer: anytype) !void {
-    for (symbols) |symbol| {
+pub fn writeBindInfo(symbols: anytype, writer: anytype) !void {
+    for (symbols) |entry| {
+        const symbol = entry.value;
+
         if (symbol.dylib_ordinal > 15) {
             try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
             try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal));
@@ -98,11 +91,9 @@ pub fn writeBindInfo(symbols: []*const ExternSymbol, writer: anytype) !void {
         }
         try writer.writeByte(macho.BIND_OPCODE_SET_TYPE_IMM | @truncate(u4, macho.BIND_TYPE_POINTER));
 
-        if (symbol.name) |name| {
-            try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
-            try writer.writeAll(name);
-            try writer.writeByte(0);
-        }
+        try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
+        try writer.writeAll(entry.key);
+        try writer.writeByte(0);
 
         try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment));
         try leb.writeILEB128(writer, symbol.offset);
@@ -111,23 +102,24 @@ pub fn writeBindInfo(symbols: []*const ExternSymbol, writer: anytype) !void {
     }
 }
 
-pub fn lazyBindInfoSize(symbols: []*const ExternSymbol) !u64 {
+pub fn lazyBindInfoSize(symbols: anytype) !u64 {
     var stream = std.io.countingWriter(std.io.null_writer);
     var writer = stream.writer();
     var size: u64 = 0;
 
-    for (symbols) |symbol| {
+    for (symbols) |entry| {
+        const symbol = entry.value;
         size += 1;
         try leb.writeILEB128(writer, symbol.offset);
         size += 1;
         if (symbol.dylib_ordinal > 15) {
             try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal));
         }
-        if (symbol.name) |name| {
-            size += 1;
-            size += name.len;
-            size += 1;
-        }
+
+        size += 1;
+        size += entry.key.len;
+        size += 1;
+
         size += 2;
     }
 
@@ -135,8 +127,9 @@ pub fn lazyBindInfoSize(symbols: []*const ExternSymbol) !u64 {
     return size;
 }
 
-pub fn writeLazyBindInfo(symbols: []*const ExternSymbol, writer: anytype) !void {
-    for (symbols) |symbol| {
+pub fn writeLazyBindInfo(symbols: anytype, writer: anytype) !void {
+    for (symbols) |entry| {
+        const symbol = entry.value;
         try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment));
         try leb.writeILEB128(writer, symbol.offset);
 
@@ -149,11 +142,9 @@ pub fn writeLazyBindInfo(symbols: []*const ExternSymbol, writer: anytype) !void
             try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal)));
         }
 
-        if (symbol.name) |name| {
-            try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
-            try writer.writeAll(name);
-            try writer.writeByte(0);
-        }
+        try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
+        try writer.writeAll(entry.key);
+        try writer.writeByte(0);
 
         try writer.writeByte(macho.BIND_OPCODE_DO_BIND);
         try writer.writeByte(macho.BIND_OPCODE_DONE);
src/link/MachO.zig
@@ -1011,11 +1011,11 @@ pub fn deinit(self: *MachO) void {
         ds.deinit(self.base.allocator);
     }
     for (self.extern_lazy_symbols.items()) |*entry| {
-        entry.value.deinit(self.base.allocator);
+        self.base.allocator.free(entry.key);
     }
     self.extern_lazy_symbols.deinit(self.base.allocator);
     for (self.extern_nonlazy_symbols.items()) |*entry| {
-        entry.value.deinit(self.base.allocator);
+        self.base.allocator.free(entry.key);
     }
     self.extern_nonlazy_symbols.deinit(self.base.allocator);
     self.pie_fixups.deinit(self.base.allocator);
@@ -2042,9 +2042,16 @@ pub fn populateMissingMetadata(self: *MachO) !void {
     }
     if (!self.extern_nonlazy_symbols.contains("dyld_stub_binder")) {
         const index = @intCast(u32, self.extern_nonlazy_symbols.items().len);
-        const name = try std.fmt.allocPrint(self.base.allocator, "dyld_stub_binder", .{});
+        const name = try self.base.allocator.dupe(u8, "dyld_stub_binder");
+        const offset = try self.makeString("dyld_stub_binder");
         try self.extern_nonlazy_symbols.putNoClobber(self.base.allocator, name, .{
-            .name = name,
+            .inner = .{
+                .n_strx = offset,
+                .n_type = std.macho.N_UNDF | std.macho.N_EXT,
+                .n_sect = 0,
+                .n_desc = std.macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY | std.macho.N_SYMBOL_RESOLVER,
+                .n_value = 0,
+            },
             .dylib_ordinal = 1, // TODO this is currently hardcoded.
             .segment = self.data_const_segment_cmd_index.?,
             .offset = index * @sizeOf(u64),
@@ -2222,15 +2229,15 @@ pub fn makeStaticString(comptime bytes: []const u8) [16]u8 {
     return buf;
 }
 
-pub fn makeString(self: *MachO, bytes: []const u8) !u32 {
+fn makeString(self: *MachO, bytes: []const u8) !u32 {
     try self.string_table.ensureCapacity(self.base.allocator, self.string_table.items.len + bytes.len + 1);
-    const result = @intCast(u32, self.string_table.items.len);
+    const offset = @intCast(u32, self.string_table.items.len);
     self.string_table.appendSliceAssumeCapacity(bytes);
     self.string_table.appendAssumeCapacity(0);
     self.string_table_dirty = true;
     if (self.d_sym) |*ds|
         ds.string_table_dirty = true;
-    return result;
+    return offset;
 }
 
 fn getString(self: *MachO, str_off: u32) []const u8 {
@@ -2246,6 +2253,23 @@ fn updateString(self: *MachO, old_str_off: u32, new_name: []const u8) !u32 {
     return self.makeString(new_name);
 }
 
+pub fn addExternSymbol(self: *MachO, name: []const u8) !u32 {
+    const index = @intCast(u32, self.extern_lazy_symbols.items().len);
+    const offset = try self.makeString(name);
+    const sym_name = try self.base.allocator.dupe(u8, name);
+    try self.extern_lazy_symbols.putNoClobber(self.base.allocator, sym_name, .{
+        .inner = .{
+            .n_strx = offset,
+            .n_type = macho.N_UNDF | macho.N_EXT,
+            .n_sect = 0,
+            .n_desc = macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY | macho.N_SYMBOL_RESOLVER,
+            .n_value = 0,
+        },
+        .dylib_ordinal = 1, // TODO this is now hardcoded, since we only support libSystem.
+    });
+    return index;
+}
+
 const NextSegmentAddressAndOffset = struct {
     address: u64,
     offset: u64,
@@ -2585,24 +2609,10 @@ fn writeAllGlobalAndUndefSymbols(self: *MachO) !void {
     defer undefs.deinit();
     try undefs.ensureCapacity(nundefs);
     for (self.extern_lazy_symbols.items()) |entry| {
-        const name = try self.makeString(entry.key);
-        undefs.appendAssumeCapacity(.{
-            .n_strx = name,
-            .n_type = std.macho.N_UNDF | std.macho.N_EXT,
-            .n_sect = 0,
-            .n_desc = std.macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY | std.macho.N_SYMBOL_RESOLVER,
-            .n_value = 0,
-        });
+        undefs.appendAssumeCapacity(entry.value.inner);
     }
     for (self.extern_nonlazy_symbols.items()) |entry| {
-        const name = try self.makeString(entry.key);
-        undefs.appendAssumeCapacity(.{
-            .n_strx = name,
-            .n_type = std.macho.N_UNDF | std.macho.N_EXT,
-            .n_sect = 0,
-            .n_desc = std.macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY | std.macho.N_SYMBOL_RESOLVER,
-            .n_value = 0,
-        });
+        undefs.appendAssumeCapacity(entry.value.inner);
     }
 
     const locals_off = symtab.symoff;
@@ -2781,19 +2791,12 @@ fn writeRebaseInfoTable(self: *MachO) !void {
     const tracy = trace(@src());
     defer tracy.end();
 
-    var symbols = try self.base.allocator.alloc(*const ExternSymbol, self.extern_lazy_symbols.items().len);
-    defer self.base.allocator.free(symbols);
-
-    for (self.extern_lazy_symbols.items()) |*entry, i| {
-        symbols[i] = &entry.value;
-    }
-
-    const size = try rebaseInfoSize(symbols);
+    const size = try rebaseInfoSize(self.extern_lazy_symbols.items());
     var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size));
     defer self.base.allocator.free(buffer);
 
     var stream = std.io.fixedBufferStream(buffer);
-    try writeRebaseInfo(symbols, stream.writer());
+    try writeRebaseInfo(self.extern_lazy_symbols.items(), stream.writer());
 
     const linkedit_segment = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
     const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
@@ -2820,19 +2823,12 @@ fn writeBindingInfoTable(self: *MachO) !void {
     const tracy = trace(@src());
     defer tracy.end();
 
-    var symbols = try self.base.allocator.alloc(*const ExternSymbol, self.extern_nonlazy_symbols.items().len);
-    defer self.base.allocator.free(symbols);
-
-    for (self.extern_nonlazy_symbols.items()) |*entry, i| {
-        symbols[i] = &entry.value;
-    }
-
-    const size = try bindInfoSize(symbols);
+    const size = try bindInfoSize(self.extern_nonlazy_symbols.items());
     var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size));
     defer self.base.allocator.free(buffer);
 
     var stream = std.io.fixedBufferStream(buffer);
-    try writeBindInfo(symbols, stream.writer());
+    try writeBindInfo(self.extern_nonlazy_symbols.items(), stream.writer());
 
     const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
     const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
@@ -2856,19 +2852,12 @@ fn writeBindingInfoTable(self: *MachO) !void {
 fn writeLazyBindingInfoTable(self: *MachO) !void {
     if (!self.lazy_binding_info_dirty) return;
 
-    var symbols = try self.base.allocator.alloc(*const ExternSymbol, self.extern_lazy_symbols.items().len);
-    defer self.base.allocator.free(symbols);
-
-    for (self.extern_lazy_symbols.items()) |*entry, i| {
-        symbols[i] = &entry.value;
-    }
-
-    const size = try lazyBindInfoSize(symbols);
+    const size = try lazyBindInfoSize(self.extern_lazy_symbols.items());
     var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size));
     defer self.base.allocator.free(buffer);
 
     var stream = std.io.fixedBufferStream(buffer);
-    try writeLazyBindInfo(symbols, stream.writer());
+    try writeLazyBindInfo(self.extern_lazy_symbols.items(), stream.writer());
 
     const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
     const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
src/codegen.zig
@@ -1920,21 +1920,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                         }
                     } else if (func_value.castTag(.extern_fn)) |func_payload| {
                         const decl = func_payload.data;
-                        // We don't free the decl_name immediately unless it already exists.
-                        // If it doesn't, it will get autofreed when we clean up the extern symbol table.
                         const decl_name = try std.fmt.allocPrint(self.bin_file.allocator, "_{s}", .{decl.name});
+                        defer self.bin_file.allocator.free(decl_name);
                         const already_defined = macho_file.extern_lazy_symbols.contains(decl_name);
-                        const symbol: u32 = if (macho_file.extern_lazy_symbols.getIndex(decl_name)) |index| blk: {
-                            self.bin_file.allocator.free(decl_name);
-                            break :blk @intCast(u32, index);
-                        } else blk: {
-                            const index = @intCast(u32, macho_file.extern_lazy_symbols.items().len);
-                            try macho_file.extern_lazy_symbols.putNoClobber(self.bin_file.allocator, decl_name, .{
-                                .name = decl_name,
-                                .dylib_ordinal = 1, // TODO this is now hardcoded, since we only support libSystem.
-                            });
-                            break :blk index;
-                        };
+                        const symbol: u32 = if (macho_file.extern_lazy_symbols.getIndex(decl_name)) |index|
+                            @intCast(u32, index)
+                        else
+                            try macho_file.addExternSymbol(decl_name);
                         const start = self.code.items.len;
                         const len: usize = blk: {
                             switch (arch) {