Commit ae535111a4

Jakub Konka <kubkon@jakubkonka.com>
2020-12-13 21:26:57
macho: cleanup (lazy) binding info tables
1 parent 5e913c9
Changed files (3)
src/link/MachO/imports.zig
@@ -5,16 +5,22 @@ const mem = std.mem;
 
 const assert = std.debug.assert;
 const Allocator = mem.Allocator;
+const sizeLEB128 = @import("../MachO.zig").sizeLEB128;
 
+/// Table of binding info entries used to tell the dyld which
+/// symbols to bind at loading time.
 pub const BindingInfoTable = struct {
+    /// Id of the dynamic library where the specified entries can be found.
     dylib_ordinal: i64 = 0,
+
+    /// Binding type; defaults to pointer type.
     binding_type: u8 = macho.BIND_TYPE_POINTER,
-    entries: std.ArrayListUnmanaged(Entry) = .{},
 
-    pub const Entry = struct {
-        /// Id of the symbol in the undef symbol table.
-        /// Can be null.
-        symbol: ?u16 = null,
+    symbols: std.ArrayListUnmanaged(Symbol) = .{},
+
+    pub const Symbol = struct {
+        /// Symbol name.
+        name: ?[]u8 = null,
 
         /// Id of the segment where to bind this symbol to.
         segment: u8,
@@ -24,14 +30,17 @@ pub const BindingInfoTable = struct {
     };
 
     pub fn deinit(self: *BindingInfoTable, allocator: *Allocator) void { // Frees each symbol's name (when set) and then the symbol list itself.
-        self.entries.deinit(allocator);
+        for (self.symbols.items) |*symbol| {
+            if (symbol.name) |name| { // The name is optional; free it only when set.
+                allocator.free(name);
+            }
+        }
+        self.symbols.deinit(allocator); // Release the list's backing storage with the same allocator.
     }
 
-    pub fn read(self: *BindingInfoTable, allocator: *Allocator, symbols_by_name: anytype, reader: anytype) !void {
-        var name = std.ArrayList(u8).init(allocator);
-        defer name.deinit();
-
-        var entry: Entry = .{
+    /// Parse the binding info table from byte stream.
+    pub fn read(self: *BindingInfoTable, reader: anytype, allocator: *Allocator) !void {
+        var symbol: Symbol = .{
             .segment = 0,
             .offset = 0,
         };
@@ -48,8 +57,8 @@ pub const BindingInfoTable = struct {
 
             switch (opcode) {
                 macho.BIND_OPCODE_DO_BIND => {
-                    try self.entries.append(allocator, entry);
-                    entry = .{
+                    try self.symbols.append(allocator, symbol);
+                    symbol = .{
                         .segment = 0,
                         .offset = 0,
                     };
@@ -59,17 +68,17 @@ pub const BindingInfoTable = struct {
                     break;
                 },
                 macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => {
-                    name.shrinkRetainingCapacity(0);
+                    var name = std.ArrayList(u8).init(allocator);
                     var next = try reader.readByte();
                     while (next != @as(u8, 0)) {
                         try name.append(next);
                         next = try reader.readByte();
                     }
-                    entry.symbol = symbols_by_name.get(name.items[0..]);
+                    symbol.name = name.toOwnedSlice();
                 },
                 macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => {
-                    entry.segment = imm;
-                    entry.offset = try leb.readILEB128(i64, reader);
+                    symbol.segment = imm;
+                    symbol.offset = try leb.readILEB128(i64, reader);
                 },
                 macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => {
                     assert(!dylib_ordinal_set);
@@ -90,15 +99,69 @@ pub const BindingInfoTable = struct {
         assert(done);
     }
 
-    pub fn write(self: BindingInfoTable, writer: anytype) !void {}
+    /// Write the binding info table to `writer` as bind opcodes: dylib ordinal, binding type, one record per symbol, then DONE.
+    pub fn write(self: BindingInfoTable, writer: anytype) !void {
+        if (self.dylib_ordinal > 15) { // Ordinal does not fit the 4-bit immediate; emit it as a ULEB128 operand.
+            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
+            try leb.writeULEB128(writer, @bitCast(u64, self.dylib_ordinal));
+        } else if (self.dylib_ordinal > 0) { // Ordinals 1..15 are packed into the low 4 bits of the opcode byte.
+            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, @bitCast(u64, self.dylib_ordinal)));
+        } else { // Zero or negative ordinal: emitted with the SPECIAL opcode, truncated to 4 bits.
+            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, self.dylib_ordinal)));
+        }
+        try writer.writeByte(macho.BIND_OPCODE_SET_TYPE_IMM | @truncate(u4, self.binding_type)); // Binding type goes in the low 4 bits.
+
+        for (self.symbols.items) |symbol| {
+            if (symbol.name) |name| { // Symbols without a name emit no SET_SYMBOL opcode at all.
+                try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
+                try writer.writeAll(name);
+                try writer.writeByte(0); // NUL terminator ending the name.
+            }
+
+            try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment)); // Segment id goes in the low 4 bits.
+            try leb.writeILEB128(writer, symbol.offset); // NOTE(review): signed LEB128 writer used under an opcode named ..._ULEB - confirm intended encoding.
+
+            try writer.writeByte(macho.BIND_OPCODE_DO_BIND); // One DO_BIND per symbol.
+        }
+
+        try writer.writeByte(macho.BIND_OPCODE_DONE); // A single DONE terminates the whole table.
+    }
+
+    /// Calculate size in bytes of this binding info table, counting the same sequence of opcodes and operands that `write` emits.
+    pub fn calcSize(self: *BindingInfoTable) usize {
+        var size: usize = 1; // Dylib ordinal opcode byte.
+        if (self.dylib_ordinal > 15) {
+            size += sizeLEB128(self.dylib_ordinal); // Operand bytes, present only when the ordinal exceeds 15.
+        }
+
+        size += 1; // Binding type opcode byte.
+
+        for (self.symbols.items) |symbol| {
+            if (symbol.name) |name| {
+                size += 1; // SET_SYMBOL opcode byte.
+                size += name.len; // Name bytes.
+                size += 1; // NUL terminator.
+            }
+
+            size += 1; // SET_SEGMENT_AND_OFFSET opcode byte.
+            size += sizeLEB128(symbol.offset); // NOTE(review): `write` uses leb.writeILEB128 here - confirm both agree on byte count for every offset.
+
+            size += 1; // DO_BIND opcode byte.
+        }
+
+        size += 1; // Trailing DONE opcode byte.
+        return size;
+    }
 };
 
+/// Table of lazy binding info entries used to tell the dyld which
+/// symbols to lazily bind at first load of a dylib.
 pub const LazyBindingInfoTable = struct {
-    entries: std.ArrayListUnmanaged(Entry) = .{},
+    symbols: std.ArrayListUnmanaged(Symbol) = .{},
 
-    pub const Entry = struct {
-        /// Id of the symbol in the undef symbol table.
-        symbol: u16,
+    pub const Symbol = struct {
+        /// Symbol name.
+        name: ?[]u8 = null,
 
         /// Offset of this symbol wrt to the segment id encoded in `segment`.
         offset: i64,
@@ -113,15 +176,17 @@ pub const LazyBindingInfoTable = struct {
     };
 
     pub fn deinit(self: *LazyBindingInfoTable, allocator: *Allocator) void { // Frees each symbol's name (when set) and then the symbol list itself.
-        self.entries.deinit(allocator);
+        for (self.symbols.items) |*symbol| {
+            if (symbol.name) |name| { // The name is optional; free it only when set.
+                allocator.free(name);
+            }
+        }
+        self.symbols.deinit(allocator); // Release the list's backing storage with the same allocator.
     }
 
-    pub fn read(self: *LazyBindingInfoTable, allocator: *Allocator, symbols_by_name: anytype, reader: anytype) !void {
-        var name = std.ArrayList(u8).init(allocator);
-        defer name.deinit();
-
-        var entry: Entry = .{
-            .symbol = 0,
+    /// Parse the lazy binding info table from byte stream.
+    pub fn read(self: *LazyBindingInfoTable, reader: anytype, allocator: *Allocator) !void {
+        var symbol: Symbol = .{
             .offset = 0,
             .segment = 0,
             .dylib_ordinal = 0,
@@ -138,35 +203,34 @@ pub const LazyBindingInfoTable = struct {
 
             switch (opcode) {
                 macho.BIND_OPCODE_DO_BIND => {
-                    try self.entries.append(allocator, entry);
+                    try self.symbols.append(allocator, symbol);
                 },
                 macho.BIND_OPCODE_DONE => {
                     done = true;
-                    entry = .{
-                        .symbol = 0,
+                    symbol = .{
                         .offset = 0,
                         .segment = 0,
                         .dylib_ordinal = 0,
                     };
                 },
                 macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => {
-                    name.shrinkRetainingCapacity(0);
+                    var name = std.ArrayList(u8).init(allocator);
                     var next = try reader.readByte();
                     while (next != @as(u8, 0)) {
                         try name.append(next);
                         next = try reader.readByte();
                     }
-                    entry.symbol = symbols_by_name.get(name.items[0..]) orelse unreachable;
+                    symbol.name = name.toOwnedSlice();
                 },
                 macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => {
-                    entry.segment = imm;
-                    entry.offset = try leb.readILEB128(i64, reader);
+                    symbol.segment = imm;
+                    symbol.offset = try leb.readILEB128(i64, reader);
                 },
                 macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => {
-                    entry.dylib_ordinal = imm;
+                    symbol.dylib_ordinal = imm;
                 },
                 macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => {
-                    entry.dylib_ordinal = try leb.readILEB128(i64, reader);
+                    symbol.dylib_ordinal = try leb.readILEB128(i64, reader);
                 },
                 else => {
                     std.log.warn("unhandled BIND_OPCODE_: 0x{x}", .{opcode});
@@ -176,5 +240,51 @@ pub const LazyBindingInfoTable = struct {
         assert(done);
     }
 
-    pub fn write(self: LazyBindingInfoTable, writer: anytype) !void {}
+    /// Write the lazy binding info table to `writer`; every symbol is emitted as its own record ending in DO_BIND followed by DONE.
+    pub fn write(self: LazyBindingInfoTable, writer: anytype) !void {
+        for (self.symbols.items) |symbol| {
+            try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment)); // Segment id goes in the low 4 bits.
+            try leb.writeILEB128(writer, symbol.offset); // NOTE(review): signed LEB128 writer used under an opcode named ..._ULEB - confirm intended encoding.
+
+            if (symbol.dylib_ordinal > 15) { // Ordinal does not fit the 4-bit immediate; emit it as a ULEB128 operand.
+                try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
+                try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal));
+            } else if (symbol.dylib_ordinal > 0) { // Ordinals 1..15 are packed into the low 4 bits of the opcode byte.
+                try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal)));
+            } else { // Zero or negative ordinal: emitted with the SPECIAL opcode, truncated to 4 bits.
+                try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal)));
+            }
+
+            if (symbol.name) |name| { // Symbols without a name emit no SET_SYMBOL opcode at all.
+                try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
+                try writer.writeAll(name);
+                try writer.writeByte(0); // NUL terminator ending the name.
+            }
+
+            try writer.writeByte(macho.BIND_OPCODE_DO_BIND);
+            try writer.writeByte(macho.BIND_OPCODE_DONE); // Unlike the non-lazy table, DONE is written once per symbol.
+        }
+    }
+
+    /// Calculate size in bytes of this lazy binding info table, counting the same per-symbol opcodes and operands that `write` emits.
+    pub fn calcSize(self: *LazyBindingInfoTable) usize {
+        var size: usize = 0; // An empty table occupies no bytes.
+
+        for (self.symbols.items) |symbol| {
+            size += 1; // SET_SEGMENT_AND_OFFSET opcode byte.
+            size += sizeLEB128(symbol.offset); // NOTE(review): `write` uses leb.writeILEB128 here - confirm both agree on byte count for every offset.
+            size += 1; // Dylib ordinal opcode byte.
+            if (symbol.dylib_ordinal > 15) {
+                size += sizeLEB128(symbol.dylib_ordinal); // Operand bytes, present only when the ordinal exceeds 15.
+            }
+            if (symbol.name) |name| {
+                size += 1; // SET_SYMBOL opcode byte.
+                size += name.len; // Name bytes.
+                size += 1; // NUL terminator.
+            }
+            size += 2; // DO_BIND and DONE opcode bytes.
+        }
+
+        return size;
+    }
 };
src/link/MachO/Trie.zig
@@ -38,6 +38,7 @@ const macho = std.macho;
 const testing = std.testing;
 const assert = std.debug.assert;
 const Allocator = mem.Allocator;
+const sizeLEB128 = @import("../MachO.zig").sizeLEB128;
 
 pub const Node = struct {
     base: *Trie,
@@ -244,9 +245,9 @@ pub const Node = struct {
     fn finalize(self: *Node, offset_in_trie: usize) FinalizeResult {
         var node_size: usize = 0;
         if (self.terminal_info) |info| {
-            node_size += sizeULEB128Mem(info.export_flags);
-            node_size += sizeULEB128Mem(info.vmaddr_offset);
-            node_size += sizeULEB128Mem(node_size);
+            node_size += sizeLEB128(info.export_flags);
+            node_size += sizeLEB128(info.vmaddr_offset);
+            node_size += sizeLEB128(node_size);
         } else {
             node_size += 1; // 0x0 for non-terminal nodes
         }
@@ -254,7 +255,7 @@ pub const Node = struct {
 
         for (self.edges.items) |edge| {
             const next_node_offset = edge.to.trie_offset orelse 0;
-            node_size += edge.label.len + 1 + sizeULEB128Mem(next_node_offset);
+            node_size += edge.label.len + 1 + sizeLEB128(next_node_offset);
         }
 
         const trie_offset = self.trie_offset orelse 0;
@@ -264,18 +265,6 @@ pub const Node = struct {
 
         return .{ .node_size = node_size, .updated = updated };
     }
-
-    /// Calculates number of bytes in ULEB128 encoding of value.
-    fn sizeULEB128Mem(value: u64) usize {
-        var res: usize = 0;
-        var v = value;
-        while (true) {
-            v = v >> 7;
-            res += 1;
-            if (v == 0) break;
-        }
-        return res;
-    }
 };
 
 /// The root node of the trie.
@@ -380,9 +369,7 @@ pub fn read(self: *Trie, reader: anytype) ReadError!usize {
 }
 
 /// Write the trie to a byte stream.
-/// Caller owns the memory and needs to free it.
-/// Panics if the trie was not finalized using `finalize`
-/// before calling this method.
+/// Panics if the trie was not finalized using `finalize` before calling this method.
 pub fn write(self: Trie, writer: anytype) !usize {
     assert(!self.trie_dirty);
     var counting_writer = std.io.countingWriter(writer);
src/link/MachO.zig
@@ -810,47 +810,50 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void {
                 if (self.symtab_cmd_index == null or self.dysymtab_cmd_index == null) {
                     std.log.err("Incomplete Mach-O binary: no LC_SYMTAB or LC_DYSYMTAB load command found!", .{});
                     std.log.err("Without the symbol table, it is not possible to patch up the binary for cross-compilation.", .{});
-                    return error.NoSymbolTable;
+                    return error.NoSymbolTableFound;
                 }
 
                 // Parse symbol and string tables.
                 try self.parseSymbolTable();
                 try self.parseStringTable();
 
-                std.debug.print("Undef symbols\n", .{});
-                for (self.undef_symbols.items) |sym| {
-                    const name = self.string_table.items[sym.n_strx..];
-                    const len = blk: {
-                        var end: usize = 0;
-                        while (true) {
-                            if (name[end] == @as(u8, 0)) break;
-                            end += 1;
-                        }
-                        break :blk end;
-                    };
-                    std.debug.print("name={},sym={}\n", .{ name[0..len], sym });
+                // Parse dyld info
+                try self.parseBindingInfoTable();
+                try self.parseLazyBindingInfoTable();
+
+                // Update the dylib ordinals.
+                self.binding_info_table.dylib_ordinal = next_ordinal;
+                for (self.lazy_binding_info_table.symbols.items) |*symbol| {
+                    symbol.dylib_ordinal = next_ordinal;
                 }
 
-                // Parse dyld info
-                var symbols_by_name = std.StringHashMap(u16).init(self.base.allocator);
-                defer symbols_by_name.deinit();
-                try symbols_by_name.ensureCapacity(@intCast(u32, self.undef_symbols.items.len));
-
-                for (self.undef_symbols.items) |sym, i| {
-                    const name = self.string_table.items[sym.n_strx..];
-                    const len = blk: {
-                        var end: usize = 0;
-                        while (true) {
-                            if (name[end] == @as(u8, 0)) break;
-                            end += 1;
-                        }
-                        break :blk end;
-                    };
-                    symbols_by_name.putAssumeCapacityNoClobber(name[0..len], @intCast(u16, i));
+                // Write updated dyld info
+                const dyld_info = self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
+                {
+                    const size = self.binding_info_table.calcSize();
+                    assert(dyld_info.bind_size == size);
+
+                    var buffer = try self.base.allocator.alloc(u8, size);
+                    defer self.base.allocator.free(buffer);
+
+                    var stream = std.io.fixedBufferStream(buffer);
+                    try self.binding_info_table.write(stream.writer());
+
+                    try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off);
+                }
+                {
+                    const size = self.lazy_binding_info_table.calcSize();
+                    assert(dyld_info.lazy_bind_size == size);
+
+                    var buffer = try self.base.allocator.alloc(u8, size);
+                    defer self.base.allocator.free(buffer);
+
+                    var stream = std.io.fixedBufferStream(buffer);
+                    try self.lazy_binding_info_table.write(stream.writer());
+
+                    try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off);
                 }
 
-                try self.parseBindingInfoTable(symbols_by_name);
-                try self.parseLazyBindingInfoTable(symbols_by_name);
                 // Write updated load commands and the header
                 try self.writeLoadCommands();
                 try self.writeHeader();
@@ -1952,6 +1955,68 @@ fn writeExportTrie(self: *MachO) !void {
     self.cmd_table_dirty = true;
 }
 
+fn writeBindingInfoTable(self: *MachO) !void { // Serializes the binding info table, writes it at the next __LINKEDIT offset and updates the dyld info command.
+    const size = self.binding_info_table.calcSize(); // Exact serialized size; used to allocate the scratch buffer.
+    var buffer = try self.base.allocator.alloc(u8, size);
+    defer self.base.allocator.free(buffer);
+
+    var stream = std.io.fixedBufferStream(buffer);
+    try self.binding_info_table.write(stream.writer());
+
+    const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
+    const bind_size = @intCast(u32, mem.alignForward(buffer.len, @sizeOf(u64))); // Recorded size is rounded up to a multiple of 8 bytes.
+    dyld_info.bind_off = self.linkedit_segment_next_offset.?; // Table starts at the current next-offset cursor.
+    dyld_info.bind_size = bind_size;
+
+    log.debug("writing binding info table from 0x{x} to 0x{x}\n", .{ dyld_info.bind_off, dyld_info.bind_off + bind_size });
+
+    if (bind_size > buffer.len) {
+        // Pad out to align(8).
+        try self.base.file.?.pwriteAll(&[_]u8{0}, dyld_info.bind_off + bind_size); // NOTE(review): this offset is one past the aligned region's last byte - confirm whether `- 1` was intended.
+    }
+    try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off);
+
+    self.linkedit_segment_next_offset = dyld_info.bind_off + dyld_info.bind_size; // Advance the cursor past the aligned table.
+    // Advance size of __LINKEDIT segment
+    const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+    linkedit.inner.filesize += dyld_info.bind_size;
+    if (linkedit.inner.vmsize < linkedit.inner.filesize) { // Grow vmsize only when the file size has overtaken it.
+        linkedit.inner.vmsize = mem.alignForwardGeneric(u64, linkedit.inner.filesize, self.page_size);
+    }
+    self.cmd_table_dirty = true; // Load commands were modified above, so flag the table as dirty.
+}
+
+fn writeLazyBindingInfoTable(self: *MachO) !void { // Serializes the lazy binding info table, writes it at the next __LINKEDIT offset and updates the dyld info command.
+    const size = self.lazy_binding_info_table.calcSize(); // Exact serialized size; used to allocate the scratch buffer.
+    var buffer = try self.base.allocator.alloc(u8, size);
+    defer self.base.allocator.free(buffer);
+
+    var stream = std.io.fixedBufferStream(buffer);
+    try self.lazy_binding_info_table.write(stream.writer());
+
+    const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
+    const bind_size = @intCast(u32, mem.alignForward(buffer.len, @sizeOf(u64))); // Recorded size is rounded up to a multiple of 8 bytes.
+    dyld_info.lazy_bind_off = self.linkedit_segment_next_offset.?; // Table starts at the current next-offset cursor.
+    dyld_info.lazy_bind_size = bind_size;
+
+    log.debug("writing lazy binding info table from 0x{x} to 0x{x}\n", .{ dyld_info.lazy_bind_off, dyld_info.lazy_bind_off + bind_size });
+
+    if (bind_size > buffer.len) {
+        // Pad out to align(8).
+        try self.base.file.?.pwriteAll(&[_]u8{0}, dyld_info.lazy_bind_off + bind_size); // NOTE(review): this offset is one past the aligned region's last byte - confirm whether `- 1` was intended.
+    }
+    try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off);
+
+    self.linkedit_segment_next_offset = dyld_info.lazy_bind_off + dyld_info.lazy_bind_size; // Advance the cursor past the aligned table.
+    // Advance size of __LINKEDIT segment
+    const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+    linkedit.inner.filesize += dyld_info.lazy_bind_size;
+    if (linkedit.inner.vmsize < linkedit.inner.filesize) { // Grow vmsize only when the file size has overtaken it.
+        linkedit.inner.vmsize = mem.alignForwardGeneric(u64, linkedit.inner.filesize, self.page_size);
+    }
+    self.cmd_table_dirty = true; // Load commands were modified above, so flag the table as dirty.
+}
+
 fn writeStringTable(self: *MachO) !void {
     const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
     const needed_size = self.string_table.items.len;
@@ -2122,7 +2187,7 @@ fn parseStringTable(self: *MachO) !void {
     self.string_table.appendSliceAssumeCapacity(buffer);
 }
 
-fn parseBindingInfoTable(self: *MachO, symbols_by_name: std.StringHashMap(u16)) !void {
+fn parseBindingInfoTable(self: *MachO) !void {
     const dyld_info = self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
     var buffer = try self.base.allocator.alloc(u8, dyld_info.bind_size);
     defer self.base.allocator.free(buffer);
@@ -2130,10 +2195,10 @@ fn parseBindingInfoTable(self: *MachO, symbols_by_name: std.StringHashMap(u16))
     assert(nread == buffer.len);
 
     var stream = std.io.fixedBufferStream(buffer);
-    try self.binding_info_table.read(self.base.allocator, symbols_by_name, stream.reader());
+    try self.binding_info_table.read(stream.reader(), self.base.allocator);
 }
 
-fn parseLazyBindingInfoTable(self: *MachO, symbols_by_name: std.StringHashMap(u16)) !void {
+fn parseLazyBindingInfoTable(self: *MachO) !void {
     const dyld_info = self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
     var buffer = try self.base.allocator.alloc(u8, dyld_info.lazy_bind_size);
     defer self.base.allocator.free(buffer);
@@ -2141,5 +2206,17 @@ fn parseLazyBindingInfoTable(self: *MachO, symbols_by_name: std.StringHashMap(u1
     assert(nread == buffer.len);
 
     var stream = std.io.fixedBufferStream(buffer);
-    try self.lazy_binding_info_table.read(self.base.allocator, symbols_by_name, stream.reader());
+    try self.lazy_binding_info_table.read(stream.reader(), self.base.allocator);
+}
+
+/// Calculates number of bytes in LEB128 encoding of value by counting 7-bit groups until the remaining value is zero.
+pub fn sizeLEB128(value: anytype) usize {
+    var res: usize = 0;
+    var v = value; // Mutable copy; keeps the caller's integer type.
+    while (true) {
+        v = v >> 7; // Drop one 7-bit group.
+        res += 1; // Counted before the exit check, so the result is at least 1 even for 0.
+        if (v == 0) break; // NOTE(review): for negative signed inputs the shifted value may never reach 0 - confirm callers only pass non-negative values.
+    }
+    return res;
 }