Commit de66b65677

Jakub Konka <kubkon@jakubkonka.com>
2020-11-30 19:42:08
lld: start unifying load command logic
1 parent 0ef3071
Changed files (3)
src/link/MachO/commands.zig
@@ -0,0 +1,371 @@
+const std = @import("std");
+const fs = std.fs;
+const io = std.io;
+const mem = std.mem;
+const macho = std.macho;
+const testing = std.testing;
+
+const Allocator = std.mem.Allocator;
+const makeName = @import("../MachO.zig").makeStaticString;
+
+/// A single Mach-O load command, tagged by the in-memory representation used for it.
+///
+/// Fixed-layout commands are stored as the plain `std.macho` struct. Commands with a
+/// variable-length tail are stored as `SegmentCommand` or `GenericCommandWithData`,
+/// which hold heap memory; release them through `deinit`.
+pub const LoadCommand = union(enum) {
+    Segment: SegmentCommand,
+    DyldInfoOnly: macho.dyld_info_command,
+    Symtab: macho.symtab_command,
+    Dysymtab: macho.dysymtab_command,
+    Dylinker: GenericCommandWithData(macho.dylinker_command),
+    Dylib: GenericCommandWithData(macho.dylib_command),
+    Main: macho.entry_point_command,
+    VersionMin: macho.version_min_command,
+    SourceVersion: macho.source_version_command,
+    LinkeditData: macho.linkedit_data_command,
+    Unknown: GenericCommandWithData(macho.load_command),
+
+    /// Reads exactly one load command from `reader`.
+    ///
+    /// The generic `load_command` header is read first to learn `cmd` and `cmdsize`.
+    /// The whole command (`cmdsize` bytes) is then staged in a temporary buffer and
+    /// decoded from there, so on success `reader` has advanced by `cmdsize` bytes.
+    ///
+    /// Returns `error.MalformedLoadCommand` when `cmdsize` is smaller than the generic
+    /// header, in addition to allocation and reader errors.
+    /// The caller owns the result and must call `deinit` with the same allocator.
+    pub fn read(allocator: *Allocator, reader: anytype) !LoadCommand {
+        const header = try reader.readStruct(macho.load_command);
+        // A corrupt `cmdsize` below the header size would make the header copy below
+        // write past the end of `buffer`, so reject it up front.
+        if (header.cmdsize < @sizeOf(macho.load_command)) return error.MalformedLoadCommand;
+
+        var buffer = try allocator.alloc(u8, header.cmdsize);
+        defer allocator.free(buffer);
+        // Put the already-consumed header back in front of the remaining payload so
+        // the per-command decoders see the complete command.
+        mem.copy(u8, buffer, mem.asBytes(&header));
+        try reader.readNoEof(buffer[@sizeOf(macho.load_command)..]);
+        var stream = io.fixedBufferStream(buffer);
+
+        return switch (header.cmd) {
+            macho.LC_SEGMENT_64 => LoadCommand{
+                .Segment = try SegmentCommand.read(allocator, stream.reader()),
+            },
+            macho.LC_DYLD_INFO, macho.LC_DYLD_INFO_ONLY => LoadCommand{
+                .DyldInfoOnly = try stream.reader().readStruct(macho.dyld_info_command),
+            },
+            macho.LC_SYMTAB => LoadCommand{
+                .Symtab = try stream.reader().readStruct(macho.symtab_command),
+            },
+            macho.LC_DYSYMTAB => LoadCommand{
+                .Dysymtab = try stream.reader().readStruct(macho.dysymtab_command),
+            },
+            macho.LC_ID_DYLINKER, macho.LC_LOAD_DYLINKER, macho.LC_DYLD_ENVIRONMENT => LoadCommand{
+                .Dylinker = try GenericCommandWithData(macho.dylinker_command).read(allocator, stream.reader()),
+            },
+            macho.LC_ID_DYLIB, macho.LC_LOAD_WEAK_DYLIB, macho.LC_LOAD_DYLIB, macho.LC_REEXPORT_DYLIB => LoadCommand{
+                .Dylib = try GenericCommandWithData(macho.dylib_command).read(allocator, stream.reader()),
+            },
+            macho.LC_MAIN => LoadCommand{
+                .Main = try stream.reader().readStruct(macho.entry_point_command),
+            },
+            macho.LC_VERSION_MIN_MACOSX, macho.LC_VERSION_MIN_IPHONEOS, macho.LC_VERSION_MIN_WATCHOS, macho.LC_VERSION_MIN_TVOS => LoadCommand{
+                .VersionMin = try stream.reader().readStruct(macho.version_min_command),
+            },
+            macho.LC_SOURCE_VERSION => LoadCommand{
+                .SourceVersion = try stream.reader().readStruct(macho.source_version_command),
+            },
+            macho.LC_FUNCTION_STARTS, macho.LC_DATA_IN_CODE, macho.LC_CODE_SIGNATURE => LoadCommand{
+                .LinkeditData = try stream.reader().readStruct(macho.linkedit_data_command),
+            },
+            // Anything not modelled above is kept as header plus raw trailing bytes.
+            else => LoadCommand{
+                .Unknown = try GenericCommandWithData(macho.load_command).read(allocator, stream.reader()),
+            },
+        };
+    }
+
+    /// Writes the command to `writer`: the raw struct bytes for fixed-layout commands,
+    /// or whatever the payload type's own `write` emits for the others.
+    pub fn write(self: LoadCommand, writer: anytype) !void {
+        return switch (self) {
+            .DyldInfoOnly => |x| writeStruct(x, writer),
+            .Symtab => |x| writeStruct(x, writer),
+            .Dysymtab => |x| writeStruct(x, writer),
+            .Main => |x| writeStruct(x, writer),
+            .VersionMin => |x| writeStruct(x, writer),
+            .SourceVersion => |x| writeStruct(x, writer),
+            .LinkeditData => |x| writeStruct(x, writer),
+            .Segment => |x| x.write(writer),
+            .Dylinker => |x| x.write(writer),
+            .Dylib => |x| x.write(writer),
+            .Unknown => |x| x.write(writer),
+        };
+    }
+
+    /// Returns the `cmd` field of the underlying command header.
+    pub fn cmd(self: LoadCommand) u32 {
+        return switch (self) {
+            .DyldInfoOnly => |x| x.cmd,
+            .Symtab => |x| x.cmd,
+            .Dysymtab => |x| x.cmd,
+            .Main => |x| x.cmd,
+            .VersionMin => |x| x.cmd,
+            .SourceVersion => |x| x.cmd,
+            .LinkeditData => |x| x.cmd,
+            .Segment => |x| x.inner.cmd,
+            .Dylinker => |x| x.inner.cmd,
+            .Dylib => |x| x.inner.cmd,
+            .Unknown => |x| x.inner.cmd,
+        };
+    }
+
+    /// Returns the `cmdsize` field of the underlying command header.
+    pub fn cmdsize(self: LoadCommand) u32 {
+        return switch (self) {
+            .DyldInfoOnly => |x| x.cmdsize,
+            .Symtab => |x| x.cmdsize,
+            .Dysymtab => |x| x.cmdsize,
+            .Main => |x| x.cmdsize,
+            .VersionMin => |x| x.cmdsize,
+            .SourceVersion => |x| x.cmdsize,
+            .LinkeditData => |x| x.cmdsize,
+            .Segment => |x| x.inner.cmdsize,
+            .Dylinker => |x| x.inner.cmdsize,
+            .Dylib => |x| x.inner.cmdsize,
+            .Unknown => |x| x.inner.cmdsize,
+        };
+    }
+
+    /// Releases memory held by the payload. Fixed-layout commands hold none, so this
+    /// is a no-op for them.
+    pub fn deinit(self: *LoadCommand, allocator: *Allocator) void {
+        return switch (self.*) {
+            .Segment => |*x| x.deinit(allocator),
+            .Dylinker => |*x| x.deinit(allocator),
+            .Dylib => |*x| x.deinit(allocator),
+            .Unknown => |*x| x.deinit(allocator),
+            else => {},
+        };
+    }
+
+    /// Writes the in-memory bytes of `command` to `writer` unchanged.
+    fn writeStruct(command: anytype, writer: anytype) !void {
+        return writer.writeAll(mem.asBytes(&command));
+    }
+
+    /// Returns true when both commands have the same tag and equal payloads.
+    fn eql(self: LoadCommand, other: LoadCommand) bool {
+        if (@as(@TagType(LoadCommand), self) != @as(@TagType(LoadCommand), other)) return false;
+        return switch (self) {
+            .DyldInfoOnly => |x| eqlStruct(x, other.DyldInfoOnly),
+            .Symtab => |x| eqlStruct(x, other.Symtab),
+            .Dysymtab => |x| eqlStruct(x, other.Dysymtab),
+            .Main => |x| eqlStruct(x, other.Main),
+            .VersionMin => |x| eqlStruct(x, other.VersionMin),
+            .SourceVersion => |x| eqlStruct(x, other.SourceVersion),
+            .LinkeditData => |x| eqlStruct(x, other.LinkeditData),
+            .Segment => |x| x.eql(other.Segment),
+            .Dylinker => |x| x.eql(other.Dylinker),
+            .Dylib => |x| x.eql(other.Dylib),
+            .Unknown => |x| x.eql(other.Unknown),
+        };
+    }
+
+    /// Byte-wise comparison of the in-memory representation of two values.
+    fn eqlStruct(lhs: anytype, rhs: anytype) bool {
+        return mem.eql(u8, mem.asBytes(&lhs), mem.asBytes(&rhs));
+    }
+};
+
+/// A `segment_command_64` header together with the `section_64` headers that follow it.
+pub const SegmentCommand = struct {
+    inner: macho.segment_command_64,
+    /// Section headers in file order, keyed by section name with trailing NUL bytes
+    /// removed. The map does not own its keys. Keys produced by `read` point into the
+    /// `sectname` field of the stored value, so they are only valid until the entry
+    /// storage is reallocated or reordered (adding or removing sections).
+    sections: std.StringArrayHashMapUnmanaged(macho.section_64) = .{},
+
+    /// Name of `section` with the trailing NUL padding stripped; the returned slice
+    /// points into `section.sectname`.
+    fn sectionName(section: *const macho.section_64) []const u8 {
+        return mem.trimRight(u8, section.sectname[0..], &[_]u8{0});
+    }
+
+    /// Reads the segment header followed by `nsects` section headers from `reader`.
+    /// On failure nothing is leaked; on success the caller must call `deinit`.
+    pub fn read(alloc: *Allocator, reader: anytype) !SegmentCommand {
+        const inner = try reader.readStruct(macho.segment_command_64);
+        var segment = SegmentCommand{
+            .inner = inner,
+        };
+        // Give the table back if a later section header cannot be read.
+        errdefer segment.sections.deinit(alloc);
+        try segment.sections.ensureCapacity(alloc, inner.nsects);
+
+        var i: usize = 0;
+        while (i < inner.nsects) : (i += 1) {
+            const section = try reader.readStruct(macho.section_64);
+            segment.sections.putAssumeCapacityNoClobber(sectionName(&section), section);
+            // `section` lives on the stack of this loop iteration, so a key slicing
+            // into it would dangle. Re-point the key at the copy just appended to the
+            // map; the bytes (and therefore the stored hash) are unchanged.
+            const entries = segment.sections.items();
+            const stored = &entries[entries.len - 1];
+            stored.key = sectionName(&stored.value);
+        }
+
+        return segment;
+    }
+
+    /// Writes the segment header followed by every stored section header, in map order.
+    pub fn write(self: SegmentCommand, writer: anytype) !void {
+        try writer.writeAll(mem.asBytes(&self.inner));
+
+        for (self.sections.items()) |entry| {
+            try writer.writeAll(mem.asBytes(&entry.value));
+        }
+    }
+
+    /// Frees the section table. Keys are not freed because the map does not own them.
+    pub fn deinit(self: *SegmentCommand, alloc: *Allocator) void {
+        self.sections.deinit(alloc);
+    }
+
+    /// Returns true when both headers are byte-identical and both section tables hold
+    /// the same keys and values in the same order.
+    fn eql(self: SegmentCommand, other: SegmentCommand) bool {
+        if (!mem.eql(u8, mem.asBytes(&self.inner), mem.asBytes(&other.inner))) return false;
+        const lhs = self.sections.items();
+        const rhs = other.sections.items();
+        // `nsects` is only a header field; compare what is actually stored so that a
+        // table shorter than `nsects` cannot be indexed out of bounds.
+        if (lhs.len != rhs.len) return false;
+        for (lhs) |entry, i| {
+            if (!mem.eql(u8, entry.key, rhs[i].key)) return false;
+            if (!mem.eql(u8, mem.asBytes(&entry.value), mem.asBytes(&rhs[i].value))) return false;
+        }
+        return true;
+    }
+};
+
+/// Builds a load-command container consisting of the fixed-size header `Cmd` followed
+/// by the raw trailing bytes of the command (`cmdsize - @sizeOf(Cmd)` of them).
+/// `Cmd` must have a `cmdsize` field.
+pub fn GenericCommandWithData(comptime Cmd: type) type {
+    return struct {
+        inner: Cmd,
+        /// Trailing bytes of the command. This field remains undefined until `read`
+        /// is called or the caller assigns it; `deinit` must not be called before then.
+        data: []u8 = undefined,
+
+        const Self = @This();
+
+        /// Reads `Cmd` and then the remaining `cmdsize - @sizeOf(Cmd)` bytes.
+        /// Returns `error.MalformedLoadCommand` when `cmdsize` is smaller than `Cmd`.
+        /// On success the caller owns `data` and releases it with `deinit`.
+        pub fn read(allocator: *Allocator, reader: anytype) !Self {
+            const inner = try reader.readStruct(Cmd);
+            // Guard the subtraction below: a corrupt `cmdsize` would otherwise underflow.
+            if (inner.cmdsize < @sizeOf(Cmd)) return error.MalformedLoadCommand;
+            var data = try allocator.alloc(u8, inner.cmdsize - @sizeOf(Cmd));
+            errdefer allocator.free(data);
+            try reader.readNoEof(data);
+            return Self{
+                .inner = inner,
+                .data = data,
+            };
+        }
+
+        /// Writes the header bytes followed by `data`.
+        pub fn write(self: Self, writer: anytype) !void {
+            try writer.writeAll(mem.asBytes(&self.inner));
+            try writer.writeAll(self.data);
+        }
+
+        /// Frees `data` with `allocator`, which must be the allocator that produced it.
+        pub fn deinit(self: *Self, allocator: *Allocator) void {
+            allocator.free(self.data);
+        }
+
+        /// Returns true when header bytes and trailing data are both identical.
+        pub fn eql(self: Self, other: Self) bool {
+            if (!mem.eql(u8, mem.asBytes(&self.inner), mem.asBytes(&other.inner))) return false;
+            return mem.eql(u8, self.data, other.data);
+        }
+    };
+}
+
+/// Decodes one load command from `buffer` and expects it to equal `expected`
+/// according to `expected.eql`.
+fn testRead(allocator: *Allocator, buffer: []const u8, expected: anytype) !void {
+    var fbs = io.fixedBufferStream(buffer);
+    var decoded = try LoadCommand.read(allocator, fbs.reader());
+    defer decoded.deinit(allocator);
+    const matches = expected.eql(decoded);
+    testing.expect(matches);
+}
+
+/// Encodes `cmd` into `buffer` and expects the first `expected.len` bytes of
+/// `buffer` to equal `expected`.
+fn testWrite(buffer: []u8, cmd: LoadCommand, expected: []const u8) !void {
+    var fbs = io.fixedBufferStream(buffer);
+    const writer = fbs.writer();
+    try cmd.write(writer);
+    const produced = buffer[0..expected.len];
+    testing.expect(mem.eql(u8, expected, produced));
+}
+
+// Round-trips a segment command holding a single section through read and write.
+test "read-write segment command" {
+    var gpa = testing.allocator;
+    const encoded = [_]u8{
+        0x19, 0x00, 0x00, 0x00, // cmd
+        0x98, 0x00, 0x00, 0x00, // cmdsize
+        0x5f, 0x5f, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // segname
+        0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, // vmaddr
+        0x00, 0x80, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, // vmsize
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // fileoff
+        0x00, 0x80, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, // filesize
+        0x07, 0x00, 0x00, 0x00, // maxprot
+        0x05, 0x00, 0x00, 0x00, // initprot
+        0x01, 0x00, 0x00, 0x00, // nsects
+        0x00, 0x00, 0x00, 0x00, // flags
+        0x5f, 0x5f, 0x74, 0x65, 0x78, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // sectname
+        0x5f, 0x5f, 0x54, 0x45, 0x58, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // segname
+        0x00, 0x40, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, // address
+        0xc0, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // size
+        0x00, 0x40, 0x00, 0x00, // offset
+        0x02, 0x00, 0x00, 0x00, // alignment
+        0x00, 0x00, 0x00, 0x00, // reloff
+        0x00, 0x00, 0x00, 0x00, // nreloc
+        0x00, 0x04, 0x00, 0x80, // flags
+        0x00, 0x00, 0x00, 0x00, // reserved1
+        0x00, 0x00, 0x00, 0x00, // reserved2
+        0x00, 0x00, 0x00, 0x00, // reserved3
+    };
+
+    // In-memory equivalent of `encoded`.
+    var segment = SegmentCommand{
+        .inner = .{
+            .cmd = macho.LC_SEGMENT_64,
+            .cmdsize = 0x98,
+            .segname = makeName("__TEXT"),
+            .vmaddr = 0x100000000,
+            .vmsize = 0x48000,
+            .fileoff = 0,
+            .filesize = 0x48000,
+            .maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE,
+            .initprot = macho.VM_PROT_EXECUTE | macho.VM_PROT_READ,
+            .nsects = 1,
+            .flags = 0,
+        },
+    };
+    try segment.sections.putNoClobber(gpa, "__text", .{
+        .sectname = makeName("__text"),
+        .segname = makeName("__TEXT"),
+        .addr = 0x100004000,
+        .size = 0x1c0,
+        .offset = 0x4000,
+        .@"align" = 2,
+        .reloff = 0,
+        .nreloc = 0,
+        .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS,
+        .reserved1 = 0,
+        .reserved2 = 0,
+        .reserved3 = 0,
+    });
+    defer segment.deinit(gpa);
+
+    const expected = LoadCommand{ .Segment = segment };
+    try testRead(gpa, encoded[0..], expected);
+
+    var scratch: [encoded.len]u8 = undefined;
+    try testWrite(scratch[0..], expected, encoded[0..]);
+}
+
+// Round-trips a dylib command, whose payload follows the fixed header, through read and write.
+test "read-write generic command with data" {
+    var gpa = testing.allocator;
+    const encoded = [_]u8{
+        0x0c, 0x00, 0x00, 0x00, // cmd
+        0x20, 0x00, 0x00, 0x00, // cmdsize
+        0x18, 0x00, 0x00, 0x00, // name
+        0x02, 0x00, 0x00, 0x00, // timestamp
+        0x00, 0x00, 0x00, 0x00, // current_version
+        0x00, 0x00, 0x00, 0x00, // compatibility_version
+        0x2f, 0x75, 0x73, 0x72, 0x00, 0x00, 0x00, 0x00, // data
+    };
+
+    var dylib = GenericCommandWithData(macho.dylib_command){
+        .inner = .{
+            .cmd = macho.LC_LOAD_DYLIB,
+            .cmdsize = 0x20,
+            .dylib = .{
+                .name = 0x18,
+                .timestamp = 2,
+                .current_version = 0,
+                .compatibility_version = 0,
+            },
+        },
+    };
+    // The trailing payload mirrors the last row of `encoded`.
+    const payload = [_]u8{ 0x2f, 0x75, 0x73, 0x72, 0x0, 0x0, 0x0, 0x0 };
+    dylib.data = try gpa.alloc(u8, payload.len);
+    defer gpa.free(dylib.data);
+    mem.copy(u8, dylib.data, payload[0..]);
+
+    const expected = LoadCommand{ .Dylib = dylib };
+    try testRead(gpa, encoded[0..], expected);
+
+    var scratch: [encoded.len]u8 = undefined;
+    try testWrite(scratch[0..], expected, encoded[0..]);
+}
+
+// Round-trips a fixed-layout command (LC_MAIN) through read and write.
+test "read-write C struct command" {
+    var gpa = testing.allocator;
+    const encoded = [_]u8{
+        0x28, 0x00, 0x00, 0x80, // cmd
+        0x18, 0x00, 0x00, 0x00, // cmdsize
+        0x04, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // entryoff
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // stacksize
+    };
+    const entry_point = macho.entry_point_command{
+        .cmd = macho.LC_MAIN,
+        .cmdsize = 0x18,
+        .entryoff = 0x4104,
+        .stacksize = 0,
+    };
+    const expected = LoadCommand{ .Main = entry_point };
+    try testRead(gpa, encoded[0..], expected);
+
+    var scratch: [encoded.len]u8 = undefined;
+    try testWrite(scratch[0..], expected, encoded[0..]);
+}
src/link/MachO/Parser.zig
@@ -0,0 +1,80 @@
+const Parser = @This();
+
+const std = @import("std");
+const fs = std.fs;
+const io = std.io;
+const mem = std.mem;
+const macho = std.macho;
+
+const Allocator = std.mem.Allocator;
+
+const LoadCommand = @import("commands.zig").LoadCommand;
+
+allocator: *Allocator,
+
+/// Mach-O header
+header: ?macho.mach_header_64 = null,
+
+/// Load commands
+load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
+
+text_cmd_index: ?usize = null,
+
+linkedit_cmd_index: ?usize = null,
+linkedit_cmd_offset: ?u64 = null,
+
+code_sig_cmd_offset: ?u64 = null,
+
+end_pos: ?u64 = null,
+
+/// Creates an empty parser that uses `allocator` for everything it allocates.
+/// All other fields start at their declared defaults.
+pub fn init(allocator: *Allocator) Parser {
+    return Parser{ .allocator = allocator };
+}
+
+/// Reads the Mach-O header and all `ncmds` load commands from `reader`.
+///
+/// While reading it records:
+/// * the index of the `__TEXT` segment command,
+/// * the index and file offset of the `__LINKEDIT` segment command,
+/// * `end_pos`, the end of the string table named by the symtab command,
+/// * `code_sig_cmd_offset`, the file offset just past the last load command.
+///
+/// On error, commands read so far remain in `load_commands`; `deinit` releases them.
+pub fn parse(self: *Parser, reader: anytype) !void {
+    const header = try reader.readStruct(macho.mach_header_64);
+    self.header = header;
+
+    const ncmds = header.ncmds;
+    try self.load_commands.ensureCapacity(self.allocator, ncmds);
+
+    // Running file offset of the command about to be read.
+    var off: u64 = @sizeOf(macho.mach_header_64);
+    // `ncmds` is a u32, so the counter must be at least that wide to avoid overflow.
+    var i: usize = 0;
+    while (i < ncmds) : (i += 1) {
+        const cmd = try LoadCommand.read(self.allocator, reader);
+        switch (cmd) {
+            .Segment => |seg| {
+                const segname = mem.trimRight(u8, seg.inner.segname[0..], &[_]u8{0});
+                if (mem.eql(u8, segname, "__LINKEDIT")) {
+                    self.linkedit_cmd_index = i;
+                    self.linkedit_cmd_offset = off;
+                } else if (mem.eql(u8, segname, "__TEXT")) {
+                    self.text_cmd_index = i;
+                }
+            },
+            .Symtab => |symtab| {
+                // Widen before adding: both fields are u32 and their sum may not fit.
+                self.end_pos = @as(u64, symtab.stroff) + symtab.strsize;
+            },
+            else => {},
+        }
+        off += cmd.cmdsize();
+        self.load_commands.appendAssumeCapacity(cmd);
+    }
+
+    self.code_sig_cmd_offset = off;
+
+    // TODO parse memory mapped segments
+}
+
+/// Convenience wrapper: parses from `file`'s reader. See `parse`.
+pub fn parseFile(self: *Parser, file: fs.File) !void {
+    const reader = file.reader();
+    try self.parse(reader);
+}
+
+/// Releases every parsed load command and then the list that holds them.
+pub fn deinit(self: *Parser) void {
+    const allocator = self.allocator;
+    for (self.load_commands.items) |*load_command| load_command.deinit(allocator);
+    self.load_commands.deinit(allocator);
+}
src/link/MachO.zig
@@ -813,7 +813,7 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void {
             // Pad out space for code signature
             const text_cmd = parser.load_commands.items[parser.text_cmd_index.?].Segment.inner;
             const dataoff = @intCast(u32, mem.alignForward(parser.end_pos.?, @sizeOf(u64)));
-            const datasize = 0x1000;
+            const datasize = 0x400000;
             const code_sig = macho.linkedit_data_command{
                 .cmd = macho.LC_CODE_SIGNATURE,
                 .cmdsize = @sizeOf(macho.linkedit_data_command),
@@ -1600,7 +1600,7 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64,
     return vaddr;
 }
 
-fn makeStaticString(comptime bytes: []const u8) [16]u8 {
+pub fn makeStaticString(comptime bytes: []const u8) [16]u8 {
     var buf = [_]u8{0} ** 16;
     if (bytes.len > buf.len) @compileError("string too long; max 16 bytes");
     mem.copy(u8, buf[0..], bytes);
@@ -1994,3 +1994,10 @@ fn satMul(a: anytype, b: anytype) @TypeOf(a, b) {
     const T = @TypeOf(a, b);
     return std.math.mul(T, a, b) catch std.math.maxInt(T);
 }
+
+test "" {
+    // TODO: referencing every declaration of this file surprisingly causes a
+    // linking error (_linkWithLLD symbol missing for arch), so this stays disabled:
+    // _ = std.testing.refAllDecls(@This());
+    const commands = @import("MachO/commands.zig");
+    _ = std.testing.refAllDecls(commands);
+}