Commit b58a2a4de6

Jakub Konka <kubkon@jakubkonka.com>
2020-12-01 23:39:07
lld+macho: move parsing logic into MachO
1 parent ed18046
Changed files (2)
src
src/link/MachO/Parser.zig
@@ -1,82 +0,0 @@
-const Parser = @This();
-
-const std = @import("std");
-const fs = std.fs;
-const io = std.io;
-const mem = std.mem;
-const macho = std.macho;
-
-const Allocator = std.mem.Allocator;
-
-const LoadCommand = @import("commands.zig").LoadCommand;
-
-allocator: *Allocator,
-
-/// Mach-O header
-header: ?macho.mach_header_64 = null,
-
-/// Load commands
-load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
-
-text_cmd_index: ?u16 = null,
-
-linkedit_cmd_index: ?u16 = null,
-linkedit_cmd_offset: ?u64 = null,
-
-code_sig_cmd_offset: ?u64 = null,
-
-end_pos: ?u64 = null,
-
-pub fn init(allocator: *Allocator) Parser {
-    return .{ .allocator = allocator };
-}
-
-pub fn parse(self: *Parser, reader: anytype) !void {
-    self.header = try reader.readStruct(macho.mach_header_64);
-
-    const ncmds = self.header.?.ncmds;
-    try self.load_commands.ensureCapacity(self.allocator, ncmds);
-
-    var off: u64 = @sizeOf(macho.mach_header_64);
-    var i: u16 = 0;
-    while (i < ncmds) : (i += 1) {
-        const cmd = try LoadCommand.read(self.allocator, reader);
-        switch (cmd.cmd()) {
-            macho.LC_SEGMENT_64 => {
-                const x = cmd.Segment;
-                if (mem.eql(u8, parseName(&x.inner.segname), "__LINKEDIT")) {
-                    self.linkedit_cmd_index = i;
-                    self.linkedit_cmd_offset = off;
-                } else if (mem.eql(u8, parseName(&x.inner.segname), "__TEXT")) {
-                    self.text_cmd_index = i;
-                }
-            },
-            macho.LC_SYMTAB => {
-                const x = cmd.Symtab;
-                self.end_pos = x.stroff + x.strsize;
-            },
-            else => {},
-        }
-        off += cmd.cmdsize();
-        self.load_commands.appendAssumeCapacity(cmd);
-    }
-
-    self.code_sig_cmd_offset = off;
-
-    // TODO parse memory mapped segments
-}
-
-pub fn parseFile(self: *Parser, file: fs.File) !void {
-    return self.parse(file.reader());
-}
-
-pub fn deinit(self: *Parser) void {
-    for (self.load_commands.items) |*cmd| {
-        cmd.deinit(self.allocator);
-    }
-    self.load_commands.deinit(self.allocator);
-}
-
-fn parseName(name: *const [16]u8) []const u8 {
-    return mem.trimRight(u8, name.*[0..], &[_]u8{0});
-}
src/link/MachO.zig
@@ -23,7 +23,6 @@ const target_util = @import("../target.zig");
 
 const Trie = @import("MachO/Trie.zig");
 const CodeSignature = @import("MachO/CodeSignature.zig");
-const Parser = @import("MachO/Parser.zig");
 
 usingnamespace @import("MachO/commands.zig");
 
@@ -35,6 +34,9 @@ base: File,
 /// For x86_64 that's 4KB, whereas for aarch64, that's 16KB.
 page_size: u16,
 
+/// Mach-O header
+header: ?macho.mach_header_64 = null,
+
 /// Table of all load commands
 load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
 /// __PAGEZERO segment
@@ -105,8 +107,6 @@ offset_table: std.ArrayListUnmanaged(u64) = .{},
 error_flags: File.ErrorFlags = File.ErrorFlags{},
 
 cmd_table_dirty: bool = false,
-dylinker_cmd_dirty: bool = false,
-libsystem_cmd_dirty: bool = false,
 
 /// A list of text blocks that have surplus capacity. This list can have false
 /// positives, as functions grow and shrink over time, only sometimes being added
@@ -325,7 +325,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void {
 
     if (self.cmd_table_dirty) {
         try self.writeLoadCommands();
-        try self.writeMachOHeader();
+        try self.writeHeader();
         self.cmd_table_dirty = false;
     }
 
@@ -725,66 +725,47 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void {
 
             // At this stage, LLD has done its job. It is time to patch the resultant
             // binaries up!
-            var parser = Parser.init(self.base.allocator);
-            defer parser.deinit();
             const out_file = try directory.handle.openFile(full_out_path, .{ .write = true });
-            defer out_file.close();
-            try parser.parseFile(out_file);
-            // Pad out space for code signature
-            const text_cmd = parser.load_commands.items[parser.text_cmd_index.?].Segment.inner;
-            const dataoff = @intCast(u32, mem.alignForward(parser.end_pos.?, @sizeOf(u64)));
-            const emit = self.base.options.emit.?;
-            const datasize = CodeSignature.calcCodeSignaturePadding(emit.sub_path, dataoff);
-            const code_sig = macho.linkedit_data_command{
-                .cmd = macho.LC_CODE_SIGNATURE,
-                .cmdsize = @sizeOf(macho.linkedit_data_command),
-                .dataoff = dataoff,
-                .datasize = datasize,
-            };
-            const linkedit_seg = parser.load_commands.items[parser.linkedit_cmd_index.?].Segment.inner;
-            const linkedit = macho.segment_command_64{
-                .cmd = linkedit_seg.cmd,
-                .cmdsize = linkedit_seg.cmdsize,
-                .segname = linkedit_seg.segname,
-                .vmaddr = linkedit_seg.vmaddr,
-                .vmsize = mem.alignForwardGeneric(u64, linkedit_seg.vmsize + datasize, self.page_size),
-                .fileoff = linkedit_seg.fileoff,
-                .filesize = linkedit_seg.filesize + (dataoff - parser.end_pos.?) + datasize,
-                .maxprot = linkedit_seg.maxprot,
-                .initprot = linkedit_seg.initprot,
-                .nsects = linkedit_seg.nsects,
-                .flags = linkedit_seg.flags,
-            };
-            const header_cmd = parser.header.?;
-            const header = macho.mach_header_64{
-                .magic = header_cmd.magic,
-                .cputype = header_cmd.cputype,
-                .cpusubtype = header_cmd.cpusubtype,
-                .filetype = header_cmd.filetype,
-                .ncmds = header_cmd.ncmds + 1,
-                .sizeofcmds = header_cmd.sizeofcmds + @sizeOf(macho.linkedit_data_command),
-                .flags = header_cmd.flags,
-                .reserved = header_cmd.reserved,
-            };
-            try out_file.pwriteAll(&[_]u8{0}, code_sig.dataoff + code_sig.datasize);
-            try out_file.pwriteAll(mem.sliceAsBytes(&[_]macho.linkedit_data_command{code_sig}), parser.code_sig_cmd_offset.?);
-            try out_file.pwriteAll(mem.sliceAsBytes(&[_]macho.segment_command_64{linkedit}), parser.linkedit_cmd_offset.?);
-            try out_file.pwriteAll(mem.sliceAsBytes(&[_]macho.mach_header_64{header}), 0);
-            // Generate adhoc code signature
-            var signature = CodeSignature.init(self.base.allocator);
-            defer signature.deinit();
-            try signature.calcAdhocSignature(
-                out_file,
-                emit.sub_path,
-                text_cmd,
-                code_sig,
-                self.base.options.output_mode,
-            );
-            var buffer = try self.base.allocator.alloc(u8, signature.size());
-            defer self.base.allocator.free(buffer);
-            signature.write(buffer);
-            try out_file.pwriteAll(buffer, code_sig.dataoff);
-            try emit.directory.handle.copyFile(emit.sub_path, emit.directory.handle, emit.sub_path, .{});
+            try self.parseFromFile(out_file);
+            if (self.code_signature_cmd_index == null) {
+                const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+                const text_section = text_segment.sections.items[self.text_section_index.?];
+                const after_last_cmd_offset = self.header.?.sizeofcmds + @sizeOf(macho.mach_header_64);
+                const needed_size = @sizeOf(macho.linkedit_data_command);
+                if (needed_size + after_last_cmd_offset > text_section.offset) {
+                    // TODO We are in the position to be able to increase the padding by moving all sections
+                    // by the required offset, but this requires a little bit more thinking and bookkeeping.
+                    // For now, return an error informing the user of the problem.
+                    std.debug.print("Not enough padding between load commands and start of __text section:\n", .{});
+                    std.debug.print("Offset after last load command: 0x{x}\n", .{after_last_cmd_offset});
+                    std.debug.print("Beginning of __text section: 0x{x}\n", .{text_section.offset});
+                    std.debug.print("Needed size: 0x{x}\n", .{needed_size});
+                    return error.NotEnoughPadding;
+                }
+                const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+                // TODO This is clunky.
+                self.linkedit_segment_next_offset = @intCast(u32, mem.alignForwardGeneric(u64, linkedit_segment.inner.fileoff + linkedit_segment.inner.filesize, @sizeOf(u64)));
+                // Add code signature load command
+                self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len);
+                try self.load_commands.append(self.base.allocator, .{
+                    .LinkeditData = .{
+                        .cmd = macho.LC_CODE_SIGNATURE,
+                        .cmdsize = @sizeOf(macho.linkedit_data_command),
+                        .dataoff = 0,
+                        .datasize = 0,
+                    },
+                });
+                // Pad out space for code signature
+                try self.writeCodeSignaturePadding();
+                // Write updated load commands and the header
+                try self.writeLoadCommands();
+                try self.writeHeader();
+                // Generate adhoc code signature
+                try self.writeCodeSignature();
+                // Move file in-place to please the kernel
+                const emit = self.base.options.emit.?;
+                try emit.directory.handle.copyFile(emit.sub_path, emit.directory.handle, emit.sub_path, .{});
+            }
         }
     }
 
@@ -1132,6 +1113,53 @@ pub fn populateMissingMetadata(self: *MachO) !void {
         .Lib => return error.TODOImplementWritingLibFiles,
     }
 
+    if (self.header == null) {
+        var header: macho.mach_header_64 = undefined;
+        header.magic = macho.MH_MAGIC_64;
+
+        const CpuInfo = struct {
+            cpu_type: macho.cpu_type_t,
+            cpu_subtype: macho.cpu_subtype_t,
+        };
+
+        const cpu_info: CpuInfo = switch (self.base.options.target.cpu.arch) {
+            .aarch64 => .{
+                .cpu_type = macho.CPU_TYPE_ARM64,
+                .cpu_subtype = macho.CPU_SUBTYPE_ARM_ALL,
+            },
+            .x86_64 => .{
+                .cpu_type = macho.CPU_TYPE_X86_64,
+                .cpu_subtype = macho.CPU_SUBTYPE_X86_64_ALL,
+            },
+            else => return error.UnsupportedMachOArchitecture,
+        };
+        header.cputype = cpu_info.cpu_type;
+        header.cpusubtype = cpu_info.cpu_subtype;
+
+        const filetype: u32 = switch (self.base.options.output_mode) {
+            .Exe => macho.MH_EXECUTE,
+            .Obj => macho.MH_OBJECT,
+            .Lib => switch (self.base.options.link_mode) {
+                .Static => return error.TODOStaticLibMachOType,
+                .Dynamic => macho.MH_DYLIB,
+            },
+        };
+        header.filetype = filetype;
+        // These will get populated at the end of flushing the results to file.
+        header.ncmds = 0;
+        header.sizeofcmds = 0;
+
+        switch (self.base.options.output_mode) {
+            .Exe => {
+                header.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE;
+            },
+            else => {
+                header.flags = 0;
+            },
+        }
+        header.reserved = 0;
+        self.header = header;
+    }
     if (self.pagezero_segment_cmd_index == null) {
         self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
         try self.load_commands.append(self.base.allocator, .{
@@ -1852,60 +1880,16 @@ fn writeLoadCommands(self: *MachO) !void {
 }
 
 /// Writes Mach-O file header.
-fn writeMachOHeader(self: *MachO) !void {
-    var hdr: macho.mach_header_64 = undefined;
-    hdr.magic = macho.MH_MAGIC_64;
-
-    const CpuInfo = struct {
-        cpu_type: macho.cpu_type_t,
-        cpu_subtype: macho.cpu_subtype_t,
-    };
-
-    const cpu_info: CpuInfo = switch (self.base.options.target.cpu.arch) {
-        .aarch64 => .{
-            .cpu_type = macho.CPU_TYPE_ARM64,
-            .cpu_subtype = macho.CPU_SUBTYPE_ARM_ALL,
-        },
-        .x86_64 => .{
-            .cpu_type = macho.CPU_TYPE_X86_64,
-            .cpu_subtype = macho.CPU_SUBTYPE_X86_64_ALL,
-        },
-        else => return error.UnsupportedMachOArchitecture,
-    };
-    hdr.cputype = cpu_info.cpu_type;
-    hdr.cpusubtype = cpu_info.cpu_subtype;
-
-    const filetype: u32 = switch (self.base.options.output_mode) {
-        .Exe => macho.MH_EXECUTE,
-        .Obj => macho.MH_OBJECT,
-        .Lib => switch (self.base.options.link_mode) {
-            .Static => return error.TODOStaticLibMachOType,
-            .Dynamic => macho.MH_DYLIB,
-        },
-    };
-    hdr.filetype = filetype;
-    hdr.ncmds = @intCast(u32, self.load_commands.items.len);
-
+fn writeHeader(self: *MachO) !void {
+    self.header.?.ncmds = @intCast(u32, self.load_commands.items.len);
     var sizeofcmds: u32 = 0;
     for (self.load_commands.items) |cmd| {
         sizeofcmds += cmd.cmdsize();
     }
-
-    hdr.sizeofcmds = sizeofcmds;
-
-    switch (self.base.options.output_mode) {
-        .Exe => {
-            hdr.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE;
-        },
-        else => {
-            hdr.flags = 0;
-        },
-    }
-    hdr.reserved = 0;
-
-    log.debug("writing Mach-O header {}\n", .{hdr});
-
-    try self.base.file.?.pwriteAll(@ptrCast([*]const u8, &hdr)[0..@sizeOf(macho.mach_header_64)], 0);
+    self.header.?.sizeofcmds = sizeofcmds;
+    log.debug("writing Mach-O header {}\n", .{self.header.?});
+    const slice = [1]macho.mach_header_64{self.header.?};
+    try self.base.file.?.pwriteAll(mem.sliceAsBytes(slice[0..1]), 0);
 }
 
 /// Saturating multiplication
@@ -1913,3 +1897,48 @@ fn satMul(a: anytype, b: anytype) @TypeOf(a, b) {
     const T = @TypeOf(a, b);
     return std.math.mul(T, a, b) catch std.math.maxInt(T);
 }
+
+/// Parse MachO contents from existing binary file.
+/// TODO This method is incomplete and currently parses only the header
+/// plus the load commands.
+fn parseFromFile(self: *MachO, file: fs.File) !void {
+    self.base.file = file;
+    var reader = file.reader();
+    const header = try reader.readStruct(macho.mach_header_64);
+    try self.load_commands.ensureCapacity(self.base.allocator, header.ncmds);
+    var i: u16 = 0;
+    while (i < header.ncmds) : (i += 1) {
+        const cmd = try LoadCommand.read(self.base.allocator, reader);
+        switch (cmd.cmd()) {
+            macho.LC_SEGMENT_64 => {
+                const x = cmd.Segment;
+                if (isSegmentOrSection(&x.inner.segname, "__LINKEDIT")) {
+                    self.linkedit_segment_cmd_index = i;
+                } else if (isSegmentOrSection(&x.inner.segname, "__TEXT")) {
+                    self.text_segment_cmd_index = i;
+                    for (x.sections.items) |sect, j| {
+                        if (isSegmentOrSection(&sect.sectname, "__text")) {
+                            self.text_section_index = @intCast(u16, j);
+                        }
+                    }
+                }
+            },
+            macho.LC_SYMTAB => {
+                self.symtab_cmd_index = i;
+            },
+            macho.LC_CODE_SIGNATURE => {
+                self.code_signature_cmd_index = i;
+            },
+            // TODO populate more MachO fields
+            else => {},
+        }
+        self.load_commands.appendAssumeCapacity(cmd);
+    }
+    self.header = header;
+
+    // TODO parse memory mapped segments
+}
+
+fn isSegmentOrSection(name: *const [16]u8, needle: []const u8) bool {
+    return mem.eql(u8, mem.trimRight(u8, name.*[0..], &[_]u8{0}), needle);
+}