Commit 2516db9645

Jakub Konka <kubkon@jakubkonka.com>
2020-08-21 08:04:02
Specify path to dyld in Mach-O
This is required since an exec on macOS always has to link against libSystem.dylib. Signed-off-by: Jakub Konka <kubkon@jakubkonka.com>
1 parent c1ee9ef
Changed files (3)
lib
src-self-hosted
lib/std/macho.zig
@@ -83,7 +83,7 @@ pub const symtab_command = extern struct {
 
 /// The linkedit_data_command contains the offsets and sizes of a blob
 /// of data in the __LINKEDIT segment.
-const linkedit_data_command = extern struct {
+pub const linkedit_data_command = extern struct {
     /// LC_CODE_SIGNATURE, LC_SEGMENT_SPLIT_INFO, LC_FUNCTION_STARTS, LC_DATA_IN_CODE, LC_DYLIB_CODE_SIGN_DRS or LC_LINKER_OPTIMIZATION_HINT.
     cmd: u32,
 
@@ -97,6 +97,28 @@ const linkedit_data_command = extern struct {
     datasize: u32,
 };
 
+/// A program that uses a dynamic linker contains a dylinker_command to identify
+/// the name of the dynamic linker (LC_LOAD_DYLINKER). A dynamic linker itself
+/// contains a dylinker_command to identify itself (LC_ID_DYLINKER).
+/// A file can have at most one of these.
+/// This struct is also used for the LC_DYLD_ENVIRONMENT load command, in which
+/// case it contains a string for dyld to treat like an environment variable.
+pub const dylinker_command = extern struct {
+    /// LC_ID_DYLINKER, LC_LOAD_DYLINKER, or LC_DYLD_ENVIRONMENT
+    cmd: u32,
+
+    /// Total size of the command in bytes; includes the pathname string.
+    cmdsize: u32,
+
+    /// Offset of the dynamic linker's path name, measured from the start of
+    /// this load command structure.
+    /// A variable length string in a load command is represented by an lc_str
+    /// union.  The strings are stored just after the load command structure and
+    /// the offset is from the start of the load command structure.  The size
+    /// of the string is reflected in the cmdsize field of the load command.
+    /// Any bytes of padding added to bring the cmdsize field to a multiple
+    /// of 4 bytes must be zero.
+    name: u32,
+};
+
 /// The segment load command indicates that a part of this file is to be
 /// mapped into the task's address space.  The size of this segment in memory,
 /// vmsize, maybe equal to or larger than the amount to map from this file,
src-self-hosted/link/MachO.zig
@@ -6,8 +6,11 @@ const assert = std.debug.assert;
 const fs = std.fs;
 const log = std.log.scoped(.link);
 const macho = std.macho;
+const codegen = @import("../codegen.zig");
 const math = std.math;
 const mem = std.mem;
+const trace = @import("../tracy.zig").trace;
+const Type = @import("../type.zig").Type;
 
 const Module = @import("../Module.zig");
 const link = @import("../link.zig");
@@ -17,18 +20,35 @@ pub const base_tag: File.Tag = File.Tag.macho;
 
 base: File,
 
-/// Stored in native-endian format, depending on target endianness needs to be bswapped on read/write.
-/// Same order as in the file.
-segment_cmds: std.ArrayListUnmanaged(macho.segment_command_64) = std.ArrayListUnmanaged(macho.segment_command_64){},
+/// List of all load command headers that are in the file.
+/// We use it to track number and size of all commands needed by the header.
+commands: std.ArrayListUnmanaged(macho.load_command) = std.ArrayListUnmanaged(macho.load_command){},
+command_file_offset: ?u64 = null,
 
 /// Stored in native-endian format, depending on target endianness needs to be bswapped on read/write.
 /// Same order as in the file.
+segments: std.ArrayListUnmanaged(macho.segment_command_64) = std.ArrayListUnmanaged(macho.segment_command_64){},
 sections: std.ArrayListUnmanaged(macho.section_64) = std.ArrayListUnmanaged(macho.section_64){},
+segment_table_offset: ?u64 = null,
 
+/// Entry point load command
+entry_point_cmd: ?macho.entry_point_command = null,
 entry_addr: ?u64 = null,
 
+/// Default VM start address set at 4GB
+vm_start_address: u64 = 0x100000000,
+
+seg_table_dirty: bool = false,
+
 error_flags: File.ErrorFlags = File.ErrorFlags{},
 
+/// `alloc_num / alloc_den` is the factor of padding when allocating.
+const alloc_num = 4;
+const alloc_den = 3;
+
+/// Default path to dyld
+const DEFAULT_DYLD_PATH: [*:0]const u8 = "/usr/lib/dyld";
+
 pub const TextBlock = struct {
     pub const empty = TextBlock{};
 };
@@ -80,12 +100,6 @@ fn openFile(allocator: *Allocator, file: fs.File, options: link.Options) !MachO
 /// Truncates the existing file contents and overwrites the contents.
 /// Returns an error if `file` is not already open with +read +write +seek abilities.
 fn createFile(allocator: *Allocator, file: fs.File, options: link.Options) !MachO {
-    switch (options.output_mode) {
-        .Exe => {},
-        .Obj => {},
-        .Lib => return error.TODOImplementWritingLibFiles,
-    }
-
     var self: MachO = .{
         .base = .{
             .file = file,
@@ -96,31 +110,35 @@ fn createFile(allocator: *Allocator, file: fs.File, options: link.Options) !Mach
     };
     errdefer self.deinit();
 
-    if (options.output_mode == .Exe) {
-        // The first segment command for executables is always a __PAGEZERO segment.
-        try self.segment_cmds.append(allocator, .{
-            .cmd = macho.LC_SEGMENT_64,
-            .cmdsize = @sizeOf(macho.segment_command_64),
-            .segname = self.makeString("__PAGEZERO"),
-            .vmaddr = 0,
-            .vmsize = 0,
-            .fileoff = 0,
-            .filesize = 0,
-            .maxprot = 0,
-            .initprot = 0,
-            .nsects = 0,
-            .flags = 0,
-        });
+    switch (options.output_mode) {
+        .Exe => {
+            // The first segment command for executables is always a __PAGEZERO segment.
+            const pagezero = .{
+                .cmd = macho.LC_SEGMENT_64,
+                .cmdsize = commandSize(@sizeOf(macho.segment_command_64)),
+                .segname = makeString("__PAGEZERO"),
+                .vmaddr = 0,
+                .vmsize = self.vm_start_address,
+                .fileoff = 0,
+                .filesize = 0,
+                .maxprot = 0,
+                .initprot = 0,
+                .nsects = 0,
+                .flags = 0,
+            };
+            try self.commands.append(allocator, .{
+                .cmd = pagezero.cmd,
+                .cmdsize = pagezero.cmdsize,
+            });
+            try self.segments.append(allocator, pagezero);
+        },
+        .Obj => return error.TODOImplementWritingObjFiles,
+        .Lib => return error.TODOImplementWritingLibFiles,
     }
 
-    return self;
-}
+    try self.populateMissingMetadata();
 
-fn makeString(self: *MachO, comptime bytes: []const u8) [16]u8 {
-    var buf: [16]u8 = undefined;
-    if (bytes.len > buf.len) @compileError("MachO segment/section name too long");
-    mem.copy(u8, buf[0..], bytes);
-    return buf;
+    return self;
 }
 
 fn writeMachOHeader(self: *MachO) !void {
@@ -156,10 +174,14 @@ fn writeMachOHeader(self: *MachO) !void {
     };
     hdr.filetype = filetype;
 
-    // TODO consider other commands
-    const ncmds = try math.cast(u32, self.segment_cmds.items.len);
+    const ncmds = try math.cast(u32, self.commands.items.len);
     hdr.ncmds = ncmds;
-    hdr.sizeofcmds = ncmds * @sizeOf(macho.segment_command_64);
+
+    var sizeof_cmds: u32 = 0;
+    for (self.commands.items) |cmd| {
+        sizeof_cmds += cmd.cmdsize;
+    }
+    hdr.sizeofcmds = sizeof_cmds;
 
     // TODO should these be set to something else?
     hdr.flags = 0;
@@ -169,36 +191,117 @@ fn writeMachOHeader(self: *MachO) !void {
 }
 
 pub fn flush(self: *MachO, module: *Module) !void {
-    // TODO implement flush
+    // Save segments first
     {
-        const buf = try self.base.allocator.alloc(macho.segment_command_64, self.segment_cmds.items.len);
+        const buf = try self.base.allocator.alloc(macho.segment_command_64, self.segments.items.len);
         defer self.base.allocator.free(buf);
 
+        self.command_file_offset = @sizeOf(macho.mach_header_64);
+
         for (buf) |*seg, i| {
-            seg.* = self.segment_cmds.items[i];
+            seg.* = self.segments.items[i];
+            self.command_file_offset.? += self.segments.items[i].cmdsize;
         }
 
         try self.base.file.?.pwriteAll(mem.sliceAsBytes(buf), @sizeOf(macho.mach_header_64));
     }
 
-    if (self.entry_addr == null and self.base.options.output_mode == .Exe) {
-        log.debug("flushing. no_entry_point_found = true\n", .{});
-        self.error_flags.no_entry_point_found = true;
-    } else {
-        log.debug("flushing. no_entry_point_found = false\n", .{});
-        self.error_flags.no_entry_point_found = false;
-        try self.writeMachOHeader();
+    switch (self.base.options.output_mode) {
+        .Exe => {
+            {
+                // We need to add LC_LOAD_DYLINKER and LC_LOAD_DYLIB since we always
+                // have to link against libSystem.dylib
+                const cmdsize = commandSize(@intCast(u32, @sizeOf(macho.dylinker_command) + mem.lenZ(DEFAULT_DYLD_PATH)));
+                const load_dylinker = [1]macho.dylinker_command{
+                    .{
+                        .cmd = macho.LC_LOAD_DYLINKER,
+                        .cmdsize = cmdsize,
+                        .name = @sizeOf(macho.dylinker_command),
+                    },
+                };
+                try self.commands.append(self.base.allocator, .{
+                    .cmd = macho.LC_LOAD_DYLINKER,
+                    .cmdsize = cmdsize,
+                });
+
+                try self.base.file.?.pwriteAll(mem.sliceAsBytes(load_dylinker[0..1]), self.command_file_offset.?);
+
+                const padded_path = try self.base.allocator.alloc(u8, cmdsize - @sizeOf(macho.dylinker_command));
+                defer self.base.allocator.free(padded_path);
+                mem.set(u8, padded_path[0..], 0);
+                mem.copy(u8, padded_path[0..], mem.spanZ(DEFAULT_DYLD_PATH));
+
+                try self.base.file.?.pwriteAll(padded_path, self.command_file_offset.? + @sizeOf(macho.dylinker_command));
+                self.command_file_offset.? += cmdsize;
+            }
+        },
+        .Obj => return error.TODOImplementWritingObjFiles,
+        .Lib => return error.TODOImplementWritingLibFiles,
     }
+
+    // if (self.entry_addr == null and self.base.options.output_mode == .Exe) {
+    //     log.debug("flushing. no_entry_point_found = true\n", .{});
+    //     self.error_flags.no_entry_point_found = true;
+    // } else {
+    log.debug("flushing. no_entry_point_found = false\n", .{});
+    self.error_flags.no_entry_point_found = false;
+    try self.writeMachOHeader();
+    // }
 }
 
+/// Frees the load command, segment and section lists using the allocator
+/// stored in `self.base`.
 pub fn deinit(self: *MachO) void {
-    self.segment_cmds.deinit(self.base.allocator);
+    self.commands.deinit(self.base.allocator);
+    self.segments.deinit(self.base.allocator);
     self.sections.deinit(self.base.allocator);
 }
 
 pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void {}
 
-pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {}
+/// Currently a no-op: the code generation path sketched below is entirely
+/// commented out, so nothing is generated or written for `decl` yet.
+pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {
+    // const tracy = trace(@src());
+    // defer tracy.end();
+
+    // var code_buffer = std.ArrayList(u8).init(self.base.allocator);
+    // defer code_buffer.deinit();
+
+    // var dbg_line_buffer = std.ArrayList(u8).init(self.base.allocator);
+    // defer dbg_line_buffer.deinit();
+
+    // var dbg_info_buffer = std.ArrayList(u8).init(self.base.allocator);
+    // defer dbg_info_buffer.deinit();
+
+    // var dbg_info_type_relocs: File.DbgInfoTypeRelocsTable = .{};
+    // defer {
+    //     for (dbg_info_type_relocs.items()) |*entry| {
+    //         entry.value.relocs.deinit(self.base.allocator);
+    //     }
+    //     dbg_info_type_relocs.deinit(self.base.allocator);
+    // }
+
+    // const typed_value = decl.typed_value.most_recent.typed_value;
+    // log.debug("typed_value = {}", .{typed_value});
+
+    // const res = try codegen.generateSymbol(
+    //     &self.base,
+    //     decl.src(),
+    //     typed_value,
+    //     &code_buffer,
+    //     &dbg_line_buffer,
+    //     &dbg_info_buffer,
+    //     &dbg_info_type_relocs,
+    // );
+    // log.debug("res = {}", .{res});
+
+    // const code = switch (res) {
+    //     .externally_managed => |x| x,
+    //     .appended => code_buffer.items,
+    //     .fail => |em| {
+    //         decl.analysis = .codegen_failure;
+    //         try module.failed_decls.put(module.gpa, decl, em);
+    //         return;
+    //     },
+    // };
+}
 
 pub fn updateDeclLineNumber(self: *MachO, module: *Module, decl: *const Module.Decl) !void {}
 
@@ -214,3 +317,117 @@ pub fn freeDecl(self: *MachO, decl: *Module.Decl) void {}
 pub fn getDeclVAddr(self: *MachO, decl: *const Module.Decl) u64 {
     @panic("TODO implement getDeclVAddr for MachO");
 }
+
+/// Currently a no-op: the whole body is commented-out draft code for creating
+/// additional LC_SEGMENT_64 commands. The draft refers to names (for example
+/// `seg_load_re_index`, `seg_got_index`, `segment_cmds`) that are not among the
+/// fields visible in this change, so it cannot simply be re-enabled as is.
+pub fn populateMissingMetadata(self: *MachO) !void {
+    // if (self.seg_load_re_index == null) {
+    //     self.seg_load_re_index = @intCast(u16, self.segment_cmds.items.len);
+    //     const file_size = self.base.options.program_code_size_hint;
+    //     const p_align = 0x1000;
+    //     const off = self.findFreeSpace(file_size, p_align);
+    //     log.debug("found LC_SEGMENT_64 free space 0x{x} to 0x{x}", .{ off, off + file_size });
+    //     try self.segment_cmds.append(self.base.allocator, .{});
+    //     self.entry_addr = null;
+    //     self.seg_table_dirty = true;
+    // }
+    // if (self.seg_got_index == null) {
+    //     self.seg_got_index = @intCast(u16, self.segment_cmds.items.len);
+    //     const file_size = 8 * self.base.options.symbol_count_hint;
+    //     // Apple recommends to page align for better performance.
+    //     // TODO This is not necessarily true for MH_OBJECT which means we
+    //     // could potentially shave off a couple of bytes when generating
+    //     // only object files.
+    //     const p_align = 0x1000;
+    //     const off = self.findFreeSpace(file_size, p_align);
+    //     log.debug("found LC_SEGMENT_64 free space 0x{x} to 0x{x}", .{ off, off + file_size });
+    //     const default_vmaddr = 0x4000000;
+    //     try self.segment_cmds.append(self.base.allocator, .{
+    //         .cmd = macho.LC_SEGMENT_64,
+    //         .cmdsize = @sizeOf(macho.segment_command_64),
+    //         .segname = self.makeString("__TEXT"),
+    //         .vmaddr = default_vmaddr,
+    //         .vmsize = file_size,
+    //         .fileoff = off,
+    //         .filesize = file_size,
+    //         .maxprot = 0x5,
+    //         .initprot = 0x5,
+    //         .nsects = 0,
+    //         .flags = 0,
+    //     });
+    //     self.seg_table_dirty = true;
+    // }
+}
+
+/// Checks whether the file range beginning at `start` and spanning `size`
+/// bytes (padded by `alloc_num / alloc_den`) overlaps the Mach-O header or the
+/// padded file range of any known segment.
+/// Returns end pos of collision, if any.
+fn detectAllocCollision(self: *MachO, start: u64, size: u64) ?u64 {
+    // Nothing may be placed on top of the Mach-O header.
+    const header_size: u64 = @sizeOf(macho.mach_header_64);
+    if (start < header_size)
+        return header_size;
+
+    const end = start + satMul(size, alloc_num) / alloc_den;
+
+    // if (self.sec_table_offset) |off| {
+    //     const section_size: u64 = @sizeOf(macho.section_64);
+    //     const tight_size = self.sections.items.len * section_size;
+    //     const increased_size = satMul(tight_size, alloc_num) / alloc_den;
+    //     const test_end = off + increased_size;
+    //     if (end > off and start < test_end) {
+    //         return test_end;
+    //     }
+    // }
+
+    // if (self.seg_table_offset) |off| {
+    //     const segment_size: u64 = @sizeOf(macho.segment_command_64);
+    //     const tight_size = self.segment_cmds.items.len * segment_size;
+    //     const increased_size = satMul(tight_size, alloc_num) / alloc_den;
+    //     const test_end = off + increased_size;
+    //     if (end > off and start < test_end) {
+    //         return test_end;
+    //     }
+    // }
+
+    // for (self.sections.items) |section| {
+    //     const increased_size = satMul(section.size, alloc_num) / alloc_den;
+    //     const test_end = section.offset + increased_size;
+    //     if (end > section.offset and start < test_end) {
+    //         return test_end;
+    //     }
+    // }
+
+    // Compare against each segment's file range, padded by the same factor so
+    // that segments keep room to grow.
+    for (self.segments.items) |segment| {
+        const increased_size = satMul(segment.filesize, alloc_num) / alloc_den;
+        const test_end = segment.fileoff + increased_size;
+        if (end > segment.fileoff and start < test_end) {
+            return test_end;
+        }
+    }
+
+    return null;
+}
+
+/// Starting from file offset 0, repeatedly moves past whatever
+/// `detectAllocCollision` reports (re-aligning to `min_alignment` each time)
+/// and returns the first offset at which `object_size` bytes do not collide.
+fn findFreeSpace(self: *MachO, object_size: u64, min_alignment: u16) u64 {
+    var candidate: u64 = 0;
+    while (true) {
+        const collision_end = self.detectAllocCollision(candidate, object_size) orelse return candidate;
+        candidate = mem.alignForwardGeneric(u64, collision_end, min_alignment);
+    }
+}
+
+/// Multiplies `a` by `b`; if the product does not fit in the common type of
+/// the operands, the maximum value of that type is returned instead.
+fn satMul(a: anytype, b: anytype) @TypeOf(a, b) {
+    const Result = @TypeOf(a, b);
+    if (std.math.mul(Result, a, b)) |product| {
+        return product;
+    } else |_| {
+        return std.math.maxInt(Result);
+    }
+}
+
+/// Builds a fixed 16-byte Mach-O segment/section name from `bytes`.
+/// Names shorter than 16 bytes are padded with zeroes; longer names are
+/// rejected at compile time.
+fn makeString(comptime bytes: []const u8) [16]u8 {
+    // Start from an all-zero buffer so the unused tail is zero padding rather
+    // than undefined memory that would end up in the output file.
+    var buf = [_]u8{0} ** 16;
+    if (bytes.len > buf.len) @compileError("MachO segment/section name too long");
+    mem.copy(u8, buf[0..], bytes);
+    return buf;
+}
+
+/// Rounds `min_size` up to the next multiple of `@sizeOf(u64)` (8 bytes);
+/// values that are already a multiple are returned unchanged.
+fn commandSize(min_size: u32) u32 {
+    const alignment = @sizeOf(u64);
+    const remainder = min_size % alignment;
+    return if (remainder == 0) min_size else min_size + (alignment - remainder);
+}
src-self-hosted/codegen.zig
@@ -1427,7 +1427,62 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                     else => return self.fail(inst.base.src, "TODO implement call for {}", .{self.target.cpu.arch}),
                 }
             } else if (self.bin_file.cast(link.File.MachO)) |macho_file| {
-                return self.fail(inst.base.src, "TODO implement codegen for call when linking with MachO", .{});
+                switch (arch) {
+                    // .x86_64 => {
+                    //     for (info.args) |mc_arg, arg_i| {
+                    //         const arg = inst.args[arg_i];
+                    //         const arg_mcv = try self.resolveInst(inst.args[arg_i]);
+                    //         // Here we do not use setRegOrMem even though the logic is similar, because
+                    //         // the function call will move the stack pointer, so the offsets are different.
+                    //         switch (mc_arg) {
+                    //             .none => continue,
+                    //             .register => |reg| {
+                    //                 try self.genSetReg(arg.src, reg, arg_mcv);
+                    //                 // TODO interact with the register allocator to mark the instruction as moved.
+                    //             },
+                    //             .stack_offset => {
+                    //                 // Here we need to emit instructions like this:
+                    //                 // mov     qword ptr [rsp + stack_offset], x
+                    //                 return self.fail(inst.base.src, "TODO implement calling with parameters in memory", .{});
+                    //             },
+                    //             .ptr_stack_offset => {
+                    //                 return self.fail(inst.base.src, "TODO implement calling with MCValue.ptr_stack_offset arg", .{});
+                    //             },
+                    //             .ptr_embedded_in_code => {
+                    //                 return self.fail(inst.base.src, "TODO implement calling with MCValue.ptr_embedded_in_code arg", .{});
+                    //             },
+                    //             .undef => unreachable,
+                    //             .immediate => unreachable,
+                    //             .unreach => unreachable,
+                    //             .dead => unreachable,
+                    //             .embedded_in_code => unreachable,
+                    //             .memory => unreachable,
+                    //             .compare_flags_signed => unreachable,
+                    //             .compare_flags_unsigned => unreachable,
+                    //         }
+                    //     }
+
+                    //     if (inst.func.cast(ir.Inst.Constant)) |func_inst| {
+                    //         if (func_inst.val.cast(Value.Payload.Function)) |func_val| {
+                    //             const func = func_val.func;
+                    //             const got = &macho_file.segment_cmds.items[macho_file.seg_got_index.?];
+                    //             const ptr_bytes: u64 = 8;
+                    //             const got_addr = @intCast(u32, got.vmaddrs + func.owner_decl.link.macho.offset_table_index * ptr_bytes);
+                    //             // 01 xx xx xx xx    call [addr]
+                    //             try self.code.ensureCapacity(self.code.items.len + 5);
+                    //             self.code.appendSliceAssumeCapacity(&[1]u8{ 0x1 });
+                    //             mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), got_addr);
+                    //         } else {
+                    //             return self.fail(inst.base.src, "TODO implement calling bitcasted functions", .{});
+                    //         }
+                    //     } else {
+                    //         return self.fail(inst.base.src, "TODO implement calling runtime known function pointer", .{});
+                    //     }
+                    // },
+                    .x86_64 => return self.fail(inst.base.src, "TODO implement codegen for call when linking with MachO for x86_64 arch", .{}),
+                    .aarch64 => return self.fail(inst.base.src, "TODO implement codegen for call when linking with MachO for aarch64 arch", .{}),
+                    else => unreachable,
+                }
             } else {
                 unreachable;
             }