Commit 2516db9645
Changed files (3)
lib/std/macho.zig
@@ -83,7 +83,7 @@ pub const symtab_command = extern struct {
/// The linkedit_data_command contains the offsets and sizes of a blob
/// of data in the __LINKEDIT segment.
-const linkedit_data_command = extern struct {
+pub const linkedit_data_command = extern struct {
/// LC_CODE_SIGNATURE, LC_SEGMENT_SPLIT_INFO, LC_FUNCTION_STARTS, LC_DATA_IN_CODE, LC_DYLIB_CODE_SIGN_DRS or LC_LINKER_OPTIMIZATION_HINT.
cmd: u32,
@@ -97,6 +97,28 @@ const linkedit_data_command = extern struct {
datasize: u32,
};
+/// A program that uses a dynamic linker contains a dylinker_command to identify
+/// the name of the dynamic linker (LC_LOAD_DYLINKER), and a dynamic linker
+/// contains a dylinker_command to identify itself (LC_ID_DYLINKER).
+/// A file can have at most one of these.
+/// This struct is also used for the LC_DYLD_ENVIRONMENT load command and contains
+/// a string for dyld to treat like an environment variable.
+pub const dylinker_command = extern struct {
+ /// LC_ID_DYLINKER, LC_LOAD_DYLINKER, or LC_DYLD_ENVIRONMENT
+ cmd: u32,
+
+ /// includes pathname string
+ cmdsize: u32,
+
+ /// A variable-length string in a load command is represented by an lc_str
+ /// union. The string is stored just after the load command structure and
+ /// the offset is from the start of the load command structure. The size
+ /// of the string is reflected in the cmdsize field of the load command.
+ /// Once again any padded bytes to bring the cmdsize field to a multiple
+ /// of 4 bytes (8 bytes for 64-bit load commands) must be zero.
+ name: u32,
+};
+
/// The segment load command indicates that a part of this file is to be
/// mapped into the task's address space. The size of this segment in memory,
/// vmsize, may be equal to or larger than the amount to map from this file,
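
The lc_str rule above determines the size of the LC_LOAD_DYLINKER command the linker emits further down. A minimal sketch (not part of the commit; paddedCmdSize is a hypothetical stand-in for the linker's commandSize helper) of how the numbers work out for the default /usr/lib/dyld path:

const std = @import("std");
const assert = std.debug.assert;

/// Rounds a load command size up to a multiple of 8 bytes, matching the
/// zero-padding rule described in the lc_str comment above.
fn paddedCmdSize(min_size: u32) u32 {
    return std.mem.alignForwardGeneric(u32, min_size, @sizeOf(u64));
}

comptime {
    // "/usr/lib/dyld" is 13 bytes; the string starts right after the three
    // u32 fields of dylinker_command (offset 12), and the NUL terminator plus
    // zero padding fill the command out to 32 bytes.
    const name_offset: u32 = 12;
    const path_len: u32 = "/usr/lib/dyld".len;
    assert(paddedCmdSize(name_offset + path_len) == 32); // 12 + 13 = 25 -> 32
}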
src-self-hosted/link/MachO.zig
@@ -6,8 +6,11 @@ const assert = std.debug.assert;
const fs = std.fs;
const log = std.log.scoped(.link);
const macho = std.macho;
+const codegen = @import("../codegen.zig");
const math = std.math;
const mem = std.mem;
+const trace = @import("../tracy.zig").trace;
+const Type = @import("../type.zig").Type;
const Module = @import("../Module.zig");
const link = @import("../link.zig");
@@ -17,18 +20,35 @@ pub const base_tag: File.Tag = File.Tag.macho;
base: File,
-/// Stored in native-endian format, depending on target endianness needs to be bswapped on read/write.
-/// Same order as in the file.
-segment_cmds: std.ArrayListUnmanaged(macho.segment_command_64) = std.ArrayListUnmanaged(macho.segment_command_64){},
+/// List of all load command headers that are in the file.
+/// We use it to track the number and total size of all load commands required by the Mach-O header.
+commands: std.ArrayListUnmanaged(macho.load_command) = std.ArrayListUnmanaged(macho.load_command){},
+command_file_offset: ?u64 = null,
/// Stored in native-endian format, depending on target endianness needs to be bswapped on read/write.
/// Same order as in the file.
+segments: std.ArrayListUnmanaged(macho.segment_command_64) = std.ArrayListUnmanaged(macho.segment_command_64){},
sections: std.ArrayListUnmanaged(macho.section_64) = std.ArrayListUnmanaged(macho.section_64){},
+segment_table_offset: ?u64 = null,
+/// Entry point load command
+entry_point_cmd: ?macho.entry_point_command = null,
entry_addr: ?u64 = null,
+/// Default VM start address, set to 4GB (0x100000000)
+vm_start_address: u64 = 0x100000000,
+
+seg_table_dirty: bool = false,
+
error_flags: File.ErrorFlags = File.ErrorFlags{},
+/// `alloc_num / alloc_den` is the factor of padding when allocating.
+const alloc_num = 4;
+const alloc_den = 3;
+
+/// Default path to dyld
+const DEFAULT_DYLD_PATH: [*:0]const u8 = "/usr/lib/dyld";
+
pub const TextBlock = struct {
pub const empty = TextBlock{};
};
@@ -80,12 +100,6 @@ fn openFile(allocator: *Allocator, file: fs.File, options: link.Options) !MachO
/// Truncates the existing file contents and overwrites the contents.
/// Returns an error if `file` is not already open with +read +write +seek abilities.
fn createFile(allocator: *Allocator, file: fs.File, options: link.Options) !MachO {
- switch (options.output_mode) {
- .Exe => {},
- .Obj => {},
- .Lib => return error.TODOImplementWritingLibFiles,
- }
-
var self: MachO = .{
.base = .{
.file = file,
@@ -96,31 +110,35 @@ fn createFile(allocator: *Allocator, file: fs.File, options: link.Options) !Mach
};
errdefer self.deinit();
- if (options.output_mode == .Exe) {
- // The first segment command for executables is always a __PAGEZERO segment.
- try self.segment_cmds.append(allocator, .{
- .cmd = macho.LC_SEGMENT_64,
- .cmdsize = @sizeOf(macho.segment_command_64),
- .segname = self.makeString("__PAGEZERO"),
- .vmaddr = 0,
- .vmsize = 0,
- .fileoff = 0,
- .filesize = 0,
- .maxprot = 0,
- .initprot = 0,
- .nsects = 0,
- .flags = 0,
- });
+ switch (options.output_mode) {
+ .Exe => {
+ // The first segment command for executables is always a __PAGEZERO segment.
+ const pagezero = .{
+ .cmd = macho.LC_SEGMENT_64,
+ .cmdsize = commandSize(@sizeOf(macho.segment_command_64)),
+ .segname = makeString("__PAGEZERO"),
+ .vmaddr = 0,
+ .vmsize = self.vm_start_address,
+ .fileoff = 0,
+ .filesize = 0,
+ .maxprot = 0,
+ .initprot = 0,
+ .nsects = 0,
+ .flags = 0,
+ };
+ try self.commands.append(allocator, .{
+ .cmd = pagezero.cmd,
+ .cmdsize = pagezero.cmdsize,
+ });
+ try self.segments.append(allocator, pagezero);
+ },
+ .Obj => return error.TODOImplementWritingObjFiles,
+ .Lib => return error.TODOImplementWritingLibFiles,
}
- return self;
-}
+ try self.populateMissingMetadata();
-fn makeString(self: *MachO, comptime bytes: []const u8) [16]u8 {
- var buf: [16]u8 = undefined;
- if (bytes.len > buf.len) @compileError("MachO segment/section name too long");
- mem.copy(u8, buf[0..], bytes);
- return buf;
+ return self;
}
fn writeMachOHeader(self: *MachO) !void {
@@ -156,10 +174,14 @@ fn writeMachOHeader(self: *MachO) !void {
};
hdr.filetype = filetype;
- // TODO consider other commands
- const ncmds = try math.cast(u32, self.segment_cmds.items.len);
+ const ncmds = try math.cast(u32, self.commands.items.len);
hdr.ncmds = ncmds;
- hdr.sizeofcmds = ncmds * @sizeOf(macho.segment_command_64);
+
+ var sizeof_cmds: u32 = 0;
+ for (self.commands.items) |cmd| {
+ sizeof_cmds += cmd.cmdsize;
+ }
+ hdr.sizeofcmds = sizeof_cmds;
// TODO should these be set to something else?
hdr.flags = 0;
@@ -169,36 +191,117 @@ fn writeMachOHeader(self: *MachO) !void {
}
pub fn flush(self: *MachO, module: *Module) !void {
- // TODO implement flush
+ // Write all segment load commands first, directly after the mach_header_64
{
- const buf = try self.base.allocator.alloc(macho.segment_command_64, self.segment_cmds.items.len);
+ const buf = try self.base.allocator.alloc(macho.segment_command_64, self.segments.items.len);
defer self.base.allocator.free(buf);
+ self.command_file_offset = @sizeOf(macho.mach_header_64);
+
for (buf) |*seg, i| {
- seg.* = self.segment_cmds.items[i];
+ seg.* = self.segments.items[i];
+ self.command_file_offset.? += self.segments.items[i].cmdsize;
}
try self.base.file.?.pwriteAll(mem.sliceAsBytes(buf), @sizeOf(macho.mach_header_64));
}
- if (self.entry_addr == null and self.base.options.output_mode == .Exe) {
- log.debug("flushing. no_entry_point_found = true\n", .{});
- self.error_flags.no_entry_point_found = true;
- } else {
- log.debug("flushing. no_entry_point_found = false\n", .{});
- self.error_flags.no_entry_point_found = false;
- try self.writeMachOHeader();
+ switch (self.base.options.output_mode) {
+ .Exe => {
+ {
+ // We need to add LC_LOAD_DYLINKER (and eventually LC_LOAD_DYLIB) since we
+ // always link against libSystem.dylib
+ const cmdsize = commandSize(@intCast(u32, @sizeOf(macho.dylinker_command) + mem.lenZ(DEFAULT_DYLD_PATH)));
+ const load_dylinker = [1]macho.dylinker_command{
+ .{
+ .cmd = macho.LC_LOAD_DYLINKER,
+ .cmdsize = cmdsize,
+ .name = @sizeOf(macho.dylinker_command),
+ },
+ };
+ try self.commands.append(self.base.allocator, .{
+ .cmd = macho.LC_LOAD_DYLINKER,
+ .cmdsize = cmdsize,
+ });
+
+ try self.base.file.?.pwriteAll(mem.sliceAsBytes(load_dylinker[0..1]), self.command_file_offset.?);
+
+ const padded_path = try self.base.allocator.alloc(u8, cmdsize - @sizeOf(macho.dylinker_command));
+ defer self.base.allocator.free(padded_path);
+ mem.set(u8, padded_path[0..], 0);
+ mem.copy(u8, padded_path[0..], mem.spanZ(DEFAULT_DYLD_PATH));
+
+ try self.base.file.?.pwriteAll(padded_path, self.command_file_offset.? + @sizeOf(macho.dylinker_command));
+ self.command_file_offset.? += cmdsize;
+ }
+ },
+ .Obj => return error.TODOImplementWritingObjFiles,
+ .Lib => return error.TODOImplementWritingLibFiles,
}
+
+ // if (self.entry_addr == null and self.base.options.output_mode == .Exe) {
+ // log.debug("flushing. no_entry_point_found = true\n", .{});
+ // self.error_flags.no_entry_point_found = true;
+ // } else {
+ log.debug("flushing. no_entry_point_found = false\n", .{});
+ self.error_flags.no_entry_point_found = false;
+ try self.writeMachOHeader();
+ // }
}
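
For the minimal executable produced above, writeMachOHeader ends up summing exactly two load commands; a quick sanity sketch of the arithmetic (not part of the commit):

const std = @import("std");

comptime {
    // __PAGEZERO: a bare segment_command_64 with no sections, already a
    // multiple of 8 bytes, so commandSize leaves it at 72.
    const pagezero_cmdsize = @sizeOf(std.macho.segment_command_64); // 72
    // LC_LOAD_DYLINKER: 12-byte dylinker_command + "/usr/lib/dyld" padded
    // out to the next multiple of 8 bytes, i.e. 32.
    const dylinker_cmdsize = 32;
    // So ncmds = 2 and sizeofcmds = 104 bytes immediately after the header.
    std.debug.assert(pagezero_cmdsize + dylinker_cmdsize == 104);
}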
pub fn deinit(self: *MachO) void {
- self.segment_cmds.deinit(self.base.allocator);
+ self.commands.deinit(self.base.allocator);
+ self.segments.deinit(self.base.allocator);
self.sections.deinit(self.base.allocator);
}
pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void {}
-pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {}
+pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {
+ // const tracy = trace(@src());
+ // defer tracy.end();
+
+ // var code_buffer = std.ArrayList(u8).init(self.base.allocator);
+ // defer code_buffer.deinit();
+
+ // var dbg_line_buffer = std.ArrayList(u8).init(self.base.allocator);
+ // defer dbg_line_buffer.deinit();
+
+ // var dbg_info_buffer = std.ArrayList(u8).init(self.base.allocator);
+ // defer dbg_info_buffer.deinit();
+
+ // var dbg_info_type_relocs: File.DbgInfoTypeRelocsTable = .{};
+ // defer {
+ // for (dbg_info_type_relocs.items()) |*entry| {
+ // entry.value.relocs.deinit(self.base.allocator);
+ // }
+ // dbg_info_type_relocs.deinit(self.base.allocator);
+ // }
+
+ // const typed_value = decl.typed_value.most_recent.typed_value;
+ // log.debug("typed_value = {}", .{typed_value});
+
+ // const res = try codegen.generateSymbol(
+ // &self.base,
+ // decl.src(),
+ // typed_value,
+ // &code_buffer,
+ // &dbg_line_buffer,
+ // &dbg_info_buffer,
+ // &dbg_info_type_relocs,
+ // );
+ // log.debug("res = {}", .{res});
+
+ // const code = switch (res) {
+ // .externally_managed => |x| x,
+ // .appended => code_buffer.items,
+ // .fail => |em| {
+ // decl.analysis = .codegen_failure;
+ // try module.failed_decls.put(module.gpa, decl, em);
+ // return;
+ // },
+ // };
+}
pub fn updateDeclLineNumber(self: *MachO, module: *Module, decl: *const Module.Decl) !void {}
@@ -214,3 +317,117 @@ pub fn freeDecl(self: *MachO, decl: *Module.Decl) void {}
pub fn getDeclVAddr(self: *MachO, decl: *const Module.Decl) u64 {
@panic("TODO implement getDeclVAddr for MachO");
}
+
+pub fn populateMissingMetadata(self: *MachO) !void {
+ // if (self.seg_load_re_index == null) {
+ // self.seg_load_re_index = @intCast(u16, self.segment_cmds.items.len);
+ // const file_size = self.base.options.program_code_size_hint;
+ // const p_align = 0x1000;
+ // const off = self.findFreeSpace(file_size, p_align);
+ // log.debug("found LC_SEGMENT_64 free space 0x{x} to 0x{x}", .{ off, off + file_size });
+ // try self.segment_cmds.append(self.base.allocator, .{});
+ // self.entry_addr = null;
+ // self.seg_table_dirty = true;
+ // }
+ // if (self.seg_got_index == null) {
+ // self.seg_got_index = @intCast(u16, self.segment_cmds.items.len);
+ // const file_size = 8 * self.base.options.symbol_count_hint;
+ // // Apple recommends to page align for better performance.
+ // // TODO This is not necessarily true for MH_OBJECT which means we
+ // // could potentially shave off a couple of bytes when generating
+ // // only object files.
+ // const p_align = 0x1000;
+ // const off = self.findFreeSpace(file_size, p_align);
+ // log.debug("found LC_SEGMENT_64 free space 0x{x} to 0x{x}", .{ off, off + file_size });
+ // const default_vmaddr = 0x4000000;
+ // try self.segment_cmds.append(self.base.allocator, .{
+ // .cmd = macho.LC_SEGMENT_64,
+ // .cmdsize = @sizeOf(macho.segment_command_64),
+ // .segname = self.makeString("__TEXT"),
+ // .vmaddr = default_vmaddr,
+ // .vmsize = file_size,
+ // .fileoff = off,
+ // .filesize = file_size,
+ // .maxprot = 0x5,
+ // .initprot = 0x5,
+ // .nsects = 0,
+ // .flags = 0,
+ // });
+ // self.seg_table_dirty = true;
+ // }
+}
+
+/// Returns end pos of collision, if any.
+fn detectAllocCollision(self: *MachO, start: u64, size: u64) ?u64 {
+ const header_size: u64 = @sizeOf(macho.mach_header_64);
+ if (start < header_size)
+ return header_size;
+
+ const end = start + satMul(size, alloc_num) / alloc_den;
+
+ // if (self.sec_table_offset) |off| {
+ // const section_size: u64 = @sizeOf(macho.section_64);
+ // const tight_size = self.sections.items.len * section_size;
+ // const increased_size = satMul(tight_size, alloc_num) / alloc_den;
+ // const test_end = off + increased_size;
+ // if (end > off and start < test_end) {
+ // return test_end;
+ // }
+ // }
+
+ // if (self.seg_table_offset) |off| {
+ // const segment_size: u64 = @sizeOf(macho.segment_command_64);
+ // const tight_size = self.segment_cmds.items.len * segment_size;
+ // const increased_size = satMul(tight_size, alloc_num) / alloc_den;
+ // const test_end = off + increased_size;
+ // if (end > off and start < test_end) {
+ // return test_end;
+ // }
+ // }
+
+ // for (self.sections.items) |section| {
+ // const increased_size = satMul(section.size, alloc_num) / alloc_den;
+ // const test_end = section.offset + increased_size;
+ // if (end > section.offset and start < test_end) {
+ // return test_end;
+ // }
+ // }
+
+ for (self.segments.items) |segment| {
+ const increased_size = satMul(segment.filesize, alloc_num) / alloc_den;
+ const test_end = segment.fileoff + increased_size;
+ if (end > segment.fileoff and start < test_end) {
+ return test_end;
+ }
+ }
+
+ return null;
+}
+
+fn findFreeSpace(self: *MachO, object_size: u64, min_alignment: u16) u64 {
+ var start: u64 = 0;
+ while (self.detectAllocCollision(start, object_size)) |item_end| {
+ start = mem.alignForwardGeneric(u64, item_end, min_alignment);
+ }
+ return start;
+}
+
+/// Saturating multiplication
+fn satMul(a: anytype, b: anytype) @TypeOf(a, b) {
+ const T = @TypeOf(a, b);
+ return std.math.mul(T, a, b) catch std.math.maxInt(T);
+}
+
+fn makeString(comptime bytes: []const u8) [16]u8 {
+ var buf: [16]u8 = undefined;
+ if (bytes.len > buf.len) @compileError("MachO segment/section name too long");
+ mem.copy(u8, buf[0..], bytes);
+ return buf;
+}
+
+fn commandSize(min_size: u32) u32 {
+ if (min_size % @sizeOf(u64) == 0) return min_size;
+
+ const div = min_size / @sizeOf(u64);
+ return (div + 1) * @sizeOf(u64);
+}
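
findFreeSpace reuses the over-allocation scheme from the self-hosted ELF linker: every existing blob is treated as alloc_num / alloc_den (4/3) times its current size when checking for collisions, leaving room for it to grow in place. A small sketch (not part of the commit, ignoring the saturating multiply used for overflow safety) of the resulting numbers:

const std = @import("std");
const assert = std.debug.assert;

/// Effective footprint of a blob of `actual_size` bytes when searching for
/// free space, using the same 4/3 factor as alloc_num / alloc_den above.
fn idealCapacity(actual_size: u64) u64 {
    return actual_size * 4 / 3;
}

comptime {
    // A segment occupying 0x300 bytes at fileoff 0x1000 blocks new
    // allocations up to 0x1000 + 0x400, so findFreeSpace keeps scanning
    // from the next aligned offset at or past 0x1400.
    assert(idealCapacity(0x300) == 0x400);
}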
src-self-hosted/codegen.zig
@@ -1427,7 +1427,62 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
else => return self.fail(inst.base.src, "TODO implement call for {}", .{self.target.cpu.arch}),
}
} else if (self.bin_file.cast(link.File.MachO)) |macho_file| {
- return self.fail(inst.base.src, "TODO implement codegen for call when linking with MachO", .{});
+ switch (arch) {
+ // .x86_64 => {
+ // for (info.args) |mc_arg, arg_i| {
+ // const arg = inst.args[arg_i];
+ // const arg_mcv = try self.resolveInst(inst.args[arg_i]);
+ // // Here we do not use setRegOrMem even though the logic is similar, because
+ // // the function call will move the stack pointer, so the offsets are different.
+ // switch (mc_arg) {
+ // .none => continue,
+ // .register => |reg| {
+ // try self.genSetReg(arg.src, reg, arg_mcv);
+ // // TODO interact with the register allocator to mark the instruction as moved.
+ // },
+ // .stack_offset => {
+ // // Here we need to emit instructions like this:
+ // // mov qword ptr [rsp + stack_offset], x
+ // return self.fail(inst.base.src, "TODO implement calling with parameters in memory", .{});
+ // },
+ // .ptr_stack_offset => {
+ // return self.fail(inst.base.src, "TODO implement calling with MCValue.ptr_stack_offset arg", .{});
+ // },
+ // .ptr_embedded_in_code => {
+ // return self.fail(inst.base.src, "TODO implement calling with MCValue.ptr_embedded_in_code arg", .{});
+ // },
+ // .undef => unreachable,
+ // .immediate => unreachable,
+ // .unreach => unreachable,
+ // .dead => unreachable,
+ // .embedded_in_code => unreachable,
+ // .memory => unreachable,
+ // .compare_flags_signed => unreachable,
+ // .compare_flags_unsigned => unreachable,
+ // }
+ // }
+
+ // if (inst.func.cast(ir.Inst.Constant)) |func_inst| {
+ // if (func_inst.val.cast(Value.Payload.Function)) |func_val| {
+ // const func = func_val.func;
+ // const got = &macho_file.segment_cmds.items[macho_file.seg_got_index.?];
+ // const ptr_bytes: u64 = 8;
+ // const got_addr = @intCast(u32, got.vmaddrs + func.owner_decl.link.macho.offset_table_index * ptr_bytes);
+ // // 01 xx xx xx xx call [addr]
+ // try self.code.ensureCapacity(self.code.items.len + 5);
+ // self.code.appendSliceAssumeCapacity(&[1]u8{ 0x1 });
+ // mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), got_addr);
+ // } else {
+ // return self.fail(inst.base.src, "TODO implement calling bitcasted functions", .{});
+ // }
+ // } else {
+ // return self.fail(inst.base.src, "TODO implement calling runtime known function pointer", .{});
+ // }
+ // },
+ .x86_64 => return self.fail(inst.base.src, "TODO implement codegen for call when linking with MachO for x86_64 arch", .{}),
+ .aarch64 => return self.fail(inst.base.src, "TODO implement codegen for call when linking with MachO for aarch64 arch", .{}),
+ else => unreachable,
+ }
} else {
unreachable;
}
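
The commented-out x86_64 branch appears to mirror the ELF backend's call-through-GOT sequence; its `01 xx xx xx xx` byte comment looks like a typo, since an absolute indirect call is encoded as `ff 14 25` followed by the 32-bit address. A sketch (not part of the commit, with a hypothetical helper name) of how that sequence could be emitted once the MachO path is implemented:

const std = @import("std");
const mem = std.mem;

/// Hypothetical helper: emit `call qword ptr [got_addr]`, the same byte
/// sequence the self-hosted ELF backend uses for calls through the GOT.
fn emitCallThroughGot(code: *std.ArrayList(u8), got_addr: u32) !void {
    // ff 14 25 xx xx xx xx    call [addr]
    try code.ensureCapacity(code.items.len + 7);
    code.appendSliceAssumeCapacity(&[3]u8{ 0xff, 0x14, 0x25 });
    mem.writeIntLittle(u32, code.addManyAsArrayAssumeCapacity(4), got_addr);
}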