Commit 2f94dc939e
Changed files (26)
src
link
src/link/MachO/dyld_info/bind.zig
@@ -1,231 +1,391 @@
-pub fn Bind(comptime Ctx: type, comptime Target: type) type {
- return struct {
- entries: std.ArrayListUnmanaged(Entry) = .{},
- buffer: std.ArrayListUnmanaged(u8) = .{},
-
- const Self = @This();
-
- const Entry = struct {
- target: Target,
- offset: u64,
- segment_id: u8,
- addend: i64,
-
- pub fn lessThan(ctx: Ctx, entry: Entry, other: Entry) bool {
- if (entry.segment_id == other.segment_id) {
- if (entry.target.eql(other.target)) {
- return entry.offset < other.offset;
- }
- const entry_name = ctx.getSymbolName(entry.target);
- const other_name = ctx.getSymbolName(other.target);
- return std.mem.lessThan(u8, entry_name, other_name);
- }
- return entry.segment_id < other.segment_id;
- }
- };
+const std = @import("std");
+const assert = std.debug.assert;
+const leb = std.leb;
+const log = std.log.scoped(.dyld_info);
+const macho = std.macho;
+const testing = std.testing;
- pub fn deinit(self: *Self, gpa: Allocator) void {
- self.entries.deinit(gpa);
- self.buffer.deinit(gpa);
+const Allocator = std.mem.Allocator;
+const MachO = @import("../../MachO.zig");
+const Symbol = @import("../Symbol.zig");
+
+pub const Entry = struct {
+ target: Symbol.Index,
+ offset: u64,
+ segment_id: u8,
+ addend: i64,
+
+ pub fn lessThan(ctx: *MachO, entry: Entry, other: Entry) bool {
+ if (entry.segment_id == other.segment_id) {
+ if (entry.target == other.target) {
+ return entry.offset < other.offset;
+ }
+ const entry_name = ctx.getSymbol(entry.target).getName(ctx);
+ const other_name = ctx.getSymbol(other.target).getName(ctx);
+ return std.mem.lessThan(u8, entry_name, other_name);
}
+ return entry.segment_id < other.segment_id;
+ }
+};
- pub fn size(self: Self) u64 {
- return @as(u64, @intCast(self.buffer.items.len));
- }
+pub const Bind = struct {
+ entries: std.ArrayListUnmanaged(Entry) = .{},
+ buffer: std.ArrayListUnmanaged(u8) = .{},
- pub fn finalize(self: *Self, gpa: Allocator, ctx: Ctx) !void {
- if (self.entries.items.len == 0) return;
+ const Self = @This();
- const writer = self.buffer.writer(gpa);
+ pub fn deinit(self: *Self, gpa: Allocator) void {
+ self.entries.deinit(gpa);
+ self.buffer.deinit(gpa);
+ }
- std.mem.sort(Entry, self.entries.items, ctx, Entry.lessThan);
+ pub fn size(self: Self) u64 {
+ return @as(u64, @intCast(self.buffer.items.len));
+ }
- var start: usize = 0;
- var seg_id: ?u8 = null;
- for (self.entries.items, 0..) |entry, i| {
- if (seg_id != null and seg_id.? == entry.segment_id) continue;
- try finalizeSegment(self.entries.items[start..i], ctx, writer);
- seg_id = entry.segment_id;
- start = i;
- }
+ pub fn finalize(self: *Self, gpa: Allocator, ctx: *MachO) !void {
+ if (self.entries.items.len == 0) return;
- try finalizeSegment(self.entries.items[start..], ctx, writer);
- try done(writer);
+ const writer = self.buffer.writer(gpa);
+
+ std.mem.sort(Entry, self.entries.items, ctx, Entry.lessThan);
+
+ var start: usize = 0;
+ var seg_id: ?u8 = null;
+ for (self.entries.items, 0..) |entry, i| {
+ if (seg_id != null and seg_id.? == entry.segment_id) continue;
+ try finalizeSegment(self.entries.items[start..i], ctx, writer);
+ seg_id = entry.segment_id;
+ start = i;
}
- fn finalizeSegment(entries: []const Entry, ctx: Ctx, writer: anytype) !void {
- if (entries.len == 0) return;
-
- const seg_id = entries[0].segment_id;
- try setSegmentOffset(seg_id, 0, writer);
-
- var offset: u64 = 0;
- var addend: i64 = 0;
- var count: usize = 0;
- var skip: u64 = 0;
- var target: ?Target = null;
-
- var state: enum {
- start,
- bind_single,
- bind_times_skip,
- } = .start;
-
- var i: usize = 0;
- while (i < entries.len) : (i += 1) {
- const current = entries[i];
- if (target == null or !target.?.eql(current.target)) {
- switch (state) {
- .start => {},
- .bind_single => try doBind(writer),
- .bind_times_skip => try doBindTimesSkip(count, skip, writer),
- }
- state = .start;
- target = current.target;
+ try finalizeSegment(self.entries.items[start..], ctx, writer);
+ try done(writer);
+ }
- const sym = ctx.getSymbol(current.target);
- const name = ctx.getSymbolName(current.target);
- const flags: u8 = if (sym.weakRef()) macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT else 0;
- const ordinal = @divTrunc(@as(i16, @bitCast(sym.n_desc)), macho.N_SYMBOL_RESOLVER);
+ fn finalizeSegment(entries: []const Entry, ctx: *MachO, writer: anytype) !void {
+ if (entries.len == 0) return;
- try setSymbol(name, flags, writer);
- try setTypePointer(writer);
- try setDylibOrdinal(ordinal, writer);
+ const seg_id = entries[0].segment_id;
+ try setSegmentOffset(seg_id, 0, writer);
- if (current.addend != addend) {
- addend = current.addend;
- try setAddend(addend, writer);
- }
- }
+ var offset: u64 = 0;
+ var addend: i64 = 0;
+ var count: usize = 0;
+ var skip: u64 = 0;
+ var target: ?Symbol.Index = null;
- log.debug("{x}, {d}, {x}, {?x}, {s}", .{ offset, count, skip, addend, @tagName(state) });
- log.debug(" => {x}", .{current.offset});
+ var state: enum {
+ start,
+ bind_single,
+ bind_times_skip,
+ } = .start;
+
+ var i: usize = 0;
+ while (i < entries.len) : (i += 1) {
+ const current = entries[i];
+ if (target == null or target.? != current.target) {
switch (state) {
- .start => {
- if (current.offset < offset) {
- try addAddr(@as(u64, @bitCast(@as(i64, @intCast(current.offset)) - @as(i64, @intCast(offset)))), writer);
- offset = offset - (offset - current.offset);
- } else if (current.offset > offset) {
- const delta = current.offset - offset;
- try addAddr(delta, writer);
- offset += delta;
- }
- state = .bind_single;
- offset += @sizeOf(u64);
- count = 1;
- },
- .bind_single => {
- if (current.offset == offset) {
- try doBind(writer);
- state = .start;
- } else if (current.offset > offset) {
- const delta = current.offset - offset;
- state = .bind_times_skip;
- skip = @as(u64, @intCast(delta));
- offset += skip;
- } else unreachable;
- i -= 1;
- },
- .bind_times_skip => {
- if (current.offset < offset) {
- count -= 1;
- if (count == 1) {
- try doBindAddAddr(skip, writer);
- } else {
- try doBindTimesSkip(count, skip, writer);
- }
- state = .start;
- offset = offset - (@sizeOf(u64) + skip);
- i -= 2;
- } else if (current.offset == offset) {
- count += 1;
- offset += @sizeOf(u64) + skip;
- } else {
- try doBindTimesSkip(count, skip, writer);
- state = .start;
- i -= 1;
- }
- },
+ .start => {},
+ .bind_single => try doBind(writer),
+ .bind_times_skip => try doBindTimesSkip(count, skip, writer),
+ }
+ state = .start;
+ target = current.target;
+
+ const sym = ctx.getSymbol(current.target);
+ const name = sym.getName(ctx);
+ const flags: u8 = if (sym.weakRef(ctx)) macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT else 0;
+ const ordinal: i16 = ord: {
+ if (sym.flags.interposable) break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
+ if (sym.flags.import) {
+ if (ctx.options.namespace == .flat) break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
+ if (sym.getDylibOrdinal(ctx)) |ord| break :ord @bitCast(ord);
+ }
+ if (ctx.options.undefined_treatment == .dynamic_lookup)
+ break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
+ break :ord macho.BIND_SPECIAL_DYLIB_SELF;
+ };
+
+ try setSymbol(name, flags, writer);
+ try setTypePointer(writer);
+ try setDylibOrdinal(ordinal, writer);
+
+ if (current.addend != addend) {
+ addend = current.addend;
+ try setAddend(addend, writer);
}
}
+ log.debug("{x}, {d}, {x}, {?x}, {s}", .{ offset, count, skip, addend, @tagName(state) });
+ log.debug(" => {x}", .{current.offset});
switch (state) {
- .start => unreachable,
- .bind_single => try doBind(writer),
- .bind_times_skip => try doBindTimesSkip(count, skip, writer),
+ .start => {
+ if (current.offset < offset) {
+ try addAddr(@as(u64, @bitCast(@as(i64, @intCast(current.offset)) - @as(i64, @intCast(offset)))), writer);
+ offset = offset - (offset - current.offset);
+ } else if (current.offset > offset) {
+ const delta = current.offset - offset;
+ try addAddr(delta, writer);
+ offset += delta;
+ }
+ state = .bind_single;
+ offset += @sizeOf(u64);
+ count = 1;
+ },
+ .bind_single => {
+ if (current.offset == offset) {
+ try doBind(writer);
+ state = .start;
+ } else if (current.offset > offset) {
+ const delta = current.offset - offset;
+ state = .bind_times_skip;
+ skip = @as(u64, @intCast(delta));
+ offset += skip;
+ } else unreachable;
+ i -= 1;
+ },
+ .bind_times_skip => {
+ if (current.offset < offset) {
+ count -= 1;
+ if (count == 1) {
+ try doBindAddAddr(skip, writer);
+ } else {
+ try doBindTimesSkip(count, skip, writer);
+ }
+ state = .start;
+ offset = offset - (@sizeOf(u64) + skip);
+ i -= 2;
+ } else if (current.offset == offset) {
+ count += 1;
+ offset += @sizeOf(u64) + skip;
+ } else {
+ try doBindTimesSkip(count, skip, writer);
+ state = .start;
+ i -= 1;
+ }
+ },
}
}
- pub fn write(self: Self, writer: anytype) !void {
- if (self.size() == 0) return;
- try writer.writeAll(self.buffer.items);
+ switch (state) {
+ .start => unreachable,
+ .bind_single => try doBind(writer),
+ .bind_times_skip => try doBindTimesSkip(count, skip, writer),
}
- };
-}
+ }
-pub fn LazyBind(comptime Ctx: type, comptime Target: type) type {
- return struct {
- entries: std.ArrayListUnmanaged(Entry) = .{},
- buffer: std.ArrayListUnmanaged(u8) = .{},
- offsets: std.ArrayListUnmanaged(u32) = .{},
-
- const Self = @This();
-
- const Entry = struct {
- target: Target,
- offset: u64,
- segment_id: u8,
- addend: i64,
- };
-
- pub fn deinit(self: *Self, gpa: Allocator) void {
- self.entries.deinit(gpa);
- self.buffer.deinit(gpa);
- self.offsets.deinit(gpa);
- }
+ pub fn write(self: Self, writer: anytype) !void {
+ if (self.size() == 0) return;
+ try writer.writeAll(self.buffer.items);
+ }
+};
+
+pub const WeakBind = struct {
+ entries: std.ArrayListUnmanaged(Entry) = .{},
+ buffer: std.ArrayListUnmanaged(u8) = .{},
+
+ const Self = @This();
+
+ pub fn deinit(self: *Self, gpa: Allocator) void {
+ self.entries.deinit(gpa);
+ self.buffer.deinit(gpa);
+ }
- pub fn size(self: Self) u64 {
- return @as(u64, @intCast(self.buffer.items.len));
+ pub fn size(self: Self) u64 {
+ return @as(u64, @intCast(self.buffer.items.len));
+ }
+
+ pub fn finalize(self: *Self, gpa: Allocator, ctx: *MachO) !void {
+ if (self.entries.items.len == 0) return;
+
+ const writer = self.buffer.writer(gpa);
+
+ std.mem.sort(Entry, self.entries.items, ctx, Entry.lessThan);
+
+ var start: usize = 0;
+ var seg_id: ?u8 = null;
+ for (self.entries.items, 0..) |entry, i| {
+ if (seg_id != null and seg_id.? == entry.segment_id) continue;
+ try finalizeSegment(self.entries.items[start..i], ctx, writer);
+ seg_id = entry.segment_id;
+ start = i;
}
- pub fn finalize(self: *Self, gpa: Allocator, ctx: Ctx) !void {
- if (self.entries.items.len == 0) return;
+ try finalizeSegment(self.entries.items[start..], ctx, writer);
+ try done(writer);
+ }
+
+ fn finalizeSegment(entries: []const Entry, ctx: *MachO, writer: anytype) !void {
+ if (entries.len == 0) return;
- try self.offsets.ensureTotalCapacityPrecise(gpa, self.entries.items.len);
+ const seg_id = entries[0].segment_id;
+ try setSegmentOffset(seg_id, 0, writer);
- var cwriter = std.io.countingWriter(self.buffer.writer(gpa));
- const writer = cwriter.writer();
+ var offset: u64 = 0;
+ var addend: i64 = 0;
+ var count: usize = 0;
+ var skip: u64 = 0;
+ var target: ?Symbol.Index = null;
- var addend: i64 = 0;
+ var state: enum {
+ start,
+ bind_single,
+ bind_times_skip,
+ } = .start;
- for (self.entries.items) |entry| {
- self.offsets.appendAssumeCapacity(@as(u32, @intCast(cwriter.bytes_written)));
+ var i: usize = 0;
+ while (i < entries.len) : (i += 1) {
+ const current = entries[i];
+ if (target == null or target.? != current.target) {
+ switch (state) {
+ .start => {},
+ .bind_single => try doBind(writer),
+ .bind_times_skip => try doBindTimesSkip(count, skip, writer),
+ }
+ state = .start;
+ target = current.target;
- const sym = ctx.getSymbol(entry.target);
- const name = ctx.getSymbolName(entry.target);
- const flags: u8 = if (sym.weakRef()) macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT else 0;
- const ordinal = @divTrunc(@as(i16, @bitCast(sym.n_desc)), macho.N_SYMBOL_RESOLVER);
+ const sym = ctx.getSymbol(current.target);
+ const name = sym.getName(ctx);
+ const flags: u8 = 0; // TODO NON_WEAK_DEFINITION
- try setSegmentOffset(entry.segment_id, entry.offset, writer);
try setSymbol(name, flags, writer);
- try setDylibOrdinal(ordinal, writer);
+ try setTypePointer(writer);
- if (entry.addend != addend) {
- try setAddend(entry.addend, writer);
- addend = entry.addend;
+ if (current.addend != addend) {
+ addend = current.addend;
+ try setAddend(addend, writer);
}
+ }
- try doBind(writer);
- try done(writer);
+ log.debug("{x}, {d}, {x}, {?x}, {s}", .{ offset, count, skip, addend, @tagName(state) });
+ log.debug(" => {x}", .{current.offset});
+ switch (state) {
+ .start => {
+ if (current.offset < offset) {
+ try addAddr(@as(u64, @bitCast(@as(i64, @intCast(current.offset)) - @as(i64, @intCast(offset)))), writer);
+ offset = offset - (offset - current.offset);
+ } else if (current.offset > offset) {
+ const delta = current.offset - offset;
+ try addAddr(delta, writer);
+ offset += delta;
+ }
+ state = .bind_single;
+ offset += @sizeOf(u64);
+ count = 1;
+ },
+ .bind_single => {
+ if (current.offset == offset) {
+ try doBind(writer);
+ state = .start;
+ } else if (current.offset > offset) {
+ const delta = current.offset - offset;
+ state = .bind_times_skip;
+ skip = @as(u64, @intCast(delta));
+ offset += skip;
+ } else unreachable;
+ i -= 1;
+ },
+ .bind_times_skip => {
+ if (current.offset < offset) {
+ count -= 1;
+ if (count == 1) {
+ try doBindAddAddr(skip, writer);
+ } else {
+ try doBindTimesSkip(count, skip, writer);
+ }
+ state = .start;
+ offset = offset - (@sizeOf(u64) + skip);
+ i -= 2;
+ } else if (current.offset == offset) {
+ count += 1;
+ offset += @sizeOf(u64) + skip;
+ } else {
+ try doBindTimesSkip(count, skip, writer);
+ state = .start;
+ i -= 1;
+ }
+ },
}
}
- pub fn write(self: Self, writer: anytype) !void {
- if (self.size() == 0) return;
- try writer.writeAll(self.buffer.items);
+ switch (state) {
+ .start => unreachable,
+ .bind_single => try doBind(writer),
+ .bind_times_skip => try doBindTimesSkip(count, skip, writer),
}
- };
-}
+ }
+
+ pub fn write(self: Self, writer: anytype) !void {
+ if (self.size() == 0) return;
+ try writer.writeAll(self.buffer.items);
+ }
+};
+
+pub const LazyBind = struct {
+ entries: std.ArrayListUnmanaged(Entry) = .{},
+ buffer: std.ArrayListUnmanaged(u8) = .{},
+ offsets: std.ArrayListUnmanaged(u32) = .{},
+
+ const Self = @This();
+
+ pub fn deinit(self: *Self, gpa: Allocator) void {
+ self.entries.deinit(gpa);
+ self.buffer.deinit(gpa);
+ self.offsets.deinit(gpa);
+ }
+
+ pub fn size(self: Self) u64 {
+ return @as(u64, @intCast(self.buffer.items.len));
+ }
+
+ pub fn finalize(self: *Self, gpa: Allocator, ctx: *MachO) !void {
+ if (self.entries.items.len == 0) return;
+
+ try self.offsets.ensureTotalCapacityPrecise(gpa, self.entries.items.len);
+
+ var cwriter = std.io.countingWriter(self.buffer.writer(gpa));
+ const writer = cwriter.writer();
+
+ var addend: i64 = 0;
+
+ for (self.entries.items) |entry| {
+ self.offsets.appendAssumeCapacity(@as(u32, @intCast(cwriter.bytes_written)));
+
+ const sym = ctx.getSymbol(entry.target);
+ const name = sym.getName(ctx);
+ const flags: u8 = if (sym.weakRef(ctx)) macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT else 0;
+ const ordinal: i16 = ord: {
+ if (sym.flags.interposable) break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
+ if (sym.flags.import) {
+ if (ctx.options.namespace == .flat) break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
+ if (sym.getDylibOrdinal(ctx)) |ord| break :ord @bitCast(ord);
+ }
+ if (ctx.options.undefined_treatment == .dynamic_lookup)
+ break :ord macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
+ break :ord macho.BIND_SPECIAL_DYLIB_SELF;
+ };
+
+ try setSegmentOffset(entry.segment_id, entry.offset, writer);
+ try setSymbol(name, flags, writer);
+ try setDylibOrdinal(ordinal, writer);
+
+ if (entry.addend != addend) {
+ try setAddend(entry.addend, writer);
+ addend = entry.addend;
+ }
+
+ try doBind(writer);
+ try done(writer);
+ }
+ }
+
+ pub fn write(self: Self, writer: anytype) !void {
+ if (self.size() == 0) return;
+ try writer.writeAll(self.buffer.items);
+ }
+};
fn setSegmentOffset(segment_id: u8, offset: u64, writer: anytype) !void {
log.debug(">>> set segment: {d} and offset: {x}", .{ segment_id, offset });
@@ -282,7 +442,7 @@ fn doBind(writer: anytype) !void {
fn doBindAddAddr(addr: u64, writer: anytype) !void {
log.debug(">>> bind with add: {x}", .{addr});
- if (std.mem.isAlignedGeneric(u64, addr, @sizeOf(u64))) {
+ if (std.mem.isAligned(addr, @sizeOf(u64))) {
const imm = @divExact(addr, @sizeOf(u64));
if (imm <= 0xf) {
try writer.writeByte(
@@ -312,429 +472,3 @@ fn done(writer: anytype) !void {
log.debug(">>> done", .{});
try writer.writeByte(macho.BIND_OPCODE_DONE);
}
-
-const TestContext = struct {
- symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{},
- strtab: std.ArrayListUnmanaged(u8) = .{},
-
- const Target = struct {
- index: u32,
-
- fn eql(this: Target, other: Target) bool {
- return this.index == other.index;
- }
- };
-
- fn deinit(ctx: *TestContext, gpa: Allocator) void {
- ctx.symbols.deinit(gpa);
- ctx.strtab.deinit(gpa);
- }
-
- fn addSymbol(ctx: *TestContext, gpa: Allocator, name: []const u8, ordinal: i16, flags: u16) !void {
- const n_strx = try ctx.addString(gpa, name);
- var n_desc = @as(u16, @bitCast(ordinal * macho.N_SYMBOL_RESOLVER));
- n_desc |= flags;
- try ctx.symbols.append(gpa, .{
- .n_value = 0,
- .n_strx = n_strx,
- .n_desc = n_desc,
- .n_type = macho.N_EXT,
- .n_sect = 0,
- });
- }
-
- fn addString(ctx: *TestContext, gpa: Allocator, name: []const u8) !u32 {
- const n_strx = @as(u32, @intCast(ctx.strtab.items.len));
- try ctx.strtab.appendSlice(gpa, name);
- try ctx.strtab.append(gpa, 0);
- return n_strx;
- }
-
- fn getSymbol(ctx: TestContext, target: Target) macho.nlist_64 {
- return ctx.symbols.items[target.index];
- }
-
- fn getSymbolName(ctx: TestContext, target: Target) []const u8 {
- const sym = ctx.getSymbol(target);
- assert(sym.n_strx < ctx.strtab.items.len);
- return std.mem.sliceTo(@as([*:0]const u8, @ptrCast(ctx.strtab.items.ptr + sym.n_strx)), 0);
- }
-};
-
-fn generateTestContext() !TestContext {
- const gpa = testing.allocator;
- var ctx = TestContext{};
- try ctx.addSymbol(gpa, "_import_1", 1, 0);
- try ctx.addSymbol(gpa, "_import_2", 1, 0);
- try ctx.addSymbol(gpa, "_import_3", 1, 0);
- try ctx.addSymbol(gpa, "_import_4", 2, 0);
- try ctx.addSymbol(gpa, "_import_5_weak", 2, macho.N_WEAK_REF);
- try ctx.addSymbol(gpa, "_import_6", 2, 0);
- return ctx;
-}
-
-test "bind - no entries" {
- const gpa = testing.allocator;
-
- var test_context = try generateTestContext();
- defer test_context.deinit(gpa);
-
- var bind = Bind(TestContext, TestContext.Target){};
- defer bind.deinit(gpa);
-
- try bind.finalize(gpa, test_context);
- try testing.expectEqual(@as(u64, 0), bind.size());
-}
-
-test "bind - single entry" {
- const gpa = testing.allocator;
-
- var test_context = try generateTestContext();
- defer test_context.deinit(gpa);
-
- var bind = Bind(TestContext, TestContext.Target){};
- defer bind.deinit(gpa);
-
- try bind.entries.append(gpa, .{
- .offset = 0x10,
- .segment_id = 1,
- .target = TestContext.Target{ .index = 0 },
- .addend = 0,
- });
- try bind.finalize(gpa, test_context);
- try testing.expectEqualSlices(u8, &[_]u8{
- macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1,
- 0x0,
- macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0,
- 0x5f,
- 0x69,
- 0x6d,
- 0x70,
- 0x6f,
- 0x72,
- 0x74,
- 0x5f,
- 0x31,
- 0x0,
- macho.BIND_OPCODE_SET_TYPE_IMM | 1,
- macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1,
- macho.BIND_OPCODE_ADD_ADDR_ULEB,
- 0x10,
- macho.BIND_OPCODE_DO_BIND,
- macho.BIND_OPCODE_DONE,
- }, bind.buffer.items);
-}
-
-test "bind - multiple occurrences within the same segment" {
- const gpa = testing.allocator;
-
- var test_context = try generateTestContext();
- defer test_context.deinit(gpa);
-
- var bind = Bind(TestContext, TestContext.Target){};
- defer bind.deinit(gpa);
-
- try bind.entries.append(gpa, .{
- .offset = 0x10,
- .segment_id = 1,
- .target = TestContext.Target{ .index = 0 },
- .addend = 0,
- });
- try bind.entries.append(gpa, .{
- .offset = 0x18,
- .segment_id = 1,
- .target = TestContext.Target{ .index = 0 },
- .addend = 0,
- });
- try bind.entries.append(gpa, .{
- .offset = 0x20,
- .segment_id = 1,
- .target = TestContext.Target{ .index = 0 },
- .addend = 0,
- });
- try bind.entries.append(gpa, .{
- .offset = 0x28,
- .segment_id = 1,
- .target = TestContext.Target{ .index = 0 },
- .addend = 0,
- });
-
- try bind.finalize(gpa, test_context);
- try testing.expectEqualSlices(u8, &[_]u8{
- macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1,
- 0x0,
- macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0,
- 0x5f,
- 0x69,
- 0x6d,
- 0x70,
- 0x6f,
- 0x72,
- 0x74,
- 0x5f,
- 0x31,
- 0x0,
- macho.BIND_OPCODE_SET_TYPE_IMM | 1,
- macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1,
- macho.BIND_OPCODE_ADD_ADDR_ULEB,
- 0x10,
- macho.BIND_OPCODE_DO_BIND,
- macho.BIND_OPCODE_DO_BIND,
- macho.BIND_OPCODE_DO_BIND,
- macho.BIND_OPCODE_DO_BIND,
- macho.BIND_OPCODE_DONE,
- }, bind.buffer.items);
-}
-
-test "bind - multiple occurrences with skip and addend" {
- const gpa = testing.allocator;
-
- var test_context = try generateTestContext();
- defer test_context.deinit(gpa);
-
- var bind = Bind(TestContext, TestContext.Target){};
- defer bind.deinit(gpa);
-
- try bind.entries.append(gpa, .{
- .offset = 0x0,
- .segment_id = 1,
- .target = TestContext.Target{ .index = 0 },
- .addend = 0x10,
- });
- try bind.entries.append(gpa, .{
- .offset = 0x10,
- .segment_id = 1,
- .target = TestContext.Target{ .index = 0 },
- .addend = 0x10,
- });
- try bind.entries.append(gpa, .{
- .offset = 0x20,
- .segment_id = 1,
- .target = TestContext.Target{ .index = 0 },
- .addend = 0x10,
- });
- try bind.entries.append(gpa, .{
- .offset = 0x30,
- .segment_id = 1,
- .target = TestContext.Target{ .index = 0 },
- .addend = 0x10,
- });
-
- try bind.finalize(gpa, test_context);
- try testing.expectEqualSlices(u8, &[_]u8{
- macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1,
- 0x0,
- macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0,
- 0x5f,
- 0x69,
- 0x6d,
- 0x70,
- 0x6f,
- 0x72,
- 0x74,
- 0x5f,
- 0x31,
- 0x0,
- macho.BIND_OPCODE_SET_TYPE_IMM | 1,
- macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1,
- macho.BIND_OPCODE_SET_ADDEND_SLEB,
- 0x10,
- macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB,
- 0x4,
- 0x8,
- macho.BIND_OPCODE_DONE,
- }, bind.buffer.items);
-}
-
-test "bind - complex" {
- const gpa = testing.allocator;
-
- var test_context = try generateTestContext();
- defer test_context.deinit(gpa);
-
- var bind = Bind(TestContext, TestContext.Target){};
- defer bind.deinit(gpa);
-
- try bind.entries.append(gpa, .{
- .offset = 0x58,
- .segment_id = 1,
- .target = TestContext.Target{ .index = 0 },
- .addend = 0,
- });
- try bind.entries.append(gpa, .{
- .offset = 0x100,
- .segment_id = 1,
- .target = TestContext.Target{ .index = 1 },
- .addend = 0x10,
- });
- try bind.entries.append(gpa, .{
- .offset = 0x110,
- .segment_id = 1,
- .target = TestContext.Target{ .index = 1 },
- .addend = 0x10,
- });
- try bind.entries.append(gpa, .{
- .offset = 0x130,
- .segment_id = 1,
- .target = TestContext.Target{ .index = 1 },
- .addend = 0x10,
- });
- try bind.entries.append(gpa, .{
- .offset = 0x140,
- .segment_id = 1,
- .target = TestContext.Target{ .index = 1 },
- .addend = 0x10,
- });
- try bind.entries.append(gpa, .{
- .offset = 0x148,
- .segment_id = 1,
- .target = TestContext.Target{ .index = 2 },
- .addend = 0,
- });
-
- try bind.finalize(gpa, test_context);
- try testing.expectEqualSlices(u8, &[_]u8{
- macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1,
- 0x0,
- macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0,
- 0x5f,
- 0x69,
- 0x6d,
- 0x70,
- 0x6f,
- 0x72,
- 0x74,
- 0x5f,
- 0x31,
- 0x0,
- macho.BIND_OPCODE_SET_TYPE_IMM | 1,
- macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1,
- macho.BIND_OPCODE_ADD_ADDR_ULEB,
- 0x58,
- macho.BIND_OPCODE_DO_BIND,
- macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0,
- 0x5f,
- 0x69,
- 0x6d,
- 0x70,
- 0x6f,
- 0x72,
- 0x74,
- 0x5f,
- 0x32,
- 0x0,
- macho.BIND_OPCODE_SET_TYPE_IMM | 1,
- macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1,
- macho.BIND_OPCODE_SET_ADDEND_SLEB,
- 0x10,
- macho.BIND_OPCODE_ADD_ADDR_ULEB,
- 0xa0,
- 0x1,
- macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB,
- 0x2,
- 0x8,
- macho.BIND_OPCODE_ADD_ADDR_ULEB,
- 0x10,
- macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB,
- 0x2,
- 0x8,
- macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0,
- 0x5f,
- 0x69,
- 0x6d,
- 0x70,
- 0x6f,
- 0x72,
- 0x74,
- 0x5f,
- 0x33,
- 0x0,
- macho.BIND_OPCODE_SET_TYPE_IMM | 1,
- macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1,
- macho.BIND_OPCODE_SET_ADDEND_SLEB,
- 0x0,
- macho.BIND_OPCODE_ADD_ADDR_ULEB,
- 0xf8,
- 0xff,
- 0xff,
- 0xff,
- 0xff,
- 0xff,
- 0xff,
- 0xff,
- 0xff,
- 0x1,
- macho.BIND_OPCODE_DO_BIND,
- macho.BIND_OPCODE_DONE,
- }, bind.buffer.items);
-}
-
-test "lazy bind" {
- const gpa = testing.allocator;
-
- var test_context = try generateTestContext();
- defer test_context.deinit(gpa);
-
- var bind = LazyBind(TestContext, TestContext.Target){};
- defer bind.deinit(gpa);
-
- try bind.entries.append(gpa, .{
- .offset = 0x10,
- .segment_id = 1,
- .target = TestContext.Target{ .index = 0 },
- .addend = 0,
- });
- try bind.entries.append(gpa, .{
- .offset = 0x20,
- .segment_id = 2,
- .target = TestContext.Target{ .index = 1 },
- .addend = 0x10,
- });
-
- try bind.finalize(gpa, test_context);
- try testing.expectEqualSlices(u8, &[_]u8{
- macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1,
- 0x10,
- macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0,
- 0x5f,
- 0x69,
- 0x6d,
- 0x70,
- 0x6f,
- 0x72,
- 0x74,
- 0x5f,
- 0x31,
- 0x0,
- macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1,
- macho.BIND_OPCODE_DO_BIND,
- macho.BIND_OPCODE_DONE,
- macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 2,
- 0x20,
- macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0,
- 0x5f,
- 0x69,
- 0x6d,
- 0x70,
- 0x6f,
- 0x72,
- 0x74,
- 0x5f,
- 0x32,
- 0x0,
- macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1,
- macho.BIND_OPCODE_SET_ADDEND_SLEB,
- 0x10,
- macho.BIND_OPCODE_DO_BIND,
- macho.BIND_OPCODE_DONE,
- }, bind.buffer.items);
-}
-
-const std = @import("std");
-const assert = std.debug.assert;
-const leb = std.leb;
-const log = std.log.scoped(.dyld_info);
-const macho = std.macho;
-const testing = std.testing;
-
-const Allocator = std.mem.Allocator;
src/link/MachO/dyld_info/Rebase.zig
@@ -1,3 +1,14 @@
+const Rebase = @This();
+
+const std = @import("std");
+const assert = std.debug.assert;
+const leb = std.leb;
+const log = std.log.scoped(.dyld_info);
+const macho = std.macho;
+const testing = std.testing;
+
+const Allocator = std.mem.Allocator;
+
entries: std.ArrayListUnmanaged(Entry) = .{},
buffer: std.ArrayListUnmanaged(u8) = .{},
@@ -168,7 +179,7 @@ fn rebaseTimesSkip(count: usize, skip: u64, writer: anytype) !void {
fn addAddr(addr: u64, writer: anytype) !void {
log.debug(">>> add: {x}", .{addr});
- if (std.mem.isAlignedGeneric(u64, addr, @sizeOf(u64))) {
+ if (std.mem.isAligned(addr, @sizeOf(u64))) {
const imm = @divExact(addr, @sizeOf(u64));
if (imm <= 0xf) {
try writer.writeByte(macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED | @as(u4, @truncate(imm)));
@@ -561,14 +572,3 @@ test "rebase - composite" {
macho.REBASE_OPCODE_DONE,
}, rebase.buffer.items);
}
-
-const Rebase = @This();
-
-const std = @import("std");
-const assert = std.debug.assert;
-const leb = std.leb;
-const log = std.log.scoped(.dyld_info);
-const macho = std.macho;
-const testing = std.testing;
-
-const Allocator = std.mem.Allocator;
src/link/MachO/Trie.zig โ src/link/MachO/dyld_info/Trie.zig
@@ -28,6 +28,248 @@
//! After the optional exported symbol information is a byte of how many edges (0-255) that
//! this node has leaving it, followed by each edge. Each edge is a zero terminated UTF8 of
//! the addition chars in the symbol, followed by a uleb128 offset for the node that edge points to.
+const Trie = @This();
+
+const std = @import("std");
+const mem = std.mem;
+const leb = std.leb;
+const log = std.log.scoped(.macho);
+const macho = std.macho;
+const testing = std.testing;
+const assert = std.debug.assert;
+const Allocator = mem.Allocator;
+
+pub const Node = struct {
+ base: *Trie,
+
+ /// Terminal info associated with this node.
+ /// If this node is not a terminal node, info is null.
+ terminal_info: ?struct {
+ /// Export flags associated with this exported symbol.
+ export_flags: u64,
+ /// VM address offset wrt to the section this symbol is defined against.
+ vmaddr_offset: u64,
+ } = null,
+
+ /// Offset of this node in the trie output byte stream.
+ trie_offset: ?u64 = null,
+
+ /// List of all edges originating from this node.
+ edges: std.ArrayListUnmanaged(Edge) = .{},
+
+ node_dirty: bool = true,
+
+ /// Edge connecting to nodes in the trie.
+ pub const Edge = struct {
+ from: *Node,
+ to: *Node,
+ label: []u8,
+
+ fn deinit(self: *Edge, allocator: Allocator) void {
+ self.to.deinit(allocator);
+ allocator.destroy(self.to);
+ allocator.free(self.label);
+ self.from = undefined;
+ self.to = undefined;
+ self.label = undefined;
+ }
+ };
+
+ fn deinit(self: *Node, allocator: Allocator) void {
+ for (self.edges.items) |*edge| {
+ edge.deinit(allocator);
+ }
+ self.edges.deinit(allocator);
+ }
+
+ /// Inserts a new node starting from `self`.
+ fn put(self: *Node, allocator: Allocator, label: []const u8) !*Node {
+ // Check for match with edges from this node.
+ for (self.edges.items) |*edge| {
+ const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.to;
+ if (match == 0) continue;
+ if (match == edge.label.len) return edge.to.put(allocator, label[match..]);
+
+ // Found a match, need to splice up nodes.
+ // From: A -> B
+ // To: A -> C -> B
+ const mid = try allocator.create(Node);
+ mid.* = .{ .base = self.base };
+ const to_label = try allocator.dupe(u8, edge.label[match..]);
+ allocator.free(edge.label);
+ const to_node = edge.to;
+ edge.to = mid;
+ edge.label = try allocator.dupe(u8, label[0..match]);
+ self.base.node_count += 1;
+
+ try mid.edges.append(allocator, .{
+ .from = mid,
+ .to = to_node,
+ .label = to_label,
+ });
+
+ return if (match == label.len) mid else mid.put(allocator, label[match..]);
+ }
+
+ // Add a new node.
+ const node = try allocator.create(Node);
+ node.* = .{ .base = self.base };
+ self.base.node_count += 1;
+
+ try self.edges.append(allocator, .{
+ .from = self,
+ .to = node,
+ .label = try allocator.dupe(u8, label),
+ });
+
+ return node;
+ }
+
+ /// Recursively parses the node from the input byte stream.
+ fn read(self: *Node, allocator: Allocator, reader: anytype) Trie.ReadError!usize {
+ self.node_dirty = true;
+ const trie_offset = try reader.context.getPos();
+ self.trie_offset = trie_offset;
+
+ var nread: usize = 0;
+
+ const node_size = try leb.readULEB128(u64, reader);
+ if (node_size > 0) {
+ const export_flags = try leb.readULEB128(u64, reader);
+ // TODO Parse special flags.
+ assert(export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and
+ export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0);
+
+ const vmaddr_offset = try leb.readULEB128(u64, reader);
+
+ self.terminal_info = .{
+ .export_flags = export_flags,
+ .vmaddr_offset = vmaddr_offset,
+ };
+ }
+
+ const nedges = try reader.readByte();
+ self.base.node_count += nedges;
+
+ nread += (try reader.context.getPos()) - trie_offset;
+
+ var i: usize = 0;
+ while (i < nedges) : (i += 1) {
+ const edge_start_pos = try reader.context.getPos();
+
+ const label = blk: {
+ var label_buf = std.ArrayList(u8).init(allocator);
+ while (true) {
+ const next = try reader.readByte();
+ if (next == @as(u8, 0))
+ break;
+ try label_buf.append(next);
+ }
+ break :blk try label_buf.toOwnedSlice();
+ };
+
+ const seek_to = try leb.readULEB128(u64, reader);
+ const return_pos = try reader.context.getPos();
+
+ nread += return_pos - edge_start_pos;
+ try reader.context.seekTo(seek_to);
+
+ const node = try allocator.create(Node);
+ node.* = .{ .base = self.base };
+
+ nread += try node.read(allocator, reader);
+ try self.edges.append(allocator, .{
+ .from = self,
+ .to = node,
+ .label = label,
+ });
+ try reader.context.seekTo(return_pos);
+ }
+
+ return nread;
+ }
+
+ /// Writes this node to a byte stream.
+ /// The children of this node *are* not written to the byte stream
+ /// recursively. To write all nodes to a byte stream in sequence,
+ /// iterate over `Trie.ordered_nodes` and call this method on each node.
+ /// This is one of the requirements of the MachO.
+ /// Panics if `finalize` was not called before calling this method.
+ fn write(self: Node, writer: anytype) !void {
+ assert(!self.node_dirty);
+ if (self.terminal_info) |info| {
+ // Terminal node info: encode export flags and vmaddr offset of this symbol.
+ var info_buf: [@sizeOf(u64) * 2]u8 = undefined;
+ var info_stream = std.io.fixedBufferStream(&info_buf);
+ // TODO Implement for special flags.
+ assert(info.export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and
+ info.export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0);
+ try leb.writeULEB128(info_stream.writer(), info.export_flags);
+ try leb.writeULEB128(info_stream.writer(), info.vmaddr_offset);
+
+ // Encode the size of the terminal node info.
+ var size_buf: [@sizeOf(u64)]u8 = undefined;
+ var size_stream = std.io.fixedBufferStream(&size_buf);
+ try leb.writeULEB128(size_stream.writer(), info_stream.pos);
+
+ // Now, write them to the output stream.
+ try writer.writeAll(size_buf[0..size_stream.pos]);
+ try writer.writeAll(info_buf[0..info_stream.pos]);
+ } else {
+ // Non-terminal node is delimited by 0 byte.
+ try writer.writeByte(0);
+ }
+ // Write number of edges (max legal number of edges is 256).
+ try writer.writeByte(@as(u8, @intCast(self.edges.items.len)));
+
+ for (self.edges.items) |edge| {
+ // Write edge label and offset to next node in trie.
+ try writer.writeAll(edge.label);
+ try writer.writeByte(0);
+ try leb.writeULEB128(writer, edge.to.trie_offset.?);
+ }
+ }
+
+ const FinalizeResult = struct {
+ /// Current size of this node in bytes.
+ node_size: u64,
+
+ /// True if the trie offset of this node in the output byte stream
+ /// would need updating; false otherwise.
+ updated: bool,
+ };
+
+ /// Updates offset of this node in the output byte stream.
+ fn finalize(self: *Node, offset_in_trie: u64) !FinalizeResult {
+ var stream = std.io.countingWriter(std.io.null_writer);
+ const writer = stream.writer();
+
+ var node_size: u64 = 0;
+ if (self.terminal_info) |info| {
+ try leb.writeULEB128(writer, info.export_flags);
+ try leb.writeULEB128(writer, info.vmaddr_offset);
+ try leb.writeULEB128(writer, stream.bytes_written);
+ } else {
+ node_size += 1; // 0x0 for non-terminal nodes
+ }
+ node_size += 1; // 1 byte for edge count
+
+ for (self.edges.items) |edge| {
+ const next_node_offset = edge.to.trie_offset orelse 0;
+ node_size += edge.label.len + 1;
+ try leb.writeULEB128(writer, next_node_offset);
+ }
+
+ const trie_offset = self.trie_offset orelse 0;
+ const updated = offset_in_trie != trie_offset;
+ self.trie_offset = offset_in_trie;
+ self.node_dirty = false;
+ node_size += stream.bytes_written;
+
+ return FinalizeResult{ .node_size = node_size, .updated = updated };
+ }
+};
+
/// The root node of the trie.
root: ?*Node = null,
@@ -124,13 +366,11 @@ pub fn read(self: *Trie, allocator: Allocator, reader: anytype) ReadError!usize
/// Write the trie to a byte stream.
/// Panics if the trie was not finalized using `finalize` before calling this method.
-pub fn write(self: Trie, writer: anytype) !u64 {
+pub fn write(self: Trie, writer: anytype) !void {
assert(!self.trie_dirty);
- var counting_writer = std.io.countingWriter(writer);
for (self.ordered_nodes.items) |node| {
- try node.write(counting_writer.writer());
+ try node.write(writer);
}
- return counting_writer.bytes_written;
}
pub fn init(self: *Trie, allocator: Allocator) !void {
@@ -155,15 +395,15 @@ test "Trie node count" {
defer trie.deinit(gpa);
try trie.init(gpa);
- try testing.expectEqual(trie.node_count, 0);
- try testing.expect(trie.root == null);
+ try testing.expectEqual(@as(usize, 1), trie.node_count);
+ try testing.expect(trie.root != null);
try trie.put(gpa, .{
.name = "_main",
.vmaddr_offset = 0,
.export_flags = 0,
});
- try testing.expectEqual(trie.node_count, 2);
+ try testing.expectEqual(@as(usize, 2), trie.node_count);
// Inserting the same node shouldn't update the trie.
try trie.put(gpa, .{
@@ -171,14 +411,14 @@ test "Trie node count" {
.vmaddr_offset = 0,
.export_flags = 0,
});
- try testing.expectEqual(trie.node_count, 2);
+ try testing.expectEqual(@as(usize, 2), trie.node_count);
try trie.put(gpa, .{
.name = "__mh_execute_header",
.vmaddr_offset = 0x1000,
.export_flags = 0,
});
- try testing.expectEqual(trie.node_count, 4);
+ try testing.expectEqual(@as(usize, 4), trie.node_count);
// Inserting the same node shouldn't update the trie.
try trie.put(gpa, .{
@@ -186,13 +426,13 @@ test "Trie node count" {
.vmaddr_offset = 0x1000,
.export_flags = 0,
});
- try testing.expectEqual(trie.node_count, 4);
+ try testing.expectEqual(@as(usize, 4), trie.node_count);
try trie.put(gpa, .{
.name = "_main",
.vmaddr_offset = 0,
.export_flags = 0,
});
- try testing.expectEqual(trie.node_count, 4);
+ try testing.expectEqual(@as(usize, 4), trie.node_count);
}
test "Trie basic" {
@@ -279,7 +519,7 @@ test "write Trie to a byte stream" {
});
try trie.finalize(gpa);
- try trie.finalize(gpa); // Finalizing multiple times is a nop subsequently unless we add new nodes.
+ try trie.finalize(gpa); // Finalizing multiple times is a nop subsequently unless we add new nodes.
const exp_buffer = [_]u8{
0x0, 0x1, // node root
@@ -308,7 +548,7 @@ test "write Trie to a byte stream" {
}
test "parse Trie from byte stream" {
- var gpa = testing.allocator;
+ const gpa = testing.allocator;
const in_buffer = [_]u8{
0x0, 0x1, // node root
@@ -339,7 +579,7 @@ test "parse Trie from byte stream" {
}
test "ordering bug" {
- var gpa = testing.allocator;
+ const gpa = testing.allocator;
var trie: Trie = .{};
defer trie.deinit(gpa);
try trie.init(gpa);
@@ -354,6 +594,7 @@ test "ordering bug" {
.vmaddr_offset = 0x8008,
.export_flags = 0,
});
+
try trie.finalize(gpa);
const exp_buffer = [_]u8{
@@ -369,245 +610,3 @@ test "ordering bug" {
_ = try trie.write(stream.writer());
try expectEqualHexStrings(&exp_buffer, buffer);
}
-
-pub const Node = struct {
- base: *Trie,
-
- /// Terminal info associated with this node.
- /// If this node is not a terminal node, info is null.
- terminal_info: ?struct {
- /// Export flags associated with this exported symbol.
- export_flags: u64,
- /// VM address offset wrt to the section this symbol is defined against.
- vmaddr_offset: u64,
- } = null,
-
- /// Offset of this node in the trie output byte stream.
- trie_offset: ?u64 = null,
-
- /// List of all edges originating from this node.
- edges: std.ArrayListUnmanaged(Edge) = .{},
-
- node_dirty: bool = true,
-
- /// Edge connecting to nodes in the trie.
- pub const Edge = struct {
- from: *Node,
- to: *Node,
- label: []u8,
-
- fn deinit(self: *Edge, allocator: Allocator) void {
- self.to.deinit(allocator);
- allocator.destroy(self.to);
- allocator.free(self.label);
- self.from = undefined;
- self.to = undefined;
- self.label = undefined;
- }
- };
-
- fn deinit(self: *Node, allocator: Allocator) void {
- for (self.edges.items) |*edge| {
- edge.deinit(allocator);
- }
- self.edges.deinit(allocator);
- }
-
- /// Inserts a new node starting from `self`.
- fn put(self: *Node, allocator: Allocator, label: []const u8) !*Node {
- // Check for match with edges from this node.
- for (self.edges.items) |*edge| {
- const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.to;
- if (match == 0) continue;
- if (match == edge.label.len) return edge.to.put(allocator, label[match..]);
-
- // Found a match, need to splice up nodes.
- // From: A -> B
- // To: A -> C -> B
- const mid = try allocator.create(Node);
- mid.* = .{ .base = self.base };
- const to_label = try allocator.dupe(u8, edge.label[match..]);
- allocator.free(edge.label);
- const to_node = edge.to;
- edge.to = mid;
- edge.label = try allocator.dupe(u8, label[0..match]);
- self.base.node_count += 1;
-
- try mid.edges.append(allocator, .{
- .from = mid,
- .to = to_node,
- .label = to_label,
- });
-
- return if (match == label.len) mid else mid.put(allocator, label[match..]);
- }
-
- // Add a new node.
- const node = try allocator.create(Node);
- node.* = .{ .base = self.base };
- self.base.node_count += 1;
-
- try self.edges.append(allocator, .{
- .from = self,
- .to = node,
- .label = try allocator.dupe(u8, label),
- });
-
- return node;
- }
-
- /// Recursively parses the node from the input byte stream.
- fn read(self: *Node, allocator: Allocator, reader: anytype) Trie.ReadError!usize {
- self.node_dirty = true;
- const trie_offset = try reader.context.getPos();
- self.trie_offset = trie_offset;
-
- var nread: usize = 0;
-
- const node_size = try leb.readULEB128(u64, reader);
- if (node_size > 0) {
- const export_flags = try leb.readULEB128(u64, reader);
- // TODO Parse special flags.
- assert(export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and
- export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0);
-
- const vmaddr_offset = try leb.readULEB128(u64, reader);
-
- self.terminal_info = .{
- .export_flags = export_flags,
- .vmaddr_offset = vmaddr_offset,
- };
- }
-
- const nedges = try reader.readByte();
- self.base.node_count += nedges;
-
- nread += (try reader.context.getPos()) - trie_offset;
-
- var i: usize = 0;
- while (i < nedges) : (i += 1) {
- const edge_start_pos = try reader.context.getPos();
-
- const label = blk: {
- var label_buf = std.ArrayList(u8).init(allocator);
- while (true) {
- const next = try reader.readByte();
- if (next == @as(u8, 0))
- break;
- try label_buf.append(next);
- }
- break :blk try label_buf.toOwnedSlice();
- };
-
- const seek_to = try leb.readULEB128(u64, reader);
- const return_pos = try reader.context.getPos();
-
- nread += return_pos - edge_start_pos;
- try reader.context.seekTo(seek_to);
-
- const node = try allocator.create(Node);
- node.* = .{ .base = self.base };
-
- nread += try node.read(allocator, reader);
- try self.edges.append(allocator, .{
- .from = self,
- .to = node,
- .label = label,
- });
- try reader.context.seekTo(return_pos);
- }
-
- return nread;
- }
-
- /// Writes this node to a byte stream.
- /// The children of this node *are* not written to the byte stream
- /// recursively. To write all nodes to a byte stream in sequence,
- /// iterate over `Trie.ordered_nodes` and call this method on each node.
- /// This is one of the requirements of the MachO.
- /// Panics if `finalize` was not called before calling this method.
- fn write(self: Node, writer: anytype) !void {
- assert(!self.node_dirty);
- if (self.terminal_info) |info| {
- // Terminal node info: encode export flags and vmaddr offset of this symbol.
- var info_buf: [@sizeOf(u64) * 2]u8 = undefined;
- var info_stream = std.io.fixedBufferStream(&info_buf);
- // TODO Implement for special flags.
- assert(info.export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and
- info.export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0);
- try leb.writeULEB128(info_stream.writer(), info.export_flags);
- try leb.writeULEB128(info_stream.writer(), info.vmaddr_offset);
-
- // Encode the size of the terminal node info.
- var size_buf: [@sizeOf(u64)]u8 = undefined;
- var size_stream = std.io.fixedBufferStream(&size_buf);
- try leb.writeULEB128(size_stream.writer(), info_stream.pos);
-
- // Now, write them to the output stream.
- try writer.writeAll(size_buf[0..size_stream.pos]);
- try writer.writeAll(info_buf[0..info_stream.pos]);
- } else {
- // Non-terminal node is delimited by 0 byte.
- try writer.writeByte(0);
- }
- // Write number of edges (max legal number of edges is 256).
- try writer.writeByte(@as(u8, @intCast(self.edges.items.len)));
-
- for (self.edges.items) |edge| {
- // Write edge label and offset to next node in trie.
- try writer.writeAll(edge.label);
- try writer.writeByte(0);
- try leb.writeULEB128(writer, edge.to.trie_offset.?);
- }
- }
-
- const FinalizeResult = struct {
- /// Current size of this node in bytes.
- node_size: u64,
-
- /// True if the trie offset of this node in the output byte stream
- /// would need updating; false otherwise.
- updated: bool,
- };
-
- /// Updates offset of this node in the output byte stream.
- fn finalize(self: *Node, offset_in_trie: u64) !FinalizeResult {
- var stream = std.io.countingWriter(std.io.null_writer);
- const writer = stream.writer();
-
- var node_size: u64 = 0;
- if (self.terminal_info) |info| {
- try leb.writeULEB128(writer, info.export_flags);
- try leb.writeULEB128(writer, info.vmaddr_offset);
- try leb.writeULEB128(writer, stream.bytes_written);
- } else {
- node_size += 1; // 0x0 for non-terminal nodes
- }
- node_size += 1; // 1 byte for edge count
-
- for (self.edges.items) |edge| {
- const next_node_offset = edge.to.trie_offset orelse 0;
- node_size += edge.label.len + 1;
- try leb.writeULEB128(writer, next_node_offset);
- }
-
- const trie_offset = self.trie_offset orelse 0;
- const updated = offset_in_trie != trie_offset;
- self.trie_offset = offset_in_trie;
- self.node_dirty = false;
- node_size += stream.bytes_written;
-
- return FinalizeResult{ .node_size = node_size, .updated = updated };
- }
-};
-
-const Trie = @This();
-
-const std = @import("std");
-const mem = std.mem;
-const leb = std.leb;
-const log = std.log.scoped(.link);
-const macho = std.macho;
-const testing = std.testing;
-const assert = std.debug.assert;
-const Allocator = mem.Allocator;
src/link/MachO/Archive.zig
@@ -1,20 +1,15 @@
-file: fs.File,
-fat_offset: u64,
-name: []const u8,
-header: ar_hdr = undefined,
+path: []const u8,
+data: []const u8,
-/// Parsed table of contents.
-/// Each symbol name points to a list of all definition
-/// sites within the current static archive.
-toc: std.StringArrayHashMapUnmanaged(std.ArrayListUnmanaged(u32)) = .{},
+objects: std.ArrayListUnmanaged(Object) = .{},
// Archive files start with the ARMAG identifying string. Then follows a
// `struct ar_hdr', and as many bytes of member file data as its `ar_size'
// member indicates, for each member file.
/// String that begins an archive file.
-const ARMAG: *const [SARMAG:0]u8 = "!<arch>\n";
+pub const ARMAG: *const [SARMAG:0]u8 = "!<arch>\n";
/// Size of that string.
-const SARMAG: u4 = 8;
+pub const SARMAG: u4 = 8;
/// String in ar_fmag at the end of each header.
const ARFMAG: *const [2:0]u8 = "`\n";
@@ -41,177 +36,93 @@ const ar_hdr = extern struct {
/// Always contains ARFMAG.
ar_fmag: [2]u8,
- const NameOrLength = union(enum) {
- Name: []const u8,
- Length: u32,
- };
- fn nameOrLength(self: ar_hdr) !NameOrLength {
- const value = getValue(&self.ar_name);
- const slash_index = mem.indexOf(u8, value, "/") orelse return error.MalformedArchive;
- const len = value.len;
- if (slash_index == len - 1) {
- // Name stored directly
- return NameOrLength{ .Name = value };
- } else {
- // Name follows the header directly and its length is encoded in
- // the name field.
- const length = try std.fmt.parseInt(u32, value[slash_index + 1 ..], 10);
- return NameOrLength{ .Length = length };
- }
- }
-
fn date(self: ar_hdr) !u64 {
- const value = getValue(&self.ar_date);
+ const value = mem.trimRight(u8, &self.ar_date, &[_]u8{@as(u8, 0x20)});
return std.fmt.parseInt(u64, value, 10);
}
fn size(self: ar_hdr) !u32 {
- const value = getValue(&self.ar_size);
+ const value = mem.trimRight(u8, &self.ar_size, &[_]u8{@as(u8, 0x20)});
return std.fmt.parseInt(u32, value, 10);
}
- fn getValue(raw: []const u8) []const u8 {
- return mem.trimRight(u8, raw, &[_]u8{@as(u8, 0x20)});
+ fn name(self: *const ar_hdr) ?[]const u8 {
+ const value = &self.ar_name;
+ if (mem.startsWith(u8, value, "#1/")) return null;
+ const sentinel = mem.indexOfScalar(u8, value, '/') orelse value.len;
+ return value[0..sentinel];
}
-};
-pub fn isArchive(file: fs.File, fat_offset: u64) bool {
- const reader = file.reader();
- const magic = reader.readBytesNoEof(SARMAG) catch return false;
- defer file.seekTo(fat_offset) catch {};
- return mem.eql(u8, &magic, ARMAG);
-}
-
-pub fn deinit(self: *Archive, allocator: Allocator) void {
- self.file.close();
- for (self.toc.keys()) |*key| {
- allocator.free(key.*);
- }
- for (self.toc.values()) |*value| {
- value.deinit(allocator);
+ fn nameLength(self: ar_hdr) !?u32 {
+ const value = &self.ar_name;
+ if (!mem.startsWith(u8, value, "#1/")) return null;
+ const trimmed = mem.trimRight(u8, self.ar_name["#1/".len..], &[_]u8{0x20});
+ return try std.fmt.parseInt(u32, trimmed, 10);
}
- self.toc.deinit(allocator);
- allocator.free(self.name);
-}
-
-pub fn parse(self: *Archive, allocator: Allocator, reader: anytype) !void {
- _ = try reader.readBytesNoEof(SARMAG);
- self.header = try reader.readStruct(ar_hdr);
- const name_or_length = try self.header.nameOrLength();
- const embedded_name = try parseName(allocator, name_or_length, reader);
- log.debug("parsing archive '{s}' at '{s}'", .{ embedded_name, self.name });
- defer allocator.free(embedded_name);
-
- try self.parseTableOfContents(allocator, reader);
-}
+};
-fn parseName(allocator: Allocator, name_or_length: ar_hdr.NameOrLength, reader: anytype) ![]u8 {
- var name: []u8 = undefined;
- switch (name_or_length) {
- .Name => |n| {
- name = try allocator.dupe(u8, n);
- },
- .Length => |len| {
- var n = try allocator.alloc(u8, len);
- defer allocator.free(n);
- try reader.readNoEof(n);
- const actual_len = mem.indexOfScalar(u8, n, @as(u8, 0)) orelse n.len;
- name = try allocator.dupe(u8, n[0..actual_len]);
- },
- }
- return name;
+pub fn deinit(self: *Archive, allocator: Allocator) void {
+ self.objects.deinit(allocator);
}
-fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) !void {
- const symtab_size = try reader.readInt(u32, .little);
- const symtab = try allocator.alloc(u8, symtab_size);
- defer allocator.free(symtab);
-
- reader.readNoEof(symtab) catch {
- log.debug("incomplete symbol table: expected symbol table of length 0x{x}", .{symtab_size});
- return error.MalformedArchive;
- };
+pub fn parse(self: *Archive, arena: Allocator, macho_file: *MachO) !void {
+ const gpa = macho_file.base.allocator;
- const strtab_size = try reader.readInt(u32, .little);
- const strtab = try allocator.alloc(u8, strtab_size);
- defer allocator.free(strtab);
-
- reader.readNoEof(strtab) catch {
- log.debug("incomplete symbol table: expected string table of length 0x{x}", .{strtab_size});
- return error.MalformedArchive;
- };
-
- var symtab_stream = std.io.fixedBufferStream(symtab);
- var symtab_reader = symtab_stream.reader();
+ var stream = std.io.fixedBufferStream(self.data);
+ const reader = stream.reader();
while (true) {
- const n_strx = symtab_reader.readInt(u32, .little) catch |err| switch (err) {
- error.EndOfStream => break,
- else => |e| return e,
- };
- const object_offset = try symtab_reader.readInt(u32, .little);
+ if (stream.pos >= self.data.len) break;
+ if (!mem.isAligned(stream.pos, 2)) stream.pos += 1;
- const sym_name = mem.sliceTo(@as([*:0]const u8, @ptrCast(strtab.ptr + n_strx)), 0);
- const owned_name = try allocator.dupe(u8, sym_name);
- const res = try self.toc.getOrPut(allocator, owned_name);
- defer if (res.found_existing) allocator.free(owned_name);
+ const hdr = try reader.readStruct(ar_hdr);
- if (!res.found_existing) {
- res.value_ptr.* = .{};
+ if (!mem.eql(u8, &hdr.ar_fmag, ARFMAG)) {
+ macho_file.base.fatal("{s}: invalid header delimiter: expected '{s}', found '{s}'", .{
+ self.path, std.fmt.fmtSliceEscapeLower(ARFMAG), std.fmt.fmtSliceEscapeLower(&hdr.ar_fmag),
+ });
+ return error.ParseFailed;
}
- try res.value_ptr.append(allocator, object_offset);
- }
-}
+ var size = try hdr.size();
+ const name = name: {
+ if (hdr.name()) |n| break :name try arena.dupe(u8, n);
+ if (try hdr.nameLength()) |len| {
+ size -= len;
+ const buf = try arena.alloc(u8, len);
+ try reader.readNoEof(buf);
+ const actual_len = mem.indexOfScalar(u8, buf, @as(u8, 0)) orelse len;
+ break :name buf[0..actual_len];
+ }
+ unreachable;
+ };
+ defer {
+ _ = stream.seekBy(size) catch {};
+ }
-pub fn parseObject(self: Archive, gpa: Allocator, offset: u32) !Object {
- const reader = self.file.reader();
- try reader.context.seekTo(self.fat_offset + offset);
-
- const object_header = try reader.readStruct(ar_hdr);
-
- const name_or_length = try object_header.nameOrLength();
- const object_name = try parseName(gpa, name_or_length, reader);
- defer gpa.free(object_name);
-
- log.debug("extracting object '{s}' from archive '{s}'", .{ object_name, self.name });
-
- const name = name: {
- var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
- const path = try std.os.realpath(self.name, &buffer);
- break :name try std.fmt.allocPrint(gpa, "{s}({s})", .{ path, object_name });
- };
-
- const object_name_len = switch (name_or_length) {
- .Name => 0,
- .Length => |len| len,
- };
- const object_size = (try object_header.size()) - object_name_len;
- const contents = try gpa.allocWithOptions(u8, object_size, @alignOf(u64), null);
- const amt = try reader.readAll(contents);
- if (amt != object_size) {
- return error.InputOutput;
- }
+ if (mem.eql(u8, name, "__.SYMDEF") or mem.eql(u8, name, "__.SYMDEF SORTED")) continue;
- var object = Object{
- .name = name,
- .mtime = object_header.date() catch 0,
- .contents = contents,
- };
+ const object = Object{
+ .archive = self.path,
+ .path = name,
+ .data = self.data[stream.pos..][0..size],
+ .index = undefined,
+ .alive = false,
+ .mtime = hdr.date() catch 0,
+ };
- try object.parse(gpa);
+ log.debug("extracting object '{s}' from archive '{s}'", .{ object.path, self.path });
- return object;
+ try self.objects.append(gpa, object);
+ }
}
-const Archive = @This();
-
-const std = @import("std");
-const assert = std.debug.assert;
-const fs = std.fs;
const log = std.log.scoped(.link);
const macho = std.macho;
const mem = std.mem;
+const std = @import("std");
const Allocator = mem.Allocator;
+const Archive = @This();
+const MachO = @import("../MachO.zig");
const Object = @import("Object.zig");
src/link/MachO/Atom.zig
@@ -1,1271 +1,905 @@
-/// Each Atom always gets a symbol with the fully qualified name.
-/// The symbol can reside in any object file context structure in `symtab` array
-/// (see `Object`), or if the symbol is a synthetic symbol such as a GOT cell or
-/// a stub trampoline, it can be found in the linkers `locals` arraylist.
-/// If this field is 0 and file is 0, it means the codegen size = 0 and there is no symbol or
-/// offset table entry.
-sym_index: u32 = 0,
-
-/// 0 means an Atom is a synthetic Atom such as a GOT cell defined by the linker.
-/// Otherwise, it is the index into appropriate object file (indexing from 1).
-/// Prefer using `getFile()` helper to get the file index out rather than using
-/// the field directly.
-file: u32 = 0,
-
-/// If this Atom is not a synthetic Atom, i.e., references a subsection in an
-/// Object file, `inner_sym_index` and `inner_nsyms_trailing` tell where and if
-/// this Atom contains any additional symbol references that fall within this Atom's
-/// address range. These could for example be an alias symbol which can be used
-/// internally by the relocation records, or if the Object file couldn't be split
-/// into subsections, this Atom may encompass an entire input section.
-inner_sym_index: u32 = 0,
-inner_nsyms_trailing: u32 = 0,
-
-/// Size and alignment of this atom
-/// Unlike in Elf, we need to store the size of this symbol as part of
-/// the atom since macho.nlist_64 lacks this information.
+/// Address allocated for this Atom.
+value: u64 = 0,
+
+/// Name of this Atom.
+name: u32 = 0,
+
+/// Index into linker's input file table.
+file: File.Index = 0,
+
+/// Size of this atom
size: u64 = 0,
-/// Alignment of this atom as a power of 2.
-/// For instance, aligmment of 0 should be read as 2^0 = 1 byte aligned.
-alignment: Alignment = .@"1",
+/// Alignment of this atom as a power of two.
+alignment: u32 = 0,
-/// Points to the previous and next neighbours
-/// TODO use the same trick as with symbols: reserve index 0 as null atom
-next_index: ?Index = null,
-prev_index: ?Index = null,
+/// Index of the input section.
+n_sect: u32 = 0,
-pub const Alignment = @import("../../InternPool.zig").Alignment;
+/// Index of the output section.
+out_n_sect: u8 = 0,
-pub const Index = u32;
+/// Offset within the parent section pointed to by n_sect.
+/// off + size <= parent section size.
+off: u64 = 0,
-pub const Binding = struct {
- target: SymbolWithLoc,
- offset: u64,
-};
+/// Relocations of this atom.
+relocs: Loc = .{},
+
+/// Index of this atom in the linker's atoms table.
+atom_index: Index = 0,
-/// Returns `null` if the Atom is a synthetic Atom.
-/// Otherwise, returns an index into an array of Objects.
-pub fn getFile(self: Atom) ?u32 {
- if (self.file == 0) return null;
- return self.file - 1;
+/// Index of the thunk for this atom.
+thunk_index: Thunk.Index = 0,
+
+/// Unwind records associated with this atom.
+unwind_records: Loc = .{},
+
+flags: Flags = .{},
+
+pub fn getName(self: Atom, macho_file: *MachO) [:0]const u8 {
+ return macho_file.string_intern.getAssumeExists(self.name);
}
-pub fn getSymbolIndex(self: Atom) ?u32 {
- if (self.getFile() == null and self.sym_index == 0) return null;
- return self.sym_index;
+pub fn getFile(self: Atom, macho_file: *MachO) File {
+ return macho_file.getFile(self.file).?;
}
-/// Returns symbol referencing this atom.
-pub fn getSymbol(self: Atom, macho_file: *MachO) macho.nlist_64 {
- return self.getSymbolPtr(macho_file).*;
+pub fn getInputSection(self: Atom, macho_file: *MachO) macho.section_64 {
+ return switch (self.getFile(macho_file)) {
+ .dylib => unreachable,
+ inline else => |x| x.sections.items(.header)[self.n_sect],
+ };
}
-/// Returns pointer-to-symbol referencing this atom.
-pub fn getSymbolPtr(self: Atom, macho_file: *MachO) *macho.nlist_64 {
- const sym_index = self.getSymbolIndex().?;
- return macho_file.getSymbolPtr(.{ .sym_index = sym_index, .file = self.file });
+pub fn getInputAddress(self: Atom, macho_file: *MachO) u64 {
+ return self.getInputSection(macho_file).addr + self.off;
}
-pub fn getSymbolWithLoc(self: Atom) SymbolWithLoc {
- const sym_index = self.getSymbolIndex().?;
- return .{ .sym_index = sym_index, .file = self.file };
+pub fn getPriority(self: Atom, macho_file: *MachO) u64 {
+ const file = self.getFile(macho_file);
+ return (@as(u64, @intCast(file.getIndex())) << 32) | @as(u64, @intCast(self.n_sect));
}
-/// Returns the name of this atom.
-pub fn getName(self: Atom, macho_file: *MachO) []const u8 {
- const sym_index = self.getSymbolIndex().?;
- return macho_file.getSymbolName(.{ .sym_index = sym_index, .file = self.file });
+pub fn getCode(self: Atom, macho_file: *MachO) []const u8 {
+ const code = switch (self.getFile(macho_file)) {
+ .dylib => unreachable,
+ inline else => |x| x.getSectionData(self.n_sect),
+ };
+ return code[self.off..][0..self.size];
}
-/// Returns how much room there is to grow in virtual address space.
-/// File offset relocation happens transparently, so it is not included in
-/// this calculation.
-pub fn capacity(self: Atom, macho_file: *MachO) u64 {
- const self_sym = self.getSymbol(macho_file);
- if (self.next_index) |next_index| {
- const next = macho_file.getAtom(next_index);
- const next_sym = next.getSymbol(macho_file);
- return next_sym.n_value - self_sym.n_value;
- } else {
- // We are the last atom.
- // The capacity is limited only by virtual address space.
- return macho_file.allocatedVirtualSize(self_sym.n_value);
- }
+pub fn getRelocs(self: Atom, macho_file: *MachO) []const Relocation {
+ const relocs = switch (self.getFile(macho_file)) {
+ .dylib => unreachable,
+ inline else => |x| x.sections.items(.relocs)[self.n_sect],
+ };
+ return relocs.items[self.relocs.pos..][0..self.relocs.len];
}
-pub fn freeListEligible(self: Atom, macho_file: *MachO) bool {
- // No need to keep a free list node for the last atom.
- const next_index = self.next_index orelse return false;
- const next = macho_file.getAtom(next_index);
- const self_sym = self.getSymbol(macho_file);
- const next_sym = next.getSymbol(macho_file);
- const cap = next_sym.n_value - self_sym.n_value;
- const ideal_cap = MachO.padToIdeal(self.size);
- if (cap <= ideal_cap) return false;
- const surplus = cap - ideal_cap;
- return surplus >= MachO.min_text_capacity;
+pub fn getUnwindRecords(self: Atom, macho_file: *MachO) []const UnwindInfo.Record.Index {
+ return switch (self.getFile(macho_file)) {
+ .dylib => unreachable,
+ .internal => &[0]UnwindInfo.Record.Index{},
+ .object => |x| x.unwind_records.items[self.unwind_records.pos..][0..self.unwind_records.len],
+ };
}
-pub fn getOutputSection(macho_file: *MachO, sect: macho.section_64) !?u8 {
- const segname = sect.segName();
- const sectname = sect.sectName();
- const res: ?u8 = blk: {
- if (mem.eql(u8, "__LLVM", segname)) {
- log.debug("TODO LLVM section: type 0x{x}, name '{s},{s}'", .{
- sect.flags, segname, sectname,
- });
- break :blk null;
- }
+pub fn markUnwindRecordsDead(self: Atom, macho_file: *MachO) void {
+ for (self.getUnwindRecords(macho_file)) |cu_index| {
+ const cu = macho_file.getUnwindRecord(cu_index);
+ cu.alive = false;
- // We handle unwind info separately.
- if (mem.eql(u8, "__TEXT", segname) and mem.eql(u8, "__eh_frame", sectname)) {
- break :blk null;
- }
- if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) {
- break :blk null;
+ if (cu.getFdePtr(macho_file)) |fde| {
+ fde.alive = false;
}
+ }
+}
- if (sect.isCode()) {
- if (macho_file.text_section_index == null) {
- macho_file.text_section_index = try macho_file.initSection("__TEXT", "__text", .{
- .flags = macho.S_REGULAR |
- macho.S_ATTR_PURE_INSTRUCTIONS |
- macho.S_ATTR_SOME_INSTRUCTIONS,
- });
- }
- break :blk macho_file.text_section_index.?;
- }
+pub fn getThunk(self: Atom, macho_file: *MachO) *Thunk {
+ return macho_file.getThunk(self.thunk_index);
+}
- if (sect.isDebug()) {
- break :blk null;
- }
+pub fn initOutputSection(sect: macho.section_64, macho_file: *MachO) !u8 {
+ const segname, const sectname, const flags = blk: {
+ if (sect.isCode()) break :blk .{
+ "__TEXT",
+ "__text",
+ macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS,
+ };
switch (sect.type()) {
macho.S_4BYTE_LITERALS,
macho.S_8BYTE_LITERALS,
macho.S_16BYTE_LITERALS,
- => {
- break :blk macho_file.getSectionByName("__TEXT", "__const") orelse
- try macho_file.initSection("__TEXT", "__const", .{});
- },
+ => break :blk .{ "__TEXT", "__const", macho.S_REGULAR },
+
macho.S_CSTRING_LITERALS => {
- if (mem.startsWith(u8, sectname, "__objc")) {
- break :blk macho_file.getSectionByName(segname, sectname) orelse
- try macho_file.initSection(segname, sectname, .{});
- }
- break :blk macho_file.getSectionByName("__TEXT", "__cstring") orelse
- try macho_file.initSection("__TEXT", "__cstring", .{
- .flags = macho.S_CSTRING_LITERALS,
- });
+ if (mem.startsWith(u8, sect.sectName(), "__objc")) break :blk .{
+ sect.segName(), sect.sectName(), macho.S_REGULAR,
+ };
+ break :blk .{ "__TEXT", "__cstring", macho.S_CSTRING_LITERALS };
},
+
macho.S_MOD_INIT_FUNC_POINTERS,
macho.S_MOD_TERM_FUNC_POINTERS,
- => {
- break :blk macho_file.getSectionByName("__DATA_CONST", sectname) orelse
- try macho_file.initSection("__DATA_CONST", sectname, .{
- .flags = sect.flags,
- });
- },
+ => break :blk .{ "__DATA_CONST", sect.sectName(), sect.flags },
+
macho.S_LITERAL_POINTERS,
macho.S_ZEROFILL,
+ macho.S_GB_ZEROFILL,
macho.S_THREAD_LOCAL_VARIABLES,
macho.S_THREAD_LOCAL_VARIABLE_POINTERS,
macho.S_THREAD_LOCAL_REGULAR,
macho.S_THREAD_LOCAL_ZEROFILL,
- => {
- break :blk macho_file.getSectionByName(segname, sectname) orelse
- try macho_file.initSection(segname, sectname, .{
- .flags = sect.flags,
- });
- },
- macho.S_COALESCED => {
- break :blk macho_file.getSectionByName(segname, sectname) orelse
- try macho_file.initSection(segname, sectname, .{});
+ => break :blk .{ sect.segName(), sect.sectName(), sect.flags },
+
+ macho.S_COALESCED => break :blk .{
+ sect.segName(),
+ sect.sectName(),
+ macho.S_REGULAR,
},
+
macho.S_REGULAR => {
- if (mem.eql(u8, segname, "__TEXT")) {
- if (mem.eql(u8, sectname, "__rodata") or
- mem.eql(u8, sectname, "__typelink") or
- mem.eql(u8, sectname, "__itablink") or
- mem.eql(u8, sectname, "__gosymtab") or
- mem.eql(u8, sectname, "__gopclntab"))
- {
- break :blk macho_file.getSectionByName("__TEXT", sectname) orelse
- try macho_file.initSection("__TEXT", sectname, .{});
- }
- }
+ const segname = sect.segName();
+ const sectname = sect.sectName();
if (mem.eql(u8, segname, "__DATA")) {
if (mem.eql(u8, sectname, "__const") or
mem.eql(u8, sectname, "__cfstring") or
mem.eql(u8, sectname, "__objc_classlist") or
- mem.eql(u8, sectname, "__objc_imageinfo"))
- {
- break :blk macho_file.getSectionByName("__DATA_CONST", sectname) orelse
- try macho_file.initSection("__DATA_CONST", sectname, .{});
- } else if (mem.eql(u8, sectname, "__data")) {
- if (macho_file.data_section_index == null) {
- macho_file.data_section_index = try macho_file.initSection("__DATA", "__data", .{});
- }
- break :blk macho_file.data_section_index.?;
- }
+ mem.eql(u8, sectname, "__objc_imageinfo")) break :blk .{
+ "__DATA_CONST",
+ sectname,
+ macho.S_REGULAR,
+ };
}
- break :blk macho_file.getSectionByName(segname, sectname) orelse
- try macho_file.initSection(segname, sectname, .{});
+ break :blk .{ segname, sectname, sect.flags };
},
- else => break :blk null,
- }
- };
- // TODO we can do this directly in the selection logic above.
- // Or is it not worth it?
- if (macho_file.data_const_section_index == null) {
- if (macho_file.getSectionByName("__DATA_CONST", "__const")) |index| {
- macho_file.data_const_section_index = index;
- }
- }
- if (macho_file.thread_vars_section_index == null) {
- if (macho_file.getSectionByName("__DATA", "__thread_vars")) |index| {
- macho_file.thread_vars_section_index = index;
- }
- }
- if (macho_file.thread_data_section_index == null) {
- if (macho_file.getSectionByName("__DATA", "__thread_data")) |index| {
- macho_file.thread_data_section_index = index;
- }
- }
- if (macho_file.thread_bss_section_index == null) {
- if (macho_file.getSectionByName("__DATA", "__thread_bss")) |index| {
- macho_file.thread_bss_section_index = index;
- }
- }
- if (macho_file.bss_section_index == null) {
- if (macho_file.getSectionByName("__DATA", "__bss")) |index| {
- macho_file.bss_section_index = index;
+ else => break :blk .{ sect.segName(), sect.sectName(), sect.flags },
}
- }
-
- return res;
-}
-
-pub fn addRelocation(macho_file: *MachO, atom_index: Index, reloc: Relocation) !void {
- return addRelocations(macho_file, atom_index, &[_]Relocation{reloc});
-}
-
-pub fn addRelocations(macho_file: *MachO, atom_index: Index, relocs: []const Relocation) !void {
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
- const gop = try macho_file.relocs.getOrPut(gpa, atom_index);
- if (!gop.found_existing) {
- gop.value_ptr.* = .{};
- }
- try gop.value_ptr.ensureUnusedCapacity(gpa, relocs.len);
- for (relocs) |reloc| {
- log.debug(" (adding reloc of type {s} to target %{d})", .{
- @tagName(reloc.type),
- reloc.target.sym_index,
- });
- gop.value_ptr.appendAssumeCapacity(reloc);
- }
-}
-
-pub fn addRebase(macho_file: *MachO, atom_index: Index, offset: u32) !void {
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
- const atom = macho_file.getAtom(atom_index);
- log.debug(" (adding rebase at offset 0x{x} in %{?d})", .{ offset, atom.getSymbolIndex() });
- const gop = try macho_file.rebases.getOrPut(gpa, atom_index);
- if (!gop.found_existing) {
- gop.value_ptr.* = .{};
- }
- try gop.value_ptr.append(gpa, offset);
-}
-
-pub fn addBinding(macho_file: *MachO, atom_index: Index, binding: Binding) !void {
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
- const atom = macho_file.getAtom(atom_index);
- log.debug(" (adding binding to symbol {s} at offset 0x{x} in %{?d})", .{
- macho_file.getSymbolName(binding.target),
- binding.offset,
- atom.getSymbolIndex(),
- });
- const gop = try macho_file.bindings.getOrPut(gpa, atom_index);
- if (!gop.found_existing) {
- gop.value_ptr.* = .{};
- }
- try gop.value_ptr.append(gpa, binding);
-}
-
-pub fn resolveRelocations(
- macho_file: *MachO,
- atom_index: Index,
- relocs: []*const Relocation,
- code: []u8,
-) void {
- relocs_log.debug("relocating '{s}'", .{macho_file.getAtom(atom_index).getName(macho_file)});
- for (relocs) |reloc| {
- reloc.resolve(macho_file, atom_index, code);
- }
-}
-
-pub fn freeRelocations(macho_file: *MachO, atom_index: Index) void {
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
- var removed_relocs = macho_file.relocs.fetchOrderedRemove(atom_index);
- if (removed_relocs) |*relocs| relocs.value.deinit(gpa);
- var removed_rebases = macho_file.rebases.fetchOrderedRemove(atom_index);
- if (removed_rebases) |*rebases| rebases.value.deinit(gpa);
- var removed_bindings = macho_file.bindings.fetchOrderedRemove(atom_index);
- if (removed_bindings) |*bindings| bindings.value.deinit(gpa);
-}
-
-const InnerSymIterator = struct {
- sym_index: u32,
- nsyms: u32,
- file: u32,
- pos: u32 = 0,
-
- pub fn next(it: *@This()) ?SymbolWithLoc {
- if (it.pos == it.nsyms) return null;
- const res = SymbolWithLoc{ .sym_index = it.sym_index + it.pos, .file = it.file };
- it.pos += 1;
- return res;
- }
-};
-
-/// Returns an iterator over potentially contained symbols.
-/// Panics when called on a synthetic Atom.
-pub fn getInnerSymbolsIterator(macho_file: *MachO, atom_index: Index) InnerSymIterator {
- const atom = macho_file.getAtom(atom_index);
- assert(atom.getFile() != null);
- return .{
- .sym_index = atom.inner_sym_index,
- .nsyms = atom.inner_nsyms_trailing,
- .file = atom.file,
};
-}
-
-/// Returns a section alias symbol if one is defined.
-/// An alias symbol is used to represent the start of an input section
-/// if there were no symbols defined within that range.
-/// Alias symbols are only used on x86_64.
-pub fn getSectionAlias(macho_file: *MachO, atom_index: Index) ?SymbolWithLoc {
- const atom = macho_file.getAtom(atom_index);
- assert(atom.getFile() != null);
-
- const object = macho_file.objects.items[atom.getFile().?];
- const nbase = @as(u32, @intCast(object.in_symtab.?.len));
- const ntotal = @as(u32, @intCast(object.symtab.len));
- var sym_index: u32 = nbase;
- while (sym_index < ntotal) : (sym_index += 1) {
- if (object.getAtomIndexForSymbol(sym_index)) |other_atom_index| {
- if (other_atom_index == atom_index) return SymbolWithLoc{
- .sym_index = sym_index,
- .file = atom.file,
- };
- }
+ const osec = macho_file.getSectionByName(segname, sectname) orelse try macho_file.addSection(
+ segname,
+ sectname,
+ .{ .flags = flags },
+ );
+ if (mem.eql(u8, segname, "__DATA") and mem.eql(u8, sectname, "__data")) {
+ macho_file.data_sect_index = osec;
}
- return null;
+ return osec;
}
-/// Given an index into a contained symbol within, calculates an offset wrt
-/// the start of this Atom.
-pub fn calcInnerSymbolOffset(macho_file: *MachO, atom_index: Index, sym_index: u32) u64 {
- const atom = macho_file.getAtom(atom_index);
- assert(atom.getFile() != null);
-
- if (atom.sym_index == sym_index) return 0;
-
- const object = macho_file.objects.items[atom.getFile().?];
- const source_sym = object.getSourceSymbol(sym_index).?;
- const base_addr = if (object.getSourceSymbol(atom.sym_index)) |sym|
- sym.n_value
- else blk: {
- const nbase = @as(u32, @intCast(object.in_symtab.?.len));
- const sect_id = @as(u8, @intCast(atom.sym_index - nbase));
- const source_sect = object.getSourceSection(sect_id);
- break :blk source_sect.addr;
- };
- return source_sym.n_value - base_addr;
-}
-
-pub fn scanAtomRelocs(macho_file: *MachO, atom_index: Index, relocs: []align(1) const macho.relocation_info) !void {
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const arch = target.cpu.arch;
- const atom = macho_file.getAtom(atom_index);
- assert(atom.getFile() != null); // synthetic atoms do not have relocs
-
- return switch (arch) {
- .aarch64 => scanAtomRelocsArm64(macho_file, atom_index, relocs),
- .x86_64 => scanAtomRelocsX86(macho_file, atom_index, relocs),
- else => unreachable,
- };
-}
-
-const RelocContext = struct {
- base_addr: i64 = 0,
- base_offset: i32 = 0,
-};
-
-pub fn getRelocContext(macho_file: *MachO, atom_index: Index) RelocContext {
- const atom = macho_file.getAtom(atom_index);
- assert(atom.getFile() != null); // synthetic atoms do not have relocs
-
- const object = macho_file.objects.items[atom.getFile().?];
- if (object.getSourceSymbol(atom.sym_index)) |source_sym| {
- const source_sect = object.getSourceSection(source_sym.n_sect - 1);
- return .{
- .base_addr = @as(i64, @intCast(source_sect.addr)),
- .base_offset = @as(i32, @intCast(source_sym.n_value - source_sect.addr)),
- };
- }
- const nbase = @as(u32, @intCast(object.in_symtab.?.len));
- const sect_id = @as(u8, @intCast(atom.sym_index - nbase));
- const source_sect = object.getSourceSection(sect_id);
- return .{
- .base_addr = @as(i64, @intCast(source_sect.addr)),
- .base_offset = 0,
- };
-}
-
-pub fn parseRelocTarget(macho_file: *MachO, ctx: struct {
- object_id: u32,
- rel: macho.relocation_info,
- code: []const u8,
- base_addr: i64 = 0,
- base_offset: i32 = 0,
-}) SymbolWithLoc {
+pub fn scanRelocs(self: Atom, macho_file: *MachO) !void {
const tracy = trace(@src());
defer tracy.end();
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const object = &macho_file.objects.items[ctx.object_id];
- log.debug("parsing reloc target in object({d}) '{s}' ", .{ ctx.object_id, object.name });
-
- const sym_index = if (ctx.rel.r_extern == 0) sym_index: {
- const sect_id = @as(u8, @intCast(ctx.rel.r_symbolnum - 1));
- const rel_offset = @as(u32, @intCast(ctx.rel.r_address - ctx.base_offset));
-
- const address_in_section = if (ctx.rel.r_pcrel == 0) blk: {
- break :blk if (ctx.rel.r_length == 3)
- mem.readInt(u64, ctx.code[rel_offset..][0..8], .little)
- else
- mem.readInt(u32, ctx.code[rel_offset..][0..4], .little);
- } else blk: {
- assert(target.cpu.arch == .x86_64);
- const correction: u3 = switch (@as(macho.reloc_type_x86_64, @enumFromInt(ctx.rel.r_type))) {
- .X86_64_RELOC_SIGNED => 0,
- .X86_64_RELOC_SIGNED_1 => 1,
- .X86_64_RELOC_SIGNED_2 => 2,
- .X86_64_RELOC_SIGNED_4 => 4,
- else => unreachable,
- };
- const addend = mem.readInt(i32, ctx.code[rel_offset..][0..4], .little);
- const target_address = @as(i64, @intCast(ctx.base_addr)) + ctx.rel.r_address + 4 + correction + addend;
- break :blk @as(u64, @intCast(target_address));
- };
-
- // Find containing atom
- log.debug(" | locating symbol by address @{x} in section {d}", .{ address_in_section, sect_id });
- break :sym_index object.getSymbolByAddress(address_in_section, sect_id);
- } else object.reverse_symtab_lookup[ctx.rel.r_symbolnum];
-
- const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = ctx.object_id + 1 };
- const sym = macho_file.getSymbol(sym_loc);
- const reloc_target = if (sym.sect() and !sym.ext())
- sym_loc
- else if (object.getGlobal(sym_index)) |global_index|
- macho_file.globals.items[global_index]
- else
- sym_loc;
- log.debug(" | target %{d} ('{s}') in object({?d})", .{
- reloc_target.sym_index,
- macho_file.getSymbolName(reloc_target),
- reloc_target.getFile(),
- });
- return reloc_target;
-}
-
-pub fn getRelocTargetAtomIndex(macho_file: *MachO, target: SymbolWithLoc) ?Index {
- if (target.getFile() == null) {
- const target_sym_name = macho_file.getSymbolName(target);
- if (mem.eql(u8, "__mh_execute_header", target_sym_name)) return null;
- if (mem.eql(u8, "___dso_handle", target_sym_name)) return null;
+ const object = self.getFile(macho_file).object;
+ const relocs = self.getRelocs(macho_file);
- unreachable; // referenced symbol not found
- }
-
- const object = macho_file.objects.items[target.getFile().?];
- return object.getAtomIndexForSymbol(target.sym_index);
-}
-
-fn scanAtomRelocsArm64(
- macho_file: *MachO,
- atom_index: Index,
- relocs: []align(1) const macho.relocation_info,
-) !void {
for (relocs) |rel| {
- const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type));
-
- switch (rel_type) {
- .ARM64_RELOC_ADDEND, .ARM64_RELOC_SUBTRACTOR => continue,
- else => {},
- }
-
- if (rel.r_extern == 0) continue;
-
- const atom = macho_file.getAtom(atom_index);
- const object = &macho_file.objects.items[atom.getFile().?];
- const sym_index = object.reverse_symtab_lookup[rel.r_symbolnum];
- const sym_loc = SymbolWithLoc{
- .sym_index = sym_index,
- .file = atom.file,
- };
+ if (try self.reportUndefSymbol(rel, macho_file)) continue;
+
+ switch (rel.type) {
+ .branch => {
+ const symbol = rel.getTargetSymbol(macho_file);
+ if (symbol.flags.import or (symbol.flags.@"export" and (symbol.flags.weak or symbol.flags.interposable))) {
+ symbol.flags.stubs = true;
+ if (symbol.flags.weak) {
+ macho_file.binds_to_weak = true;
+ }
+ } else if (mem.startsWith(u8, symbol.getName(macho_file), "_objc_msgSend$")) {
+ symbol.flags.objc_stubs = true;
+ }
+ },
- const target = if (object.getGlobal(sym_index)) |global_index|
- macho_file.globals.items[global_index]
- else
- sym_loc;
+ .got_load,
+ .got_load_page,
+ .got_load_pageoff,
+ => {
+ const symbol = rel.getTargetSymbol(macho_file);
+ if (symbol.flags.import or
+ (symbol.flags.@"export" and (symbol.flags.weak or symbol.flags.interposable)) or
+ macho_file.options.cpu_arch.? == .aarch64) // TODO relax on arm64
+ {
+ symbol.flags.got = true;
+ if (symbol.flags.weak) {
+ macho_file.binds_to_weak = true;
+ }
+ }
+ },
- switch (rel_type) {
- .ARM64_RELOC_BRANCH26 => {
- // TODO rewrite relocation
- const sym = macho_file.getSymbol(target);
- if (sym.undf()) try macho_file.addStubEntry(target);
+ .got => {
+ rel.getTargetSymbol(macho_file).flags.got = true;
},
- .ARM64_RELOC_GOT_LOAD_PAGE21,
- .ARM64_RELOC_GOT_LOAD_PAGEOFF12,
- .ARM64_RELOC_POINTER_TO_GOT,
+
+ .tlv,
+ .tlvp_page,
+ .tlvp_pageoff,
=> {
- // TODO rewrite relocation
- try macho_file.addGotEntry(target);
+ const symbol = rel.getTargetSymbol(macho_file);
+ if (!symbol.flags.tlv) {
+ macho_file.base.fatal(
+ "{}: {s}: illegal thread-local variable reference to regular symbol {s}",
+ .{ object.fmtPath(), self.getName(macho_file), symbol.getName(macho_file) },
+ );
+ }
+ if (symbol.flags.import or (symbol.flags.@"export" and (symbol.flags.weak or symbol.flags.interposable))) {
+ symbol.flags.tlv_ptr = true;
+ if (symbol.flags.weak) {
+ macho_file.binds_to_weak = true;
+ }
+ }
},
- .ARM64_RELOC_TLVP_LOAD_PAGE21,
- .ARM64_RELOC_TLVP_LOAD_PAGEOFF12,
- => {
- const sym = macho_file.getSymbol(target);
- if (sym.undf()) try macho_file.addTlvPtrEntry(target);
+
+ .unsigned => {
+ if (rel.meta.length == 3) { // TODO this really should check if this is pointer width
+ if (rel.tag == .@"extern") {
+ const symbol = rel.getTargetSymbol(macho_file);
+ if (symbol.isTlvInit(macho_file)) {
+ macho_file.has_tlv = true;
+ continue;
+ }
+ if (symbol.flags.import) {
+ object.num_bind_relocs += 1;
+ if (symbol.flags.weak) {
+ object.num_weak_bind_relocs += 1;
+ macho_file.binds_to_weak = true;
+ }
+ continue;
+ }
+ if (symbol.flags.@"export") {
+ if (symbol.flags.weak) {
+ object.num_weak_bind_relocs += 1;
+ macho_file.binds_to_weak = true;
+ } else if (symbol.flags.interposable) {
+ object.num_bind_relocs += 1;
+ }
+ }
+ }
+ object.num_rebase_relocs += 1;
+ }
},
+
else => {},
}
}
}
-fn scanAtomRelocsX86(
- macho_file: *MachO,
- atom_index: Index,
- relocs: []align(1) const macho.relocation_info,
-) !void {
- for (relocs) |rel| {
- const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type));
+fn reportUndefSymbol(self: Atom, rel: Relocation, macho_file: *MachO) !bool {
+ if (rel.tag == .local) return false;
- switch (rel_type) {
- .X86_64_RELOC_SUBTRACTOR => continue,
- else => {},
+ const sym = rel.getTargetSymbol(macho_file);
+ if (sym.getFile(macho_file) == null) {
+ const gpa = macho_file.base.allocator;
+ const gop = try macho_file.undefs.getOrPut(gpa, rel.target);
+ if (!gop.found_existing) {
+ gop.value_ptr.* = .{};
}
+ try gop.value_ptr.append(gpa, self.atom_index);
+ return true;
+ }
- if (rel.r_extern == 0) continue;
+ return false;
+}
- const atom = macho_file.getAtom(atom_index);
- const object = &macho_file.objects.items[atom.getFile().?];
- const sym_index = object.reverse_symtab_lookup[rel.r_symbolnum];
- const sym_loc = SymbolWithLoc{
- .sym_index = sym_index,
- .file = atom.file,
- };
+pub fn resolveRelocs(self: Atom, macho_file: *MachO, buffer: []u8) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
- const target = if (object.getGlobal(sym_index)) |global_index|
- macho_file.globals.items[global_index]
- else
- sym_loc;
+ assert(!self.getInputSection(macho_file).isZerofill());
+ const relocs = self.getRelocs(macho_file);
+ const file = self.getFile(macho_file);
+ const name = self.getName(macho_file);
+ @memcpy(buffer, self.getCode(macho_file));
- switch (rel_type) {
- .X86_64_RELOC_BRANCH => {
- // TODO rewrite relocation
- const sym = macho_file.getSymbol(target);
- if (sym.undf()) try macho_file.addStubEntry(target);
- },
- .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => {
- // TODO rewrite relocation
- try macho_file.addGotEntry(target);
- },
- .X86_64_RELOC_TLV => {
- const sym = macho_file.getSymbol(target);
- if (sym.undf()) try macho_file.addTlvPtrEntry(target);
- },
- else => {},
- }
- }
-}
+ relocs_log.debug("{x}: {s}", .{ self.value, name });
-pub fn resolveRelocs(
- macho_file: *MachO,
- atom_index: Index,
- atom_code: []u8,
- atom_relocs: []align(1) const macho.relocation_info,
-) !void {
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const arch = target.cpu.arch;
- const atom = macho_file.getAtom(atom_index);
- assert(atom.getFile() != null); // synthetic atoms do not have relocs
-
- relocs_log.debug("resolving relocations in ATOM(%{d}, '{s}')", .{
- atom.sym_index,
- macho_file.getSymbolName(atom.getSymbolWithLoc()),
- });
+ var stream = std.io.fixedBufferStream(buffer);
- const ctx = getRelocContext(macho_file, atom_index);
+ var i: usize = 0;
+ while (i < relocs.len) : (i += 1) {
+ const rel = relocs[i];
+ const rel_offset = rel.offset - self.off;
+ const subtractor = if (rel.meta.has_subtractor) relocs[i - 1] else null;
- return switch (arch) {
- .aarch64 => resolveRelocsArm64(macho_file, atom_index, atom_code, atom_relocs, ctx),
- .x86_64 => resolveRelocsX86(macho_file, atom_index, atom_code, atom_relocs, ctx),
- else => unreachable,
- };
-}
-
-pub fn getRelocTargetAddress(macho_file: *MachO, target: SymbolWithLoc, is_tlv: bool) u64 {
- const target_atom_index = getRelocTargetAtomIndex(macho_file, target) orelse {
- // If there is no atom for target, we still need to check for special, atom-less
- // symbols such as `___dso_handle`.
- const target_name = macho_file.getSymbolName(target);
- const atomless_sym = macho_file.getSymbol(target);
- log.debug(" | atomless target '{s}'", .{target_name});
- return atomless_sym.n_value;
- };
- const target_atom = macho_file.getAtom(target_atom_index);
- log.debug(" | target ATOM(%{d}, '{s}') in object({?})", .{
- target_atom.sym_index,
- macho_file.getSymbolName(target_atom.getSymbolWithLoc()),
- target_atom.getFile(),
- });
+ if (rel.tag == .@"extern") {
+ if (rel.getTargetSymbol(macho_file).getFile(macho_file) == null) continue;
+ }
- const target_sym = macho_file.getSymbol(target_atom.getSymbolWithLoc());
- assert(target_sym.n_desc != MachO.N_DEAD);
-
- // If `target` is contained within the target atom, pull its address value.
- const offset = if (target_atom.getFile() != null) blk: {
- const object = macho_file.objects.items[target_atom.getFile().?];
- break :blk if (object.getSourceSymbol(target.sym_index)) |_|
- Atom.calcInnerSymbolOffset(macho_file, target_atom_index, target.sym_index)
- else
- 0; // section alias
- } else 0;
- const base_address: u64 = if (is_tlv) base_address: {
- // For TLV relocations, the value specified as a relocation is the displacement from the
- // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first
- // defined TLV template init section in the following order:
- // * wrt to __thread_data if defined, then
- // * wrt to __thread_bss
- // TODO remember to check what the mechanism was prior to HAS_TLV_INITIALIZERS in earlier versions of macOS
- const sect_id: u16 = sect_id: {
- if (macho_file.thread_data_section_index) |i| {
- break :sect_id i;
- } else if (macho_file.thread_bss_section_index) |i| {
- break :sect_id i;
- } else break :base_address 0;
+ try stream.seekTo(rel_offset);
+ self.resolveRelocInner(rel, subtractor, buffer, macho_file, stream.writer()) catch |err| {
+ switch (err) {
+ error.RelaxFail => macho_file.base.fatal(
+ "{}: {s}: 0x{x}: failed to relax relocation: in {s}",
+ .{ file.fmtPath(), name, rel.offset, @tagName(rel.type) },
+ ),
+ else => |e| return e,
+ }
+ return error.ResolveFailed;
};
- break :base_address macho_file.sections.items(.header)[sect_id].addr;
- } else 0;
- return target_sym.n_value + offset - base_address;
+ }
}
-fn resolveRelocsArm64(
- macho_file: *MachO,
- atom_index: Index,
- atom_code: []u8,
- atom_relocs: []align(1) const macho.relocation_info,
- context: RelocContext,
-) !void {
- const atom = macho_file.getAtom(atom_index);
- const object = macho_file.objects.items[atom.getFile().?];
-
- var addend: ?i64 = null;
- var subtractor: ?SymbolWithLoc = null;
-
- for (atom_relocs) |rel| {
- const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type));
-
- switch (rel_type) {
- .ARM64_RELOC_ADDEND => {
- assert(addend == null);
+const ResolveError = error{
+ RelaxFail,
+ NoSpaceLeft,
+ DivisionByZero,
+ UnexpectedRemainder,
+ Overflow,
+};
- relocs_log.debug(" RELA({s}) @ {x} => {x}", .{ @tagName(rel_type), rel.r_address, rel.r_symbolnum });
+fn resolveRelocInner(
+ self: Atom,
+ rel: Relocation,
+ subtractor: ?Relocation,
+ code: []u8,
+ macho_file: *MachO,
+ writer: anytype,
+) ResolveError!void {
+ const cpu_arch = macho_file.options.cpu_arch.?;
+ const rel_offset = rel.offset - self.off;
+ const seg_id = macho_file.sections.items(.segment_id)[self.out_n_sect];
+ const seg = macho_file.segments.items[seg_id];
+ const P = @as(i64, @intCast(self.value)) + @as(i64, @intCast(rel_offset));
+ const A = rel.addend + rel.getRelocAddend(cpu_arch);
+ const S: i64 = @intCast(rel.getTargetAddress(macho_file));
+ const G: i64 = @intCast(rel.getGotTargetAddress(macho_file));
+ const TLS = @as(i64, @intCast(macho_file.getTlsAddress()));
+ const SUB = if (subtractor) |sub| @as(i64, @intCast(sub.getTargetAddress(macho_file))) else 0;
+
+ switch (rel.tag) {
+ .local => relocs_log.debug(" {x}<+{d}>: {s}: [=> {x}] atom({d})", .{
+ P,
+ rel_offset,
+ @tagName(rel.type),
+ S + A - SUB,
+ rel.getTargetAtom(macho_file).atom_index,
+ }),
+ .@"extern" => relocs_log.debug(" {x}<+{d}>: {s}: [=> {x}] G({x}) ({s})", .{
+ P,
+ rel_offset,
+ @tagName(rel.type),
+ S + A - SUB,
+ G + A,
+ rel.getTargetSymbol(macho_file).getName(macho_file),
+ }),
+ }
- addend = rel.r_symbolnum;
- continue;
- },
- .ARM64_RELOC_SUBTRACTOR => {
- assert(subtractor == null);
-
- relocs_log.debug(" RELA({s}) @ {x} => %{d} in object({?d})", .{
- @tagName(rel_type),
- rel.r_address,
- rel.r_symbolnum,
- atom.getFile(),
+ switch (rel.type) {
+ .subtractor => {},
+
+ .unsigned => {
+ assert(!rel.meta.pcrel);
+ if (rel.meta.length == 3) {
+ if (rel.tag == .@"extern") {
+ const sym = rel.getTargetSymbol(macho_file);
+ if (sym.isTlvInit(macho_file)) {
+ try writer.writeInt(u64, @intCast(S - TLS), .little);
+ return;
+ }
+ const entry = bind.Entry{
+ .target = rel.target,
+ .offset = @as(u64, @intCast(P)) - seg.vmaddr,
+ .segment_id = seg_id,
+ .addend = A,
+ };
+ if (sym.flags.import) {
+ macho_file.bind.entries.appendAssumeCapacity(entry);
+ if (sym.flags.weak) {
+ macho_file.weak_bind.entries.appendAssumeCapacity(entry);
+ }
+ return;
+ }
+ if (sym.flags.@"export") {
+ if (sym.flags.weak) {
+ macho_file.weak_bind.entries.appendAssumeCapacity(entry);
+ } else if (sym.flags.interposable) {
+ macho_file.bind.entries.appendAssumeCapacity(entry);
+ }
+ }
+ }
+ macho_file.rebase.entries.appendAssumeCapacity(.{
+ .offset = @as(u64, @intCast(P)) - seg.vmaddr,
+ .segment_id = seg_id,
});
+ try writer.writeInt(u64, @bitCast(S + A - SUB), .little);
+ } else if (rel.meta.length == 2) {
+ try writer.writeInt(u32, @bitCast(@as(i32, @truncate(S + A - SUB))), .little);
+ } else unreachable;
+ },
- subtractor = parseRelocTarget(macho_file, .{
- .object_id = atom.getFile().?,
- .rel = rel,
- .code = atom_code,
- .base_addr = context.base_addr,
- .base_offset = context.base_offset,
- });
- continue;
- },
- else => {},
- }
+ .got => {
+ assert(rel.tag == .@"extern");
+ assert(rel.meta.length == 2);
+ assert(rel.meta.pcrel);
+ try writer.writeInt(i32, @intCast(G + A - P), .little);
+ },
- const target = parseRelocTarget(macho_file, .{
- .object_id = atom.getFile().?,
- .rel = rel,
- .code = atom_code,
- .base_addr = context.base_addr,
- .base_offset = context.base_offset,
- });
- const rel_offset = @as(u32, @intCast(rel.r_address - context.base_offset));
-
- relocs_log.debug(" RELA({s}) @ {x} => %{d} ('{s}') in object({?})", .{
- @tagName(rel_type),
- rel.r_address,
- target.sym_index,
- macho_file.getSymbolName(target),
- target.getFile(),
- });
-
- const source_addr = blk: {
- const source_sym = macho_file.getSymbol(atom.getSymbolWithLoc());
- break :blk source_sym.n_value + rel_offset;
- };
- const target_addr = blk: {
- if (relocRequiresGot(macho_file, rel)) break :blk macho_file.getGotEntryAddress(target).?;
- if (relocIsTlv(macho_file, rel) and macho_file.getSymbol(target).undf())
- break :blk macho_file.getTlvPtrEntryAddress(target).?;
- if (relocIsStub(macho_file, rel) and macho_file.getSymbol(target).undf())
- break :blk macho_file.getStubsEntryAddress(target).?;
- const is_tlv = is_tlv: {
- const source_sym = macho_file.getSymbol(atom.getSymbolWithLoc());
- const header = macho_file.sections.items(.header)[source_sym.n_sect - 1];
- break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES;
- };
- break :blk getRelocTargetAddress(macho_file, target, is_tlv);
- };
+ .branch => {
+ assert(rel.meta.length == 2);
+ assert(rel.meta.pcrel);
+ assert(rel.tag == .@"extern");
+
+ switch (cpu_arch) {
+ .x86_64 => try writer.writeInt(i32, @intCast(S + A - P), .little),
+ .aarch64 => {
+ const disp: i28 = math.cast(i28, S + A - P) orelse blk: {
+ const thunk = self.getThunk(macho_file);
+ const S_: i64 = @intCast(thunk.getAddress(rel.target));
+ break :blk math.cast(i28, S_ + A - P) orelse return error.Overflow;
+ };
+ var inst = aarch64.Instruction{
+ .unconditional_branch_immediate = mem.bytesToValue(std.meta.TagPayload(
+ aarch64.Instruction,
+ aarch64.Instruction.unconditional_branch_immediate,
+ ), code[rel_offset..][0..4]),
+ };
+ inst.unconditional_branch_immediate.imm26 = @as(u26, @truncate(@as(u28, @bitCast(disp >> 2))));
+ try writer.writeInt(u32, inst.toU32(), .little);
+ },
+ else => unreachable,
+ }
+ },
- relocs_log.debug(" | source_addr = 0x{x}", .{source_addr});
+ .got_load => {
+ assert(rel.tag == .@"extern");
+ assert(rel.meta.length == 2);
+ assert(rel.meta.pcrel);
+ if (rel.getTargetSymbol(macho_file).flags.got) {
+ try writer.writeInt(i32, @intCast(G + A - P), .little);
+ } else {
+ try relaxGotLoad(code[rel_offset - 3 ..]);
+ try writer.writeInt(i32, @intCast(S + A - P), .little);
+ }
+ },
- switch (rel_type) {
- .ARM64_RELOC_BRANCH26 => {
- relocs_log.debug(" source {s} (object({?})), target {s}", .{
- macho_file.getSymbolName(atom.getSymbolWithLoc()),
- atom.getFile(),
- macho_file.getSymbolName(target),
- });
+ .tlv => {
+ assert(rel.tag == .@"extern");
+ assert(rel.meta.length == 2);
+ assert(rel.meta.pcrel);
+ const sym = rel.getTargetSymbol(macho_file);
+ if (sym.flags.tlv_ptr) {
+ const S_: i64 = @intCast(sym.getTlvPtrAddress(macho_file));
+ try writer.writeInt(i32, @intCast(S_ + A - P), .little);
+ } else {
+ try relaxTlv(code[rel_offset - 3 ..]);
+ try writer.writeInt(i32, @intCast(S + A - P), .little);
+ }
+ },
- const displacement = if (Relocation.calcPcRelativeDisplacementArm64(
- source_addr,
- target_addr,
- )) |disp| blk: {
- relocs_log.debug(" | target_addr = 0x{x}", .{target_addr});
- break :blk disp;
- } else |_| blk: {
- const thunk_index = macho_file.thunk_table.get(atom_index).?;
- const thunk = macho_file.thunks.items[thunk_index];
- const thunk_sym_loc = if (macho_file.getSymbol(target).undf())
- thunk.getTrampoline(macho_file, .stub, target).?
- else
- thunk.getTrampoline(macho_file, .atom, target).?;
- const thunk_addr = macho_file.getSymbol(thunk_sym_loc).n_value;
- relocs_log.debug(" | target_addr = 0x{x} (thunk)", .{thunk_addr});
- break :blk try Relocation.calcPcRelativeDisplacementArm64(source_addr, thunk_addr);
+ .signed, .signed1, .signed2, .signed4 => {
+ assert(rel.meta.length == 2);
+ assert(rel.meta.pcrel);
+ try writer.writeInt(i32, @intCast(S + A - P), .little);
+ },
+
+ .page,
+ .got_load_page,
+ .tlvp_page,
+ => {
+ assert(rel.tag == .@"extern");
+ assert(rel.meta.length == 2);
+ assert(rel.meta.pcrel);
+ const sym = rel.getTargetSymbol(macho_file);
+ const source = math.cast(u64, P) orelse return error.Overflow;
+ const target = target: {
+ const target = switch (rel.type) {
+ .page => S + A,
+ .got_load_page => G + A,
+ .tlvp_page => if (sym.flags.tlv_ptr) blk: {
+ const S_: i64 = @intCast(sym.getTlvPtrAddress(macho_file));
+ break :blk S_ + A;
+ } else S + A,
+ else => unreachable,
};
+ break :target math.cast(u64, target) orelse return error.Overflow;
+ };
+ const pages = @as(u21, @bitCast(try Relocation.calcNumberOfPages(source, target)));
+ var inst = aarch64.Instruction{
+ .pc_relative_address = mem.bytesToValue(std.meta.TagPayload(
+ aarch64.Instruction,
+ aarch64.Instruction.pc_relative_address,
+ ), code[rel_offset..][0..4]),
+ };
+ inst.pc_relative_address.immhi = @as(u19, @truncate(pages >> 2));
+ inst.pc_relative_address.immlo = @as(u2, @truncate(pages));
+ try writer.writeInt(u32, inst.toU32(), .little);
+ },
- const code = atom_code[rel_offset..][0..4];
+ .pageoff => {
+ assert(rel.tag == .@"extern");
+ assert(rel.meta.length == 2);
+ assert(!rel.meta.pcrel);
+ const target = math.cast(u64, S + A) orelse return error.Overflow;
+ const inst_code = code[rel_offset..][0..4];
+ if (Relocation.isArithmeticOp(inst_code)) {
+ const off = try Relocation.calcPageOffset(target, .arithmetic);
var inst = aarch64.Instruction{
- .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload(
+ .add_subtract_immediate = mem.bytesToValue(std.meta.TagPayload(
aarch64.Instruction,
- aarch64.Instruction.unconditional_branch_immediate,
- ), code),
+ aarch64.Instruction.add_subtract_immediate,
+ ), inst_code),
};
- inst.unconditional_branch_immediate.imm26 = @as(u26, @truncate(@as(u28, @bitCast(displacement >> 2))));
- mem.writeInt(u32, code, inst.toU32(), .little);
- },
-
- .ARM64_RELOC_PAGE21,
- .ARM64_RELOC_GOT_LOAD_PAGE21,
- .ARM64_RELOC_TLVP_LOAD_PAGE21,
- => {
- const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0)));
-
- relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr});
-
- const pages = @as(u21, @bitCast(Relocation.calcNumberOfPages(source_addr, adjusted_target_addr)));
- const code = atom_code[rel_offset..][0..4];
+ inst.add_subtract_immediate.imm12 = off;
+ try writer.writeInt(u32, inst.toU32(), .little);
+ } else {
var inst = aarch64.Instruction{
- .pc_relative_address = mem.bytesToValue(meta.TagPayload(
+ .load_store_register = mem.bytesToValue(std.meta.TagPayload(
aarch64.Instruction,
- aarch64.Instruction.pc_relative_address,
- ), code),
+ aarch64.Instruction.load_store_register,
+ ), inst_code),
};
- inst.pc_relative_address.immhi = @as(u19, @truncate(pages >> 2));
- inst.pc_relative_address.immlo = @as(u2, @truncate(pages));
- mem.writeInt(u32, code, inst.toU32(), .little);
- addend = null;
- },
+ const off = try Relocation.calcPageOffset(target, switch (inst.load_store_register.size) {
+ 0 => if (inst.load_store_register.v == 1)
+ Relocation.PageOffsetInstKind.load_store_128
+ else
+ Relocation.PageOffsetInstKind.load_store_8,
+ 1 => .load_store_16,
+ 2 => .load_store_32,
+ 3 => .load_store_64,
+ });
+ inst.load_store_register.offset = off;
+ try writer.writeInt(u32, inst.toU32(), .little);
+ }
+ },
- .ARM64_RELOC_PAGEOFF12 => {
- const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0)));
+ .got_load_pageoff => {
+ assert(rel.tag == .@"extern");
+ assert(rel.meta.length == 2);
+ assert(!rel.meta.pcrel);
+ const target = math.cast(u64, G + A) orelse return error.Overflow;
+ const off = try Relocation.calcPageOffset(target, .load_store_64);
+ var inst: aarch64.Instruction = .{
+ .load_store_register = mem.bytesToValue(std.meta.TagPayload(
+ aarch64.Instruction,
+ aarch64.Instruction.load_store_register,
+ ), code[rel_offset..][0..4]),
+ };
+ inst.load_store_register.offset = off;
+ try writer.writeInt(u32, inst.toU32(), .little);
+ },
- relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr});
+ .tlvp_pageoff => {
+ assert(rel.tag == .@"extern");
+ assert(rel.meta.length == 2);
+ assert(!rel.meta.pcrel);
+
+ const sym = rel.getTargetSymbol(macho_file);
+ const target = target: {
+ const target = if (sym.flags.tlv_ptr) blk: {
+ const S_: i64 = @intCast(sym.getTlvPtrAddress(macho_file));
+ break :blk S_ + A;
+ } else S + A;
+ break :target math.cast(u64, target) orelse return error.Overflow;
+ };
- const code = atom_code[rel_offset..][0..4];
- if (Relocation.isArithmeticOp(code)) {
- const off = try Relocation.calcPageOffset(adjusted_target_addr, .arithmetic);
- var inst = aarch64.Instruction{
- .add_subtract_immediate = mem.bytesToValue(meta.TagPayload(
- aarch64.Instruction,
- aarch64.Instruction.add_subtract_immediate,
- ), code),
+ const RegInfo = struct {
+ rd: u5,
+ rn: u5,
+ size: u2,
+ };
+
+ const inst_code = code[rel_offset..][0..4];
+ const reg_info: RegInfo = blk: {
+ if (Relocation.isArithmeticOp(inst_code)) {
+ const inst = mem.bytesToValue(std.meta.TagPayload(
+ aarch64.Instruction,
+ aarch64.Instruction.add_subtract_immediate,
+ ), inst_code);
+ break :blk .{
+ .rd = inst.rd,
+ .rn = inst.rn,
+ .size = inst.sf,
};
- inst.add_subtract_immediate.imm12 = off;
- mem.writeInt(u32, code, inst.toU32(), .little);
} else {
- var inst = aarch64.Instruction{
- .load_store_register = mem.bytesToValue(meta.TagPayload(
- aarch64.Instruction,
- aarch64.Instruction.load_store_register,
- ), code),
+ const inst = mem.bytesToValue(std.meta.TagPayload(
+ aarch64.Instruction,
+ aarch64.Instruction.load_store_register,
+ ), inst_code);
+ break :blk .{
+ .rd = inst.rt,
+ .rn = inst.rn,
+ .size = inst.size,
};
- const off = try Relocation.calcPageOffset(adjusted_target_addr, switch (inst.load_store_register.size) {
- 0 => if (inst.load_store_register.v == 1)
- Relocation.PageOffsetInstKind.load_store_128
- else
- Relocation.PageOffsetInstKind.load_store_8,
- 1 => .load_store_16,
- 2 => .load_store_32,
- 3 => .load_store_64,
- });
- inst.load_store_register.offset = off;
- mem.writeInt(u32, code, inst.toU32(), .little);
}
- addend = null;
- },
-
- .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => {
- const code = atom_code[rel_offset..][0..4];
- const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0)));
+ };
- relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr});
+ var inst = if (sym.flags.tlv_ptr) aarch64.Instruction{
+ .load_store_register = .{
+ .rt = reg_info.rd,
+ .rn = reg_info.rn,
+ .offset = try Relocation.calcPageOffset(target, .load_store_64),
+ .opc = 0b01,
+ .op1 = 0b01,
+ .v = 0,
+ .size = reg_info.size,
+ },
+ } else aarch64.Instruction{
+ .add_subtract_immediate = .{
+ .rd = reg_info.rd,
+ .rn = reg_info.rn,
+ .imm12 = try Relocation.calcPageOffset(target, .arithmetic),
+ .sh = 0,
+ .s = 0,
+ .op = 0,
+ .sf = @as(u1, @truncate(reg_info.size)),
+ },
+ };
+ try writer.writeInt(u32, inst.toU32(), .little);
+ },
+ }
+}
- const off = try Relocation.calcPageOffset(adjusted_target_addr, .load_store_64);
- var inst: aarch64.Instruction = .{
- .load_store_register = mem.bytesToValue(meta.TagPayload(
- aarch64.Instruction,
- aarch64.Instruction.load_store_register,
- ), code),
- };
- inst.load_store_register.offset = off;
- mem.writeInt(u32, code, inst.toU32(), .little);
- addend = null;
- },
+fn relaxGotLoad(code: []u8) error{RelaxFail}!void {
+ const old_inst = disassemble(code) orelse return error.RelaxFail;
+ switch (old_inst.encoding.mnemonic) {
+ .mov => {
+ const inst = Instruction.new(old_inst.prefix, .lea, &old_inst.ops) catch return error.RelaxFail;
+ relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding });
+ encode(&.{inst}, code) catch return error.RelaxFail;
+ },
+ else => return error.RelaxFail,
+ }
+}
- .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => {
- const code = atom_code[rel_offset..][0..4];
- const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + (addend orelse 0)));
+fn relaxTlv(code: []u8) error{RelaxFail}!void {
+ const old_inst = disassemble(code) orelse return error.RelaxFail;
+ switch (old_inst.encoding.mnemonic) {
+ .mov => {
+ const inst = Instruction.new(old_inst.prefix, .lea, &old_inst.ops) catch return error.RelaxFail;
+ relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding });
+ encode(&.{inst}, code) catch return error.RelaxFail;
+ },
+ else => return error.RelaxFail,
+ }
+}
- relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr});
+fn disassemble(code: []const u8) ?Instruction {
+ var disas = Disassembler.init(code);
+ const inst = disas.next() catch return null;
+ return inst;
+}
- const RegInfo = struct {
- rd: u5,
- rn: u5,
- size: u2,
- };
- const reg_info: RegInfo = blk: {
- if (Relocation.isArithmeticOp(code)) {
- const inst = mem.bytesToValue(meta.TagPayload(
- aarch64.Instruction,
- aarch64.Instruction.add_subtract_immediate,
- ), code);
- break :blk .{
- .rd = inst.rd,
- .rn = inst.rn,
- .size = inst.sf,
- };
- } else {
- const inst = mem.bytesToValue(meta.TagPayload(
- aarch64.Instruction,
- aarch64.Instruction.load_store_register,
- ), code);
- break :blk .{
- .rd = inst.rt,
- .rn = inst.rn,
- .size = inst.size,
- };
- }
- };
+fn encode(insts: []const Instruction, code: []u8) !void {
+ var stream = std.io.fixedBufferStream(code);
+ const writer = stream.writer();
+ for (insts) |inst| {
+ try inst.encode(writer, .{});
+ }
+}
- var inst = if (macho_file.tlv_ptr_table.lookup.contains(target)) aarch64.Instruction{
- .load_store_register = .{
- .rt = reg_info.rd,
- .rn = reg_info.rn,
- .offset = try Relocation.calcPageOffset(adjusted_target_addr, .load_store_64),
- .opc = 0b01,
- .op1 = 0b01,
- .v = 0,
- .size = reg_info.size,
- },
- } else aarch64.Instruction{
- .add_subtract_immediate = .{
- .rd = reg_info.rd,
- .rn = reg_info.rn,
- .imm12 = try Relocation.calcPageOffset(adjusted_target_addr, .arithmetic),
- .sh = 0,
- .s = 0,
- .op = 0,
- .sf = @as(u1, @truncate(reg_info.size)),
+pub fn calcNumRelocs(self: Atom, macho_file: *MachO) u32 {
+ switch (macho_file.options.cpu_arch.?) {
+ .aarch64 => {
+ var nreloc: u32 = 0;
+ for (self.getRelocs(macho_file)) |rel| {
+ nreloc += 1;
+ switch (rel.type) {
+ .page, .pageoff => if (rel.addend > 0) {
+ nreloc += 1;
},
- };
- mem.writeInt(u32, code, inst.toU32(), .little);
- addend = null;
- },
-
- .ARM64_RELOC_POINTER_TO_GOT => {
- relocs_log.debug(" | target_addr = 0x{x}", .{target_addr});
- const result = math.cast(i32, @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr))) orelse
- return error.Overflow;
- mem.writeInt(u32, atom_code[rel_offset..][0..4], @as(u32, @bitCast(result)), .little);
- },
-
- .ARM64_RELOC_UNSIGNED => {
- var ptr_addend = if (rel.r_length == 3)
- mem.readInt(i64, atom_code[rel_offset..][0..8], .little)
- else
- mem.readInt(i32, atom_code[rel_offset..][0..4], .little);
-
- if (rel.r_extern == 0) {
- const base_addr = if (target.sym_index >= object.source_address_lookup.len)
- @as(i64, @intCast(object.getSourceSection(@as(u8, @intCast(rel.r_symbolnum - 1))).addr))
- else
- object.source_address_lookup[target.sym_index];
- ptr_addend -= base_addr;
+ else => {},
}
-
- const result = blk: {
- if (subtractor) |sub| {
- const sym = macho_file.getSymbol(sub);
- break :blk @as(i64, @intCast(target_addr)) - @as(i64, @intCast(sym.n_value)) + ptr_addend;
- } else {
- break :blk @as(i64, @intCast(target_addr)) + ptr_addend;
- }
- };
- relocs_log.debug(" | target_addr = 0x{x}", .{result});
-
- if (rel.r_length == 3) {
- mem.writeInt(u64, atom_code[rel_offset..][0..8], @as(u64, @bitCast(result)), .little);
- } else {
- mem.writeInt(u32, atom_code[rel_offset..][0..4], @as(u32, @truncate(@as(u64, @bitCast(result)))), .little);
- }
-
- subtractor = null;
- },
-
- .ARM64_RELOC_ADDEND => unreachable,
- .ARM64_RELOC_SUBTRACTOR => unreachable,
- }
+ }
+ return nreloc;
+ },
+ .x86_64 => return @intCast(self.getRelocs(macho_file).len),
+ else => unreachable,
}
}
-fn resolveRelocsX86(
- macho_file: *MachO,
- atom_index: Index,
- atom_code: []u8,
- atom_relocs: []align(1) const macho.relocation_info,
- context: RelocContext,
-) !void {
- const atom = macho_file.getAtom(atom_index);
- const object = macho_file.objects.items[atom.getFile().?];
-
- var subtractor: ?SymbolWithLoc = null;
-
- for (atom_relocs) |rel| {
- const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type));
-
- switch (rel_type) {
- .X86_64_RELOC_SUBTRACTOR => {
- assert(subtractor == null);
-
- relocs_log.debug(" RELA({s}) @ {x} => %{d} in object({?d})", .{
- @tagName(rel_type),
- rel.r_address,
- rel.r_symbolnum,
- atom.getFile(),
- });
+pub fn writeRelocs(self: Atom, macho_file: *MachO, code: []u8, buffer: *std.ArrayList(macho.relocation_info)) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
- subtractor = parseRelocTarget(macho_file, .{
- .object_id = atom.getFile().?,
- .rel = rel,
- .code = atom_code,
- .base_addr = context.base_addr,
- .base_offset = context.base_offset,
- });
- continue;
- },
- else => {},
- }
+ const cpu_arch = macho_file.options.cpu_arch.?;
+ const relocs = self.getRelocs(macho_file);
+ const sect = macho_file.sections.items(.header)[self.out_n_sect];
+ var stream = std.io.fixedBufferStream(code);
- const target = parseRelocTarget(macho_file, .{
- .object_id = atom.getFile().?,
- .rel = rel,
- .code = atom_code,
- .base_addr = context.base_addr,
- .base_offset = context.base_offset,
- });
- const rel_offset = @as(u32, @intCast(rel.r_address - context.base_offset));
-
- relocs_log.debug(" RELA({s}) @ {x} => %{d} ('{s}') in object({?})", .{
- @tagName(rel_type),
- rel.r_address,
- target.sym_index,
- macho_file.getSymbolName(target),
- target.getFile(),
- });
-
- const source_addr = blk: {
- const source_sym = macho_file.getSymbol(atom.getSymbolWithLoc());
- break :blk source_sym.n_value + rel_offset;
- };
- const target_addr = blk: {
- if (relocRequiresGot(macho_file, rel)) break :blk macho_file.getGotEntryAddress(target).?;
- if (relocIsStub(macho_file, rel) and macho_file.getSymbol(target).undf())
- break :blk macho_file.getStubsEntryAddress(target).?;
- if (relocIsTlv(macho_file, rel) and macho_file.getSymbol(target).undf())
- break :blk macho_file.getTlvPtrEntryAddress(target).?;
- const is_tlv = is_tlv: {
- const source_sym = macho_file.getSymbol(atom.getSymbolWithLoc());
- const header = macho_file.sections.items(.header)[source_sym.n_sect - 1];
- break :is_tlv header.type() == macho.S_THREAD_LOCAL_VARIABLES;
+ for (relocs) |rel| {
+ const rel_offset = rel.offset - self.off;
+ const r_address: i32 = math.cast(i32, self.value + rel_offset - sect.addr) orelse return error.Overflow;
+ const r_symbolnum = r_symbolnum: {
+ const r_symbolnum: u32 = switch (rel.tag) {
+ .local => rel.getTargetAtom(macho_file).out_n_sect + 1,
+ .@"extern" => rel.getTargetSymbol(macho_file).getOutputSymtabIndex(macho_file).?,
};
- break :blk getRelocTargetAddress(macho_file, target, is_tlv);
+ break :r_symbolnum math.cast(u24, r_symbolnum) orelse return error.Overflow;
};
+ const r_extern = rel.tag == .@"extern";
+ var addend = rel.addend + rel.getRelocAddend(cpu_arch);
+ if (rel.tag == .local) {
+ const target: i64 = @intCast(rel.getTargetAddress(macho_file));
+ addend += target;
+ }
- relocs_log.debug(" | source_addr = 0x{x}", .{source_addr});
-
- switch (rel_type) {
- .X86_64_RELOC_BRANCH => {
- const addend = mem.readInt(i32, atom_code[rel_offset..][0..4], .little);
- const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend));
- relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr});
- const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0);
- mem.writeInt(i32, atom_code[rel_offset..][0..4], disp, .little);
- },
-
- .X86_64_RELOC_GOT,
- .X86_64_RELOC_GOT_LOAD,
- => {
- const addend = mem.readInt(i32, atom_code[rel_offset..][0..4], .little);
- const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend));
- relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr});
- const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0);
- mem.writeInt(i32, atom_code[rel_offset..][0..4], disp, .little);
- },
-
- .X86_64_RELOC_TLV => {
- const addend = mem.readInt(i32, atom_code[rel_offset..][0..4], .little);
- const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend));
- relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr});
- const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0);
-
- if (macho_file.tlv_ptr_table.lookup.get(target) == null) {
- // We need to rewrite the opcode from movq to leaq.
- atom_code[rel_offset - 2] = 0x8d;
+ try stream.seekTo(rel_offset);
+
+ switch (cpu_arch) {
+ .aarch64 => {
+ if (rel.type == .unsigned) switch (rel.meta.length) {
+ 0, 1 => unreachable,
+ 2 => try stream.writer().writeInt(i32, @truncate(addend), .little),
+ 3 => try stream.writer().writeInt(i64, addend, .little),
+ } else if (addend > 0) {
+ buffer.appendAssumeCapacity(.{
+ .r_address = r_address,
+ .r_symbolnum = @bitCast(math.cast(i24, addend) orelse return error.Overflow),
+ .r_pcrel = 0,
+ .r_length = 2,
+ .r_extern = 0,
+ .r_type = @intFromEnum(macho.reloc_type_arm64.ARM64_RELOC_ADDEND),
+ });
}
- mem.writeInt(i32, atom_code[rel_offset..][0..4], disp, .little);
- },
-
- .X86_64_RELOC_SIGNED,
- .X86_64_RELOC_SIGNED_1,
- .X86_64_RELOC_SIGNED_2,
- .X86_64_RELOC_SIGNED_4,
- => {
- const correction: u3 = switch (rel_type) {
- .X86_64_RELOC_SIGNED => 0,
- .X86_64_RELOC_SIGNED_1 => 1,
- .X86_64_RELOC_SIGNED_2 => 2,
- .X86_64_RELOC_SIGNED_4 => 4,
- else => unreachable,
+ const r_type: macho.reloc_type_arm64 = switch (rel.type) {
+ .page => .ARM64_RELOC_PAGE21,
+ .pageoff => .ARM64_RELOC_PAGEOFF12,
+ .got_load_page => .ARM64_RELOC_GOT_LOAD_PAGE21,
+ .got_load_pageoff => .ARM64_RELOC_GOT_LOAD_PAGEOFF12,
+ .tlvp_page => .ARM64_RELOC_TLVP_LOAD_PAGE21,
+ .tlvp_pageoff => .ARM64_RELOC_TLVP_LOAD_PAGEOFF12,
+ .branch => .ARM64_RELOC_BRANCH26,
+ .got => .ARM64_RELOC_POINTER_TO_GOT,
+ .subtractor => .ARM64_RELOC_SUBTRACTOR,
+ .unsigned => .ARM64_RELOC_UNSIGNED,
+
+ .signed,
+ .signed1,
+ .signed2,
+ .signed4,
+ .got_load,
+ .tlv,
+ => unreachable,
};
- var addend = mem.readInt(i32, atom_code[rel_offset..][0..4], .little) + correction;
-
- if (rel.r_extern == 0) {
- const base_addr = if (target.sym_index >= object.source_address_lookup.len)
- @as(i64, @intCast(object.getSourceSection(@as(u8, @intCast(rel.r_symbolnum - 1))).addr))
- else
- object.source_address_lookup[target.sym_index];
- addend += @as(i32, @intCast(@as(i64, @intCast(context.base_addr)) + rel.r_address + 4 -
- @as(i64, @intCast(base_addr))));
- }
-
- const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend));
-
- relocs_log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr});
-
- const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, correction);
- mem.writeInt(i32, atom_code[rel_offset..][0..4], disp, .little);
+ buffer.appendAssumeCapacity(.{
+ .r_address = r_address,
+ .r_symbolnum = r_symbolnum,
+ .r_pcrel = @intFromBool(rel.meta.pcrel),
+ .r_extern = @intFromBool(r_extern),
+ .r_length = rel.meta.length,
+ .r_type = @intFromEnum(r_type),
+ });
},
-
- .X86_64_RELOC_UNSIGNED => {
- var addend = if (rel.r_length == 3)
- mem.readInt(i64, atom_code[rel_offset..][0..8], .little)
- else
- mem.readInt(i32, atom_code[rel_offset..][0..4], .little);
-
- if (rel.r_extern == 0) {
- const base_addr = if (target.sym_index >= object.source_address_lookup.len)
- @as(i64, @intCast(object.getSourceSection(@as(u8, @intCast(rel.r_symbolnum - 1))).addr))
- else
- object.source_address_lookup[target.sym_index];
- addend -= base_addr;
- }
-
- const result = blk: {
- if (subtractor) |sub| {
- const sym = macho_file.getSymbol(sub);
- break :blk @as(i64, @intCast(target_addr)) - @as(i64, @intCast(sym.n_value)) + addend;
+ .x86_64 => {
+ if (rel.meta.pcrel) {
+ if (rel.tag == .local) {
+ addend -= @as(i64, @intCast(self.value + rel_offset));
} else {
- break :blk @as(i64, @intCast(target_addr)) + addend;
+ addend += 4;
}
- };
- relocs_log.debug(" | target_addr = 0x{x}", .{result});
-
- if (rel.r_length == 3) {
- mem.writeInt(u64, atom_code[rel_offset..][0..8], @as(u64, @bitCast(result)), .little);
- } else {
- mem.writeInt(u32, atom_code[rel_offset..][0..4], @as(u32, @truncate(@as(u64, @bitCast(result)))), .little);
+ }
+ switch (rel.meta.length) {
+ 0, 1 => unreachable,
+ 2 => try stream.writer().writeInt(i32, @truncate(addend), .little),
+ 3 => try stream.writer().writeInt(i64, addend, .little),
}
- subtractor = null;
+ const r_type: macho.reloc_type_x86_64 = switch (rel.type) {
+ .signed => .X86_64_RELOC_SIGNED,
+ .signed1 => .X86_64_RELOC_SIGNED_1,
+ .signed2 => .X86_64_RELOC_SIGNED_2,
+ .signed4 => .X86_64_RELOC_SIGNED_4,
+ .got_load => .X86_64_RELOC_GOT_LOAD,
+ .tlv => .X86_64_RELOC_TLV,
+ .branch => .X86_64_RELOC_BRANCH,
+ .got => .X86_64_RELOC_GOT,
+ .subtractor => .X86_64_RELOC_SUBTRACTOR,
+ .unsigned => .X86_64_RELOC_UNSIGNED,
+
+ .page,
+ .pageoff,
+ .got_load_page,
+ .got_load_pageoff,
+ .tlvp_page,
+ .tlvp_pageoff,
+ => unreachable,
+ };
+ buffer.appendAssumeCapacity(.{
+ .r_address = r_address,
+ .r_symbolnum = r_symbolnum,
+ .r_pcrel = @intFromBool(rel.meta.pcrel),
+ .r_extern = @intFromBool(r_extern),
+ .r_length = rel.meta.length,
+ .r_type = @intFromEnum(r_type),
+ });
},
-
- .X86_64_RELOC_SUBTRACTOR => unreachable,
+ else => unreachable,
}
}
}
-pub fn getAtomCode(macho_file: *MachO, atom_index: Index) []const u8 {
- const atom = macho_file.getAtom(atom_index);
- assert(atom.getFile() != null); // Synthetic atom shouldn't need to inquire for code.
- const object = macho_file.objects.items[atom.getFile().?];
- const source_sym = object.getSourceSymbol(atom.sym_index) orelse {
- // If there was no matching symbol present in the source symtab, this means
- // we are dealing with either an entire section, or part of it, but also
- // starting at the beginning.
- const nbase = @as(u32, @intCast(object.in_symtab.?.len));
- const sect_id = @as(u8, @intCast(atom.sym_index - nbase));
- const source_sect = object.getSourceSection(sect_id);
- assert(!source_sect.isZerofill());
- const code = object.getSectionContents(source_sect);
- const code_len = @as(usize, @intCast(atom.size));
- return code[0..code_len];
- };
- const source_sect = object.getSourceSection(source_sym.n_sect - 1);
- assert(!source_sect.isZerofill());
- const code = object.getSectionContents(source_sect);
- const offset = @as(usize, @intCast(source_sym.n_value - source_sect.addr));
- const code_len = @as(usize, @intCast(atom.size));
- return code[offset..][0..code_len];
+pub fn format(
+ atom: Atom,
+ comptime unused_fmt_string: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+) !void {
+ _ = atom;
+ _ = unused_fmt_string;
+ _ = options;
+ _ = writer;
+ @compileError("do not format Atom directly");
}
-pub fn getAtomRelocs(macho_file: *MachO, atom_index: Index) []const macho.relocation_info {
- const atom = macho_file.getAtom(atom_index);
- assert(atom.getFile() != null); // Synthetic atom shouldn't need to unique for relocs.
- const object = macho_file.objects.items[atom.getFile().?];
- const cache = object.relocs_lookup[atom.sym_index];
-
- const source_sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: {
- break :blk source_sym.n_sect - 1;
- } else blk: {
- // If there was no matching symbol present in the source symtab, this means
- // we are dealing with either an entire section, or part of it, but also
- // starting at the beginning.
- const nbase = @as(u32, @intCast(object.in_symtab.?.len));
- const sect_id = @as(u8, @intCast(atom.sym_index - nbase));
- break :blk sect_id;
- };
- const source_sect = object.getSourceSection(source_sect_id);
- assert(!source_sect.isZerofill());
- const relocs = object.getRelocs(source_sect_id);
- return relocs[cache.start..][0..cache.len];
+pub fn fmt(atom: Atom, macho_file: *MachO) std.fmt.Formatter(format2) {
+ return .{ .data = .{
+ .atom = atom,
+ .macho_file = macho_file,
+ } };
}
-pub fn relocRequiresGot(macho_file: *MachO, rel: macho.relocation_info) bool {
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- switch (target.cpu.arch) {
- .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) {
- .ARM64_RELOC_GOT_LOAD_PAGE21,
- .ARM64_RELOC_GOT_LOAD_PAGEOFF12,
- .ARM64_RELOC_POINTER_TO_GOT,
- => return true,
- else => return false,
- },
- .x86_64 => switch (@as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type))) {
- .X86_64_RELOC_GOT,
- .X86_64_RELOC_GOT_LOAD,
- => return true,
- else => return false,
- },
- else => unreachable,
- }
-}
+const FormatContext = struct {
+ atom: Atom,
+ macho_file: *MachO,
+};
-pub fn relocIsTlv(macho_file: *MachO, rel: macho.relocation_info) bool {
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- switch (target.cpu.arch) {
- .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) {
- .ARM64_RELOC_TLVP_LOAD_PAGE21,
- .ARM64_RELOC_TLVP_LOAD_PAGEOFF12,
- => return true,
- else => return false,
- },
- .x86_64 => switch (@as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type))) {
- .X86_64_RELOC_TLV => return true,
- else => return false,
- },
- else => unreachable,
+fn format2(
+ ctx: FormatContext,
+ comptime unused_fmt_string: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+) !void {
+ _ = options;
+ _ = unused_fmt_string;
+ const atom = ctx.atom;
+ const macho_file = ctx.macho_file;
+ try writer.print("atom({d}) : {s} : @{x} : sect({d}) : align({x}) : size({x}) : thunk({d})", .{
+ atom.atom_index, atom.getName(macho_file), atom.value,
+ atom.out_n_sect, atom.alignment, atom.size,
+ atom.thunk_index,
+ });
+ if (!atom.flags.alive) try writer.writeAll(" : [*]");
+ if (atom.unwind_records.len > 0) {
+ try writer.writeAll(" : unwind{ ");
+ for (atom.getUnwindRecords(macho_file), atom.unwind_records.pos..) |index, i| {
+ const rec = macho_file.getUnwindRecord(index);
+ try writer.print("{d}", .{index});
+ if (!rec.alive) try writer.writeAll("([*])");
+ if (i < atom.unwind_records.pos + atom.unwind_records.len - 1) try writer.writeAll(", ");
+ }
+ try writer.writeAll(" }");
}
}
-pub fn relocIsStub(macho_file: *MachO, rel: macho.relocation_info) bool {
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- switch (target.cpu.arch) {
- .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) {
- .ARM64_RELOC_BRANCH26 => return true,
- else => return false,
- },
- .x86_64 => switch (@as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type))) {
- .X86_64_RELOC_BRANCH => return true,
- else => return false,
- },
- else => unreachable,
- }
-}
+pub const Index = u32;
-const Atom = @This();
+pub const Flags = packed struct {
+ /// Specifies whether this atom is alive or has been garbage collected.
+ alive: bool = true,
-const std = @import("std");
-const build_options = @import("build_options");
-const aarch64 = @import("../../arch/aarch64/bits.zig");
+ /// Specifies if the atom has been visited during garbage collection.
+ visited: bool = false,
+};
+
+pub const Loc = struct {
+ pos: usize = 0,
+ len: usize = 0,
+};
+
+const aarch64 = @import("../aarch64.zig");
const assert = std.debug.assert;
-const log = std.log.scoped(.link);
-const relocs_log = std.log.scoped(.link_relocs);
+const bind = @import("dyld_info/bind.zig");
+const dis_x86_64 = @import("dis_x86_64");
const macho = std.macho;
const math = std.math;
const mem = std.mem;
-const meta = std.meta;
-const trace = @import("../../tracy.zig").trace;
+const log = std.log.scoped(.link);
+const relocs_log = std.log.scoped(.relocs);
+const std = @import("std");
+const trace = @import("../tracy.zig").trace;
const Allocator = mem.Allocator;
-const Arch = std.Target.Cpu.Arch;
+const Atom = @This();
+const Disassembler = dis_x86_64.Disassembler;
+const File = @import("file.zig").File;
+const Instruction = dis_x86_64.Instruction;
+const Immediate = dis_x86_64.Immediate;
const MachO = @import("../MachO.zig");
-pub const Relocation = @import("Relocation.zig");
-const SymbolWithLoc = MachO.SymbolWithLoc;
+const Object = @import("Object.zig");
+const Relocation = @import("Relocation.zig");
+const Symbol = @import("Symbol.zig");
+const Thunk = @import("thunks.zig").Thunk;
+const UnwindInfo = @import("UnwindInfo.zig");
src/link/MachO/CodeSignature.zig
@@ -1,175 +1,17 @@
-page_size: u16,
-code_directory: CodeDirectory,
-requirements: ?Requirements = null,
-entitlements: ?Entitlements = null,
-signature: ?Signature = null,
-
-pub fn init(page_size: u16) CodeSignature {
- return .{
- .page_size = page_size,
- .code_directory = CodeDirectory.init(page_size),
- };
-}
-
-pub fn deinit(self: *CodeSignature, allocator: Allocator) void {
- self.code_directory.deinit(allocator);
- if (self.requirements) |*req| {
- req.deinit(allocator);
- }
- if (self.entitlements) |*ents| {
- ents.deinit(allocator);
- }
- if (self.signature) |*sig| {
- sig.deinit(allocator);
- }
-}
-
-pub fn addEntitlements(self: *CodeSignature, allocator: Allocator, path: []const u8) !void {
- const file = try fs.cwd().openFile(path, .{});
- defer file.close();
- const inner = try file.readToEndAlloc(allocator, std.math.maxInt(u32));
- self.entitlements = .{ .inner = inner };
-}
-
-pub const WriteOpts = struct {
- file: fs.File,
- exec_seg_base: u64,
- exec_seg_limit: u64,
- file_size: u32,
- output_mode: std.builtin.OutputMode,
-};
-
-pub fn writeAdhocSignature(
- self: *CodeSignature,
- comp: *const Compilation,
- opts: WriteOpts,
- writer: anytype,
-) !void {
- const gpa = comp.gpa;
-
- var header: macho.SuperBlob = .{
- .magic = macho.CSMAGIC_EMBEDDED_SIGNATURE,
- .length = @sizeOf(macho.SuperBlob),
- .count = 0,
- };
-
- var blobs = std.ArrayList(Blob).init(gpa);
- defer blobs.deinit();
-
- self.code_directory.inner.execSegBase = opts.exec_seg_base;
- self.code_directory.inner.execSegLimit = opts.exec_seg_limit;
- self.code_directory.inner.execSegFlags = if (opts.output_mode == .Exe) macho.CS_EXECSEG_MAIN_BINARY else 0;
- self.code_directory.inner.codeLimit = opts.file_size;
-
- const total_pages = @as(u32, @intCast(mem.alignForward(usize, opts.file_size, self.page_size) / self.page_size));
-
- try self.code_directory.code_slots.ensureTotalCapacityPrecise(gpa, total_pages);
- self.code_directory.code_slots.items.len = total_pages;
- self.code_directory.inner.nCodeSlots = total_pages;
-
- // Calculate hash for each page (in file) and write it to the buffer
- var hasher = Hasher(Sha256){ .allocator = gpa, .thread_pool = comp.thread_pool };
- try hasher.hash(opts.file, self.code_directory.code_slots.items, .{
- .chunk_size = self.page_size,
- .max_file_size = opts.file_size,
- });
-
- try blobs.append(.{ .code_directory = &self.code_directory });
- header.length += @sizeOf(macho.BlobIndex);
- header.count += 1;
-
- var hash: [hash_size]u8 = undefined;
-
- if (self.requirements) |*req| {
- var buf = std.ArrayList(u8).init(gpa);
- defer buf.deinit();
- try req.write(buf.writer());
- Sha256.hash(buf.items, &hash, .{});
- self.code_directory.addSpecialHash(req.slotType(), hash);
-
- try blobs.append(.{ .requirements = req });
- header.count += 1;
- header.length += @sizeOf(macho.BlobIndex) + req.size();
- }
-
- if (self.entitlements) |*ents| {
- var buf = std.ArrayList(u8).init(gpa);
- defer buf.deinit();
- try ents.write(buf.writer());
- Sha256.hash(buf.items, &hash, .{});
- self.code_directory.addSpecialHash(ents.slotType(), hash);
-
- try blobs.append(.{ .entitlements = ents });
- header.count += 1;
- header.length += @sizeOf(macho.BlobIndex) + ents.size();
- }
-
- if (self.signature) |*sig| {
- try blobs.append(.{ .signature = sig });
- header.count += 1;
- header.length += @sizeOf(macho.BlobIndex) + sig.size();
- }
-
- self.code_directory.inner.hashOffset =
- @sizeOf(macho.CodeDirectory) + @as(u32, @intCast(self.code_directory.ident.len + 1 + self.code_directory.inner.nSpecialSlots * hash_size));
- self.code_directory.inner.length = self.code_directory.size();
- header.length += self.code_directory.size();
-
- try writer.writeInt(u32, header.magic, .big);
- try writer.writeInt(u32, header.length, .big);
- try writer.writeInt(u32, header.count, .big);
-
- var offset: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) * @as(u32, @intCast(blobs.items.len));
- for (blobs.items) |blob| {
- try writer.writeInt(u32, blob.slotType(), .big);
- try writer.writeInt(u32, offset, .big);
- offset += blob.size();
- }
-
- for (blobs.items) |blob| {
- try blob.write(writer);
- }
-}
-
-pub fn size(self: CodeSignature) u32 {
- var ssize: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size();
- if (self.requirements) |req| {
- ssize += @sizeOf(macho.BlobIndex) + req.size();
- }
- if (self.entitlements) |ent| {
- ssize += @sizeOf(macho.BlobIndex) + ent.size();
- }
- if (self.signature) |sig| {
- ssize += @sizeOf(macho.BlobIndex) + sig.size();
- }
- return ssize;
-}
-
-pub fn estimateSize(self: CodeSignature, file_size: u64) u32 {
- var ssize: u64 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size();
- // Approx code slots
- const total_pages = mem.alignForward(u64, file_size, self.page_size) / self.page_size;
- ssize += total_pages * hash_size;
- var n_special_slots: u32 = 0;
- if (self.requirements) |req| {
- ssize += @sizeOf(macho.BlobIndex) + req.size();
- n_special_slots = @max(n_special_slots, req.slotType());
- }
- if (self.entitlements) |ent| {
- ssize += @sizeOf(macho.BlobIndex) + ent.size() + hash_size;
- n_special_slots = @max(n_special_slots, ent.slotType());
- }
- if (self.signature) |sig| {
- ssize += @sizeOf(macho.BlobIndex) + sig.size();
- }
- ssize += n_special_slots * hash_size;
- return @as(u32, @intCast(mem.alignForward(u64, ssize, @sizeOf(u64))));
-}
+const CodeSignature = @This();
-pub fn clear(self: *CodeSignature, allocator: Allocator) void {
- self.code_directory.deinit(allocator);
- self.code_directory = CodeDirectory.init(self.page_size);
-}
+const std = @import("std");
+const assert = std.debug.assert;
+const fs = std.fs;
+const log = std.log.scoped(.link);
+const macho = std.macho;
+const mem = std.mem;
+const testing = std.testing;
+const Allocator = mem.Allocator;
+const Hasher = @import("hasher.zig").ParallelHasher;
+const MachO = @import("../MachO.zig");
+const Sha256 = std.crypto.hash.sha2.Sha256;
+const Zld = @import("../Zld.zig");
const hash_size = Sha256.digest_length;
@@ -257,7 +99,7 @@ const CodeDirectory = struct {
fn addSpecialHash(self: *CodeDirectory, index: u32, hash: [hash_size]u8) void {
assert(index > 0);
self.inner.nSpecialSlots = @max(self.inner.nSpecialSlots, index);
- self.special_slots[index - 1] = hash;
+ @memcpy(&self.special_slots[index - 1], &hash);
}
fn slotType(self: CodeDirectory) u32 {
@@ -376,17 +218,175 @@ const Signature = struct {
}
};
-const CodeSignature = @This();
+page_size: u16,
+code_directory: CodeDirectory,
+requirements: ?Requirements = null,
+entitlements: ?Entitlements = null,
+signature: ?Signature = null,
-const std = @import("std");
-const assert = std.debug.assert;
-const fs = std.fs;
-const log = std.log.scoped(.link);
-const macho = std.macho;
-const mem = std.mem;
-const testing = std.testing;
+pub fn init(page_size: u16) CodeSignature {
+ return .{
+ .page_size = page_size,
+ .code_directory = CodeDirectory.init(page_size),
+ };
+}
-const Allocator = mem.Allocator;
-const Compilation = @import("../../Compilation.zig");
-const Hasher = @import("hasher.zig").ParallelHasher;
-const Sha256 = std.crypto.hash.sha2.Sha256;
+pub fn deinit(self: *CodeSignature, allocator: Allocator) void {
+ self.code_directory.deinit(allocator);
+ if (self.requirements) |*req| {
+ req.deinit(allocator);
+ }
+ if (self.entitlements) |*ents| {
+ ents.deinit(allocator);
+ }
+ if (self.signature) |*sig| {
+ sig.deinit(allocator);
+ }
+}
+
+pub fn addEntitlements(self: *CodeSignature, allocator: Allocator, path: []const u8) !void {
+ const file = try fs.cwd().openFile(path, .{});
+ defer file.close();
+ const inner = try file.readToEndAlloc(allocator, std.math.maxInt(u32));
+ self.entitlements = .{ .inner = inner };
+}
+
+pub const WriteOpts = struct {
+ file: fs.File,
+ exec_seg_base: u64,
+ exec_seg_limit: u64,
+ file_size: u32,
+ dylib: bool,
+};
+
+pub fn writeAdhocSignature(
+ self: *CodeSignature,
+ macho_file: *MachO,
+ opts: WriteOpts,
+ writer: anytype,
+) !void {
+ const allocator = macho_file.base.allocator;
+
+ var header: macho.SuperBlob = .{
+ .magic = macho.CSMAGIC_EMBEDDED_SIGNATURE,
+ .length = @sizeOf(macho.SuperBlob),
+ .count = 0,
+ };
+
+ var blobs = std.ArrayList(Blob).init(allocator);
+ defer blobs.deinit();
+
+ self.code_directory.inner.execSegBase = opts.exec_seg_base;
+ self.code_directory.inner.execSegLimit = opts.exec_seg_limit;
+ self.code_directory.inner.execSegFlags = if (!opts.dylib) macho.CS_EXECSEG_MAIN_BINARY else 0;
+ self.code_directory.inner.codeLimit = opts.file_size;
+
+ const total_pages = @as(u32, @intCast(mem.alignForward(usize, opts.file_size, self.page_size) / self.page_size));
+
+ try self.code_directory.code_slots.ensureTotalCapacityPrecise(allocator, total_pages);
+ self.code_directory.code_slots.items.len = total_pages;
+ self.code_directory.inner.nCodeSlots = total_pages;
+
+ // Calculate hash for each page (in file) and write it to the buffer
+ var hasher = Hasher(Sha256){ .allocator = allocator, .thread_pool = macho_file.base.thread_pool };
+ try hasher.hash(opts.file, self.code_directory.code_slots.items, .{
+ .chunk_size = self.page_size,
+ .max_file_size = opts.file_size,
+ });
+
+ try blobs.append(.{ .code_directory = &self.code_directory });
+ header.length += @sizeOf(macho.BlobIndex);
+ header.count += 1;
+
+ var hash: [hash_size]u8 = undefined;
+
+ if (self.requirements) |*req| {
+ var buf = std.ArrayList(u8).init(allocator);
+ defer buf.deinit();
+ try req.write(buf.writer());
+ Sha256.hash(buf.items, &hash, .{});
+ self.code_directory.addSpecialHash(req.slotType(), hash);
+
+ try blobs.append(.{ .requirements = req });
+ header.count += 1;
+ header.length += @sizeOf(macho.BlobIndex) + req.size();
+ }
+
+ if (self.entitlements) |*ents| {
+ var buf = std.ArrayList(u8).init(allocator);
+ defer buf.deinit();
+ try ents.write(buf.writer());
+ Sha256.hash(buf.items, &hash, .{});
+ self.code_directory.addSpecialHash(ents.slotType(), hash);
+
+ try blobs.append(.{ .entitlements = ents });
+ header.count += 1;
+ header.length += @sizeOf(macho.BlobIndex) + ents.size();
+ }
+
+ if (self.signature) |*sig| {
+ try blobs.append(.{ .signature = sig });
+ header.count += 1;
+ header.length += @sizeOf(macho.BlobIndex) + sig.size();
+ }
+
+ self.code_directory.inner.hashOffset =
+ @sizeOf(macho.CodeDirectory) + @as(u32, @intCast(self.code_directory.ident.len + 1 + self.code_directory.inner.nSpecialSlots * hash_size));
+ self.code_directory.inner.length = self.code_directory.size();
+ header.length += self.code_directory.size();
+
+ try writer.writeInt(u32, header.magic, .big);
+ try writer.writeInt(u32, header.length, .big);
+ try writer.writeInt(u32, header.count, .big);
+
+ var offset: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) * @as(u32, @intCast(blobs.items.len));
+ for (blobs.items) |blob| {
+ try writer.writeInt(u32, blob.slotType(), .big);
+ try writer.writeInt(u32, offset, .big);
+ offset += blob.size();
+ }
+
+ for (blobs.items) |blob| {
+ try blob.write(writer);
+ }
+}
+
+pub fn size(self: CodeSignature) u32 {
+ var ssize: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size();
+ if (self.requirements) |req| {
+ ssize += @sizeOf(macho.BlobIndex) + req.size();
+ }
+ if (self.entitlements) |ent| {
+ ssize += @sizeOf(macho.BlobIndex) + ent.size();
+ }
+ if (self.signature) |sig| {
+ ssize += @sizeOf(macho.BlobIndex) + sig.size();
+ }
+ return ssize;
+}
+
+pub fn estimateSize(self: CodeSignature, file_size: u64) u32 {
+ var ssize: u64 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size();
+ // Approx code slots
+ const total_pages = mem.alignForward(u64, file_size, self.page_size) / self.page_size;
+ ssize += total_pages * hash_size;
+ var n_special_slots: u32 = 0;
+ if (self.requirements) |req| {
+ ssize += @sizeOf(macho.BlobIndex) + req.size();
+ n_special_slots = @max(n_special_slots, req.slotType());
+ }
+ if (self.entitlements) |ent| {
+ ssize += @sizeOf(macho.BlobIndex) + ent.size() + hash_size;
+ n_special_slots = @max(n_special_slots, ent.slotType());
+ }
+ if (self.signature) |sig| {
+ ssize += @sizeOf(macho.BlobIndex) + sig.size();
+ }
+ ssize += n_special_slots * hash_size;
+ return @as(u32, @intCast(mem.alignForward(u64, ssize, @sizeOf(u64))));
+}
+
+pub fn clear(self: *CodeSignature, allocator: Allocator) void {
+ self.code_directory.deinit(allocator);
+ self.code_directory = CodeDirectory.init(self.page_size);
+}
src/link/MachO/dead_strip.zig
@@ -1,495 +1,204 @@
-//! An algorithm for dead stripping of unreferenced Atoms.
-
pub fn gcAtoms(macho_file: *MachO) !void {
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
-
- var arena = std.heap.ArenaAllocator.init(gpa);
- defer arena.deinit();
-
- var roots = AtomTable.init(arena.allocator());
- try roots.ensureUnusedCapacity(@as(u32, @intCast(macho_file.globals.items.len)));
+ const gpa = macho_file.base.allocator;
- var alive = AtomTable.init(arena.allocator());
- try alive.ensureTotalCapacity(@as(u32, @intCast(macho_file.atoms.items.len)));
+ var objects = try std.ArrayList(File.Index).initCapacity(gpa, macho_file.objects.items.len + 1);
+ defer objects.deinit();
+ for (macho_file.objects.items) |index| objects.appendAssumeCapacity(index);
+ if (macho_file.internal_object_index) |index| objects.appendAssumeCapacity(index);
- try collectRoots(macho_file, &roots);
- mark(macho_file, roots, &alive);
- prune(macho_file, alive);
-}
+ var roots = std.ArrayList(*Atom).init(gpa);
+ defer roots.deinit();
-fn addRoot(macho_file: *MachO, roots: *AtomTable, file: u32, sym_loc: SymbolWithLoc) !void {
- const sym = macho_file.getSymbol(sym_loc);
- assert(!sym.undf());
- const object = &macho_file.objects.items[file];
- const atom_index = object.getAtomIndexForSymbol(sym_loc.sym_index).?; // panic here means fatal error
- log.debug("root(ATOM({d}, %{d}, {d}))", .{
- atom_index,
- macho_file.getAtom(atom_index).sym_index,
- file,
- });
- _ = try roots.getOrPut(atom_index);
+ try collectRoots(&roots, objects.items, macho_file);
+ mark(roots.items, objects.items, macho_file);
+ prune(objects.items, macho_file);
}
-fn collectRoots(macho_file: *MachO, roots: *AtomTable) !void {
- log.debug("collecting roots", .{});
-
- const comp = macho_file.base.comp;
-
- switch (comp.config.output_mode) {
- .Exe => {
- // Add entrypoint as GC root
- if (macho_file.getEntryPoint()) |global| {
- if (global.getFile()) |file| {
- try addRoot(macho_file, roots, file, global);
- } else {
- assert(macho_file.getSymbol(global).undf()); // Stub as our entrypoint is in a dylib.
- }
- }
- },
- else => |other| {
- assert(other == .Lib);
- // Add exports as GC roots
- for (macho_file.globals.items) |global| {
- const sym = macho_file.getSymbol(global);
- if (sym.undf()) continue;
- if (sym.n_desc == MachO.N_BOUNDARY) continue;
+fn collectRoots(roots: *std.ArrayList(*Atom), objects: []const File.Index, macho_file: *MachO) !void {
+ for (objects) |index| {
+ const object = macho_file.getFile(index).?;
+ for (object.getSymbols()) |sym_index| {
+ const sym = macho_file.getSymbol(sym_index);
+ const file = sym.getFile(macho_file) orelse continue;
+ if (file.getIndex() != index) continue;
+ if (sym.flags.no_dead_strip or (macho_file.options.dylib and sym.visibility == .global))
+ try markSymbol(sym, roots, macho_file);
+ }
- if (global.getFile()) |file| {
- try addRoot(macho_file, roots, file, global);
- }
+ for (object.getAtoms()) |atom_index| {
+ const atom = macho_file.getAtom(atom_index).?;
+ const isec = atom.getInputSection(macho_file);
+ switch (isec.type()) {
+ macho.S_MOD_INIT_FUNC_POINTERS,
+ macho.S_MOD_TERM_FUNC_POINTERS,
+ => if (markAtom(atom)) try roots.append(atom),
+
+ else => if (isec.isDontDeadStrip() and markAtom(atom)) {
+ try roots.append(atom);
+ },
}
- },
- }
-
- // Add all symbols force-defined by the user.
- for (comp.force_undefined_symbols.keys()) |sym_name| {
- const global_index = macho_file.resolver.get(sym_name).?;
- const global = macho_file.globals.items[global_index];
- const sym = macho_file.getSymbol(global);
- assert(!sym.undf());
- try addRoot(macho_file, roots, global.getFile().?, global);
+ }
}
- for (macho_file.objects.items) |object| {
- const has_subsections = object.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0;
-
- for (object.atoms.items) |atom_index| {
- const is_gc_root = blk: {
- // Modelled after ld64 which treats each object file compiled without MH_SUBSECTIONS_VIA_SYMBOLS
- // as a root.
- if (!has_subsections) break :blk true;
-
- const atom = macho_file.getAtom(atom_index);
- const sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym|
- source_sym.n_sect - 1
- else sect_id: {
- const nbase = @as(u32, @intCast(object.in_symtab.?.len));
- const sect_id = @as(u8, @intCast(atom.sym_index - nbase));
- break :sect_id sect_id;
- };
- const source_sect = object.getSourceSection(sect_id);
- if (source_sect.isDontDeadStrip()) break :blk true;
- switch (source_sect.type()) {
- macho.S_MOD_INIT_FUNC_POINTERS,
- macho.S_MOD_TERM_FUNC_POINTERS,
- => break :blk true,
- else => break :blk false,
- }
- };
-
- if (is_gc_root) {
- _ = try roots.getOrPut(atom_index);
-
- log.debug("root(ATOM({d}, %{d}, {?d}))", .{
- atom_index,
- macho_file.getAtom(atom_index).sym_index,
- macho_file.getAtom(atom_index).getFile(),
- });
- }
+ for (macho_file.objects.items) |index| {
+ for (macho_file.getFile(index).?.object.unwind_records.items) |cu_index| {
+ const cu = macho_file.getUnwindRecord(cu_index);
+ if (!cu.alive) continue;
+ if (cu.getFde(macho_file)) |fde| {
+ if (fde.getCie(macho_file).getPersonality(macho_file)) |sym| try markSymbol(sym, roots, macho_file);
+ } else if (cu.getPersonality(macho_file)) |sym| try markSymbol(sym, roots, macho_file);
}
}
-}
-
-fn markLive(macho_file: *MachO, atom_index: Atom.Index, alive: *AtomTable) void {
- if (alive.contains(atom_index)) return;
-
- const atom = macho_file.getAtom(atom_index);
- const sym_loc = atom.getSymbolWithLoc();
- log.debug("mark(ATOM({d}, %{d}, {?d}))", .{ atom_index, sym_loc.sym_index, sym_loc.getFile() });
-
- alive.putAssumeCapacityNoClobber(atom_index, {});
-
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const cpu_arch = target.cpu.arch;
-
- const sym = macho_file.getSymbol(atom.getSymbolWithLoc());
- const header = macho_file.sections.items(.header)[sym.n_sect - 1];
- if (header.isZerofill()) return;
-
- const code = Atom.getAtomCode(macho_file, atom_index);
- const relocs = Atom.getAtomRelocs(macho_file, atom_index);
- const ctx = Atom.getRelocContext(macho_file, atom_index);
-
- for (relocs) |rel| {
- const reloc_target = switch (cpu_arch) {
- .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) {
- .ARM64_RELOC_ADDEND => continue,
- else => Atom.parseRelocTarget(macho_file, .{
- .object_id = atom.getFile().?,
- .rel = rel,
- .code = code,
- .base_offset = ctx.base_offset,
- .base_addr = ctx.base_addr,
- }),
- },
- .x86_64 => Atom.parseRelocTarget(macho_file, .{
- .object_id = atom.getFile().?,
- .rel = rel,
- .code = code,
- .base_offset = ctx.base_offset,
- .base_addr = ctx.base_addr,
- }),
- else => unreachable,
- };
- const target_sym = macho_file.getSymbol(reloc_target);
-
- if (target_sym.undf()) continue;
- if (reloc_target.getFile() == null) {
- const target_sym_name = macho_file.getSymbolName(reloc_target);
- if (mem.eql(u8, "__mh_execute_header", target_sym_name)) continue;
- if (mem.eql(u8, "___dso_handle", target_sym_name)) continue;
+ for (macho_file.undefined_symbols.items) |sym_index| {
+ const sym = macho_file.getSymbol(sym_index);
+ try markSymbol(sym, roots, macho_file);
+ }
- unreachable; // referenced symbol not found
+ for (&[_]?Symbol.Index{
+ macho_file.entry_index,
+ macho_file.dyld_stub_binder_index,
+ macho_file.objc_msg_send_index,
+ }) |index| {
+ if (index) |idx| {
+ const sym = macho_file.getSymbol(idx);
+ try markSymbol(sym, roots, macho_file);
}
-
- const object = macho_file.objects.items[reloc_target.getFile().?];
- const target_atom_index = object.getAtomIndexForSymbol(reloc_target.sym_index).?;
- log.debug(" following ATOM({d}, %{d}, {?d})", .{
- target_atom_index,
- macho_file.getAtom(target_atom_index).sym_index,
- macho_file.getAtom(target_atom_index).getFile(),
- });
-
- markLive(macho_file, target_atom_index, alive);
}
}
-fn refersLive(macho_file: *MachO, atom_index: Atom.Index, alive: AtomTable) bool {
- const atom = macho_file.getAtom(atom_index);
- const sym_loc = atom.getSymbolWithLoc();
-
- log.debug("refersLive(ATOM({d}, %{d}, {?d}))", .{ atom_index, sym_loc.sym_index, sym_loc.getFile() });
-
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const cpu_arch = target.cpu.arch;
-
- const sym = macho_file.getSymbol(sym_loc);
- const header = macho_file.sections.items(.header)[sym.n_sect - 1];
- assert(!header.isZerofill());
-
- const code = Atom.getAtomCode(macho_file, atom_index);
- const relocs = Atom.getAtomRelocs(macho_file, atom_index);
- const ctx = Atom.getRelocContext(macho_file, atom_index);
-
- for (relocs) |rel| {
- const reloc_target = switch (cpu_arch) {
- .aarch64 => switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) {
- .ARM64_RELOC_ADDEND => continue,
- else => Atom.parseRelocTarget(macho_file, .{
- .object_id = atom.getFile().?,
- .rel = rel,
- .code = code,
- .base_offset = ctx.base_offset,
- .base_addr = ctx.base_addr,
- }),
- },
- .x86_64 => Atom.parseRelocTarget(macho_file, .{
- .object_id = atom.getFile().?,
- .rel = rel,
- .code = code,
- .base_offset = ctx.base_offset,
- .base_addr = ctx.base_addr,
- }),
- else => unreachable,
- };
-
- const object = macho_file.objects.items[reloc_target.getFile().?];
- const target_atom_index = object.getAtomIndexForSymbol(reloc_target.sym_index) orelse {
- log.debug("atom for symbol '{s}' not found; skipping...", .{macho_file.getSymbolName(reloc_target)});
- continue;
- };
- if (alive.contains(target_atom_index)) {
- log.debug(" refers live ATOM({d}, %{d}, {?d})", .{
- target_atom_index,
- macho_file.getAtom(target_atom_index).sym_index,
- macho_file.getAtom(target_atom_index).getFile(),
- });
- return true;
- }
- }
+fn markSymbol(sym: *Symbol, roots: *std.ArrayList(*Atom), macho_file: *MachO) !void {
+ const atom = sym.getAtom(macho_file) orelse return;
+ if (markAtom(atom)) try roots.append(atom);
+}
- return false;
+fn markAtom(atom: *Atom) bool {
+ const already_visited = atom.flags.visited;
+ atom.flags.visited = true;
+ return atom.flags.alive and !already_visited;
}
-fn mark(macho_file: *MachO, roots: AtomTable, alive: *AtomTable) void {
- var it = roots.keyIterator();
- while (it.next()) |root| {
- markLive(macho_file, root.*, alive);
+fn mark(roots: []*Atom, objects: []const File.Index, macho_file: *MachO) void {
+ for (roots) |root| {
+ markLive(root, macho_file);
}
var loop: bool = true;
while (loop) {
loop = false;
- for (macho_file.objects.items) |object| {
- for (object.atoms.items) |atom_index| {
- if (alive.contains(atom_index)) continue;
-
- const atom = macho_file.getAtom(atom_index);
- const sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym|
- source_sym.n_sect - 1
- else blk: {
- const nbase = @as(u32, @intCast(object.in_symtab.?.len));
- const sect_id = @as(u8, @intCast(atom.sym_index - nbase));
- break :blk sect_id;
- };
- const source_sect = object.getSourceSection(sect_id);
-
- if (source_sect.isDontDeadStripIfReferencesLive()) {
- if (refersLive(macho_file, atom_index, alive.*)) {
- markLive(macho_file, atom_index, alive);
- loop = true;
- }
+ for (objects) |index| {
+ for (macho_file.getFile(index).?.getAtoms()) |atom_index| {
+ const atom = macho_file.getAtom(atom_index).?;
+ const isec = atom.getInputSection(macho_file);
+ if (isec.isDontDeadStripIfReferencesLive() and !atom.flags.alive and refersLive(atom, macho_file)) {
+ markLive(atom, macho_file);
+ loop = true;
}
}
}
}
-
- for (macho_file.objects.items, 0..) |_, object_id| {
- // Traverse unwind and eh_frame records noting if the source symbol has been marked, and if so,
- // marking all references as live.
- markUnwindRecords(macho_file, @as(u32, @intCast(object_id)), alive);
- }
}
-fn markUnwindRecords(macho_file: *MachO, object_id: u32, alive: *AtomTable) void {
- const object = &macho_file.objects.items[object_id];
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const cpu_arch = target.cpu.arch;
-
- const unwind_records = object.getUnwindRecords();
+fn markLive(atom: *Atom, macho_file: *MachO) void {
+ assert(atom.flags.visited);
+ atom.flags.alive = true;
+ track_live_log.debug("{}marking live atom({d},{s})", .{
+ track_live_level,
+ atom.atom_index,
+ atom.getName(macho_file),
+ });
- for (object.exec_atoms.items) |atom_index| {
- var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index);
+ if (build_options.enable_logging)
+ track_live_level.incr();
- if (!object.hasUnwindRecords()) {
- if (alive.contains(atom_index)) {
- // Mark references live and continue.
- markEhFrameRecords(macho_file, object_id, atom_index, alive);
- } else {
- while (inner_syms_it.next()) |sym| {
- if (object.eh_frame_records_lookup.get(sym)) |fde_offset| {
- // Mark dead and continue.
- object.eh_frame_relocs_lookup.getPtr(fde_offset).?.dead = true;
- }
- }
- }
- continue;
+ for (atom.getRelocs(macho_file)) |rel| {
+ const target_atom = switch (rel.tag) {
+ .local => rel.getTargetAtom(macho_file),
+ .@"extern" => rel.getTargetSymbol(macho_file).getAtom(macho_file),
+ };
+ if (target_atom) |ta| {
+ if (markAtom(ta)) markLive(ta, macho_file);
}
+ }
- while (inner_syms_it.next()) |sym| {
- const record_id = object.unwind_records_lookup.get(sym) orelse continue;
- if (object.unwind_relocs_lookup[record_id].dead) continue; // already marked, nothing to do
- if (!alive.contains(atom_index)) {
- // Mark the record dead and continue.
- object.unwind_relocs_lookup[record_id].dead = true;
- if (object.eh_frame_records_lookup.get(sym)) |fde_offset| {
- object.eh_frame_relocs_lookup.getPtr(fde_offset).?.dead = true;
- }
- continue;
- }
+ for (atom.getUnwindRecords(macho_file)) |cu_index| {
+ const cu = macho_file.getUnwindRecord(cu_index);
+ const cu_atom = cu.getAtom(macho_file);
+ if (markAtom(cu_atom)) markLive(cu_atom, macho_file);
- const record = unwind_records[record_id];
- if (UnwindInfo.UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) {
- markEhFrameRecords(macho_file, object_id, atom_index, alive);
- } else {
- if (UnwindInfo.getPersonalityFunctionReloc(macho_file, object_id, record_id)) |rel| {
- const reloc_target = Atom.parseRelocTarget(macho_file, .{
- .object_id = object_id,
- .rel = rel,
- .code = mem.asBytes(&record),
- .base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))),
- });
- const target_sym = macho_file.getSymbol(reloc_target);
- if (!target_sym.undf()) {
- const target_object = macho_file.objects.items[reloc_target.getFile().?];
- const target_atom_index = target_object.getAtomIndexForSymbol(reloc_target.sym_index).?;
- markLive(macho_file, target_atom_index, alive);
- }
- }
+ if (cu.getLsdaAtom(macho_file)) |lsda| {
+ if (markAtom(lsda)) markLive(lsda, macho_file);
+ }
+ if (cu.getFde(macho_file)) |fde| {
+ const fde_atom = fde.getAtom(macho_file);
+ if (markAtom(fde_atom)) markLive(fde_atom, macho_file);
- if (UnwindInfo.getLsdaReloc(macho_file, object_id, record_id)) |rel| {
- const reloc_target = Atom.parseRelocTarget(macho_file, .{
- .object_id = object_id,
- .rel = rel,
- .code = mem.asBytes(&record),
- .base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))),
- });
- const target_object = macho_file.objects.items[reloc_target.getFile().?];
- const target_atom_index = target_object.getAtomIndexForSymbol(reloc_target.sym_index).?;
- markLive(macho_file, target_atom_index, alive);
- }
+ if (fde.getLsdaAtom(macho_file)) |lsda| {
+ if (markAtom(lsda)) markLive(lsda, macho_file);
}
}
}
}
-fn markEhFrameRecords(macho_file: *MachO, object_id: u32, atom_index: Atom.Index, alive: *AtomTable) void {
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const cpu_arch = target.cpu.arch;
- const object = &macho_file.objects.items[object_id];
- var it = object.getEhFrameRecordsIterator();
- var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index);
-
- while (inner_syms_it.next()) |sym| {
- const fde_offset = object.eh_frame_records_lookup.get(sym) orelse continue; // Continue in case we hit a temp symbol alias
- it.seekTo(fde_offset);
- const fde = (it.next() catch continue).?; // We don't care about the error at this point since it was already handled
-
- const cie_ptr = fde.getCiePointerSource(object_id, macho_file, fde_offset);
- const cie_offset = fde_offset + 4 - cie_ptr;
- it.seekTo(cie_offset);
- const cie = (it.next() catch continue).?; // We don't care about the error at this point since it was already handled
-
- switch (cpu_arch) {
- .aarch64 => {
- // Mark FDE references which should include any referenced LSDA record
- const relocs = eh_frame.getRelocs(macho_file, object_id, fde_offset);
- for (relocs) |rel| {
- const reloc_target = Atom.parseRelocTarget(macho_file, .{
- .object_id = object_id,
- .rel = rel,
- .code = fde.data,
- .base_offset = @as(i32, @intCast(fde_offset)) + 4,
- });
- const target_sym = macho_file.getSymbol(reloc_target);
- if (!target_sym.undf()) blk: {
- const target_object = macho_file.objects.items[reloc_target.getFile().?];
- const target_atom_index = target_object.getAtomIndexForSymbol(reloc_target.sym_index) orelse
- break :blk;
- markLive(macho_file, target_atom_index, alive);
- }
- }
- },
- .x86_64 => {
- const sect = object.getSourceSection(object.eh_frame_sect_id.?);
- const lsda_ptr = fde.getLsdaPointer(cie, .{
- .base_addr = sect.addr,
- .base_offset = fde_offset,
- }) catch continue; // We don't care about the error at this point since it was already handled
- if (lsda_ptr) |lsda_address| {
- // Mark LSDA record as live
- const sym_index = object.getSymbolByAddress(lsda_address, null);
- const target_atom_index = object.getAtomIndexForSymbol(sym_index).?;
- markLive(macho_file, target_atom_index, alive);
- }
- },
- else => unreachable,
+fn refersLive(atom: *Atom, macho_file: *MachO) bool {
+ for (atom.getRelocs(macho_file)) |rel| {
+ const target_atom = switch (rel.tag) {
+ .local => rel.getTargetAtom(macho_file),
+ .@"extern" => rel.getTargetSymbol(macho_file).getAtom(macho_file),
+ };
+ if (target_atom) |ta| {
+ if (ta.flags.alive) return true;
}
+ }
+ return false;
+}
- // Mark CIE references which should include any referenced personalities
- // that are defined locally.
- if (cie.getPersonalityPointerReloc(macho_file, object_id, cie_offset)) |reloc_target| {
- const target_sym = macho_file.getSymbol(reloc_target);
- if (!target_sym.undf()) {
- const target_object = macho_file.objects.items[reloc_target.getFile().?];
- const target_atom_index = target_object.getAtomIndexForSymbol(reloc_target.sym_index).?;
- markLive(macho_file, target_atom_index, alive);
+fn prune(objects: []const File.Index, macho_file: *MachO) void {
+ for (objects) |index| {
+ for (macho_file.getFile(index).?.getAtoms()) |atom_index| {
+ const atom = macho_file.getAtom(atom_index).?;
+ if (atom.flags.alive and !atom.flags.visited) {
+ atom.flags.alive = false;
+ atom.markUnwindRecordsDead(macho_file);
}
}
}
}
-fn prune(macho_file: *MachO, alive: AtomTable) void {
- log.debug("pruning dead atoms", .{});
- for (macho_file.objects.items) |*object| {
- var i: usize = 0;
- while (i < object.atoms.items.len) {
- const atom_index = object.atoms.items[i];
- if (alive.contains(atom_index)) {
- i += 1;
- continue;
- }
-
- const atom = macho_file.getAtom(atom_index);
- const sym_loc = atom.getSymbolWithLoc();
-
- log.debug("prune(ATOM({d}, %{d}, {?d}))", .{
- atom_index,
- sym_loc.sym_index,
- sym_loc.getFile(),
- });
- log.debug(" {s} in {s}", .{ macho_file.getSymbolName(sym_loc), object.name });
-
- const sym = macho_file.getSymbolPtr(sym_loc);
- const sect_id = sym.n_sect - 1;
- var section = macho_file.sections.get(sect_id);
- section.header.size -= atom.size;
-
- if (atom.prev_index) |prev_index| {
- const prev = macho_file.getAtomPtr(prev_index);
- prev.next_index = atom.next_index;
- } else {
- if (atom.next_index) |next_index| {
- section.first_atom_index = next_index;
- }
- }
- if (atom.next_index) |next_index| {
- const next = macho_file.getAtomPtr(next_index);
- next.prev_index = atom.prev_index;
- } else {
- if (atom.prev_index) |prev_index| {
- section.last_atom_index = prev_index;
- } else {
- assert(section.header.size == 0);
- section.first_atom_index = null;
- section.last_atom_index = null;
- }
- }
-
- macho_file.sections.set(sect_id, section);
- _ = object.atoms.swapRemove(i);
-
- sym.n_desc = MachO.N_DEAD;
+const Level = struct {
+ value: usize = 0,
- var inner_sym_it = Atom.getInnerSymbolsIterator(macho_file, atom_index);
- while (inner_sym_it.next()) |inner| {
- const inner_sym = macho_file.getSymbolPtr(inner);
- inner_sym.n_desc = MachO.N_DEAD;
- }
+ fn incr(self: *@This()) void {
+ self.value += 1;
+ }
- if (Atom.getSectionAlias(macho_file, atom_index)) |alias| {
- const alias_sym = macho_file.getSymbolPtr(alias);
- alias_sym.n_desc = MachO.N_DEAD;
- }
- }
+ pub fn format(
+ self: *const @This(),
+ comptime unused_fmt_string: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+ ) !void {
+ _ = unused_fmt_string;
+ _ = options;
+ try writer.writeByteNTimes(' ', self.value);
}
-}
+};
+
+var track_live_level: Level = .{};
-const std = @import("std");
const assert = std.debug.assert;
-const eh_frame = @import("eh_frame.zig");
+const build_options = @import("build_options");
const log = std.log.scoped(.dead_strip);
const macho = std.macho;
const math = std.math;
const mem = std.mem;
+const trace = @import("../tracy.zig").trace;
+const track_live_log = std.log.scoped(.dead_strip_track_live);
+const std = @import("std");
const Allocator = mem.Allocator;
const Atom = @import("Atom.zig");
+const File = @import("file.zig").File;
const MachO = @import("../MachO.zig");
-const SymbolWithLoc = MachO.SymbolWithLoc;
-const UnwindInfo = @import("UnwindInfo.zig");
-
-const AtomTable = std.AutoHashMap(Atom.Index, void);
+const Symbol = @import("Symbol.zig");
src/link/MachO/DwarfInfo.zig
@@ -2,377 +2,175 @@ debug_info: []const u8,
debug_abbrev: []const u8,
debug_str: []const u8,
-pub fn getCompileUnitIterator(self: DwarfInfo) CompileUnitIterator {
- return .{ .ctx = self };
+/// Abbreviation table indexed by offset in the .debug_abbrev bytestream
+abbrev_tables: std.AutoArrayHashMapUnmanaged(u64, AbbrevTable) = .{},
+/// List of compile units as they appear in the .debug_info bytestream
+compile_units: std.ArrayListUnmanaged(CompileUnit) = .{},
+
+pub fn init(dw: *DwarfInfo, allocator: Allocator) !void {
+ try dw.parseAbbrevTables(allocator);
+ try dw.parseCompileUnits(allocator);
}
-const CompileUnitIterator = struct {
- ctx: DwarfInfo,
- pos: usize = 0,
-
- pub fn next(self: *CompileUnitIterator) !?CompileUnit {
- if (self.pos >= self.ctx.debug_info.len) return null;
-
- var stream = std.io.fixedBufferStream(self.ctx.debug_info[self.pos..]);
- var creader = std.io.countingReader(stream.reader());
- const reader = creader.reader();
-
- const cuh = try CompileUnit.Header.read(reader);
- const total_length = cuh.length + @as(u64, if (cuh.is_64bit) @sizeOf(u64) else @sizeOf(u32));
- const offset = math.cast(usize, creader.bytes_read) orelse return error.Overflow;
-
- const cu = CompileUnit{
- .cuh = cuh,
- .debug_info_off = self.pos + offset,
- };
-
- self.pos += (math.cast(usize, total_length) orelse return error.Overflow);
-
- return cu;
+pub fn deinit(dw: *DwarfInfo, allocator: Allocator) void {
+ dw.abbrev_tables.deinit(allocator);
+ for (dw.compile_units.items) |*cu| {
+ cu.deinit(allocator);
}
-};
-
-pub fn genSubprogramLookupByName(
- self: DwarfInfo,
- compile_unit: CompileUnit,
- abbrev_lookup: AbbrevLookupTable,
- lookup: *SubprogramLookupByName,
-) !void {
- var abbrev_it = compile_unit.getAbbrevEntryIterator(self);
- while (try abbrev_it.next(abbrev_lookup)) |entry| switch (entry.tag) {
- dwarf.TAG.subprogram => {
- var attr_it = entry.getAttributeIterator(self, compile_unit.cuh);
-
- var name: ?[]const u8 = null;
- var low_pc: ?u64 = null;
- var high_pc: ?u64 = null;
-
- while (try attr_it.next()) |attr| switch (attr.name) {
- dwarf.AT.name => if (attr.getString(self, compile_unit.cuh)) |str| {
- name = str;
- },
- dwarf.AT.low_pc => {
- if (attr.getAddr(self, compile_unit.cuh)) |addr| {
- low_pc = addr;
- }
- if (try attr.getConstant(self)) |constant| {
- low_pc = @as(u64, @intCast(constant));
- }
- },
- dwarf.AT.high_pc => {
- if (attr.getAddr(self, compile_unit.cuh)) |addr| {
- high_pc = addr;
- }
- if (try attr.getConstant(self)) |constant| {
- high_pc = @as(u64, @intCast(constant));
- }
- },
- else => {},
- };
-
- if (name == null or low_pc == null or high_pc == null) continue;
+ dw.compile_units.deinit(allocator);
+}
- try lookup.putNoClobber(name.?, .{ .addr = low_pc.?, .size = high_pc.? });
- },
- else => {},
- };
+fn getString(dw: DwarfInfo, off: u64) [:0]const u8 {
+ assert(off < dw.debug_str.len);
+ return mem.sliceTo(@as([*:0]const u8, @ptrCast(dw.debug_str.ptr + off)), 0);
}
-pub fn genAbbrevLookupByKind(self: DwarfInfo, off: usize, lookup: *AbbrevLookupTable) !void {
- const data = self.debug_abbrev[off..];
- var stream = std.io.fixedBufferStream(data);
+fn parseAbbrevTables(dw: *DwarfInfo, allocator: Allocator) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+
+ const debug_abbrev = dw.debug_abbrev;
+ var stream = std.io.fixedBufferStream(debug_abbrev);
var creader = std.io.countingReader(stream.reader());
const reader = creader.reader();
while (true) {
- const kind = try leb.readULEB128(u64, reader);
+ if (creader.bytes_read >= debug_abbrev.len) break;
- if (kind == 0) break;
-
- const pos = math.cast(usize, creader.bytes_read) orelse return error.Overflow;
- _ = try leb.readULEB128(u64, reader); // TAG
- _ = try reader.readByte(); // CHILDREN
+ try dw.abbrev_tables.ensureUnusedCapacity(allocator, 1);
+ const table_gop = dw.abbrev_tables.getOrPutAssumeCapacity(@intCast(creader.bytes_read));
+ assert(!table_gop.found_existing);
+ const table = table_gop.value_ptr;
+ table.* = .{};
while (true) {
- const name = try leb.readULEB128(u64, reader);
- const form = try leb.readULEB128(u64, reader);
-
- if (name == 0 and form == 0) break;
- }
-
- const next_pos = math.cast(usize, creader.bytes_read) orelse return error.Overflow;
-
- try lookup.putNoClobber(kind, .{
- .pos = pos,
- .len = next_pos - pos - 2,
- });
- }
-}
+ const code = try leb.readULEB128(Code, reader);
+ if (code == 0) break;
+
+ try table.decls.ensureUnusedCapacity(allocator, 1);
+ const decl_gop = table.decls.getOrPutAssumeCapacity(code);
+ assert(!decl_gop.found_existing);
+ const decl = decl_gop.value_ptr;
+ decl.* = .{
+ .code = code,
+ .tag = undefined,
+ .children = false,
+ };
+ decl.tag = try leb.readULEB128(Tag, reader);
+ decl.children = (try reader.readByte()) > 0;
-pub const CompileUnit = struct {
- cuh: Header,
- debug_info_off: usize,
-
- pub const Header = struct {
- is_64bit: bool,
- length: u64,
- version: u16,
- debug_abbrev_offset: u64,
- address_size: u8,
-
- fn read(reader: anytype) !Header {
- var length: u64 = try reader.readInt(u32, .little);
-
- const is_64bit = length == 0xffffffff;
- if (is_64bit) {
- length = try reader.readInt(u64, .little);
+ while (true) {
+ const at = try leb.readULEB128(At, reader);
+ const form = try leb.readULEB128(Form, reader);
+ if (at == 0 and form == 0) break;
+
+ try decl.attrs.ensureUnusedCapacity(allocator, 1);
+ const attr_gop = decl.attrs.getOrPutAssumeCapacity(at);
+ assert(!attr_gop.found_existing);
+ const attr = attr_gop.value_ptr;
+ attr.* = .{
+ .at = at,
+ .form = form,
+ };
}
-
- const version = try reader.readInt(u16, .little);
- const debug_abbrev_offset = if (is_64bit)
- try reader.readInt(u64, .little)
- else
- try reader.readInt(u32, .little);
- const address_size = try reader.readInt(u8, .little);
-
- return Header{
- .is_64bit = is_64bit,
- .length = length,
- .version = version,
- .debug_abbrev_offset = debug_abbrev_offset,
- .address_size = address_size,
- };
}
- };
-
- inline fn getDebugInfo(self: CompileUnit, ctx: DwarfInfo) []const u8 {
- return ctx.debug_info[self.debug_info_off..][0..self.cuh.length];
- }
-
- pub fn getAbbrevEntryIterator(self: CompileUnit, ctx: DwarfInfo) AbbrevEntryIterator {
- return .{ .cu = self, .ctx = ctx };
}
-};
-
-const AbbrevEntryIterator = struct {
- cu: CompileUnit,
- ctx: DwarfInfo,
- pos: usize = 0,
-
- pub fn next(self: *AbbrevEntryIterator, lookup: AbbrevLookupTable) !?AbbrevEntry {
- if (self.pos + self.cu.debug_info_off >= self.ctx.debug_info.len) return null;
-
- const debug_info = self.ctx.debug_info[self.pos + self.cu.debug_info_off ..];
- var stream = std.io.fixedBufferStream(debug_info);
- var creader = std.io.countingReader(stream.reader());
- const reader = creader.reader();
+}
- const kind = try leb.readULEB128(u64, reader);
- self.pos += (math.cast(usize, creader.bytes_read) orelse return error.Overflow);
+fn parseCompileUnits(dw: *DwarfInfo, allocator: Allocator) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
- if (kind == 0) {
- return AbbrevEntry.null();
- }
+ const debug_info = dw.debug_info;
+ var stream = std.io.fixedBufferStream(debug_info);
+ var creader = std.io.countingReader(stream.reader());
+ const reader = creader.reader();
- const abbrev_pos = lookup.get(kind) orelse return null;
- const len = try findAbbrevEntrySize(
- self.ctx,
- abbrev_pos.pos,
- abbrev_pos.len,
- self.pos + self.cu.debug_info_off,
- self.cu.cuh,
- );
- const entry = try getAbbrevEntry(
- self.ctx,
- abbrev_pos.pos,
- abbrev_pos.len,
- self.pos + self.cu.debug_info_off,
- len,
- );
-
- self.pos += len;
-
- return entry;
- }
-};
+ while (true) {
+ if (creader.bytes_read == debug_info.len) break;
-pub const AbbrevEntry = struct {
- tag: u64,
- children: u8,
- debug_abbrev_off: usize,
- debug_abbrev_len: usize,
- debug_info_off: usize,
- debug_info_len: usize,
-
- fn @"null"() AbbrevEntry {
- return .{
- .tag = 0,
- .children = dwarf.CHILDREN.no,
- .debug_abbrev_off = 0,
- .debug_abbrev_len = 0,
- .debug_info_off = 0,
- .debug_info_len = 0,
+ const cu = try dw.compile_units.addOne(allocator);
+ cu.* = .{
+ .header = undefined,
+ .pos = creader.bytes_read,
};
- }
-
- pub fn hasChildren(self: AbbrevEntry) bool {
- return self.children == dwarf.CHILDREN.yes;
- }
-
- inline fn getDebugInfo(self: AbbrevEntry, ctx: DwarfInfo) []const u8 {
- return ctx.debug_info[self.debug_info_off..][0..self.debug_info_len];
- }
-
- inline fn getDebugAbbrev(self: AbbrevEntry, ctx: DwarfInfo) []const u8 {
- return ctx.debug_abbrev[self.debug_abbrev_off..][0..self.debug_abbrev_len];
- }
-
- pub fn getAttributeIterator(self: AbbrevEntry, ctx: DwarfInfo, cuh: CompileUnit.Header) AttributeIterator {
- return .{ .entry = self, .ctx = ctx, .cuh = cuh };
- }
-};
-
-pub const Attribute = struct {
- name: u64,
- form: u64,
- debug_info_off: usize,
- debug_info_len: usize,
-
- inline fn getDebugInfo(self: Attribute, ctx: DwarfInfo) []const u8 {
- return ctx.debug_info[self.debug_info_off..][0..self.debug_info_len];
- }
-
- pub fn getString(self: Attribute, ctx: DwarfInfo, cuh: CompileUnit.Header) ?[]const u8 {
- const debug_info = self.getDebugInfo(ctx);
- switch (self.form) {
- dwarf.FORM.string => {
- return mem.sliceTo(@as([*:0]const u8, @ptrCast(debug_info.ptr)), 0);
- },
- dwarf.FORM.strp => {
- const off = if (cuh.is_64bit)
- mem.readInt(u64, debug_info[0..8], .little)
- else
- mem.readInt(u32, debug_info[0..4], .little);
- return ctx.getString(off);
- },
- else => return null,
+ var length: u64 = try reader.readInt(u32, .little);
+ const is_64bit = length == 0xffffffff;
+ if (is_64bit) {
+ length = try reader.readInt(u64, .little);
}
+ cu.header.format = if (is_64bit) .dwarf64 else .dwarf32;
+ cu.header.length = length;
+ cu.header.version = try reader.readInt(u16, .little);
+ cu.header.debug_abbrev_offset = try readOffset(cu.header.format, reader);
+ cu.header.address_size = try reader.readInt(u8, .little);
+
+ const table = dw.abbrev_tables.get(cu.header.debug_abbrev_offset).?;
+ try dw.parseDie(allocator, cu, table, null, &creader);
}
+}
- pub fn getConstant(self: Attribute, ctx: DwarfInfo) !?i128 {
- const debug_info = self.getDebugInfo(ctx);
- var stream = std.io.fixedBufferStream(debug_info);
- const reader = stream.reader();
-
- return switch (self.form) {
- dwarf.FORM.data1 => debug_info[0],
- dwarf.FORM.data2 => mem.readInt(u16, debug_info[0..2], .little),
- dwarf.FORM.data4 => mem.readInt(u32, debug_info[0..4], .little),
- dwarf.FORM.data8 => mem.readInt(u64, debug_info[0..8], .little),
- dwarf.FORM.udata => try leb.readULEB128(u64, reader),
- dwarf.FORM.sdata => try leb.readILEB128(i64, reader),
- else => null,
- };
- }
-
- pub fn getAddr(self: Attribute, ctx: DwarfInfo, cuh: CompileUnit.Header) ?u64 {
- if (self.form != dwarf.FORM.addr) return null;
- const debug_info = self.getDebugInfo(ctx);
- return switch (cuh.address_size) {
- 1 => debug_info[0],
- 2 => mem.readInt(u16, debug_info[0..2], .little),
- 4 => mem.readInt(u32, debug_info[0..4], .little),
- 8 => mem.readInt(u64, debug_info[0..8], .little),
- else => unreachable,
- };
- }
-};
-
-const AttributeIterator = struct {
- entry: AbbrevEntry,
- ctx: DwarfInfo,
- cuh: CompileUnit.Header,
- debug_abbrev_pos: usize = 0,
- debug_info_pos: usize = 0,
+fn parseDie(
+ dw: *DwarfInfo,
+ allocator: Allocator,
+ cu: *CompileUnit,
+ table: AbbrevTable,
+ parent: ?u32,
+ creader: anytype,
+) anyerror!void {
+ const tracy = trace(@src());
+ defer tracy.end();
+
+ while (creader.bytes_read < cu.nextCompileUnitOffset()) {
+ const die = try cu.addDie(allocator);
+ cu.diePtr(die).* = .{ .code = undefined };
+ if (parent) |p| {
+ try cu.diePtr(p).children.append(allocator, die);
+ } else {
+ try cu.children.append(allocator, die);
+ }
- pub fn next(self: *AttributeIterator) !?Attribute {
- const debug_abbrev = self.entry.getDebugAbbrev(self.ctx);
- if (self.debug_abbrev_pos >= debug_abbrev.len) return null;
+ const code = try leb.readULEB128(Code, creader.reader());
+ cu.diePtr(die).code = code;
- var stream = std.io.fixedBufferStream(debug_abbrev[self.debug_abbrev_pos..]);
- var creader = std.io.countingReader(stream.reader());
- const reader = creader.reader();
+ if (code == 0) {
+ if (parent == null) continue;
+ return; // Close scope
+ }
- const name = try leb.readULEB128(u64, reader);
- const form = try leb.readULEB128(u64, reader);
-
- self.debug_abbrev_pos += (math.cast(usize, creader.bytes_read) orelse return error.Overflow);
-
- const len = try findFormSize(
- self.ctx,
- form,
- self.debug_info_pos + self.entry.debug_info_off,
- self.cuh,
- );
- const attr = Attribute{
- .name = name,
- .form = form,
- .debug_info_off = self.debug_info_pos + self.entry.debug_info_off,
- .debug_info_len = len,
- };
+ const decl = table.decls.get(code) orelse return error.MalformedDwarf; // TODO better errors
+ const data = dw.debug_info;
+ try cu.diePtr(die).values.ensureTotalCapacityPrecise(allocator, decl.attrs.values().len);
- self.debug_info_pos += len;
+ for (decl.attrs.values()) |attr| {
+ const start = creader.bytes_read;
+ try advanceByFormSize(cu, attr.form, creader);
+ const end = creader.bytes_read;
+ cu.diePtr(die).values.appendAssumeCapacity(data[start..end]);
+ }
- return attr;
+ if (decl.children) {
+ // Open scope
+ try dw.parseDie(allocator, cu, table, die, creader);
+ }
}
-};
-
-fn getAbbrevEntry(self: DwarfInfo, da_off: usize, da_len: usize, di_off: usize, di_len: usize) !AbbrevEntry {
- const debug_abbrev = self.debug_abbrev[da_off..][0..da_len];
- var stream = std.io.fixedBufferStream(debug_abbrev);
- var creader = std.io.countingReader(stream.reader());
- const reader = creader.reader();
-
- const tag = try leb.readULEB128(u64, reader);
- const children = switch (tag) {
- std.dwarf.TAG.const_type,
- std.dwarf.TAG.packed_type,
- std.dwarf.TAG.pointer_type,
- std.dwarf.TAG.reference_type,
- std.dwarf.TAG.restrict_type,
- std.dwarf.TAG.rvalue_reference_type,
- std.dwarf.TAG.shared_type,
- std.dwarf.TAG.volatile_type,
- => if (creader.bytes_read == da_len) std.dwarf.CHILDREN.no else try reader.readByte(),
- else => try reader.readByte(),
- };
-
- const pos = math.cast(usize, creader.bytes_read) orelse return error.Overflow;
-
- return AbbrevEntry{
- .tag = tag,
- .children = children,
- .debug_abbrev_off = pos + da_off,
- .debug_abbrev_len = da_len - pos,
- .debug_info_off = di_off,
- .debug_info_len = di_len,
- };
}
-fn findFormSize(self: DwarfInfo, form: u64, di_off: usize, cuh: CompileUnit.Header) !usize {
- const debug_info = self.debug_info[di_off..];
- var stream = std.io.fixedBufferStream(debug_info);
- var creader = std.io.countingReader(stream.reader());
- const reader = creader.reader();
+fn advanceByFormSize(cu: *CompileUnit, form: Form, creader: anytype) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+ const reader = creader.reader();
switch (form) {
dwarf.FORM.strp,
dwarf.FORM.sec_offset,
dwarf.FORM.ref_addr,
- => return if (cuh.is_64bit) @sizeOf(u64) else @sizeOf(u32),
+ => {
+ _ = try readOffset(cu.header.format, reader);
+ },
- dwarf.FORM.addr => return cuh.address_size,
+ dwarf.FORM.addr => try reader.skipBytes(cu.header.address_size, .{}),
dwarf.FORM.block1,
dwarf.FORM.block2,
@@ -386,119 +184,285 @@ fn findFormSize(self: DwarfInfo, form: u64, di_off: usize, cuh: CompileUnit.Head
dwarf.FORM.block => try leb.readULEB128(u64, reader),
else => unreachable,
};
- var i: u64 = 0;
- while (i < len) : (i += 1) {
+ for (0..len) |_| {
_ = try reader.readByte();
}
- return math.cast(usize, creader.bytes_read) orelse error.Overflow;
},
dwarf.FORM.exprloc => {
- const expr_len = try leb.readULEB128(u64, reader);
- var i: u64 = 0;
- while (i < expr_len) : (i += 1) {
+ const len = try leb.readULEB128(u64, reader);
+ for (0..len) |_| {
_ = try reader.readByte();
}
- return math.cast(usize, creader.bytes_read) orelse error.Overflow;
},
- dwarf.FORM.flag_present => return 0,
+ dwarf.FORM.flag_present => {},
dwarf.FORM.data1,
dwarf.FORM.ref1,
dwarf.FORM.flag,
- => return @sizeOf(u8),
+ => try reader.skipBytes(1, .{}),
dwarf.FORM.data2,
dwarf.FORM.ref2,
- => return @sizeOf(u16),
+ => try reader.skipBytes(2, .{}),
dwarf.FORM.data4,
dwarf.FORM.ref4,
- => return @sizeOf(u32),
+ => try reader.skipBytes(4, .{}),
dwarf.FORM.data8,
dwarf.FORM.ref8,
dwarf.FORM.ref_sig8,
- => return @sizeOf(u64),
+ => try reader.skipBytes(8, .{}),
dwarf.FORM.udata,
dwarf.FORM.ref_udata,
=> {
_ = try leb.readULEB128(u64, reader);
- return math.cast(usize, creader.bytes_read) orelse error.Overflow;
},
dwarf.FORM.sdata => {
_ = try leb.readILEB128(i64, reader);
- return math.cast(usize, creader.bytes_read) orelse error.Overflow;
},
dwarf.FORM.string => {
- var count: usize = 0;
while (true) {
const byte = try reader.readByte();
- count += 1;
if (byte == 0x0) break;
}
- return count;
},
else => {
- // TODO figure out how to handle this
- log.debug("unhandled DW_FORM_* value with identifier {x}", .{form});
+ // TODO better errors
+ log.err("unhandled DW_FORM_* value with identifier {x}", .{form});
return error.UnhandledDwFormValue;
},
}
}
-fn findAbbrevEntrySize(self: DwarfInfo, da_off: usize, da_len: usize, di_off: usize, cuh: CompileUnit.Header) !usize {
- const debug_abbrev = self.debug_abbrev[da_off..][0..da_len];
- var stream = std.io.fixedBufferStream(debug_abbrev);
- var creader = std.io.countingReader(stream.reader());
- const reader = creader.reader();
+fn readOffset(format: Format, reader: anytype) !u64 {
+ return switch (format) {
+ .dwarf32 => try reader.readInt(u32, .little),
+ .dwarf64 => try reader.readInt(u64, .little),
+ };
+}
- const tag = try leb.readULEB128(u64, reader);
- switch (tag) {
- std.dwarf.TAG.const_type,
- std.dwarf.TAG.packed_type,
- std.dwarf.TAG.pointer_type,
- std.dwarf.TAG.reference_type,
- std.dwarf.TAG.restrict_type,
- std.dwarf.TAG.rvalue_reference_type,
- std.dwarf.TAG.shared_type,
- std.dwarf.TAG.volatile_type,
- => if (creader.bytes_read != da_len) {
- _ = try reader.readByte();
- },
- else => _ = try reader.readByte(),
+pub const AbbrevTable = struct {
+ /// Table of abbreviation declarations indexed by their assigned code value
+ decls: std.AutoArrayHashMapUnmanaged(Code, Decl) = .{},
+
+ pub fn deinit(table: *AbbrevTable, gpa: Allocator) void {
+ for (table.decls.values()) |*decl| {
+ decl.deinit(gpa);
+ }
+ table.decls.deinit(gpa);
}
+};
- var len: usize = 0;
- while (creader.bytes_read < debug_abbrev.len) {
- _ = try leb.readULEB128(u64, reader);
- const form = try leb.readULEB128(u64, reader);
- const form_len = try self.findFormSize(form, di_off + len, cuh);
- len += form_len;
+pub const Decl = struct {
+ code: Code,
+ tag: Tag,
+ children: bool,
+
+ /// Table of attributes indexed by their AT value
+ attrs: std.AutoArrayHashMapUnmanaged(At, Attr) = .{},
+
+ pub fn deinit(decl: *Decl, gpa: Allocator) void {
+ decl.attrs.deinit(gpa);
}
+};
- return len;
-}
+pub const Attr = struct {
+ at: At,
+ form: Form,
+};
-fn getString(self: DwarfInfo, off: u64) []const u8 {
- assert(off < self.debug_str.len);
- return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.debug_str.ptr + @as(usize, @intCast(off)))), 0);
-}
+pub const At = u64;
+pub const Code = u64;
+pub const Form = u64;
+pub const Tag = u64;
+
+pub const CompileUnitHeader = struct {
+ format: Format,
+ length: u64,
+ version: u16,
+ debug_abbrev_offset: u64,
+ address_size: u8,
+};
-const DwarfInfo = @This();
+pub const CompileUnit = struct {
+ header: CompileUnitHeader,
+ pos: usize,
+ dies: std.ArrayListUnmanaged(Die) = .{},
+ children: std.ArrayListUnmanaged(Die.Index) = .{},
+
+ pub fn deinit(cu: *CompileUnit, gpa: Allocator) void {
+ for (cu.dies.items) |*die| {
+ die.deinit(gpa);
+ }
+ cu.dies.deinit(gpa);
+ cu.children.deinit(gpa);
+ }
+
+ pub fn addDie(cu: *CompileUnit, gpa: Allocator) !Die.Index {
+ const index = @as(Die.Index, @intCast(cu.dies.items.len));
+ _ = try cu.dies.addOne(gpa);
+ return index;
+ }
+
+ pub fn diePtr(cu: *CompileUnit, index: Die.Index) *Die {
+ return &cu.dies.items[index];
+ }
+
+ pub fn getCompileDir(cu: CompileUnit, ctx: DwarfInfo) ?[:0]const u8 {
+ assert(cu.dies.items.len > 0);
+ const die = cu.dies.items[0];
+ const res = die.find(dwarf.AT.comp_dir, cu, ctx) orelse return null;
+ return res.getString(cu.header.format, ctx);
+ }
+
+ pub fn getSourceFile(cu: CompileUnit, ctx: DwarfInfo) ?[:0]const u8 {
+ assert(cu.dies.items.len > 0);
+ const die = cu.dies.items[0];
+ const res = die.find(dwarf.AT.name, cu, ctx) orelse return null;
+ return res.getString(cu.header.format, ctx);
+ }
+
+ pub fn nextCompileUnitOffset(cu: CompileUnit) u64 {
+ return cu.pos + switch (cu.header.format) {
+ .dwarf32 => @as(u64, 4),
+ .dwarf64 => 12,
+ } + cu.header.length;
+ }
+};
+
+pub const Die = struct {
+ code: Code,
+ values: std.ArrayListUnmanaged([]const u8) = .{},
+ children: std.ArrayListUnmanaged(Die.Index) = .{},
+
+ pub fn deinit(die: *Die, gpa: Allocator) void {
+ die.values.deinit(gpa);
+ die.children.deinit(gpa);
+ }
+
+ pub fn find(die: Die, at: At, cu: CompileUnit, ctx: DwarfInfo) ?DieValue {
+ const table = ctx.abbrev_tables.get(cu.header.debug_abbrev_offset) orelse return null;
+ const decl = table.decls.get(die.code).?;
+ const index = decl.attrs.getIndex(at) orelse return null;
+ const attr = decl.attrs.values()[index];
+ const value = die.values.items[index];
+ return .{ .attr = attr, .bytes = value };
+ }
+
+ pub const Index = u32;
+};
+
+pub const DieValue = struct {
+ attr: Attr,
+ bytes: []const u8,
+
+ pub fn getFlag(value: DieValue) ?bool {
+ return switch (value.attr.form) {
+ dwarf.FORM.flag => value.bytes[0] == 1,
+ dwarf.FORM.flag_present => true,
+ else => null,
+ };
+ }
+
+ pub fn getString(value: DieValue, format: Format, ctx: DwarfInfo) ?[:0]const u8 {
+ switch (value.attr.form) {
+ dwarf.FORM.string => {
+ return mem.sliceTo(@as([*:0]const u8, @ptrCast(value.bytes.ptr)), 0);
+ },
+ dwarf.FORM.strp => {
+ const off = switch (format) {
+ .dwarf64 => mem.readInt(u64, value.bytes[0..8], .little),
+ .dwarf32 => mem.readInt(u32, value.bytes[0..4], .little),
+ };
+ return ctx.getString(off);
+ },
+ else => return null,
+ }
+ }
+
+ pub fn getSecOffset(value: DieValue, format: Format) ?u64 {
+ return switch (value.attr.form) {
+ dwarf.FORM.sec_offset => switch (format) {
+ .dwarf32 => mem.readInt(u32, value.bytes[0..4], .little),
+ .dwarf64 => mem.readInt(u64, value.bytes[0..8], .little),
+ },
+ else => null,
+ };
+ }
+
+ pub fn getConstant(value: DieValue) !?i128 {
+ var stream = std.io.fixedBufferStream(value.bytes);
+ const reader = stream.reader();
+ return switch (value.attr.form) {
+ dwarf.FORM.data1 => value.bytes[0],
+ dwarf.FORM.data2 => mem.readInt(u16, value.bytes[0..2], .little),
+ dwarf.FORM.data4 => mem.readInt(u32, value.bytes[0..4], .little),
+ dwarf.FORM.data8 => mem.readInt(u64, value.bytes[0..8], .little),
+ dwarf.FORM.udata => try leb.readULEB128(u64, reader),
+ dwarf.FORM.sdata => try leb.readILEB128(i64, reader),
+ else => null,
+ };
+ }
+
+ pub fn getReference(value: DieValue, format: Format) !?u64 {
+ var stream = std.io.fixedBufferStream(value.bytes);
+ const reader = stream.reader();
+ return switch (value.attr.form) {
+ dwarf.FORM.ref1 => value.bytes[0],
+ dwarf.FORM.ref2 => mem.readInt(u16, value.bytes[0..2], .little),
+ dwarf.FORM.ref4 => mem.readInt(u32, value.bytes[0..4], .little),
+ dwarf.FORM.ref8 => mem.readInt(u64, value.bytes[0..8], .little),
+ dwarf.FORM.ref_udata => try leb.readULEB128(u64, reader),
+ dwarf.FORM.ref_addr => switch (format) {
+ .dwarf32 => mem.readInt(u32, value.bytes[0..4], .little),
+ .dwarf64 => mem.readInt(u64, value.bytes[0..8], .little),
+ },
+ else => null,
+ };
+ }
+
+ pub fn getAddr(value: DieValue, header: CompileUnitHeader) ?u64 {
+ return switch (value.attr.form) {
+ dwarf.FORM.addr => switch (header.address_size) {
+ 1 => value.bytes[0],
+ 2 => mem.readInt(u16, value.bytes[0..2], .little),
+ 4 => mem.readInt(u32, value.bytes[0..4], .little),
+ 8 => mem.readInt(u64, value.bytes[0..8], .little),
+ else => null,
+ },
+ else => null,
+ };
+ }
+
+ pub fn getExprloc(value: DieValue) !?[]const u8 {
+ if (value.attr.form != dwarf.FORM.exprloc) return null;
+ var stream = std.io.fixedBufferStream(value.bytes);
+ var creader = std.io.countingReader(stream.reader());
+ const reader = creader.reader();
+ const expr_len = try leb.readULEB128(u64, reader);
+ return value.bytes[creader.bytes_read..][0..expr_len];
+ }
+};
+
+pub const Format = enum {
+ dwarf32,
+ dwarf64,
+};
-const std = @import("std");
const assert = std.debug.assert;
const dwarf = std.dwarf;
const leb = std.leb;
-const log = std.log.scoped(.macho);
-const math = std.math;
+const log = std.log.scoped(.link);
const mem = std.mem;
+const std = @import("std");
+const trace = @import("../tracy.zig").trace;
const Allocator = mem.Allocator;
-pub const AbbrevLookupTable = std.AutoHashMap(u64, struct { pos: usize, len: usize });
-pub const SubprogramLookupByName = std.StringHashMap(struct { addr: u64, size: u64 });
+const DwarfInfo = @This();
+const MachO = @import("../MachO.zig");
src/link/MachO/Dylib.zig
@@ -1,340 +1,207 @@
path: []const u8,
-id: ?Id = null,
-weak: bool = false,
-/// Header is only set if Dylib is parsed directly from a binary and not a stub file.
-header: ?macho.mach_header_64 = null,
-
-/// Parsed symbol table represented as hash map of symbols'
-/// names. We can and should defer creating *Symbols until
-/// a symbol is referenced by an object file.
-///
-/// The value for each parsed symbol represents whether the
-/// symbol is defined as a weak symbol or strong.
-/// TODO when the referenced symbol is weak, ld64 marks it as
-/// N_REF_TO_WEAK but need to investigate if there's more to it
-/// such as weak binding entry or simply weak. For now, we generate
-/// standard bind or lazy bind.
-symbols: std.StringArrayHashMapUnmanaged(bool) = .{},
-
-pub const Id = struct {
- name: []const u8,
- timestamp: u32,
- current_version: u32,
- compatibility_version: u32,
-
- pub fn default(allocator: Allocator, name: []const u8) !Id {
- return Id{
- .name = try allocator.dupe(u8, name),
- .timestamp = 2,
- .current_version = 0x10000,
- .compatibility_version = 0x10000,
- };
- }
-
- pub fn fromLoadCommand(allocator: Allocator, lc: macho.dylib_command, name: []const u8) !Id {
- return Id{
- .name = try allocator.dupe(u8, name),
- .timestamp = lc.dylib.timestamp,
- .current_version = lc.dylib.current_version,
- .compatibility_version = lc.dylib.compatibility_version,
- };
- }
-
- pub fn deinit(id: Id, allocator: Allocator) void {
- allocator.free(id.name);
- }
-
- pub const ParseError = fmt.ParseIntError || fmt.BufPrintError;
-
- pub fn parseCurrentVersion(id: *Id, version: anytype) ParseError!void {
- id.current_version = try parseVersion(version);
- }
-
- pub fn parseCompatibilityVersion(id: *Id, version: anytype) ParseError!void {
- id.compatibility_version = try parseVersion(version);
- }
-
- fn parseVersion(version: anytype) ParseError!u32 {
- const string = blk: {
- switch (version) {
- .int => |int| {
- var out: u32 = 0;
- const major = math.cast(u16, int) orelse return error.Overflow;
- out += @as(u32, @intCast(major)) << 16;
- return out;
- },
- .float => |float| {
- var buf: [256]u8 = undefined;
- break :blk try fmt.bufPrint(&buf, "{d:.2}", .{float});
- },
- .string => |string| {
- break :blk string;
- },
- }
- };
-
- var out: u32 = 0;
- var values: [3][]const u8 = undefined;
+data: []const u8,
+index: File.Index,
- var split = mem.splitScalar(u8, string, '.');
- var count: u4 = 0;
- while (split.next()) |value| {
- if (count > 2) {
- log.debug("malformed version field: {s}", .{string});
- return 0x10000;
- }
- values[count] = value;
- count += 1;
- }
+header: ?macho.mach_header_64 = null,
+exports: std.MultiArrayList(Export) = .{},
+strtab: std.ArrayListUnmanaged(u8) = .{},
+id: ?Id = null,
+ordinal: u16 = 0,
- if (count > 2) {
- out += try fmt.parseInt(u8, values[2], 10);
- }
- if (count > 1) {
- out += @as(u32, @intCast(try fmt.parseInt(u8, values[1], 10))) << 8;
- }
- out += @as(u32, @intCast(try fmt.parseInt(u16, values[0], 10))) << 16;
+symbols: std.ArrayListUnmanaged(Symbol.Index) = .{},
+dependents: std.ArrayListUnmanaged(Id) = .{},
+rpaths: std.StringArrayHashMapUnmanaged(void) = .{},
+umbrella: File.Index = 0,
+platform: ?MachO.Options.Platform = null,
- return out;
- }
-};
+needed: bool,
+weak: bool,
+reexport: bool,
+explicit: bool,
+hoisted: bool = true,
+referenced: bool = false,
-pub fn isDylib(file: std.fs.File, fat_offset: u64) bool {
- const reader = file.reader();
- const hdr = reader.readStruct(macho.mach_header_64) catch return false;
- defer file.seekTo(fat_offset) catch {};
- return hdr.filetype == macho.MH_DYLIB;
-}
+output_symtab_ctx: MachO.SymtabCtx = .{},
pub fn deinit(self: *Dylib, allocator: Allocator) void {
- allocator.free(self.path);
- for (self.symbols.keys()) |key| {
- allocator.free(key);
- }
+ self.exports.deinit(allocator);
+ self.strtab.deinit(allocator);
+ if (self.id) |*id| id.deinit(allocator);
self.symbols.deinit(allocator);
- if (self.id) |*id| {
+ for (self.dependents.items) |*id| {
id.deinit(allocator);
}
+ self.dependents.deinit(allocator);
+ self.rpaths.deinit(allocator);
}
-pub fn parseFromBinary(
- self: *Dylib,
- allocator: Allocator,
- dylib_id: u16,
- dependent_libs: anytype,
- name: []const u8,
- data: []align(@alignOf(u64)) const u8,
-) !void {
- var stream = std.io.fixedBufferStream(data);
+pub fn parse(self: *Dylib, macho_file: *MachO) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+
+ const gpa = macho_file.base.allocator;
+ var stream = std.io.fixedBufferStream(self.data);
const reader = stream.reader();
- log.debug("parsing shared library '{s}'", .{name});
+ log.debug("parsing dylib from binary", .{});
self.header = try reader.readStruct(macho.mach_header_64);
- const should_lookup_reexports = self.header.?.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0;
- var it = LoadCommandIterator{
- .ncmds = self.header.?.ncmds,
- .buffer = data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds],
+ const lc_id = self.getLoadCommand(.ID_DYLIB) orelse {
+ macho_file.base.fatal("{s}: missing LC_ID_DYLIB load command", .{self.path});
+ return error.ParseFailed;
};
- while (it.next()) |cmd| {
- switch (cmd.cmd()) {
- .SYMTAB => {
- const symtab_cmd = cmd.cast(macho.symtab_command).?;
- const symtab = @as(
- [*]const macho.nlist_64,
- // Alignment is guaranteed as a dylib is a final linked image and has to have sections
- // properly aligned in order to be correctly loaded by the loader.
- @ptrCast(@alignCast(&data[symtab_cmd.symoff])),
- )[0..symtab_cmd.nsyms];
- const strtab = data[symtab_cmd.stroff..][0..symtab_cmd.strsize];
-
- for (symtab) |sym| {
- const add_to_symtab = sym.ext() and (sym.sect() or sym.indr());
- if (!add_to_symtab) continue;
-
- const sym_name = mem.sliceTo(@as([*:0]const u8, @ptrCast(strtab.ptr + sym.n_strx)), 0);
- try self.symbols.putNoClobber(allocator, try allocator.dupe(u8, sym_name), false);
- }
- },
- .ID_DYLIB => {
- self.id = try Id.fromLoadCommand(
- allocator,
- cmd.cast(macho.dylib_command).?,
- cmd.getDylibPathName(),
- );
- },
- .REEXPORT_DYLIB => {
- if (should_lookup_reexports) {
- // Parse install_name to dependent dylib.
- const id = try Id.fromLoadCommand(
- allocator,
- cmd.cast(macho.dylib_command).?,
- cmd.getDylibPathName(),
- );
- try dependent_libs.writeItem(.{ .id = id, .parent = dylib_id });
- }
- },
- else => {},
- }
- }
-}
+ self.id = try Id.fromLoadCommand(gpa, lc_id.cast(macho.dylib_command).?, lc_id.getDylibPathName());
-/// Returns Platform composed from the first encountered build version type load command:
-/// either LC_BUILD_VERSION or LC_VERSION_MIN_*.
-pub fn getPlatform(self: Dylib, data: []align(@alignOf(u64)) const u8) ?Platform {
var it = LoadCommandIterator{
.ncmds = self.header.?.ncmds,
- .buffer = data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds],
+ .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds],
};
- while (it.next()) |cmd| {
- switch (cmd.cmd()) {
- .BUILD_VERSION,
- .VERSION_MIN_MACOSX,
- .VERSION_MIN_IPHONEOS,
- .VERSION_MIN_TVOS,
- .VERSION_MIN_WATCHOS,
- => return Platform.fromLoadCommand(cmd),
- else => {},
- }
- } else return null;
-}
-
-fn addObjCClassSymbol(self: *Dylib, allocator: Allocator, sym_name: []const u8) !void {
- const expanded = &[_][]const u8{
- try std.fmt.allocPrint(allocator, "_OBJC_CLASS_$_{s}", .{sym_name}),
- try std.fmt.allocPrint(allocator, "_OBJC_METACLASS_$_{s}", .{sym_name}),
+ while (it.next()) |cmd| switch (cmd.cmd()) {
+ .REEXPORT_DYLIB => if (self.header.?.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0) {
+ const id = try Id.fromLoadCommand(gpa, cmd.cast(macho.dylib_command).?, cmd.getDylibPathName());
+ try self.dependents.append(gpa, id);
+ },
+ .DYLD_INFO_ONLY => {
+ const dyld_cmd = cmd.cast(macho.dyld_info_command).?;
+ const data = self.data[dyld_cmd.export_off..][0..dyld_cmd.export_size];
+ try self.parseTrie(data, macho_file);
+ },
+ .DYLD_EXPORTS_TRIE => {
+ const ld_cmd = cmd.cast(macho.linkedit_data_command).?;
+ const data = self.data[ld_cmd.dataoff..][0..ld_cmd.datasize];
+ try self.parseTrie(data, macho_file);
+ },
+ .RPATH => {
+ const path = cmd.getRpathPathName();
+ try self.rpaths.put(gpa, path, {});
+ },
+ else => {},
};
- for (expanded) |sym| {
- if (self.symbols.contains(sym)) continue;
- try self.symbols.putNoClobber(allocator, sym, false);
- }
-}
-
-fn addObjCIVarSymbol(self: *Dylib, allocator: Allocator, sym_name: []const u8) !void {
- const expanded = try std.fmt.allocPrint(allocator, "_OBJC_IVAR_$_{s}", .{sym_name});
- if (self.symbols.contains(expanded)) return;
- try self.symbols.putNoClobber(allocator, expanded, false);
+ self.initPlatform();
}
-fn addObjCEhTypeSymbol(self: *Dylib, allocator: Allocator, sym_name: []const u8) !void {
- const expanded = try std.fmt.allocPrint(allocator, "_OBJC_EHTYPE_$_{s}", .{sym_name});
- if (self.symbols.contains(expanded)) return;
- try self.symbols.putNoClobber(allocator, expanded, false);
-}
-
-fn addSymbol(self: *Dylib, allocator: Allocator, sym_name: []const u8) !void {
- if (self.symbols.contains(sym_name)) return;
- try self.symbols.putNoClobber(allocator, try allocator.dupe(u8, sym_name), false);
-}
+const TrieIterator = struct {
+ data: []const u8,
+ pos: usize = 0,
-fn addWeakSymbol(self: *Dylib, allocator: Allocator, sym_name: []const u8) !void {
- if (self.symbols.contains(sym_name)) return;
- try self.symbols.putNoClobber(allocator, try allocator.dupe(u8, sym_name), true);
-}
+ fn getStream(it: *TrieIterator) std.io.FixedBufferStream([]const u8) {
+ return std.io.fixedBufferStream(it.data[it.pos..]);
+ }
-pub const TargetMatcher = struct {
- allocator: Allocator,
- cpu_arch: std.Target.Cpu.Arch,
- os_tag: std.Target.Os.Tag,
- abi: std.Target.Abi,
- target_strings: std.ArrayListUnmanaged([]const u8) = .{},
+ fn readULEB128(it: *TrieIterator) !u64 {
+ var stream = it.getStream();
+ var creader = std.io.countingReader(stream.reader());
+ const reader = creader.reader();
+ const value = try std.leb.readULEB128(u64, reader);
+ it.pos += creader.bytes_read;
+ return value;
+ }
- pub fn init(allocator: Allocator, target: std.Target) !TargetMatcher {
- var self = TargetMatcher{
- .allocator = allocator,
- .cpu_arch = target.cpu.arch,
- .os_tag = target.os.tag,
- .abi = target.abi,
- };
- const apple_string = try toAppleTargetTriple(allocator, self.cpu_arch, self.os_tag, self.abi);
- try self.target_strings.append(allocator, apple_string);
+ fn readString(it: *TrieIterator) ![:0]const u8 {
+ var stream = it.getStream();
+ const reader = stream.reader();
- if (self.abi == .simulator) {
- // For Apple simulator targets, linking gets tricky as we need to link against the simulator
- // hosts dylibs too.
- const host_target = try toAppleTargetTriple(allocator, self.cpu_arch, .macos, .none);
- try self.target_strings.append(allocator, host_target);
+ var count: usize = 0;
+ while (true) : (count += 1) {
+ const byte = try reader.readByte();
+ if (byte == 0) break;
}
- return self;
+ const str = @as([*:0]const u8, @ptrCast(it.data.ptr + it.pos))[0..count :0];
+ it.pos += count + 1;
+ return str;
}
- pub fn deinit(self: *TargetMatcher) void {
- for (self.target_strings.items) |t| {
- self.allocator.free(t);
- }
- self.target_strings.deinit(self.allocator);
+ fn readByte(it: *TrieIterator) !u8 {
+ var stream = it.getStream();
+ const value = try stream.reader().readByte();
+ it.pos += 1;
+ return value;
}
+};
- inline fn fmtCpuArch(cpu_arch: std.Target.Cpu.Arch) []const u8 {
- return switch (cpu_arch) {
- .aarch64 => "arm64",
- .x86_64 => "x86_64",
- else => unreachable,
- };
- }
+pub fn addExport(self: *Dylib, allocator: Allocator, name: []const u8, flags: Export.Flags) !void {
+ try self.exports.append(allocator, .{
+ .name = try self.insertString(allocator, name),
+ .flags = flags,
+ });
+}
- inline fn fmtAbi(abi: std.Target.Abi) ?[]const u8 {
- return switch (abi) {
- .none => null,
- .simulator => "simulator",
- .macabi => "maccatalyst",
- else => unreachable,
+fn parseTrieNode(
+ self: *Dylib,
+ it: *TrieIterator,
+ allocator: Allocator,
+ arena: Allocator,
+ prefix: []const u8,
+) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+ const size = try it.readULEB128();
+ if (size > 0) {
+ const flags = try it.readULEB128();
+ const kind = flags & macho.EXPORT_SYMBOL_FLAGS_KIND_MASK;
+ const out_flags = Export.Flags{
+ .abs = kind == macho.EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE,
+ .tlv = kind == macho.EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL,
+ .weak = flags & macho.EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION != 0,
};
- }
-
- pub fn toAppleTargetTriple(
- allocator: Allocator,
- cpu_arch: std.Target.Cpu.Arch,
- os_tag: std.Target.Os.Tag,
- abi: std.Target.Abi,
- ) ![]const u8 {
- const cpu_arch_s = fmtCpuArch(cpu_arch);
- const os_tag_s = @tagName(os_tag);
- if (fmtAbi(abi)) |abi_s| {
- return std.fmt.allocPrint(allocator, "{s}-{s}-{s}", .{ cpu_arch_s, os_tag_s, abi_s });
+ if (flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT != 0) {
+ _ = try it.readULEB128(); // dylib ordinal
+ const name = try it.readString();
+ try self.addExport(allocator, if (name.len > 0) name else prefix, out_flags);
+ } else if (flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER != 0) {
+ _ = try it.readULEB128(); // stub offset
+ _ = try it.readULEB128(); // resolver offset
+ try self.addExport(allocator, prefix, out_flags);
+ } else {
+ _ = try it.readULEB128(); // VM offset
+ try self.addExport(allocator, prefix, out_flags);
}
- return std.fmt.allocPrint(allocator, "{s}-{s}", .{ cpu_arch_s, os_tag_s });
}
- fn hasValue(stack: []const []const u8, needle: []const u8) bool {
- for (stack) |v| {
- if (mem.eql(u8, v, needle)) return true;
- }
- return false;
- }
+ const nedges = try it.readByte();
- pub fn matchesTarget(self: TargetMatcher, targets: []const []const u8) bool {
- for (self.target_strings.items) |t| {
- if (hasValue(targets, t)) return true;
- }
- return false;
+ for (0..nedges) |_| {
+ const label = try it.readString();
+ const off = try it.readULEB128();
+ const prefix_label = try std.fmt.allocPrint(arena, "{s}{s}", .{ prefix, label });
+ const curr = it.pos;
+ it.pos = off;
+ try self.parseTrieNode(it, allocator, arena, prefix_label);
+ it.pos = curr;
}
+}
- fn matchesArch(self: TargetMatcher, archs: []const []const u8) bool {
- return hasValue(archs, fmtCpuArch(self.cpu_arch));
- }
-};
+fn parseTrie(self: *Dylib, data: []const u8, macho_file: *MachO) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+ const gpa = macho_file.base.allocator;
+ var arena = std.heap.ArenaAllocator.init(gpa);
+ defer arena.deinit();
+
+ var it: TrieIterator = .{ .data = data };
+ try self.parseTrieNode(&it, gpa, arena.allocator(), "");
+}
-pub fn parseFromStub(
+pub fn parseTbd(
self: *Dylib,
- allocator: Allocator,
- target: std.Target,
+ cpu_arch: std.Target.Cpu.Arch,
+ platform: ?MachO.Options.Platform,
lib_stub: LibStub,
- dylib_id: u16,
- dependent_libs: anytype,
- name: []const u8,
+ macho_file: *MachO,
) !void {
- if (lib_stub.inner.len == 0) return error.NotLibStub;
+ const tracy = trace(@src());
+ defer tracy.end();
+ const gpa = macho_file.base.allocator;
- log.debug("parsing shared library from stub '{s}'", .{name});
+ log.debug("parsing dylib from stub", .{});
const umbrella_lib = lib_stub.inner[0];
{
- var id = try Id.default(allocator, umbrella_lib.installName());
+ var id = try Id.default(gpa, umbrella_lib.installName());
if (umbrella_lib.currentVersion()) |version| {
try id.parseCurrentVersion(version);
}
@@ -344,21 +211,21 @@ pub fn parseFromStub(
self.id = id;
}
- var umbrella_libs = std.StringHashMap(void).init(allocator);
+ var umbrella_libs = std.StringHashMap(void).init(gpa);
defer umbrella_libs.deinit();
log.debug(" (install_name '{s}')", .{umbrella_lib.installName()});
- var matcher = try TargetMatcher.init(allocator, target);
+ self.platform = platform orelse .{
+ .platform = .MACOS,
+ .version = .{ .value = 0 },
+ };
+
+ var matcher = try TargetMatcher.init(gpa, cpu_arch, self.platform.?.platform);
defer matcher.deinit();
for (lib_stub.inner, 0..) |elem, stub_index| {
- const targets = try elem.targets(allocator);
- defer {
- for (targets) |t| allocator.free(t);
- allocator.free(targets);
- }
- if (!matcher.matchesTarget(targets)) continue;
+ if (!(try matcher.matchesTargetTbd(elem))) continue;
if (stub_index > 0) {
// TODO I thought that we could switch on presence of `parent-umbrella` map;
@@ -375,43 +242,42 @@ pub fn parseFromStub(
if (exp.symbols) |symbols| {
for (symbols) |sym_name| {
- try self.addSymbol(allocator, sym_name);
+ try self.addExport(gpa, sym_name, .{});
}
}
if (exp.weak_symbols) |symbols| {
for (symbols) |sym_name| {
- try self.addWeakSymbol(allocator, sym_name);
+ try self.addExport(gpa, sym_name, .{ .weak = true });
}
}
if (exp.objc_classes) |objc_classes| {
for (objc_classes) |class_name| {
- try self.addObjCClassSymbol(allocator, class_name);
+ try self.addObjCClass(gpa, class_name);
}
}
if (exp.objc_ivars) |objc_ivars| {
for (objc_ivars) |ivar| {
- try self.addObjCIVarSymbol(allocator, ivar);
+ try self.addObjCIVar(gpa, ivar);
}
}
if (exp.objc_eh_types) |objc_eh_types| {
for (objc_eh_types) |eht| {
- try self.addObjCEhTypeSymbol(allocator, eht);
+ try self.addObjCEhType(gpa, eht);
}
}
- // TODO track which libs were already parsed in different steps
if (exp.re_exports) |re_exports| {
for (re_exports) |lib| {
if (umbrella_libs.contains(lib)) continue;
log.debug(" (found re-export '{s}')", .{lib});
- const dep_id = try Id.default(allocator, lib);
- try dependent_libs.writeItem(.{ .id = dep_id, .parent = dylib_id });
+ const dep_id = try Id.default(gpa, lib);
+ try self.dependents.append(gpa, dep_id);
}
}
}
@@ -424,31 +290,31 @@ pub fn parseFromStub(
if (exp.symbols) |symbols| {
for (symbols) |sym_name| {
- try self.addSymbol(allocator, sym_name);
+ try self.addExport(gpa, sym_name, .{});
}
}
if (exp.weak_symbols) |symbols| {
for (symbols) |sym_name| {
- try self.addWeakSymbol(allocator, sym_name);
+ try self.addExport(gpa, sym_name, .{ .weak = true });
}
}
if (exp.objc_classes) |classes| {
for (classes) |sym_name| {
- try self.addObjCClassSymbol(allocator, sym_name);
+ try self.addObjCClass(gpa, sym_name);
}
}
if (exp.objc_ivars) |objc_ivars| {
for (objc_ivars) |ivar| {
- try self.addObjCIVarSymbol(allocator, ivar);
+ try self.addObjCIVar(gpa, ivar);
}
}
if (exp.objc_eh_types) |objc_eh_types| {
for (objc_eh_types) |eht| {
- try self.addObjCEhTypeSymbol(allocator, eht);
+ try self.addObjCEhType(gpa, eht);
}
}
}
@@ -460,31 +326,31 @@ pub fn parseFromStub(
if (reexp.symbols) |symbols| {
for (symbols) |sym_name| {
- try self.addSymbol(allocator, sym_name);
+ try self.addExport(gpa, sym_name, .{});
}
}
if (reexp.weak_symbols) |symbols| {
for (symbols) |sym_name| {
- try self.addWeakSymbol(allocator, sym_name);
+ try self.addExport(gpa, sym_name, .{ .weak = true });
}
}
if (reexp.objc_classes) |classes| {
for (classes) |sym_name| {
- try self.addObjCClassSymbol(allocator, sym_name);
+ try self.addObjCClass(gpa, sym_name);
}
}
if (reexp.objc_ivars) |objc_ivars| {
for (objc_ivars) |ivar| {
- try self.addObjCIVarSymbol(allocator, ivar);
+ try self.addObjCIVar(gpa, ivar);
}
}
if (reexp.objc_eh_types) |objc_eh_types| {
for (objc_eh_types) |eht| {
- try self.addObjCEhTypeSymbol(allocator, eht);
+ try self.addObjCEhType(gpa, eht);
}
}
}
@@ -492,19 +358,19 @@ pub fn parseFromStub(
if (stub.objc_classes) |classes| {
for (classes) |sym_name| {
- try self.addObjCClassSymbol(allocator, sym_name);
+ try self.addObjCClass(gpa, sym_name);
}
}
if (stub.objc_ivars) |objc_ivars| {
for (objc_ivars) |ivar| {
- try self.addObjCIVarSymbol(allocator, ivar);
+ try self.addObjCIVar(gpa, ivar);
}
}
if (stub.objc_eh_types) |objc_eh_types| {
for (objc_eh_types) |eht| {
- try self.addObjCEhTypeSymbol(allocator, eht);
+ try self.addObjCEhType(gpa, eht);
}
}
},
@@ -514,10 +380,9 @@ pub fn parseFromStub(
// For V4, we add dependent libs in a separate pass since some stubs such as libSystem include
// re-exports directly in the stub file.
for (lib_stub.inner) |elem| {
- if (elem == .v3) break;
+ if (elem == .v3) continue;
const stub = elem.v4;
- // TODO track which libs were already parsed in different steps
if (stub.reexported_libraries) |reexports| {
for (reexports) |reexp| {
if (!matcher.matchesTarget(reexp.targets)) continue;
@@ -527,30 +392,437 @@ pub fn parseFromStub(
log.debug(" (found re-export '{s}')", .{lib});
- const dep_id = try Id.default(allocator, lib);
- try dependent_libs.writeItem(.{ .id = dep_id, .parent = dylib_id });
+ const dep_id = try Id.default(gpa, lib);
+ try self.dependents.append(gpa, dep_id);
}
}
}
}
}
-const Dylib = @This();
+fn addObjCClass(self: *Dylib, allocator: Allocator, name: []const u8) !void {
+ try self.addObjCExport(allocator, "_OBJC_CLASS_", name);
+ try self.addObjCExport(allocator, "_OBJC_METACLASS_", name);
+}
+
+fn addObjCIVar(self: *Dylib, allocator: Allocator, name: []const u8) !void {
+ try self.addObjCExport(allocator, "_OBJC_IVAR_", name);
+}
+
+fn addObjCEhType(self: *Dylib, allocator: Allocator, name: []const u8) !void {
+ try self.addObjCExport(allocator, "_OBJC_EHTYPE_", name);
+}
+
+fn addObjCExport(
+ self: *Dylib,
+ allocator: Allocator,
+ comptime prefix: []const u8,
+ name: []const u8,
+) !void {
+ const full_name = try std.fmt.allocPrint(allocator, prefix ++ "$_{s}", .{name});
+ defer allocator.free(full_name);
+ try self.addExport(allocator, full_name, .{});
+}
+
+pub fn initSymbols(self: *Dylib, macho_file: *MachO) !void {
+ const gpa = macho_file.base.allocator;
+
+ try self.symbols.ensureTotalCapacityPrecise(gpa, self.exports.items(.name).len);
+
+ for (self.exports.items(.name)) |noff| {
+ const name = self.getString(noff);
+ const off = try macho_file.string_intern.insert(gpa, name);
+ const gop = try macho_file.getOrCreateGlobal(off);
+ self.symbols.addOneAssumeCapacity().* = gop.index;
+ }
+}
+
+fn initPlatform(self: *Dylib) void {
+ var it = LoadCommandIterator{
+ .ncmds = self.header.?.ncmds,
+ .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds],
+ };
+ self.platform = while (it.next()) |cmd| {
+ switch (cmd.cmd()) {
+ .BUILD_VERSION,
+ .VERSION_MIN_MACOSX,
+ .VERSION_MIN_IPHONEOS,
+ .VERSION_MIN_TVOS,
+ .VERSION_MIN_WATCHOS,
+ => break MachO.Options.Platform.fromLoadCommand(cmd),
+ else => {},
+ }
+ } else null;
+}
+
+pub fn resolveSymbols(self: *Dylib, macho_file: *MachO) void {
+ const tracy = trace(@src());
+ defer tracy.end();
+
+ if (!self.explicit and !self.hoisted) return;
+
+ for (self.symbols.items, self.exports.items(.flags)) |index, flags| {
+ const global = macho_file.getSymbol(index);
+ if (self.asFile().getSymbolRank(.{
+ .weak = flags.weak,
+ }) < global.getSymbolRank(macho_file)) {
+ global.value = 0;
+ global.atom = 0;
+ global.nlist_idx = 0;
+ global.file = self.index;
+ global.flags.weak = flags.weak;
+ global.flags.weak_ref = false;
+ global.flags.tlv = flags.tlv;
+ global.flags.dyn_ref = false;
+ global.flags.tentative = false;
+ global.visibility = .global;
+ }
+ }
+}
+
+pub fn resetGlobals(self: *Dylib, macho_file: *MachO) void {
+ for (self.symbols.items) |sym_index| {
+ const sym = macho_file.getSymbol(sym_index);
+ const name = sym.name;
+ sym.* = .{};
+ sym.name = name;
+ }
+}
+
+pub fn isAlive(self: Dylib, macho_file: *MachO) bool {
+ if (!macho_file.options.dead_strip_dylibs) return self.explicit or self.referenced or self.needed;
+ return self.referenced or self.needed;
+}
+
+pub fn markReferenced(self: *Dylib, macho_file: *MachO) void {
+ const tracy = trace(@src());
+ defer tracy.end();
+
+ for (self.symbols.items) |global_index| {
+ const global = macho_file.getSymbol(global_index);
+ const file_ptr = global.getFile(macho_file) orelse continue;
+ if (file_ptr.getIndex() != self.index) continue;
+ if (global.isLocal()) continue;
+ self.referenced = true;
+ break;
+ }
+}
+
+pub fn calcSymtabSize(self: *Dylib, macho_file: *MachO) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+
+ for (self.symbols.items) |global_index| {
+ const global = macho_file.getSymbol(global_index);
+ const file_ptr = global.getFile(macho_file) orelse continue;
+ if (file_ptr.getIndex() != self.index) continue;
+ if (global.isLocal()) continue;
+ assert(global.flags.import);
+ global.flags.output_symtab = true;
+ try global.addExtra(.{ .symtab = self.output_symtab_ctx.nimports }, macho_file);
+ self.output_symtab_ctx.nimports += 1;
+ self.output_symtab_ctx.strsize += @as(u32, @intCast(global.getName(macho_file).len + 1));
+ }
+}
+
+pub fn writeSymtab(self: Dylib, macho_file: *MachO) void {
+ const tracy = trace(@src());
+ defer tracy.end();
+
+ for (self.symbols.items) |global_index| {
+ const global = macho_file.getSymbol(global_index);
+ const file = global.getFile(macho_file) orelse continue;
+ if (file.getIndex() != self.index) continue;
+ const idx = global.getOutputSymtabIndex(macho_file) orelse continue;
+ const n_strx = @as(u32, @intCast(macho_file.strtab.items.len));
+ macho_file.strtab.appendSliceAssumeCapacity(global.getName(macho_file));
+ macho_file.strtab.appendAssumeCapacity(0);
+ const out_sym = &macho_file.symtab.items[idx];
+ out_sym.n_strx = n_strx;
+ global.setOutputSym(macho_file, out_sym);
+ }
+}
+
+pub inline fn getUmbrella(self: Dylib, macho_file: *MachO) *Dylib {
+ return macho_file.getFile(self.umbrella).?.dylib;
+}
+
+fn getLoadCommand(self: Dylib, lc: macho.LC) ?LoadCommandIterator.LoadCommand {
+ var it = LoadCommandIterator{
+ .ncmds = self.header.?.ncmds,
+ .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds],
+ };
+ while (it.next()) |cmd| {
+ if (cmd.cmd() == lc) return cmd;
+ } else return null;
+}
+
+fn insertString(self: *Dylib, allocator: Allocator, name: []const u8) !u32 {
+ const off = @as(u32, @intCast(self.strtab.items.len));
+ try self.strtab.writer(allocator).print("{s}\x00", .{name});
+ return off;
+}
+
+pub inline fn getString(self: Dylib, off: u32) [:0]const u8 {
+ assert(off < self.strtab.items.len);
+ return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.items.ptr + off)), 0);
+}
+
+pub fn asFile(self: *Dylib) File {
+ return .{ .dylib = self };
+}
+
+pub fn format(
+ self: *Dylib,
+ comptime unused_fmt_string: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+) !void {
+ _ = self;
+ _ = unused_fmt_string;
+ _ = options;
+ _ = writer;
+ @compileError("do not format dylib directly");
+}
+
+pub fn fmtSymtab(self: *Dylib, macho_file: *MachO) std.fmt.Formatter(formatSymtab) {
+ return .{ .data = .{
+ .dylib = self,
+ .macho_file = macho_file,
+ } };
+}
+
+const FormatContext = struct {
+ dylib: *Dylib,
+ macho_file: *MachO,
+};
+
+fn formatSymtab(
+ ctx: FormatContext,
+ comptime unused_fmt_string: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+) !void {
+ _ = unused_fmt_string;
+ _ = options;
+ const dylib = ctx.dylib;
+ try writer.writeAll(" globals\n");
+ for (dylib.symbols.items) |index| {
+ const global = ctx.macho_file.getSymbol(index);
+ try writer.print(" {}\n", .{global.fmt(ctx.macho_file)});
+ }
+}
+
+pub const TargetMatcher = struct {
+ allocator: Allocator,
+ cpu_arch: std.Target.Cpu.Arch,
+ platform: macho.PLATFORM,
+ target_strings: std.ArrayListUnmanaged([]const u8) = .{},
+
+ pub fn init(allocator: Allocator, cpu_arch: std.Target.Cpu.Arch, platform: macho.PLATFORM) !TargetMatcher {
+ var self = TargetMatcher{
+ .allocator = allocator,
+ .cpu_arch = cpu_arch,
+ .platform = platform,
+ };
+ const apple_string = try targetToAppleString(allocator, cpu_arch, platform);
+ try self.target_strings.append(allocator, apple_string);
+
+ switch (platform) {
+ .IOSSIMULATOR, .TVOSSIMULATOR, .WATCHOSSIMULATOR => {
+ // For Apple simulator targets, linking gets tricky as we need to link against the simulator
+ // host's dylibs too.
+ const host_target = try targetToAppleString(allocator, cpu_arch, .MACOS);
+ try self.target_strings.append(allocator, host_target);
+ },
+ else => {},
+ }
+
+ return self;
+ }
+
+ pub fn deinit(self: *TargetMatcher) void {
+ for (self.target_strings.items) |t| {
+ self.allocator.free(t);
+ }
+ self.target_strings.deinit(self.allocator);
+ }
+
+ inline fn cpuArchToAppleString(cpu_arch: std.Target.Cpu.Arch) []const u8 {
+ return switch (cpu_arch) {
+ .aarch64 => "arm64",
+ .x86_64 => "x86_64",
+ else => unreachable,
+ };
+ }
+
+ pub fn targetToAppleString(allocator: Allocator, cpu_arch: std.Target.Cpu.Arch, platform: macho.PLATFORM) ![]const u8 {
+ const arch = cpuArchToAppleString(cpu_arch);
+ const plat = switch (platform) {
+ .MACOS => "macos",
+ .IOS => "ios",
+ .TVOS => "tvos",
+ .WATCHOS => "watchos",
+ .IOSSIMULATOR => "ios-simulator",
+ .TVOSSIMULATOR => "tvos-simulator",
+ .WATCHOSSIMULATOR => "watchos-simulator",
+ .BRIDGEOS => "bridgeos",
+ .MACCATALYST => "maccatalyst",
+ .DRIVERKIT => "driverkit",
+ else => unreachable,
+ };
+ return std.fmt.allocPrint(allocator, "{s}-{s}", .{ arch, plat });
+ }
+
+ fn hasValue(stack: []const []const u8, needle: []const u8) bool {
+ for (stack) |v| {
+ if (mem.eql(u8, v, needle)) return true;
+ }
+ return false;
+ }
+
+ fn matchesArch(self: TargetMatcher, archs: []const []const u8) bool {
+ return hasValue(archs, cpuArchToAppleString(self.cpu_arch));
+ }
+
+ fn matchesTarget(self: TargetMatcher, targets: []const []const u8) bool {
+ for (self.target_strings.items) |t| {
+ if (hasValue(targets, t)) return true;
+ }
+ return false;
+ }
+
+ pub fn matchesTargetTbd(self: TargetMatcher, tbd: Tbd) !bool {
+ var arena = std.heap.ArenaAllocator.init(self.allocator);
+ defer arena.deinit();
+
+ const targets = switch (tbd) {
+ .v3 => |v3| blk: {
+ var targets = std.ArrayList([]const u8).init(arena.allocator());
+ for (v3.archs) |arch| {
+ const target = try std.fmt.allocPrint(arena.allocator(), "{s}-{s}", .{ arch, v3.platform });
+ try targets.append(target);
+ }
+ break :blk targets.items;
+ },
+ .v4 => |v4| v4.targets,
+ };
+
+ return self.matchesTarget(targets);
+ }
+};
+
+pub const Id = struct {
+ name: []const u8,
+ timestamp: u32,
+ current_version: u32,
+ compatibility_version: u32,
+
+ pub fn default(allocator: Allocator, name: []const u8) !Id {
+ return Id{
+ .name = try allocator.dupe(u8, name),
+ .timestamp = 2,
+ .current_version = 0x10000,
+ .compatibility_version = 0x10000,
+ };
+ }
+
+ pub fn fromLoadCommand(allocator: Allocator, lc: macho.dylib_command, name: []const u8) !Id {
+ return Id{
+ .name = try allocator.dupe(u8, name),
+ .timestamp = lc.dylib.timestamp,
+ .current_version = lc.dylib.current_version,
+ .compatibility_version = lc.dylib.compatibility_version,
+ };
+ }
+
+ pub fn deinit(id: Id, allocator: Allocator) void {
+ allocator.free(id.name);
+ }
+
+ pub const ParseError = fmt.ParseIntError || fmt.BufPrintError;
+
+ pub fn parseCurrentVersion(id: *Id, version: anytype) ParseError!void {
+ id.current_version = try parseVersion(version);
+ }
+
+ pub fn parseCompatibilityVersion(id: *Id, version: anytype) ParseError!void {
+ id.compatibility_version = try parseVersion(version);
+ }
+
+ fn parseVersion(version: anytype) ParseError!u32 {
+ const string = blk: {
+ switch (version) {
+ .int => |int| {
+ var out: u32 = 0;
+ const major = math.cast(u16, int) orelse return error.Overflow;
+ out += @as(u32, @intCast(major)) << 16;
+ return out;
+ },
+ .float => |float| {
+ var buf: [256]u8 = undefined;
+ break :blk try fmt.bufPrint(&buf, "{d:.2}", .{float});
+ },
+ .string => |string| {
+ break :blk string;
+ },
+ }
+ };
+
+ var out: u32 = 0;
+ var values: [3][]const u8 = undefined;
+
+ var split = mem.split(u8, string, ".");
+ var count: u4 = 0;
+ while (split.next()) |value| {
+ if (count > 2) {
+ log.debug("malformed version field: {s}", .{string});
+ return 0x10000;
+ }
+ values[count] = value;
+ count += 1;
+ }
+
+ if (count > 2) {
+ out += try fmt.parseInt(u8, values[2], 10);
+ }
+ if (count > 1) {
+ out += @as(u32, @intCast(try fmt.parseInt(u8, values[1], 10))) << 8;
+ }
+ out += @as(u32, @intCast(try fmt.parseInt(u16, values[0], 10))) << 16;
+
+ return out;
+ }
+};
+
+const Export = struct {
+ name: u32,
+ flags: Flags,
+
+ const Flags = packed struct {
+ abs: bool = false,
+ weak: bool = false,
+ tlv: bool = false,
+ };
+};
-const std = @import("std");
const assert = std.debug.assert;
+const fat = @import("fat.zig");
const fs = std.fs;
const fmt = std.fmt;
const log = std.log.scoped(.link);
const macho = std.macho;
const math = std.math;
const mem = std.mem;
-const fat = @import("fat.zig");
const tapi = @import("../tapi.zig");
+const trace = @import("../tracy.zig").trace;
+const std = @import("std");
const Allocator = mem.Allocator;
+const Dylib = @This();
+const File = @import("file.zig").File;
const LibStub = tapi.LibStub;
const LoadCommandIterator = macho.LoadCommandIterator;
const MachO = @import("../MachO.zig");
-const Platform = @import("load_commands.zig").Platform;
+const Symbol = @import("Symbol.zig");
const Tbd = tapi.Tbd;
src/link/MachO/eh_frame.zig
@@ -1,628 +1,538 @@
-pub fn scanRelocs(macho_file: *MachO) !void {
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
-
- for (macho_file.objects.items, 0..) |*object, object_id| {
- var cies = std.AutoHashMap(u32, void).init(gpa);
- defer cies.deinit();
-
- var it = object.getEhFrameRecordsIterator();
-
- for (object.exec_atoms.items) |atom_index| {
- var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index);
- while (inner_syms_it.next()) |sym| {
- const fde_offset = object.eh_frame_records_lookup.get(sym) orelse continue;
- if (object.eh_frame_relocs_lookup.get(fde_offset).?.dead) continue;
- it.seekTo(fde_offset);
- const fde = (it.next() catch continue).?; // We don't care about this error since we already handled it
-
- const cie_ptr = fde.getCiePointerSource(@intCast(object_id), macho_file, fde_offset);
- const cie_offset = fde_offset + 4 - cie_ptr;
-
- if (!cies.contains(cie_offset)) {
- try cies.putNoClobber(cie_offset, {});
- it.seekTo(cie_offset);
- const cie = (it.next() catch continue).?; // We don't care about this error since we already handled it
- try cie.scanRelocs(macho_file, @as(u32, @intCast(object_id)), cie_offset);
+pub const Cie = struct {
+ /// Includes 4byte size cell.
+ offset: u32,
+ out_offset: u32 = 0,
+ size: u32,
+ lsda_size: ?enum { p32, p64 } = null,
+ personality: ?Personality = null,
+ file: File.Index = 0,
+ alive: bool = false,
+
+ pub fn parse(cie: *Cie, macho_file: *MachO) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+
+ const data = cie.getData(macho_file);
+ const aug = std.mem.sliceTo(@as([*:0]const u8, @ptrCast(data.ptr + 9)), 0);
+
+ if (aug[0] != 'z') return; // TODO should we error out?
+
+ var stream = std.io.fixedBufferStream(data[9 + aug.len + 1 ..]);
+ var creader = std.io.countingReader(stream.reader());
+ const reader = creader.reader();
+
+ _ = try leb.readULEB128(u64, reader); // code alignment factor
+ _ = try leb.readULEB128(u64, reader); // data alignment factor
+ _ = try leb.readULEB128(u64, reader); // return address register
+ _ = try leb.readULEB128(u64, reader); // augmentation data length
+
+ for (aug[1..]) |ch| switch (ch) {
+ 'R' => {
+ const enc = try reader.readByte();
+ if (enc & 0xf != EH_PE.absptr or enc & EH_PE.pcrel == 0) {
+ @panic("unexpected pointer encoding"); // TODO error
}
- }
- }
- }
-}
-
-pub fn calcSectionSize(macho_file: *MachO, unwind_info: *const UnwindInfo) error{OutOfMemory}!void {
- const sect_id = macho_file.eh_frame_section_index orelse return;
- const sect = &macho_file.sections.items(.header)[sect_id];
- sect.@"align" = 3;
- sect.size = 0;
-
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const cpu_arch = target.cpu.arch;
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
- var size: u32 = 0;
-
- for (macho_file.objects.items, 0..) |*object, object_id| {
- var cies = std.AutoHashMap(u32, u32).init(gpa);
- defer cies.deinit();
-
- var eh_it = object.getEhFrameRecordsIterator();
-
- for (object.exec_atoms.items) |atom_index| {
- var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index);
- while (inner_syms_it.next()) |sym| {
- const fde_record_offset = object.eh_frame_records_lookup.get(sym) orelse continue;
- if (object.eh_frame_relocs_lookup.get(fde_record_offset).?.dead) continue;
-
- const record_id = unwind_info.records_lookup.get(sym) orelse continue;
- const record = unwind_info.records.items[record_id];
-
- // TODO skip this check if no __compact_unwind is present
- const is_dwarf = UnwindInfo.UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch);
- if (!is_dwarf) continue;
-
- eh_it.seekTo(fde_record_offset);
- const source_fde_record = (eh_it.next() catch continue).?; // We already handled this error
-
- const cie_ptr = source_fde_record.getCiePointerSource(@intCast(object_id), macho_file, fde_record_offset);
- const cie_offset = fde_record_offset + 4 - cie_ptr;
-
- const gop = try cies.getOrPut(cie_offset);
- if (!gop.found_existing) {
- eh_it.seekTo(cie_offset);
- const source_cie_record = (eh_it.next() catch continue).?; // We already handled this error
- gop.value_ptr.* = size;
- size += source_cie_record.getSize();
+ },
+ 'P' => {
+ const enc = try reader.readByte();
+ if (enc != EH_PE.pcrel | EH_PE.indirect | EH_PE.sdata4) {
+ @panic("unexpected personality pointer encoding"); // TODO error
}
+ _ = try reader.readInt(u32, .little); // personality pointer
+ },
+ 'L' => {
+ const enc = try reader.readByte();
+ switch (enc & 0xf) {
+ EH_PE.sdata4 => cie.lsda_size = .p32,
+ EH_PE.absptr => cie.lsda_size = .p64,
+ else => unreachable, // TODO error
+ }
+ },
+ else => @panic("unexpected augmentation string"), // TODO error
+ };
+ }
- size += source_fde_record.getSize();
- }
- }
+ /// Total record size in bytes including the leading 4-byte length cell
+ /// (the `size` field itself excludes it).
+ pub inline fn getSize(cie: Cie) u32 {
+ return cie.size + 4;
+ }
- sect.size = size;
+ pub fn getObject(cie: Cie, macho_file: *MachO) *Object {
+ const file = macho_file.getFile(cie.file).?;
+ return file.object;
}
-}
-pub fn write(macho_file: *MachO, unwind_info: *UnwindInfo) !void {
- const sect_id = macho_file.eh_frame_section_index orelse return;
- const sect = macho_file.sections.items(.header)[sect_id];
- const seg_id = macho_file.sections.items(.segment_index)[sect_id];
- const seg = macho_file.segments.items[seg_id];
-
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const cpu_arch = target.cpu.arch;
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
-
- var eh_records = std.AutoArrayHashMap(u32, EhFrameRecord(true)).init(gpa);
- defer {
- for (eh_records.values()) |*rec| {
- rec.deinit(gpa);
- }
- eh_records.deinit();
+ /// Raw bytes of this CIE within the owning object's __eh_frame data,
+ /// including the leading 4-byte size cell.
+ pub fn getData(cie: Cie, macho_file: *MachO) []const u8 {
+ const object = cie.getObject(macho_file);
+ return object.eh_frame_data.items[cie.offset..][0..cie.getSize()];
    }
- var eh_frame_offset: u32 = 0;
+ /// Resolves the CIE's personality routine symbol, or null if the CIE
+ /// declares no personality ('P' augmentation absent).
+ pub fn getPersonality(cie: Cie, macho_file: *MachO) ?*Symbol {
+ const personality = cie.personality orelse return null;
+ return macho_file.getSymbol(personality.index);
+ }
- for (macho_file.objects.items, 0..) |*object, object_id| {
- try eh_records.ensureUnusedCapacity(2 * @as(u32, @intCast(object.exec_atoms.items.len)));
+    /// Two CIEs are equal when their raw record bytes match and they
+    /// reference the same personality symbol (or both have none).
+    /// Used to deduplicate CIE records across objects in calcSize.
+    pub fn eql(cie: Cie, other: Cie, macho_file: *MachO) bool {
+        if (!std.mem.eql(u8, cie.getData(macho_file), other.getData(macho_file))) return false;
+        // Exactly one side lacking a personality means they differ.
+        // (The previous fall-through `!= null or != null` check also
+        // rejected the both-present-and-equal case, defeating dedup.)
+        if ((cie.personality == null) != (other.personality == null)) return false;
+        if (cie.personality) |pers| {
+            if (pers.index != other.personality.?.index) return false;
+        }
+        return true;
+    }
- var cies = std.AutoHashMap(u32, u32).init(gpa);
- defer cies.deinit();
+ pub fn format(
+ cie: Cie,
+ comptime unused_fmt_string: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+ ) !void {
+ _ = cie;
+ _ = unused_fmt_string;
+ _ = options;
+ _ = writer;
+ @compileError("do not format CIEs directly");
+ }
- var eh_it = object.getEhFrameRecordsIterator();
+ /// Returns a formatter bound to `macho_file`; use this (not `format`)
+ /// to print a CIE with std.fmt.
+ pub fn fmt(cie: Cie, macho_file: *MachO) std.fmt.Formatter(format2) {
+ return .{ .data = .{
+ .cie = cie,
+ .macho_file = macho_file,
+ } };
+ }
- for (object.exec_atoms.items) |atom_index| {
- var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index);
- while (inner_syms_it.next()) |reloc_target| {
- const fde_record_offset = object.eh_frame_records_lookup.get(reloc_target) orelse continue;
- if (object.eh_frame_relocs_lookup.get(fde_record_offset).?.dead) continue;
+ const FormatContext = struct {
+ cie: Cie,
+ macho_file: *MachO,
+ };
- const record_id = unwind_info.records_lookup.get(reloc_target) orelse continue;
- const record = &unwind_info.records.items[record_id];
+ /// Formatter implementation: prints "@offset : size(..)" and appends
+ /// " : [*]" for records not marked alive.
+ fn format2(
+ ctx: FormatContext,
+ comptime unused_fmt_string: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+ ) !void {
+ _ = unused_fmt_string;
+ _ = options;
+ const cie = ctx.cie;
+ try writer.print("@{x} : size({x})", .{
+ cie.offset,
+ cie.getSize(),
+ });
+ if (!cie.alive) try writer.writeAll(" : [*]");
+ }
- // TODO skip this check if no __compact_unwind is present
- const is_dwarf = UnwindInfo.UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch);
- if (!is_dwarf) continue;
+ pub const Index = u32;
- eh_it.seekTo(fde_record_offset);
- const source_fde_record = (eh_it.next() catch continue).?; // We already handled this error
+ /// Reference to a personality routine: the symbol index plus the byte
+ /// offset of the personality pointer within the CIE record.
+ pub const Personality = struct {
+ index: Symbol.Index = 0,
+ offset: u32 = 0,
+ };
+};
- const cie_ptr = source_fde_record.getCiePointerSource(@intCast(object_id), macho_file, fde_record_offset);
- const cie_offset = fde_record_offset + 4 - cie_ptr;
+pub const Fde = struct {
+ /// Includes 4byte size cell.
+ offset: u32,
+ out_offset: u32 = 0,
+ size: u32,
+ cie: Cie.Index,
+ atom: Atom.Index = 0,
+ atom_offset: u32 = 0,
+ lsda: Atom.Index = 0,
+ lsda_offset: u32 = 0,
+ lsda_ptr_offset: u32 = 0,
+ file: File.Index = 0,
+ alive: bool = true,
+
+ pub fn parse(fde: *Fde, macho_file: *MachO) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+
+ const data = fde.getData(macho_file);
+ const object = fde.getObject(macho_file);
+ const sect = object.sections.items(.header)[object.eh_frame_sect_index.?];
+
+ // Parse target atom index
+ const pc_begin = std.mem.readInt(i64, data[8..][0..8], .little);
+ const taddr: u64 = @intCast(@as(i64, @intCast(sect.addr + fde.offset + 8)) + pc_begin);
+ fde.atom = object.findAtom(taddr) orelse {
+ macho_file.base.fatal("{}: {s},{s}: 0x{x}: invalid function reference in FDE", .{
+ object.fmtPath(), sect.segName(), sect.sectName(), fde.offset + 8,
+ });
+ return error.ParseFailed;
+ };
+ const atom = fde.getAtom(macho_file);
+ fde.atom_offset = @intCast(taddr - atom.getInputAddress(macho_file));
+
+ // Associate with a CIE
+ const cie_ptr = std.mem.readInt(u32, data[4..8], .little);
+ const cie_offset = fde.offset + 4 - cie_ptr;
+ const cie_index = for (object.cies.items, 0..) |cie, cie_index| {
+ if (cie.offset == cie_offset) break @as(Cie.Index, @intCast(cie_index));
+ } else null;
+ if (cie_index) |cie| {
+ fde.cie = cie;
+ } else {
+ macho_file.base.fatal("{}: no matching CIE found for FDE at offset {x}", .{
+ object.fmtPath(),
+ fde.offset,
+ });
+ return error.ParseFailed;
+ }
- const gop = try cies.getOrPut(cie_offset);
- if (!gop.found_existing) {
- eh_it.seekTo(cie_offset);
- const source_cie_record = (eh_it.next() catch continue).?; // We already handled this error
- var cie_record = try source_cie_record.toOwned(gpa);
- try cie_record.relocate(macho_file, @as(u32, @intCast(object_id)), .{
- .source_offset = cie_offset,
- .out_offset = eh_frame_offset,
- .sect_addr = sect.addr,
- });
- eh_records.putAssumeCapacityNoClobber(eh_frame_offset, cie_record);
- gop.value_ptr.* = eh_frame_offset;
- eh_frame_offset += cie_record.getSize();
- }
+ const cie = fde.getCie(macho_file);
- var fde_record = try source_fde_record.toOwned(gpa);
- try fde_record.relocate(macho_file, @as(u32, @intCast(object_id)), .{
- .source_offset = fde_record_offset,
- .out_offset = eh_frame_offset,
- .sect_addr = sect.addr,
+ // Parse LSDA atom index if any
+ if (cie.lsda_size) |lsda_size| {
+ var stream = std.io.fixedBufferStream(data[24..]);
+ var creader = std.io.countingReader(stream.reader());
+ const reader = creader.reader();
+ _ = try leb.readULEB128(u64, reader); // augmentation length
+ fde.lsda_ptr_offset = @intCast(creader.bytes_read + 24);
+ const lsda_ptr = switch (lsda_size) {
+ .p32 => try reader.readInt(i32, .little),
+ .p64 => try reader.readInt(i64, .little),
+ };
+ const lsda_addr: u64 = @intCast(@as(i64, @intCast(sect.addr + fde.offset + fde.lsda_ptr_offset)) + lsda_ptr);
+ fde.lsda = object.findAtom(lsda_addr) orelse {
+ macho_file.base.fatal("{}: {s},{s}: 0x{x}: invalid LSDA reference in FDE", .{
+ object.fmtPath(), sect.segName(), sect.sectName(), fde.offset + fde.lsda_ptr_offset,
});
- fde_record.setCiePointer(eh_frame_offset + 4 - gop.value_ptr.*);
-
- switch (cpu_arch) {
- .aarch64 => {}, // relocs take care of LSDA pointers
- .x86_64 => {
- // We need to relocate target symbol address ourselves.
- const atom_sym = macho_file.getSymbol(reloc_target);
- try fde_record.setTargetSymbolAddress(atom_sym.n_value, .{
- .base_addr = sect.addr,
- .base_offset = eh_frame_offset,
- });
-
- // We need to parse LSDA pointer and relocate ourselves.
- const cie_record = eh_records.get(
- eh_frame_offset + 4 - fde_record.getCiePointer(),
- ).?;
- const eh_frame_sect = object.getSourceSection(object.eh_frame_sect_id.?);
- const source_lsda_ptr = fde_record.getLsdaPointer(cie_record, .{
- .base_addr = eh_frame_sect.addr,
- .base_offset = fde_record_offset,
- }) catch continue; // We already handled this error
- if (source_lsda_ptr) |ptr| {
- const sym_index = object.getSymbolByAddress(ptr, null);
- const sym = object.symtab[sym_index];
- fde_record.setLsdaPointer(cie_record, sym.n_value, .{
- .base_addr = sect.addr,
- .base_offset = eh_frame_offset,
- }) catch continue; // We already handled this error
- }
- },
- else => unreachable,
- }
-
- eh_records.putAssumeCapacityNoClobber(eh_frame_offset, fde_record);
-
- UnwindInfo.UnwindEncoding.setDwarfSectionOffset(
- &record.compactUnwindEncoding,
- cpu_arch,
- @as(u24, @intCast(eh_frame_offset)),
- );
-
- const cie_record = eh_records.get(
- eh_frame_offset + 4 - fde_record.getCiePointer(),
- ).?;
- const lsda_ptr = fde_record.getLsdaPointer(cie_record, .{
- .base_addr = sect.addr,
- .base_offset = eh_frame_offset,
- }) catch continue; // We already handled this error
- if (lsda_ptr) |ptr| {
- record.lsda = ptr - seg.vmaddr;
- }
-
- eh_frame_offset += fde_record.getSize();
- }
+ return error.ParseFailed;
+ };
+ const lsda_atom = fde.getLsdaAtom(macho_file).?;
+ fde.lsda_offset = @intCast(lsda_addr - lsda_atom.getInputAddress(macho_file));
}
}
- var buffer = std.ArrayList(u8).init(gpa);
- defer buffer.deinit();
- const writer = buffer.writer();
+ /// Total record size in bytes including the leading 4-byte length cell
+ /// (the `size` field itself excludes it).
+ pub inline fn getSize(fde: Fde) u32 {
+ return fde.size + 4;
+ }
- for (eh_records.values()) |record| {
- try writer.writeInt(u32, record.size, .little);
- try buffer.appendSlice(record.data);
+ pub fn getObject(fde: Fde, macho_file: *MachO) *Object {
+ const file = macho_file.getFile(fde.file).?;
+ return file.object;
}
- try macho_file.base.file.?.pwriteAll(buffer.items, sect.offset);
-}
-const EhFrameRecordTag = enum { cie, fde };
+ /// Raw bytes of this FDE within the owning object's __eh_frame data,
+ /// including the leading 4-byte size cell.
+ pub fn getData(fde: Fde, macho_file: *MachO) []const u8 {
+ const object = fde.getObject(macho_file);
+ return object.eh_frame_data.items[fde.offset..][0..fde.getSize()];
+ }
-pub fn EhFrameRecord(comptime is_mutable: bool) type {
- return struct {
- tag: EhFrameRecordTag,
- size: u32,
- data: if (is_mutable) []u8 else []const u8,
+ /// The CIE this FDE is associated with (index set during Fde.parse),
+ /// looked up in the owning object.
+ pub fn getCie(fde: Fde, macho_file: *MachO) *const Cie {
+ const object = fde.getObject(macho_file);
+ return &object.cies.items[fde.cie];
+ }
- const Record = @This();
+ /// The function atom this FDE describes. Asserts the atom exists —
+ /// `atom` is always resolved during Fde.parse.
+ pub fn getAtom(fde: Fde, macho_file: *MachO) *Atom {
+ return macho_file.getAtom(fde.atom).?;
+ }
- pub fn deinit(rec: *Record, gpa: Allocator) void {
- comptime assert(is_mutable);
- gpa.free(rec.data);
- }
+ /// The LSDA atom, or null when the FDE has none.
+ /// NOTE(review): relies on the default `lsda = 0` mapping to null via
+ /// macho_file.getAtom — confirm index 0 is never a valid atom here.
+ pub fn getLsdaAtom(fde: Fde, macho_file: *MachO) ?*Atom {
+ return macho_file.getAtom(fde.lsda);
+ }
- pub fn toOwned(rec: Record, gpa: Allocator) Allocator.Error!EhFrameRecord(true) {
- const data = try gpa.dupe(u8, rec.data);
- return EhFrameRecord(true){
- .tag = rec.tag,
- .size = rec.size,
- .data = data,
- };
- }
+ pub fn format(
+ fde: Fde,
+ comptime unused_fmt_string: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+ ) !void {
+ _ = fde;
+ _ = unused_fmt_string;
+ _ = options;
+ _ = writer;
+ @compileError("do not format FDEs directly");
+ }
- pub inline fn getSize(rec: Record) u32 {
- return 4 + rec.size;
- }
+ pub fn fmt(fde: Fde, macho_file: *MachO) std.fmt.Formatter(format2) {
+ return .{ .data = .{
+ .fde = fde,
+ .macho_file = macho_file,
+ } };
+ }
- pub fn scanRelocs(
- rec: Record,
- macho_file: *MachO,
- object_id: u32,
- source_offset: u32,
- ) !void {
- if (rec.getPersonalityPointerReloc(macho_file, object_id, source_offset)) |target| {
- try macho_file.addGotEntry(target);
- }
- }
+ const FormatContext = struct {
+ fde: Fde,
+ macho_file: *MachO,
+ };
- pub fn getTargetSymbolAddress(rec: Record, ctx: struct {
- base_addr: u64,
- base_offset: u64,
- }) u64 {
- assert(rec.tag == .fde);
- const addend = mem.readInt(i64, rec.data[4..][0..8], .little);
- return @as(u64, @intCast(@as(i64, @intCast(ctx.base_addr + ctx.base_offset + 8)) + addend));
- }
+ /// Formatter implementation: prints "@offset : size(..) : cie(N) : name"
+ /// and appends " : [*]" for records not marked alive.
+ fn format2(
+ ctx: FormatContext,
+ comptime unused_fmt_string: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+ ) !void {
+ _ = unused_fmt_string;
+ _ = options;
+ const fde = ctx.fde;
+ const macho_file = ctx.macho_file;
+ try writer.print("@{x} : size({x}) : cie({d}) : {s}", .{
+ fde.offset,
+ fde.getSize(),
+ fde.cie,
+ fde.getAtom(macho_file).getName(macho_file),
+ });
+ if (!fde.alive) try writer.writeAll(" : [*]");
+ }
- pub fn setTargetSymbolAddress(rec: *Record, value: u64, ctx: struct {
- base_addr: u64,
- base_offset: u64,
- }) !void {
- assert(rec.tag == .fde);
- const addend = @as(i64, @intCast(value)) - @as(i64, @intCast(ctx.base_addr + ctx.base_offset + 8));
- mem.writeInt(i64, rec.data[4..][0..8], addend, .little);
- }
+ pub const Index = u32;
+};
- pub fn getPersonalityPointerReloc(
- rec: Record,
- macho_file: *MachO,
- object_id: u32,
- source_offset: u32,
- ) ?SymbolWithLoc {
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const cpu_arch = target.cpu.arch;
- const relocs = getRelocs(macho_file, object_id, source_offset);
- for (relocs) |rel| {
- switch (cpu_arch) {
- .aarch64 => {
- const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type));
- switch (rel_type) {
- .ARM64_RELOC_SUBTRACTOR,
- .ARM64_RELOC_UNSIGNED,
- => continue,
- .ARM64_RELOC_POINTER_TO_GOT => {},
- else => unreachable,
- }
- },
- .x86_64 => {
- const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type));
- switch (rel_type) {
- .X86_64_RELOC_GOT => {},
- else => unreachable,
- }
- },
- else => unreachable,
- }
- const reloc_target = Atom.parseRelocTarget(macho_file, .{
- .object_id = object_id,
- .rel = rel,
- .code = rec.data,
- .base_offset = @as(i32, @intCast(source_offset)) + 4,
- });
- return reloc_target;
- }
- return null;
- }
+pub const Iterator = struct {
+ data: []const u8,
+ pos: u32 = 0,
- pub fn relocate(rec: *Record, macho_file: *MachO, object_id: u32, ctx: struct {
- source_offset: u32,
- out_offset: u32,
- sect_addr: u64,
- }) !void {
- comptime assert(is_mutable);
-
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const cpu_arch = target.cpu.arch;
- const relocs = getRelocs(macho_file, object_id, ctx.source_offset);
-
- for (relocs) |rel| {
- const reloc_target = Atom.parseRelocTarget(macho_file, .{
- .object_id = object_id,
- .rel = rel,
- .code = rec.data,
- .base_offset = @as(i32, @intCast(ctx.source_offset)) + 4,
- });
- const rel_offset = @as(u32, @intCast(rel.r_address - @as(i32, @intCast(ctx.source_offset)) - 4));
- const source_addr = ctx.sect_addr + rel_offset + ctx.out_offset + 4;
-
- switch (cpu_arch) {
- .aarch64 => {
- const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type));
- switch (rel_type) {
- .ARM64_RELOC_SUBTRACTOR => {
- // Address of the __eh_frame in the source object file
- },
- .ARM64_RELOC_POINTER_TO_GOT => {
- const target_addr = macho_file.getGotEntryAddress(reloc_target).?;
- const result = math.cast(i32, @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr))) orelse
- return error.Overflow;
- mem.writeInt(i32, rec.data[rel_offset..][0..4], result, .little);
- },
- .ARM64_RELOC_UNSIGNED => {
- assert(rel.r_extern == 1);
- const target_addr = Atom.getRelocTargetAddress(macho_file, reloc_target, false);
- const result = @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr));
- mem.writeInt(i64, rec.data[rel_offset..][0..8], @as(i64, @intCast(result)), .little);
- },
- else => unreachable,
- }
- },
- .x86_64 => {
- const rel_type = @as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type));
- switch (rel_type) {
- .X86_64_RELOC_GOT => {
- const target_addr = macho_file.getGotEntryAddress(reloc_target).?;
- const addend = mem.readInt(i32, rec.data[rel_offset..][0..4], .little);
- const adjusted_target_addr = @as(u64, @intCast(@as(i64, @intCast(target_addr)) + addend));
- const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0);
- mem.writeInt(i32, rec.data[rel_offset..][0..4], disp, .little);
- },
- else => unreachable,
- }
- },
- else => unreachable,
- }
- }
- }
+ /// One __eh_frame record as seen by the iterator: `offset` is the byte
+ /// position of the record's 4-byte length cell within `data`, and
+ /// `size` excludes that cell (the iterator advances by size + 4).
+ pub const Record = struct {
+ tag: enum { fde, cie },
+ offset: u32,
+ size: u32,
+ };
- pub fn getCiePointerSource(rec: Record, object_id: u32, macho_file: *MachO, offset: u32) u32 {
- assert(rec.tag == .fde);
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const cpu_arch = target.cpu.arch;
- const addend = mem.readInt(u32, rec.data[0..4], .little);
- switch (cpu_arch) {
- .aarch64 => {
- const relocs = getRelocs(macho_file, object_id, offset);
- const maybe_rel = for (relocs) |rel| {
- if (rel.r_address - @as(i32, @intCast(offset)) == 4 and
- @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)) == .ARM64_RELOC_SUBTRACTOR)
- break rel;
- } else null;
- const rel = maybe_rel orelse return addend;
- const object = &macho_file.objects.items[object_id];
- const target_addr = object.in_symtab.?[rel.r_symbolnum].n_value;
- const sect = object.getSourceSection(object.eh_frame_sect_id.?);
- return @intCast(sect.addr + offset - target_addr + addend);
- },
- .x86_64 => return addend,
- else => unreachable,
- }
- }
+ pub fn next(it: *Iterator) !?Record {
+ if (it.pos >= it.data.len) return null;
- pub fn getCiePointer(rec: Record) u32 {
- assert(rec.tag == .fde);
- return mem.readInt(u32, rec.data[0..4], .little);
- }
+ var stream = std.io.fixedBufferStream(it.data[it.pos..]);
+ const reader = stream.reader();
- pub fn setCiePointer(rec: *Record, ptr: u32) void {
- assert(rec.tag == .fde);
- mem.writeInt(u32, rec.data[0..4], ptr, .little);
- }
+ const size = try reader.readInt(u32, .little);
+ if (size == 0xFFFFFFFF) @panic("DWARF CFI is 32bit on macOS");
- pub fn getAugmentationString(rec: Record) []const u8 {
- assert(rec.tag == .cie);
- return mem.sliceTo(@as([*:0]const u8, @ptrCast(rec.data.ptr + 5)), 0);
- }
+ const id = try reader.readInt(u32, .little);
+ const record = Record{
+ .tag = if (id == 0) .cie else .fde,
+ .offset = it.pos,
+ .size = size,
+ };
+ it.pos += size + 4;
- pub fn getPersonalityPointer(rec: Record, ctx: struct {
- base_addr: u64,
- base_offset: u64,
- }) !?u64 {
- assert(rec.tag == .cie);
- const aug_str = rec.getAugmentationString();
+ return record;
+ }
+};
- var stream = std.io.fixedBufferStream(rec.data[9 + aug_str.len ..]);
- var creader = std.io.countingReader(stream.reader());
- const reader = creader.reader();
+pub fn calcSize(macho_file: *MachO) !u32 {
+ const tracy = trace(@src());
+ defer tracy.end();
- for (aug_str, 0..) |ch, i| switch (ch) {
- 'z' => if (i > 0) {
- return error.BadDwarfCfi;
- } else {
- _ = try leb.readULEB128(u64, reader);
- },
- 'R' => {
- _ = try reader.readByte();
- },
- 'P' => {
- const enc = try reader.readByte();
- const offset = ctx.base_offset + 13 + aug_str.len + creader.bytes_read;
- const ptr = try getEncodedPointer(enc, @as(i64, @intCast(ctx.base_addr + offset)), reader);
- return ptr;
- },
- 'L' => {
- _ = try reader.readByte();
- },
- 'S', 'B', 'G' => {},
- else => return error.BadDwarfCfi,
- };
+ var offset: u32 = 0;
- return null;
- }
+ var cies = std.ArrayList(Cie).init(macho_file.base.allocator);
+ defer cies.deinit();
+
+ for (macho_file.objects.items) |index| {
+ const object = macho_file.getFile(index).?.object;
- pub fn getLsdaPointer(rec: Record, cie: Record, ctx: struct {
- base_addr: u64,
- base_offset: u64,
- }) !?u64 {
- assert(rec.tag == .fde);
- const enc = (try cie.getLsdaEncoding()) orelse return null;
- var stream = std.io.fixedBufferStream(rec.data[20..]);
- const reader = stream.reader();
- _ = try reader.readByte();
- const offset = ctx.base_offset + 25;
- const ptr = try getEncodedPointer(enc, @as(i64, @intCast(ctx.base_addr + offset)), reader);
- return ptr;
+ outer: for (object.cies.items) |*cie| {
+ for (cies.items) |other| {
+ if (other.eql(cie.*, macho_file)) {
+ // We already have a CIE record that has the exact same contents, so instead of
+ // duplicating them, we mark this one dead and set its output offset to be
+ // equal to that of the alive record. This way, we won't have to rewrite
+ // Fde.cie_index field when committing the records to file.
+ cie.out_offset = other.out_offset;
+ continue :outer;
+ }
+ }
+ cie.alive = true;
+ cie.out_offset = offset;
+ offset += cie.getSize();
+ try cies.append(cie.*);
}
+ }
- pub fn setLsdaPointer(rec: *Record, cie: Record, value: u64, ctx: struct {
- base_addr: u64,
- base_offset: u64,
- }) !void {
- assert(rec.tag == .fde);
- const enc = (try cie.getLsdaEncoding()) orelse unreachable;
- var stream = std.io.fixedBufferStream(rec.data[21..]);
- const writer = stream.writer();
- const offset = ctx.base_offset + 25;
- try setEncodedPointer(enc, @as(i64, @intCast(ctx.base_addr + offset)), value, writer);
+ for (macho_file.objects.items) |index| {
+ const object = macho_file.getFile(index).?.object;
+ for (object.fdes.items) |*fde| {
+ if (!fde.alive) continue;
+ fde.out_offset = offset;
+ offset += fde.getSize();
}
+ }
- fn getLsdaEncoding(rec: Record) !?u8 {
- assert(rec.tag == .cie);
- const aug_str = rec.getAugmentationString();
+ return offset;
+}
- const base_offset = 9 + aug_str.len;
- var stream = std.io.fixedBufferStream(rec.data[base_offset..]);
- var creader = std.io.countingReader(stream.reader());
- const reader = creader.reader();
+pub fn calcNumRelocs(macho_file: *MachO) u32 {
+ const tracy = trace(@src());
+ defer tracy.end();
- for (aug_str, 0..) |ch, i| switch (ch) {
- 'z' => if (i > 0) {
- return error.BadDwarfCfi;
- } else {
- _ = try leb.readULEB128(u64, reader);
- },
- 'R' => {
- _ = try reader.readByte();
- },
- 'P' => {
- const enc = try reader.readByte();
- _ = try getEncodedPointer(enc, 0, reader);
- },
- 'L' => {
- const enc = try reader.readByte();
- return enc;
- },
- 'S', 'B', 'G' => {},
- else => return error.BadDwarfCfi,
- };
+ var nreloc: u32 = 0;
- return null;
+ for (macho_file.objects.items) |index| {
+ const object = macho_file.getFile(index).?.object;
+ for (object.cies.items) |cie| {
+ if (!cie.alive) continue;
+ if (cie.getPersonality(macho_file)) |_| {
+ nreloc += 1; // personality
+ }
}
+ }
- fn getEncodedPointer(enc: u8, pcrel_offset: i64, reader: anytype) !?u64 {
- if (enc == EH_PE.omit) return null;
-
- var ptr: i64 = switch (enc & 0x0F) {
- EH_PE.absptr => @as(i64, @bitCast(try reader.readInt(u64, .little))),
- EH_PE.udata2 => @as(i16, @bitCast(try reader.readInt(u16, .little))),
- EH_PE.udata4 => @as(i32, @bitCast(try reader.readInt(u32, .little))),
- EH_PE.udata8 => @as(i64, @bitCast(try reader.readInt(u64, .little))),
- EH_PE.uleb128 => @as(i64, @bitCast(try leb.readULEB128(u64, reader))),
- EH_PE.sdata2 => try reader.readInt(i16, .little),
- EH_PE.sdata4 => try reader.readInt(i32, .little),
- EH_PE.sdata8 => try reader.readInt(i64, .little),
- EH_PE.sleb128 => try leb.readILEB128(i64, reader),
- else => return null,
- };
+ return nreloc;
+}
- switch (enc & 0x70) {
- EH_PE.absptr => {},
- EH_PE.pcrel => ptr += pcrel_offset,
- EH_PE.datarel,
- EH_PE.textrel,
- EH_PE.funcrel,
- EH_PE.aligned,
- => return null,
- else => return null,
- }
+pub fn write(macho_file: *MachO, buffer: []u8) void {
+ const tracy = trace(@src());
+ defer tracy.end();
+
+ const sect = macho_file.sections.items(.header)[macho_file.eh_frame_sect_index.?];
+ const addend: i64 = switch (macho_file.options.cpu_arch.?) {
+ .x86_64 => 4,
+ else => 0,
+ };
- return @as(u64, @bitCast(ptr));
+ for (macho_file.objects.items) |index| {
+ const object = macho_file.getFile(index).?.object;
+ for (object.cies.items) |cie| {
+ if (!cie.alive) continue;
+
+ @memcpy(buffer[cie.out_offset..][0..cie.getSize()], cie.getData(macho_file));
+
+ if (cie.getPersonality(macho_file)) |sym| {
+ const offset = cie.out_offset + cie.personality.?.offset;
+ const saddr = sect.addr + offset;
+ const taddr = sym.getGotAddress(macho_file);
+ std.mem.writeInt(
+ i32,
+ buffer[offset..][0..4],
+ @intCast(@as(i64, @intCast(taddr)) - @as(i64, @intCast(saddr)) + addend),
+ .little,
+ );
+ }
}
+ }
+
+ for (macho_file.objects.items) |index| {
+ const object = macho_file.getFile(index).?.object;
+ for (object.fdes.items) |fde| {
+ if (!fde.alive) continue;
- fn setEncodedPointer(enc: u8, pcrel_offset: i64, value: u64, writer: anytype) !void {
- if (enc == EH_PE.omit) return;
+ @memcpy(buffer[fde.out_offset..][0..fde.getSize()], fde.getData(macho_file));
- var actual = @as(i64, @intCast(value));
+ {
+ const offset = fde.out_offset + 4;
+ const value = offset - fde.getCie(macho_file).out_offset;
+ std.mem.writeInt(u32, buffer[offset..][0..4], value, .little);
+ }
- switch (enc & 0x70) {
- EH_PE.absptr => {},
- EH_PE.pcrel => actual -= pcrel_offset,
- EH_PE.datarel,
- EH_PE.textrel,
- EH_PE.funcrel,
- EH_PE.aligned,
- => unreachable,
- else => unreachable,
+ {
+ const offset = fde.out_offset + 8;
+ const saddr = sect.addr + offset;
+ const taddr = fde.getAtom(macho_file).value;
+ std.mem.writeInt(
+ i64,
+ buffer[offset..][0..8],
+ @as(i64, @intCast(taddr)) - @as(i64, @intCast(saddr)),
+ .little,
+ );
}
- switch (enc & 0x0F) {
- EH_PE.absptr => try writer.writeInt(u64, @as(u64, @bitCast(actual)), .little),
- EH_PE.udata2 => try writer.writeInt(u16, @as(u16, @bitCast(@as(i16, @intCast(actual)))), .little),
- EH_PE.udata4 => try writer.writeInt(u32, @as(u32, @bitCast(@as(i32, @intCast(actual)))), .little),
- EH_PE.udata8 => try writer.writeInt(u64, @as(u64, @bitCast(actual)), .little),
- EH_PE.uleb128 => try leb.writeULEB128(writer, @as(u64, @bitCast(actual))),
- EH_PE.sdata2 => try writer.writeInt(i16, @as(i16, @intCast(actual)), .little),
- EH_PE.sdata4 => try writer.writeInt(i32, @as(i32, @intCast(actual)), .little),
- EH_PE.sdata8 => try writer.writeInt(i64, actual, .little),
- EH_PE.sleb128 => try leb.writeILEB128(writer, actual),
- else => unreachable,
+            if (fde.getLsdaAtom(macho_file)) |atom| {
+                // Relocate the LSDA pointer in place. The pointer lives at
+                // fde.lsda_ptr_offset within the record (recorded by
+                // Fde.parse) and targets the LSDA atom plus the intra-atom
+                // fde.lsda_offset — matching writeRelocs. (Previously this
+                // wrote at fde.lsda_offset and targeted atom.value alone.)
+                const offset = fde.out_offset + fde.lsda_ptr_offset;
+                const saddr = sect.addr + offset;
+                const taddr = atom.value + fde.lsda_offset;
+                switch (fde.getCie(macho_file).lsda_size.?) {
+                    .p32 => std.mem.writeInt(
+                        i32,
+                        buffer[offset..][0..4],
+                        @intCast(@as(i64, @intCast(taddr)) - @as(i64, @intCast(saddr)) + addend),
+                        .little,
+                    ),
+                    .p64 => std.mem.writeInt(
+                        i64,
+                        buffer[offset..][0..8],
+                        @as(i64, @intCast(taddr)) - @as(i64, @intCast(saddr)),
+                        .little,
+                    ),
+                }
            }
}
- };
-}
-
-pub fn getRelocs(macho_file: *MachO, object_id: u32, source_offset: u32) []const macho.relocation_info {
- const object = &macho_file.objects.items[object_id];
- assert(object.hasEhFrameRecords());
- const urel = object.eh_frame_relocs_lookup.get(source_offset) orelse
- return &[0]macho.relocation_info{};
- const all_relocs = object.getRelocs(object.eh_frame_sect_id.?);
- return all_relocs[urel.reloc.start..][0..urel.reloc.len];
+ }
}
-pub const Iterator = struct {
- data: []const u8,
- pos: u32 = 0,
+pub fn writeRelocs(macho_file: *MachO, code: []u8, relocs: *std.ArrayList(macho.relocation_info)) error{Overflow}!void {
+ const tracy = trace(@src());
+ defer tracy.end();
- pub fn next(it: *Iterator) !?EhFrameRecord(false) {
- if (it.pos >= it.data.len) return null;
-
- var stream = std.io.fixedBufferStream(it.data[it.pos..]);
- const reader = stream.reader();
+ const cpu_arch = macho_file.options.cpu_arch.?;
+ const sect = macho_file.sections.items(.header)[macho_file.eh_frame_sect_index.?];
+ const addend: i64 = switch (cpu_arch) {
+ .x86_64 => 4,
+ else => 0,
+ };
- const size = try reader.readInt(u32, .little);
- if (size == 0xFFFFFFFF) {
- log.debug("MachO doesn't support 64bit DWARF CFI __eh_frame records", .{});
- return error.BadDwarfCfi;
+ for (macho_file.objects.items) |index| {
+ const object = macho_file.getFile(index).?.object;
+ for (object.cies.items) |cie| {
+ if (!cie.alive) continue;
+
+ @memcpy(code[cie.out_offset..][0..cie.getSize()], cie.getData(macho_file));
+
+ if (cie.getPersonality(macho_file)) |sym| {
+ const r_address = math.cast(i32, cie.out_offset + cie.personality.?.offset) orelse return error.Overflow;
+ const r_symbolnum = math.cast(u24, sym.getOutputSymtabIndex(macho_file).?) orelse return error.Overflow;
+ relocs.appendAssumeCapacity(.{
+ .r_address = r_address,
+ .r_symbolnum = r_symbolnum,
+ .r_length = 2,
+ .r_extern = 1,
+ .r_pcrel = 1,
+ .r_type = switch (cpu_arch) {
+ .aarch64 => @intFromEnum(macho.reloc_type_arm64.ARM64_RELOC_POINTER_TO_GOT),
+ .x86_64 => @intFromEnum(macho.reloc_type_x86_64.X86_64_RELOC_GOT),
+ else => unreachable,
+ },
+ });
+ }
}
+ }
- const id = try reader.readInt(u32, .little);
- const tag: EhFrameRecordTag = if (id == 0) .cie else .fde;
- const offset: u32 = 4;
- const record = EhFrameRecord(false){
- .tag = tag,
- .size = size,
- .data = it.data[it.pos + offset ..][0..size],
- };
+ for (macho_file.objects.items) |index| {
+ const object = macho_file.getFile(index).?.object;
+ for (object.fdes.items) |fde| {
+ if (!fde.alive) continue;
- it.pos += size + offset;
+ @memcpy(code[fde.out_offset..][0..fde.getSize()], fde.getData(macho_file));
- return record;
- }
+ {
+ const offset = fde.out_offset + 4;
+ const value = offset - fde.getCie(macho_file).out_offset;
+ std.mem.writeInt(u32, code[offset..][0..4], value, .little);
+ }
- pub fn reset(it: *Iterator) void {
- it.pos = 0;
- }
+ {
+ const offset = fde.out_offset + 8;
+ const saddr = sect.addr + offset;
+ const taddr = fde.getAtom(macho_file).value;
+ std.mem.writeInt(
+ i64,
+ code[offset..][0..8],
+ @as(i64, @intCast(taddr)) - @as(i64, @intCast(saddr)),
+ .little,
+ );
+ }
- pub fn seekTo(it: *Iterator, pos: u32) void {
- assert(pos >= 0 and pos < it.data.len);
- it.pos = pos;
+ if (fde.getLsdaAtom(macho_file)) |atom| {
+ const offset = fde.out_offset + fde.lsda_ptr_offset;
+ const saddr = sect.addr + offset;
+ const taddr = atom.value + fde.lsda_offset;
+ switch (fde.getCie(macho_file).lsda_size.?) {
+ .p32 => std.mem.writeInt(
+ i32,
+ code[offset..][0..4],
+ @intCast(@as(i64, @intCast(taddr)) - @as(i64, @intCast(saddr)) + addend),
+ .little,
+ ),
+ .p64 => std.mem.writeInt(
+ i64,
+ code[offset..][0..8],
+ @as(i64, @intCast(taddr)) - @as(i64, @intCast(saddr)),
+ .little,
+ ),
+ }
+ }
+ }
}
-};
+}
pub const EH_PE = struct {
pub const absptr = 0x00;
@@ -643,17 +553,17 @@ pub const EH_PE = struct {
pub const omit = 0xFF;
};
-const std = @import("std");
const assert = std.debug.assert;
+const leb = std.leb;
const macho = std.macho;
const math = std.math;
const mem = std.mem;
-const leb = std.leb;
-const log = std.log.scoped(.eh_frame);
+const std = @import("std");
+const trace = @import("../tracy.zig").trace;
-const Allocator = mem.Allocator;
+const Allocator = std.mem.Allocator;
const Atom = @import("Atom.zig");
+const File = @import("file.zig").File;
const MachO = @import("../MachO.zig");
-const Relocation = @import("Relocation.zig");
-const SymbolWithLoc = MachO.SymbolWithLoc;
-const UnwindInfo = @import("UnwindInfo.zig");
+const Object = @import("Object.zig");
+const Symbol = @import("Symbol.zig");
src/link/MachO/fat.zig
@@ -1,3 +1,13 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const builtin = @import("builtin");
+const log = std.log.scoped(.macho);
+const macho = std.macho;
+const mem = std.mem;
+const native_endian = builtin.target.cpu.arch.endian();
+
+const MachO = @import("../MachO.zig");
+
pub fn isFatLibrary(file: std.fs.File) bool {
const reader = file.reader();
const hdr = reader.readStructEndian(macho.fat_header, .big) catch return false;
@@ -7,18 +17,16 @@ pub fn isFatLibrary(file: std.fs.File) bool {
pub const Arch = struct {
tag: std.Target.Cpu.Arch,
- offset: u64,
+ offset: u32,
+ size: u32,
};
-/// Caller owns the memory.
-pub fn parseArchs(gpa: Allocator, file: std.fs.File) ![]const Arch {
+pub fn parseArchs(file: std.fs.File, buffer: *[2]Arch) ![]const Arch {
const reader = file.reader();
const fat_header = try reader.readStructEndian(macho.fat_header, .big);
assert(fat_header.magic == macho.FAT_MAGIC);
- var archs = try std.ArrayList(Arch).initCapacity(gpa, fat_header.nfat_arch);
- defer archs.deinit();
-
+ var count: usize = 0;
var fat_arch_index: u32 = 0;
while (fat_arch_index < fat_header.nfat_arch) : (fat_arch_index += 1) {
const fat_arch = try reader.readStructEndian(macho.fat_arch, .big);
@@ -29,16 +37,9 @@ pub fn parseArchs(gpa: Allocator, file: std.fs.File) ![]const Arch {
macho.CPU_TYPE_X86_64 => if (fat_arch.cpusubtype == macho.CPU_SUBTYPE_X86_64_ALL) .x86_64 else continue,
else => continue,
};
-
- archs.appendAssumeCapacity(.{ .tag = arch, .offset = fat_arch.offset });
+ buffer[count] = .{ .tag = arch, .offset = fat_arch.offset, .size = fat_arch.size };
+ count += 1;
}
- return archs.toOwnedSlice();
+ return buffer[0..count];
}
-
-const std = @import("std");
-const assert = std.debug.assert;
-const log = std.log.scoped(.archive);
-const macho = std.macho;
-const mem = std.mem;
-const Allocator = mem.Allocator;
src/link/MachO/file.zig
@@ -0,0 +1,116 @@
+pub const File = union(enum) {
+ internal: *InternalObject,
+ object: *Object,
+ dylib: *Dylib,
+
+ pub fn getIndex(file: File) Index {
+ return switch (file) {
+ inline else => |x| x.index,
+ };
+ }
+
+ pub fn fmtPath(file: File) std.fmt.Formatter(formatPath) {
+ return .{ .data = file };
+ }
+
+ fn formatPath(
+ file: File,
+ comptime unused_fmt_string: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+ ) !void {
+ _ = unused_fmt_string;
+ _ = options;
+ switch (file) {
+ .internal => try writer.writeAll(""),
+ .object => |x| try writer.print("{}", .{x.fmtPath()}),
+ .dylib => |x| try writer.writeAll(x.path),
+ }
+ }
+
+ pub fn resolveSymbols(file: File, macho_file: *MachO) void {
+ switch (file) {
+ .internal => unreachable,
+ inline else => |x| x.resolveSymbols(macho_file),
+ }
+ }
+
+ pub fn resetGlobals(file: File, macho_file: *MachO) void {
+ switch (file) {
+ .internal => unreachable,
+ inline else => |x| x.resetGlobals(macho_file),
+ }
+ }
+
+ /// Encodes symbol rank so that the following ordering applies:
+ /// * strong in object
+ /// * weak in object
+ /// * tentative in object
+ /// * strong in archive/dylib
+ /// * weak in archive/dylib
+ /// * tentative in archive
+ /// * unclaimed
+ pub fn getSymbolRank(file: File, args: struct {
+ archive: bool = false,
+ weak: bool = false,
+ tentative: bool = false,
+ }) u32 {
+ if (file == .object and !args.archive) {
+ const base: u32 = blk: {
+ if (args.tentative) break :blk 3;
+ break :blk if (args.weak) 2 else 1;
+ };
+ return (base << 16) + file.getIndex();
+ }
+ const base: u32 = blk: {
+ if (args.tentative) break :blk 3;
+ break :blk if (args.weak) 2 else 1;
+ };
+ return base + (file.getIndex() << 24);
+ }
+
+ pub fn getSymbols(file: File) []const Symbol.Index {
+ return switch (file) {
+ inline else => |x| x.symbols.items,
+ };
+ }
+
+ pub fn getAtoms(file: File) []const Atom.Index {
+ return switch (file) {
+ .dylib => unreachable,
+ inline else => |x| x.atoms.items,
+ };
+ }
+
+ pub fn calcSymtabSize(file: File, macho_file: *MachO) !void {
+ return switch (file) {
+ inline else => |x| x.calcSymtabSize(macho_file),
+ };
+ }
+
+ pub fn writeSymtab(file: File, macho_file: *MachO) void {
+ return switch (file) {
+ inline else => |x| x.writeSymtab(macho_file),
+ };
+ }
+
+ pub const Index = u32;
+
+ pub const Entry = union(enum) {
+ null: void,
+ internal: InternalObject,
+ object: Object,
+ dylib: Dylib,
+ };
+};
+
+const macho = std.macho;
+const std = @import("std");
+
+const Allocator = std.mem.Allocator;
+const Atom = @import("Atom.zig");
+const InternalObject = @import("InternalObject.zig");
+const MachO = @import("../MachO.zig");
+const Object = @import("Object.zig");
+const Dylib = @import("Dylib.zig");
+const Symbol = @import("Symbol.zig");
src/link/MachO/hasher.zig
@@ -9,15 +9,14 @@ pub fn ParallelHasher(comptime Hasher: type) type {
chunk_size: u64 = 0x4000,
max_file_size: ?u64 = null,
}) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+
var wg: WaitGroup = .{};
- const file_size = blk: {
- const file_size = opts.max_file_size orelse try file.getEndPos();
- break :blk std.math.cast(usize, file_size) orelse return error.Overflow;
- };
- const chunk_size = std.math.cast(usize, opts.chunk_size) orelse return error.Overflow;
+ const file_size = opts.max_file_size orelse try file.getEndPos();
- const buffer = try self.allocator.alloc(u8, chunk_size * out.len);
+ const buffer = try self.allocator.alloc(u8, opts.chunk_size * out.len);
defer self.allocator.free(buffer);
const results = try self.allocator.alloc(fs.File.PReadError!usize, out.len);
@@ -28,8 +27,11 @@ pub fn ParallelHasher(comptime Hasher: type) type {
defer wg.wait();
for (out, results, 0..) |*out_buf, *result, i| {
- const fstart = i * chunk_size;
- const fsize = if (fstart + chunk_size > file_size) file_size - fstart else chunk_size;
+ const fstart = i * opts.chunk_size;
+ const fsize = if (fstart + opts.chunk_size > file_size)
+ file_size - fstart
+ else
+ opts.chunk_size;
wg.start();
try self.thread_pool.spawn(worker, .{
file,
@@ -61,10 +63,11 @@ pub fn ParallelHasher(comptime Hasher: type) type {
};
}
-const std = @import("std");
const assert = std.debug.assert;
const fs = std.fs;
const mem = std.mem;
+const std = @import("std");
+const trace = @import("../tracy.zig").trace;
const Allocator = mem.Allocator;
const ThreadPool = std.Thread.Pool;
src/link/MachO/InternalObject.zig
@@ -0,0 +1,249 @@
+index: File.Index,
+
+sections: std.MultiArrayList(Section) = .{},
+atoms: std.ArrayListUnmanaged(Atom.Index) = .{},
+symbols: std.ArrayListUnmanaged(Symbol.Index) = .{},
+
+objc_methnames: std.ArrayListUnmanaged(u8) = .{},
+objc_selrefs: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64),
+
+output_symtab_ctx: MachO.SymtabCtx = .{},
+
+pub fn deinit(self: *InternalObject, allocator: Allocator) void {
+ for (self.sections.items(.relocs)) |*relocs| {
+ relocs.deinit(allocator);
+ }
+ self.sections.deinit(allocator);
+ self.atoms.deinit(allocator);
+ self.symbols.deinit(allocator);
+ self.objc_methnames.deinit(allocator);
+}
+
+pub fn addSymbol(self: *InternalObject, name: [:0]const u8, macho_file: *MachO) !Symbol.Index {
+ const gpa = macho_file.base.allocator;
+ try self.symbols.ensureUnusedCapacity(gpa, 1);
+ const off = try macho_file.string_intern.insert(gpa, name);
+ const gop = try macho_file.getOrCreateGlobal(off);
+ self.symbols.addOneAssumeCapacity().* = gop.index;
+ const sym = macho_file.getSymbol(gop.index);
+ sym.* = .{ .name = off, .file = self.index };
+ return gop.index;
+}
+
+/// Creates a fake input sections __TEXT,__objc_methname and __DATA,__objc_selrefs.
+pub fn addObjcMsgsendSections(self: *InternalObject, sym_name: []const u8, macho_file: *MachO) !u32 {
+ const methname_atom_index = try self.addObjcMethnameSection(sym_name, macho_file);
+ return try self.addObjcSelrefsSection(sym_name, methname_atom_index, macho_file);
+}
+
+fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_file: *MachO) !Atom.Index {
+ const gpa = macho_file.base.allocator;
+ const atom_index = try macho_file.addAtom();
+ try self.atoms.append(gpa, atom_index);
+
+ const name = try std.fmt.allocPrintZ(gpa, "__TEXT$__objc_methname${s}", .{methname});
+ defer gpa.free(name);
+ const atom = macho_file.getAtom(atom_index).?;
+ atom.atom_index = atom_index;
+ atom.name = try macho_file.string_intern.insert(gpa, name);
+ atom.file = self.index;
+ atom.size = methname.len + 1;
+ atom.alignment = 0;
+
+ const n_sect = try self.addSection(gpa, "__TEXT", "__objc_methname");
+ const sect = &self.sections.items(.header)[n_sect];
+ sect.flags = macho.S_CSTRING_LITERALS;
+ sect.size = atom.size;
+ sect.@"align" = 0;
+ atom.n_sect = n_sect;
+ self.sections.items(.extra)[n_sect].is_objc_methname = true;
+
+ sect.offset = @intCast(self.objc_methnames.items.len);
+ try self.objc_methnames.ensureUnusedCapacity(gpa, methname.len + 1);
+ self.objc_methnames.writer(gpa).print("{s}\x00", .{methname}) catch unreachable;
+
+ return atom_index;
+}
+
+fn addObjcSelrefsSection(
+ self: *InternalObject,
+ methname: []const u8,
+ methname_atom_index: Atom.Index,
+ macho_file: *MachO,
+) !Atom.Index {
+ const gpa = macho_file.base.allocator;
+ const atom_index = try macho_file.addAtom();
+ try self.atoms.append(gpa, atom_index);
+
+ const name = try std.fmt.allocPrintZ(gpa, "__DATA$__objc_selrefs${s}", .{methname});
+ defer gpa.free(name);
+ const atom = macho_file.getAtom(atom_index).?;
+ atom.atom_index = atom_index;
+ atom.name = try macho_file.string_intern.insert(gpa, name);
+ atom.file = self.index;
+ atom.size = @sizeOf(u64);
+ atom.alignment = 3;
+
+ const n_sect = try self.addSection(gpa, "__DATA", "__objc_selrefs");
+ const sect = &self.sections.items(.header)[n_sect];
+ sect.flags = macho.S_LITERAL_POINTERS | macho.S_ATTR_NO_DEAD_STRIP;
+ sect.offset = 0;
+ sect.size = atom.size;
+ sect.@"align" = 3;
+ atom.n_sect = n_sect;
+ self.sections.items(.extra)[n_sect].is_objc_selref = true;
+
+ const relocs = &self.sections.items(.relocs)[n_sect];
+ try relocs.ensureUnusedCapacity(gpa, 1);
+ relocs.appendAssumeCapacity(.{
+ .tag = .local,
+ .offset = 0,
+ .target = methname_atom_index,
+ .addend = 0,
+ .type = .unsigned,
+ .meta = .{
+ .pcrel = false,
+ .length = 3,
+ .symbolnum = 0, // Only used when synthesising unwind records so can be anything
+ .has_subtractor = false,
+ },
+ });
+ atom.relocs = .{ .pos = 0, .len = 1 };
+
+ return atom_index;
+}
+
+pub fn calcSymtabSize(self: *InternalObject, macho_file: *MachO) !void {
+ for (self.symbols.items) |sym_index| {
+ const sym = macho_file.getSymbol(sym_index);
+ if (sym.getFile(macho_file)) |file| if (file.getIndex() != self.index) continue;
+ sym.flags.output_symtab = true;
+ if (sym.isLocal()) {
+ try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nlocals }, macho_file);
+ self.output_symtab_ctx.nlocals += 1;
+ } else if (sym.flags.@"export") {
+ try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nexports }, macho_file);
+ self.output_symtab_ctx.nexports += 1;
+ } else {
+ assert(sym.flags.import);
+ try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nimports }, macho_file);
+ self.output_symtab_ctx.nimports += 1;
+ }
+ self.output_symtab_ctx.strsize += @as(u32, @intCast(sym.getName(macho_file).len + 1));
+ }
+}
+
+pub fn writeSymtab(self: InternalObject, macho_file: *MachO) void {
+ for (self.symbols.items) |sym_index| {
+ const sym = macho_file.getSymbol(sym_index);
+ if (sym.getFile(macho_file)) |file| if (file.getIndex() != self.index) continue;
+ const idx = sym.getOutputSymtabIndex(macho_file) orelse continue;
+ const n_strx = @as(u32, @intCast(macho_file.strtab.items.len));
+ macho_file.strtab.appendSliceAssumeCapacity(sym.getName(macho_file));
+ macho_file.strtab.appendAssumeCapacity(0);
+ const out_sym = &macho_file.symtab.items[idx];
+ out_sym.n_strx = n_strx;
+ sym.setOutputSym(macho_file, out_sym);
+ }
+}
+
+fn addSection(self: *InternalObject, allocator: Allocator, segname: []const u8, sectname: []const u8) !u32 {
+ const n_sect = @as(u32, @intCast(try self.sections.addOne(allocator)));
+ self.sections.set(n_sect, .{
+ .header = .{
+ .sectname = MachO.makeStaticString(sectname),
+ .segname = MachO.makeStaticString(segname),
+ },
+ });
+ return n_sect;
+}
+
+pub fn getSectionData(self: *const InternalObject, index: u32) []const u8 {
+ const slice = self.sections.slice();
+ assert(index < slice.items(.header).len);
+ const sect = slice.items(.header)[index];
+ const extra = slice.items(.extra)[index];
+ if (extra.is_objc_methname) {
+ return self.objc_methnames.items[sect.offset..][0..sect.size];
+ } else if (extra.is_objc_selref) {
+ return &self.objc_selrefs;
+ } else @panic("ref to non-existent section");
+}
+
+pub fn asFile(self: *InternalObject) File {
+ return .{ .internal = self };
+}
+
+const FormatContext = struct {
+ self: *InternalObject,
+ macho_file: *MachO,
+};
+
+pub fn fmtAtoms(self: *InternalObject, macho_file: *MachO) std.fmt.Formatter(formatAtoms) {
+ return .{ .data = .{
+ .self = self,
+ .macho_file = macho_file,
+ } };
+}
+
+fn formatAtoms(
+ ctx: FormatContext,
+ comptime unused_fmt_string: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+) !void {
+ _ = unused_fmt_string;
+ _ = options;
+ try writer.writeAll(" atoms\n");
+ for (ctx.self.atoms.items) |atom_index| {
+ const atom = ctx.macho_file.getAtom(atom_index).?;
+ try writer.print(" {}\n", .{atom.fmt(ctx.macho_file)});
+ }
+}
+
+pub fn fmtSymtab(self: *InternalObject, macho_file: *MachO) std.fmt.Formatter(formatSymtab) {
+ return .{ .data = .{
+ .self = self,
+ .macho_file = macho_file,
+ } };
+}
+
+fn formatSymtab(
+ ctx: FormatContext,
+ comptime unused_fmt_string: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+) !void {
+ _ = unused_fmt_string;
+ _ = options;
+ try writer.writeAll(" symbols\n");
+ for (ctx.self.symbols.items) |index| {
+ const global = ctx.macho_file.getSymbol(index);
+ try writer.print(" {}\n", .{global.fmt(ctx.macho_file)});
+ }
+}
+
+const Section = struct {
+ header: macho.section_64,
+ relocs: std.ArrayListUnmanaged(Relocation) = .{},
+ extra: Extra = .{},
+
+ const Extra = packed struct {
+ is_objc_methname: bool = false,
+ is_objc_selref: bool = false,
+ };
+};
+
+const assert = std.debug.assert;
+const macho = std.macho;
+const mem = std.mem;
+const std = @import("std");
+
+const Allocator = std.mem.Allocator;
+const Atom = @import("Atom.zig");
+const File = @import("file.zig").File;
+const InternalObject = @This();
+const MachO = @import("../MachO.zig");
+const Object = @import("Object.zig");
+const Relocation = @import("Relocation.zig");
+const Symbol = @import("Symbol.zig");
src/link/MachO/load_commands.zig
@@ -1,4 +1,14 @@
-/// Default path to dyld.
+const std = @import("std");
+const assert = std.debug.assert;
+const log = std.log.scoped(.link);
+const macho = std.macho;
+const mem = std.mem;
+
+const Allocator = mem.Allocator;
+const Dylib = @import("Dylib.zig");
+const MachO = @import("../MachO.zig");
+const Options = @import("../MachO.zig").Options;
+
pub const default_dyld_path: [*:0]const u8 = "/usr/lib/dyld";
fn calcInstallNameLen(cmd_size: u64, name: []const u8, assume_max_path_len: bool) u64 {
@@ -7,31 +17,20 @@ fn calcInstallNameLen(cmd_size: u64, name: []const u8, assume_max_path_len: bool
return mem.alignForward(u64, cmd_size + name_len, @alignOf(u64));
}
-const CalcLCsSizeCtx = struct {
- segments: []const macho.segment_command_64,
- dylibs: []const Dylib,
- referenced_dylibs: []u16,
- wants_function_starts: bool = true,
-};
-
-fn calcLCsSize(m: *MachO, ctx: CalcLCsSizeCtx, assume_max_path_len: bool) !u32 {
- const comp = m.base.comp;
- const gpa = comp.gpa;
- var has_text_segment: bool = false;
+pub fn calcLoadCommandsSize(macho_file: *MachO, assume_max_path_len: bool) u32 {
+ const options = &macho_file.options;
var sizeofcmds: u64 = 0;
- for (ctx.segments) |seg| {
- sizeofcmds += seg.nsects * @sizeOf(macho.section_64) + @sizeOf(macho.segment_command_64);
- if (mem.eql(u8, seg.segName(), "__TEXT")) {
- has_text_segment = true;
- }
+
+ // LC_SEGMENT_64
+ sizeofcmds += @sizeOf(macho.segment_command_64) * macho_file.segments.items.len;
+ for (macho_file.segments.items) |seg| {
+ sizeofcmds += seg.nsects * @sizeOf(macho.section_64);
}
// LC_DYLD_INFO_ONLY
sizeofcmds += @sizeOf(macho.dyld_info_command);
// LC_FUNCTION_STARTS
- if (has_text_segment and ctx.wants_function_starts) {
- sizeofcmds += @sizeOf(macho.linkedit_data_command);
- }
+ sizeofcmds += @sizeOf(macho.linkedit_data_command);
// LC_DATA_IN_CODE
sizeofcmds += @sizeOf(macho.linkedit_data_command);
// LC_SYMTAB
@@ -45,15 +44,14 @@ fn calcLCsSize(m: *MachO, ctx: CalcLCsSizeCtx, assume_max_path_len: bool) !u32 {
false,
);
// LC_MAIN
- if (comp.config.output_mode == .Exe) {
+ if (!options.dylib) {
sizeofcmds += @sizeOf(macho.entry_point_command);
}
// LC_ID_DYLIB
- if (comp.config.output_mode == .Lib and comp.config.link_mode == .Dynamic) {
+ if (options.dylib) {
sizeofcmds += blk: {
- const emit = m.base.emit;
- const install_name = m.install_name orelse try emit.directory.join(gpa, &.{emit.sub_path});
- defer if (m.install_name == null) gpa.free(install_name);
+ const emit = options.emit;
+ const install_name = options.install_name orelse emit.sub_path;
break :blk calcInstallNameLen(
@sizeOf(macho.dylib_command),
install_name,
@@ -63,9 +61,7 @@ fn calcLCsSize(m: *MachO, ctx: CalcLCsSizeCtx, assume_max_path_len: bool) !u32 {
}
// LC_RPATH
{
- var it = RpathIterator.init(gpa, m.base.rpath_list);
- defer it.deinit();
- while (try it.next()) |rpath| {
+ for (options.rpath_list) |rpath| {
sizeofcmds += calcInstallNameLen(
@sizeOf(macho.rpath_command),
rpath,
@@ -75,24 +71,22 @@ fn calcLCsSize(m: *MachO, ctx: CalcLCsSizeCtx, assume_max_path_len: bool) !u32 {
}
// LC_SOURCE_VERSION
sizeofcmds += @sizeOf(macho.source_version_command);
- // LC_BUILD_VERSION or LC_VERSION_MIN_ or nothing
- {
- const target = comp.root_mod.resolved_target.result;
- const platform = Platform.fromTarget(target);
+ if (options.platform) |platform| {
if (platform.isBuildVersionCompatible()) {
// LC_BUILD_VERSION
sizeofcmds += @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version);
- } else if (platform.isVersionMinCompatible()) {
- // LC_VERSION_MIN_
+ } else {
+ // LC_VERSION_MIN_*
sizeofcmds += @sizeOf(macho.version_min_command);
}
}
// LC_UUID
sizeofcmds += @sizeOf(macho.uuid_command);
// LC_LOAD_DYLIB
- for (ctx.referenced_dylibs) |id| {
- const dylib = ctx.dylibs[id];
- const dylib_id = dylib.id orelse unreachable;
+ for (macho_file.dylibs.items) |index| {
+ const dylib = macho_file.getFile(index).?.dylib;
+ assert(dylib.isAlive(macho_file));
+ const dylib_id = dylib.id.?;
sizeofcmds += calcInstallNameLen(
@sizeOf(macho.dylib_command),
dylib_id.name,
@@ -100,19 +94,52 @@ fn calcLCsSize(m: *MachO, ctx: CalcLCsSizeCtx, assume_max_path_len: bool) !u32 {
);
}
// LC_CODE_SIGNATURE
- if (m.requiresCodeSignature()) {
+ if (macho_file.requiresCodeSig()) {
sizeofcmds += @sizeOf(macho.linkedit_data_command);
}
- return @intCast(sizeofcmds);
+ return @as(u32, @intCast(sizeofcmds));
+}
+
+pub fn calcLoadCommandsSizeObject(macho_file: *MachO) u32 {
+ const options = &macho_file.options;
+ var sizeofcmds: u64 = 0;
+
+ // LC_SEGMENT_64
+ {
+ assert(macho_file.segments.items.len == 1);
+ sizeofcmds += @sizeOf(macho.segment_command_64);
+ const seg = macho_file.segments.items[0];
+ sizeofcmds += seg.nsects * @sizeOf(macho.section_64);
+ }
+
+ // LC_DATA_IN_CODE
+ sizeofcmds += @sizeOf(macho.linkedit_data_command);
+ // LC_SYMTAB
+ sizeofcmds += @sizeOf(macho.symtab_command);
+ // LC_DYSYMTAB
+ sizeofcmds += @sizeOf(macho.dysymtab_command);
+
+ if (options.platform) |platform| {
+ if (platform.isBuildVersionCompatible()) {
+ // LC_BUILD_VERSION
+ sizeofcmds += @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version);
+ } else {
+ // LC_VERSION_MIN_*
+ sizeofcmds += @sizeOf(macho.version_min_command);
+ }
+ }
+
+ return @as(u32, @intCast(sizeofcmds));
}
-pub fn calcMinHeaderPad(m: *MachO, ctx: CalcLCsSizeCtx) !u64 {
- var padding: u32 = (try calcLCsSize(m, ctx, false)) + m.headerpad_size;
+pub fn calcMinHeaderPadSize(macho_file: *MachO) u32 {
+ const options = &macho_file.options;
+ var padding: u32 = calcLoadCommandsSize(macho_file, false) + (options.headerpad orelse 0);
log.debug("minimum requested headerpad size 0x{x}", .{padding + @sizeOf(macho.mach_header_64)});
- if (m.headerpad_max_install_names) {
- const min_headerpad_size: u32 = try calcLCsSize(m, ctx, true);
+ if (options.headerpad_max_install_names) {
+ const min_headerpad_size: u32 = calcLoadCommandsSize(macho_file, true);
log.debug("headerpad_max_install_names minimum headerpad size 0x{x}", .{
min_headerpad_size + @sizeOf(macho.mach_header_64),
});
@@ -125,34 +152,22 @@ pub fn calcMinHeaderPad(m: *MachO, ctx: CalcLCsSizeCtx) !u64 {
return offset;
}
-pub fn calcNumOfLCs(lc_buffer: []const u8) u32 {
- var ncmds: u32 = 0;
- var pos: usize = 0;
- while (true) {
- if (pos >= lc_buffer.len) break;
- const cmd = @as(*align(1) const macho.load_command, @ptrCast(lc_buffer.ptr + pos)).*;
- ncmds += 1;
- pos += cmd.cmdsize;
- }
- return ncmds;
-}
-
-pub fn writeDylinkerLC(lc_writer: anytype) !void {
+pub fn writeDylinkerLC(writer: anytype) !void {
const name_len = mem.sliceTo(default_dyld_path, 0).len;
const cmdsize = @as(u32, @intCast(mem.alignForward(
u64,
@sizeOf(macho.dylinker_command) + name_len,
@sizeOf(u64),
)));
- try lc_writer.writeStruct(macho.dylinker_command{
+ try writer.writeStruct(macho.dylinker_command{
.cmd = .LOAD_DYLINKER,
.cmdsize = cmdsize,
.name = @sizeOf(macho.dylinker_command),
});
- try lc_writer.writeAll(mem.sliceTo(default_dyld_path, 0));
+ try writer.writeAll(mem.sliceTo(default_dyld_path, 0));
const padding = cmdsize - @sizeOf(macho.dylinker_command) - name_len;
if (padding > 0) {
- try lc_writer.writeByteNTimes(0, padding);
+ try writer.writeByteNTimes(0, padding);
}
}
@@ -164,14 +179,14 @@ const WriteDylibLCCtx = struct {
compatibility_version: u32 = 0x10000,
};
-fn writeDylibLC(ctx: WriteDylibLCCtx, lc_writer: anytype) !void {
+pub fn writeDylibLC(ctx: WriteDylibLCCtx, writer: anytype) !void {
const name_len = ctx.name.len + 1;
const cmdsize = @as(u32, @intCast(mem.alignForward(
u64,
@sizeOf(macho.dylib_command) + name_len,
@sizeOf(u64),
)));
- try lc_writer.writeStruct(macho.dylib_command{
+ try writer.writeStruct(macho.dylib_command{
.cmd = ctx.cmd,
.cmdsize = cmdsize,
.dylib = .{
@@ -181,392 +196,75 @@ fn writeDylibLC(ctx: WriteDylibLCCtx, lc_writer: anytype) !void {
.compatibility_version = ctx.compatibility_version,
},
});
- try lc_writer.writeAll(ctx.name);
- try lc_writer.writeByte(0);
+ try writer.writeAll(ctx.name);
+ try writer.writeByte(0);
const padding = cmdsize - @sizeOf(macho.dylib_command) - name_len;
if (padding > 0) {
- try lc_writer.writeByteNTimes(0, padding);
+ try writer.writeByteNTimes(0, padding);
}
}
-pub fn writeDylibIdLC(macho_file: *MachO, lc_writer: anytype) !void {
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
- assert(comp.config.output_mode == .Lib and comp.config.link_mode == .Dynamic);
- const emit = macho_file.base.emit;
- const install_name = macho_file.install_name orelse
- try emit.directory.join(gpa, &.{emit.sub_path});
- defer if (macho_file.install_name == null) gpa.free(install_name);
- const curr = comp.version orelse std.SemanticVersion{
- .major = 1,
- .minor = 0,
- .patch = 0,
- };
- const compat = macho_file.compatibility_version orelse std.SemanticVersion{
- .major = 1,
- .minor = 0,
- .patch = 0,
- };
+pub fn writeDylibIdLC(options: *const Options, writer: anytype) !void {
+ assert(options.dylib);
+ const emit = options.emit;
+ const install_name = options.install_name orelse emit.sub_path;
+ const curr = options.current_version orelse Options.Version.new(1, 0, 0);
+ const compat = options.compatibility_version orelse Options.Version.new(1, 0, 0);
try writeDylibLC(.{
.cmd = .ID_DYLIB,
.name = install_name,
- .current_version = @as(u32, @intCast(curr.major << 16 | curr.minor << 8 | curr.patch)),
- .compatibility_version = @as(u32, @intCast(compat.major << 16 | compat.minor << 8 | compat.patch)),
- }, lc_writer);
+ .current_version = curr.value,
+ .compatibility_version = compat.value,
+ }, writer);
}
-const RpathIterator = struct {
- buffer: []const []const u8,
- table: std.StringHashMap(void),
- count: usize = 0,
-
- fn init(gpa: Allocator, rpaths: []const []const u8) RpathIterator {
- return .{ .buffer = rpaths, .table = std.StringHashMap(void).init(gpa) };
- }
-
- fn deinit(it: *RpathIterator) void {
- it.table.deinit();
- }
-
- fn next(it: *RpathIterator) !?[]const u8 {
- while (true) {
- if (it.count >= it.buffer.len) return null;
- const rpath = it.buffer[it.count];
- it.count += 1;
- const gop = try it.table.getOrPut(rpath);
- if (gop.found_existing) continue;
- return rpath;
- }
- }
-};
-
-pub fn writeRpathLCs(macho_file: *MachO, lc_writer: anytype) !void {
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
-
- var it = RpathIterator.init(gpa, macho_file.base.rpath_list);
- defer it.deinit();
-
- while (try it.next()) |rpath| {
+pub fn writeRpathLCs(rpaths: []const []const u8, writer: anytype) !void {
+ for (rpaths) |rpath| {
const rpath_len = rpath.len + 1;
const cmdsize = @as(u32, @intCast(mem.alignForward(
u64,
@sizeOf(macho.rpath_command) + rpath_len,
@sizeOf(u64),
)));
- try lc_writer.writeStruct(macho.rpath_command{
+ try writer.writeStruct(macho.rpath_command{
.cmdsize = cmdsize,
.path = @sizeOf(macho.rpath_command),
});
- try lc_writer.writeAll(rpath);
- try lc_writer.writeByte(0);
+ try writer.writeAll(rpath);
+ try writer.writeByte(0);
const padding = cmdsize - @sizeOf(macho.rpath_command) - rpath_len;
if (padding > 0) {
- try lc_writer.writeByteNTimes(0, padding);
+ try writer.writeByteNTimes(0, padding);
}
}
}
-pub fn writeVersionMinLC(platform: Platform, sdk_version: ?std.SemanticVersion, lc_writer: anytype) !void {
- const cmd: macho.LC = switch (platform.os_tag) {
- .macos => .VERSION_MIN_MACOSX,
- .ios => .VERSION_MIN_IPHONEOS,
- .tvos => .VERSION_MIN_TVOS,
- .watchos => .VERSION_MIN_WATCHOS,
+pub fn writeVersionMinLC(platform: Options.Platform, sdk_version: ?Options.Version, writer: anytype) !void {
+ const cmd: macho.LC = switch (platform.platform) {
+ .MACOS => .VERSION_MIN_MACOSX,
+ .IOS, .IOSSIMULATOR => .VERSION_MIN_IPHONEOS,
+ .TVOS, .TVOSSIMULATOR => .VERSION_MIN_TVOS,
+ .WATCHOS, .WATCHOSSIMULATOR => .VERSION_MIN_WATCHOS,
else => unreachable,
};
- try lc_writer.writeAll(mem.asBytes(&macho.version_min_command{
+ try writer.writeAll(mem.asBytes(&macho.version_min_command{
.cmd = cmd,
- .version = platform.toAppleVersion(),
- .sdk = if (sdk_version) |ver| semanticVersionToAppleVersion(ver) else platform.toAppleVersion(),
+ .version = platform.version.value,
+ .sdk = if (sdk_version) |ver| ver.value else platform.version.value,
}));
}
-pub fn writeBuildVersionLC(platform: Platform, sdk_version: ?std.SemanticVersion, lc_writer: anytype) !void {
+pub fn writeBuildVersionLC(platform: Options.Platform, sdk_version: ?Options.Version, writer: anytype) !void {
const cmdsize = @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version);
- try lc_writer.writeStruct(macho.build_version_command{
+ try writer.writeStruct(macho.build_version_command{
.cmdsize = cmdsize,
- .platform = platform.toApplePlatform(),
- .minos = platform.toAppleVersion(),
- .sdk = if (sdk_version) |ver| semanticVersionToAppleVersion(ver) else platform.toAppleVersion(),
+ .platform = platform.platform,
+ .minos = platform.version.value,
+ .sdk = if (sdk_version) |ver| ver.value else platform.version.value,
.ntools = 1,
});
- try lc_writer.writeAll(mem.asBytes(&macho.build_tool_version{
- .tool = .ZIG,
+ try writer.writeAll(mem.asBytes(&macho.build_tool_version{
+ .tool = @as(macho.TOOL, @enumFromInt(0x6)),
.version = 0x0,
}));
}
-
-pub fn writeLoadDylibLCs(dylibs: []const Dylib, referenced: []u16, lc_writer: anytype) !void {
- for (referenced) |index| {
- const dylib = dylibs[index];
- const dylib_id = dylib.id orelse unreachable;
- try writeDylibLC(.{
- .cmd = if (dylib.weak) .LOAD_WEAK_DYLIB else .LOAD_DYLIB,
- .name = dylib_id.name,
- .timestamp = dylib_id.timestamp,
- .current_version = dylib_id.current_version,
- .compatibility_version = dylib_id.compatibility_version,
- }, lc_writer);
- }
-}
-
-pub const Platform = struct {
- os_tag: std.Target.Os.Tag,
- abi: std.Target.Abi,
- version: std.SemanticVersion,
-
- /// Using Apple's ld64 as our blueprint, `min_version` as well as `sdk_version` are set to
- /// the extracted minimum platform version.
- pub fn fromLoadCommand(lc: macho.LoadCommandIterator.LoadCommand) Platform {
- switch (lc.cmd()) {
- .BUILD_VERSION => {
- const cmd = lc.cast(macho.build_version_command).?;
- return .{
- .os_tag = switch (cmd.platform) {
- .MACOS => .macos,
- .IOS, .IOSSIMULATOR => .ios,
- .TVOS, .TVOSSIMULATOR => .tvos,
- .WATCHOS, .WATCHOSSIMULATOR => .watchos,
- else => @panic("TODO"),
- },
- .abi = switch (cmd.platform) {
- .IOSSIMULATOR,
- .TVOSSIMULATOR,
- .WATCHOSSIMULATOR,
- => .simulator,
- else => .none,
- },
- .version = appleVersionToSemanticVersion(cmd.minos),
- };
- },
- .VERSION_MIN_MACOSX,
- .VERSION_MIN_IPHONEOS,
- .VERSION_MIN_TVOS,
- .VERSION_MIN_WATCHOS,
- => {
- const cmd = lc.cast(macho.version_min_command).?;
- return .{
- .os_tag = switch (lc.cmd()) {
- .VERSION_MIN_MACOSX => .macos,
- .VERSION_MIN_IPHONEOS => .ios,
- .VERSION_MIN_TVOS => .tvos,
- .VERSION_MIN_WATCHOS => .watchos,
- else => unreachable,
- },
- .abi = .none,
- .version = appleVersionToSemanticVersion(cmd.version),
- };
- },
- else => unreachable,
- }
- }
-
- pub fn fromTarget(target: std.Target) Platform {
- return .{
- .os_tag = target.os.tag,
- .abi = target.abi,
- .version = target.os.version_range.semver.min,
- };
- }
-
- pub fn toAppleVersion(plat: Platform) u32 {
- return semanticVersionToAppleVersion(plat.version);
- }
-
- pub fn toApplePlatform(plat: Platform) macho.PLATFORM {
- return switch (plat.os_tag) {
- .macos => .MACOS,
- .ios => if (plat.abi == .simulator) .IOSSIMULATOR else .IOS,
- .tvos => if (plat.abi == .simulator) .TVOSSIMULATOR else .TVOS,
- .watchos => if (plat.abi == .simulator) .WATCHOSSIMULATOR else .WATCHOS,
- else => unreachable,
- };
- }
-
- pub fn isBuildVersionCompatible(plat: Platform) bool {
- inline for (supported_platforms) |sup_plat| {
- if (sup_plat[0] == plat.os_tag and sup_plat[1] == plat.abi) {
- return sup_plat[2] <= plat.toAppleVersion();
- }
- }
- return false;
- }
-
- pub fn isVersionMinCompatible(plat: Platform) bool {
- inline for (supported_platforms) |sup_plat| {
- if (sup_plat[0] == plat.os_tag and sup_plat[1] == plat.abi) {
- return sup_plat[3] <= plat.toAppleVersion();
- }
- }
- return false;
- }
-
- pub fn fmtTarget(plat: Platform, cpu_arch: std.Target.Cpu.Arch) std.fmt.Formatter(formatTarget) {
- return .{ .data = .{ .platform = plat, .cpu_arch = cpu_arch } };
- }
-
- const FmtCtx = struct {
- platform: Platform,
- cpu_arch: std.Target.Cpu.Arch,
- };
-
- pub fn formatTarget(
- ctx: FmtCtx,
- comptime unused_fmt_string: []const u8,
- options: std.fmt.FormatOptions,
- writer: anytype,
- ) !void {
- _ = unused_fmt_string;
- _ = options;
- try writer.print("{s}-{s}", .{ @tagName(ctx.cpu_arch), @tagName(ctx.platform.os_tag) });
- if (ctx.platform.abi != .none) {
- try writer.print("-{s}", .{@tagName(ctx.platform.abi)});
- }
- }
-
- /// Caller owns the memory.
- pub fn allocPrintTarget(plat: Platform, gpa: Allocator, cpu_arch: std.Target.Cpu.Arch) error{OutOfMemory}![]u8 {
- var buffer = std.ArrayList(u8).init(gpa);
- defer buffer.deinit();
- try buffer.writer().print("{}", .{plat.fmtTarget(cpu_arch)});
- return buffer.toOwnedSlice();
- }
-
- pub fn eqlTarget(plat: Platform, other: Platform) bool {
- return plat.os_tag == other.os_tag and plat.abi == other.abi;
- }
-};
-
-const SupportedPlatforms = struct {
- std.Target.Os.Tag,
- std.Target.Abi,
- u32, // Min platform version for which to emit LC_BUILD_VERSION
- u32, // Min supported platform version
-};
-
-// Source: https://github.com/apple-oss-distributions/ld64/blob/59a99ab60399c5e6c49e6945a9e1049c42b71135/src/ld/PlatformSupport.cpp#L52
-// zig fmt: off
-const supported_platforms = [_]SupportedPlatforms{
- .{ .macos, .none, 0xA0E00, 0xA0800 },
- .{ .ios, .none, 0xC0000, 0x70000 },
- .{ .tvos, .none, 0xC0000, 0x70000 },
- .{ .watchos, .none, 0x50000, 0x20000 },
- .{ .ios, .simulator, 0xD0000, 0x80000 },
- .{ .tvos, .simulator, 0xD0000, 0x80000 },
- .{ .watchos, .simulator, 0x60000, 0x20000 },
-};
-// zig fmt: on
-
-inline fn semanticVersionToAppleVersion(version: std.SemanticVersion) u32 {
- const major = version.major;
- const minor = version.minor;
- const patch = version.patch;
- return (@as(u32, @intCast(major)) << 16) | (@as(u32, @intCast(minor)) << 8) | @as(u32, @intCast(patch));
-}
-
-pub inline fn appleVersionToSemanticVersion(version: u32) std.SemanticVersion {
- return .{
- .major = @as(u16, @truncate(version >> 16)),
- .minor = @as(u8, @truncate(version >> 8)),
- .patch = @as(u8, @truncate(version)),
- };
-}
-
-pub fn inferSdkVersion(macho_file: *MachO) ?std.SemanticVersion {
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
-
- var arena_allocator = std.heap.ArenaAllocator.init(gpa);
- defer arena_allocator.deinit();
- const arena = arena_allocator.allocator();
-
- const sdk_layout = macho_file.sdk_layout orelse return null;
- const sdk_dir = switch (sdk_layout) {
- .sdk => comp.sysroot.?,
- .vendored => std.fs.path.join(arena, &.{ comp.zig_lib_directory.path.?, "libc", "darwin" }) catch return null,
- };
- if (readSdkVersionFromSettings(arena, sdk_dir)) |ver| {
- return parseSdkVersion(ver);
- } else |_| {
- // Read from settings should always succeed when vendored.
- if (sdk_layout == .vendored) @panic("zig installation bug: unable to parse SDK version");
- }
-
- // infer from pathname
- const stem = std.fs.path.stem(sdk_dir);
- const start = for (stem, 0..) |c, i| {
- if (std.ascii.isDigit(c)) break i;
- } else stem.len;
- const end = for (stem[start..], start..) |c, i| {
- if (std.ascii.isDigit(c) or c == '.') continue;
- break i;
- } else stem.len;
- return parseSdkVersion(stem[start..end]);
-}
-
-// Official Apple SDKs ship with a `SDKSettings.json` located at the top of SDK fs layout.
-// Use property `MinimalDisplayName` to determine version.
-// The file/property is also available with vendored libc.
-fn readSdkVersionFromSettings(arena: Allocator, dir: []const u8) ![]const u8 {
- const sdk_path = try std.fs.path.join(arena, &.{ dir, "SDKSettings.json" });
- const contents = try std.fs.cwd().readFileAlloc(arena, sdk_path, std.math.maxInt(u16));
- const parsed = try std.json.parseFromSlice(std.json.Value, arena, contents, .{});
- if (parsed.value.object.get("MinimalDisplayName")) |ver| return ver.string;
- return error.SdkVersionFailure;
-}
-
-// Versions reported by Apple aren't exactly semantically valid as they usually omit
-// the patch component, so we parse SDK value by hand.
-fn parseSdkVersion(raw: []const u8) ?std.SemanticVersion {
- var parsed: std.SemanticVersion = .{
- .major = 0,
- .minor = 0,
- .patch = 0,
- };
-
- const parseNext = struct {
- fn parseNext(it: anytype) ?u16 {
- const nn = it.next() orelse return null;
- return std.fmt.parseInt(u16, nn, 10) catch null;
- }
- }.parseNext;
-
- var it = std.mem.splitAny(u8, raw, ".");
- parsed.major = parseNext(&it) orelse return null;
- parsed.minor = parseNext(&it) orelse return null;
- parsed.patch = parseNext(&it) orelse 0;
- return parsed;
-}
-
-const expect = std.testing.expect;
-const expectEqual = std.testing.expectEqual;
-
-fn testParseSdkVersionSuccess(exp: std.SemanticVersion, raw: []const u8) !void {
- const maybe_ver = parseSdkVersion(raw);
- try expect(maybe_ver != null);
- const ver = maybe_ver.?;
- try expectEqual(exp.major, ver.major);
- try expectEqual(exp.minor, ver.minor);
- try expectEqual(exp.patch, ver.patch);
-}
-
-test "parseSdkVersion" {
- try testParseSdkVersionSuccess(.{ .major = 13, .minor = 4, .patch = 0 }, "13.4");
- try testParseSdkVersionSuccess(.{ .major = 13, .minor = 4, .patch = 1 }, "13.4.1");
- try testParseSdkVersionSuccess(.{ .major = 11, .minor = 15, .patch = 0 }, "11.15");
-
- try expect(parseSdkVersion("11") == null);
-}
-
-const std = @import("std");
-const assert = std.debug.assert;
-const link = @import("../../link.zig");
-const log = std.log.scoped(.link);
-const macho = std.macho;
-const mem = std.mem;
-
-const Allocator = mem.Allocator;
-const Dylib = @import("Dylib.zig");
-const MachO = @import("../MachO.zig");
-const Compilation = @import("../../Compilation.zig");
src/link/MachO/Object.zig
@@ -1,1130 +1,2093 @@
-//! Represents an input relocatable Object file.
-//! Each Object is fully loaded into memory for easier
-//! access into different data within.
-
-name: []const u8,
+archive: ?[]const u8 = null,
+path: []const u8,
mtime: u64,
-contents: []align(@alignOf(u64)) const u8,
-
-header: macho.mach_header_64 = undefined,
-
-/// Symtab and strtab might not exist for empty object files so we use an optional
-/// to signal this.
-in_symtab: ?[]align(1) const macho.nlist_64 = null,
-in_strtab: ?[]const u8 = null,
-
-/// Output symtab is sorted so that we can easily reference symbols following each
-/// other in address space.
-/// The length of the symtab is at least of the input symtab length however there
-/// can be trailing section symbols.
-symtab: []macho.nlist_64 = undefined,
-/// Can be undefined as set together with in_symtab.
-source_symtab_lookup: []u32 = undefined,
-/// Can be undefined as set together with in_symtab.
-reverse_symtab_lookup: []u32 = undefined,
-/// Can be undefined as set together with in_symtab.
-source_address_lookup: []i64 = undefined,
-/// Can be undefined as set together with in_symtab.
-source_section_index_lookup: []Entry = undefined,
-/// Can be undefined as set together with in_symtab.
-strtab_lookup: []u32 = undefined,
-/// Can be undefined as set together with in_symtab.
-atom_by_index_table: []?Atom.Index = undefined,
-/// Can be undefined as set together with in_symtab.
-globals_lookup: []i64 = undefined,
-/// Can be undefined as set together with in_symtab.
-relocs_lookup: []Entry = undefined,
-
-/// All relocations sorted and flatened, sorted by address descending
-/// per section.
-relocations: std.ArrayListUnmanaged(macho.relocation_info) = .{},
-/// Beginning index to the relocations array for each input section
-/// defined within this Object file.
-section_relocs_lookup: std.ArrayListUnmanaged(u32) = .{},
-
-/// Data-in-code records sorted by address.
-data_in_code: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},
-
-atoms: std.ArrayListUnmanaged(Atom.Index) = .{},
-exec_atoms: std.ArrayListUnmanaged(Atom.Index) = .{},
-
-eh_frame_sect_id: ?u8 = null,
-eh_frame_relocs_lookup: std.AutoArrayHashMapUnmanaged(u32, Record) = .{},
-eh_frame_records_lookup: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, u32) = .{},
+data: []const u8,
+index: File.Index,
-unwind_info_sect_id: ?u8 = null,
-unwind_relocs_lookup: []Record = undefined,
-unwind_records_lookup: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{},
+header: ?macho.mach_header_64 = null,
+sections: std.MultiArrayList(Section) = .{},
+symtab: std.MultiArrayList(Nlist) = .{},
+strtab: []const u8 = &[0]u8{},
-const Entry = struct {
- start: u32 = 0,
- len: u32 = 0,
-};
-
-const Record = struct {
- dead: bool,
- reloc: Entry,
-};
-
-pub fn isObject(file: std.fs.File) bool {
- const reader = file.reader();
- const hdr = reader.readStruct(macho.mach_header_64) catch return false;
- defer file.seekTo(0) catch {};
- return hdr.filetype == macho.MH_OBJECT;
-}
+symbols: std.ArrayListUnmanaged(Symbol.Index) = .{},
+atoms: std.ArrayListUnmanaged(Atom.Index) = .{},
-pub fn deinit(self: *Object, gpa: Allocator) void {
- self.atoms.deinit(gpa);
- self.exec_atoms.deinit(gpa);
- gpa.free(self.name);
- gpa.free(self.contents);
- if (self.in_symtab) |_| {
- gpa.free(self.source_symtab_lookup);
- gpa.free(self.reverse_symtab_lookup);
- gpa.free(self.source_address_lookup);
- gpa.free(self.source_section_index_lookup);
- gpa.free(self.strtab_lookup);
- gpa.free(self.symtab);
- gpa.free(self.atom_by_index_table);
- gpa.free(self.globals_lookup);
- gpa.free(self.relocs_lookup);
+platform: ?MachO.Options.Platform = null,
+dwarf_info: ?DwarfInfo = null,
+stab_files: std.ArrayListUnmanaged(StabFile) = .{},
+
+eh_frame_sect_index: ?u8 = null,
+compact_unwind_sect_index: ?u8 = null,
+cies: std.ArrayListUnmanaged(Cie) = .{},
+fdes: std.ArrayListUnmanaged(Fde) = .{},
+eh_frame_data: std.ArrayListUnmanaged(u8) = .{},
+unwind_records: std.ArrayListUnmanaged(UnwindInfo.Record.Index) = .{},
+
+alive: bool = true,
+hidden: bool = false,
+num_rebase_relocs: u32 = 0,
+num_bind_relocs: u32 = 0,
+num_weak_bind_relocs: u32 = 0,
+
+output_symtab_ctx: MachO.SymtabCtx = .{},
+
+pub fn deinit(self: *Object, allocator: Allocator) void {
+ for (self.sections.items(.relocs), self.sections.items(.subsections)) |*relocs, *sub| {
+ relocs.deinit(allocator);
+ sub.deinit(allocator);
}
- self.eh_frame_relocs_lookup.deinit(gpa);
- self.eh_frame_records_lookup.deinit(gpa);
- if (self.hasUnwindRecords()) {
- gpa.free(self.unwind_relocs_lookup);
+ self.sections.deinit(allocator);
+ self.symtab.deinit(allocator);
+ self.symbols.deinit(allocator);
+ self.atoms.deinit(allocator);
+ self.cies.deinit(allocator);
+ self.fdes.deinit(allocator);
+ self.eh_frame_data.deinit(allocator);
+ self.unwind_records.deinit(allocator);
+ if (self.dwarf_info) |*dw| dw.deinit(allocator);
+ for (self.stab_files.items) |*sf| {
+ sf.stabs.deinit(allocator);
}
- self.unwind_records_lookup.deinit(gpa);
- self.relocations.deinit(gpa);
- self.section_relocs_lookup.deinit(gpa);
- self.data_in_code.deinit(gpa);
+ self.stab_files.deinit(allocator);
}
-pub fn parse(self: *Object, allocator: Allocator) !void {
- var stream = std.io.fixedBufferStream(self.contents);
+pub fn parse(self: *Object, macho_file: *MachO) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+
+ const gpa = macho_file.base.allocator;
+ var stream = std.io.fixedBufferStream(self.data);
const reader = stream.reader();
self.header = try reader.readStruct(macho.mach_header_64);
- var it = LoadCommandIterator{
- .ncmds = self.header.ncmds,
- .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds],
- };
- const nsects = self.getSourceSections().len;
-
- // Prepopulate relocations per section lookup table.
- try self.section_relocs_lookup.resize(allocator, nsects);
- @memset(self.section_relocs_lookup.items, 0);
-
- // Parse symtab.
- const symtab = while (it.next()) |cmd| switch (cmd.cmd()) {
- .SYMTAB => break cmd.cast(macho.symtab_command).?,
- else => {},
- } else return;
-
- self.in_symtab = @as([*]align(1) const macho.nlist_64, @ptrCast(self.contents.ptr + symtab.symoff))[0..symtab.nsyms];
- self.in_strtab = self.contents[symtab.stroff..][0..symtab.strsize];
-
- self.symtab = try allocator.alloc(macho.nlist_64, self.in_symtab.?.len + nsects);
- self.source_symtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len);
- self.reverse_symtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len);
- self.strtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len);
- self.globals_lookup = try allocator.alloc(i64, self.in_symtab.?.len);
- self.atom_by_index_table = try allocator.alloc(?Atom.Index, self.in_symtab.?.len + nsects);
- self.relocs_lookup = try allocator.alloc(Entry, self.in_symtab.?.len + nsects);
- // This is wasteful but we need to be able to lookup source symbol address after stripping and
- // allocating of sections.
- self.source_address_lookup = try allocator.alloc(i64, self.in_symtab.?.len);
- self.source_section_index_lookup = try allocator.alloc(Entry, nsects);
-
- for (self.symtab) |*sym| {
- sym.* = .{
- .n_value = 0,
- .n_sect = 0,
- .n_desc = 0,
- .n_strx = 0,
- .n_type = 0,
- };
+ if (self.getLoadCommand(.SEGMENT_64)) |lc| {
+ const sections = lc.getSections();
+ try self.sections.ensureUnusedCapacity(gpa, sections.len);
+ for (sections) |sect| {
+ const index = try self.sections.addOne(gpa);
+ self.sections.set(index, .{ .header = sect });
+
+ if (mem.eql(u8, sect.sectName(), "__eh_frame")) {
+ self.eh_frame_sect_index = @intCast(index);
+ } else if (mem.eql(u8, sect.sectName(), "__compact_unwind")) {
+ self.compact_unwind_sect_index = @intCast(index);
+ }
+ }
}
-
- @memset(self.globals_lookup, -1);
- @memset(self.atom_by_index_table, null);
- @memset(self.source_section_index_lookup, .{});
- @memset(self.relocs_lookup, .{});
-
- // You would expect that the symbol table is at least pre-sorted based on symbol's type:
- // local < extern defined < undefined. Unfortunately, this is not guaranteed! For instance,
- // the GO compiler does not necessarily respect that therefore we sort immediately by type
- // and address within.
- var sorted_all_syms = try std.ArrayList(SymbolAtIndex).initCapacity(allocator, self.in_symtab.?.len);
- defer sorted_all_syms.deinit();
-
- for (self.in_symtab.?, 0..) |_, index| {
- sorted_all_syms.appendAssumeCapacity(.{ .index = @as(u32, @intCast(index)) });
+ if (self.getLoadCommand(.SYMTAB)) |lc| {
+ const cmd = lc.cast(macho.symtab_command).?;
+ self.strtab = self.data[cmd.stroff..][0..cmd.strsize];
+
+ const symtab = @as([*]align(1) const macho.nlist_64, @ptrCast(self.data.ptr + cmd.symoff))[0..cmd.nsyms];
+ try self.symtab.ensureUnusedCapacity(gpa, symtab.len);
+ for (symtab) |nlist| {
+ self.symtab.appendAssumeCapacity(.{
+ .nlist = nlist,
+ .atom = 0,
+ .size = 0,
+ });
+ }
}
- // We sort by type: defined < undefined, and
- // afterwards by address in each group. Normally, dysymtab should
- // be enough to guarantee the sort, but turns out not every compiler
- // is kind enough to specify the symbols in the correct order.
- mem.sort(SymbolAtIndex, sorted_all_syms.items, self, SymbolAtIndex.lessThan);
-
- var prev_sect_id: u8 = 0;
- var section_index_lookup: ?Entry = null;
- for (sorted_all_syms.items, 0..) |sym_id, i| {
- const sym = sym_id.getSymbol(self);
-
- if (section_index_lookup) |*lookup| {
- if (sym.n_sect != prev_sect_id or sym.undf()) {
- self.source_section_index_lookup[prev_sect_id - 1] = lookup.*;
- section_index_lookup = null;
- } else {
- lookup.len += 1;
+ const NlistIdx = struct {
+ nlist: macho.nlist_64,
+ idx: usize,
+
+ fn rank(ctx: *const Object, nl: macho.nlist_64) u8 {
+ if (!nl.ext()) {
+ const name = ctx.getString(nl.n_strx);
+ if (name.len == 0) return 5;
+ if (name[0] == 'l' or name[0] == 'L') return 4;
+ return 3;
}
+ return if (nl.weakDef()) 2 else 1;
}
- if (sym.sect() and section_index_lookup == null) {
- section_index_lookup = .{ .start = @as(u32, @intCast(i)), .len = 1 };
+
+ fn lessThan(ctx: *const Object, lhs: @This(), rhs: @This()) bool {
+ if (lhs.nlist.n_sect == rhs.nlist.n_sect) {
+ if (lhs.nlist.n_value == rhs.nlist.n_value) {
+ return rank(ctx, lhs.nlist) < rank(ctx, rhs.nlist);
+ }
+ return lhs.nlist.n_value < rhs.nlist.n_value;
+ }
+ return lhs.nlist.n_sect < rhs.nlist.n_sect;
}
+ };
- prev_sect_id = sym.n_sect;
+ var nlists = try std.ArrayList(NlistIdx).initCapacity(gpa, self.symtab.items(.nlist).len);
+ defer nlists.deinit();
+ for (self.symtab.items(.nlist), 0..) |nlist, i| {
+ if (nlist.stab() or !nlist.sect()) continue;
+ nlists.appendAssumeCapacity(.{ .nlist = nlist, .idx = i });
+ }
+ mem.sort(NlistIdx, nlists.items, self, NlistIdx.lessThan);
- self.symtab[i] = sym;
- self.source_symtab_lookup[i] = sym_id.index;
- self.reverse_symtab_lookup[sym_id.index] = @as(u32, @intCast(i));
- self.source_address_lookup[i] = if (sym.undf()) -1 else @as(i64, @intCast(sym.n_value));
+ if (self.hasSubsections()) {
+ try self.initSubsections(nlists.items, macho_file);
+ } else {
+ try self.initSections(nlists.items, macho_file);
+ }
- const sym_name_len = mem.sliceTo(@as([*:0]const u8, @ptrCast(self.in_strtab.?.ptr + sym.n_strx)), 0).len + 1;
- self.strtab_lookup[i] = @as(u32, @intCast(sym_name_len));
+ try self.initLiteralSections(macho_file);
+ try self.linkNlistToAtom(macho_file);
+
+ try self.sortAtoms(macho_file);
+ try self.initSymbols(macho_file);
+ try self.initSymbolStabs(nlists.items, macho_file);
+ try self.initRelocs(macho_file);
+
+ if (self.eh_frame_sect_index) |index| {
+ try self.initEhFrameRecords(index, macho_file);
}
- // If there were no undefined symbols, make sure we populate the
- // source section index lookup for the last scanned section.
- if (section_index_lookup) |lookup| {
- self.source_section_index_lookup[prev_sect_id - 1] = lookup;
+ if (self.compact_unwind_sect_index) |index| {
+ try self.initUnwindRecords(index, macho_file);
}
- // Parse __TEXT,__eh_frame header if one exists
- self.eh_frame_sect_id = self.getSourceSectionIndexByName("__TEXT", "__eh_frame");
+ self.initPlatform();
+ try self.initDwarfInfo(macho_file);
+
+ for (self.atoms.items) |atom_index| {
+ const atom = macho_file.getAtom(atom_index).?;
+ const isec = atom.getInputSection(macho_file);
+ if (mem.eql(u8, isec.sectName(), "__eh_frame") or
+ mem.eql(u8, isec.sectName(), "__compact_unwind") or
+ isec.attrs() & macho.S_ATTR_DEBUG != 0)
+ {
+ atom.flags.alive = false;
+ }
+ }
+}
- // Parse __LD,__compact_unwind header if one exists
- self.unwind_info_sect_id = self.getSourceSectionIndexByName("__LD", "__compact_unwind");
- if (self.hasUnwindRecords()) {
- self.unwind_relocs_lookup = try allocator.alloc(Record, self.getUnwindRecords().len);
- @memset(self.unwind_relocs_lookup, .{ .dead = true, .reloc = .{} });
+inline fn isLiteral(sect: macho.section_64) bool {
+ return switch (sect.type()) {
+ macho.S_CSTRING_LITERALS,
+ macho.S_4BYTE_LITERALS,
+ macho.S_8BYTE_LITERALS,
+ macho.S_16BYTE_LITERALS,
+ macho.S_LITERAL_POINTERS,
+ => true,
+ else => false,
+ };
+}
+
+fn initSubsections(self: *Object, nlists: anytype, macho_file: *MachO) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+ const gpa = macho_file.base.allocator;
+ const slice = self.sections.slice();
+ for (slice.items(.header), slice.items(.subsections), 0..) |sect, *subsections, n_sect| {
+ if (isLiteral(sect)) continue;
+
+ const nlist_start = for (nlists, 0..) |nlist, i| {
+ if (nlist.nlist.n_sect - 1 == n_sect) break i;
+ } else nlists.len;
+ const nlist_end = for (nlists[nlist_start..], nlist_start..) |nlist, i| {
+ if (nlist.nlist.n_sect - 1 != n_sect) break i;
+ } else nlists.len;
+
+ if (nlist_start == nlist_end or nlists[nlist_start].nlist.n_value > sect.addr) {
+ const name = try std.fmt.allocPrintZ(gpa, "{s}${s}", .{ sect.segName(), sect.sectName() });
+ defer gpa.free(name);
+ const size = if (nlist_start == nlist_end) sect.size else nlists[nlist_start].nlist.n_value - sect.addr;
+ const atom_index = try self.addAtom(.{
+ .name = name,
+ .n_sect = @intCast(n_sect),
+ .off = 0,
+ .size = size,
+ .alignment = sect.@"align",
+ }, macho_file);
+ try subsections.append(gpa, .{
+ .atom = atom_index,
+ .off = 0,
+ });
+ }
+
+ var idx: usize = nlist_start;
+ while (idx < nlist_end) {
+ const alias_start = idx;
+ const nlist = nlists[alias_start];
+
+ while (idx < nlist_end and
+ nlists[idx].nlist.n_value == nlist.nlist.n_value) : (idx += 1)
+ {}
+
+ const size = if (idx < nlist_end)
+ nlists[idx].nlist.n_value - nlist.nlist.n_value
+ else
+ sect.addr + sect.size - nlist.nlist.n_value;
+ const alignment = if (nlist.nlist.n_value > 0)
+ @min(@ctz(nlist.nlist.n_value), sect.@"align")
+ else
+ sect.@"align";
+ const atom_index = try self.addAtom(.{
+ .name = self.getString(nlist.nlist.n_strx),
+ .n_sect = @intCast(n_sect),
+ .off = nlist.nlist.n_value - sect.addr,
+ .size = size,
+ .alignment = alignment,
+ }, macho_file);
+ try subsections.append(gpa, .{
+ .atom = atom_index,
+ .off = nlist.nlist.n_value - sect.addr,
+ });
+
+ for (alias_start..idx) |i| {
+ self.symtab.items(.size)[nlists[i].idx] = size;
+ }
+ }
}
}
-const SymbolAtIndex = struct {
- index: u32,
+fn initSections(self: *Object, nlists: anytype, macho_file: *MachO) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+ const gpa = macho_file.base.allocator;
+ const slice = self.sections.slice();
+
+ try self.atoms.ensureUnusedCapacity(gpa, self.sections.items(.header).len);
+
+ for (slice.items(.header), 0..) |sect, n_sect| {
+ if (isLiteral(sect)) continue;
+
+ const name = try std.fmt.allocPrintZ(gpa, "{s}${s}", .{ sect.segName(), sect.sectName() });
+ defer gpa.free(name);
+
+ const atom_index = try self.addAtom(.{
+ .name = name,
+ .n_sect = @intCast(n_sect),
+ .off = 0,
+ .size = sect.size,
+ .alignment = sect.@"align",
+ }, macho_file);
+ try slice.items(.subsections)[n_sect].append(gpa, .{ .atom = atom_index, .off = 0 });
+
+ const nlist_start = for (nlists, 0..) |nlist, i| {
+ if (nlist.nlist.n_sect - 1 == n_sect) break i;
+ } else nlists.len;
+ const nlist_end = for (nlists[nlist_start..], nlist_start..) |nlist, i| {
+ if (nlist.nlist.n_sect - 1 != n_sect) break i;
+ } else nlists.len;
+
+ var idx: usize = nlist_start;
+ while (idx < nlist_end) {
+ const nlist = nlists[idx];
+
+ while (idx < nlist_end and
+ nlists[idx].nlist.n_value == nlist.nlist.n_value) : (idx += 1)
+ {}
+
+ const size = if (idx < nlist_end)
+ nlists[idx].nlist.n_value - nlist.nlist.n_value
+ else
+ sect.addr + sect.size - nlist.nlist.n_value;
+
+ for (nlist_start..idx) |i| {
+ self.symtab.items(.size)[nlists[i].idx] = size;
+ }
+ }
+ }
+}
- const Context = *const Object;
+const AddAtomArgs = struct {
+ name: [:0]const u8,
+ n_sect: u8,
+ off: u64,
+ size: u64,
+ alignment: u32,
+};
- fn getSymbol(self: SymbolAtIndex, ctx: Context) macho.nlist_64 {
- return ctx.in_symtab.?[self.index];
+fn addAtom(self: *Object, args: AddAtomArgs, macho_file: *MachO) !Atom.Index {
+ const gpa = macho_file.base.allocator;
+ const atom_index = try macho_file.addAtom();
+ const atom = macho_file.getAtom(atom_index).?;
+ atom.file = self.index;
+ atom.atom_index = atom_index;
+ atom.name = try macho_file.string_intern.insert(gpa, args.name);
+ atom.n_sect = args.n_sect;
+ atom.size = args.size;
+ atom.alignment = args.alignment;
+ atom.off = args.off;
+ try self.atoms.append(gpa, atom_index);
+ return atom_index;
+}
+
+fn initLiteralSections(self: *Object, macho_file: *MachO) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+ // TODO here we should split into equal-sized records, hash the contents, and then
+ // deduplicate - ICF.
+ // For now, we simply cover each literal section with one large atom.
+ const gpa = macho_file.base.allocator;
+ const slice = self.sections.slice();
+
+ try self.atoms.ensureUnusedCapacity(gpa, self.sections.items(.header).len);
+
+ for (slice.items(.header), 0..) |sect, n_sect| {
+ if (!isLiteral(sect)) continue;
+
+ const name = try std.fmt.allocPrintZ(gpa, "{s}${s}", .{ sect.segName(), sect.sectName() });
+ defer gpa.free(name);
+
+ const atom_index = try self.addAtom(.{
+ .name = name,
+ .n_sect = @intCast(n_sect),
+ .off = 0,
+ .size = sect.size,
+ .alignment = sect.@"align",
+ }, macho_file);
+ try slice.items(.subsections)[n_sect].append(gpa, .{ .atom = atom_index, .off = 0 });
}
+}
- fn getSymbolName(self: SymbolAtIndex, ctx: Context) []const u8 {
- const off = self.getSymbol(ctx).n_strx;
- return mem.sliceTo(@as([*:0]const u8, @ptrCast(ctx.in_strtab.?.ptr + off)), 0);
+pub fn findAtom(self: Object, addr: u64) ?Atom.Index {
+ const tracy = trace(@src());
+ defer tracy.end();
+ const slice = self.sections.slice();
+ for (slice.items(.header), slice.items(.subsections), 0..) |sect, subs, n_sect| {
+ if (subs.items.len == 0) continue;
+ if (sect.addr == addr) return subs.items[0].atom;
+ if (sect.addr < addr and addr < sect.addr + sect.size) {
+ return self.findAtomInSection(addr, @intCast(n_sect));
+ }
}
+ return null;
+}
- fn getSymbolSeniority(self: SymbolAtIndex, ctx: Context) u2 {
- const sym = self.getSymbol(ctx);
- if (!sym.ext()) {
- const sym_name = self.getSymbolName(ctx);
- if (mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L")) return 3;
- return 2;
+fn findAtomInSection(self: Object, addr: u64, n_sect: u8) ?Atom.Index {
+ const tracy = trace(@src());
+ defer tracy.end();
+ const slice = self.sections.slice();
+ const sect = slice.items(.header)[n_sect];
+ const subsections = slice.items(.subsections)[n_sect];
+
+ var min: usize = 0;
+ var max: usize = subsections.items.len;
+ while (min < max) {
+ const idx = (min + max) / 2;
+ const sub = subsections.items[idx];
+ const sub_addr = sect.addr + sub.off;
+ const sub_size = if (idx + 1 < subsections.items.len)
+ subsections.items[idx + 1].off - sub.off
+ else
+ sect.size - sub.off;
+ if (sub_addr == addr or (sub_addr < addr and addr < sub_addr + sub_size)) return sub.atom;
+ if (sub_addr < addr) {
+ min = idx + 1;
+ } else {
+ max = idx;
}
- if (sym.weakDef() or sym.pext()) return 1;
- return 0;
}
- /// Performs lexicographic-like check.
- /// * lhs and rhs defined
- /// * if lhs == rhs
- /// * if lhs.n_sect == rhs.n_sect
- /// * ext < weak < local < temp
- /// * lhs.n_sect < rhs.n_sect
- /// * lhs < rhs
- /// * !rhs is undefined
- fn lessThan(ctx: Context, lhs_index: SymbolAtIndex, rhs_index: SymbolAtIndex) bool {
- const lhs = lhs_index.getSymbol(ctx);
- const rhs = rhs_index.getSymbol(ctx);
- if (lhs.sect() and rhs.sect()) {
- if (lhs.n_value == rhs.n_value) {
- if (lhs.n_sect == rhs.n_sect) {
- const lhs_senior = lhs_index.getSymbolSeniority(ctx);
- const rhs_senior = rhs_index.getSymbolSeniority(ctx);
- if (lhs_senior == rhs_senior) {
- return lessThanByNStrx(ctx, lhs_index, rhs_index);
- } else return lhs_senior < rhs_senior;
- } else return lhs.n_sect < rhs.n_sect;
- } else return lhs.n_value < rhs.n_value;
- } else if (lhs.undf() and rhs.undf()) {
- return lessThanByNStrx(ctx, lhs_index, rhs_index);
- } else return rhs.undf();
+ if (min < subsections.items.len) {
+ const sub = subsections.items[min];
+ const sub_addr = sect.addr + sub.off;
+ const sub_size = if (min + 1 < subsections.items.len)
+ subsections.items[min + 1].off - sub.off
+ else
+ sect.size - sub.off;
+ if (sub_addr == addr or (sub_addr < addr and addr < sub_addr + sub_size)) return sub.atom;
}
- fn lessThanByNStrx(ctx: Context, lhs: SymbolAtIndex, rhs: SymbolAtIndex) bool {
- return lhs.getSymbol(ctx).n_strx < rhs.getSymbol(ctx).n_strx;
+ return null;
+}
+
+fn linkNlistToAtom(self: *Object, macho_file: *MachO) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+ for (self.symtab.items(.nlist), self.symtab.items(.atom)) |nlist, *atom| {
+ if (!nlist.stab() and nlist.sect()) {
+ if (self.findAtomInSection(nlist.n_value, nlist.n_sect - 1)) |atom_index| {
+ atom.* = atom_index;
+ } else {
+ macho_file.base.fatal("{}: symbol {s} not attached to any (sub)section", .{
+ self.fmtPath(), self.getString(nlist.n_strx),
+ });
+ return error.ParseFailed;
+ }
+ }
}
-};
+}
-fn filterSymbolsBySection(symbols: []macho.nlist_64, n_sect: u8) struct {
- index: u32,
- len: u32,
-} {
- const FirstMatch = struct {
- n_sect: u8,
+fn initSymbols(self: *Object, macho_file: *MachO) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+ const gpa = macho_file.base.allocator;
+ const slice = self.symtab.slice();
+
+ try self.symbols.ensureUnusedCapacity(gpa, slice.items(.nlist).len);
- pub fn predicate(pred: @This(), symbol: macho.nlist_64) bool {
- return symbol.n_sect == pred.n_sect;
+ for (slice.items(.nlist), slice.items(.atom), 0..) |nlist, atom_index, i| {
+ if (nlist.ext()) {
+ const name = self.getString(nlist.n_strx);
+ const off = try macho_file.string_intern.insert(gpa, name);
+ const gop = try macho_file.getOrCreateGlobal(off);
+ self.symbols.addOneAssumeCapacity().* = gop.index;
+ continue;
}
- };
- const FirstNonMatch = struct {
- n_sect: u8,
- pub fn predicate(pred: @This(), symbol: macho.nlist_64) bool {
- return symbol.n_sect != pred.n_sect;
+ const index = try macho_file.addSymbol();
+ self.symbols.appendAssumeCapacity(index);
+ const symbol = macho_file.getSymbol(index);
+ const name = self.getString(nlist.n_strx);
+ symbol.* = .{
+ .value = nlist.n_value,
+ .name = try macho_file.string_intern.insert(gpa, name),
+ .nlist_idx = @intCast(i),
+ .atom = 0,
+ .file = self.index,
+ };
+
+ if (macho_file.getAtom(atom_index)) |atom| {
+ assert(!nlist.abs());
+ symbol.value -= atom.getInputAddress(macho_file);
+ symbol.atom = atom_index;
}
- };
- const index = MachO.lsearch(macho.nlist_64, symbols, FirstMatch{
- .n_sect = n_sect,
- });
- const len = MachO.lsearch(macho.nlist_64, symbols[index..], FirstNonMatch{
- .n_sect = n_sect,
- });
+ symbol.flags.abs = nlist.abs();
+ symbol.flags.no_dead_strip = symbol.flags.no_dead_strip or nlist.noDeadStrip();
- return .{ .index = @as(u32, @intCast(index)), .len = @as(u32, @intCast(len)) };
+ if (nlist.sect() and
+ self.sections.items(.header)[nlist.n_sect - 1].type() == macho.S_THREAD_LOCAL_VARIABLES)
+ {
+ symbol.flags.tlv = true;
+ }
+ }
}
-fn filterSymbolsByAddress(symbols: []macho.nlist_64, start_addr: u64, end_addr: u64) struct {
- index: u32,
- len: u32,
-} {
- const Predicate = struct {
- addr: u64,
+fn initSymbolStabs(self: *Object, nlists: anytype, macho_file: *MachO) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
- pub fn predicate(pred: @This(), symbol: macho.nlist_64) bool {
- return symbol.n_value >= pred.addr;
+ const SymbolLookup = struct {
+ ctx: *const Object,
+ entries: @TypeOf(nlists),
+
+ fn find(fs: @This(), addr: u64) ?Symbol.Index {
+ // TODO binary search since we have the list sorted
+ for (fs.entries) |nlist| {
+ if (nlist.nlist.n_value == addr) return fs.ctx.symbols.items[nlist.idx];
+ }
+ return null;
}
};
- const index = MachO.lsearch(macho.nlist_64, symbols, Predicate{
- .addr = start_addr,
- });
- const len = MachO.lsearch(macho.nlist_64, symbols[index..], Predicate{
- .addr = end_addr,
- });
+ const start: u32 = for (self.symtab.items(.nlist), 0..) |nlist, i| {
+ if (nlist.stab()) break @intCast(i);
+ } else @intCast(self.symtab.items(.nlist).len);
+ const end: u32 = for (self.symtab.items(.nlist)[start..], start..) |nlist, i| {
+ if (!nlist.stab()) break @intCast(i);
+ } else @intCast(self.symtab.items(.nlist).len);
+
+ if (start == end) return;
+
+ const gpa = macho_file.base.allocator;
+ const syms = self.symtab.items(.nlist);
+ const sym_lookup = SymbolLookup{ .ctx = self, .entries = nlists };
+
+ var i: u32 = start;
+ while (i < end) : (i += 1) {
+ const open = syms[i];
+ if (open.n_type != macho.N_SO) {
+ macho_file.base.fatal("{}: unexpected symbol stab type 0x{x} as the first entry", .{
+ self.fmtPath(),
+ open.n_type,
+ });
+ return error.ParseFailed;
+ }
+
+ while (i < end and syms[i].n_type == macho.N_SO and syms[i].n_sect != 0) : (i += 1) {}
+
+ var sf: StabFile = .{ .comp_dir = i };
+ // TODO validate
+ i += 3;
+
+ while (i < end and syms[i].n_type != macho.N_SO) : (i += 1) {
+ const nlist = syms[i];
+ var stab: StabFile.Stab = .{};
+ switch (nlist.n_type) {
+ macho.N_BNSYM => {
+ stab.tag = .func;
+ stab.symbol = sym_lookup.find(nlist.n_value);
+ // TODO validate
+ i += 3;
+ },
+ macho.N_GSYM => {
+ stab.tag = .global;
+ stab.symbol = macho_file.getGlobalByName(self.getString(nlist.n_strx));
+ },
+ macho.N_STSYM => {
+ stab.tag = .static;
+ stab.symbol = sym_lookup.find(nlist.n_value);
+ },
+ else => {
+ macho_file.base.fatal("{}: unhandled symbol stab type 0x{x}", .{
+ self.fmtPath(),
+ nlist.n_type,
+ });
+ return error.ParseFailed;
+ },
+ }
+ try sf.stabs.append(gpa, stab);
+ }
- return .{ .index = @as(u32, @intCast(index)), .len = @as(u32, @intCast(len)) };
+ try self.stab_files.append(gpa, sf);
+ }
}
-const SortedSection = struct {
- header: macho.section_64,
- id: u8,
-};
+fn sortAtoms(self: *Object, macho_file: *MachO) !void {
+ const lessThanAtom = struct {
+ fn lessThanAtom(ctx: *MachO, lhs: Atom.Index, rhs: Atom.Index) bool {
+ return ctx.getAtom(lhs).?.getInputAddress(ctx) < ctx.getAtom(rhs).?.getInputAddress(ctx);
+ }
+ }.lessThanAtom;
+ mem.sort(Atom.Index, self.atoms.items, macho_file, lessThanAtom);
+}
+
+fn initRelocs(self: *Object, macho_file: *MachO) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+ const cpu_arch = macho_file.options.cpu_arch.?;
+ const slice = self.sections.slice();
+
+ for (slice.items(.header), slice.items(.relocs), 0..) |sect, *out, n_sect| {
+ if (sect.nreloc == 0) continue;
+ // We skip relocs for __DWARF since even in -r mode, the linker is expected to emit
+ // debug symbol stabs in the relocatable. This made me curious why that is. For now,
+ // I shall comply, but I wanna compare with dsymutil.
+ if (sect.attrs() & macho.S_ATTR_DEBUG != 0 and
+ !mem.eql(u8, sect.sectName(), "__compact_unwind")) continue;
+
+ switch (cpu_arch) {
+ .x86_64 => try x86_64.parseRelocs(self, @intCast(n_sect), sect, out, macho_file),
+ .aarch64 => try aarch64.parseRelocs(self, @intCast(n_sect), sect, out, macho_file),
+ else => unreachable,
+ }
-fn sectionLessThanByAddress(ctx: void, lhs: SortedSection, rhs: SortedSection) bool {
- _ = ctx;
- if (lhs.header.addr == rhs.header.addr) {
- return lhs.id < rhs.id;
+ mem.sort(Relocation, out.items, {}, Relocation.lessThan);
}
- return lhs.header.addr < rhs.header.addr;
-}
-pub const SplitIntoAtomsError = error{
- OutOfMemory,
- EndOfStream,
- MissingEhFrameSection,
- BadDwarfCfi,
-};
+ for (slice.items(.header), slice.items(.relocs), slice.items(.subsections)) |sect, relocs, subsections| {
+ if (sect.isZerofill()) continue;
+
+ var next_reloc: usize = 0;
+ for (subsections.items) |subsection| {
+ const atom = macho_file.getAtom(subsection.atom).?;
+ if (!atom.flags.alive) continue;
+ if (next_reloc >= relocs.items.len) break;
+ const end_addr = atom.off + atom.size;
+ atom.relocs.pos = next_reloc;
-pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) SplitIntoAtomsError!void {
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
- log.debug("splitting object({d}, {s}) into atoms", .{ object_id, self.name });
+ while (next_reloc < relocs.items.len and relocs.items[next_reloc].offset < end_addr) : (next_reloc += 1) {}
- try self.splitRegularSections(macho_file, object_id);
- try self.parseEhFrameSection(macho_file, object_id);
- try self.parseUnwindInfo(macho_file, object_id);
- try self.parseDataInCode(gpa);
+ atom.relocs.len = next_reloc - atom.relocs.pos;
+ }
+ }
}
-/// Splits input regular sections into Atoms.
-/// If the Object was compiled with `MH_SUBSECTIONS_VIA_SYMBOLS`, splits section
-/// into subsections where each subsection then represents an Atom.
-pub fn splitRegularSections(self: *Object, macho_file: *MachO, object_id: u32) !void {
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
- const target = macho_file.base.comp.root_mod.resolved_target.result;
-
- const sections = self.getSourceSections();
- for (sections, 0..) |sect, id| {
- if (sect.isDebug()) continue;
- const out_sect_id = (try Atom.getOutputSection(macho_file, sect)) orelse {
- log.debug(" unhandled section '{s},{s}'", .{ sect.segName(), sect.sectName() });
- continue;
- };
- if (sect.size == 0) continue;
+fn initEhFrameRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+ const gpa = macho_file.base.allocator;
+ const nlists = self.symtab.items(.nlist);
+ const slice = self.sections.slice();
+ const sect = slice.items(.header)[sect_id];
+ const relocs = slice.items(.relocs)[sect_id];
+
+ const data = self.getSectionData(sect_id);
+ try self.eh_frame_data.ensureTotalCapacityPrecise(gpa, data.len);
+ self.eh_frame_data.appendSliceAssumeCapacity(data);
+
+ // Check for non-personality relocs in FDEs and apply them
+ for (relocs.items, 0..) |rel, i| {
+ switch (rel.type) {
+ .unsigned => {
+ assert((rel.meta.length == 2 or rel.meta.length == 3) and rel.meta.has_subtractor); // TODO error
+ const S: i64 = switch (rel.tag) {
+ .local => rel.meta.symbolnum,
+ .@"extern" => @intCast(nlists[rel.meta.symbolnum].n_value),
+ };
+ const A = rel.addend;
+ const SUB: i64 = blk: {
+ const sub_rel = relocs.items[i - 1];
+ break :blk switch (sub_rel.tag) {
+ .local => sub_rel.meta.symbolnum,
+ .@"extern" => @intCast(nlists[sub_rel.meta.symbolnum].n_value),
+ };
+ };
+ switch (rel.meta.length) {
+ 0, 1 => unreachable,
+ 2 => mem.writeInt(u32, self.eh_frame_data.items[rel.offset..][0..4], @bitCast(@as(i32, @truncate(S + A - SUB))), .little),
+ 3 => mem.writeInt(u64, self.eh_frame_data.items[rel.offset..][0..8], @bitCast(S + A - SUB), .little),
+ }
+ },
+ else => {},
+ }
+ }
- const sect_id = @as(u8, @intCast(id));
- const sym = self.getSectionAliasSymbolPtr(sect_id);
- sym.* = .{
- .n_strx = 0,
- .n_type = macho.N_SECT,
- .n_sect = out_sect_id + 1,
- .n_desc = 0,
- .n_value = sect.addr,
- };
+ var it = eh_frame.Iterator{ .data = self.eh_frame_data.items };
+ while (try it.next()) |rec| {
+ switch (rec.tag) {
+ .cie => try self.cies.append(gpa, .{
+ .offset = rec.offset,
+ .size = rec.size,
+ .file = self.index,
+ }),
+ .fde => try self.fdes.append(gpa, .{
+ .offset = rec.offset,
+ .size = rec.size,
+ .cie = undefined,
+ .file = self.index,
+ }),
+ }
+ }
+
+ for (self.cies.items) |*cie| {
+ try cie.parse(macho_file);
}
- if (self.in_symtab == null) {
- for (sections, 0..) |sect, id| {
- if (sect.isDebug()) continue;
- const out_sect_id = (try Atom.getOutputSection(macho_file, sect)) orelse continue;
- if (sect.size == 0) continue;
-
- const sect_id: u8 = @intCast(id);
- const sym_index = self.getSectionAliasSymbolIndex(sect_id);
- const atom_index = try self.createAtomFromSubsection(
- macho_file,
- object_id,
- sym_index,
- sym_index,
- 1,
- sect.size,
- Alignment.fromLog2Units(sect.@"align"),
- out_sect_id,
- );
- macho_file.addAtomToSection(atom_index);
+ for (self.fdes.items) |*fde| {
+ try fde.parse(macho_file);
+ }
+
+ const sortFn = struct {
+ fn sortFn(ctx: *MachO, lhs: Fde, rhs: Fde) bool {
+ return lhs.getAtom(ctx).getInputAddress(ctx) < rhs.getAtom(ctx).getInputAddress(ctx);
+ }
+ }.sortFn;
+
+ mem.sort(Fde, self.fdes.items, macho_file, sortFn);
+
+ // Parse and attach personality pointers to CIEs if any
+ for (relocs.items) |rel| {
+ switch (rel.type) {
+ .got => {
+ assert(rel.meta.length == 2 and rel.tag == .@"extern");
+ const cie = for (self.cies.items) |*cie| {
+ if (cie.offset <= rel.offset and rel.offset < cie.offset + cie.getSize()) break cie;
+ } else {
+ macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{
+ self.fmtPath(), sect.segName(), sect.sectName(), rel.offset,
+ });
+ return error.ParseFailed;
+ };
+ cie.personality = .{ .index = @intCast(rel.target), .offset = rel.offset - cie.offset };
+ },
+ else => {},
}
- return;
}
+}
+
+fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+
+ const SymbolLookup = struct {
+ ctx: *const Object,
- // Well, shit, sometimes compilers skip the dysymtab load command altogether, meaning we
- // have to infer the start of undef section in the symtab ourselves.
- const iundefsym = blk: {
- const dysymtab = self.getDysymtab() orelse {
- var iundefsym: usize = self.in_symtab.?.len;
- while (iundefsym > 0) : (iundefsym -= 1) {
- const sym = self.symtab[iundefsym - 1];
- if (sym.sect()) break;
+ fn find(fs: @This(), addr: u64) ?Symbol.Index {
+ for (fs.ctx.symbols.items, 0..) |sym_index, i| {
+ const nlist = fs.ctx.symtab.items(.nlist)[i];
+ if (nlist.ext() and nlist.n_value == addr) return sym_index;
}
- break :blk iundefsym;
- };
- break :blk dysymtab.iundefsym;
+ return null;
+ }
};
- // We only care about defined symbols, so filter every other out.
- const symtab = try gpa.dupe(macho.nlist_64, self.symtab[0..iundefsym]);
- defer gpa.free(symtab);
-
- const subsections_via_symbols = self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0;
+ const gpa = macho_file.base.allocator;
+ const data = self.getSectionData(sect_id);
+ const nrecs = @divExact(data.len, @sizeOf(macho.compact_unwind_entry));
+ const recs = @as([*]align(1) const macho.compact_unwind_entry, @ptrCast(data.ptr))[0..nrecs];
+ const sym_lookup = SymbolLookup{ .ctx = self };
+
+ try self.unwind_records.resize(gpa, nrecs);
+
+ const header = self.sections.items(.header)[sect_id];
+ const relocs = self.sections.items(.relocs)[sect_id].items;
+ var reloc_idx: usize = 0;
+ for (recs, self.unwind_records.items, 0..) |rec, *out_index, rec_idx| {
+ const rec_start = rec_idx * @sizeOf(macho.compact_unwind_entry);
+ const rec_end = rec_start + @sizeOf(macho.compact_unwind_entry);
+ const reloc_start = reloc_idx;
+ while (reloc_idx < relocs.len and
+ relocs[reloc_idx].offset < rec_end) : (reloc_idx += 1)
+ {}
+
+ out_index.* = try macho_file.addUnwindRecord();
+ const out = macho_file.getUnwindRecord(out_index.*);
+ out.length = rec.rangeLength;
+ out.enc = .{ .enc = rec.compactUnwindEncoding };
+ out.file = self.index;
+
+ for (relocs[reloc_start..reloc_idx]) |rel| {
+ if (rel.type != .unsigned or rel.meta.length != 3) {
+ macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{
+ self.fmtPath(), header.segName(), header.sectName(), rel.offset,
+ });
+ return error.ParseFailed;
+ }
+ assert(rel.type == .unsigned and rel.meta.length == 3); // TODO error
+ const offset = rel.offset - rec_start;
+ switch (offset) {
+ 0 => switch (rel.tag) { // target symbol
+ .@"extern" => {
+ out.atom = self.symtab.items(.atom)[rel.meta.symbolnum];
+ out.atom_offset = @intCast(rec.rangeStart);
+ },
+ .local => if (self.findAtom(rec.rangeStart)) |atom_index| {
+ out.atom = atom_index;
+ const atom = out.getAtom(macho_file);
+ out.atom_offset = @intCast(rec.rangeStart - atom.getInputAddress(macho_file));
+ } else {
+ macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{
+ self.fmtPath(), header.segName(), header.sectName(), rel.offset,
+ });
+ return error.ParseFailed;
+ },
+ },
+ 16 => switch (rel.tag) { // personality function
+ .@"extern" => {
+ out.personality = rel.target;
+ },
+ .local => if (sym_lookup.find(rec.personalityFunction)) |sym_index| {
+ out.personality = sym_index;
+ } else {
+ macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{
+ self.fmtPath(), header.segName(), header.sectName(), rel.offset,
+ });
+ return error.ParseFailed;
+ },
+ },
+ 24 => switch (rel.tag) { // lsda
+ .@"extern" => {
+ out.lsda = self.symtab.items(.atom)[rel.meta.symbolnum];
+ out.lsda_offset = @intCast(rec.lsda);
+ },
+ .local => if (self.findAtom(rec.lsda)) |atom_index| {
+ out.lsda = atom_index;
+ const atom = out.getLsdaAtom(macho_file).?;
+ out.lsda_offset = @intCast(rec.lsda - atom.getInputAddress(macho_file));
+ } else {
+ macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{
+ self.fmtPath(), header.segName(), header.sectName(), rel.offset,
+ });
+ return error.ParseFailed;
+ },
+ },
+ else => {},
+ }
+ }
+ }
- // Sort section headers by address.
- var sorted_sections = try gpa.alloc(SortedSection, sections.len);
- defer gpa.free(sorted_sections);
+ if (!macho_file.options.relocatable) try self.synthesiseNullUnwindRecords(macho_file);
- for (sections, 0..) |sect, id| {
- sorted_sections[id] = .{ .header = sect, .id = @as(u8, @intCast(id)) };
+ const sortFn = struct {
+ fn sortFn(ctx: *MachO, lhs_index: UnwindInfo.Record.Index, rhs_index: UnwindInfo.Record.Index) bool {
+ const lhs = ctx.getUnwindRecord(lhs_index);
+ const rhs = ctx.getUnwindRecord(rhs_index);
+ const lhsa = lhs.getAtom(ctx);
+ const rhsa = rhs.getAtom(ctx);
+ return lhsa.getInputAddress(ctx) + lhs.atom_offset < rhsa.getInputAddress(ctx) + rhs.atom_offset;
+ }
+ }.sortFn;
+ mem.sort(UnwindInfo.Record.Index, self.unwind_records.items, macho_file, sortFn);
+
+ // Associate unwind records to atoms
+ var next_cu: u32 = 0;
+ while (next_cu < self.unwind_records.items.len) {
+ const start = next_cu;
+ const rec_index = self.unwind_records.items[start];
+ const rec = macho_file.getUnwindRecord(rec_index);
+ while (next_cu < self.unwind_records.items.len and
+ macho_file.getUnwindRecord(self.unwind_records.items[next_cu]).atom == rec.atom) : (next_cu += 1)
+ {}
+
+ const atom = rec.getAtom(macho_file);
+ atom.unwind_records = .{ .pos = start, .len = next_cu - start };
}
+}
- mem.sort(SortedSection, sorted_sections, {}, sectionLessThanByAddress);
-
- var sect_sym_index: u32 = 0;
- for (sorted_sections) |section| {
- const sect = section.header;
- if (sect.isDebug()) continue;
-
- const sect_id = section.id;
- log.debug("splitting section '{s},{s}' into atoms", .{ sect.segName(), sect.sectName() });
-
- // Get output segment/section in the final artifact.
- const out_sect_id = (try Atom.getOutputSection(macho_file, sect)) orelse continue;
-
- log.debug(" output sect({d}, '{s},{s}')", .{
- out_sect_id + 1,
- macho_file.sections.items(.header)[out_sect_id].segName(),
- macho_file.sections.items(.header)[out_sect_id].sectName(),
- });
-
- try self.parseRelocs(gpa, section.id);
-
- const cpu_arch = target.cpu.arch;
- const sect_loc = filterSymbolsBySection(symtab[sect_sym_index..], sect_id + 1);
- const sect_start_index = sect_sym_index + sect_loc.index;
-
- sect_sym_index += sect_loc.len;
-
- if (sect.size == 0) continue;
- if (subsections_via_symbols and sect_loc.len > 0) {
- // If the first nlist does not match the start of the section,
- // then we need to encapsulate the memory range [section start, first symbol)
- // as a temporary symbol and insert the matching Atom.
- const first_sym = symtab[sect_start_index];
- if (first_sym.n_value > sect.addr) {
- const sym_index = self.getSectionAliasSymbolIndex(sect_id);
- const atom_size = first_sym.n_value - sect.addr;
- const atom_index = try self.createAtomFromSubsection(
- macho_file,
- object_id,
- sym_index,
- sym_index,
- 1,
- atom_size,
- Alignment.fromLog2Units(sect.@"align"),
- out_sect_id,
- );
- if (!sect.isZerofill()) {
- try self.cacheRelocs(macho_file, atom_index);
- }
- macho_file.addAtomToSection(atom_index);
+fn synthesiseNullUnwindRecords(self: *Object, macho_file: *MachO) !void {
+ // Synthesise missing unwind records.
+ // The logic here is as follows:
+ // 1. if an atom has unwind info record that is not DWARF, FDE is marked dead
+ // 2. if an atom has unwind info record that is DWARF, FDE is tied to this unwind record
+ // 3. if an atom doesn't have unwind info record but FDE is available, synthesise and tie
+ // 4. if an atom doesn't have either, synthesise a null unwind info record
+
+ const Superposition = struct { atom: Atom.Index, size: u64, cu: ?UnwindInfo.Record.Index = null, fde: ?Fde.Index = null };
+
+ const gpa = macho_file.base.allocator;
+ var superposition = std.AutoArrayHashMap(u64, Superposition).init(gpa);
+ defer superposition.deinit();
+
+ const slice = self.symtab.slice();
+ for (slice.items(.nlist), slice.items(.atom), slice.items(.size)) |nlist, atom, size| {
+ if (nlist.stab()) continue;
+ if (!nlist.sect()) continue;
+ const sect = self.sections.items(.header)[nlist.n_sect - 1];
+ if (sect.isCode()) {
+ try superposition.ensureUnusedCapacity(1);
+ const gop = superposition.getOrPutAssumeCapacity(nlist.n_value);
+ if (gop.found_existing) {
+ assert(gop.value_ptr.atom == atom and gop.value_ptr.size == size);
}
+ gop.value_ptr.* = .{ .atom = atom, .size = size };
+ }
+ }
- var next_sym_index = sect_start_index;
- while (next_sym_index < sect_start_index + sect_loc.len) {
- const next_sym = symtab[next_sym_index];
- const addr = next_sym.n_value;
- const atom_loc = filterSymbolsByAddress(symtab[next_sym_index..], addr, addr + 1);
- assert(atom_loc.len > 0);
- const atom_sym_index = atom_loc.index + next_sym_index;
- const nsyms_trailing = atom_loc.len;
- next_sym_index += atom_loc.len;
-
- const atom_size = if (next_sym_index < sect_start_index + sect_loc.len)
- symtab[next_sym_index].n_value - addr
- else
- sect.addr + sect.size - addr;
+ for (self.unwind_records.items) |rec_index| {
+ const rec = macho_file.getUnwindRecord(rec_index);
+ const atom = rec.getAtom(macho_file);
+ const addr = atom.getInputAddress(macho_file) + rec.atom_offset;
+ superposition.getPtr(addr).?.cu = rec_index;
+ }
- const atom_align = Alignment.fromLog2Units(if (addr > 0)
- @min(@ctz(addr), sect.@"align")
- else
- sect.@"align");
-
- const atom_index = try self.createAtomFromSubsection(
- macho_file,
- object_id,
- atom_sym_index,
- atom_sym_index,
- nsyms_trailing,
- atom_size,
- atom_align,
- out_sect_id,
- );
-
- // TODO rework this at the relocation level
- if (cpu_arch == .x86_64 and addr == sect.addr) {
- // In x86_64 relocs, it can so happen that the compiler refers to the same
- // atom by both the actual assigned symbol and the start of the section. In this
- // case, we need to link the two together so add an alias.
- const alias_index = self.getSectionAliasSymbolIndex(sect_id);
- self.atom_by_index_table[alias_index] = atom_index;
+ for (self.fdes.items, 0..) |fde, fde_index| {
+ const atom = fde.getAtom(macho_file);
+ const addr = atom.getInputAddress(macho_file) + fde.atom_offset;
+ superposition.getPtr(addr).?.fde = @intCast(fde_index);
+ }
+
+ for (superposition.keys(), superposition.values()) |addr, meta| {
+ if (meta.fde) |fde_index| {
+ const fde = &self.fdes.items[fde_index];
+
+ if (meta.cu) |rec_index| {
+ const rec = macho_file.getUnwindRecord(rec_index);
+ if (!rec.enc.isDwarf(macho_file)) {
+ // Mark FDE dead
+ fde.alive = false;
+ } else {
+ // Tie FDE to unwind record
+ rec.fde = fde_index;
}
- if (!sect.isZerofill()) {
- try self.cacheRelocs(macho_file, atom_index);
+ } else {
+ // Synthesise new unwind info record
+ const fde_data = fde.getData(macho_file);
+ const atom_size = mem.readInt(u64, fde_data[16..][0..8], .little);
+ const rec_index = try macho_file.addUnwindRecord();
+ const rec = macho_file.getUnwindRecord(rec_index);
+ try self.unwind_records.append(gpa, rec_index);
+ rec.length = @intCast(atom_size);
+ rec.atom = fde.atom;
+ rec.atom_offset = fde.atom_offset;
+ rec.fde = fde_index;
+ rec.file = fde.file;
+ switch (macho_file.options.cpu_arch.?) {
+ .x86_64 => rec.enc.setMode(macho.UNWIND_X86_64_MODE.DWARF),
+ .aarch64 => rec.enc.setMode(macho.UNWIND_ARM64_MODE.DWARF),
+ else => unreachable,
}
- macho_file.addAtomToSection(atom_index);
}
- } else {
- const alias_index = self.getSectionAliasSymbolIndex(sect_id);
- const atom_index = try self.createAtomFromSubsection(
- macho_file,
- object_id,
- alias_index,
- sect_start_index,
- sect_loc.len,
- sect.size,
- Alignment.fromLog2Units(sect.@"align"),
- out_sect_id,
- );
- if (!sect.isZerofill()) {
- try self.cacheRelocs(macho_file, atom_index);
- }
- macho_file.addAtomToSection(atom_index);
+ } else if (meta.cu == null and meta.fde == null) {
+ // Create a null record
+ const rec_index = try macho_file.addUnwindRecord();
+ const rec = macho_file.getUnwindRecord(rec_index);
+ const atom = macho_file.getAtom(meta.atom).?;
+ try self.unwind_records.append(gpa, rec_index);
+ rec.length = @intCast(meta.size);
+ rec.atom = meta.atom;
+ rec.atom_offset = @intCast(addr - atom.getInputAddress(macho_file));
+ rec.file = self.index;
}
}
}
-fn createAtomFromSubsection(
- self: *Object,
- macho_file: *MachO,
- object_id: u32,
- sym_index: u32,
- inner_sym_index: u32,
- inner_nsyms_trailing: u32,
- size: u64,
- alignment: Alignment,
- out_sect_id: u8,
-) !Atom.Index {
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
- const atom_index = try macho_file.createAtom(sym_index, .{
- .size = size,
- .alignment = alignment,
- });
- const atom = macho_file.getAtomPtr(atom_index);
- atom.inner_sym_index = inner_sym_index;
- atom.inner_nsyms_trailing = inner_nsyms_trailing;
- atom.file = object_id + 1;
- self.symtab[sym_index].n_sect = out_sect_id + 1;
-
- log.debug("creating ATOM(%{d}, '{s}') in sect({d}, '{s},{s}') in object({d})", .{
- sym_index,
- self.getSymbolName(sym_index),
- out_sect_id + 1,
- macho_file.sections.items(.header)[out_sect_id].segName(),
- macho_file.sections.items(.header)[out_sect_id].sectName(),
- object_id,
- });
-
- try self.atoms.append(gpa, atom_index);
- self.atom_by_index_table[sym_index] = atom_index;
+fn initPlatform(self: *Object) void {
+ var it = LoadCommandIterator{
+ .ncmds = self.header.?.ncmds,
+ .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds],
+ };
+ self.platform = while (it.next()) |cmd| {
+ switch (cmd.cmd()) {
+ .BUILD_VERSION,
+ .VERSION_MIN_MACOSX,
+ .VERSION_MIN_IPHONEOS,
+ .VERSION_MIN_TVOS,
+ .VERSION_MIN_WATCHOS,
+ => break MachO.Options.Platform.fromLoadCommand(cmd),
+ else => {},
+ }
+ } else null;
+}
- var it = Atom.getInnerSymbolsIterator(macho_file, atom_index);
- while (it.next()) |sym_loc| {
- const inner = macho_file.getSymbolPtr(sym_loc);
- inner.n_sect = out_sect_id + 1;
- self.atom_by_index_table[sym_loc.sym_index] = atom_index;
+/// Currently, we only check if a compile unit for this input object file exists
+/// and record that so that we can emit symbol stabs.
+/// TODO in the future, we want parse debug info and debug line sections so that
+/// we can provide nice error locations to the user.
+fn initDwarfInfo(self: *Object, macho_file: *MachO) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+
+ const gpa = macho_file.base.allocator;
+
+ var debug_info_index: ?usize = null;
+ var debug_abbrev_index: ?usize = null;
+ var debug_str_index: ?usize = null;
+
+ for (self.sections.items(.header), 0..) |sect, index| {
+ if (sect.attrs() & macho.S_ATTR_DEBUG == 0) continue;
+ if (mem.eql(u8, sect.sectName(), "__debug_info")) debug_info_index = index;
+ if (mem.eql(u8, sect.sectName(), "__debug_abbrev")) debug_abbrev_index = index;
+ if (mem.eql(u8, sect.sectName(), "__debug_str")) debug_str_index = index;
}
- const out_sect = macho_file.sections.items(.header)[out_sect_id];
- if (out_sect.isCode() and
- mem.eql(u8, "__TEXT", out_sect.segName()) and
- mem.eql(u8, "__text", out_sect.sectName()))
- {
- // TODO currently assuming a single section for executable machine code
- try self.exec_atoms.append(gpa, atom_index);
- }
+ if (debug_info_index == null or debug_abbrev_index == null) return;
- return atom_index;
+ var dwarf_info = DwarfInfo{
+ .debug_info = self.getSectionData(@intCast(debug_info_index.?)),
+ .debug_abbrev = self.getSectionData(@intCast(debug_abbrev_index.?)),
+ .debug_str = if (debug_str_index) |index| self.getSectionData(@intCast(index)) else "",
+ };
+ dwarf_info.init(gpa) catch {
+ macho_file.base.fatal("{}: invalid __DWARF info found", .{self.fmtPath()});
+ return error.ParseFailed;
+ };
+ self.dwarf_info = dwarf_info;
}
-fn filterRelocs(
- relocs: []align(1) const macho.relocation_info,
- start_addr: u64,
- end_addr: u64,
-) Entry {
- const Predicate = struct {
- addr: u64,
+pub fn resolveSymbols(self: *Object, macho_file: *MachO) void {
+ const tracy = trace(@src());
+ defer tracy.end();
- pub fn predicate(self: @This(), rel: macho.relocation_info) bool {
- return rel.r_address >= self.addr;
- }
- };
- const LPredicate = struct {
- addr: u64,
+ for (self.symbols.items, 0..) |index, i| {
+ const nlist_idx = @as(Symbol.Index, @intCast(i));
+ const nlist = self.symtab.items(.nlist)[nlist_idx];
+ const atom_index = self.symtab.items(.atom)[nlist_idx];
- pub fn predicate(self: @This(), rel: macho.relocation_info) bool {
- return rel.r_address < self.addr;
+ if (!nlist.ext()) continue;
+ if (nlist.undf() and !nlist.tentative()) continue;
+ if (nlist.sect()) {
+ const atom = macho_file.getAtom(atom_index).?;
+ if (!atom.flags.alive) continue;
}
- };
- const start = MachO.bsearch(macho.relocation_info, relocs, Predicate{ .addr = end_addr });
- const len = MachO.lsearch(macho.relocation_info, relocs[start..], LPredicate{ .addr = start_addr });
+ const symbol = macho_file.getSymbol(index);
+ if (self.asFile().getSymbolRank(.{
+ .archive = !self.alive,
+ .weak = nlist.weakDef(),
+ .tentative = nlist.tentative(),
+ }) < symbol.getSymbolRank(macho_file)) {
+ const value = if (nlist.sect()) blk: {
+ const atom = macho_file.getAtom(atom_index).?;
+ break :blk nlist.n_value - atom.getInputAddress(macho_file);
+ } else nlist.n_value;
+ symbol.value = value;
+ symbol.atom = atom_index;
+ symbol.nlist_idx = nlist_idx;
+ symbol.file = self.index;
+ symbol.flags.weak = nlist.weakDef();
+ symbol.flags.abs = nlist.abs();
+ symbol.flags.tentative = nlist.tentative();
+ symbol.flags.weak_ref = false;
+ symbol.flags.dyn_ref = nlist.n_desc & macho.REFERENCED_DYNAMICALLY != 0;
+ symbol.flags.no_dead_strip = symbol.flags.no_dead_strip or nlist.noDeadStrip();
+ symbol.flags.interposable = macho_file.options.dylib and macho_file.options.namespace == .flat and !nlist.pext();
+
+ if (nlist.sect() and
+ self.sections.items(.header)[nlist.n_sect - 1].type() == macho.S_THREAD_LOCAL_VARIABLES)
+ {
+ symbol.flags.tlv = true;
+ }
+ }
- return .{ .start = @as(u32, @intCast(start)), .len = @as(u32, @intCast(len)) };
+ // Regardless of who the winner is, we still merge symbol visibility here.
+ if (nlist.pext() or (nlist.weakDef() and nlist.weakRef()) or self.hidden) {
+ if (symbol.visibility != .global) {
+ symbol.visibility = .hidden;
+ }
+ } else {
+ symbol.visibility = .global;
+ }
+ }
}
-/// Parse all relocs for the input section, and sort in descending order.
-/// Previously, I have wrongly assumed the compilers output relocations for each
-/// section in a sorted manner which is simply not true.
-fn parseRelocs(self: *Object, gpa: Allocator, sect_id: u8) !void {
- const section = self.getSourceSection(sect_id);
- const start = @as(u32, @intCast(self.relocations.items.len));
- if (self.getSourceRelocs(section)) |relocs| {
- try self.relocations.ensureUnusedCapacity(gpa, relocs.len);
- self.relocations.appendUnalignedSliceAssumeCapacity(relocs);
- mem.sort(macho.relocation_info, self.relocations.items[start..], {}, relocGreaterThan);
+pub fn resetGlobals(self: *Object, macho_file: *MachO) void {
+ for (self.symbols.items, 0..) |sym_index, nlist_idx| {
+ if (!self.symtab.items(.nlist)[nlist_idx].ext()) continue;
+ const sym = macho_file.getSymbol(sym_index);
+ const name = sym.name;
+ sym.* = .{};
+ sym.name = name;
}
- self.section_relocs_lookup.items[sect_id] = start;
}
-fn cacheRelocs(self: *Object, macho_file: *MachO, atom_index: Atom.Index) !void {
- const atom = macho_file.getAtom(atom_index);
-
- const source_sect_id = if (self.getSourceSymbol(atom.sym_index)) |source_sym| blk: {
- break :blk source_sym.n_sect - 1;
- } else blk: {
- // If there was no matching symbol present in the source symtab, this means
- // we are dealing with either an entire section, or part of it, but also
- // starting at the beginning.
- const nbase = @as(u32, @intCast(self.in_symtab.?.len));
- const sect_id = @as(u8, @intCast(atom.sym_index - nbase));
- break :blk sect_id;
- };
- const source_sect = self.getSourceSection(source_sect_id);
- assert(!source_sect.isZerofill());
- const relocs = self.getRelocs(source_sect_id);
-
- self.relocs_lookup[atom.sym_index] = if (self.getSourceSymbol(atom.sym_index)) |source_sym| blk: {
- const offset = source_sym.n_value - source_sect.addr;
- break :blk filterRelocs(relocs, offset, offset + atom.size);
- } else filterRelocs(relocs, 0, atom.size);
-}
+pub fn markLive(self: *Object, macho_file: *MachO) void {
+ const tracy = trace(@src());
+ defer tracy.end();
+
+ for (self.symbols.items, 0..) |index, nlist_idx| {
+ const nlist = self.symtab.items(.nlist)[nlist_idx];
+ if (!nlist.ext()) continue;
-fn relocGreaterThan(ctx: void, lhs: macho.relocation_info, rhs: macho.relocation_info) bool {
- _ = ctx;
- return lhs.r_address > rhs.r_address;
+ const sym = macho_file.getSymbol(index);
+ const file = sym.getFile(macho_file) orelse continue;
+ const should_keep = nlist.undf() or (nlist.tentative() and !sym.flags.tentative);
+ if (should_keep and file == .object and !file.object.alive) {
+ file.object.alive = true;
+ file.object.markLive(macho_file);
+ }
+ }
}
-fn parseEhFrameSection(self: *Object, macho_file: *MachO, object_id: u32) !void {
- const sect_id = self.eh_frame_sect_id orelse return;
- const sect = self.getSourceSection(sect_id);
+pub fn scanRelocs(self: Object, macho_file: *MachO) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
- log.debug("parsing __TEXT,__eh_frame section", .{});
+ for (self.atoms.items) |atom_index| {
+ const atom = macho_file.getAtom(atom_index).?;
+ if (!atom.flags.alive) continue;
+ const sect = atom.getInputSection(macho_file);
+ if (sect.isZerofill()) continue;
+ try atom.scanRelocs(macho_file);
+ }
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
+ for (self.unwind_records.items) |rec_index| {
+ const rec = macho_file.getUnwindRecord(rec_index);
+ if (!rec.alive) continue;
+ if (rec.getFde(macho_file)) |fde| {
+ if (fde.getCie(macho_file).getPersonality(macho_file)) |sym| {
+ sym.flags.got = true;
+ }
+ } else if (rec.getPersonality(macho_file)) |sym| {
+ sym.flags.got = true;
+ }
+ }
+}
- if (macho_file.eh_frame_section_index == null) {
- macho_file.eh_frame_section_index = try macho_file.initSection("__TEXT", "__eh_frame", .{});
+pub fn convertTentativeDefinitions(self: *Object, macho_file: *MachO) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+ const gpa = macho_file.base.allocator;
+
+ for (self.symbols.items, 0..) |index, i| {
+ const sym = macho_file.getSymbol(index);
+ if (!sym.flags.tentative) continue;
+ const sym_file = sym.getFile(macho_file).?;
+ if (sym_file.getIndex() != self.index) continue;
+
+ const nlist_idx = @as(Symbol.Index, @intCast(i));
+ const nlist = &self.symtab.items(.nlist)[nlist_idx];
+ const nlist_atom = &self.symtab.items(.atom)[nlist_idx];
+
+ const atom_index = try macho_file.addAtom();
+ try self.atoms.append(gpa, atom_index);
+
+ const name = try std.fmt.allocPrintZ(gpa, "__DATA$__common${s}", .{sym.getName(macho_file)});
+ defer gpa.free(name);
+ const atom = macho_file.getAtom(atom_index).?;
+ atom.atom_index = atom_index;
+ atom.name = try macho_file.string_intern.insert(gpa, name);
+ atom.file = self.index;
+ atom.size = nlist.n_value;
+ atom.alignment = (nlist.n_desc >> 8) & 0x0f;
+
+ const n_sect = try self.addSection(gpa, "__DATA", "__common");
+ const sect = &self.sections.items(.header)[n_sect];
+ sect.flags = macho.S_ZEROFILL;
+ sect.size = atom.size;
+ sect.@"align" = atom.alignment;
+ atom.n_sect = n_sect;
+
+ sym.value = 0;
+ sym.atom = atom_index;
+ sym.flags.weak = false;
+ sym.flags.weak_ref = false;
+ sym.flags.tentative = false;
+ sym.visibility = .global;
+
+ nlist.n_value = 0;
+ nlist.n_type = macho.N_EXT | macho.N_SECT;
+ nlist.n_sect = 0;
+ nlist.n_desc = 0;
+ nlist_atom.* = atom_index;
}
+}
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const cpu_arch = target.cpu.arch;
- try self.parseRelocs(gpa, sect_id);
- const relocs = self.getRelocs(sect_id);
+fn addSection(self: *Object, allocator: Allocator, segname: []const u8, sectname: []const u8) !u32 {
+ const n_sect = @as(u32, @intCast(try self.sections.addOne(allocator)));
+ self.sections.set(n_sect, .{
+ .header = .{
+ .sectname = MachO.makeStaticString(sectname),
+ .segname = MachO.makeStaticString(segname),
+ },
+ });
+ return n_sect;
+}
- var it = self.getEhFrameRecordsIterator();
- var record_count: u32 = 0;
- while (try it.next()) |_| {
- record_count += 1;
+pub fn calcSymtabSize(self: *Object, macho_file: *MachO) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+
+ for (self.symbols.items) |sym_index| {
+ const sym = macho_file.getSymbol(sym_index);
+ const file = sym.getFile(macho_file) orelse continue;
+ if (file.getIndex() != self.index) continue;
+ if (sym.getAtom(macho_file)) |atom| if (!atom.flags.alive) continue;
+ if (sym.isSymbolStab(macho_file)) continue;
+ const name = sym.getName(macho_file);
+ // TODO in -r mode, we actually want to merge symbol names and emit only one
+ // work it out when emitting relocs
+ if (name.len > 0 and (name[0] == 'L' or name[0] == 'l') and !macho_file.options.relocatable) continue;
+ sym.flags.output_symtab = true;
+ if (sym.isLocal()) {
+ try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nlocals }, macho_file);
+ self.output_symtab_ctx.nlocals += 1;
+ } else if (sym.flags.@"export") {
+ try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nexports }, macho_file);
+ self.output_symtab_ctx.nexports += 1;
+ } else {
+ assert(sym.flags.import);
+ try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nimports }, macho_file);
+ self.output_symtab_ctx.nimports += 1;
+ }
+ self.output_symtab_ctx.strsize += @as(u32, @intCast(sym.getName(macho_file).len + 1));
}
- try self.eh_frame_relocs_lookup.ensureTotalCapacity(gpa, record_count);
- try self.eh_frame_records_lookup.ensureUnusedCapacity(gpa, record_count);
+ if (!macho_file.options.strip and self.hasDebugInfo()) self.calcStabsSize(macho_file);
+}
- it.reset();
+pub fn calcStabsSize(self: *Object, macho_file: *MachO) void {
+ if (self.dwarf_info) |dw| {
+ // TODO handle multiple CUs
+ const cu = dw.compile_units.items[0];
+ const comp_dir = cu.getCompileDir(dw) orelse return;
+ const tu_name = cu.getSourceFile(dw) orelse return;
- while (try it.next()) |record| {
- const offset = it.pos - record.getSize();
- const rel_pos: Entry = switch (cpu_arch) {
- .aarch64 => filterRelocs(relocs, offset, offset + record.getSize()),
- .x86_64 => .{},
- else => unreachable,
- };
- self.eh_frame_relocs_lookup.putAssumeCapacityNoClobber(offset, .{
- .dead = false,
- .reloc = rel_pos,
- });
-
- if (record.tag == .fde) {
- const reloc_target = blk: {
- switch (cpu_arch) {
- .aarch64 => {
- assert(rel_pos.len > 0); // TODO convert to an error as the FDE eh frame is malformed
- // Find function symbol that this record describes
- const rel = for (relocs[rel_pos.start..][0..rel_pos.len]) |rel| {
- if (rel.r_address - @as(i32, @intCast(offset)) == 8 and
- @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type)) == .ARM64_RELOC_UNSIGNED)
- break rel;
- } else unreachable;
- const reloc_target = Atom.parseRelocTarget(macho_file, .{
- .object_id = object_id,
- .rel = rel,
- .code = it.data[offset..],
- .base_offset = @as(i32, @intCast(offset)),
- });
- break :blk reloc_target;
- },
- .x86_64 => {
- const target_address = record.getTargetSymbolAddress(.{
- .base_addr = sect.addr,
- .base_offset = offset,
- });
- const target_sym_index = self.getSymbolByAddress(target_address, null);
- const reloc_target = if (self.getGlobal(target_sym_index)) |global_index|
- macho_file.globals.items[global_index]
- else
- SymbolWithLoc{ .sym_index = target_sym_index, .file = object_id + 1 };
- break :blk reloc_target;
- },
- else => unreachable,
- }
- };
- if (reloc_target.getFile() != object_id) {
- log.debug("FDE at offset {x} marked DEAD", .{offset});
- self.eh_frame_relocs_lookup.getPtr(offset).?.dead = true;
+ self.output_symtab_ctx.nstabs += 4; // N_SO, N_SO, N_OSO, N_SO
+ self.output_symtab_ctx.strsize += @as(u32, @intCast(comp_dir.len + 1)); // comp_dir
+ self.output_symtab_ctx.strsize += @as(u32, @intCast(tu_name.len + 1)); // tu_name
+
+ if (self.archive) |path| {
+ self.output_symtab_ctx.strsize += @as(u32, @intCast(path.len + 1 + self.path.len + 1 + 1));
+ } else {
+ self.output_symtab_ctx.strsize += @as(u32, @intCast(self.path.len + 1));
+ }
+
+ for (self.symbols.items) |sym_index| {
+ const sym = macho_file.getSymbol(sym_index);
+ const file = sym.getFile(macho_file) orelse continue;
+ if (file.getIndex() != self.index) continue;
+ if (!sym.flags.output_symtab) continue;
+ if (macho_file.options.relocatable) {
+ const name = sym.getName(macho_file);
+ if (name.len > 0 and (name[0] == 'L' or name[0] == 'l')) continue;
+ }
+ const sect = macho_file.sections.items(.header)[sym.out_n_sect];
+ if (sect.isCode()) {
+ self.output_symtab_ctx.nstabs += 4; // N_BNSYM, N_FUN, N_FUN, N_ENSYM
+ } else if (sym.visibility == .global) {
+ self.output_symtab_ctx.nstabs += 1; // N_GSYM
} else {
- // You would think that we are done but turns out that the compilers may use
- // whichever symbol alias they want for a target symbol. This in particular
- // very problematic when using Zig's @export feature to re-export symbols under
- // additional names. For that reason, we need to ensure we record aliases here
- // too so that we can tie them with their matching unwind records and vice versa.
- const aliases = self.getSymbolAliases(reloc_target.sym_index);
- var i: u32 = 0;
- while (i < aliases.len) : (i += 1) {
- const actual_target = SymbolWithLoc{
- .sym_index = i + aliases.start,
- .file = reloc_target.file,
- };
- log.debug("FDE at offset {x} tracks {s}", .{
- offset,
- macho_file.getSymbolName(actual_target),
- });
- try self.eh_frame_records_lookup.putNoClobber(gpa, actual_target, offset);
- }
+ self.output_symtab_ctx.nstabs += 1; // N_STSYM
+ }
+ }
+ } else {
+ assert(self.hasSymbolStabs());
+
+ for (self.stab_files.items) |sf| {
+ self.output_symtab_ctx.nstabs += 4; // N_SO, N_SO, N_OSO, N_SO
+ self.output_symtab_ctx.strsize += @as(u32, @intCast(sf.getCompDir(self).len + 1)); // comp_dir
+ self.output_symtab_ctx.strsize += @as(u32, @intCast(sf.getTuName(self).len + 1)); // tu_name
+ self.output_symtab_ctx.strsize += @as(u32, @intCast(sf.getOsoPath(self).len + 1)); // path
+
+ for (sf.stabs.items) |stab| {
+ const sym = stab.getSymbol(macho_file) orelse continue;
+ const file = sym.getFile(macho_file).?;
+ if (file.getIndex() != self.index) continue;
+ if (!sym.flags.output_symtab) continue;
+ const nstabs: u32 = switch (stab.tag) {
+ .func => 4, // N_BNSYM, N_FUN, N_FUN, N_ENSYM
+ .global => 1, // N_GSYM
+ .static => 1, // N_STSYM
+ };
+ self.output_symtab_ctx.nstabs += nstabs;
}
}
}
}
-fn parseUnwindInfo(self: *Object, macho_file: *MachO, object_id: u32) !void {
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const cpu_arch = target.cpu.arch;
- const sect_id = self.unwind_info_sect_id orelse {
- // If it so happens that the object had `__eh_frame` section defined but no `__compact_unwind`,
- // we will try fully synthesising unwind info records to somewhat match Apple ld's
- // approach. However, we will only synthesise DWARF records and nothing more. For this reason,
- // we still create the output `__TEXT,__unwind_info` section.
- if (self.hasEhFrameRecords()) {
- if (macho_file.unwind_info_section_index == null) {
- macho_file.unwind_info_section_index = try macho_file.initSection(
- "__TEXT",
- "__unwind_info",
- .{},
- );
+pub fn writeSymtab(self: Object, macho_file: *MachO) void {
+ const tracy = trace(@src());
+ defer tracy.end();
+
+ for (self.symbols.items) |sym_index| {
+ const sym = macho_file.getSymbol(sym_index);
+ const file = sym.getFile(macho_file) orelse continue;
+ if (file.getIndex() != self.index) continue;
+ const idx = sym.getOutputSymtabIndex(macho_file) orelse continue;
+ const n_strx = @as(u32, @intCast(macho_file.strtab.items.len));
+ macho_file.strtab.appendSliceAssumeCapacity(sym.getName(macho_file));
+ macho_file.strtab.appendAssumeCapacity(0);
+ const out_sym = &macho_file.symtab.items[idx];
+ out_sym.n_strx = n_strx;
+ sym.setOutputSym(macho_file, out_sym);
+ }
+
+ if (!macho_file.options.strip and self.hasDebugInfo()) self.writeStabs(macho_file);
+}
+
+pub fn writeStabs(self: *const Object, macho_file: *MachO) void {
+ const writeFuncStab = struct {
+ inline fn writeFuncStab(
+ n_strx: u32,
+ n_sect: u8,
+ n_value: u64,
+ size: u64,
+ index: u32,
+ ctx: *MachO,
+ ) void {
+ ctx.symtab.items[index] = .{
+ .n_strx = 0,
+ .n_type = macho.N_BNSYM,
+ .n_sect = n_sect,
+ .n_desc = 0,
+ .n_value = n_value,
+ };
+ ctx.symtab.items[index + 1] = .{
+ .n_strx = n_strx,
+ .n_type = macho.N_FUN,
+ .n_sect = n_sect,
+ .n_desc = 0,
+ .n_value = n_value,
+ };
+ ctx.symtab.items[index + 2] = .{
+ .n_strx = 0,
+ .n_type = macho.N_FUN,
+ .n_sect = 0,
+ .n_desc = 0,
+ .n_value = size,
+ };
+ ctx.symtab.items[index + 3] = .{
+ .n_strx = 0,
+ .n_type = macho.N_ENSYM,
+ .n_sect = n_sect,
+ .n_desc = 0,
+ .n_value = size,
+ };
+ }
+ }.writeFuncStab;
+
+ var index = self.output_symtab_ctx.istab;
+
+ if (self.dwarf_info) |dw| {
+ // TODO handle multiple CUs
+ const cu = dw.compile_units.items[0];
+ const comp_dir = cu.getCompileDir(dw) orelse return;
+ const tu_name = cu.getSourceFile(dw) orelse return;
+
+ // Open scope
+ // N_SO comp_dir
+ var n_strx = @as(u32, @intCast(macho_file.strtab.items.len));
+ macho_file.strtab.appendSliceAssumeCapacity(comp_dir);
+ macho_file.strtab.appendAssumeCapacity(0);
+ macho_file.symtab.items[index] = .{
+ .n_strx = n_strx,
+ .n_type = macho.N_SO,
+ .n_sect = 0,
+ .n_desc = 0,
+ .n_value = 0,
+ };
+ index += 1;
+ // N_SO tu_name
+ n_strx = @as(u32, @intCast(macho_file.strtab.items.len));
+ macho_file.strtab.appendSliceAssumeCapacity(tu_name);
+ macho_file.strtab.appendAssumeCapacity(0);
+ macho_file.symtab.items[index] = .{
+ .n_strx = n_strx,
+ .n_type = macho.N_SO,
+ .n_sect = 0,
+ .n_desc = 0,
+ .n_value = 0,
+ };
+ index += 1;
+ // N_OSO path
+ n_strx = @as(u32, @intCast(macho_file.strtab.items.len));
+ if (self.archive) |path| {
+ macho_file.strtab.appendSliceAssumeCapacity(path);
+ macho_file.strtab.appendAssumeCapacity('(');
+ macho_file.strtab.appendSliceAssumeCapacity(self.path);
+ macho_file.strtab.appendAssumeCapacity(')');
+ macho_file.strtab.appendAssumeCapacity(0);
+ } else {
+ macho_file.strtab.appendSliceAssumeCapacity(self.path);
+ macho_file.strtab.appendAssumeCapacity(0);
+ }
+ macho_file.symtab.items[index] = .{
+ .n_strx = n_strx,
+ .n_type = macho.N_OSO,
+ .n_sect = 0,
+ .n_desc = 1,
+ .n_value = self.mtime,
+ };
+ index += 1;
+
+ for (self.symbols.items) |sym_index| {
+ const sym = macho_file.getSymbol(sym_index);
+ const file = sym.getFile(macho_file) orelse continue;
+ if (file.getIndex() != self.index) continue;
+ if (!sym.flags.output_symtab) continue;
+ if (macho_file.options.relocatable) {
+ const name = sym.getName(macho_file);
+ if (name.len > 0 and (name[0] == 'L' or name[0] == 'l')) continue;
+ }
+ const sect = macho_file.sections.items(.header)[sym.out_n_sect];
+ const sym_n_strx = n_strx: {
+ const symtab_index = sym.getOutputSymtabIndex(macho_file).?;
+ const osym = macho_file.symtab.items[symtab_index];
+ break :n_strx osym.n_strx;
+ };
+ const sym_n_sect: u8 = if (!sym.flags.abs) @intCast(sym.out_n_sect + 1) else 0;
+ const sym_n_value = sym.getAddress(.{}, macho_file);
+ const sym_size = sym.getSize(macho_file);
+ if (sect.isCode()) {
+ writeFuncStab(sym_n_strx, sym_n_sect, sym_n_value, sym_size, index, macho_file);
+ index += 4;
+ } else if (sym.visibility == .global) {
+ macho_file.symtab.items[index] = .{
+ .n_strx = sym_n_strx,
+ .n_type = macho.N_GSYM,
+ .n_sect = sym_n_sect,
+ .n_desc = 0,
+ .n_value = 0,
+ };
+ index += 1;
+ } else {
+ macho_file.symtab.items[index] = .{
+ .n_strx = sym_n_strx,
+ .n_type = macho.N_STSYM,
+ .n_sect = sym_n_sect,
+ .n_desc = 0,
+ .n_value = sym_n_value,
+ };
+ index += 1;
}
}
- return;
- };
- log.debug("parsing unwind info in {s}", .{self.name});
+ // Close scope
+ // N_SO
+ macho_file.symtab.items[index] = .{
+ .n_strx = 0,
+ .n_type = macho.N_SO,
+ .n_sect = 0,
+ .n_desc = 0,
+ .n_value = 0,
+ };
+ } else {
+ assert(self.hasSymbolStabs());
+
+ for (self.stab_files.items) |sf| {
+ // Open scope
+ // N_SO comp_dir
+ var n_strx = @as(u32, @intCast(macho_file.strtab.items.len));
+ macho_file.strtab.appendSliceAssumeCapacity(sf.getCompDir(self));
+ macho_file.strtab.appendAssumeCapacity(0);
+ macho_file.symtab.items[index] = .{
+ .n_strx = n_strx,
+ .n_type = macho.N_SO,
+ .n_sect = 0,
+ .n_desc = 0,
+ .n_value = 0,
+ };
+ index += 1;
+ // N_SO tu_name
+ n_strx = @as(u32, @intCast(macho_file.strtab.items.len));
+ macho_file.strtab.appendSliceAssumeCapacity(sf.getTuName(self));
+ macho_file.strtab.appendAssumeCapacity(0);
+ macho_file.symtab.items[index] = .{
+ .n_strx = n_strx,
+ .n_type = macho.N_SO,
+ .n_sect = 0,
+ .n_desc = 0,
+ .n_value = 0,
+ };
+ index += 1;
+ // N_OSO path
+ n_strx = @as(u32, @intCast(macho_file.strtab.items.len));
+ macho_file.strtab.appendSliceAssumeCapacity(sf.getOsoPath(self));
+ macho_file.strtab.appendAssumeCapacity(0);
+ macho_file.symtab.items[index] = .{
+ .n_strx = n_strx,
+ .n_type = macho.N_OSO,
+ .n_sect = 0,
+ .n_desc = 1,
+ .n_value = sf.getOsoModTime(self),
+ };
+ index += 1;
+
+ for (sf.stabs.items) |stab| {
+ const sym = stab.getSymbol(macho_file) orelse continue;
+ const file = sym.getFile(macho_file).?;
+ if (file.getIndex() != self.index) continue;
+ if (!sym.flags.output_symtab) continue;
+ const sym_n_strx = n_strx: {
+ const symtab_index = sym.getOutputSymtabIndex(macho_file).?;
+ const osym = macho_file.symtab.items[symtab_index];
+ break :n_strx osym.n_strx;
+ };
+ const sym_n_sect: u8 = if (!sym.flags.abs) @intCast(sym.out_n_sect + 1) else 0;
+ const sym_n_value = sym.getAddress(.{}, macho_file);
+ const sym_size = sym.getSize(macho_file);
+ switch (stab.tag) {
+ .func => {
+ writeFuncStab(sym_n_strx, sym_n_sect, sym_n_value, sym_size, index, macho_file);
+ index += 4;
+ },
+ .global => {
+ macho_file.symtab.items[index] = .{
+ .n_strx = sym_n_strx,
+ .n_type = macho.N_GSYM,
+ .n_sect = sym_n_sect,
+ .n_desc = 0,
+ .n_value = 0,
+ };
+ index += 1;
+ },
+ .static => {
+ macho_file.symtab.items[index] = .{
+ .n_strx = sym_n_strx,
+ .n_type = macho.N_STSYM,
+ .n_sect = sym_n_sect,
+ .n_desc = 0,
+ .n_value = sym_n_value,
+ };
+ index += 1;
+ },
+ }
+ }
- if (macho_file.unwind_info_section_index == null) {
- macho_file.unwind_info_section_index = try macho_file.initSection("__TEXT", "__unwind_info", .{});
+ // Close scope
+ // N_SO
+ macho_file.symtab.items[index] = .{
+ .n_strx = 0,
+ .n_type = macho.N_SO,
+ .n_sect = 0,
+ .n_desc = 0,
+ .n_value = 0,
+ };
+ index += 1;
+ }
}
+}
- const unwind_records = self.getUnwindRecords();
-
- try self.unwind_records_lookup.ensureUnusedCapacity(gpa, @as(u32, @intCast(unwind_records.len)));
+fn getLoadCommand(self: Object, lc: macho.LC) ?LoadCommandIterator.LoadCommand {
+ var it = LoadCommandIterator{
+ .ncmds = self.header.?.ncmds,
+ .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds],
+ };
+ while (it.next()) |cmd| {
+ if (cmd.cmd() == lc) return cmd;
+ } else return null;
+}
- const needs_eh_frame = for (unwind_records) |record| {
- if (UnwindInfo.UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) break true;
- } else false;
+pub fn getSectionData(self: *const Object, index: u32) []const u8 {
+ const slice = self.sections.slice();
+ assert(index < slice.items(.header).len);
+ const sect = slice.items(.header)[index];
+ return self.data[sect.offset..][0..sect.size];
+}
- if (needs_eh_frame and !self.hasEhFrameRecords()) return error.MissingEhFrameSection;
+fn getString(self: Object, off: u32) [:0]const u8 {
+ assert(off < self.strtab.len);
+ return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.ptr + off)), 0);
+}
- try self.parseRelocs(gpa, sect_id);
- const relocs = self.getRelocs(sect_id);
+/// TODO handle multiple CUs
+pub fn hasDebugInfo(self: Object) bool {
+ if (self.dwarf_info) |dw| {
+ return dw.compile_units.items.len > 0;
+ }
+ return self.hasSymbolStabs();
+}
- for (unwind_records, 0..) |record, record_id| {
- const offset = record_id * @sizeOf(macho.compact_unwind_entry);
- const rel_pos = filterRelocs(
- relocs,
- offset,
- offset + @sizeOf(macho.compact_unwind_entry),
- );
- assert(rel_pos.len > 0); // TODO convert to an error as the unwind info is malformed
- self.unwind_relocs_lookup[record_id] = .{
- .dead = false,
- .reloc = rel_pos,
- };
+fn hasSymbolStabs(self: Object) bool {
+ return self.stab_files.items.len > 0;
+}
- // Find function symbol that this record describes
- const rel = relocs[rel_pos.start..][rel_pos.len - 1];
- const reloc_target = Atom.parseRelocTarget(macho_file, .{
- .object_id = object_id,
- .rel = rel,
- .code = mem.asBytes(&record),
- .base_offset = @as(i32, @intCast(offset)),
- });
- if (reloc_target.getFile() != object_id) {
- log.debug("unwind record {d} marked DEAD", .{record_id});
- self.unwind_relocs_lookup[record_id].dead = true;
- } else {
- // You would think that we are done but turns out that the compilers may use
- // whichever symbol alias they want for a target symbol. This in particular
- // very problematic when using Zig's @export feature to re-export symbols under
- // additional names. For that reason, we need to ensure we record aliases here
- // too so that we can tie them with their matching unwind records and vice versa.
- const aliases = self.getSymbolAliases(reloc_target.sym_index);
- var i: u32 = 0;
- while (i < aliases.len) : (i += 1) {
- const actual_target = SymbolWithLoc{
- .sym_index = i + aliases.start,
- .file = reloc_target.file,
- };
- log.debug("unwind record {d} tracks {s}", .{
- record_id,
- macho_file.getSymbolName(actual_target),
- });
- try self.unwind_records_lookup.putNoClobber(gpa, actual_target, @intCast(record_id));
- }
- }
+pub fn hasObjc(self: Object) bool {
+ for (self.symtab.items(.nlist)) |nlist| {
+ const name = self.getString(nlist.n_strx);
+ if (mem.startsWith(u8, name, "_OBJC_CLASS_$_")) return true;
+ }
+ for (self.sections.items(.header)) |sect| {
+ if (mem.eql(u8, sect.segName(), "__DATA") and mem.eql(u8, sect.sectName(), "__objc_catlist")) return true;
+ if (mem.eql(u8, sect.segName(), "__TEXT") and mem.eql(u8, sect.sectName(), "__swift")) return true;
}
+ return false;
}
-pub fn getSourceSymbol(self: Object, index: u32) ?macho.nlist_64 {
- const symtab = self.in_symtab.?;
- if (index >= symtab.len) return null;
- const mapped_index = self.source_symtab_lookup[index];
- return symtab[mapped_index];
+pub fn getDataInCode(self: Object) []align(1) const macho.data_in_code_entry {
+ const lc = self.getLoadCommand(.DATA_IN_CODE) orelse return &[0]macho.data_in_code_entry{};
+ const cmd = lc.cast(macho.linkedit_data_command).?;
+ const ndice = @divExact(cmd.datasize, @sizeOf(macho.data_in_code_entry));
+ const dice = @as(
+ [*]align(1) const macho.data_in_code_entry,
+ @ptrCast(self.data.ptr + cmd.dataoff),
+ )[0..ndice];
+ return dice;
}
-pub fn getSourceSection(self: Object, index: u8) macho.section_64 {
- const sections = self.getSourceSections();
- assert(index < sections.len);
- return sections[index];
+pub inline fn hasSubsections(self: Object) bool {
+ return self.header.?.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0;
}
-pub fn getSourceSectionByName(self: Object, segname: []const u8, sectname: []const u8) ?macho.section_64 {
- const index = self.getSourceSectionIndexByName(segname, sectname) orelse return null;
- const sections = self.getSourceSections();
- return sections[index];
+pub fn asFile(self: *Object) File {
+ return .{ .object = self };
}
-pub fn getSourceSectionIndexByName(self: Object, segname: []const u8, sectname: []const u8) ?u8 {
- const sections = self.getSourceSections();
- for (sections, 0..) |sect, i| {
- if (mem.eql(u8, segname, sect.segName()) and mem.eql(u8, sectname, sect.sectName()))
- return @as(u8, @intCast(i));
- } else return null;
+pub fn format(
+ self: *Object,
+ comptime unused_fmt_string: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+) !void {
+ _ = self;
+ _ = unused_fmt_string;
+ _ = options;
+ _ = writer;
+ @compileError("do not format objects directly");
}
-pub fn getSourceSections(self: Object) []align(1) const macho.section_64 {
- var it = LoadCommandIterator{
- .ncmds = self.header.ncmds,
- .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds],
- };
- while (it.next()) |cmd| switch (cmd.cmd()) {
- .SEGMENT_64 => {
- return cmd.getSections();
- },
- else => {},
- } else unreachable;
-}
+const FormatContext = struct {
+ object: *Object,
+ macho_file: *MachO,
+};
-pub fn parseDataInCode(self: *Object, gpa: Allocator) !void {
- var it = LoadCommandIterator{
- .ncmds = self.header.ncmds,
- .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds],
- };
- const cmd = while (it.next()) |cmd| {
- switch (cmd.cmd()) {
- .DATA_IN_CODE => break cmd.cast(macho.linkedit_data_command).?,
- else => {},
- }
- } else return;
- const ndice = @divExact(cmd.datasize, @sizeOf(macho.data_in_code_entry));
- const dice = @as([*]align(1) const macho.data_in_code_entry, @ptrCast(self.contents.ptr + cmd.dataoff))[0..ndice];
- try self.data_in_code.ensureTotalCapacityPrecise(gpa, dice.len);
- self.data_in_code.appendUnalignedSliceAssumeCapacity(dice);
- mem.sort(macho.data_in_code_entry, self.data_in_code.items, {}, diceLessThan);
+pub fn fmtAtoms(self: *Object, macho_file: *MachO) std.fmt.Formatter(formatAtoms) {
+ return .{ .data = .{
+ .object = self,
+ .macho_file = macho_file,
+ } };
}
-fn diceLessThan(ctx: void, lhs: macho.data_in_code_entry, rhs: macho.data_in_code_entry) bool {
- _ = ctx;
- return lhs.offset < rhs.offset;
+fn formatAtoms(
+ ctx: FormatContext,
+ comptime unused_fmt_string: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+) !void {
+ _ = unused_fmt_string;
+ _ = options;
+ const object = ctx.object;
+ try writer.writeAll(" atoms\n");
+ for (object.atoms.items) |atom_index| {
+ const atom = ctx.macho_file.getAtom(atom_index).?;
+ try writer.print(" {}\n", .{atom.fmt(ctx.macho_file)});
+ }
}
-fn getDysymtab(self: Object) ?macho.dysymtab_command {
- var it = LoadCommandIterator{
- .ncmds = self.header.ncmds,
- .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds],
- };
- while (it.next()) |cmd| {
- switch (cmd.cmd()) {
- .DYSYMTAB => return cmd.cast(macho.dysymtab_command).?,
- else => {},
- }
- } else return null;
+pub fn fmtCies(self: *Object, macho_file: *MachO) std.fmt.Formatter(formatCies) {
+ return .{ .data = .{
+ .object = self,
+ .macho_file = macho_file,
+ } };
}
-pub fn parseDwarfInfo(self: Object) DwarfInfo {
- var di = DwarfInfo{
- .debug_info = &[0]u8{},
- .debug_abbrev = &[0]u8{},
- .debug_str = &[0]u8{},
- };
- for (self.getSourceSections()) |sect| {
- if (!sect.isDebug()) continue;
- const sectname = sect.sectName();
- if (mem.eql(u8, sectname, "__debug_info")) {
- di.debug_info = self.getSectionContents(sect);
- } else if (mem.eql(u8, sectname, "__debug_abbrev")) {
- di.debug_abbrev = self.getSectionContents(sect);
- } else if (mem.eql(u8, sectname, "__debug_str")) {
- di.debug_str = self.getSectionContents(sect);
- }
+fn formatCies(
+ ctx: FormatContext,
+ comptime unused_fmt_string: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+) !void {
+ _ = unused_fmt_string;
+ _ = options;
+ const object = ctx.object;
+ try writer.writeAll(" cies\n");
+ for (object.cies.items, 0..) |cie, i| {
+ try writer.print(" cie({d}) : {}\n", .{ i, cie.fmt(ctx.macho_file) });
}
- return di;
}
-/// Returns Platform composed from the first encountered build version type load command:
-/// either LC_BUILD_VERSION or LC_VERSION_MIN_*.
-pub fn getPlatform(self: Object) ?Platform {
- var it = LoadCommandIterator{
- .ncmds = self.header.ncmds,
- .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds],
- };
- while (it.next()) |cmd| {
- switch (cmd.cmd()) {
- .BUILD_VERSION,
- .VERSION_MIN_MACOSX,
- .VERSION_MIN_IPHONEOS,
- .VERSION_MIN_TVOS,
- .VERSION_MIN_WATCHOS,
- => return Platform.fromLoadCommand(cmd),
- else => {},
- }
- } else return null;
+pub fn fmtFdes(self: *Object, macho_file: *MachO) std.fmt.Formatter(formatFdes) {
+ return .{ .data = .{
+ .object = self,
+ .macho_file = macho_file,
+ } };
}
-pub fn getSectionContents(self: Object, sect: macho.section_64) []const u8 {
- const size = @as(usize, @intCast(sect.size));
- return self.contents[sect.offset..][0..size];
+fn formatFdes(
+ ctx: FormatContext,
+ comptime unused_fmt_string: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+) !void {
+ _ = unused_fmt_string;
+ _ = options;
+ const object = ctx.object;
+ try writer.writeAll(" fdes\n");
+ for (object.fdes.items, 0..) |fde, i| {
+ try writer.print(" fde({d}) : {}\n", .{ i, fde.fmt(ctx.macho_file) });
+ }
}
-pub fn getSectionAliasSymbolIndex(self: Object, sect_id: u8) u32 {
- const start = @as(u32, @intCast(self.in_symtab.?.len));
- return start + sect_id;
+pub fn fmtUnwindRecords(self: *Object, macho_file: *MachO) std.fmt.Formatter(formatUnwindRecords) {
+ return .{ .data = .{
+ .object = self,
+ .macho_file = macho_file,
+ } };
}
-pub fn getSectionAliasSymbol(self: *Object, sect_id: u8) macho.nlist_64 {
- return self.symtab[self.getSectionAliasSymbolIndex(sect_id)];
+fn formatUnwindRecords(
+ ctx: FormatContext,
+ comptime unused_fmt_string: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+) !void {
+ _ = unused_fmt_string;
+ _ = options;
+ const object = ctx.object;
+ const macho_file = ctx.macho_file;
+ try writer.writeAll(" unwind records\n");
+ for (object.unwind_records.items) |rec| {
+ try writer.print(" rec({d}) : {}\n", .{ rec, macho_file.getUnwindRecord(rec).fmt(macho_file) });
+ }
}
-pub fn getSectionAliasSymbolPtr(self: *Object, sect_id: u8) *macho.nlist_64 {
- return &self.symtab[self.getSectionAliasSymbolIndex(sect_id)];
+pub fn fmtSymtab(self: *Object, macho_file: *MachO) std.fmt.Formatter(formatSymtab) {
+ return .{ .data = .{
+ .object = self,
+ .macho_file = macho_file,
+ } };
}
-fn getSourceRelocs(self: Object, sect: macho.section_64) ?[]align(1) const macho.relocation_info {
- if (sect.nreloc == 0) return null;
- return @as([*]align(1) const macho.relocation_info, @ptrCast(self.contents.ptr + sect.reloff))[0..sect.nreloc];
+fn formatSymtab(
+ ctx: FormatContext,
+ comptime unused_fmt_string: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+) !void {
+ _ = unused_fmt_string;
+ _ = options;
+ const object = ctx.object;
+ try writer.writeAll(" symbols\n");
+ for (object.symbols.items) |index| {
+ const sym = ctx.macho_file.getSymbol(index);
+ try writer.print(" {}\n", .{sym.fmt(ctx.macho_file)});
+ }
}
-pub fn getRelocs(self: Object, sect_id: u8) []const macho.relocation_info {
- const sect = self.getSourceSection(sect_id);
- const start = self.section_relocs_lookup.items[sect_id];
- const len = sect.nreloc;
- return self.relocations.items[start..][0..len];
+pub fn fmtPath(self: Object) std.fmt.Formatter(formatPath) {
+ return .{ .data = self };
}
-pub fn getSymbolName(self: Object, index: u32) []const u8 {
- const strtab = self.in_strtab.?;
- const sym = self.symtab[index];
+fn formatPath(
+ object: Object,
+ comptime unused_fmt_string: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+) !void {
+ _ = unused_fmt_string;
+ _ = options;
+ if (object.archive) |path| {
+ try writer.writeAll(path);
+ try writer.writeByte('(');
+ try writer.writeAll(object.path);
+ try writer.writeByte(')');
+ } else try writer.writeAll(object.path);
+}
+
+const Section = struct {
+ header: macho.section_64,
+ subsections: std.ArrayListUnmanaged(Subsection) = .{},
+ relocs: std.ArrayListUnmanaged(Relocation) = .{},
+};
+
+const Subsection = struct {
+ atom: Atom.Index,
+ off: u64,
+};
+
+const Nlist = struct {
+ nlist: macho.nlist_64,
+ size: u64,
+ atom: Atom.Index,
+};
+
+/// Debug-stab bookkeeping for one compilation unit. `comp_dir` indexes the
+/// unit's first stab entry in the object's symtab; the accessors below assume
+/// the conventional layout: comp-dir entry, then TU name, then OSO entry.
+const StabFile = struct {
+ comp_dir: u32,
+ stabs: std.ArrayListUnmanaged(Stab) = .{},
- if (self.getSourceSymbol(index) == null) {
- assert(sym.n_strx == 0);
- return "";
+ // Compilation directory string for this stab file.
+ fn getCompDir(sf: StabFile, object: *const Object) [:0]const u8 {
+ const nlist = object.symtab.items(.nlist)[sf.comp_dir];
+ return object.getString(nlist.n_strx);
 }
- const start = sym.n_strx;
- const len = self.strtab_lookup[index];
+ // Translation-unit name: the symtab entry immediately after comp_dir.
+ fn getTuName(sf: StabFile, object: *const Object) [:0]const u8 {
+ const nlist = object.symtab.items(.nlist)[sf.comp_dir + 1];
+ return object.getString(nlist.n_strx);
+ }
- return strtab[start..][0 .. len - 1 :0];
-}
+ // OSO (original object file) path: two entries after comp_dir.
+ fn getOsoPath(sf: StabFile, object: *const Object) [:0]const u8 {
+ const nlist = object.symtab.items(.nlist)[sf.comp_dir + 2];
+ return object.getString(nlist.n_strx);
+ }
-fn getSymbolAliases(self: Object, index: u32) Entry {
- const addr = self.source_address_lookup[index];
- var start = index;
- while (start > 0 and
- self.source_address_lookup[start - 1] == addr) : (start -= 1)
- {}
- const end: u32 = for (self.source_address_lookup[start..], start..) |saddr, i| {
- if (saddr != addr) break @as(u32, @intCast(i));
- } else @as(u32, @intCast(self.source_address_lookup.len));
- return .{ .start = start, .len = end - start };
-}
+ // Modification time stored in the OSO entry's n_value.
+ fn getOsoModTime(sf: StabFile, object: *const Object) u64 {
+ const nlist = object.symtab.items(.nlist)[sf.comp_dir + 2];
+ return nlist.n_value;
+ }
-pub fn getSymbolByAddress(self: Object, addr: u64, sect_hint: ?u8) u32 {
- // Find containing atom
- const Predicate = struct {
- addr: i64,
+ // A single stab record; `symbol` is null when the stab has no linker symbol.
+ const Stab = struct {
+ tag: enum { func, global, static } = .func,
+ symbol: ?Symbol.Index = null,
- pub fn predicate(pred: @This(), other: i64) bool {
- return if (other == -1) true else other > pred.addr;
+ fn getSymbol(stab: Stab, macho_file: *MachO) ?*Symbol {
+ return if (stab.symbol) |s| macho_file.getSymbol(s) else null;
 }
 };
+};
- if (sect_hint) |sect_id| {
- if (self.source_section_index_lookup[sect_id].len > 0) {
- const lookup = self.source_section_index_lookup[sect_id];
- const target_sym_index = MachO.lsearch(
- i64,
- self.source_address_lookup[lookup.start..][0..lookup.len],
- Predicate{ .addr = @as(i64, @intCast(addr)) },
- );
- if (target_sym_index > 0) {
- // Hone in on the most senior alias of the target symbol.
- // See SymbolAtIndex.lessThan for more context.
- const aliases = self.getSymbolAliases(@intCast(lookup.start + target_sym_index - 1));
- return aliases.start;
- }
- }
- return self.getSectionAliasSymbolIndex(sect_id);
- }
+/// Relocation parsing for x86_64 input objects.
+const x86_64 = struct {
+ /// Decodes the raw relocation_info records of section `n_sect` into
+ /// arch-neutral Relocation entries in `out`: reads the implicit addend out
+ /// of the instruction stream, folds in the SIGNED_{1,2,4} bias, resolves
+ /// non-extern targets to atoms, and validates type/pcrel/length consistency.
+ fn parseRelocs(
+ self: *const Object,
+ n_sect: u8,
+ sect: macho.section_64,
+ out: *std.ArrayListUnmanaged(Relocation),
+ macho_file: *MachO,
+ ) !void {
+ const gpa = macho_file.base.allocator;
+
+ const relocs = @as(
+ [*]align(1) const macho.relocation_info,
+ @ptrCast(self.data.ptr + sect.reloff),
+ )[0..sect.nreloc];
+ const code = self.getSectionData(@intCast(n_sect));
+
+ try out.ensureTotalCapacityPrecise(gpa, relocs.len);
+
+ var i: usize = 0;
+ while (i < relocs.len) : (i += 1) {
+ const rel = relocs[i];
+ const rel_type: macho.reloc_type_x86_64 = @enumFromInt(rel.r_type);
+ const rel_offset = @as(u32, @intCast(rel.r_address));
+
+ // Implicit addend is stored in the relocated bytes; width from r_length.
+ var addend = switch (rel.r_length) {
+ 0 => code[rel_offset],
+ 1 => mem.readInt(i16, code[rel_offset..][0..2], .little),
+ 2 => mem.readInt(i32, code[rel_offset..][0..4], .little),
+ 3 => mem.readInt(i64, code[rel_offset..][0..8], .little),
+ };
+ // SIGNED_k relocations bake a -k bias into the stored value; undo it.
+ addend += switch (@as(macho.reloc_type_x86_64, @enumFromInt(rel.r_type))) {
+ .X86_64_RELOC_SIGNED_1 => 1,
+ .X86_64_RELOC_SIGNED_2 => 2,
+ .X86_64_RELOC_SIGNED_4 => 4,
+ else => 0,
+ };
- const target_sym_index = MachO.lsearch(i64, self.source_address_lookup, Predicate{
- .addr = @as(i64, @intCast(addr)),
- });
- assert(target_sym_index > 0);
- return @as(u32, @intCast(target_sym_index - 1));
-}
+ // Non-extern: target is a section-local address (pcrel ones add +4 for
+ // the instruction end); rewrite the addend relative to the found atom.
+ const target = if (rel.r_extern == 0) blk: {
+ const nsect = rel.r_symbolnum - 1;
+ const taddr: i64 = if (rel.r_pcrel == 1)
+ @as(i64, @intCast(sect.addr)) + rel.r_address + addend + 4
+ else
+ addend;
+ const target = self.findAtomInSection(@intCast(taddr), @intCast(nsect)) orelse {
+ macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{
+ self.fmtPath(), sect.segName(), sect.sectName(), rel.r_address,
+ });
+ return error.ParseFailed;
+ };
+ addend = taddr - @as(i64, @intCast(macho_file.getAtom(target).?.getInputAddress(macho_file)));
+ break :blk target;
+ } else self.symbols.items[rel.r_symbolnum];
+
+ // A SUBTRACTOR must be immediately followed by an UNSIGNED record.
+ const has_subtractor = if (i > 0 and
+ @as(macho.reloc_type_x86_64, @enumFromInt(relocs[i - 1].r_type)) == .X86_64_RELOC_SUBTRACTOR)
+ blk: {
+ if (rel_type != .X86_64_RELOC_UNSIGNED) {
+ macho_file.base.fatal("{}: {s},{s}: 0x{x}: X86_64_RELOC_SUBTRACTOR followed by {s}", .{
+ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type),
+ });
+ return error.ParseFailed;
+ }
+ break :blk true;
+ } else false;
+
+ const @"type": Relocation.Type = validateRelocType(rel, rel_type) catch |err| {
+ switch (err) {
+ error.Pcrel => macho_file.base.fatal(
+ "{}: {s},{s}: 0x{x}: PC-relative {s} relocation",
+ .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) },
+ ),
+ error.NonPcrel => macho_file.base.fatal(
+ "{}: {s},{s}: 0x{x}: non-PC-relative {s} relocation",
+ .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) },
+ ),
+ error.InvalidLength => macho_file.base.fatal(
+ "{}: {s},{s}: 0x{x}: invalid length of {d} in {s} relocation",
+ .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @as(u8, 1) << rel.r_length, @tagName(rel_type) },
+ ),
+ error.NonExtern => macho_file.base.fatal(
+ "{}: {s},{s}: 0x{x}: non-extern target in {s} relocation",
+ .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) },
+ ),
+ }
+ return error.ParseFailed;
+ };
-pub fn getGlobal(self: Object, sym_index: u32) ?u32 {
- if (self.globals_lookup[sym_index] == -1) return null;
- return @as(u32, @intCast(self.globals_lookup[sym_index]));
-}
+ out.appendAssumeCapacity(.{
+ .tag = if (rel.r_extern == 1) .@"extern" else .local,
+ .offset = @as(u32, @intCast(rel.r_address)),
+ .target = target,
+ .addend = addend,
+ .type = @"type",
+ .meta = .{
+ .pcrel = rel.r_pcrel == 1,
+ .has_subtractor = has_subtractor,
+ .length = rel.r_length,
+ .symbolnum = rel.r_symbolnum,
+ },
+ });
+ }
+ }
-pub fn getAtomIndexForSymbol(self: Object, sym_index: u32) ?Atom.Index {
- return self.atom_by_index_table[sym_index];
-}
+ /// Maps a raw x86_64 reloc type to the arch-neutral Relocation.Type, and
+ /// rejects records whose pcrel/length/extern bits contradict that type.
+ fn validateRelocType(rel: macho.relocation_info, rel_type: macho.reloc_type_x86_64) !Relocation.Type {
+ switch (rel_type) {
+ .X86_64_RELOC_UNSIGNED => {
+ if (rel.r_pcrel == 1) return error.Pcrel;
+ if (rel.r_length != 2 and rel.r_length != 3) return error.InvalidLength;
+ return .unsigned;
+ },
+
+ .X86_64_RELOC_SUBTRACTOR => {
+ if (rel.r_pcrel == 1) return error.Pcrel;
+ return .subtractor;
+ },
+
+ .X86_64_RELOC_BRANCH,
+ .X86_64_RELOC_GOT_LOAD,
+ .X86_64_RELOC_GOT,
+ .X86_64_RELOC_TLV,
+ => {
+ if (rel.r_pcrel == 0) return error.NonPcrel;
+ if (rel.r_length != 2) return error.InvalidLength;
+ if (rel.r_extern == 0) return error.NonExtern;
+ return switch (rel_type) {
+ .X86_64_RELOC_BRANCH => .branch,
+ .X86_64_RELOC_GOT_LOAD => .got_load,
+ .X86_64_RELOC_GOT => .got,
+ .X86_64_RELOC_TLV => .tlv,
+ else => unreachable,
+ };
+ },
+
+ .X86_64_RELOC_SIGNED,
+ .X86_64_RELOC_SIGNED_1,
+ .X86_64_RELOC_SIGNED_2,
+ .X86_64_RELOC_SIGNED_4,
+ => {
+ if (rel.r_pcrel == 0) return error.NonPcrel;
+ if (rel.r_length != 2) return error.InvalidLength;
+ return switch (rel_type) {
+ .X86_64_RELOC_SIGNED => .signed,
+ .X86_64_RELOC_SIGNED_1 => .signed1,
+ .X86_64_RELOC_SIGNED_2 => .signed2,
+ .X86_64_RELOC_SIGNED_4 => .signed4,
+ else => unreachable,
+ };
+ },
+ }
+ }
+};
-pub fn hasUnwindRecords(self: Object) bool {
- return self.unwind_info_sect_id != null;
-}
+/// Relocation parsing for arm64 input objects.
+const aarch64 = struct {
+ /// Decodes the raw relocation_info records of section `n_sect` into
+ /// arch-neutral Relocation entries in `out`. ARM64_RELOC_ADDEND is consumed
+ /// here and folded into the following PAGE21/PAGEOFF12 record's addend.
+ fn parseRelocs(
+ self: *const Object,
+ n_sect: u8,
+ sect: macho.section_64,
+ out: *std.ArrayListUnmanaged(Relocation),
+ macho_file: *MachO,
+ ) !void {
+ const gpa = macho_file.base.allocator;
+
+ const relocs = @as(
+ [*]align(1) const macho.relocation_info,
+ @ptrCast(self.data.ptr + sect.reloff),
+ )[0..sect.nreloc];
+ const code = self.getSectionData(@intCast(n_sect));
+
+ try out.ensureTotalCapacityPrecise(gpa, relocs.len);
+
+ var i: usize = 0;
+ while (i < relocs.len) : (i += 1) {
+ var rel = relocs[i];
+ const rel_offset = @as(u32, @intCast(rel.r_address));
+
+ var addend: i64 = 0;
+
+ switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) {
+ .ARM64_RELOC_ADDEND => {
+ // Addend carried in r_symbolnum; applies to the next record.
+ addend = rel.r_symbolnum;
+ i += 1;
+ if (i >= relocs.len) {
+ macho_file.base.fatal("{}: {s},{s}: 0x{x}: unterminated ARM64_RELOC_ADDEND", .{
+ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset,
+ });
+ return error.ParseFailed;
+ }
+ rel = relocs[i];
+ switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) {
+ .ARM64_RELOC_PAGE21, .ARM64_RELOC_PAGEOFF12 => {},
+ else => |x| {
+ macho_file.base.fatal(
+ "{}: {s},{s}: 0x{x}: ARM64_RELOC_ADDEND followed by {s}",
+ .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(x) },
+ );
+ return error.ParseFailed;
+ },
+ }
+ },
+ .ARM64_RELOC_UNSIGNED => {
+ // UNSIGNED stores its addend in the relocated bytes themselves.
+ addend = switch (rel.r_length) {
+ 0 => code[rel_offset],
+ 1 => mem.readInt(i16, code[rel_offset..][0..2], .little),
+ 2 => mem.readInt(i32, code[rel_offset..][0..4], .little),
+ 3 => mem.readInt(i64, code[rel_offset..][0..8], .little),
+ };
+ },
+ else => {},
+ }
-pub fn getUnwindRecords(self: Object) []align(1) const macho.compact_unwind_entry {
- const sect_id = self.unwind_info_sect_id orelse return &[0]macho.compact_unwind_entry{};
- const sect = self.getSourceSection(sect_id);
- const data = self.getSectionContents(sect);
- const num_entries = @divExact(data.len, @sizeOf(macho.compact_unwind_entry));
- return @as([*]align(1) const macho.compact_unwind_entry, @ptrCast(data))[0..num_entries];
-}
+ const rel_type: macho.reloc_type_arm64 = @enumFromInt(rel.r_type);
-pub fn hasEhFrameRecords(self: Object) bool {
- return self.eh_frame_sect_id != null;
-}
+ // Non-extern: resolve the section-local address to its atom and make
+ // the addend relative to that atom's input address.
+ const target = if (rel.r_extern == 0) blk: {
+ const nsect = rel.r_symbolnum - 1;
+ const taddr: i64 = if (rel.r_pcrel == 1)
+ @as(i64, @intCast(sect.addr)) + rel.r_address + addend
+ else
+ addend;
+ const target = self.findAtomInSection(@intCast(taddr), @intCast(nsect)) orelse {
+ macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{
+ self.fmtPath(), sect.segName(), sect.sectName(), rel.r_address,
+ });
+ return error.ParseFailed;
+ };
+ addend = taddr - @as(i64, @intCast(macho_file.getAtom(target).?.getInputAddress(macho_file)));
+ break :blk target;
+ } else self.symbols.items[rel.r_symbolnum];
+
+ // A SUBTRACTOR must be immediately followed by an UNSIGNED record.
+ const has_subtractor = if (i > 0 and
+ @as(macho.reloc_type_arm64, @enumFromInt(relocs[i - 1].r_type)) == .ARM64_RELOC_SUBTRACTOR)
+ blk: {
+ if (rel_type != .ARM64_RELOC_UNSIGNED) {
+ macho_file.base.fatal("{}: {s},{s}: 0x{x}: ARM64_RELOC_SUBTRACTOR followed by {s}", .{
+ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type),
+ });
+ return error.ParseFailed;
+ }
+ break :blk true;
+ } else false;
+
+ const @"type": Relocation.Type = validateRelocType(rel, rel_type) catch |err| {
+ switch (err) {
+ error.Pcrel => macho_file.base.fatal(
+ "{}: {s},{s}: 0x{x}: PC-relative {s} relocation",
+ .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) },
+ ),
+ error.NonPcrel => macho_file.base.fatal(
+ "{}: {s},{s}: 0x{x}: non-PC-relative {s} relocation",
+ .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) },
+ ),
+ error.InvalidLength => macho_file.base.fatal(
+ "{}: {s},{s}: 0x{x}: invalid length of {d} in {s} relocation",
+ .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @as(u8, 1) << rel.r_length, @tagName(rel_type) },
+ ),
+ error.NonExtern => macho_file.base.fatal(
+ "{}: {s},{s}: 0x{x}: non-extern target in {s} relocation",
+ .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) },
+ ),
+ }
+ return error.ParseFailed;
+ };
-pub fn getEhFrameRecordsIterator(self: Object) eh_frame.Iterator {
- const sect_id = self.eh_frame_sect_id orelse return .{ .data = &[0]u8{} };
- const sect = self.getSourceSection(sect_id);
- const data = self.getSectionContents(sect);
- return .{ .data = data };
-}
+ out.appendAssumeCapacity(.{
+ .tag = if (rel.r_extern == 1) .@"extern" else .local,
+ .offset = @as(u32, @intCast(rel.r_address)),
+ .target = target,
+ .addend = addend,
+ .type = @"type",
+ .meta = .{
+ .pcrel = rel.r_pcrel == 1,
+ .has_subtractor = has_subtractor,
+ .length = rel.r_length,
+ .symbolnum = rel.r_symbolnum,
+ },
+ });
+ }
+ }
-pub fn hasDataInCode(self: Object) bool {
- return self.data_in_code.items.len > 0;
-}
+ /// Maps a raw arm64 reloc type to the arch-neutral Relocation.Type, and
+ /// rejects records whose pcrel/length/extern bits contradict that type.
+ fn validateRelocType(rel: macho.relocation_info, rel_type: macho.reloc_type_arm64) !Relocation.Type {
+ switch (rel_type) {
+ .ARM64_RELOC_UNSIGNED => {
+ if (rel.r_pcrel == 1) return error.Pcrel;
+ if (rel.r_length != 2 and rel.r_length != 3) return error.InvalidLength;
+ return .unsigned;
+ },
+
+ .ARM64_RELOC_SUBTRACTOR => {
+ if (rel.r_pcrel == 1) return error.Pcrel;
+ return .subtractor;
+ },
+
+ .ARM64_RELOC_BRANCH26,
+ .ARM64_RELOC_PAGE21,
+ .ARM64_RELOC_GOT_LOAD_PAGE21,
+ .ARM64_RELOC_TLVP_LOAD_PAGE21,
+ .ARM64_RELOC_POINTER_TO_GOT,
+ => {
+ if (rel.r_pcrel == 0) return error.NonPcrel;
+ if (rel.r_length != 2) return error.InvalidLength;
+ if (rel.r_extern == 0) return error.NonExtern;
+ return switch (rel_type) {
+ .ARM64_RELOC_BRANCH26 => .branch,
+ .ARM64_RELOC_PAGE21 => .page,
+ .ARM64_RELOC_GOT_LOAD_PAGE21 => .got_load_page,
+ .ARM64_RELOC_TLVP_LOAD_PAGE21 => .tlvp_page,
+ .ARM64_RELOC_POINTER_TO_GOT => .got,
+ else => unreachable,
+ };
+ },
+
+ .ARM64_RELOC_PAGEOFF12,
+ .ARM64_RELOC_GOT_LOAD_PAGEOFF12,
+ .ARM64_RELOC_TLVP_LOAD_PAGEOFF12,
+ => {
+ if (rel.r_pcrel == 1) return error.Pcrel;
+ if (rel.r_length != 2) return error.InvalidLength;
+ if (rel.r_extern == 0) return error.NonExtern;
+ return switch (rel_type) {
+ .ARM64_RELOC_PAGEOFF12 => .pageoff,
+ .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => .got_load_pageoff,
+ .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => .tlvp_pageoff,
+ else => unreachable,
+ };
+ },
-const Object = @This();
+ .ARM64_RELOC_ADDEND => unreachable, // We make it part of the addend field
+ }
+ }
+};
-const std = @import("std");
-const build_options = @import("build_options");
const assert = std.debug.assert;
-const dwarf = std.dwarf;
const eh_frame = @import("eh_frame.zig");
-const fs = std.fs;
-const io = std.io;
const log = std.log.scoped(.link);
const macho = std.macho;
const math = std.math;
const mem = std.mem;
-const sort = std.sort;
-const trace = @import("../../tracy.zig").trace;
+const trace = @import("../tracy.zig").trace;
+const std = @import("std");
const Allocator = mem.Allocator;
const Atom = @import("Atom.zig");
+const Cie = eh_frame.Cie;
const DwarfInfo = @import("DwarfInfo.zig");
+const Fde = eh_frame.Fde;
+const File = @import("file.zig").File;
const LoadCommandIterator = macho.LoadCommandIterator;
const MachO = @import("../MachO.zig");
-const Platform = @import("load_commands.zig").Platform;
-const SymbolWithLoc = MachO.SymbolWithLoc;
+const Object = @This();
+const Relocation = @import("Relocation.zig");
+const StringTable = @import("../strtab.zig").StringTable;
+const Symbol = @import("Symbol.zig");
const UnwindInfo = @import("UnwindInfo.zig");
-const Alignment = Atom.Alignment;
src/link/MachO/relocatable.zig
@@ -0,0 +1,452 @@
+/// Drives the relocatable (-r style) link: resolve and lay out everything
+/// into a single segment, write section contents + relocations, then the
+/// symtab/strtab trailer, load commands and finally the mach header.
+pub fn flush(macho_file: *MachO) !void {
+ markExports(macho_file);
+ claimUnresolved(macho_file);
+ try initOutputSections(macho_file);
+ try macho_file.sortSections();
+ try macho_file.addAtomsToSections();
+ try calcSectionSizes(macho_file);
+
+ {
+ // For relocatable, we only ever need a single segment so create it now.
+ const prot: macho.vm_prot_t = macho.PROT.READ | macho.PROT.WRITE | macho.PROT.EXEC;
+ try macho_file.segments.append(macho_file.base.allocator, .{
+ .cmdsize = @sizeOf(macho.segment_command_64),
+ .segname = MachO.makeStaticString(""),
+ .maxprot = prot,
+ .initprot = prot,
+ });
+ const seg = &macho_file.segments.items[0];
+ seg.nsects = @intCast(macho_file.sections.items(.header).len);
+ seg.cmdsize += seg.nsects * @sizeOf(macho.section_64);
+ }
+
+ var off = try allocateSections(macho_file);
+
+ {
+ // Allocate the single segment.
+ assert(macho_file.segments.items.len == 1);
+ const seg = &macho_file.segments.items[0];
+ var vmaddr: u64 = 0;
+ var fileoff: u64 = load_commands.calcLoadCommandsSizeObject(macho_file) + @sizeOf(macho.mach_header_64);
+ seg.vmaddr = vmaddr;
+ seg.fileoff = fileoff;
+
+ // The segment spans from its base to the end of the last section
+ // (zerofill sections occupy vm space but no file space).
+ for (macho_file.sections.items(.header)) |header| {
+ vmaddr = header.addr + header.size;
+ if (!header.isZerofill()) {
+ fileoff = header.offset + header.size;
+ }
+ }
+
+ seg.vmsize = vmaddr - seg.vmaddr;
+ seg.filesize = fileoff - seg.fileoff;
+ }
+
+ macho_file.allocateAtoms();
+
+ state_log.debug("{}", .{macho_file.dumpState()});
+
+ try macho_file.calcSymtabSize();
+ try writeAtoms(macho_file);
+ try writeCompactUnwind(macho_file);
+ try writeEhFrame(macho_file);
+
+ // Trailing metadata: data-in-code, symtab and strtab, each u64-aligned.
+ off = mem.alignForward(u32, off, @alignOf(u64));
+ off = try macho_file.writeDataInCode(0, off);
+ off = mem.alignForward(u32, off, @alignOf(u64));
+ off = try macho_file.writeSymtab(off);
+ off = mem.alignForward(u32, off, @alignOf(u64));
+ off = try macho_file.writeStrtab(off);
+
+ const ncmds, const sizeofcmds = try writeLoadCommands(macho_file);
+ try writeHeader(macho_file, ncmds, sizeofcmds);
+}
+
+/// Flags as exported every global symbol that is defined by the very object
+/// that owns it; globals resolved to another file are left untouched.
+fn markExports(macho_file: *MachO) void {
+ for (macho_file.objects.items) |index| {
+ for (macho_file.getFile(index).?.getSymbols()) |sym_index| {
+ const sym = macho_file.getSymbol(sym_index);
+ const file = sym.getFile(macho_file) orelse continue;
+ if (sym.visibility != .global) continue;
+ if (file.getIndex() == index) {
+ sym.flags.@"export" = true;
+ }
+ }
+ }
+}
+
+/// Claims every still-unresolved external undefined symbol for the object
+/// that references it, marking it as a global import so the relocatable
+/// output can carry the undefined reference forward.
+fn claimUnresolved(macho_file: *MachO) void {
+ for (macho_file.objects.items) |index| {
+ const object = macho_file.getFile(index).?.object;
+
+ for (object.symbols.items, 0..) |sym_index, i| {
+ const nlist_idx = @as(Symbol.Index, @intCast(i));
+ const nlist = object.symtab.items(.nlist)[nlist_idx];
+ if (!nlist.ext()) continue;
+ if (!nlist.undf()) continue;
+
+ const sym = macho_file.getSymbol(sym_index);
+ // Already resolved by some file; nothing to claim.
+ if (sym.getFile(macho_file) != null) continue;
+
+ sym.value = 0;
+ sym.atom = 0;
+ sym.nlist_idx = nlist_idx;
+ sym.file = index;
+ sym.flags.weak_ref = nlist.weakRef();
+ sym.flags.import = true;
+ sym.visibility = .global;
+ }
+ }
+}
+
+/// Assigns each live atom its output section, and creates the synthetic
+/// __LD,__compact_unwind and __TEXT,__eh_frame sections when any input
+/// object carries compact-unwind or eh-frame data.
+fn initOutputSections(macho_file: *MachO) !void {
+ for (macho_file.objects.items) |index| {
+ const object = macho_file.getFile(index).?.object;
+ for (object.atoms.items) |atom_index| {
+ const atom = macho_file.getAtom(atom_index) orelse continue;
+ if (!atom.flags.alive) continue;
+ atom.out_n_sect = try Atom.initOutputSection(atom.getInputSection(macho_file), macho_file);
+ }
+ }
+
+ const needs_unwind_info = for (macho_file.objects.items) |index| {
+ if (macho_file.getFile(index).?.object.compact_unwind_sect_index != null) break true;
+ } else false;
+ if (needs_unwind_info) {
+ macho_file.unwind_info_sect_index = try macho_file.addSection("__LD", "__compact_unwind", .{
+ .flags = macho.S_ATTR_DEBUG,
+ });
+ }
+
+ const needs_eh_frame = for (macho_file.objects.items) |index| {
+ if (macho_file.getFile(index).?.object.eh_frame_sect_index != null) break true;
+ } else false;
+ if (needs_eh_frame) {
+ // eh_frame data implies compact unwind info in this pipeline.
+ assert(needs_unwind_info);
+ macho_file.eh_frame_sect_index = try macho_file.addSection("__TEXT", "__eh_frame", .{});
+ }
+}
+
+/// Computes each output section's size, alignment and relocation count by
+/// packing its atoms back-to-back with alignment padding, then sizes the
+/// synthetic compact-unwind and eh-frame sections.
+fn calcSectionSizes(macho_file: *MachO) !void {
+ const slice = macho_file.sections.slice();
+ for (slice.items(.header), slice.items(.atoms)) |*header, atoms| {
+ if (atoms.items.len == 0) continue;
+ for (atoms.items) |atom_index| {
+ const atom = macho_file.getAtom(atom_index).?;
+ // atom.alignment is a power-of-two exponent.
+ const atom_alignment = try math.powi(u32, 2, atom.alignment);
+ const offset = mem.alignForward(u64, header.size, atom_alignment);
+ const padding = offset - header.size;
+ atom.value = offset;
+ header.size += padding + atom.size;
+ header.@"align" = @max(header.@"align", atom.alignment);
+ header.nreloc += atom.calcNumRelocs(macho_file);
+ }
+ }
+
+ if (macho_file.unwind_info_sect_index) |index| {
+ calcCompactUnwindSize(macho_file, index);
+ }
+
+ if (macho_file.eh_frame_sect_index) |index| {
+ const sect = &macho_file.sections.items(.header)[index];
+ sect.size = try eh_frame.calcSize(macho_file);
+ sect.@"align" = 3;
+ sect.nreloc = eh_frame.calcNumRelocs(macho_file);
+ }
+}
+
+/// Sizes the __compact_unwind section: one compact_unwind_entry per live
+/// unwind record, with one reloc for the function plus one each for the
+/// personality and LSDA references when present.
+fn calcCompactUnwindSize(macho_file: *MachO, sect_index: u8) void {
+ var size: u32 = 0;
+ var nreloc: u32 = 0;
+
+ for (macho_file.objects.items) |index| {
+ const object = macho_file.getFile(index).?.object;
+ for (object.unwind_records.items) |irec| {
+ const rec = macho_file.getUnwindRecord(irec);
+ if (!rec.alive) continue;
+ size += @sizeOf(macho.compact_unwind_entry);
+ nreloc += 1;
+ if (rec.getPersonality(macho_file)) |_| {
+ nreloc += 1;
+ }
+ if (rec.getLsdaAtom(macho_file)) |_| {
+ nreloc += 1;
+ }
+ }
+ }
+
+ const sect = &macho_file.sections.items(.header)[sect_index];
+ sect.size = size;
+ sect.nreloc = nreloc;
+ sect.@"align" = 3;
+}
+
+/// Assigns vm addresses and file offsets to every output section (file space
+/// only for non-zerofill sections), then reserves file space for each
+/// section's relocation table. Returns the file offset past all allocations.
+fn allocateSections(macho_file: *MachO) !u32 {
+ var fileoff = load_commands.calcLoadCommandsSizeObject(macho_file) + @sizeOf(macho.mach_header_64);
+ var vmaddr: u64 = 0;
+ const slice = macho_file.sections.slice();
+
+ for (slice.items(.header)) |*header| {
+ const alignment = try math.powi(u32, 2, header.@"align");
+ vmaddr = mem.alignForward(u64, vmaddr, alignment);
+ header.addr = vmaddr;
+ vmaddr += header.size;
+
+ if (!header.isZerofill()) {
+ fileoff = mem.alignForward(u32, fileoff, alignment);
+ header.offset = fileoff;
+ fileoff += @intCast(header.size);
+ }
+ }
+
+ // Relocation tables follow all section contents in the file.
+ for (slice.items(.header)) |*header| {
+ if (header.nreloc == 0) continue;
+ header.reloff = mem.alignForward(u32, fileoff, @alignOf(macho.relocation_info));
+ fileoff = header.reloff + header.nreloc * @sizeOf(macho.relocation_info);
+ }
+
+ return fileoff;
+}
+
+// We need to sort relocations in descending order to be compatible with Apple's linker.
+/// `mem.sort` comparator: orders relocation entries by descending r_address.
+fn sortReloc(ctx: void, lhs: macho.relocation_info, rhs: macho.relocation_info) bool {
+ _ = ctx;
+ return lhs.r_address > rhs.r_address;
+}
+
+/// Writes every non-empty, non-zerofill section: copies each atom's code into
+/// a section-sized buffer (gaps padded with int3 on x86_64 code sections),
+/// gathers and sorts its relocations, then writes both to the output file.
+fn writeAtoms(macho_file: *MachO) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+
+ const gpa = macho_file.base.allocator;
+ const cpu_arch = macho_file.options.cpu_arch.?;
+ const slice = macho_file.sections.slice();
+
+ for (slice.items(.header), slice.items(.atoms)) |header, atoms| {
+ if (atoms.items.len == 0) continue;
+ if (header.isZerofill()) continue;
+
+ const code = try gpa.alloc(u8, header.size);
+ defer gpa.free(code);
+ const padding_byte: u8 = if (header.isCode() and cpu_arch == .x86_64) 0xcc else 0;
+ @memset(code, padding_byte);
+
+ var relocs = try std.ArrayList(macho.relocation_info).initCapacity(gpa, header.nreloc);
+ defer relocs.deinit();
+
+ for (atoms.items) |atom_index| {
+ const atom = macho_file.getAtom(atom_index).?;
+ assert(atom.flags.alive);
+ const off = atom.value - header.addr;
+ @memcpy(code[off..][0..atom.size], atom.getCode(macho_file));
+ try atom.writeRelocs(macho_file, code[off..][0..atom.size], &relocs);
+ }
+
+ assert(relocs.items.len == header.nreloc);
+
+ // Descending r_address order, matching Apple's linker output.
+ mem.sort(macho.relocation_info, relocs.items, {}, sortReloc);
+
+ // TODO scattered writes?
+ try macho_file.base.file.pwriteAll(code, header.offset);
+ try macho_file.base.file.pwriteAll(mem.sliceAsBytes(relocs.items), header.reloff);
+ }
+}
+
+/// Regenerates the __compact_unwind section: emits one compact_unwind_entry
+/// per live unwind record plus UNSIGNED relocations for the function range,
+/// personality function and LSDA fields, then writes entries and sorted
+/// relocations to the output file.
+fn writeCompactUnwind(macho_file: *MachO) !void {
+ const sect_index = macho_file.unwind_info_sect_index orelse return;
+ const gpa = macho_file.base.allocator;
+ const header = macho_file.sections.items(.header)[sect_index];
+
+ const nrecs = @divExact(header.size, @sizeOf(macho.compact_unwind_entry));
+ var entries = try std.ArrayList(macho.compact_unwind_entry).initCapacity(gpa, nrecs);
+ defer entries.deinit();
+
+ var relocs = try std.ArrayList(macho.relocation_info).initCapacity(gpa, header.nreloc);
+ defer relocs.deinit();
+
+ // Template: an 8-byte, non-pcrel UNSIGNED reloc for the current arch.
+ const addReloc = struct {
+ fn addReloc(offset: i32, cpu_arch: std.Target.Cpu.Arch) macho.relocation_info {
+ return .{
+ .r_address = offset,
+ .r_symbolnum = 0,
+ .r_pcrel = 0,
+ .r_length = 3,
+ .r_extern = 0,
+ .r_type = switch (cpu_arch) {
+ .aarch64 => @intFromEnum(macho.reloc_type_arm64.ARM64_RELOC_UNSIGNED),
+ .x86_64 => @intFromEnum(macho.reloc_type_x86_64.X86_64_RELOC_UNSIGNED),
+ else => unreachable,
+ },
+ };
+ }
+ }.addReloc;
+
+ // `offset` tracks this record's byte position within the section.
+ var offset: i32 = 0;
+ for (macho_file.objects.items) |index| {
+ const object = macho_file.getFile(index).?.object;
+ for (object.unwind_records.items) |irec| {
+ const rec = macho_file.getUnwindRecord(irec);
+ if (!rec.alive) continue;
+
+ var out: macho.compact_unwind_entry = .{
+ .rangeStart = 0,
+ .rangeLength = rec.length,
+ .compactUnwindEncoding = rec.enc.enc,
+ .personalityFunction = 0,
+ .lsda = 0,
+ };
+
+ {
+ // Function address
+ const atom = rec.getAtom(macho_file);
+ const addr = rec.getAtomAddress(macho_file);
+ out.rangeStart = addr;
+ var reloc = addReloc(offset, macho_file.options.cpu_arch.?);
+ reloc.r_symbolnum = atom.out_n_sect + 1;
+ relocs.appendAssumeCapacity(reloc);
+ }
+
+ // Personality function
+ if (rec.getPersonality(macho_file)) |sym| {
+ const r_symbolnum = math.cast(u24, sym.getOutputSymtabIndex(macho_file).?) orelse return error.Overflow;
+ var reloc = addReloc(offset + 16, macho_file.options.cpu_arch.?);
+ reloc.r_symbolnum = r_symbolnum;
+ reloc.r_extern = 1;
+ relocs.appendAssumeCapacity(reloc);
+ }
+
+ // LSDA address
+ if (rec.getLsdaAtom(macho_file)) |atom| {
+ const addr = rec.getLsdaAddress(macho_file);
+ out.lsda = addr;
+ var reloc = addReloc(offset + 24, macho_file.options.cpu_arch.?);
+ reloc.r_symbolnum = atom.out_n_sect + 1;
+ relocs.appendAssumeCapacity(reloc);
+ }
+
+ entries.appendAssumeCapacity(out);
+ offset += @sizeOf(macho.compact_unwind_entry);
+ }
+ }
+
+ assert(entries.items.len == nrecs);
+ assert(relocs.items.len == header.nreloc);
+
+ mem.sort(macho.relocation_info, relocs.items, {}, sortReloc);
+
+ // TODO scattered writes?
+ try macho_file.base.file.pwriteAll(mem.sliceAsBytes(entries.items), header.offset);
+ try macho_file.base.file.pwriteAll(mem.sliceAsBytes(relocs.items), header.reloff);
+}
+
+/// Writes the synthesized __eh_frame section contents along with its
+/// relocations (sorted descending, as Apple's linker does).
+fn writeEhFrame(macho_file: *MachO) !void {
+ const sect_index = macho_file.eh_frame_sect_index orelse return;
+ const gpa = macho_file.base.allocator;
+ const header = macho_file.sections.items(.header)[sect_index];
+
+ const code = try gpa.alloc(u8, header.size);
+ defer gpa.free(code);
+
+ var relocs = try std.ArrayList(macho.relocation_info).initCapacity(gpa, header.nreloc);
+ defer relocs.deinit();
+
+ try eh_frame.writeRelocs(macho_file, code, &relocs);
+ assert(relocs.items.len == header.nreloc);
+
+ mem.sort(macho.relocation_info, relocs.items, {}, sortReloc);
+
+ // TODO scattered writes?
+ try macho_file.base.file.pwriteAll(code, header.offset);
+ try macho_file.base.file.pwriteAll(mem.sliceAsBytes(relocs.items), header.reloff);
+}
+
+/// Serializes all load commands (the single segment with its sections,
+/// data-in-code, symtab, dysymtab, and optional build-version/version-min)
+/// directly after the mach header. Returns `.{ ncmds, sizeofcmds }` for the
+/// header to record.
+fn writeLoadCommands(macho_file: *MachO) !struct { usize, usize } {
+ const gpa = macho_file.base.allocator;
+ const needed_size = load_commands.calcLoadCommandsSizeObject(macho_file);
+ const buffer = try gpa.alloc(u8, needed_size);
+ defer gpa.free(buffer);
+
+ var stream = std.io.fixedBufferStream(buffer);
+ var cwriter = std.io.countingWriter(stream.writer());
+ const writer = cwriter.writer();
+
+ var ncmds: usize = 0;
+
+ // Segment and section load commands
+ {
+ assert(macho_file.segments.items.len == 1);
+ const seg = macho_file.segments.items[0];
+ try writer.writeStruct(seg);
+ for (macho_file.sections.items(.header)) |header| {
+ try writer.writeStruct(header);
+ }
+ ncmds += 1;
+ }
+
+ try writer.writeStruct(macho_file.data_in_code_cmd);
+ ncmds += 1;
+ try writer.writeStruct(macho_file.symtab_cmd);
+ ncmds += 1;
+ try writer.writeStruct(macho_file.dysymtab_cmd);
+ ncmds += 1;
+
+ if (macho_file.options.platform) |platform| {
+ if (platform.isBuildVersionCompatible()) {
+ try load_commands.writeBuildVersionLC(platform, macho_file.options.sdk_version, writer);
+ ncmds += 1;
+ } else {
+ try load_commands.writeVersionMinLC(platform, macho_file.options.sdk_version, writer);
+ ncmds += 1;
+ }
+ }
+
+ // Size estimate must match exactly what we serialized.
+ assert(cwriter.bytes_written == needed_size);
+
+ try macho_file.base.file.pwriteAll(buffer, @sizeOf(macho.mach_header_64));
+
+ return .{ ncmds, buffer.len };
+}
+
+/// Writes the MH_OBJECT mach header at file offset 0, setting the cpu
+/// type/subtype from the target arch and MH_SUBSECTIONS_VIA_SYMBOLS when any
+/// input object was built with subsections.
+fn writeHeader(macho_file: *MachO, ncmds: usize, sizeofcmds: usize) !void {
+ var header: macho.mach_header_64 = .{};
+ header.filetype = macho.MH_OBJECT;
+
+ const subsections_via_symbols = for (macho_file.objects.items) |index| {
+ const object = macho_file.getFile(index).?.object;
+ if (object.hasSubsections()) break true;
+ } else false;
+ if (subsections_via_symbols) {
+ header.flags |= macho.MH_SUBSECTIONS_VIA_SYMBOLS;
+ }
+
+ switch (macho_file.options.cpu_arch.?) {
+ .aarch64 => {
+ header.cputype = macho.CPU_TYPE_ARM64;
+ header.cpusubtype = macho.CPU_SUBTYPE_ARM_ALL;
+ },
+ .x86_64 => {
+ header.cputype = macho.CPU_TYPE_X86_64;
+ header.cpusubtype = macho.CPU_SUBTYPE_X86_64_ALL;
+ },
+ else => {},
+ }
+
+ header.ncmds = @intCast(ncmds);
+ header.sizeofcmds = @intCast(sizeofcmds);
+
+ try macho_file.base.file.pwriteAll(mem.asBytes(&header), 0);
+}
+
+const assert = std.debug.assert;
+const eh_frame = @import("eh_frame.zig");
+const load_commands = @import("load_commands.zig");
+const macho = std.macho;
+const math = std.math;
+const mem = std.mem;
+const state_log = std.log.scoped(.state);
+const std = @import("std");
+const trace = @import("../tracy.zig").trace;
+
+const Atom = @import("Atom.zig");
+const MachO = @import("../MachO.zig");
+const Symbol = @import("Symbol.zig");
src/link/MachO/Relocation.zig
@@ -1,235 +1,62 @@
-//! Relocation used by the self-hosted backends to instruct the linker where and how to
-//! fixup the values when flushing the contents to file and/or memory.
-
-type: Type,
-target: SymbolWithLoc,
+tag: enum { @"extern", local },
offset: u32,
+target: u32,
addend: i64,
-pcrel: bool,
-length: u2,
-dirty: bool = true,
-
-pub const Type = enum {
- // x86, x86_64
- /// RIP-relative displacement to a GOT pointer
- got,
- /// RIP-relative displacement
- signed,
- /// RIP-relative displacement to a TLV thunk
- tlv,
-
- // aarch64
- /// PC-relative distance to target page in GOT section
- got_page,
- /// Offset to a GOT pointer relative to the start of a page in GOT section
- got_pageoff,
- /// PC-relative distance to target page in a section
- page,
- /// Offset to a pointer relative to the start of a page in a section
- pageoff,
-
- // common
- /// PC/RIP-relative displacement B/BL/CALL
- branch,
- /// Absolute pointer value
- unsigned,
- /// Relative offset to TLV initializer
- tlv_initializer,
-};
-
-/// Returns true if and only if the reloc can be resolved.
-pub fn isResolvable(self: Relocation, macho_file: *MachO) bool {
- _ = self.getTargetBaseAddress(macho_file) orelse return false;
- return true;
+type: Type,
+meta: packed struct {
+ pcrel: bool,
+ has_subtractor: bool,
+ length: u2,
+ symbolnum: u24,
+},
+
+pub fn getTargetSymbol(rel: Relocation, macho_file: *MachO) *Symbol {
+ assert(rel.tag == .@"extern");
+ return macho_file.getSymbol(rel.target);
}
-pub fn isGotIndirection(self: Relocation) bool {
- return switch (self.type) {
- .got, .got_page, .got_pageoff => true,
- else => false,
- };
+pub fn getTargetAtom(rel: Relocation, macho_file: *MachO) *Atom {
+ assert(rel.tag == .local);
+ return macho_file.getAtom(rel.target).?;
}
-pub fn isStubTrampoline(self: Relocation, macho_file: *MachO) bool {
- return switch (self.type) {
- .branch => macho_file.getSymbol(self.target).undf(),
- else => false,
+pub fn getTargetAddress(rel: Relocation, macho_file: *MachO) u64 {
+ return switch (rel.tag) {
+ .local => rel.getTargetAtom(macho_file).value,
+ .@"extern" => rel.getTargetSymbol(macho_file).getAddress(.{}, macho_file),
};
}
-pub fn getTargetBaseAddress(self: Relocation, macho_file: *MachO) ?u64 {
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- if (self.isStubTrampoline(macho_file)) {
- const index = macho_file.stub_table.lookup.get(self.target) orelse return null;
- const header = macho_file.sections.items(.header)[macho_file.stubs_section_index.?];
- return header.addr +
- index * @import("stubs.zig").stubSize(target.cpu.arch);
- }
- switch (self.type) {
- .got, .got_page, .got_pageoff => {
- const got_index = macho_file.got_table.lookup.get(self.target) orelse return null;
- const header = macho_file.sections.items(.header)[macho_file.got_section_index.?];
- return header.addr + got_index * @sizeOf(u64);
- },
- .tlv => {
- const atom_index = macho_file.tlv_table.get(self.target) orelse return null;
- const atom = macho_file.getAtom(atom_index);
- return atom.getSymbol(macho_file).n_value;
- },
- else => {
- const target_atom_index = macho_file.getAtomIndexForSymbol(self.target) orelse return null;
- const target_atom = macho_file.getAtom(target_atom_index);
- return target_atom.getSymbol(macho_file).n_value;
- },
- }
-}
-
-pub fn resolve(self: Relocation, macho_file: *MachO, atom_index: Atom.Index, code: []u8) void {
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const arch = target.cpu.arch;
- const atom = macho_file.getAtom(atom_index);
- const source_sym = atom.getSymbol(macho_file);
- const source_addr = source_sym.n_value + self.offset;
-
- const target_base_addr = self.getTargetBaseAddress(macho_file).?; // Oops, you didn't check if the relocation can be resolved with isResolvable().
- const target_addr: i64 = switch (self.type) {
- .tlv_initializer => blk: {
- assert(self.addend == 0); // Addend here makes no sense.
- const header = macho_file.sections.items(.header)[macho_file.thread_data_section_index.?];
- break :blk @as(i64, @intCast(target_base_addr - header.addr));
- },
- else => @as(i64, @intCast(target_base_addr)) + self.addend,
+pub fn getGotTargetAddress(rel: Relocation, macho_file: *MachO) u64 {
+ return switch (rel.tag) {
+ .local => 0,
+ .@"extern" => rel.getTargetSymbol(macho_file).getGotAddress(macho_file),
};
-
- relocs_log.debug(" ({x}: [() => 0x{x} ({s})) ({s})", .{
- source_addr,
- target_addr,
- macho_file.getSymbolName(self.target),
- @tagName(self.type),
- });
-
- switch (arch) {
- .aarch64 => self.resolveAarch64(source_addr, target_addr, code),
- .x86_64 => self.resolveX8664(source_addr, target_addr, code),
- else => unreachable,
- }
-}
-
-fn resolveAarch64(self: Relocation, source_addr: u64, target_addr: i64, code: []u8) void {
- var buffer = code[self.offset..];
- switch (self.type) {
- .branch => {
- const displacement = math.cast(
- i28,
- @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr)),
- ) orelse unreachable; // TODO codegen should never allow for jump larger than i28 displacement
- var inst = aarch64.Instruction{
- .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload(
- aarch64.Instruction,
- aarch64.Instruction.unconditional_branch_immediate,
- ), buffer[0..4]),
- };
- inst.unconditional_branch_immediate.imm26 = @as(u26, @truncate(@as(u28, @bitCast(displacement >> 2))));
- mem.writeInt(u32, buffer[0..4], inst.toU32(), .little);
- },
- .page, .got_page => {
- const source_page = @as(i32, @intCast(source_addr >> 12));
- const target_page = @as(i32, @intCast(target_addr >> 12));
- const pages = @as(u21, @bitCast(@as(i21, @intCast(target_page - source_page))));
- var inst = aarch64.Instruction{
- .pc_relative_address = mem.bytesToValue(meta.TagPayload(
- aarch64.Instruction,
- aarch64.Instruction.pc_relative_address,
- ), buffer[0..4]),
- };
- inst.pc_relative_address.immhi = @as(u19, @truncate(pages >> 2));
- inst.pc_relative_address.immlo = @as(u2, @truncate(pages));
- mem.writeInt(u32, buffer[0..4], inst.toU32(), .little);
- },
- .pageoff, .got_pageoff => {
- const narrowed = @as(u12, @truncate(@as(u64, @intCast(target_addr))));
- if (isArithmeticOp(buffer[0..4])) {
- var inst = aarch64.Instruction{
- .add_subtract_immediate = mem.bytesToValue(meta.TagPayload(
- aarch64.Instruction,
- aarch64.Instruction.add_subtract_immediate,
- ), buffer[0..4]),
- };
- inst.add_subtract_immediate.imm12 = narrowed;
- mem.writeInt(u32, buffer[0..4], inst.toU32(), .little);
- } else {
- var inst = aarch64.Instruction{
- .load_store_register = mem.bytesToValue(meta.TagPayload(
- aarch64.Instruction,
- aarch64.Instruction.load_store_register,
- ), buffer[0..4]),
- };
- const offset: u12 = blk: {
- if (inst.load_store_register.size == 0) {
- if (inst.load_store_register.v == 1) {
- // 128-bit SIMD is scaled by 16.
- break :blk @divExact(narrowed, 16);
- }
- // Otherwise, 8-bit SIMD or ldrb.
- break :blk narrowed;
- } else {
- const denom: u4 = math.powi(u4, 2, inst.load_store_register.size) catch unreachable;
- break :blk @divExact(narrowed, denom);
- }
- };
- inst.load_store_register.offset = offset;
- mem.writeInt(u32, buffer[0..4], inst.toU32(), .little);
- }
- },
- .tlv_initializer, .unsigned => switch (self.length) {
- 2 => mem.writeInt(u32, buffer[0..4], @as(u32, @truncate(@as(u64, @bitCast(target_addr)))), .little),
- 3 => mem.writeInt(u64, buffer[0..8], @as(u64, @bitCast(target_addr)), .little),
- else => unreachable,
- },
- .got, .signed, .tlv => unreachable, // Invalid target architecture.
- }
}
-fn resolveX8664(self: Relocation, source_addr: u64, target_addr: i64, code: []u8) void {
- switch (self.type) {
- .branch, .got, .tlv, .signed => {
- const displacement = @as(i32, @intCast(@as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr)) - 4));
- mem.writeInt(u32, code[self.offset..][0..4], @as(u32, @bitCast(displacement)), .little);
- },
- .tlv_initializer, .unsigned => {
- switch (self.length) {
- 2 => {
- mem.writeInt(u32, code[self.offset..][0..4], @as(u32, @truncate(@as(u64, @bitCast(target_addr)))), .little);
- },
- 3 => {
- mem.writeInt(u64, code[self.offset..][0..8], @as(u64, @bitCast(target_addr)), .little);
- },
- else => unreachable,
- }
- },
- .got_page, .got_pageoff, .page, .pageoff => unreachable, // Invalid target architecture.
- }
-}
-
-pub inline fn isArithmeticOp(inst: *const [4]u8) bool {
- const group_decode = @as(u5, @truncate(inst[3]));
- return ((group_decode >> 2) == 4);
-}
-
-pub fn calcPcRelativeDisplacementX86(source_addr: u64, target_addr: u64, correction: u3) error{Overflow}!i32 {
- const disp = @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr + 4 + correction));
- return math.cast(i32, disp) orelse error.Overflow;
+pub fn getRelocAddend(rel: Relocation, cpu_arch: std.Target.Cpu.Arch) i64 {
+ const addend: i64 = switch (rel.type) {
+ .signed => 0,
+ .signed1 => -1,
+ .signed2 => -2,
+ .signed4 => -4,
+ else => 0,
+ };
+ return switch (cpu_arch) {
+ .x86_64 => if (rel.meta.pcrel) addend - 4 else addend,
+ else => addend,
+ };
}
-pub fn calcPcRelativeDisplacementArm64(source_addr: u64, target_addr: u64) error{Overflow}!i28 {
- const disp = @as(i64, @intCast(target_addr)) - @as(i64, @intCast(source_addr));
- return math.cast(i28, disp) orelse error.Overflow;
+pub fn lessThan(ctx: void, lhs: Relocation, rhs: Relocation) bool {
+ _ = ctx;
+ return lhs.offset < rhs.offset;
}
-pub fn calcNumberOfPages(source_addr: u64, target_addr: u64) i21 {
- const source_page = @as(i32, @intCast(source_addr >> 12));
- const target_page = @as(i32, @intCast(target_addr >> 12));
- const pages = @as(i21, @intCast(target_page - source_page));
+pub fn calcNumberOfPages(saddr: u64, taddr: u64) error{Overflow}!i21 {
+ const spage = math.cast(i32, saddr >> 12) orelse return error.Overflow;
+ const tpage = math.cast(i32, taddr >> 12) orelse return error.Overflow;
+ const pages = math.cast(i21, tpage - spage) orelse return error.Overflow;
return pages;
}
@@ -242,8 +69,8 @@ pub const PageOffsetInstKind = enum {
load_store_128,
};
-pub fn calcPageOffset(target_addr: u64, kind: PageOffsetInstKind) !u12 {
- const narrowed = @as(u12, @truncate(target_addr));
+pub fn calcPageOffset(taddr: u64, kind: PageOffsetInstKind) !u12 {
+ const narrowed = @as(u12, @truncate(taddr));
return switch (kind) {
.arithmetic, .load_store_8 => narrowed,
.load_store_16 => try math.divExact(u12, narrowed, 2),
@@ -253,17 +80,57 @@ pub fn calcPageOffset(target_addr: u64, kind: PageOffsetInstKind) !u12 {
};
}
-const Relocation = @This();
+pub inline fn isArithmeticOp(inst: *const [4]u8) bool {
+ const group_decode = @as(u5, @truncate(inst[3]));
+ return ((group_decode >> 2) == 4);
+}
+
+pub const Type = enum {
+ // x86_64
+ /// RIP-relative displacement (X86_64_RELOC_SIGNED)
+ signed,
+ /// RIP-relative displacement (X86_64_RELOC_SIGNED_1)
+ signed1,
+ /// RIP-relative displacement (X86_64_RELOC_SIGNED_2)
+ signed2,
+ /// RIP-relative displacement (X86_64_RELOC_SIGNED_4)
+ signed4,
+ /// RIP-relative GOT load (X86_64_RELOC_GOT_LOAD)
+ got_load,
+ /// RIP-relative TLV load (X86_64_RELOC_TLV)
+ tlv,
+
+ // arm64
+ /// PC-relative load (distance to page, ARM64_RELOC_PAGE21)
+ page,
+ /// Non-PC-relative offset to symbol (ARM64_RELOC_PAGEOFF12)
+ pageoff,
+ /// PC-relative GOT load (distance to page, ARM64_RELOC_GOT_LOAD_PAGE21)
+ got_load_page,
+ /// Non-PC-relative offset to GOT slot (ARM64_RELOC_GOT_LOAD_PAGEOFF12)
+ got_load_pageoff,
+ /// PC-relative TLV load (distance to page, ARM64_RELOC_TLVP_LOAD_PAGE21)
+ tlvp_page,
+ /// Non-PC-relative offset to TLV slot (ARM64_RELOC_TLVP_LOAD_PAGEOFF12)
+ tlvp_pageoff,
+
+ // common
+ /// PC-relative call/bl/b (X86_64_RELOC_BRANCH or ARM64_RELOC_BRANCH26)
+ branch,
+ /// PC-relative displacement to GOT pointer (X86_64_RELOC_GOT or ARM64_RELOC_POINTER_TO_GOT)
+ got,
+ /// Absolute subtractor value (X86_64_RELOC_SUBTRACTOR or ARM64_RELOC_SUBTRACTOR)
+ subtractor,
+ /// Absolute relocation (X86_64_RELOC_UNSIGNED or ARM64_RELOC_UNSIGNED)
+ unsigned,
+};
-const std = @import("std");
-const aarch64 = @import("../../arch/aarch64/bits.zig");
const assert = std.debug.assert;
-const relocs_log = std.log.scoped(.link_relocs);
const macho = std.macho;
const math = std.math;
-const mem = std.mem;
-const meta = std.meta;
+const std = @import("std");
const Atom = @import("Atom.zig");
const MachO = @import("../MachO.zig");
-const SymbolWithLoc = MachO.SymbolWithLoc;
+const Relocation = @This();
+const Symbol = @import("Symbol.zig");
src/link/MachO/stubs.zig
@@ -1,169 +0,0 @@
-pub inline fn stubHelperPreambleSize(cpu_arch: std.Target.Cpu.Arch) u8 {
- return switch (cpu_arch) {
- .x86_64 => 15,
- .aarch64 => 6 * @sizeOf(u32),
- else => unreachable, // unhandled architecture type
- };
-}
-
-pub inline fn stubHelperSize(cpu_arch: std.Target.Cpu.Arch) u8 {
- return switch (cpu_arch) {
- .x86_64 => 10,
- .aarch64 => 3 * @sizeOf(u32),
- else => unreachable, // unhandled architecture type
- };
-}
-
-pub inline fn stubSize(cpu_arch: std.Target.Cpu.Arch) u8 {
- return switch (cpu_arch) {
- .x86_64 => 6,
- .aarch64 => 3 * @sizeOf(u32),
- else => unreachable, // unhandled architecture type
- };
-}
-
-pub inline fn stubAlignment(cpu_arch: std.Target.Cpu.Arch) u8 {
- return switch (cpu_arch) {
- .x86_64 => 1,
- .aarch64 => 4,
- else => unreachable, // unhandled architecture type
- };
-}
-
-pub inline fn stubOffsetInStubHelper(cpu_arch: std.Target.Cpu.Arch) u8 {
- return switch (cpu_arch) {
- .x86_64 => 1,
- .aarch64 => 2 * @sizeOf(u32),
- else => unreachable,
- };
-}
-
-pub fn writeStubHelperPreambleCode(args: struct {
- cpu_arch: std.Target.Cpu.Arch,
- source_addr: u64,
- dyld_private_addr: u64,
- dyld_stub_binder_got_addr: u64,
-}, writer: anytype) !void {
- switch (args.cpu_arch) {
- .x86_64 => {
- try writer.writeAll(&.{ 0x4c, 0x8d, 0x1d });
- {
- const disp = try Relocation.calcPcRelativeDisplacementX86(
- args.source_addr + 3,
- args.dyld_private_addr,
- 0,
- );
- try writer.writeInt(i32, disp, .little);
- }
- try writer.writeAll(&.{ 0x41, 0x53, 0xff, 0x25 });
- {
- const disp = try Relocation.calcPcRelativeDisplacementX86(
- args.source_addr + 11,
- args.dyld_stub_binder_got_addr,
- 0,
- );
- try writer.writeInt(i32, disp, .little);
- }
- },
- .aarch64 => {
- {
- const pages = Relocation.calcNumberOfPages(args.source_addr, args.dyld_private_addr);
- try writer.writeInt(u32, aarch64.Instruction.adrp(.x17, pages).toU32(), .little);
- }
- {
- const off = try Relocation.calcPageOffset(args.dyld_private_addr, .arithmetic);
- try writer.writeInt(u32, aarch64.Instruction.add(.x17, .x17, off, false).toU32(), .little);
- }
- try writer.writeInt(u32, aarch64.Instruction.stp(
- .x16,
- .x17,
- aarch64.Register.sp,
- aarch64.Instruction.LoadStorePairOffset.pre_index(-16),
- ).toU32(), .little);
- {
- const pages = Relocation.calcNumberOfPages(args.source_addr + 12, args.dyld_stub_binder_got_addr);
- try writer.writeInt(u32, aarch64.Instruction.adrp(.x16, pages).toU32(), .little);
- }
- {
- const off = try Relocation.calcPageOffset(args.dyld_stub_binder_got_addr, .load_store_64);
- try writer.writeInt(u32, aarch64.Instruction.ldr(
- .x16,
- .x16,
- aarch64.Instruction.LoadStoreOffset.imm(off),
- ).toU32(), .little);
- }
- try writer.writeInt(u32, aarch64.Instruction.br(.x16).toU32(), .little);
- },
- else => unreachable,
- }
-}
-
-pub fn writeStubHelperCode(args: struct {
- cpu_arch: std.Target.Cpu.Arch,
- source_addr: u64,
- target_addr: u64,
-}, writer: anytype) !void {
- switch (args.cpu_arch) {
- .x86_64 => {
- try writer.writeAll(&.{ 0x68, 0x0, 0x0, 0x0, 0x0, 0xe9 });
- {
- const disp = try Relocation.calcPcRelativeDisplacementX86(args.source_addr + 6, args.target_addr, 0);
- try writer.writeInt(i32, disp, .little);
- }
- },
- .aarch64 => {
- const stub_size: u4 = 3 * @sizeOf(u32);
- const literal = blk: {
- const div_res = try std.math.divExact(u64, stub_size - @sizeOf(u32), 4);
- break :blk std.math.cast(u18, div_res) orelse return error.Overflow;
- };
- try writer.writeInt(u32, aarch64.Instruction.ldrLiteral(
- .w16,
- literal,
- ).toU32(), .little);
- {
- const disp = try Relocation.calcPcRelativeDisplacementArm64(args.source_addr + 4, args.target_addr);
- try writer.writeInt(u32, aarch64.Instruction.b(disp).toU32(), .little);
- }
- try writer.writeAll(&.{ 0x0, 0x0, 0x0, 0x0 });
- },
- else => unreachable,
- }
-}
-
-pub fn writeStubCode(args: struct {
- cpu_arch: std.Target.Cpu.Arch,
- source_addr: u64,
- target_addr: u64,
-}, writer: anytype) !void {
- switch (args.cpu_arch) {
- .x86_64 => {
- try writer.writeAll(&.{ 0xff, 0x25 });
- {
- const disp = try Relocation.calcPcRelativeDisplacementX86(args.source_addr + 2, args.target_addr, 0);
- try writer.writeInt(i32, disp, .little);
- }
- },
- .aarch64 => {
- {
- const pages = Relocation.calcNumberOfPages(args.source_addr, args.target_addr);
- try writer.writeInt(u32, aarch64.Instruction.adrp(.x16, pages).toU32(), .little);
- }
- {
- const off = try Relocation.calcPageOffset(args.target_addr, .load_store_64);
- try writer.writeInt(u32, aarch64.Instruction.ldr(
- .x16,
- .x16,
- aarch64.Instruction.LoadStoreOffset.imm(off),
- ).toU32(), .little);
- }
- try writer.writeInt(u32, aarch64.Instruction.br(.x16).toU32(), .little);
- },
- else => unreachable,
- }
-}
-
-const std = @import("std");
-const aarch64 = @import("../../arch/aarch64/bits.zig");
-
-const Relocation = @import("Relocation.zig");
src/link/MachO/Symbol.zig
@@ -0,0 +1,383 @@
+//! Represents a defined symbol.
+
+/// Allocated address value of this symbol.
+value: u64 = 0,
+
+/// Offset into the linker's intern table.
+name: u32 = 0,
+
+/// File where this symbol is defined.
+file: File.Index = 0,
+
+/// Atom containing this symbol if any.
+/// Index of 0 means there is no associated atom with this symbol.
+/// Use `getAtom` to get the pointer to the atom.
+atom: Atom.Index = 0,
+
+/// Assigned output section index for this atom.
+out_n_sect: u16 = 0,
+
+/// Index of the source nlist this symbol references.
+/// Use `getNlist` to pull the nlist from the relevant file.
+nlist_idx: u32 = 0,
+
+/// Misc flags for the symbol packaged as packed struct for compression.
+flags: Flags = .{},
+
+visibility: Visibility = .local,
+
+extra: u32 = 0,
+
+pub fn isLocal(symbol: Symbol) bool {
+ return !(symbol.flags.import or symbol.flags.@"export");
+}
+
+pub fn isSymbolStab(symbol: Symbol, macho_file: *MachO) bool {
+ const file = symbol.getFile(macho_file) orelse return false;
+ return switch (file) {
+ .object => symbol.getNlist(macho_file).stab(),
+ else => false,
+ };
+}
+
+pub fn isTlvInit(symbol: Symbol, macho_file: *MachO) bool {
+ const name = symbol.getName(macho_file);
+ return std.mem.indexOf(u8, name, "$tlv$init") != null;
+}
+
+pub fn weakRef(symbol: Symbol, macho_file: *MachO) bool {
+ const file = symbol.getFile(macho_file).?;
+ const is_dylib_weak = switch (file) {
+ .dylib => |x| x.weak,
+ else => false,
+ };
+ return is_dylib_weak or symbol.flags.weak_ref;
+}
+
+pub fn getName(symbol: Symbol, macho_file: *MachO) [:0]const u8 {
+ return macho_file.string_intern.getAssumeExists(symbol.name);
+}
+
+pub fn getAtom(symbol: Symbol, macho_file: *MachO) ?*Atom {
+ return macho_file.getAtom(symbol.atom);
+}
+
+pub fn getFile(symbol: Symbol, macho_file: *MachO) ?File {
+ return macho_file.getFile(symbol.file);
+}
+
+/// Asserts file is an object.
+pub fn getNlist(symbol: Symbol, macho_file: *MachO) macho.nlist_64 {
+ const file = symbol.getFile(macho_file).?;
+ return switch (file) {
+ .object => |x| x.symtab.items(.nlist)[symbol.nlist_idx],
+ else => unreachable,
+ };
+}
+
+pub fn getSize(symbol: Symbol, macho_file: *MachO) u64 {
+ const file = symbol.getFile(macho_file).?;
+ assert(file == .object);
+ return file.object.symtab.items(.size)[symbol.nlist_idx];
+}
+
+pub fn getDylibOrdinal(symbol: Symbol, macho_file: *MachO) ?u16 {
+ assert(symbol.flags.import);
+ const file = symbol.getFile(macho_file) orelse return null;
+ return switch (file) {
+ .dylib => |x| x.ordinal,
+ else => null,
+ };
+}
+
+pub fn getSymbolRank(symbol: Symbol, macho_file: *MachO) u32 {
+ const file = symbol.getFile(macho_file) orelse return std.math.maxInt(u32);
+ const in_archive = switch (file) {
+ .object => |x| !x.alive,
+ else => false,
+ };
+ return file.getSymbolRank(.{
+ .archive = in_archive,
+ .weak = symbol.flags.weak,
+ .tentative = symbol.flags.tentative,
+ });
+}
+
+pub fn getAddress(symbol: Symbol, opts: struct {
+ stubs: bool = true,
+}, macho_file: *MachO) u64 {
+ if (opts.stubs) {
+ if (symbol.flags.stubs) {
+ return symbol.getStubsAddress(macho_file);
+ } else if (symbol.flags.objc_stubs) {
+ return symbol.getObjcStubsAddress(macho_file);
+ }
+ }
+ if (symbol.getAtom(macho_file)) |atom| return atom.value + symbol.value;
+ return symbol.value;
+}
+
+pub fn getGotAddress(symbol: Symbol, macho_file: *MachO) u64 {
+ if (!symbol.flags.got) return 0;
+ const extra = symbol.getExtra(macho_file).?;
+ return macho_file.got.getAddress(extra.got, macho_file);
+}
+
+pub fn getStubsAddress(symbol: Symbol, macho_file: *MachO) u64 {
+ if (!symbol.flags.stubs) return 0;
+ const extra = symbol.getExtra(macho_file).?;
+ return macho_file.stubs.getAddress(extra.stubs, macho_file);
+}
+
+pub fn getObjcStubsAddress(symbol: Symbol, macho_file: *MachO) u64 {
+ if (!symbol.flags.objc_stubs) return 0;
+ const extra = symbol.getExtra(macho_file).?;
+ return macho_file.objc_stubs.getAddress(extra.objc_stubs, macho_file);
+}
+
+pub fn getObjcSelrefsAddress(symbol: Symbol, macho_file: *MachO) u64 {
+ if (!symbol.flags.objc_stubs) return 0;
+ const extra = symbol.getExtra(macho_file).?;
+ const atom = macho_file.getAtom(extra.objc_selrefs).?;
+ assert(atom.flags.alive);
+ return atom.value;
+}
+
+pub fn getTlvPtrAddress(symbol: Symbol, macho_file: *MachO) u64 {
+ if (!symbol.flags.tlv_ptr) return 0;
+ const extra = symbol.getExtra(macho_file).?;
+ return macho_file.tlv_ptr.getAddress(extra.tlv_ptr, macho_file);
+}
+
+pub fn getOutputSymtabIndex(symbol: Symbol, macho_file: *MachO) ?u32 {
+ if (!symbol.flags.output_symtab) return null;
+ assert(!symbol.isSymbolStab(macho_file));
+ const file = symbol.getFile(macho_file).?;
+ const symtab_ctx = switch (file) {
+ inline else => |x| x.output_symtab_ctx,
+ };
+ var idx = symbol.getExtra(macho_file).?.symtab;
+ if (symbol.isLocal()) {
+ idx += symtab_ctx.ilocal;
+ } else if (symbol.flags.@"export") {
+ idx += symtab_ctx.iexport;
+ } else {
+ assert(symbol.flags.import);
+ idx += symtab_ctx.iimport;
+ }
+ return idx;
+}
+
+const AddExtraOpts = struct {
+ got: ?u32 = null,
+ stubs: ?u32 = null,
+ objc_stubs: ?u32 = null,
+ objc_selrefs: ?u32 = null,
+ tlv_ptr: ?u32 = null,
+ symtab: ?u32 = null,
+};
+
+pub fn addExtra(symbol: *Symbol, opts: AddExtraOpts, macho_file: *MachO) !void {
+ if (symbol.getExtra(macho_file) == null) {
+ symbol.extra = try macho_file.addSymbolExtra(.{});
+ }
+ var extra = symbol.getExtra(macho_file).?;
+ inline for (@typeInfo(@TypeOf(opts)).Struct.fields) |field| {
+ if (@field(opts, field.name)) |x| {
+ @field(extra, field.name) = x;
+ }
+ }
+ symbol.setExtra(extra, macho_file);
+}
+
+pub inline fn getExtra(symbol: Symbol, macho_file: *MachO) ?Extra {
+ return macho_file.getSymbolExtra(symbol.extra);
+}
+
+pub inline fn setExtra(symbol: Symbol, extra: Extra, macho_file: *MachO) void {
+ macho_file.setSymbolExtra(symbol.extra, extra);
+}
+
+pub fn setOutputSym(symbol: Symbol, macho_file: *MachO, out: *macho.nlist_64) void {
+ if (symbol.isLocal()) {
+ out.n_type = if (symbol.flags.abs) macho.N_ABS else macho.N_SECT;
+ out.n_sect = if (symbol.flags.abs) 0 else @intCast(symbol.out_n_sect + 1);
+ out.n_desc = 0;
+ out.n_value = symbol.getAddress(.{}, macho_file);
+
+ switch (symbol.visibility) {
+ .hidden => out.n_type |= macho.N_PEXT,
+ else => {},
+ }
+ } else if (symbol.flags.@"export") {
+ assert(symbol.visibility == .global);
+ out.n_type = macho.N_EXT;
+ out.n_type |= if (symbol.flags.abs) macho.N_ABS else macho.N_SECT;
+ out.n_sect = if (symbol.flags.abs) 0 else @intCast(symbol.out_n_sect + 1);
+ out.n_value = symbol.getAddress(.{}, macho_file);
+ out.n_desc = 0;
+
+ if (symbol.flags.weak) {
+ out.n_desc |= macho.N_WEAK_DEF;
+ }
+ if (symbol.flags.dyn_ref) {
+ out.n_desc |= macho.REFERENCED_DYNAMICALLY;
+ }
+ } else {
+ assert(symbol.visibility == .global);
+ out.n_type = macho.N_EXT;
+ out.n_sect = 0;
+ out.n_value = 0;
+ out.n_desc = 0;
+
+ const ord: u16 = if (macho_file.options.namespace == .flat)
+ @as(u8, @bitCast(macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP))
+ else if (symbol.getDylibOrdinal(macho_file)) |ord|
+ ord
+ else
+ macho.BIND_SPECIAL_DYLIB_SELF;
+ out.n_desc = macho.N_SYMBOL_RESOLVER * ord;
+
+ if (symbol.flags.weak) {
+ out.n_desc |= macho.N_WEAK_DEF;
+ }
+
+ if (symbol.weakRef(macho_file)) {
+ out.n_desc |= macho.N_WEAK_REF;
+ }
+ }
+}
+
+pub fn format(
+ symbol: Symbol,
+ comptime unused_fmt_string: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+) !void {
+ _ = symbol;
+ _ = unused_fmt_string;
+ _ = options;
+ _ = writer;
+ @compileError("do not format symbols directly");
+}
+
+const FormatContext = struct {
+ symbol: Symbol,
+ macho_file: *MachO,
+};
+
+pub fn fmt(symbol: Symbol, macho_file: *MachO) std.fmt.Formatter(format2) {
+ return .{ .data = .{
+ .symbol = symbol,
+ .macho_file = macho_file,
+ } };
+}
+
+fn format2(
+ ctx: FormatContext,
+ comptime unused_fmt_string: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+) !void {
+ _ = options;
+ _ = unused_fmt_string;
+ const symbol = ctx.symbol;
+ try writer.print("%{d} : {s} : @{x}", .{
+ symbol.nlist_idx,
+ symbol.getName(ctx.macho_file),
+ symbol.getAddress(.{}, ctx.macho_file),
+ });
+ if (symbol.getFile(ctx.macho_file)) |file| {
+ if (symbol.out_n_sect != 0) {
+ try writer.print(" : sect({d})", .{symbol.out_n_sect});
+ }
+ if (symbol.getAtom(ctx.macho_file)) |atom| {
+ try writer.print(" : atom({d})", .{atom.atom_index});
+ }
+ var buf: [2]u8 = .{'_'} ** 2;
+ if (symbol.flags.@"export") buf[0] = 'E';
+ if (symbol.flags.import) buf[1] = 'I';
+ try writer.print(" : {s}", .{&buf});
+ if (symbol.flags.weak) try writer.writeAll(" : weak");
+ if (symbol.isSymbolStab(ctx.macho_file)) try writer.writeAll(" : stab");
+ switch (file) {
+ .internal => |x| try writer.print(" : internal({d})", .{x.index}),
+ .object => |x| try writer.print(" : object({d})", .{x.index}),
+ .dylib => |x| try writer.print(" : dylib({d})", .{x.index}),
+ }
+ } else try writer.writeAll(" : unresolved");
+}
+
+pub const Flags = packed struct {
+ /// Whether the symbol is imported at runtime.
+ import: bool = false,
+
+ /// Whether the symbol is exported at runtime.
+ @"export": bool = false,
+
+ /// Whether this symbol is weak.
+ weak: bool = false,
+
+ /// Whether this symbol is weakly referenced.
+ weak_ref: bool = false,
+
+ /// Whether this symbol is dynamically referenced.
+ dyn_ref: bool = false,
+
+ /// Whether this symbol was marked as N_NO_DEAD_STRIP.
+ no_dead_strip: bool = false,
+
+ /// Whether this symbol can be interposed at runtime.
+ interposable: bool = false,
+
+ /// Whether this symbol is absolute.
+ abs: bool = false,
+
+ /// Whether this symbol is a tentative definition.
+ tentative: bool = false,
+
+ /// Whether this symbol is a thread-local variable.
+ tlv: bool = false,
+
+ /// Whether the symbol makes it into the output symtab or not.
+ output_symtab: bool = false,
+
+ /// Whether the symbol contains __got indirection.
+ got: bool = false,
+
+ /// Whether the symbol contains __stubs indirection.
+ stubs: bool = false,
+
+ /// Whether the symbol has a TLV pointer.
+ tlv_ptr: bool = false,
+
+ /// Whether the symbol contains __objc_stubs indirection.
+ objc_stubs: bool = false,
+};
+
+pub const Visibility = enum {
+ global,
+ hidden,
+ local,
+};
+
+pub const Extra = struct {
+ got: u32 = 0,
+ stubs: u32 = 0,
+ objc_stubs: u32 = 0,
+ objc_selrefs: u32 = 0,
+ tlv_ptr: u32 = 0,
+ symtab: u32 = 0,
+};
+
+pub const Index = u32;
+
+const assert = std.debug.assert;
+const macho = std.macho;
+const std = @import("std");
+
+const Atom = @import("Atom.zig");
+const File = @import("file.zig").File;
+const MachO = @import("../MachO.zig");
+const Object = @import("Object.zig");
+const Symbol = @This();
src/link/MachO/synthetic.zig
@@ -0,0 +1,669 @@
/// __got: one 8-byte pointer slot per registered symbol.
pub const GotSection = struct {
    symbols: std.ArrayListUnmanaged(Symbol.Index) = .{},

    pub const Index = u32;

    /// Frees the backing symbol list.
    pub fn deinit(got: *GotSection, allocator: Allocator) void {
        got.symbols.deinit(allocator);
    }

    /// Registers `sym_index` as owning a GOT slot and records the slot index
    /// in the symbol's extra data.
    pub fn addSymbol(got: *GotSection, sym_index: Symbol.Index, macho_file: *MachO) !void {
        const gpa = macho_file.base.allocator;
        const slot: Index = @intCast(got.symbols.items.len);
        try got.symbols.append(gpa, sym_index);
        const symbol = macho_file.getSymbol(sym_index);
        try symbol.addExtra(.{ .got = slot }, macho_file);
    }

    /// Virtual address of the `index`-th slot (8 bytes per slot).
    pub fn getAddress(got: GotSection, index: Index, macho_file: *MachO) u64 {
        assert(index < got.symbols.items.len);
        const header = macho_file.sections.items(.header)[macho_file.got_sect_index.?];
        return header.addr + index * @sizeOf(u64);
    }

    /// Total section size in bytes.
    pub fn size(got: GotSection) usize {
        return @sizeOf(u64) * got.symbols.items.len;
    }

    /// Emits rebase/bind/weak-bind records so dyld can fix every slot up at
    /// load time.
    pub fn addDyldRelocs(got: GotSection, macho_file: *MachO) !void {
        const tracy = trace(@src());
        defer tracy.end();
        const gpa = macho_file.base.allocator;
        const seg_id = macho_file.sections.items(.segment_id)[macho_file.got_sect_index.?];
        const seg = macho_file.segments.items[seg_id];

        for (got.symbols.items, 0..) |sym_index, nth| {
            const symbol = macho_file.getSymbol(sym_index);
            const slot_addr = got.getAddress(@intCast(nth), macho_file);
            const entry = bind.Entry{
                .target = sym_index,
                .offset = slot_addr - seg.vmaddr,
                .segment_id = seg_id,
                .addend = 0,
            };
            if (symbol.flags.import) {
                // Imported: dyld binds the slot; weak imports additionally
                // get a weak-bind record.
                try macho_file.bind.entries.append(gpa, entry);
                if (symbol.flags.weak) {
                    try macho_file.weak_bind.entries.append(gpa, entry);
                }
            } else {
                // Locally defined: the slot holds an absolute address and is
                // rebased on slide; weak or interposable definitions may
                // still be rebound at runtime.
                try macho_file.rebase.entries.append(gpa, .{
                    .offset = slot_addr - seg.vmaddr,
                    .segment_id = seg_id,
                });
                if (symbol.flags.weak) {
                    try macho_file.weak_bind.entries.append(gpa, entry);
                } else if (symbol.flags.interposable) {
                    try macho_file.bind.entries.append(gpa, entry);
                }
            }
        }
    }

    /// Writes slot contents: zero for imports (dyld fills them in), the
    /// symbol's resolved address otherwise.
    pub fn write(got: GotSection, macho_file: *MachO, writer: anytype) !void {
        const tracy = trace(@src());
        defer tracy.end();
        for (got.symbols.items) |sym_index| {
            const symbol = macho_file.getSymbol(sym_index);
            const word: u64 = if (symbol.flags.import) 0 else symbol.getAddress(.{}, macho_file);
            try writer.writeInt(u64, word, .little);
        }
    }

    const FormatCtx = struct {
        got: GotSection,
        macho_file: *MachO,
    };

    pub fn fmt(got: GotSection, macho_file: *MachO) std.fmt.Formatter(format2) {
        return .{ .data = .{ .got = got, .macho_file = macho_file } };
    }

    pub fn format2(
        ctx: FormatCtx,
        comptime unused_fmt_string: []const u8,
        options: std.fmt.FormatOptions,
        writer: anytype,
    ) !void {
        _ = options;
        _ = unused_fmt_string;
        for (ctx.got.symbols.items, 0..) |sym_index, nth| {
            const symbol = ctx.macho_file.getSymbol(sym_index);
            try writer.print(" {d}@0x{x} => {d}@0x{x} ({s})\n", .{
                nth,
                symbol.getGotAddress(ctx.macho_file),
                sym_index,
                symbol.getAddress(.{}, ctx.macho_file),
                symbol.getName(ctx.macho_file),
            });
        }
    }
};
+
/// __stubs: one small jump-table entry per registered symbol; each entry
/// loads its pointer from the matching __la_symbol_ptr slot and jumps
/// through it.
pub const StubsSection = struct {
    symbols: std.ArrayListUnmanaged(Symbol.Index) = .{},

    pub const Index = u32;

    /// Frees the backing symbol list.
    pub fn deinit(stubs: *StubsSection, allocator: Allocator) void {
        stubs.symbols.deinit(allocator);
    }

    /// Registers `sym_index` as owning a stub and records the stub index in
    /// the symbol's extra data.
    pub fn addSymbol(stubs: *StubsSection, sym_index: Symbol.Index, macho_file: *MachO) !void {
        const gpa = macho_file.base.allocator;
        const index = @as(Index, @intCast(stubs.symbols.items.len));
        const entry = try stubs.symbols.addOne(gpa);
        entry.* = sym_index;
        const symbol = macho_file.getSymbol(sym_index);
        try symbol.addExtra(.{ .stubs = index }, macho_file);
    }

    /// Virtual address of the `index`-th stub. The per-entry stub size is
    /// stored in the section header's `reserved2` field.
    pub fn getAddress(stubs: StubsSection, index: Index, macho_file: *MachO) u64 {
        assert(index < stubs.symbols.items.len);
        const header = macho_file.sections.items(.header)[macho_file.stubs_sect_index.?];
        return header.addr + index * header.reserved2;
    }

    /// Total section size in bytes (entry count times `reserved2`).
    pub fn size(stubs: StubsSection, macho_file: *MachO) usize {
        const header = macho_file.sections.items(.header)[macho_file.stubs_sect_index.?];
        return stubs.symbols.items.len * header.reserved2;
    }

    /// Emits the machine code for every stub entry. Entry `idx` jumps
    /// through the `idx`-th __la_symbol_ptr slot.
    pub fn write(stubs: StubsSection, macho_file: *MachO, writer: anytype) !void {
        const tracy = trace(@src());
        defer tracy.end();
        const cpu_arch = macho_file.options.cpu_arch.?;
        const laptr_sect = macho_file.sections.items(.header)[macho_file.la_symbol_ptr_sect_index.?];

        for (stubs.symbols.items, 0..) |sym_index, idx| {
            const sym = macho_file.getSymbol(sym_index);
            const source = sym.getAddress(.{ .stubs = true }, macho_file);
            const target = laptr_sect.addr + idx * @sizeOf(u64);
            switch (cpu_arch) {
                .x86_64 => {
                    // 0xff 0x25 = jmp *disp32(%rip); disp is relative to the
                    // end of the 6-byte instruction (2 opcode + 4 disp).
                    // NOTE(review): the u64 subtraction assumes target >=
                    // source + 6 — verify section ordering places
                    // __la_symbol_ptr after __stubs.
                    try writer.writeAll(&.{ 0xff, 0x25 });
                    try writer.writeInt(i32, @intCast(target - source - 2 - 4), .little);
                },
                .aarch64 => {
                    // adrp x16, page(target); ldr x16, [x16, pageoff]; br x16
                    // TODO relax if possible
                    const pages = try Relocation.calcNumberOfPages(source, target);
                    try writer.writeInt(u32, aarch64.Instruction.adrp(.x16, pages).toU32(), .little);
                    const off = try Relocation.calcPageOffset(target, .load_store_64);
                    try writer.writeInt(
                        u32,
                        aarch64.Instruction.ldr(.x16, .x16, aarch64.Instruction.LoadStoreOffset.imm(off)).toU32(),
                        .little,
                    );
                    try writer.writeInt(u32, aarch64.Instruction.br(.x16).toU32(), .little);
                },
                else => unreachable,
            }
        }
    }

    const FormatCtx = struct {
        stubs: StubsSection,
        macho_file: *MachO,
    };

    pub fn fmt(stubs: StubsSection, macho_file: *MachO) std.fmt.Formatter(format2) {
        return .{ .data = .{ .stubs = stubs, .macho_file = macho_file } };
    }

    /// Debug dump: one line per stub with its address, target address, and
    /// symbol name.
    pub fn format2(
        ctx: FormatCtx,
        comptime unused_fmt_string: []const u8,
        options: std.fmt.FormatOptions,
        writer: anytype,
    ) !void {
        _ = options;
        _ = unused_fmt_string;
        for (ctx.stubs.symbols.items, 0..) |entry, i| {
            const symbol = ctx.macho_file.getSymbol(entry);
            try writer.print(" {d}@0x{x} => {d}@0x{x} ({s})\n", .{
                i,
                symbol.getStubsAddress(ctx.macho_file),
                entry,
                symbol.getAddress(.{}, ctx.macho_file),
                symbol.getName(ctx.macho_file),
            });
        }
    }
};
+
/// __stubs_helper: a preamble that tail-calls dyld_stub_binder, followed by
/// one entry per lazily bound symbol. Each entry materializes its lazy-bind
/// stream offset and jumps to the preamble.
pub const StubsHelperSection = struct {
    /// Size in bytes of the shared preamble for the given architecture.
    pub inline fn preambleSize(cpu_arch: std.Target.Cpu.Arch) usize {
        return switch (cpu_arch) {
            .x86_64 => 15,
            .aarch64 => 6 * @sizeOf(u32),
            else => 0,
        };
    }

    /// Size in bytes of one per-symbol entry for the given architecture.
    pub inline fn entrySize(cpu_arch: std.Target.Cpu.Arch) usize {
        return switch (cpu_arch) {
            .x86_64 => 10,
            .aarch64 => 3 * @sizeOf(u32),
            else => 0,
        };
    }

    /// Preamble plus one entry per lazily bound stub symbol: non-weak
    /// imports, and non-weak interposable locals.
    pub fn size(stubs_helper: StubsHelperSection, macho_file: *MachO) usize {
        const tracy = trace(@src());
        defer tracy.end();
        _ = stubs_helper;
        const cpu_arch = macho_file.options.cpu_arch.?;
        var s: usize = preambleSize(cpu_arch);
        for (macho_file.stubs.symbols.items) |sym_index| {
            const sym = macho_file.getSymbol(sym_index);
            if ((sym.flags.import and !sym.flags.weak) or (!sym.flags.weak and sym.flags.interposable)) {
                s += entrySize(cpu_arch);
            }
        }
        return s;
    }

    /// Writes the preamble and all lazily bound entries. `idx` counts only
    /// the symbols that actually get an entry (same filter as `size`), and
    /// indexes the matching lazy-bind stream offset.
    pub fn write(stubs_helper: StubsHelperSection, macho_file: *MachO, writer: anytype) !void {
        const tracy = trace(@src());
        defer tracy.end();

        try stubs_helper.writePreamble(macho_file, writer);

        const cpu_arch = macho_file.options.cpu_arch.?;
        const sect = macho_file.sections.items(.header)[macho_file.stubs_helper_sect_index.?];
        const preamble_size = preambleSize(cpu_arch);
        const entry_size = entrySize(cpu_arch);

        var idx: usize = 0;
        for (macho_file.stubs.symbols.items) |sym_index| {
            const sym = macho_file.getSymbol(sym_index);
            if ((sym.flags.import and !sym.flags.weak) or (!sym.flags.weak and sym.flags.interposable)) {
                const offset = macho_file.lazy_bind.offsets.items[idx];
                const source: i64 = @intCast(sect.addr + preamble_size + entry_size * idx);
                const target: i64 = @intCast(sect.addr);
                switch (cpu_arch) {
                    .x86_64 => {
                        // 0x68 imm32 = push the lazy-bind offset;
                        // 0xe9 rel32 = jmp to the preamble. The rel32 is
                        // relative to the end of the 10-byte entry
                        // (5 push + 1 opcode + 4 disp).
                        try writer.writeByte(0x68);
                        try writer.writeInt(u32, offset, .little);
                        try writer.writeByte(0xe9);
                        try writer.writeInt(i32, @intCast(target - source - 6 - 4), .little);
                    },
                    .aarch64 => {
                        // ldr w16, <entry's trailing data word>; b preamble;
                        // .word lazy_bind_offset. The literal operand counts
                        // 4-byte units from the ldr to the data word.
                        const literal = blk: {
                            const div_res = try std.math.divExact(u64, entry_size - @sizeOf(u32), 4);
                            break :blk std.math.cast(u18, div_res) orelse return error.Overflow;
                        };
                        try writer.writeInt(u32, aarch64.Instruction.ldrLiteral(
                            .w16,
                            literal,
                        ).toU32(), .little);
                        const disp = math.cast(i28, @as(i64, @intCast(target)) - @as(i64, @intCast(source + 4))) orelse
                            return error.Overflow;
                        try writer.writeInt(u32, aarch64.Instruction.b(disp).toU32(), .little);
                        // The data word the ldr above reads must hold this
                        // entry's lazy-bind offset (the x86_64 path pushes the
                        // same value); it was previously written as four zero
                        // bytes, which would make every entry bind offset 0.
                        try writer.writeInt(u32, offset, .little);
                    },
                    else => unreachable,
                }
                idx += 1;
            }
        }
    }

    /// Writes the shared preamble: load the dyld_private cookie, then jump
    /// through the dyld_stub_binder GOT slot.
    fn writePreamble(stubs_helper: StubsHelperSection, macho_file: *MachO, writer: anytype) !void {
        _ = stubs_helper;
        const cpu_arch = macho_file.options.cpu_arch.?;
        const sect = macho_file.sections.items(.header)[macho_file.stubs_helper_sect_index.?];
        const dyld_private_addr = target: {
            const sym = macho_file.getSymbol(macho_file.dyld_private_index.?);
            break :target sym.getAddress(.{}, macho_file);
        };
        const dyld_stub_binder_addr = target: {
            const sym = macho_file.getSymbol(macho_file.dyld_stub_binder_index.?);
            break :target sym.getGotAddress(macho_file);
        };
        switch (cpu_arch) {
            .x86_64 => {
                // lea r11, [rip + dyld_private]; push r11;
                // jmp *[rip + dyld_stub_binder@GOT]
                try writer.writeAll(&.{ 0x4c, 0x8d, 0x1d });
                try writer.writeInt(i32, @intCast(dyld_private_addr - sect.addr - 3 - 4), .little);
                try writer.writeAll(&.{ 0x41, 0x53, 0xff, 0x25 });
                try writer.writeInt(i32, @intCast(dyld_stub_binder_addr - sect.addr - 11 - 4), .little);
            },
            .aarch64 => {
                {
                    // adrp/add x17 = &dyld_private
                    // TODO relax if possible
                    const pages = try Relocation.calcNumberOfPages(sect.addr, dyld_private_addr);
                    try writer.writeInt(u32, aarch64.Instruction.adrp(.x17, pages).toU32(), .little);
                    const off = try Relocation.calcPageOffset(dyld_private_addr, .arithmetic);
                    try writer.writeInt(u32, aarch64.Instruction.add(.x17, .x17, off, false).toU32(), .little);
                }
                // stp x16, x17, [sp, #-16]! — spill offset and cookie
                try writer.writeInt(u32, aarch64.Instruction.stp(
                    .x16,
                    .x17,
                    aarch64.Register.sp,
                    aarch64.Instruction.LoadStorePairOffset.pre_index(-16),
                ).toU32(), .little);
                {
                    // adrp/ldr x16 = *dyld_stub_binder@GOT
                    // TODO relax if possible
                    const pages = try Relocation.calcNumberOfPages(sect.addr + 12, dyld_stub_binder_addr);
                    try writer.writeInt(u32, aarch64.Instruction.adrp(.x16, pages).toU32(), .little);
                    const off = try Relocation.calcPageOffset(dyld_stub_binder_addr, .load_store_64);
                    try writer.writeInt(u32, aarch64.Instruction.ldr(
                        .x16,
                        .x16,
                        aarch64.Instruction.LoadStoreOffset.imm(off),
                    ).toU32(), .little);
                }
                try writer.writeInt(u32, aarch64.Instruction.br(.x16).toU32(), .little);
            },
            else => unreachable,
        }
    }
};
+
/// __la_symbol_ptr: one 8-byte lazy pointer per stub symbol, resolved either
/// eagerly by dyld (bind/weak-bind) or lazily through __stubs_helper.
pub const LaSymbolPtrSection = struct {
    /// Total section size in bytes: one u64 per stub symbol.
    pub fn size(laptr: LaSymbolPtrSection, macho_file: *MachO) usize {
        _ = laptr;
        return macho_file.stubs.symbols.items.len * @sizeOf(u64);
    }

    /// Emits rebase/bind/lazy-bind/weak-bind records for every lazy pointer.
    pub fn addDyldRelocs(laptr: LaSymbolPtrSection, macho_file: *MachO) !void {
        const tracy = trace(@src());
        defer tracy.end();
        _ = laptr;
        const gpa = macho_file.base.allocator;

        const sect = macho_file.sections.items(.header)[macho_file.la_symbol_ptr_sect_index.?];
        const seg_id = macho_file.sections.items(.segment_id)[macho_file.la_symbol_ptr_sect_index.?];
        const seg = macho_file.segments.items[seg_id];

        for (macho_file.stubs.symbols.items, 0..) |sym_index, idx| {
            const sym = macho_file.getSymbol(sym_index);
            const addr = sect.addr + idx * @sizeOf(u64);
            const entry = bind.Entry{
                .target = sym_index,
                .offset = addr - seg.vmaddr,
                .segment_id = seg_id,
                .addend = 0,
            };
            if (sym.flags.import) {
                if (sym.flags.weak) {
                    // Weak imports are bound eagerly (no lazy binding).
                    try macho_file.bind.entries.append(gpa, entry);
                    try macho_file.weak_bind.entries.append(gpa, entry);
                } else {
                    try macho_file.lazy_bind.entries.append(gpa, entry);
                }
            } else {
                if (sym.flags.weak) {
                    // Local weak definition: rebase on slide, and allow a
                    // coalesced override via weak bind.
                    try macho_file.rebase.entries.append(gpa, .{
                        .offset = addr - seg.vmaddr,
                        .segment_id = seg_id,
                    });
                    try macho_file.weak_bind.entries.append(gpa, entry);
                } else if (sym.flags.interposable) {
                    try macho_file.lazy_bind.entries.append(gpa, entry);
                }
            }
        }
    }

    /// Writes initial pointer values. Exported symbols get their resolved
    /// address; weak symbols get zero; everything else initially points at
    /// its __stubs_helper entry so the first call triggers lazy binding.
    pub fn write(laptr: LaSymbolPtrSection, macho_file: *MachO, writer: anytype) !void {
        const tracy = trace(@src());
        defer tracy.end();
        _ = laptr;
        const cpu_arch = macho_file.options.cpu_arch.?;
        // Note: this is the __stubs_helper header — the fallthrough values
        // below point into that section, not into __la_symbol_ptr itself.
        const sect = macho_file.sections.items(.header)[macho_file.stubs_helper_sect_index.?];
        for (macho_file.stubs.symbols.items, 0..) |sym_index, idx| {
            const sym = macho_file.getSymbol(sym_index);
            // NOTE(review): `idx` is the absolute stub index, but
            // StubsHelperSection.write numbers its entries with a compacted
            // counter that skips weak/non-lazy symbols — if the two filters
            // ever disagree for a symbol list containing weak entries, the
            // helper addresses computed here drift. Verify the invariant.
            const value: u64 = if (sym.flags.@"export")
                sym.getAddress(.{ .stubs = false }, macho_file)
            else if (sym.flags.weak)
                @as(u64, 0)
            else
                sect.addr + StubsHelperSection.preambleSize(cpu_arch) +
                StubsHelperSection.entrySize(cpu_arch) * idx;
            try writer.writeInt(u64, @intCast(value), .little);
        }
    }
};
+
/// __tlv_ptr-style section: one 8-byte pointer slot per registered
/// thread-local symbol.
pub const TlvPtrSection = struct {
    symbols: std.ArrayListUnmanaged(Symbol.Index) = .{},

    pub const Index = u32;

    /// Frees the backing symbol list.
    pub fn deinit(tlv: *TlvPtrSection, allocator: Allocator) void {
        tlv.symbols.deinit(allocator);
    }

    /// Registers `sym_index` as owning a TLV-pointer slot and records the
    /// slot index in the symbol's extra data.
    pub fn addSymbol(tlv: *TlvPtrSection, sym_index: Symbol.Index, macho_file: *MachO) !void {
        const gpa = macho_file.base.allocator;
        const index = @as(Index, @intCast(tlv.symbols.items.len));
        const entry = try tlv.symbols.addOne(gpa);
        entry.* = sym_index;
        const symbol = macho_file.getSymbol(sym_index);
        try symbol.addExtra(.{ .tlv_ptr = index }, macho_file);
    }

    /// Virtual address of the `index`-th slot. Each slot is a single 8-byte
    /// pointer — the stride must agree with `size` and `write`, which both
    /// lay entries out @sizeOf(u64) apart (the previous `* 3` stride
    /// produced dyld reloc offsets past the bytes actually emitted).
    pub fn getAddress(tlv: TlvPtrSection, index: Index, macho_file: *MachO) u64 {
        assert(index < tlv.symbols.items.len);
        const header = macho_file.sections.items(.header)[macho_file.tlv_ptr_sect_index.?];
        return header.addr + index * @sizeOf(u64);
    }

    /// Total section size in bytes: one u64 per tracked symbol.
    pub fn size(tlv: TlvPtrSection) usize {
        return tlv.symbols.items.len * @sizeOf(u64);
    }

    /// Emits rebase/bind/weak-bind records so dyld can fix every slot up at
    /// load time (same policy as GotSection.addDyldRelocs).
    pub fn addDyldRelocs(tlv: TlvPtrSection, macho_file: *MachO) !void {
        const tracy = trace(@src());
        defer tracy.end();
        const gpa = macho_file.base.allocator;
        const seg_id = macho_file.sections.items(.segment_id)[macho_file.tlv_ptr_sect_index.?];
        const seg = macho_file.segments.items[seg_id];

        for (tlv.symbols.items, 0..) |sym_index, idx| {
            const sym = macho_file.getSymbol(sym_index);
            const addr = tlv.getAddress(@intCast(idx), macho_file);
            const entry = bind.Entry{
                .target = sym_index,
                .offset = addr - seg.vmaddr,
                .segment_id = seg_id,
                .addend = 0,
            };
            if (sym.flags.import) {
                // Imported: dyld binds the slot; weak imports also get a
                // weak-bind record.
                try macho_file.bind.entries.append(gpa, entry);
                if (sym.flags.weak) {
                    try macho_file.weak_bind.entries.append(gpa, entry);
                }
            } else {
                // Locally defined: rebase on slide; weak/interposable
                // definitions may still be rebound at runtime.
                try macho_file.rebase.entries.append(gpa, .{
                    .offset = addr - seg.vmaddr,
                    .segment_id = seg_id,
                });
                if (sym.flags.weak) {
                    try macho_file.weak_bind.entries.append(gpa, entry);
                } else if (sym.flags.interposable) {
                    try macho_file.bind.entries.append(gpa, entry);
                }
            }
        }
    }

    /// Writes slot contents: zero for imports (dyld fills them in), the
    /// symbol's resolved address otherwise.
    pub fn write(tlv: TlvPtrSection, macho_file: *MachO, writer: anytype) !void {
        const tracy = trace(@src());
        defer tracy.end();

        for (tlv.symbols.items) |sym_index| {
            const sym = macho_file.getSymbol(sym_index);
            if (sym.flags.import) {
                try writer.writeInt(u64, 0, .little);
            } else {
                try writer.writeInt(u64, sym.getAddress(.{}, macho_file), .little);
            }
        }
    }

    const FormatCtx = struct {
        tlv: TlvPtrSection,
        macho_file: *MachO,
    };

    pub fn fmt(tlv: TlvPtrSection, macho_file: *MachO) std.fmt.Formatter(format2) {
        return .{ .data = .{ .tlv = tlv, .macho_file = macho_file } };
    }

    /// Debug dump: one line per slot with its address, target address, and
    /// symbol name.
    pub fn format2(
        ctx: FormatCtx,
        comptime unused_fmt_string: []const u8,
        options: std.fmt.FormatOptions,
        writer: anytype,
    ) !void {
        _ = options;
        _ = unused_fmt_string;
        for (ctx.tlv.symbols.items, 0..) |entry, i| {
            const symbol = ctx.macho_file.getSymbol(entry);
            try writer.print(" {d}@0x{x} => {d}@0x{x} ({s})\n", .{
                i,
                symbol.getTlvPtrAddress(ctx.macho_file),
                entry,
                symbol.getAddress(.{}, ctx.macho_file),
                symbol.getName(ctx.macho_file),
            });
        }
    }
};
+
/// __objc_stubs: one entry per objc_msgSend stub symbol. Each entry loads
/// the selector from its __objc_selrefs slot and tail-jumps through the
/// objc_msgSend GOT slot.
pub const ObjcStubsSection = struct {
    symbols: std.ArrayListUnmanaged(Symbol.Index) = .{},

    /// Frees the backing symbol list.
    pub fn deinit(objc: *ObjcStubsSection, allocator: Allocator) void {
        objc.symbols.deinit(allocator);
    }

    /// Size in bytes of one entry for the given architecture
    /// (x86_64: 7-byte mov + 6-byte jmp; aarch64: 8 instructions).
    pub fn entrySize(cpu_arch: std.Target.Cpu.Arch) u8 {
        return switch (cpu_arch) {
            .x86_64 => 13,
            .aarch64 => 8 * @sizeOf(u32),
            else => unreachable,
        };
    }

    /// Registers `sym_index` as owning an objc stub and records the stub
    /// index in the symbol's extra data.
    pub fn addSymbol(objc: *ObjcStubsSection, sym_index: Symbol.Index, macho_file: *MachO) !void {
        const gpa = macho_file.base.allocator;
        const index = @as(Index, @intCast(objc.symbols.items.len));
        const entry = try objc.symbols.addOne(gpa);
        entry.* = sym_index;
        const symbol = macho_file.getSymbol(sym_index);
        try symbol.addExtra(.{ .objc_stubs = index }, macho_file);
    }

    /// Virtual address of the `index`-th entry.
    pub fn getAddress(objc: ObjcStubsSection, index: Index, macho_file: *MachO) u64 {
        assert(index < objc.symbols.items.len);
        const header = macho_file.sections.items(.header)[macho_file.objc_stubs_sect_index.?];
        return header.addr + index * entrySize(macho_file.options.cpu_arch.?);
    }

    /// Total section size in bytes.
    pub fn size(objc: ObjcStubsSection, macho_file: *MachO) usize {
        return objc.symbols.items.len * entrySize(macho_file.options.cpu_arch.?);
    }

    /// Emits the machine code for every entry.
    pub fn write(objc: ObjcStubsSection, macho_file: *MachO, writer: anytype) !void {
        const tracy = trace(@src());
        defer tracy.end();

        for (objc.symbols.items, 0..) |sym_index, idx| {
            const sym = macho_file.getSymbol(sym_index);
            const addr = objc.getAddress(@intCast(idx), macho_file);
            switch (macho_file.options.cpu_arch.?) {
                .x86_64 => {
                    // 0x48 0x8b 0x35 = mov rsi, [rip + selref]; disp is
                    // relative to the end of the 7-byte instruction.
                    try writer.writeAll(&.{ 0x48, 0x8b, 0x35 });
                    {
                        const target = sym.getObjcSelrefsAddress(macho_file);
                        const source = addr;
                        try writer.writeInt(i32, @intCast(target - source - 3 - 4), .little);
                    }
                    // 0xff 0x25 = jmp *[rip + objc_msgSend@GOT].
                    try writer.writeAll(&.{ 0xff, 0x25 });
                    {
                        const target_sym = macho_file.getSymbol(macho_file.objc_msg_send_index.?);
                        const target = target_sym.getGotAddress(macho_file);
                        const source = addr + 7;
                        try writer.writeInt(i32, @intCast(target - source - 2 - 4), .little);
                    }
                },
                .aarch64 => {
                    {
                        // adrp/ldr x1 = selector from __objc_selrefs.
                        const target = sym.getObjcSelrefsAddress(macho_file);
                        const source = addr;
                        const pages = try Relocation.calcNumberOfPages(source, target);
                        try writer.writeInt(u32, aarch64.Instruction.adrp(.x1, pages).toU32(), .little);
                        const off = try Relocation.calcPageOffset(target, .load_store_64);
                        try writer.writeInt(
                            u32,
                            aarch64.Instruction.ldr(.x1, .x1, aarch64.Instruction.LoadStoreOffset.imm(off)).toU32(),
                            .little,
                        );
                    }
                    {
                        // adrp/ldr x16 = *objc_msgSend@GOT, then br x16.
                        const target_sym = macho_file.getSymbol(macho_file.objc_msg_send_index.?);
                        const target = target_sym.getGotAddress(macho_file);
                        const source = addr + 2 * @sizeOf(u32);
                        const pages = try Relocation.calcNumberOfPages(source, target);
                        try writer.writeInt(u32, aarch64.Instruction.adrp(.x16, pages).toU32(), .little);
                        const off = try Relocation.calcPageOffset(target, .load_store_64);
                        try writer.writeInt(
                            u32,
                            aarch64.Instruction.ldr(.x16, .x16, aarch64.Instruction.LoadStoreOffset.imm(off)).toU32(),
                            .little,
                        );
                    }
                    try writer.writeInt(u32, aarch64.Instruction.br(.x16).toU32(), .little);
                    // brk padding fills the entry out to 8 instructions.
                    try writer.writeInt(u32, aarch64.Instruction.brk(1).toU32(), .little);
                    try writer.writeInt(u32, aarch64.Instruction.brk(1).toU32(), .little);
                    try writer.writeInt(u32, aarch64.Instruction.brk(1).toU32(), .little);
                },
                else => unreachable,
            }
        }
    }

    const FormatCtx = struct {
        objc: ObjcStubsSection,
        macho_file: *MachO,
    };

    pub fn fmt(objc: ObjcStubsSection, macho_file: *MachO) std.fmt.Formatter(format2) {
        return .{ .data = .{ .objc = objc, .macho_file = macho_file } };
    }

    /// Debug dump: one line per entry with its address, target address, and
    /// symbol name.
    pub fn format2(
        ctx: FormatCtx,
        comptime unused_fmt_string: []const u8,
        options: std.fmt.FormatOptions,
        writer: anytype,
    ) !void {
        _ = options;
        _ = unused_fmt_string;
        for (ctx.objc.symbols.items, 0..) |entry, i| {
            const symbol = ctx.macho_file.getSymbol(entry);
            try writer.print(" {d}@0x{x} => {d}@0x{x} ({s})\n", .{
                i,
                symbol.getObjcStubsAddress(ctx.macho_file),
                entry,
                symbol.getAddress(.{}, ctx.macho_file),
                symbol.getName(ctx.macho_file),
            });
        }
    }

    pub const Index = u32;
};
+
/// Indirect symbol table: symtab indices for __stubs, __got, and
/// __la_symbol_ptr entries, in that order.
pub const Indsymtab = struct {
    /// Number of entries: one per stub, one per GOT slot, and one per lazy
    /// pointer (which mirrors the stubs list).
    pub inline fn nsyms(ind: Indsymtab, macho_file: *MachO) u32 {
        _ = ind;
        const nstubs = macho_file.stubs.symbols.items.len;
        const ngot = macho_file.got.symbols.items.len;
        return @intCast(nstubs * 2 + ngot);
    }

    /// Writes the table: stubs first, then GOT, then the la_symbol_ptr
    /// entries (the same symbols as stubs, in the same order).
    pub fn write(ind: Indsymtab, macho_file: *MachO, writer: anytype) !void {
        const tracy = trace(@src());
        defer tracy.end();

        _ = ind;

        const groups = [3][]const Symbol.Index{
            macho_file.stubs.symbols.items,
            macho_file.got.symbols.items,
            macho_file.stubs.symbols.items,
        };
        for (groups) |group| {
            for (group) |sym_index| {
                const symbol = macho_file.getSymbol(sym_index);
                try writer.writeInt(u32, symbol.getOutputSymtabIndex(macho_file).?, .little);
            }
        }
    }
};
+
// Re-export the dyld-info writers under names matching the other synthetic
// section types in this file.
pub const RebaseSection = Rebase;
pub const BindSection = bind.Bind;
pub const WeakBindSection = bind.WeakBind;
pub const LazyBindSection = bind.LazyBind;
pub const ExportTrieSection = Trie;
+
+const aarch64 = @import("../aarch64.zig");
+const assert = std.debug.assert;
+const bind = @import("dyld_info/bind.zig");
+const math = std.math;
+const std = @import("std");
+const trace = @import("../tracy.zig").trace;
+
+const Allocator = std.mem.Allocator;
+const MachO = @import("../MachO.zig");
+const Rebase = @import("dyld_info/Rebase.zig");
+const Relocation = @import("Relocation.zig");
+const Symbol = @import("Symbol.zig");
+const Trie = @import("dyld_info/Trie.zig");
src/link/MachO/thunks.zig
@@ -1,374 +1,176 @@
-//! An algorithm for allocating output machine code section (aka `__TEXT,__text`),
-//! and insertion of range extending thunks. As such, this algorithm is only run
-//! for a target that requires range extenders such as arm64.
-//!
-//! The algorithm works pessimistically and assumes that any reference to an Atom in
-//! another output section is out of range.
-
-/// Branch instruction has 26 bits immediate but 4 byte aligned.
-const jump_bits = @bitSizeOf(i28);
-
-const max_distance = (1 << (jump_bits - 1));
-
-/// A branch will need an extender if its target is larger than
-/// `2^(jump_bits - 1) - margin` where margin is some arbitrary number.
-/// mold uses 5MiB margin, while ld64 uses 4MiB margin. We will follow mold
-/// and assume margin to be 5MiB.
-const max_allowed_distance = max_distance - 0x500_000;
-
-pub const Thunk = struct {
- start_index: Atom.Index,
- len: u32,
-
- targets: std.MultiArrayList(Target) = .{},
- lookup: std.AutoHashMapUnmanaged(Target, u32) = .{},
-
- pub const Tag = enum {
- stub,
- atom,
- };
-
- pub const Target = struct {
- tag: Tag,
- target: SymbolWithLoc,
- };
-
- pub const Index = u32;
-
- pub fn deinit(self: *Thunk, gpa: Allocator) void {
- self.targets.deinit(gpa);
- self.lookup.deinit(gpa);
- }
-
- pub fn getStartAtomIndex(self: Thunk) Atom.Index {
- assert(self.len != 0);
- return self.start_index;
- }
-
- pub fn getEndAtomIndex(self: Thunk) Atom.Index {
- assert(self.len != 0);
- return self.start_index + self.len - 1;
- }
-
- pub fn getSize(self: Thunk) u64 {
- return 12 * self.len;
+pub fn createThunks(sect_id: u8, macho_file: *MachO) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+
+ const gpa = macho_file.base.allocator;
+ const slice = macho_file.sections.slice();
+ const header = &slice.items(.header)[sect_id];
+ const atoms = slice.items(.atoms)[sect_id].items;
+ assert(atoms.len > 0);
+
+ for (atoms) |atom_index| {
+ macho_file.getAtom(atom_index).?.value = @bitCast(@as(i64, -1));
}
- pub fn getAlignment() u32 {
- return @alignOf(u32);
- }
-
- pub fn getTrampoline(self: Thunk, macho_file: *MachO, tag: Tag, target: SymbolWithLoc) ?SymbolWithLoc {
- const atom_index = self.lookup.get(.{ .tag = tag, .target = target }) orelse return null;
- return macho_file.getAtom(atom_index).getSymbolWithLoc();
- }
-};
-
-pub fn createThunks(macho_file: *MachO, sect_id: u8) !void {
- const header = &macho_file.sections.items(.header)[sect_id];
- if (header.size == 0) return;
-
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
- const first_atom_index = macho_file.sections.items(.first_atom_index)[sect_id].?;
-
- header.size = 0;
- header.@"align" = 0;
-
- var atom_count: u32 = 0;
-
- {
- var atom_index = first_atom_index;
- while (true) {
- const atom = macho_file.getAtom(atom_index);
- const sym = macho_file.getSymbolPtr(atom.getSymbolWithLoc());
- sym.n_value = 0;
- atom_count += 1;
-
- if (atom.next_index) |next_index| {
- atom_index = next_index;
- } else break;
+ var i: usize = 0;
+ while (i < atoms.len) {
+ const start = i;
+ const start_atom = macho_file.getAtom(atoms[start]).?;
+ assert(start_atom.flags.alive);
+ start_atom.value = try advance(header, start_atom.size, start_atom.alignment);
+ i += 1;
+
+ while (i < atoms.len and
+ header.size - start_atom.value < max_allowed_distance) : (i += 1)
+ {
+ const atom_index = atoms[i];
+ const atom = macho_file.getAtom(atom_index).?;
+ assert(atom.flags.alive);
+ atom.value = try advance(header, atom.size, atom.alignment);
}
- }
-
- var allocated = std.AutoHashMap(Atom.Index, void).init(gpa);
- defer allocated.deinit();
- try allocated.ensureTotalCapacity(atom_count);
-
- var group_start = first_atom_index;
- var group_end = first_atom_index;
- var offset: u64 = 0;
-
- while (true) {
- const group_start_atom = macho_file.getAtom(group_start);
- log.debug("GROUP START at {d}", .{group_start});
-
- while (true) {
- const atom = macho_file.getAtom(group_end);
- offset = atom.alignment.forward(offset);
-
- const sym = macho_file.getSymbolPtr(atom.getSymbolWithLoc());
- sym.n_value = offset;
- offset += atom.size;
-
- macho_file.logAtom(group_end, log);
-
- header.@"align" = @max(header.@"align", atom.alignment.toLog2Units());
-
- allocated.putAssumeCapacityNoClobber(group_end, {});
-
- const group_start_sym = macho_file.getSymbol(group_start_atom.getSymbolWithLoc());
- if (offset - group_start_sym.n_value >= max_allowed_distance) break;
-
- if (atom.next_index) |next_index| {
- group_end = next_index;
- } else break;
- }
- log.debug("GROUP END at {d}", .{group_end});
-
- // Insert thunk at group_end
- const thunk_index = @as(u32, @intCast(macho_file.thunks.items.len));
- try macho_file.thunks.append(gpa, .{ .start_index = undefined, .len = 0 });
-
- // Scan relocs in the group and create trampolines for any unreachable callsite.
- var atom_index = group_start;
- while (true) {
- const atom = macho_file.getAtom(atom_index);
- try scanRelocs(
- macho_file,
- atom_index,
- allocated,
- thunk_index,
- group_end,
- );
-
- if (atom_index == group_end) break;
- if (atom.next_index) |next_index| {
- atom_index = next_index;
- } else break;
+ // Insert a thunk at the group end
+ const thunk_index = try macho_file.addThunk();
+ const thunk = macho_file.getThunk(thunk_index);
+ thunk.out_n_sect = sect_id;
+
+ // Scan relocs in the group and create trampolines for any unreachable callsite
+ for (atoms[start..i]) |atom_index| {
+ const atom = macho_file.getAtom(atom_index).?;
+ log.debug("atom({d}) {s}", .{ atom_index, atom.getName(macho_file) });
+ for (atom.getRelocs(macho_file)) |rel| {
+ if (rel.type != .branch) continue;
+ if (isReachable(atom, rel, macho_file)) continue;
+ try thunk.symbols.put(gpa, rel.target, {});
+ }
+ atom.thunk_index = thunk_index;
}
- offset = mem.alignForward(u64, offset, Thunk.getAlignment());
- allocateThunk(macho_file, thunk_index, offset, header);
- offset += macho_file.thunks.items[thunk_index].getSize();
+ thunk.value = try advance(header, thunk.size(), 2);
- const thunk = macho_file.thunks.items[thunk_index];
- if (thunk.len == 0) {
- const group_end_atom = macho_file.getAtom(group_end);
- if (group_end_atom.next_index) |next_index| {
- group_start = next_index;
- group_end = next_index;
- } else break;
- } else {
- const thunk_end_atom_index = thunk.getEndAtomIndex();
- const thunk_end_atom = macho_file.getAtom(thunk_end_atom_index);
- if (thunk_end_atom.next_index) |next_index| {
- group_start = next_index;
- group_end = next_index;
- } else break;
- }
+ log.debug("thunk({d}) : {}", .{ thunk_index, thunk.fmt(macho_file) });
}
-
- header.size = @as(u32, @intCast(offset));
}
-fn allocateThunk(
- macho_file: *MachO,
- thunk_index: Thunk.Index,
- base_offset: u64,
- header: *macho.section_64,
-) void {
- const thunk = macho_file.thunks.items[thunk_index];
- if (thunk.len == 0) return;
-
- const first_atom_index = thunk.getStartAtomIndex();
- const end_atom_index = thunk.getEndAtomIndex();
-
- var atom_index = first_atom_index;
- var offset = base_offset;
- while (true) {
- const atom = macho_file.getAtom(atom_index);
- offset = mem.alignForward(u64, offset, Thunk.getAlignment());
-
- const sym = macho_file.getSymbolPtr(atom.getSymbolWithLoc());
- sym.n_value = offset;
- offset += atom.size;
-
- macho_file.logAtom(atom_index, log);
-
- header.@"align" = @max(header.@"align", atom.alignment.toLog2Units());
-
- if (end_atom_index == atom_index) break;
-
- if (atom.next_index) |next_index| {
- atom_index = next_index;
- } else break;
- }
+fn advance(sect: *macho.section_64, size: u64, pow2_align: u32) !u64 {
+ const alignment = try math.powi(u32, 2, pow2_align);
+ const offset = mem.alignForward(u64, sect.size, alignment);
+ const padding = offset - sect.size;
+ sect.size += padding + size;
+ sect.@"align" = @max(sect.@"align", pow2_align);
+ return offset;
}
-fn scanRelocs(
- macho_file: *MachO,
- atom_index: Atom.Index,
- allocated: std.AutoHashMap(Atom.Index, void),
- thunk_index: Thunk.Index,
- group_end: Atom.Index,
-) !void {
- const atom = macho_file.getAtom(atom_index);
- const object = macho_file.objects.items[atom.getFile().?];
-
- const base_offset = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: {
- const source_sect = object.getSourceSection(source_sym.n_sect - 1);
- break :blk @as(i32, @intCast(source_sym.n_value - source_sect.addr));
- } else 0;
-
- const code = Atom.getAtomCode(macho_file, atom_index);
- const relocs = Atom.getAtomRelocs(macho_file, atom_index);
- const ctx = Atom.getRelocContext(macho_file, atom_index);
+fn isReachable(atom: *const Atom, rel: Relocation, macho_file: *MachO) bool {
+ const target = rel.getTargetSymbol(macho_file);
+ if (target.flags.stubs or target.flags.objc_stubs) return false;
+ if (atom.out_n_sect != target.out_n_sect) return false;
+ const target_atom = target.getAtom(macho_file).?;
+ if (target_atom.value == @as(u64, @bitCast(@as(i64, -1)))) return false;
+ const saddr = @as(i64, @intCast(atom.value)) + @as(i64, @intCast(rel.offset - atom.off));
+ const taddr: i64 = @intCast(rel.getTargetAddress(macho_file));
+ _ = math.cast(i28, taddr + rel.addend - saddr) orelse return false;
+ return true;
+}
- for (relocs) |rel| {
- if (!relocNeedsThunk(rel)) continue;
+pub const Thunk = struct {
+ value: u64 = 0,
+ out_n_sect: u8 = 0,
+ symbols: std.AutoArrayHashMapUnmanaged(Symbol.Index, void) = .{},
- const target = Atom.parseRelocTarget(macho_file, .{
- .object_id = atom.getFile().?,
- .rel = rel,
- .code = code,
- .base_offset = ctx.base_offset,
- .base_addr = ctx.base_addr,
- });
- if (isReachable(macho_file, atom_index, rel, base_offset, target, allocated)) continue;
+ pub fn deinit(thunk: *Thunk, allocator: Allocator) void {
+ thunk.symbols.deinit(allocator);
+ }
- log.debug("{x}: source = {s}@{x}, target = {s}@{x} unreachable", .{
- rel.r_address - base_offset,
- macho_file.getSymbolName(atom.getSymbolWithLoc()),
- macho_file.getSymbol(atom.getSymbolWithLoc()).n_value,
- macho_file.getSymbolName(target),
- macho_file.getSymbol(target).n_value,
- });
+ pub fn size(thunk: Thunk) usize {
+ return thunk.symbols.keys().len * trampoline_size;
+ }
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
- const target_sym = macho_file.getSymbol(target);
- const thunk = &macho_file.thunks.items[thunk_index];
+ pub fn getAddress(thunk: Thunk, sym_index: Symbol.Index) u64 {
+ return thunk.value + thunk.symbols.getIndex(sym_index).? * trampoline_size;
+ }
- const tag: Thunk.Tag = if (target_sym.undf()) .stub else .atom;
- const thunk_target: Thunk.Target = .{ .tag = tag, .target = target };
- const gop = try thunk.lookup.getOrPut(gpa, thunk_target);
- if (!gop.found_existing) {
- gop.value_ptr.* = try pushThunkAtom(macho_file, thunk, group_end);
- try thunk.targets.append(gpa, thunk_target);
+ pub fn write(thunk: Thunk, macho_file: *MachO, writer: anytype) !void {
+ for (thunk.symbols.keys(), 0..) |sym_index, i| {
+ const sym = macho_file.getSymbol(sym_index);
+ const saddr = thunk.value + i * trampoline_size;
+ const taddr = sym.getAddress(.{}, macho_file);
+ const pages = try Relocation.calcNumberOfPages(saddr, taddr);
+ try writer.writeInt(u32, aarch64.Instruction.adrp(.x16, pages).toU32(), .little);
+ const off = try Relocation.calcPageOffset(taddr, .arithmetic);
+ try writer.writeInt(u32, aarch64.Instruction.add(.x16, .x16, off, false).toU32(), .little);
+ try writer.writeInt(u32, aarch64.Instruction.br(.x16).toU32(), .little);
}
-
- try macho_file.thunk_table.put(gpa, atom_index, thunk_index);
}
-}
-fn pushThunkAtom(macho_file: *MachO, thunk: *Thunk, group_end: Atom.Index) !Atom.Index {
- const thunk_atom_index = try createThunkAtom(macho_file);
-
- const thunk_atom = macho_file.getAtomPtr(thunk_atom_index);
- const end_atom_index = if (thunk.len == 0) group_end else thunk.getEndAtomIndex();
- const end_atom = macho_file.getAtomPtr(end_atom_index);
-
- if (end_atom.next_index) |first_after_index| {
- const first_after_atom = macho_file.getAtomPtr(first_after_index);
- first_after_atom.prev_index = thunk_atom_index;
- thunk_atom.next_index = first_after_index;
+ pub fn format(
+ thunk: Thunk,
+ comptime unused_fmt_string: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+ ) !void {
+ _ = thunk;
+ _ = unused_fmt_string;
+ _ = options;
+ _ = writer;
+ @compileError("do not format Thunk directly");
}
- end_atom.next_index = thunk_atom_index;
- thunk_atom.prev_index = end_atom_index;
-
- if (thunk.len == 0) {
- thunk.start_index = thunk_atom_index;
+ pub fn fmt(thunk: Thunk, macho_file: *MachO) std.fmt.Formatter(format2) {
+ return .{ .data = .{
+ .thunk = thunk,
+ .macho_file = macho_file,
+ } };
}
- thunk.len += 1;
-
- return thunk_atom_index;
-}
-
-inline fn relocNeedsThunk(rel: macho.relocation_info) bool {
- const rel_type = @as(macho.reloc_type_arm64, @enumFromInt(rel.r_type));
- return rel_type == .ARM64_RELOC_BRANCH26;
-}
-
-fn isReachable(
- macho_file: *MachO,
- atom_index: Atom.Index,
- rel: macho.relocation_info,
- base_offset: i32,
- target: SymbolWithLoc,
- allocated: std.AutoHashMap(Atom.Index, void),
-) bool {
- if (macho_file.stub_table.lookup.contains(target)) return false;
-
- const source_atom = macho_file.getAtom(atom_index);
- const source_sym = macho_file.getSymbol(source_atom.getSymbolWithLoc());
-
- const target_object = macho_file.objects.items[target.getFile().?];
- const target_atom_index = target_object.getAtomIndexForSymbol(target.sym_index).?;
- const target_atom = macho_file.getAtom(target_atom_index);
- const target_sym = macho_file.getSymbol(target_atom.getSymbolWithLoc());
-
- if (source_sym.n_sect != target_sym.n_sect) return false;
+ const FormatContext = struct {
+ thunk: Thunk,
+ macho_file: *MachO,
+ };
- if (!allocated.contains(target_atom_index)) return false;
+ fn format2(
+ ctx: FormatContext,
+ comptime unused_fmt_string: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+ ) !void {
+ _ = options;
+ _ = unused_fmt_string;
+ const thunk = ctx.thunk;
+ const macho_file = ctx.macho_file;
+ try writer.print("@{x} : size({x})\n", .{ thunk.value, thunk.size() });
+ for (thunk.symbols.keys()) |index| {
+ const sym = macho_file.getSymbol(index);
+ try writer.print(" %{d} : {s} : @{x}\n", .{ index, sym.getName(macho_file), sym.value });
+ }
+ }
- const source_addr = source_sym.n_value + @as(u32, @intCast(rel.r_address - base_offset));
- const target_addr = if (Atom.relocRequiresGot(macho_file, rel))
- macho_file.getGotEntryAddress(target).?
- else
- Atom.getRelocTargetAddress(macho_file, target, false);
- _ = Relocation.calcPcRelativeDisplacementArm64(source_addr, target_addr) catch
- return false;
+ const trampoline_size = 3 * @sizeOf(u32);
- return true;
-}
+ pub const Index = u32;
+};
-fn createThunkAtom(macho_file: *MachO) !Atom.Index {
- const sym_index = try macho_file.allocateSymbol();
- const atom_index = try macho_file.createAtom(sym_index, .{
- .size = @sizeOf(u32) * 3,
- .alignment = .@"4",
- });
- const sym = macho_file.getSymbolPtr(.{ .sym_index = sym_index });
- sym.n_type = macho.N_SECT;
- sym.n_sect = macho_file.text_section_index.? + 1;
- return atom_index;
-}
+/// Branch instruction has 26 bits immediate but is 4 byte aligned.
+const jump_bits = @bitSizeOf(i28);
+const max_distance = (1 << (jump_bits - 1));
-pub fn writeThunkCode(macho_file: *MachO, thunk: *const Thunk, writer: anytype) !void {
- const slice = thunk.targets.slice();
- for (thunk.getStartAtomIndex()..thunk.getEndAtomIndex(), 0..) |atom_index, target_index| {
- const atom = macho_file.getAtom(@intCast(atom_index));
- const sym = macho_file.getSymbol(atom.getSymbolWithLoc());
- const source_addr = sym.n_value;
- const tag = slice.items(.tag)[target_index];
- const target = slice.items(.target)[target_index];
- const target_addr = switch (tag) {
- .stub => macho_file.getStubsEntryAddress(target).?,
- .atom => macho_file.getSymbol(target).n_value,
- };
- const pages = Relocation.calcNumberOfPages(source_addr, target_addr);
- try writer.writeInt(u32, aarch64.Instruction.adrp(.x16, pages).toU32(), .little);
- const off = try Relocation.calcPageOffset(target_addr, .arithmetic);
- try writer.writeInt(u32, aarch64.Instruction.add(.x16, .x16, off, false).toU32(), .little);
- try writer.writeInt(u32, aarch64.Instruction.br(.x16).toU32(), .little);
- }
-}
+/// A branch will need an extender if its target is larger than
+/// `2^(jump_bits - 1) - margin` where margin is some arbitrary number.
+/// mold uses 5MiB margin, while ld64 uses 4MiB margin. We will follow mold
+/// and assume margin to be 5MiB.
+const max_allowed_distance = max_distance - 0x500_000;
-const std = @import("std");
+const aarch64 = @import("../aarch64.zig");
const assert = std.debug.assert;
-const log = std.log.scoped(.thunks);
+const log = std.log.scoped(.link);
const macho = std.macho;
const math = std.math;
const mem = std.mem;
-
-const aarch64 = @import("../../arch/aarch64/bits.zig");
+const std = @import("std");
+const trace = @import("../tracy.zig").trace;
const Allocator = mem.Allocator;
const Atom = @import("Atom.zig");
const MachO = @import("../MachO.zig");
const Relocation = @import("Relocation.zig");
-const SymbolWithLoc = MachO.SymbolWithLoc;
+const Symbol = @import("Symbol.zig");
src/link/MachO/UnwindInfo.zig
@@ -1,376 +1,122 @@
-gpa: Allocator,
-
/// List of all unwind records gathered from all objects and sorted
-/// by source function address.
-records: std.ArrayListUnmanaged(macho.compact_unwind_entry) = .{},
-records_lookup: std.AutoHashMapUnmanaged(SymbolWithLoc, RecordIndex) = .{},
+/// by allocated relative function address within the section.
+records: std.ArrayListUnmanaged(Record.Index) = .{},
/// List of all personalities referenced by either unwind info entries
/// or __eh_frame entries.
-personalities: [max_personalities]SymbolWithLoc = undefined,
+personalities: [max_personalities]Symbol.Index = undefined,
personalities_count: u2 = 0,
/// List of common encodings sorted in descending order with the most common first.
-common_encodings: [max_common_encodings]macho.compact_unwind_encoding_t = undefined,
+common_encodings: [max_common_encodings]Encoding = undefined,
common_encodings_count: u7 = 0,
/// List of record indexes containing an LSDA pointer.
-lsdas: std.ArrayListUnmanaged(RecordIndex) = .{},
-lsdas_lookup: std.AutoHashMapUnmanaged(RecordIndex, u32) = .{},
+lsdas: std.ArrayListUnmanaged(u32) = .{},
+lsdas_lookup: std.ArrayListUnmanaged(u32) = .{},
/// List of second level pages.
pages: std.ArrayListUnmanaged(Page) = .{},
-/// Upper bound (exclusive) of all the record ranges
-end_boundary: u64 = 0,
-
-const RecordIndex = u32;
-
-const max_personalities = 3;
-const max_common_encodings = 127;
-const max_compact_encodings = 256;
-
-const second_level_page_bytes = 0x1000;
-const second_level_page_words = second_level_page_bytes / @sizeOf(u32);
-
-const max_regular_second_level_entries =
- (second_level_page_bytes - @sizeOf(macho.unwind_info_regular_second_level_page_header)) /
- @sizeOf(macho.unwind_info_regular_second_level_entry);
-
-const max_compressed_second_level_entries =
- (second_level_page_bytes - @sizeOf(macho.unwind_info_compressed_second_level_page_header)) /
- @sizeOf(u32);
-
-const compressed_entry_func_offset_mask = ~@as(u24, 0);
-
-const Page = struct {
- kind: enum { regular, compressed },
- start: RecordIndex,
- count: u16,
- page_encodings: [max_compact_encodings]RecordIndex = undefined,
- page_encodings_count: u9 = 0,
-
- fn appendPageEncoding(page: *Page, record_id: RecordIndex) void {
- assert(page.page_encodings_count <= max_compact_encodings);
- page.page_encodings[page.page_encodings_count] = record_id;
- page.page_encodings_count += 1;
- }
-
- fn getPageEncoding(
- page: *const Page,
- info: *const UnwindInfo,
- enc: macho.compact_unwind_encoding_t,
- ) ?u8 {
- comptime var index: u9 = 0;
- inline while (index < max_compact_encodings) : (index += 1) {
- if (index >= page.page_encodings_count) return null;
- const record_id = page.page_encodings[index];
- const record = info.records.items[record_id];
- if (record.compactUnwindEncoding == enc) {
- return @as(u8, @intCast(index));
- }
- }
- return null;
- }
-
- fn format(
- page: *const Page,
- comptime unused_format_string: []const u8,
- options: std.fmt.FormatOptions,
- writer: anytype,
- ) !void {
- _ = page;
- _ = unused_format_string;
- _ = options;
- _ = writer;
- @compileError("do not format Page directly; use page.fmtDebug()");
- }
-
- const DumpCtx = struct {
- page: *const Page,
- info: *const UnwindInfo,
- };
-
- fn dump(
- ctx: DumpCtx,
- comptime unused_format_string: []const u8,
- options: std.fmt.FormatOptions,
- writer: anytype,
- ) @TypeOf(writer).Error!void {
- _ = options;
- comptime assert(unused_format_string.len == 0);
- try writer.writeAll("Page:\n");
- try writer.print(" kind: {s}\n", .{@tagName(ctx.page.kind)});
- try writer.print(" entries: {d} - {d}\n", .{
- ctx.page.start,
- ctx.page.start + ctx.page.count,
- });
- try writer.print(" encodings (count = {d})\n", .{ctx.page.page_encodings_count});
- for (ctx.page.page_encodings[0..ctx.page.page_encodings_count], 0..) |record_id, i| {
- const record = ctx.info.records.items[record_id];
- const enc = record.compactUnwindEncoding;
- try writer.print(" {d}: 0x{x:0>8}\n", .{ ctx.info.common_encodings_count + i, enc });
- }
- }
-
- fn fmtDebug(page: *const Page, info: *const UnwindInfo) std.fmt.Formatter(dump) {
- return .{ .data = .{
- .page = page,
- .info = info,
- } };
- }
-
- fn write(page: *const Page, info: *const UnwindInfo, writer: anytype) !void {
- switch (page.kind) {
- .regular => {
- try writer.writeStruct(macho.unwind_info_regular_second_level_page_header{
- .entryPageOffset = @sizeOf(macho.unwind_info_regular_second_level_page_header),
- .entryCount = page.count,
- });
-
- for (info.records.items[page.start..][0..page.count]) |record| {
- try writer.writeStruct(macho.unwind_info_regular_second_level_entry{
- .functionOffset = @as(u32, @intCast(record.rangeStart)),
- .encoding = record.compactUnwindEncoding,
- });
- }
- },
- .compressed => {
- const entry_offset = @sizeOf(macho.unwind_info_compressed_second_level_page_header) +
- @as(u16, @intCast(page.page_encodings_count)) * @sizeOf(u32);
- try writer.writeStruct(macho.unwind_info_compressed_second_level_page_header{
- .entryPageOffset = entry_offset,
- .entryCount = page.count,
- .encodingsPageOffset = @sizeOf(
- macho.unwind_info_compressed_second_level_page_header,
- ),
- .encodingsCount = page.page_encodings_count,
- });
-
- for (page.page_encodings[0..page.page_encodings_count]) |record_id| {
- const enc = info.records.items[record_id].compactUnwindEncoding;
- try writer.writeInt(u32, enc, .little);
- }
-
- assert(page.count > 0);
- const first_entry = info.records.items[page.start];
- for (info.records.items[page.start..][0..page.count]) |record| {
- const enc_index = blk: {
- if (info.getCommonEncoding(record.compactUnwindEncoding)) |id| {
- break :blk id;
- }
- const ncommon = info.common_encodings_count;
- break :blk ncommon + page.getPageEncoding(info, record.compactUnwindEncoding).?;
- };
- const compressed = macho.UnwindInfoCompressedEntry{
- .funcOffset = @as(u24, @intCast(record.rangeStart - first_entry.rangeStart)),
- .encodingIndex = @as(u8, @intCast(enc_index)),
- };
- try writer.writeStruct(compressed);
- }
- },
- }
- }
-};
+pub fn deinit(info: *UnwindInfo, allocator: Allocator) void {
+ info.records.deinit(allocator);
+ info.pages.deinit(allocator);
+ info.lsdas.deinit(allocator);
+ info.lsdas_lookup.deinit(allocator);
+}
-pub fn deinit(info: *UnwindInfo) void {
- info.records.deinit(info.gpa);
- info.records_lookup.deinit(info.gpa);
- info.pages.deinit(info.gpa);
- info.lsdas.deinit(info.gpa);
- info.lsdas_lookup.deinit(info.gpa);
+fn canFold(macho_file: *MachO, lhs_index: Record.Index, rhs_index: Record.Index) bool {
+ const cpu_arch = macho_file.options.cpu_arch.?;
+ const lhs = macho_file.getUnwindRecord(lhs_index);
+ const rhs = macho_file.getUnwindRecord(rhs_index);
+ if (cpu_arch == .x86_64) {
+ if (lhs.enc.getMode() == @intFromEnum(macho.UNWIND_X86_64_MODE.STACK_IND) or
+ rhs.enc.getMode() == @intFromEnum(macho.UNWIND_X86_64_MODE.STACK_IND)) return false;
+ }
+ const lhs_per = lhs.personality orelse 0;
+ const rhs_per = rhs.personality orelse 0;
+ return lhs.enc.eql(rhs.enc) and
+ lhs_per == rhs_per and
+ lhs.fde == rhs.fde and
+ lhs.getLsdaAtom(macho_file) == null and rhs.getLsdaAtom(macho_file) == null;
}
-pub fn scanRelocs(macho_file: *MachO) !void {
- if (macho_file.unwind_info_section_index == null) return;
-
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const cpu_arch = target.cpu.arch;
- for (macho_file.objects.items, 0..) |*object, object_id| {
- const unwind_records = object.getUnwindRecords();
- for (object.exec_atoms.items) |atom_index| {
- var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index);
- while (inner_syms_it.next()) |sym| {
- const record_id = object.unwind_records_lookup.get(sym) orelse continue;
- if (object.unwind_relocs_lookup[record_id].dead) continue;
- const record = unwind_records[record_id];
- if (!UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) {
- if (getPersonalityFunctionReloc(macho_file, @as(u32, @intCast(object_id)), record_id)) |rel| {
- // Personality function; add GOT pointer.
- const reloc_target = Atom.parseRelocTarget(macho_file, .{
- .object_id = @as(u32, @intCast(object_id)),
- .rel = rel,
- .code = mem.asBytes(&record),
- .base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))),
- });
- try macho_file.addGotEntry(reloc_target);
- }
- }
+pub fn generate(info: *UnwindInfo, macho_file: *MachO) !void {
+ const gpa = macho_file.base.allocator;
+
+ log.debug("generating unwind info", .{});
+
+ // Collect all unwind records
+ for (macho_file.sections.items(.atoms)) |atoms| {
+ for (atoms.items) |atom_index| {
+ const atom = macho_file.getAtom(atom_index) orelse continue;
+ if (!atom.flags.alive) continue;
+ const recs = atom.getUnwindRecords(macho_file);
+ try info.records.ensureUnusedCapacity(gpa, recs.len);
+ for (recs) |rec| {
+ if (!macho_file.getUnwindRecord(rec).alive) continue;
+ info.records.appendAssumeCapacity(rec);
}
}
}
-}
-
-pub fn collect(info: *UnwindInfo, macho_file: *MachO) !void {
- if (macho_file.unwind_info_section_index == null) return;
-
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const cpu_arch = target.cpu.arch;
-
- var records = std.ArrayList(macho.compact_unwind_entry).init(info.gpa);
- defer records.deinit();
-
- var sym_indexes = std.ArrayList(SymbolWithLoc).init(info.gpa);
- defer sym_indexes.deinit();
-
- // TODO handle dead stripping
- for (macho_file.objects.items, 0..) |*object, object_id| {
- log.debug("collecting unwind records in {s} ({d})", .{ object.name, object_id });
- const unwind_records = object.getUnwindRecords();
-
- // Contents of unwind records does not have to cover all symbol in executable section
- // so we need insert them ourselves.
- try records.ensureUnusedCapacity(object.exec_atoms.items.len);
- try sym_indexes.ensureUnusedCapacity(object.exec_atoms.items.len);
-
- for (object.exec_atoms.items) |atom_index| {
- var inner_syms_it = Atom.getInnerSymbolsIterator(macho_file, atom_index);
- var prev_symbol: ?SymbolWithLoc = null;
- while (inner_syms_it.next()) |symbol| {
- var record = if (object.unwind_records_lookup.get(symbol)) |record_id| blk: {
- if (object.unwind_relocs_lookup[record_id].dead) continue;
- var record = unwind_records[record_id];
-
- if (UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) {
- info.collectPersonalityFromDwarf(macho_file, @as(u32, @intCast(object_id)), symbol, &record);
- } else {
- if (getPersonalityFunctionReloc(
- macho_file,
- @as(u32, @intCast(object_id)),
- record_id,
- )) |rel| {
- const reloc_target = Atom.parseRelocTarget(macho_file, .{
- .object_id = @as(u32, @intCast(object_id)),
- .rel = rel,
- .code = mem.asBytes(&record),
- .base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))),
- });
- const personality_index = info.getPersonalityFunction(reloc_target) orelse inner: {
- const personality_index = info.personalities_count;
- info.personalities[personality_index] = reloc_target;
- info.personalities_count += 1;
- break :inner personality_index;
- };
-
- record.personalityFunction = personality_index + 1;
- UnwindEncoding.setPersonalityIndex(&record.compactUnwindEncoding, personality_index + 1);
- }
-
- if (getLsdaReloc(macho_file, @as(u32, @intCast(object_id)), record_id)) |rel| {
- const reloc_target = Atom.parseRelocTarget(macho_file, .{
- .object_id = @as(u32, @intCast(object_id)),
- .rel = rel,
- .code = mem.asBytes(&record),
- .base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry))),
- });
- record.lsda = @as(u64, @bitCast(reloc_target));
- }
- }
- break :blk record;
- } else blk: {
- const sym = macho_file.getSymbol(symbol);
- if (sym.n_desc == MachO.N_DEAD) continue;
- if (prev_symbol) |prev_sym| {
- const prev_addr = object.getSourceSymbol(prev_sym.sym_index).?.n_value;
- const curr_addr = object.getSourceSymbol(symbol.sym_index).?.n_value;
- if (prev_addr == curr_addr) continue;
- }
-
- if (!object.hasUnwindRecords()) {
- if (object.eh_frame_records_lookup.get(symbol)) |fde_offset| {
- if (object.eh_frame_relocs_lookup.get(fde_offset).?.dead) continue;
- var record = nullRecord();
- info.collectPersonalityFromDwarf(macho_file, @as(u32, @intCast(object_id)), symbol, &record);
- switch (cpu_arch) {
- .aarch64 => UnwindEncoding.setMode(&record.compactUnwindEncoding, macho.UNWIND_ARM64_MODE.DWARF),
- .x86_64 => UnwindEncoding.setMode(&record.compactUnwindEncoding, macho.UNWIND_X86_64_MODE.DWARF),
- else => unreachable,
- }
- break :blk record;
- }
- }
-
- break :blk nullRecord();
- };
- const atom = macho_file.getAtom(atom_index);
- const sym = macho_file.getSymbol(symbol);
- assert(sym.n_desc != MachO.N_DEAD);
- const size = if (inner_syms_it.next()) |next_sym| blk: {
- // All this trouble to account for symbol aliases.
- // TODO I think that remodelling the linker so that a Symbol references an Atom
- // is the way to go, kinda like we do for ELF. We might also want to perhaps tag
- // symbol aliases somehow so that they are excluded from everything except relocation
- // resolution.
- defer inner_syms_it.pos -= 1;
- const curr_addr = object.getSourceSymbol(symbol.sym_index).?.n_value;
- const next_addr = object.getSourceSymbol(next_sym.sym_index).?.n_value;
- if (next_addr > curr_addr) break :blk next_addr - curr_addr;
- break :blk macho_file.getSymbol(atom.getSymbolWithLoc()).n_value + atom.size - sym.n_value;
- } else macho_file.getSymbol(atom.getSymbolWithLoc()).n_value + atom.size - sym.n_value;
- record.rangeStart = sym.n_value;
- record.rangeLength = @as(u32, @intCast(size));
-
- try records.append(record);
- try sym_indexes.append(symbol);
-
- prev_symbol = symbol;
- }
+ // Encode records
+ for (info.records.items) |index| {
+ const rec = macho_file.getUnwindRecord(index);
+ if (rec.getFde(macho_file)) |fde| {
+ rec.enc.setDwarfSectionOffset(@intCast(fde.out_offset));
+ } else if (rec.getPersonality(macho_file)) |_| {
+ const personality_index = try info.getOrPutPersonalityFunction(rec.personality.?); // TODO handle error
+ rec.enc.setPersonalityIndex(personality_index + 1);
}
}
- // Record the ending boundary before folding.
- assert(records.items.len > 0);
- info.end_boundary = blk: {
- const last_record = records.items[records.items.len - 1];
- break :blk last_record.rangeStart + last_record.rangeLength;
- };
+ // Sort by assigned relative address within each output section
+ const sortFn = struct {
+ fn sortFn(ctx: *MachO, lhs_index: Record.Index, rhs_index: Record.Index) bool {
+ const lhs = ctx.getUnwindRecord(lhs_index);
+ const rhs = ctx.getUnwindRecord(rhs_index);
+ const lhsa = lhs.getAtom(ctx);
+ const rhsa = rhs.getAtom(ctx);
+ if (lhsa.out_n_sect == rhsa.out_n_sect) return lhs.getAtomAddress(ctx) < rhs.getAtomAddress(ctx);
+ return lhsa.out_n_sect < rhsa.out_n_sect;
+ }
+ }.sortFn;
+ mem.sort(Record.Index, info.records.items, macho_file, sortFn);
- // Fold records
- try info.records.ensureTotalCapacity(info.gpa, records.items.len);
- try info.records_lookup.ensureTotalCapacity(info.gpa, @as(u32, @intCast(sym_indexes.items.len)));
-
- var maybe_prev: ?macho.compact_unwind_entry = null;
- for (records.items, 0..) |record, i| {
- const record_id = blk: {
- if (maybe_prev) |prev| {
- const is_dwarf = UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch);
- if (is_dwarf or
- (prev.compactUnwindEncoding != record.compactUnwindEncoding) or
- (prev.personalityFunction != record.personalityFunction) or
- record.lsda > 0)
- {
- const record_id = @as(RecordIndex, @intCast(info.records.items.len));
- info.records.appendAssumeCapacity(record);
- maybe_prev = record;
- break :blk record_id;
- } else {
- break :blk @as(RecordIndex, @intCast(info.records.items.len - 1));
- }
+ // Fold the records
+ // Any adjacent two records that share encoding can be folded into one.
+ {
+ var i: usize = 0;
+ var j: usize = 1;
+ while (j < info.records.items.len) : (j += 1) {
+ if (canFold(macho_file, info.records.items[i], info.records.items[j])) {
+ const rec = macho_file.getUnwindRecord(info.records.items[i]);
+ rec.length += macho_file.getUnwindRecord(info.records.items[j]).length + 1;
} else {
- const record_id = @as(RecordIndex, @intCast(info.records.items.len));
- info.records.appendAssumeCapacity(record);
- maybe_prev = record;
- break :blk record_id;
+ i += 1;
+ info.records.items[i] = info.records.items[j];
}
- };
- info.records_lookup.putAssumeCapacityNoClobber(sym_indexes.items[i], record_id);
+ }
+ info.records.shrinkAndFree(gpa, i + 1);
+ }
+
+ for (info.records.items) |rec_index| {
+ const rec = macho_file.getUnwindRecord(rec_index);
+ const atom = rec.getAtom(macho_file);
+ log.debug("@{x}-{x} : {s} : rec({d}) : {}", .{
+ rec.getAtomAddress(macho_file),
+ rec.getAtomAddress(macho_file) + rec.length,
+ atom.getName(macho_file),
+ rec_index,
+ rec.enc,
+ });
}
// Calculate common encodings
{
const CommonEncWithCount = struct {
- enc: macho.compact_unwind_encoding_t,
+ enc: Encoding,
count: u32,
fn greaterThan(ctx: void, lhs: @This(), rhs: @This()) bool {
@@ -380,39 +126,38 @@ pub fn collect(info: *UnwindInfo, macho_file: *MachO) !void {
};
const Context = struct {
- pub fn hash(ctx: @This(), key: macho.compact_unwind_encoding_t) u32 {
+ pub fn hash(ctx: @This(), key: Encoding) u32 {
_ = ctx;
- return key;
+ return key.enc;
}
pub fn eql(
ctx: @This(),
- key1: macho.compact_unwind_encoding_t,
- key2: macho.compact_unwind_encoding_t,
+ key1: Encoding,
+ key2: Encoding,
b_index: usize,
) bool {
_ = ctx;
_ = b_index;
- return key1 == key2;
+ return key1.eql(key2);
}
};
var common_encodings_counts = std.ArrayHashMap(
- macho.compact_unwind_encoding_t,
+ Encoding,
CommonEncWithCount,
Context,
false,
- ).init(info.gpa);
+ ).init(gpa);
defer common_encodings_counts.deinit();
- for (info.records.items) |record| {
- assert(!isNull(record));
- if (UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) continue;
- const enc = record.compactUnwindEncoding;
- const gop = try common_encodings_counts.getOrPut(enc);
+ for (info.records.items) |rec_index| {
+ const rec = macho_file.getUnwindRecord(rec_index);
+ if (rec.enc.isDwarf(macho_file)) continue;
+ const gop = try common_encodings_counts.getOrPut(rec.enc);
if (!gop.found_existing) {
gop.value_ptr.* = .{
- .enc = enc,
+ .enc = rec.enc,
.count = 0,
};
}
@@ -427,7 +172,7 @@ pub fn collect(info: *UnwindInfo, macho_file: *MachO) !void {
if (i >= max_common_encodings) break;
if (slice[i].count < 2) continue;
info.appendCommonEncoding(slice[i].enc);
- log.debug("adding common encoding: {d} => 0x{x:0>8}", .{ i, slice[i].enc });
+ log.debug("adding common encoding: {d} => {}", .{ i, slice[i].enc });
}
}
@@ -435,8 +180,8 @@ pub fn collect(info: *UnwindInfo, macho_file: *MachO) !void {
{
var i: u32 = 0;
while (i < info.records.items.len) {
- const range_start_max: u64 =
- info.records.items[i].rangeStart + compressed_entry_func_offset_mask;
+ const rec = macho_file.getUnwindRecord(info.records.items[i]);
+ const range_start_max: u64 = rec.getAtomAddress(macho_file) + compressed_entry_func_offset_mask;
var encoding_count: u9 = info.common_encodings_count;
var space_left: u32 = second_level_page_words -
@sizeOf(macho.unwind_info_compressed_second_level_page_header) / @sizeOf(u32);
@@ -447,19 +192,18 @@ pub fn collect(info: *UnwindInfo, macho_file: *MachO) !void {
};
while (space_left >= 1 and i < info.records.items.len) {
- const record = info.records.items[i];
- const enc = record.compactUnwindEncoding;
- const is_dwarf = UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch);
+ const next = macho_file.getUnwindRecord(info.records.items[i]);
+ const is_dwarf = next.enc.isDwarf(macho_file);
- if (record.rangeStart >= range_start_max) {
+ if (next.getAtomAddress(macho_file) >= range_start_max) {
break;
- } else if (info.getCommonEncoding(enc) != null or
- page.getPageEncoding(info, enc) != null and !is_dwarf)
+ } else if (info.getCommonEncoding(next.enc) != null or
+ page.getPageEncoding(next.enc) != null and !is_dwarf)
{
i += 1;
space_left -= 1;
} else if (space_left >= 2 and encoding_count < max_compact_encodings) {
- page.appendPageEncoding(i);
+ page.appendPageEncoding(next.enc);
i += 1;
space_left -= 2;
encoding_count += 1;
@@ -481,63 +225,24 @@ pub fn collect(info: *UnwindInfo, macho_file: *MachO) !void {
page.kind = .compressed;
}
- log.debug("{}", .{page.fmtDebug(info)});
+ log.debug("{}", .{page.fmt(info.*)});
- try info.pages.append(info.gpa, page);
+ try info.pages.append(gpa, page);
}
}
- // Save indices of records requiring LSDA relocation
- try info.lsdas_lookup.ensureTotalCapacity(info.gpa, @as(u32, @intCast(info.records.items.len)));
- for (info.records.items, 0..) |rec, i| {
- info.lsdas_lookup.putAssumeCapacityNoClobber(@as(RecordIndex, @intCast(i)), @as(u32, @intCast(info.lsdas.items.len)));
- if (rec.lsda == 0) continue;
- try info.lsdas.append(info.gpa, @as(RecordIndex, @intCast(i)));
- }
-}
-
-fn collectPersonalityFromDwarf(
- info: *UnwindInfo,
- macho_file: *MachO,
- object_id: u32,
- sym_loc: SymbolWithLoc,
- record: *macho.compact_unwind_entry,
-) void {
- const object = &macho_file.objects.items[object_id];
- var it = object.getEhFrameRecordsIterator();
- const fde_offset = object.eh_frame_records_lookup.get(sym_loc).?;
- it.seekTo(fde_offset);
- const fde = (it.next() catch return).?; // We don't care about the error since we already handled it
- const cie_ptr = fde.getCiePointerSource(object_id, macho_file, fde_offset);
- const cie_offset = fde_offset + 4 - cie_ptr;
- it.seekTo(cie_offset);
- const cie = (it.next() catch return).?; // We don't care about the error since we already handled it
-
- if (cie.getPersonalityPointerReloc(
- macho_file,
- @as(u32, @intCast(object_id)),
- cie_offset,
- )) |target| {
- const personality_index = info.getPersonalityFunction(target) orelse inner: {
- const personality_index = info.personalities_count;
- info.personalities[personality_index] = target;
- info.personalities_count += 1;
- break :inner personality_index;
- };
-
- record.personalityFunction = personality_index + 1;
- UnwindEncoding.setPersonalityIndex(&record.compactUnwindEncoding, personality_index + 1);
+ // Save records having an LSDA pointer
+ try info.lsdas_lookup.ensureTotalCapacityPrecise(gpa, info.records.items.len);
+ for (info.records.items, 0..) |index, i| {
+ const rec = macho_file.getUnwindRecord(index);
+ info.lsdas_lookup.appendAssumeCapacity(@intCast(info.lsdas.items.len));
+ if (rec.getLsdaAtom(macho_file)) |_| {
+ try info.lsdas.append(gpa, @intCast(i));
+ }
}
}
-pub fn calcSectionSize(info: UnwindInfo, macho_file: *MachO) void {
- const sect_id = macho_file.unwind_info_section_index orelse return;
- const sect = &macho_file.sections.items(.header)[sect_id];
- sect.@"align" = 2;
- sect.size = info.calcRequiredSize();
-}
-
-fn calcRequiredSize(info: UnwindInfo) usize {
+pub fn calcSize(info: UnwindInfo) usize {
var total_size: usize = 0;
total_size += @sizeOf(macho.unwind_info_section_header);
total_size +=
@@ -549,59 +254,12 @@ fn calcRequiredSize(info: UnwindInfo) usize {
return total_size;
}
-pub fn write(info: *UnwindInfo, macho_file: *MachO) !void {
- const sect_id = macho_file.unwind_info_section_index orelse return;
- const sect = &macho_file.sections.items(.header)[sect_id];
- const seg_id = macho_file.sections.items(.segment_index)[sect_id];
- const seg = macho_file.segments.items[seg_id];
-
- const text_sect_id = macho_file.text_section_index.?;
- const text_sect = macho_file.sections.items(.header)[text_sect_id];
+pub fn write(info: UnwindInfo, macho_file: *MachO, buffer: []u8) !void {
+ const seg = macho_file.getTextSegment();
+ const header = macho_file.sections.items(.header)[macho_file.unwind_info_sect_index.?];
- var personalities: [max_personalities]u32 = undefined;
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const cpu_arch = target.cpu.arch;
-
- log.debug("Personalities:", .{});
- for (info.personalities[0..info.personalities_count], 0..) |reloc_target, i| {
- const addr = macho_file.getGotEntryAddress(reloc_target).?;
- personalities[i] = @as(u32, @intCast(addr - seg.vmaddr));
- log.debug(" {d}: 0x{x} ({s})", .{ i, personalities[i], macho_file.getSymbolName(reloc_target) });
- }
-
- for (info.records.items) |*rec| {
- // Finalize missing address values
- rec.rangeStart += text_sect.addr - seg.vmaddr;
- if (rec.personalityFunction > 0) {
- const index = math.cast(usize, rec.personalityFunction - 1) orelse return error.Overflow;
- rec.personalityFunction = personalities[index];
- }
-
- if (rec.compactUnwindEncoding > 0 and !UnwindEncoding.isDwarf(rec.compactUnwindEncoding, cpu_arch)) {
- const lsda_target = @as(SymbolWithLoc, @bitCast(rec.lsda));
- if (lsda_target.getFile()) |_| {
- const sym = macho_file.getSymbol(lsda_target);
- rec.lsda = sym.n_value - seg.vmaddr;
- }
- }
- }
-
- for (info.records.items, 0..) |record, i| {
- log.debug("Unwind record at offset 0x{x}", .{i * @sizeOf(macho.compact_unwind_entry)});
- log.debug(" start: 0x{x}", .{record.rangeStart});
- log.debug(" length: 0x{x}", .{record.rangeLength});
- log.debug(" compact encoding: 0x{x:0>8}", .{record.compactUnwindEncoding});
- log.debug(" personality: 0x{x}", .{record.personalityFunction});
- log.debug(" LSDA: 0x{x}", .{record.lsda});
- }
-
- var buffer = std.ArrayList(u8).init(info.gpa);
- defer buffer.deinit();
-
- const size = info.calcRequiredSize();
- try buffer.ensureTotalCapacityPrecise(size);
-
- var cwriter = std.io.countingWriter(buffer.writer());
+ var stream = std.io.fixedBufferStream(buffer);
+ var cwriter = std.io.countingWriter(stream.writer());
const writer = cwriter.writer();
const common_encodings_offset: u32 = @sizeOf(macho.unwind_info_section_header);
@@ -621,211 +279,404 @@ pub fn write(info: *UnwindInfo, macho_file: *MachO) !void {
});
try writer.writeAll(mem.sliceAsBytes(info.common_encodings[0..info.common_encodings_count]));
- try writer.writeAll(mem.sliceAsBytes(personalities[0..info.personalities_count]));
- const pages_base_offset = @as(u32, @intCast(size - (info.pages.items.len * second_level_page_bytes)));
+ for (info.personalities[0..info.personalities_count]) |sym_index| {
+ const sym = macho_file.getSymbol(sym_index);
+ try writer.writeInt(u32, @intCast(sym.getGotAddress(macho_file) - seg.vmaddr), .little);
+ }
+
+ const pages_base_offset = @as(u32, @intCast(header.size - (info.pages.items.len * second_level_page_bytes)));
const lsda_base_offset = @as(u32, @intCast(pages_base_offset -
(info.lsdas.items.len * @sizeOf(macho.unwind_info_section_header_lsda_index_entry))));
for (info.pages.items, 0..) |page, i| {
assert(page.count > 0);
- const first_entry = info.records.items[page.start];
+ const rec = macho_file.getUnwindRecord(info.records.items[page.start]);
try writer.writeStruct(macho.unwind_info_section_header_index_entry{
- .functionOffset = @as(u32, @intCast(first_entry.rangeStart)),
+ .functionOffset = @as(u32, @intCast(rec.getAtomAddress(macho_file) - seg.vmaddr)),
.secondLevelPagesSectionOffset = @as(u32, @intCast(pages_base_offset + i * second_level_page_bytes)),
.lsdaIndexArraySectionOffset = lsda_base_offset +
- info.lsdas_lookup.get(page.start).? * @sizeOf(macho.unwind_info_section_header_lsda_index_entry),
+ info.lsdas_lookup.items[page.start] * @sizeOf(macho.unwind_info_section_header_lsda_index_entry),
});
}
- // Relocate end boundary address
- const end_boundary = @as(u32, @intCast(info.end_boundary + text_sect.addr - seg.vmaddr));
+ const last_rec = macho_file.getUnwindRecord(info.records.items[info.records.items.len - 1]);
+ const sentinel_address = @as(u32, @intCast(last_rec.getAtomAddress(macho_file) + last_rec.length - seg.vmaddr));
try writer.writeStruct(macho.unwind_info_section_header_index_entry{
- .functionOffset = end_boundary,
+ .functionOffset = sentinel_address,
.secondLevelPagesSectionOffset = 0,
.lsdaIndexArraySectionOffset = lsda_base_offset +
@as(u32, @intCast(info.lsdas.items.len)) * @sizeOf(macho.unwind_info_section_header_lsda_index_entry),
});
- for (info.lsdas.items) |record_id| {
- const record = info.records.items[record_id];
+ for (info.lsdas.items) |index| {
+ const rec = macho_file.getUnwindRecord(info.records.items[index]);
try writer.writeStruct(macho.unwind_info_section_header_lsda_index_entry{
- .functionOffset = @as(u32, @intCast(record.rangeStart)),
- .lsdaOffset = @as(u32, @intCast(record.lsda)),
+ .functionOffset = @as(u32, @intCast(rec.getAtomAddress(macho_file) - seg.vmaddr)),
+ .lsdaOffset = @as(u32, @intCast(rec.getLsdaAddress(macho_file) - seg.vmaddr)),
});
}
for (info.pages.items) |page| {
const start = cwriter.bytes_written;
- try page.write(info, writer);
+ try page.write(info, macho_file, writer);
const nwritten = cwriter.bytes_written - start;
if (nwritten < second_level_page_bytes) {
- const offset = math.cast(usize, second_level_page_bytes - nwritten) orelse return error.Overflow;
- try writer.writeByteNTimes(0, offset);
+ try writer.writeByteNTimes(0, second_level_page_bytes - nwritten);
}
}
- const padding = buffer.items.len - cwriter.bytes_written;
+ const padding = buffer.len - cwriter.bytes_written;
if (padding > 0) {
- const offset = math.cast(usize, cwriter.bytes_written) orelse return error.Overflow;
- @memset(buffer.items[offset..], 0);
- }
-
- try macho_file.base.file.?.pwriteAll(buffer.items, sect.offset);
-}
-
-fn getRelocs(macho_file: *MachO, object_id: u32, record_id: usize) []const macho.relocation_info {
- const object = &macho_file.objects.items[object_id];
- assert(object.hasUnwindRecords());
- const rel_pos = object.unwind_relocs_lookup[record_id].reloc;
- const relocs = object.getRelocs(object.unwind_info_sect_id.?);
- return relocs[rel_pos.start..][0..rel_pos.len];
-}
-
-fn isPersonalityFunction(record_id: usize, rel: macho.relocation_info) bool {
- const base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry)));
- const rel_offset = rel.r_address - base_offset;
- return rel_offset == 16;
-}
-
-pub fn getPersonalityFunctionReloc(
- macho_file: *MachO,
- object_id: u32,
- record_id: usize,
-) ?macho.relocation_info {
- const relocs = getRelocs(macho_file, object_id, record_id);
- for (relocs) |rel| {
- if (isPersonalityFunction(record_id, rel)) return rel;
+ @memset(buffer[cwriter.bytes_written..], 0);
}
- return null;
}
-fn getPersonalityFunction(info: UnwindInfo, global_index: SymbolWithLoc) ?u2 {
+/// Finds `sym_index` in the fixed-size personalities table, registering it in
+/// the first free slot if it is not present yet. Returns the slot index, or
+/// `error.TooManyPersonalities` once all `max_personalities` slots are taken.
+fn getOrPutPersonalityFunction(info: *UnwindInfo, sym_index: Symbol.Index) error{TooManyPersonalities}!u2 {
comptime var index: u2 = 0;
inline while (index < max_personalities) : (index += 1) {
-        if (index >= info.personalities_count) return null;
-        if (info.personalities[index].eql(global_index)) {
+        // Already registered: reuse the existing slot.
+        if (info.personalities[index] == sym_index) {
+            return index;
+        } else if (index == info.personalities_count) {
+            // First unused slot: claim it for this personality symbol.
+            info.personalities[index] = sym_index;
+            info.personalities_count += 1;
return index;
}
}
-    return null;
-}
-
-fn isLsda(record_id: usize, rel: macho.relocation_info) bool {
-    const base_offset = @as(i32, @intCast(record_id * @sizeOf(macho.compact_unwind_entry)));
-    const rel_offset = rel.r_address - base_offset;
-    return rel_offset == 24;
-}
-
-pub fn getLsdaReloc(macho_file: *MachO, object_id: u32, record_id: usize) ?macho.relocation_info {
-    const relocs = getRelocs(macho_file, object_id, record_id);
-    for (relocs) |rel| {
-        if (isLsda(record_id, rel)) return rel;
-    }
-    return null;
+    return error.TooManyPersonalities;
}
-pub fn isNull(rec: macho.compact_unwind_entry) bool {
- return rec.rangeStart == 0 and
- rec.rangeLength == 0 and
- rec.compactUnwindEncoding == 0 and
- rec.lsda == 0 and
- rec.personalityFunction == 0;
-}
-
-inline fn nullRecord() macho.compact_unwind_entry {
- return .{
- .rangeStart = 0,
- .rangeLength = 0,
- .compactUnwindEncoding = 0,
- .personalityFunction = 0,
- .lsda = 0,
- };
-}
-
-fn appendCommonEncoding(info: *UnwindInfo, enc: macho.compact_unwind_encoding_t) void {
+/// Appends `enc` to the table of common (file-wide shared) encodings.
+fn appendCommonEncoding(info: *UnwindInfo, enc: Encoding) void {
assert(info.common_encodings_count <= max_common_encodings);
+    // NOTE(review): the assert permits count == max_common_encodings, in which
+    // case the store below would index one past the table — confirm callers
+    // bound the count strictly before appending.
info.common_encodings[info.common_encodings_count] = enc;
info.common_encodings_count += 1;
}
-fn getCommonEncoding(info: UnwindInfo, enc: macho.compact_unwind_encoding_t) ?u7 {
+/// Returns the index of `enc` in the common encodings table, or null if it is
+/// not among the currently registered common encodings.
+fn getCommonEncoding(info: UnwindInfo, enc: Encoding) ?u7 {
comptime var index: u7 = 0;
inline while (index < max_common_encodings) : (index += 1) {
if (index >= info.common_encodings_count) return null;
-        if (info.common_encodings[index] == enc) {
+        if (info.common_encodings[index].eql(enc)) {
return index;
}
}
return null;
}
-pub const UnwindEncoding = struct {
- pub fn getMode(enc: macho.compact_unwind_encoding_t) u4 {
+/// Typed wrapper around a raw compact unwind encoding word, providing
+/// accessors for its bit fields: mode, has-LSDA flag, personality index,
+/// and the DWARF section offset used by DWARF-mode encodings.
+pub const Encoding = extern struct {
+    enc: macho.compact_unwind_encoding_t,
+
+    /// Extracts the 4-bit mode field (bits 24-27). The mask is asserted
+    /// identical on arm64 and x86_64, so one mask suffices for both.
+    pub fn getMode(enc: Encoding) u4 {
comptime assert(macho.UNWIND_ARM64_MODE_MASK == macho.UNWIND_X86_64_MODE_MASK);
-        return @as(u4, @truncate((enc & macho.UNWIND_ARM64_MODE_MASK) >> 24));
+        return @as(u4, @truncate((enc.enc & macho.UNWIND_ARM64_MODE_MASK) >> 24));
}
-    pub fn isDwarf(enc: macho.compact_unwind_encoding_t, cpu_arch: std.Target.Cpu.Arch) bool {
-        const mode = getMode(enc);
-        return switch (cpu_arch) {
+    /// True when the mode field selects the per-arch DWARF fallback encoding.
+    pub fn isDwarf(enc: Encoding, macho_file: *MachO) bool {
+        const mode = enc.getMode();
+        return switch (macho_file.options.cpu_arch.?) {
.aarch64 => @as(macho.UNWIND_ARM64_MODE, @enumFromInt(mode)) == .DWARF,
.x86_64 => @as(macho.UNWIND_X86_64_MODE, @enumFromInt(mode)) == .DWARF,
else => unreachable,
};
}
-    pub fn setMode(enc: *macho.compact_unwind_encoding_t, mode: anytype) void {
-        enc.* |= @as(u32, @intCast(@intFromEnum(mode))) << 24;
+    /// ORs the mode enum value into bits 24-27. Does not clear a prior mode.
+    pub fn setMode(enc: *Encoding, mode: anytype) void {
+        enc.enc |= @as(u32, @intCast(@intFromEnum(mode))) << 24;
}
-    pub fn hasLsda(enc: macho.compact_unwind_encoding_t) bool {
-        const has_lsda = @as(u1, @truncate((enc & macho.UNWIND_HAS_LSDA) >> 31));
+    /// True when the has-LSDA flag (bit 31) is set.
+    pub fn hasLsda(enc: Encoding) bool {
+        const has_lsda = @as(u1, @truncate((enc.enc & macho.UNWIND_HAS_LSDA) >> 31));
return has_lsda == 1;
}
-    pub fn setHasLsda(enc: *macho.compact_unwind_encoding_t, has_lsda: bool) void {
+    /// ORs the has-LSDA flag into bit 31; cannot clear an already-set flag.
+    pub fn setHasLsda(enc: *Encoding, has_lsda: bool) void {
const mask = @as(u32, @intCast(@intFromBool(has_lsda))) << 31;
-        enc.* |= mask;
+        enc.enc |= mask;
}
-    pub fn getPersonalityIndex(enc: macho.compact_unwind_encoding_t) u2 {
-        const index = @as(u2, @truncate((enc & macho.UNWIND_PERSONALITY_MASK) >> 28));
+    /// Extracts the 2-bit personality index (bits 28-29).
+    pub fn getPersonalityIndex(enc: Encoding) u2 {
+        const index = @as(u2, @truncate((enc.enc & macho.UNWIND_PERSONALITY_MASK) >> 28));
return index;
}
-    pub fn setPersonalityIndex(enc: *macho.compact_unwind_encoding_t, index: u2) void {
+    /// ORs `index` into the personality field (bits 28-29).
+    pub fn setPersonalityIndex(enc: *Encoding, index: u2) void {
const mask = @as(u32, @intCast(index)) << 28;
-        enc.* |= mask;
+        enc.enc |= mask;
}
-    pub fn getDwarfSectionOffset(enc: macho.compact_unwind_encoding_t, cpu_arch: std.Target.Cpu.Arch) u24 {
-        assert(isDwarf(enc, cpu_arch));
-        const offset = @as(u24, @truncate(enc));
+    /// Low 24 bits: the FDE offset for DWARF-mode encodings.
+    /// NOTE(review): unlike the removed API, this no longer asserts isDwarf;
+    /// callers must ensure the encoding is DWARF mode themselves.
+    pub fn getDwarfSectionOffset(enc: Encoding) u24 {
+        const offset = @as(u24, @truncate(enc.enc));
return offset;
}
-    pub fn setDwarfSectionOffset(enc: *macho.compact_unwind_encoding_t, cpu_arch: std.Target.Cpu.Arch, offset: u24) void {
-        assert(isDwarf(enc.*, cpu_arch));
-        enc.* |= offset;
+    /// ORs `offset` into the low 24 bits (DWARF FDE offset). The isDwarf
+    /// assert of the removed API was dropped here as well.
+    pub fn setDwarfSectionOffset(enc: *Encoding, offset: u24) void {
+        enc.enc |= offset;
+    }
+
+    /// Raw-word equality of two encodings.
+    pub fn eql(enc: Encoding, other: Encoding) bool {
+        return enc.enc == other.enc;
+    }
+
+    /// Prints the raw encoding word as zero-padded 8-digit hex.
+    pub fn format(
+        enc: Encoding,
+        comptime unused_fmt_string: []const u8,
+        options: std.fmt.FormatOptions,
+        writer: anytype,
+    ) !void {
+        _ = unused_fmt_string;
+        _ = options;
+        try writer.print("0x{x:0>8}", .{enc.enc});
}
};
-const UnwindInfo = @This();
+/// One compact-unwind record tracked by the linker: the function atom it
+/// covers (atom + atom_offset), its encoding, and optional LSDA atom,
+/// personality symbol, and FDE (for DWARF-mode encodings), plus the owning
+/// object file and a liveness flag used for dead stripping.
+pub const Record = struct {
+    length: u32 = 0,
+    enc: Encoding = .{ .enc = 0 },
+    atom: Atom.Index = 0,
+    atom_offset: u32 = 0,
+    lsda: Atom.Index = 0,
+    lsda_offset: u32 = 0,
+    personality: ?Symbol.Index = null, // TODO make this zero-is-null
+    fde: Fde.Index = 0, // TODO actually make FDE at 0 an invalid FDE
+    file: File.Index = 0,
+    alive: bool = true,
+
+    /// Object file this record was parsed from; asserts the file exists.
+    pub fn getObject(rec: Record, macho_file: *MachO) *Object {
+        return macho_file.getFile(rec.file).?.object;
+    }
+
+    /// Function atom covered by this record; asserts the atom exists.
+    pub fn getAtom(rec: Record, macho_file: *MachO) *Atom {
+        return macho_file.getAtom(rec.atom).?;
+    }
+
+    /// LSDA atom, or null when the record has none.
+    pub fn getLsdaAtom(rec: Record, macho_file: *MachO) ?*Atom {
+        return macho_file.getAtom(rec.lsda);
+    }
+
+    /// Personality symbol, or null when the record has none.
+    pub fn getPersonality(rec: Record, macho_file: *MachO) ?*Symbol {
+        const personality = rec.personality orelse return null;
+        return macho_file.getSymbol(personality);
+    }
+
+    /// Associated FDE by value; only meaningful for DWARF-mode encodings,
+    /// null otherwise.
+    pub fn getFde(rec: Record, macho_file: *MachO) ?Fde {
+        if (!rec.enc.isDwarf(macho_file)) return null;
+        return rec.getObject(macho_file).fdes.items[rec.fde];
+    }
+
+    /// Same as getFde but returns a mutable pointer into the object's FDE list.
+    pub fn getFdePtr(rec: Record, macho_file: *MachO) ?*Fde {
+        if (!rec.enc.isDwarf(macho_file)) return null;
+        return &rec.getObject(macho_file).fdes.items[rec.fde];
+    }
+
+    /// Address of the covered function: atom base value plus offset.
+    pub fn getAtomAddress(rec: Record, macho_file: *MachO) u64 {
+        const atom = rec.getAtom(macho_file);
+        return atom.value + rec.atom_offset;
+    }
+
+    /// Address of the LSDA, or 0 when the record has no LSDA atom.
+    pub fn getLsdaAddress(rec: Record, macho_file: *MachO) u64 {
+        const lsda = rec.getLsdaAtom(macho_file) orelse return 0;
+        return lsda.value + rec.lsda_offset;
+    }
+
+    /// Direct formatting is disallowed; use `fmt` which carries the MachO
+    /// context needed to resolve names and DWARF state.
+    pub fn format(
+        rec: Record,
+        comptime unused_fmt_string: []const u8,
+        options: std.fmt.FormatOptions,
+        writer: anytype,
+    ) !void {
+        _ = rec;
+        _ = unused_fmt_string;
+        _ = options;
+        _ = writer;
+        @compileError("do not format UnwindInfo.Records directly");
+    }
+
+    /// Returns a formatter that pairs the record with its MachO context.
+    pub fn fmt(rec: Record, macho_file: *MachO) std.fmt.Formatter(format2) {
+        return .{ .data = .{
+            .rec = rec,
+            .macho_file = macho_file,
+        } };
+    }
+
+    const FormatContext = struct {
+        rec: Record,
+        macho_file: *MachO,
+    };
+
+    /// Debug rendering: encoding, length, optional FDE index, atom name,
+    /// and a "[*]" marker for dead (not-alive) records.
+    fn format2(
+        ctx: FormatContext,
+        comptime unused_fmt_string: []const u8,
+        options: std.fmt.FormatOptions,
+        writer: anytype,
+    ) !void {
+        _ = unused_fmt_string;
+        _ = options;
+        const rec = ctx.rec;
+        const macho_file = ctx.macho_file;
+        try writer.print("{x} : len({x})", .{
+            rec.enc.enc, rec.length,
+        });
+        if (rec.enc.isDwarf(macho_file)) try writer.print(" : fde({d})", .{rec.fde});
+        try writer.print(" : {s}", .{rec.getAtom(macho_file).getName(macho_file)});
+        if (!rec.alive) try writer.writeAll(" : [*]");
+    }
+
+    pub const Index = u32;
+};
+
+// Capacity of the personalities table; presumably bounded by the 2-bit
+// personality index in the encoding word (0 reserved for "none") — TODO confirm.
+const max_personalities = 3;
+// Capacity of the shared common-encodings table (indexed by u7).
+const max_common_encodings = 127;
+// Capacity of a page-local encodings table.
+const max_compact_encodings = 256;
+
+// Each second-level page occupies exactly 4 KiB in the output section.
+const second_level_page_bytes = 0x1000;
+const second_level_page_words = second_level_page_bytes / @sizeOf(u32);
+
+// Entries that fit in one regular page after its header.
+const max_regular_second_level_entries =
+    (second_level_page_bytes - @sizeOf(macho.unwind_info_regular_second_level_page_header)) /
+    @sizeOf(macho.unwind_info_regular_second_level_entry);
+
+// Entries that fit in one compressed page after its header (one u32 each).
+const max_compressed_second_level_entries =
+    (second_level_page_bytes - @sizeOf(macho.unwind_info_compressed_second_level_page_header)) /
+    @sizeOf(u32);
+
+// Compressed entries carry a 24-bit function offset field.
+const compressed_entry_func_offset_mask = ~@as(u24, 0);
+
+/// A second-level unwind-info page: a contiguous run of `count` records
+/// starting at `start`, serialized either in regular form (explicit 32-bit
+/// encoding per entry) or compressed form (8-bit encoding index into the
+/// common/page-local tables plus a 24-bit function offset per entry).
+const Page = struct {
+    kind: enum { regular, compressed },
+    start: u32,
+    count: u16,
+    page_encodings: [max_compact_encodings]Encoding = undefined,
+    page_encodings_count: u9 = 0,
+
+    /// Appends `enc` to this page's local encodings table.
+    fn appendPageEncoding(page: *Page, enc: Encoding) void {
+        assert(page.page_encodings_count <= max_compact_encodings);
+        page.page_encodings[page.page_encodings_count] = enc;
+        page.page_encodings_count += 1;
+    }
+
+    /// Returns the index of `enc` within this page's local encodings table,
+    /// or null when it is not present.
+    fn getPageEncoding(page: Page, enc: Encoding) ?u8 {
+        comptime var index: u9 = 0;
+        inline while (index < max_compact_encodings) : (index += 1) {
+            if (index >= page.page_encodings_count) return null;
+            if (page.page_encodings[index].eql(enc)) {
+                return @as(u8, @intCast(index));
+            }
+        }
+        return null;
+    }
+
+    /// Direct formatting is disallowed; use `fmt` which carries the
+    /// UnwindInfo context for common-encoding numbering.
+    fn format(
+        page: *const Page,
+        comptime unused_format_string: []const u8,
+        options: std.fmt.FormatOptions,
+        writer: anytype,
+    ) !void {
+        _ = page;
+        _ = unused_format_string;
+        _ = options;
+        _ = writer;
+        @compileError("do not format Page directly; use page.fmt()");
+    }
+
+    const FormatPageContext = struct {
+        page: Page,
+        info: UnwindInfo,
+    };
+
+    /// Debug rendering: kind, entry range, and the page-local encodings
+    /// numbered after the shared common encodings.
+    fn format2(
+        ctx: FormatPageContext,
+        comptime unused_format_string: []const u8,
+        options: std.fmt.FormatOptions,
+        writer: anytype,
+    ) @TypeOf(writer).Error!void {
+        _ = options;
+        _ = unused_format_string;
+        try writer.writeAll("Page:\n");
+        try writer.print("  kind: {s}\n", .{@tagName(ctx.page.kind)});
+        try writer.print("  entries: {d} - {d}\n", .{
+            ctx.page.start,
+            ctx.page.start + ctx.page.count,
+        });
+        try writer.print("  encodings (count = {d})\n", .{ctx.page.page_encodings_count});
+        for (ctx.page.page_encodings[0..ctx.page.page_encodings_count], 0..) |enc, i| {
+            try writer.print("    {d}: {}\n", .{ ctx.info.common_encodings_count + i, enc });
+        }
+    }
+
+    /// Returns a formatter pairing this page with its UnwindInfo context.
+    fn fmt(page: Page, info: UnwindInfo) std.fmt.Formatter(format2) {
+        return .{ .data = .{
+            .page = page,
+            .info = info,
+        } };
+    }
+
+    /// Serializes the page (header, optional local encodings, entries) to
+    /// `writer`. Function offsets are emitted relative to the __TEXT segment
+    /// base; compressed entries are relative to the page's first record.
+    fn write(page: Page, info: UnwindInfo, macho_file: *MachO, writer: anytype) !void {
+        const seg = macho_file.getTextSegment();
+
+        switch (page.kind) {
+            .regular => {
+                try writer.writeStruct(macho.unwind_info_regular_second_level_page_header{
+                    .entryPageOffset = @sizeOf(macho.unwind_info_regular_second_level_page_header),
+                    .entryCount = page.count,
+                });
+
+                for (info.records.items[page.start..][0..page.count]) |index| {
+                    const rec = macho_file.getUnwindRecord(index);
+                    try writer.writeStruct(macho.unwind_info_regular_second_level_entry{
+                        .functionOffset = @as(u32, @intCast(rec.getAtomAddress(macho_file) - seg.vmaddr)),
+                        .encoding = rec.enc.enc,
+                    });
+                }
+            },
+            .compressed => {
+                // Entries follow the header and the page-local encodings array.
+                const entry_offset = @sizeOf(macho.unwind_info_compressed_second_level_page_header) +
+                    @as(u16, @intCast(page.page_encodings_count)) * @sizeOf(u32);
+                try writer.writeStruct(macho.unwind_info_compressed_second_level_page_header{
+                    .entryPageOffset = entry_offset,
+                    .entryCount = page.count,
+                    .encodingsPageOffset = @sizeOf(macho.unwind_info_compressed_second_level_page_header),
+                    .encodingsCount = page.page_encodings_count,
+                });
+
+                for (page.page_encodings[0..page.page_encodings_count]) |enc| {
+                    try writer.writeInt(u32, enc.enc, .little);
+                }
+
+                assert(page.count > 0);
+                const first_rec = macho_file.getUnwindRecord(info.records.items[page.start]);
+                for (info.records.items[page.start..][0..page.count]) |index| {
+                    const rec = macho_file.getUnwindRecord(index);
+                    // Prefer the shared common-encodings table; fall back to
+                    // the page-local table, numbered after the common ones.
+                    const enc_index = blk: {
+                        if (info.getCommonEncoding(rec.enc)) |id| break :blk id;
+                        const ncommon = info.common_encodings_count;
+                        break :blk ncommon + page.getPageEncoding(rec.enc).?;
+                    };
+                    const compressed = macho.UnwindInfoCompressedEntry{
+                        .funcOffset = @as(u24, @intCast(rec.getAtomAddress(macho_file) - first_rec.getAtomAddress(macho_file))),
+                        .encodingIndex = @as(u8, @intCast(enc_index)),
+                    };
+                    try writer.writeStruct(compressed);
+                }
+            },
+        }
+    }
+};
const std = @import("std");
const assert = std.debug.assert;
const eh_frame = @import("eh_frame.zig");
const fs = std.fs;
const leb = std.leb;
-const log = std.log.scoped(.unwind_info);
+const log = std.log.scoped(.link);
const macho = std.macho;
const math = std.math;
const mem = std.mem;
-const trace = @import("../../tracy.zig").trace;
+const trace = @import("../tracy.zig").trace;
const Allocator = mem.Allocator;
const Atom = @import("Atom.zig");
-const EhFrameRecord = eh_frame.EhFrameRecord;
+const Fde = eh_frame.Fde;
+const File = @import("file.zig").File;
const MachO = @import("../MachO.zig");
const Object = @import("Object.zig");
-const SymbolWithLoc = MachO.SymbolWithLoc;
+const Symbol = @import("Symbol.zig");
+const UnwindInfo = @This();
src/link/MachO/uuid.zig
@@ -4,22 +4,31 @@
/// and we will use it too as it seems accepted by Apple OSes.
/// TODO LLD also hashes the output filename to disambiguate between same builds with different
/// output files. Should we also do that?
-pub fn calcUuid(comp: *const Compilation, file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void {
+pub fn calcUuid(
+ allocator: Allocator,
+ thread_pool: *ThreadPool,
+ file: fs.File,
+ file_size: u64,
+ out: *[Md5.digest_length]u8,
+) !void {
+ const tracy = trace(@src());
+ defer tracy.end();
+
const chunk_size: usize = 1024 * 1024;
const num_chunks: usize = std.math.cast(usize, @divTrunc(file_size, chunk_size)) orelse return error.Overflow;
const actual_num_chunks = if (@rem(file_size, chunk_size) > 0) num_chunks + 1 else num_chunks;
- const hashes = try comp.gpa.alloc([Md5.digest_length]u8, actual_num_chunks);
- defer comp.gpa.free(hashes);
+ const hashes = try allocator.alloc([Md5.digest_length]u8, actual_num_chunks);
+ defer allocator.free(hashes);
- var hasher = Hasher(Md5){ .allocator = comp.gpa, .thread_pool = comp.thread_pool };
+ var hasher = Hasher(Md5){ .allocator = allocator, .thread_pool = thread_pool };
try hasher.hash(file, hashes, .{
.chunk_size = chunk_size,
.max_file_size = file_size,
});
- const final_buffer = try comp.gpa.alloc(u8, actual_num_chunks * Md5.digest_length);
- defer comp.gpa.free(final_buffer);
+ const final_buffer = try allocator.alloc(u8, actual_num_chunks * Md5.digest_length);
+ defer allocator.free(final_buffer);
for (hashes, 0..) |hash, i| {
@memcpy(final_buffer[i * Md5.digest_length ..][0..Md5.digest_length], &hash);
@@ -35,11 +44,12 @@ inline fn conform(out: *[Md5.digest_length]u8) void {
out[8] = (out[8] & 0x3F) | 0x80;
}
-const std = @import("std");
const fs = std.fs;
const mem = std.mem;
+const std = @import("std");
+const trace = @import("../tracy.zig").trace;
const Allocator = mem.Allocator;
-const Compilation = @import("../../Compilation.zig");
const Md5 = std.crypto.hash.Md5;
const Hasher = @import("hasher.zig").ParallelHasher;
+const ThreadPool = std.Thread.Pool;
src/link/MachO/zld.zig
@@ -1,1230 +0,0 @@
-pub fn linkWithZld(
- macho_file: *MachO,
- arena: Allocator,
- prog_node: *std.Progress.Node,
-) link.File.FlushError!void {
- const tracy = trace(@src());
- defer tracy.end();
-
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
- const target = comp.root_mod.resolved_target.result;
- const emit = macho_file.base.emit;
-
- const directory = emit.directory; // Just an alias to make it shorter to type.
- const full_out_path = try directory.join(arena, &[_][]const u8{emit.sub_path});
- const opt_zcu = comp.module;
-
- // If there is no Zig code to compile, then we should skip flushing the output file because it
- // will not be part of the linker line anyway.
- const module_obj_path: ?[]const u8 = if (opt_zcu != null) blk: {
- try macho_file.flushModule(arena, prog_node);
-
- if (fs.path.dirname(full_out_path)) |dirname| {
- break :blk try fs.path.join(arena, &.{ dirname, macho_file.base.zcu_object_sub_path.? });
- } else {
- break :blk macho_file.base.zcu_object_sub_path.?;
- }
- } else null;
-
- var sub_prog_node = prog_node.start("MachO Flush", 0);
- sub_prog_node.activate();
- sub_prog_node.context.refresh();
- defer sub_prog_node.end();
-
- const output_mode = comp.config.output_mode;
- const link_mode = comp.config.link_mode;
- const cpu_arch = target.cpu.arch;
- const is_lib = output_mode == .Lib;
- const is_dyn_lib = link_mode == .Dynamic and is_lib;
- const is_exe_or_dyn_lib = is_dyn_lib or output_mode == .Exe;
- const stack_size = macho_file.base.stack_size;
-
- const id_symlink_basename = "zld.id";
-
- var man: Cache.Manifest = undefined;
- defer if (!macho_file.base.disable_lld_caching) man.deinit();
-
- var digest: [Cache.hex_digest_len]u8 = undefined;
-
- const objects = comp.objects;
-
- if (!macho_file.base.disable_lld_caching) {
- man = comp.cache_parent.obtain();
-
- // We are about to obtain this lock, so here we give other processes a chance first.
- macho_file.base.releaseLock();
-
- comptime assert(Compilation.link_hash_implementation_version == 11);
-
- for (objects) |obj| {
- _ = try man.addFile(obj.path, null);
- man.hash.add(obj.must_link);
- }
- for (comp.c_object_table.keys()) |key| {
- _ = try man.addFile(key.status.success.object_path, null);
- }
- try man.addOptionalFile(module_obj_path);
- // We can skip hashing libc and libc++ components that we are in charge of building from Zig
- // installation sources because they are always a product of the compiler version + target information.
- man.hash.add(stack_size);
- man.hash.add(macho_file.pagezero_vmsize);
- man.hash.add(macho_file.headerpad_size);
- man.hash.add(macho_file.headerpad_max_install_names);
- man.hash.add(macho_file.base.gc_sections);
- man.hash.add(macho_file.dead_strip_dylibs);
- man.hash.add(comp.root_mod.strip);
- try MachO.hashAddFrameworks(&man, macho_file.frameworks);
- man.hash.addListOfBytes(macho_file.base.rpath_list);
- if (is_dyn_lib) {
- man.hash.addOptionalBytes(macho_file.install_name);
- man.hash.addOptional(comp.version);
- }
- try link.hashAddSystemLibs(&man, comp.system_libs);
- man.hash.addOptionalBytes(comp.sysroot);
- man.hash.addListOfBytes(comp.force_undefined_symbols.keys());
- try man.addOptionalFile(macho_file.entitlements);
-
- // We don't actually care whether it's a cache hit or miss; we just
- // need the digest and the lock.
- _ = try man.hit();
- digest = man.final();
-
- var prev_digest_buf: [digest.len]u8 = undefined;
- const prev_digest: []u8 = Cache.readSmallFile(
- directory.handle,
- id_symlink_basename,
- &prev_digest_buf,
- ) catch |err| blk: {
- log.debug("MachO Zld new_digest={s} error: {s}", .{
- std.fmt.fmtSliceHexLower(&digest),
- @errorName(err),
- });
- // Handle this as a cache miss.
- break :blk prev_digest_buf[0..0];
- };
- if (mem.eql(u8, prev_digest, &digest)) {
- // Hot diggity dog! The output binary is already there.
- log.debug("MachO Zld digest={s} match - skipping invocation", .{
- std.fmt.fmtSliceHexLower(&digest),
- });
- macho_file.base.lock = man.toOwnedLock();
- return;
- }
- log.debug("MachO Zld prev_digest={s} new_digest={s}", .{
- std.fmt.fmtSliceHexLower(prev_digest),
- std.fmt.fmtSliceHexLower(&digest),
- });
-
- // We are about to change the output file to be different, so we invalidate the build hash now.
- directory.handle.deleteFile(id_symlink_basename) catch |err| switch (err) {
- error.FileNotFound => {},
- else => |e| return e,
- };
- }
-
- if (output_mode == .Obj) {
- // LLD's MachO driver does not support the equivalent of `-r` so we do a simple file copy
- // here. TODO: think carefully about how we can avoid this redundant operation when doing
- // build-obj. See also the corresponding TODO in linkAsArchive.
- const the_object_path = blk: {
- if (objects.len != 0) {
- break :blk objects[0].path;
- }
-
- if (comp.c_object_table.count() != 0)
- break :blk comp.c_object_table.keys()[0].status.success.object_path;
-
- if (module_obj_path) |p|
- break :blk p;
-
- // TODO I think this is unreachable. Audit this situation when solving the above TODO
- // regarding eliding redundant object -> object transformations.
- return error.NoObjectsToLink;
- };
- // This can happen when using --enable-cache and using the stage1 backend. In this case
- // we can skip the file copy.
- if (!mem.eql(u8, the_object_path, full_out_path)) {
- try fs.cwd().copyFile(the_object_path, fs.cwd(), full_out_path, .{});
- }
- } else {
- const sub_path = emit.sub_path;
-
- const old_file = macho_file.base.file; // TODO is this needed at all?
- defer macho_file.base.file = old_file;
-
- const file = try directory.handle.createFile(sub_path, .{
- .truncate = true,
- .read = true,
- .mode = link.File.determineMode(false, output_mode, link_mode),
- });
- defer file.close();
- macho_file.base.file = file;
-
- // Index 0 is always a null symbol.
- try macho_file.locals.append(gpa, .{
- .n_strx = 0,
- .n_type = 0,
- .n_sect = 0,
- .n_desc = 0,
- .n_value = 0,
- });
- try macho_file.strtab.buffer.append(gpa, 0);
-
- // Positional arguments to the linker such as object files and static archives.
- var positionals = std.ArrayList(Compilation.LinkObject).init(arena);
- try positionals.ensureUnusedCapacity(objects.len);
- positionals.appendSliceAssumeCapacity(objects);
-
- for (comp.c_object_table.keys()) |key| {
- try positionals.append(.{ .path = key.status.success.object_path });
- }
-
- if (module_obj_path) |p| {
- try positionals.append(.{ .path = p });
- }
-
- if (comp.compiler_rt_lib) |lib| try positionals.append(.{ .path = lib.full_object_path });
- if (comp.compiler_rt_obj) |obj| try positionals.append(.{ .path = obj.full_object_path });
-
- // libc++ dep
- if (comp.config.link_libcpp) {
- try positionals.ensureUnusedCapacity(2);
- positionals.appendAssumeCapacity(.{ .path = comp.libcxxabi_static_lib.?.full_object_path });
- positionals.appendAssumeCapacity(.{ .path = comp.libcxx_static_lib.?.full_object_path });
- }
-
- var libs = std.StringArrayHashMap(link.SystemLib).init(arena);
-
- {
- const vals = comp.system_libs.values();
- try libs.ensureUnusedCapacity(vals.len);
- for (vals) |v| libs.putAssumeCapacity(v.path.?, v);
- }
-
- {
- try libs.ensureUnusedCapacity(macho_file.frameworks.len);
- for (macho_file.frameworks) |v| libs.putAssumeCapacity(v.path, .{
- .needed = v.needed,
- .weak = v.weak,
- .path = v.path,
- });
- }
-
- try macho_file.resolveLibSystem(arena, comp, &libs);
-
- if (comp.verbose_link) {
- var argv = std.ArrayList([]const u8).init(arena);
-
- try argv.append("zig");
- try argv.append("ld");
-
- if (is_exe_or_dyn_lib) {
- try argv.append("-dynamic");
- }
-
- if (is_dyn_lib) {
- try argv.append("-dylib");
-
- if (macho_file.install_name) |install_name| {
- try argv.append("-install_name");
- try argv.append(install_name);
- }
- }
-
- {
- const platform = Platform.fromTarget(target);
- try argv.append("-platform_version");
- try argv.append(@tagName(platform.os_tag));
- try argv.append(try std.fmt.allocPrint(arena, "{}", .{platform.version}));
-
- const sdk_version: ?std.SemanticVersion = load_commands.inferSdkVersion(macho_file);
- if (sdk_version) |ver| {
- try argv.append(try std.fmt.allocPrint(arena, "{d}.{d}", .{ ver.major, ver.minor }));
- } else {
- try argv.append(try std.fmt.allocPrint(arena, "{}", .{platform.version}));
- }
- }
-
- if (comp.sysroot) |syslibroot| {
- try argv.append("-syslibroot");
- try argv.append(syslibroot);
- }
-
- for (macho_file.base.rpath_list) |rpath| {
- try argv.append("-rpath");
- try argv.append(rpath);
- }
-
- try argv.appendSlice(&.{
- "-pagezero_size", try std.fmt.allocPrint(arena, "0x{x}", .{macho_file.pagezero_vmsize}),
- "-headerpad_size", try std.fmt.allocPrint(arena, "0x{x}", .{macho_file.headerpad_size}),
- });
-
- if (macho_file.headerpad_max_install_names) {
- try argv.append("-headerpad_max_install_names");
- }
-
- if (macho_file.base.gc_sections) {
- try argv.append("-dead_strip");
- }
-
- if (macho_file.dead_strip_dylibs) {
- try argv.append("-dead_strip_dylibs");
- }
-
- if (macho_file.entry_name) |entry_name| {
- try argv.appendSlice(&.{ "-e", entry_name });
- }
-
- for (objects) |obj| {
- if (obj.must_link) {
- try argv.append("-force_load");
- }
- try argv.append(obj.path);
- }
-
- for (comp.c_object_table.keys()) |key| {
- try argv.append(key.status.success.object_path);
- }
-
- if (module_obj_path) |p| {
- try argv.append(p);
- }
-
- if (comp.compiler_rt_lib) |lib| try argv.append(lib.full_object_path);
- if (comp.compiler_rt_obj) |obj| try argv.append(obj.full_object_path);
-
- if (comp.config.link_libcpp) {
- try argv.append(comp.libcxxabi_static_lib.?.full_object_path);
- try argv.append(comp.libcxx_static_lib.?.full_object_path);
- }
-
- try argv.append("-o");
- try argv.append(full_out_path);
-
- try argv.append("-lSystem");
-
- for (comp.system_libs.keys()) |l_name| {
- const info = comp.system_libs.get(l_name).?;
- const arg = if (info.needed)
- try std.fmt.allocPrint(arena, "-needed-l{s}", .{l_name})
- else if (info.weak)
- try std.fmt.allocPrint(arena, "-weak-l{s}", .{l_name})
- else
- try std.fmt.allocPrint(arena, "-l{s}", .{l_name});
- try argv.append(arg);
- }
-
- for (macho_file.frameworks) |framework| {
- const name = std.fs.path.stem(framework.path);
- const arg = if (framework.needed)
- try std.fmt.allocPrint(arena, "-needed_framework {s}", .{name})
- else if (framework.weak)
- try std.fmt.allocPrint(arena, "-weak_framework {s}", .{name})
- else
- try std.fmt.allocPrint(arena, "-framework {s}", .{name});
- try argv.append(arg);
- }
-
- if (is_dyn_lib and macho_file.base.allow_shlib_undefined) {
- try argv.append("-undefined");
- try argv.append("dynamic_lookup");
- }
-
- Compilation.dump_argv(argv.items);
- }
-
- var dependent_libs = std.fifo.LinearFifo(MachO.DylibReExportInfo, .Dynamic).init(arena);
-
- for (positionals.items) |obj| {
- const in_file = try std.fs.cwd().openFile(obj.path, .{});
- defer in_file.close();
-
- var parse_ctx = MachO.ParseErrorCtx.init(gpa);
- defer parse_ctx.deinit();
-
- macho_file.parsePositional(
- in_file,
- obj.path,
- obj.must_link,
- &dependent_libs,
- &parse_ctx,
- ) catch |err| try macho_file.handleAndReportParseError(obj.path, err, &parse_ctx);
- }
-
- for (libs.keys(), libs.values()) |path, lib| {
- const in_file = try std.fs.cwd().openFile(path, .{});
- defer in_file.close();
-
- var parse_ctx = MachO.ParseErrorCtx.init(gpa);
- defer parse_ctx.deinit();
-
- macho_file.parseLibrary(
- in_file,
- path,
- lib,
- false,
- false,
- null,
- &dependent_libs,
- &parse_ctx,
- ) catch |err| try macho_file.handleAndReportParseError(path, err, &parse_ctx);
- }
-
- try macho_file.parseDependentLibs(&dependent_libs);
-
- try macho_file.resolveSymbols();
- if (macho_file.unresolved.count() > 0) {
- try macho_file.reportUndefined();
- return error.FlushFailure;
- }
-
- for (macho_file.objects.items, 0..) |*object, object_id| {
- object.splitIntoAtoms(macho_file, @as(u32, @intCast(object_id))) catch |err| switch (err) {
- error.MissingEhFrameSection => try macho_file.reportParseError(
- object.name,
- "missing section: '__TEXT,__eh_frame' is required but could not be found",
- .{},
- ),
- error.BadDwarfCfi => try macho_file.reportParseError(
- object.name,
- "invalid DWARF: failed to parse '__TEXT,__eh_frame' section",
- .{},
- ),
- else => |e| return e,
- };
- }
-
- if (macho_file.base.gc_sections) {
- try dead_strip.gcAtoms(macho_file);
- }
-
- try macho_file.createDyldPrivateAtom();
- try macho_file.createTentativeDefAtoms();
-
- if (comp.config.output_mode == .Exe) {
- const global = macho_file.getEntryPoint().?;
- if (macho_file.getSymbol(global).undf()) {
- // We do one additional check here in case the entry point was found in one of the dylibs.
- // (I actually have no idea what this would imply but it is a possible outcome and so we
- // support it.)
- try macho_file.addStubEntry(global);
- }
- }
-
- for (macho_file.objects.items) |object| {
- for (object.atoms.items) |atom_index| {
- const atom = macho_file.getAtom(atom_index);
- const sym = macho_file.getSymbol(atom.getSymbolWithLoc());
- const header = macho_file.sections.items(.header)[sym.n_sect - 1];
- if (header.isZerofill()) continue;
-
- const relocs = Atom.getAtomRelocs(macho_file, atom_index);
- try Atom.scanAtomRelocs(macho_file, atom_index, relocs);
- }
- }
-
- try eh_frame.scanRelocs(macho_file);
- try UnwindInfo.scanRelocs(macho_file);
-
- if (macho_file.dyld_stub_binder_index) |index|
- try macho_file.addGotEntry(macho_file.globals.items[index]);
-
- try calcSectionSizes(macho_file);
-
- var unwind_info = UnwindInfo{ .gpa = gpa };
- defer unwind_info.deinit();
- try unwind_info.collect(macho_file);
-
- try eh_frame.calcSectionSize(macho_file, &unwind_info);
- unwind_info.calcSectionSize(macho_file);
-
- try pruneAndSortSections(macho_file);
- try createSegments(macho_file);
- try allocateSegments(macho_file);
-
- try macho_file.allocateSpecialSymbols();
-
- if (build_options.enable_logging) {
- macho_file.logSymtab();
- macho_file.logSegments();
- macho_file.logSections();
- macho_file.logAtoms();
- }
-
- try writeAtoms(macho_file);
- if (target.cpu.arch == .aarch64) try writeThunks(macho_file);
- try writeDyldPrivateAtom(macho_file);
-
- if (macho_file.stubs_section_index) |_| {
- try writeStubs(macho_file);
- try writeStubHelpers(macho_file);
- try writeLaSymbolPtrs(macho_file);
- }
- if (macho_file.got_section_index) |sect_id|
- try writePointerEntries(macho_file, sect_id, &macho_file.got_table);
- if (macho_file.tlv_ptr_section_index) |sect_id|
- try writePointerEntries(macho_file, sect_id, &macho_file.tlv_ptr_table);
-
- try eh_frame.write(macho_file, &unwind_info);
- try unwind_info.write(macho_file);
- try macho_file.writeLinkeditSegmentData();
-
- // If the last section of __DATA segment is zerofill section, we need to ensure
- // that the free space between the end of the last non-zerofill section of __DATA
- // segment and the beginning of __LINKEDIT segment is zerofilled as the loader will
- // copy-paste this space into memory for quicker zerofill operation.
- if (macho_file.data_segment_cmd_index) |data_seg_id| blk: {
- var physical_zerofill_start: ?u64 = null;
- const section_indexes = macho_file.getSectionIndexes(data_seg_id);
- for (macho_file.sections.items(.header)[section_indexes.start..section_indexes.end]) |header| {
- if (header.isZerofill() and header.size > 0) break;
- physical_zerofill_start = header.offset + header.size;
- } else break :blk;
- const start = physical_zerofill_start orelse break :blk;
- const linkedit = macho_file.getLinkeditSegmentPtr();
- const size = math.cast(usize, linkedit.fileoff - start) orelse return error.Overflow;
- if (size > 0) {
- log.debug("zeroing out zerofill area of length {x} at {x}", .{ size, start });
- const padding = try gpa.alloc(u8, size);
- defer gpa.free(padding);
- @memset(padding, 0);
- try macho_file.base.file.?.pwriteAll(padding, start);
- }
- }
-
- // Write code signature padding if required
- var codesig: ?CodeSignature = if (macho_file.requiresCodeSignature()) blk: {
- // Preallocate space for the code signature.
- // We need to do this at this stage so that we have the load commands with proper values
- // written out to the file.
- // The most important here is to have the correct vm and filesize of the __LINKEDIT segment
- // where the code signature goes into.
- var codesig = CodeSignature.init(MachO.getPageSize(cpu_arch));
- codesig.code_directory.ident = fs.path.basename(full_out_path);
- if (macho_file.entitlements) |path| {
- try codesig.addEntitlements(gpa, path);
- }
- try macho_file.writeCodeSignaturePadding(&codesig);
- break :blk codesig;
- } else null;
- defer if (codesig) |*csig| csig.deinit(gpa);
-
- // Write load commands
- var lc_buffer = std.ArrayList(u8).init(arena);
- const lc_writer = lc_buffer.writer();
-
- try macho_file.writeSegmentHeaders(lc_writer);
- try lc_writer.writeStruct(macho_file.dyld_info_cmd);
- try lc_writer.writeStruct(macho_file.function_starts_cmd);
- try lc_writer.writeStruct(macho_file.data_in_code_cmd);
- try lc_writer.writeStruct(macho_file.symtab_cmd);
- try lc_writer.writeStruct(macho_file.dysymtab_cmd);
- try load_commands.writeDylinkerLC(lc_writer);
-
- switch (output_mode) {
- .Exe => blk: {
- const seg_id = macho_file.header_segment_cmd_index.?;
- const seg = macho_file.segments.items[seg_id];
- const global = macho_file.getEntryPoint() orelse break :blk;
- const sym = macho_file.getSymbol(global);
-
- const addr: u64 = if (sym.undf())
- // In this case, the symbol has been resolved in one of dylibs and so we point
- // to the stub as its vmaddr value.
- macho_file.getStubsEntryAddress(global).?
- else
- sym.n_value;
-
- try lc_writer.writeStruct(macho.entry_point_command{
- .entryoff = @as(u32, @intCast(addr - seg.vmaddr)),
- .stacksize = macho_file.base.stack_size,
- });
- },
- .Lib => if (link_mode == .Dynamic) {
- try load_commands.writeDylibIdLC(macho_file, lc_writer);
- },
- else => {},
- }
-
- try load_commands.writeRpathLCs(macho_file, lc_writer);
- try lc_writer.writeStruct(macho.source_version_command{
- .version = 0,
- });
- {
- const platform = Platform.fromTarget(target);
- const sdk_version: ?std.SemanticVersion = load_commands.inferSdkVersion(macho_file);
- if (platform.isBuildVersionCompatible()) {
- try load_commands.writeBuildVersionLC(platform, sdk_version, lc_writer);
- } else {
- try load_commands.writeVersionMinLC(platform, sdk_version, lc_writer);
- }
- }
-
- const uuid_cmd_offset = @sizeOf(macho.mach_header_64) + @as(u32, @intCast(lc_buffer.items.len));
- try lc_writer.writeStruct(macho_file.uuid_cmd);
-
- try load_commands.writeLoadDylibLCs(
- macho_file.dylibs.items,
- macho_file.referenced_dylibs.keys(),
- lc_writer,
- );
-
- if (codesig != null) {
- try lc_writer.writeStruct(macho_file.codesig_cmd);
- }
-
- const ncmds = load_commands.calcNumOfLCs(lc_buffer.items);
- try macho_file.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64));
- try macho_file.writeHeader(ncmds, @as(u32, @intCast(lc_buffer.items.len)));
- try macho_file.writeUuid(comp, uuid_cmd_offset, codesig != null);
-
- if (codesig) |*csig| {
- try macho_file.writeCodeSignature(comp, csig); // code signing always comes last
- try MachO.invalidateKernelCache(directory.handle, macho_file.base.emit.sub_path);
- }
- }
-
- if (!macho_file.base.disable_lld_caching) {
- // Update the file with the digest. If it fails we can continue; it only
- // means that the next invocation will have an unnecessary cache miss.
- Cache.writeSmallFile(directory.handle, id_symlink_basename, &digest) catch |err| {
- log.debug("failed to save linking hash digest file: {s}", .{@errorName(err)});
- };
- // Again failure here only means an unnecessary cache miss.
- if (man.have_exclusive_lock) {
- man.writeManifest() catch |err| {
- log.debug("failed to write cache manifest when linking: {s}", .{@errorName(err)});
- };
- }
- // We hang on to this lock so that the output file path can be used without
- // other processes clobbering it.
- macho_file.base.lock = man.toOwnedLock();
- }
-}
-
-fn createSegments(macho_file: *MachO) !void {
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const page_size = MachO.getPageSize(target.cpu.arch);
- const aligned_pagezero_vmsize = mem.alignBackward(u64, macho_file.pagezero_vmsize, page_size);
- if (macho_file.base.comp.config.output_mode != .Lib and aligned_pagezero_vmsize > 0) {
- if (aligned_pagezero_vmsize != macho_file.pagezero_vmsize) {
- log.warn("requested __PAGEZERO size (0x{x}) is not page aligned", .{macho_file.pagezero_vmsize});
- log.warn(" rounding down to 0x{x}", .{aligned_pagezero_vmsize});
- }
- macho_file.pagezero_segment_cmd_index = @intCast(macho_file.segments.items.len);
- try macho_file.segments.append(gpa, .{
- .cmdsize = @sizeOf(macho.segment_command_64),
- .segname = MachO.makeStaticString("__PAGEZERO"),
- .vmsize = aligned_pagezero_vmsize,
- });
- }
-
- // __TEXT segment is non-optional
- {
- const protection = MachO.getSegmentMemoryProtection("__TEXT");
- macho_file.text_segment_cmd_index = @intCast(macho_file.segments.items.len);
- macho_file.header_segment_cmd_index = macho_file.text_segment_cmd_index.?;
- try macho_file.segments.append(gpa, .{
- .cmdsize = @sizeOf(macho.segment_command_64),
- .segname = MachO.makeStaticString("__TEXT"),
- .maxprot = protection,
- .initprot = protection,
- });
- }
-
- for (macho_file.sections.items(.header), 0..) |header, sect_id| {
- if (header.size == 0) continue; // empty section
-
- const segname = header.segName();
- const segment_id = macho_file.getSegmentByName(segname) orelse blk: {
- log.debug("creating segment '{s}'", .{segname});
- const segment_id = @as(u8, @intCast(macho_file.segments.items.len));
- const protection = MachO.getSegmentMemoryProtection(segname);
- try macho_file.segments.append(gpa, .{
- .cmdsize = @sizeOf(macho.segment_command_64),
- .segname = MachO.makeStaticString(segname),
- .maxprot = protection,
- .initprot = protection,
- });
- break :blk segment_id;
- };
- const segment = &macho_file.segments.items[segment_id];
- segment.cmdsize += @sizeOf(macho.section_64);
- segment.nsects += 1;
- macho_file.sections.items(.segment_index)[sect_id] = segment_id;
- }
-
- if (macho_file.getSegmentByName("__DATA_CONST")) |index| {
- macho_file.data_const_segment_cmd_index = index;
- }
-
- if (macho_file.getSegmentByName("__DATA")) |index| {
- macho_file.data_segment_cmd_index = index;
- }
-
- // __LINKEDIT always comes last
- {
- const protection = MachO.getSegmentMemoryProtection("__LINKEDIT");
- macho_file.linkedit_segment_cmd_index = @intCast(macho_file.segments.items.len);
- try macho_file.segments.append(gpa, .{
- .cmdsize = @sizeOf(macho.segment_command_64),
- .segname = MachO.makeStaticString("__LINKEDIT"),
- .maxprot = protection,
- .initprot = protection,
- });
- }
-}
-
-fn writeAtoms(macho_file: *MachO) !void {
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
- const slice = macho_file.sections.slice();
-
- for (slice.items(.first_atom_index), 0..) |first_atom_index, sect_id| {
- const header = slice.items(.header)[sect_id];
- if (header.isZerofill()) continue;
-
- var atom_index = first_atom_index orelse continue;
-
- var buffer = try gpa.alloc(u8, math.cast(usize, header.size) orelse return error.Overflow);
- defer gpa.free(buffer);
- @memset(buffer, 0); // TODO with NOPs
-
- log.debug("writing atoms in {s},{s}", .{ header.segName(), header.sectName() });
-
- while (true) {
- const atom = macho_file.getAtom(atom_index);
- if (atom.getFile()) |file| {
- const this_sym = macho_file.getSymbol(atom.getSymbolWithLoc());
- const padding_size: usize = if (atom.next_index) |next_index| blk: {
- const next_sym = macho_file.getSymbol(macho_file.getAtom(next_index).getSymbolWithLoc());
- const size = next_sym.n_value - (this_sym.n_value + atom.size);
- break :blk math.cast(usize, size) orelse return error.Overflow;
- } else 0;
-
- log.debug(" (adding ATOM(%{d}, '{s}') from object({d}) to buffer)", .{
- atom.sym_index,
- macho_file.getSymbolName(atom.getSymbolWithLoc()),
- file,
- });
- if (padding_size > 0) {
- log.debug(" (with padding {x})", .{padding_size});
- }
-
- const offset = math.cast(usize, this_sym.n_value - header.addr) orelse
- return error.Overflow;
- log.debug(" (at offset 0x{x})", .{offset});
-
- const code = Atom.getAtomCode(macho_file, atom_index);
- const relocs = Atom.getAtomRelocs(macho_file, atom_index);
- const size = math.cast(usize, atom.size) orelse return error.Overflow;
- @memcpy(buffer[offset .. offset + size], code);
- try Atom.resolveRelocs(
- macho_file,
- atom_index,
- buffer[offset..][0..size],
- relocs,
- );
- }
-
- if (atom.next_index) |next_index| {
- atom_index = next_index;
- } else break;
- }
-
- log.debug(" (writing at file offset 0x{x})", .{header.offset});
- try macho_file.base.file.?.pwriteAll(buffer, header.offset);
- }
-}
-
-fn writeDyldPrivateAtom(macho_file: *MachO) !void {
- const atom_index = macho_file.dyld_private_atom_index orelse return;
- const atom = macho_file.getAtom(atom_index);
- const sym = macho_file.getSymbol(atom.getSymbolWithLoc());
- const sect_id = macho_file.data_section_index.?;
- const header = macho_file.sections.items(.header)[sect_id];
- const offset = sym.n_value - header.addr + header.offset;
- log.debug("writing __dyld_private at offset 0x{x}", .{offset});
- const buffer: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64);
- try macho_file.base.file.?.pwriteAll(&buffer, offset);
-}
-
-fn writeThunks(macho_file: *MachO) !void {
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- assert(target.cpu.arch == .aarch64);
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
-
- const sect_id = macho_file.text_section_index orelse return;
- const header = macho_file.sections.items(.header)[sect_id];
-
- for (macho_file.thunks.items, 0..) |*thunk, i| {
- if (thunk.getSize() == 0) continue;
- const thunk_size = math.cast(usize, thunk.getSize()) orelse return error.Overflow;
- var buffer = try std.ArrayList(u8).initCapacity(gpa, thunk_size);
- defer buffer.deinit();
- try thunks.writeThunkCode(macho_file, thunk, buffer.writer());
- const thunk_atom = macho_file.getAtom(thunk.getStartAtomIndex());
- const thunk_sym = macho_file.getSymbol(thunk_atom.getSymbolWithLoc());
- const offset = thunk_sym.n_value - header.addr + header.offset;
- log.debug("writing thunk({d}) at offset 0x{x}", .{ i, offset });
- try macho_file.base.file.?.pwriteAll(buffer.items, offset);
- }
-}
-
-fn writePointerEntries(macho_file: *MachO, sect_id: u8, table: anytype) !void {
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
- const header = macho_file.sections.items(.header)[sect_id];
- const capacity = math.cast(usize, header.size) orelse return error.Overflow;
- var buffer = try std.ArrayList(u8).initCapacity(gpa, capacity);
- defer buffer.deinit();
- for (table.entries.items) |entry| {
- const sym = macho_file.getSymbol(entry);
- buffer.writer().writeInt(u64, sym.n_value, .little) catch unreachable;
- }
- log.debug("writing __DATA_CONST,__got contents at file offset 0x{x}", .{header.offset});
- try macho_file.base.file.?.pwriteAll(buffer.items, header.offset);
-}
-
-fn writeStubs(macho_file: *MachO) !void {
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const cpu_arch = target.cpu.arch;
- const stubs_header = macho_file.sections.items(.header)[macho_file.stubs_section_index.?];
- const la_symbol_ptr_header = macho_file.sections.items(.header)[macho_file.la_symbol_ptr_section_index.?];
-
- const capacity = math.cast(usize, stubs_header.size) orelse return error.Overflow;
- var buffer = try std.ArrayList(u8).initCapacity(gpa, capacity);
- defer buffer.deinit();
-
- for (0..macho_file.stub_table.count()) |index| {
- try stubs.writeStubCode(.{
- .cpu_arch = cpu_arch,
- .source_addr = stubs_header.addr + stubs.stubSize(cpu_arch) * index,
- .target_addr = la_symbol_ptr_header.addr + index * @sizeOf(u64),
- }, buffer.writer());
- }
-
- log.debug("writing __TEXT,__stubs contents at file offset 0x{x}", .{stubs_header.offset});
- try macho_file.base.file.?.pwriteAll(buffer.items, stubs_header.offset);
-}
-
-fn writeStubHelpers(macho_file: *MachO) !void {
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const cpu_arch = target.cpu.arch;
- const stub_helper_header = macho_file.sections.items(.header)[macho_file.stub_helper_section_index.?];
-
- const capacity = math.cast(usize, stub_helper_header.size) orelse return error.Overflow;
- var buffer = try std.ArrayList(u8).initCapacity(gpa, capacity);
- defer buffer.deinit();
-
- {
- const dyld_private_addr = blk: {
- const atom = macho_file.getAtom(macho_file.dyld_private_atom_index.?);
- const sym = macho_file.getSymbol(atom.getSymbolWithLoc());
- break :blk sym.n_value;
- };
- const dyld_stub_binder_got_addr = blk: {
- const sym_loc = macho_file.globals.items[macho_file.dyld_stub_binder_index.?];
- break :blk macho_file.getGotEntryAddress(sym_loc).?;
- };
- try stubs.writeStubHelperPreambleCode(.{
- .cpu_arch = cpu_arch,
- .source_addr = stub_helper_header.addr,
- .dyld_private_addr = dyld_private_addr,
- .dyld_stub_binder_got_addr = dyld_stub_binder_got_addr,
- }, buffer.writer());
- }
-
- for (0..macho_file.stub_table.count()) |index| {
- const source_addr = stub_helper_header.addr + stubs.stubHelperPreambleSize(cpu_arch) +
- stubs.stubHelperSize(cpu_arch) * index;
- try stubs.writeStubHelperCode(.{
- .cpu_arch = cpu_arch,
- .source_addr = source_addr,
- .target_addr = stub_helper_header.addr,
- }, buffer.writer());
- }
-
- log.debug("writing __TEXT,__stub_helper contents at file offset 0x{x}", .{
- stub_helper_header.offset,
- });
- try macho_file.base.file.?.pwriteAll(buffer.items, stub_helper_header.offset);
-}
-
-fn writeLaSymbolPtrs(macho_file: *MachO) !void {
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const cpu_arch = target.cpu.arch;
- const la_symbol_ptr_header = macho_file.sections.items(.header)[macho_file.la_symbol_ptr_section_index.?];
- const stub_helper_header = macho_file.sections.items(.header)[macho_file.stub_helper_section_index.?];
-
- const capacity = math.cast(usize, la_symbol_ptr_header.size) orelse return error.Overflow;
- var buffer = try std.ArrayList(u8).initCapacity(gpa, capacity);
- defer buffer.deinit();
-
- for (0..macho_file.stub_table.count()) |index| {
- const target_addr = stub_helper_header.addr + stubs.stubHelperPreambleSize(cpu_arch) +
- stubs.stubHelperSize(cpu_arch) * index;
- buffer.writer().writeInt(u64, target_addr, .little) catch unreachable;
- }
-
- log.debug("writing __DATA,__la_symbol_ptr contents at file offset 0x{x}", .{
- la_symbol_ptr_header.offset,
- });
- try macho_file.base.file.?.pwriteAll(buffer.items, la_symbol_ptr_header.offset);
-}
-
-fn pruneAndSortSections(macho_file: *MachO) !void {
- const Entry = struct {
- index: u8,
-
- pub fn lessThan(ctx: *MachO, lhs: @This(), rhs: @This()) bool {
- const lhs_header = ctx.sections.items(.header)[lhs.index];
- const rhs_header = ctx.sections.items(.header)[rhs.index];
- return MachO.getSectionPrecedence(lhs_header) < MachO.getSectionPrecedence(rhs_header);
- }
- };
-
- const comp = macho_file.base.comp;
- const gpa = comp.gpa;
-
- var entries = try std.ArrayList(Entry).initCapacity(gpa, macho_file.sections.slice().len);
- defer entries.deinit();
-
- for (0..macho_file.sections.slice().len) |index| {
- const section = macho_file.sections.get(index);
- if (section.header.size == 0) {
- log.debug("pruning section {s},{s} {?d}", .{
- section.header.segName(),
- section.header.sectName(),
- section.first_atom_index,
- });
- for (&[_]*?u8{
- &macho_file.text_section_index,
- &macho_file.data_const_section_index,
- &macho_file.data_section_index,
- &macho_file.bss_section_index,
- &macho_file.thread_vars_section_index,
- &macho_file.thread_data_section_index,
- &macho_file.thread_bss_section_index,
- &macho_file.eh_frame_section_index,
- &macho_file.unwind_info_section_index,
- &macho_file.got_section_index,
- &macho_file.tlv_ptr_section_index,
- &macho_file.stubs_section_index,
- &macho_file.stub_helper_section_index,
- &macho_file.la_symbol_ptr_section_index,
- }) |maybe_index| {
- if (maybe_index.* != null and maybe_index.*.? == index) {
- maybe_index.* = null;
- }
- }
- continue;
- }
- entries.appendAssumeCapacity(.{ .index = @intCast(index) });
- }
-
- mem.sort(Entry, entries.items, macho_file, Entry.lessThan);
-
- var slice = macho_file.sections.toOwnedSlice();
- defer slice.deinit(gpa);
-
- const backlinks = try gpa.alloc(u8, slice.len);
- defer gpa.free(backlinks);
- for (entries.items, 0..) |entry, i| {
- backlinks[entry.index] = @as(u8, @intCast(i));
- }
-
- try macho_file.sections.ensureTotalCapacity(gpa, entries.items.len);
- for (entries.items) |entry| {
- macho_file.sections.appendAssumeCapacity(slice.get(entry.index));
- }
-
- for (&[_]*?u8{
- &macho_file.text_section_index,
- &macho_file.data_const_section_index,
- &macho_file.data_section_index,
- &macho_file.bss_section_index,
- &macho_file.thread_vars_section_index,
- &macho_file.thread_data_section_index,
- &macho_file.thread_bss_section_index,
- &macho_file.eh_frame_section_index,
- &macho_file.unwind_info_section_index,
- &macho_file.got_section_index,
- &macho_file.tlv_ptr_section_index,
- &macho_file.stubs_section_index,
- &macho_file.stub_helper_section_index,
- &macho_file.la_symbol_ptr_section_index,
- }) |maybe_index| {
- if (maybe_index.*) |*index| {
- index.* = backlinks[index.*];
- }
- }
-}
-
-fn calcSectionSizes(macho_file: *MachO) !void {
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const slice = macho_file.sections.slice();
- for (slice.items(.header), 0..) |*header, sect_id| {
- if (header.size == 0) continue;
- if (macho_file.text_section_index) |txt| {
- if (txt == sect_id and target.cpu.arch == .aarch64) continue;
- }
-
- var atom_index = slice.items(.first_atom_index)[sect_id] orelse continue;
-
- header.size = 0;
- header.@"align" = 0;
-
- while (true) {
- const atom = macho_file.getAtom(atom_index);
- const atom_offset = atom.alignment.forward(header.size);
- const padding = atom_offset - header.size;
-
- const sym = macho_file.getSymbolPtr(atom.getSymbolWithLoc());
- sym.n_value = atom_offset;
-
- header.size += padding + atom.size;
- header.@"align" = @max(header.@"align", atom.alignment.toLog2Units());
-
- atom_index = atom.next_index orelse break;
- }
- }
-
- if (macho_file.text_section_index != null and target.cpu.arch == .aarch64) {
- // Create jump/branch range extenders if needed.
- try thunks.createThunks(macho_file, macho_file.text_section_index.?);
- }
-
- // Update offsets of all symbols contained within each Atom.
- // We need to do this since our unwind info synthesiser relies on
- // traversing the symbols when synthesising unwind info and DWARF CFI records.
- for (slice.items(.first_atom_index)) |first_atom_index| {
- var atom_index = first_atom_index orelse continue;
-
- while (true) {
- const atom = macho_file.getAtom(atom_index);
- const sym = macho_file.getSymbol(atom.getSymbolWithLoc());
-
- if (atom.getFile() != null) {
- // Update each symbol contained within the atom
- var it = Atom.getInnerSymbolsIterator(macho_file, atom_index);
- while (it.next()) |sym_loc| {
- const inner_sym = macho_file.getSymbolPtr(sym_loc);
- inner_sym.n_value = sym.n_value + Atom.calcInnerSymbolOffset(
- macho_file,
- atom_index,
- sym_loc.sym_index,
- );
- }
-
- // If there is a section alias, update it now too
- if (Atom.getSectionAlias(macho_file, atom_index)) |sym_loc| {
- const alias = macho_file.getSymbolPtr(sym_loc);
- alias.n_value = sym.n_value;
- }
- }
-
- if (atom.next_index) |next_index| {
- atom_index = next_index;
- } else break;
- }
- }
-
- if (macho_file.got_section_index) |sect_id| {
- const header = &macho_file.sections.items(.header)[sect_id];
- header.size = macho_file.got_table.count() * @sizeOf(u64);
- header.@"align" = 3;
- }
-
- if (macho_file.tlv_ptr_section_index) |sect_id| {
- const header = &macho_file.sections.items(.header)[sect_id];
- header.size = macho_file.tlv_ptr_table.count() * @sizeOf(u64);
- header.@"align" = 3;
- }
-
- const cpu_arch = target.cpu.arch;
-
- if (macho_file.stubs_section_index) |sect_id| {
- const header = &macho_file.sections.items(.header)[sect_id];
- header.size = macho_file.stub_table.count() * stubs.stubSize(cpu_arch);
- header.@"align" = math.log2(stubs.stubAlignment(cpu_arch));
- }
-
- if (macho_file.stub_helper_section_index) |sect_id| {
- const header = &macho_file.sections.items(.header)[sect_id];
- header.size = macho_file.stub_table.count() * stubs.stubHelperSize(cpu_arch) +
- stubs.stubHelperPreambleSize(cpu_arch);
- header.@"align" = math.log2(stubs.stubAlignment(cpu_arch));
- }
-
- if (macho_file.la_symbol_ptr_section_index) |sect_id| {
- const header = &macho_file.sections.items(.header)[sect_id];
- header.size = macho_file.stub_table.count() * @sizeOf(u64);
- header.@"align" = 3;
- }
-}
-
-fn allocateSegments(macho_file: *MachO) !void {
- for (macho_file.segments.items, 0..) |*segment, segment_index| {
- const is_text_segment = mem.eql(u8, segment.segName(), "__TEXT");
- const base_size = if (is_text_segment)
- try load_commands.calcMinHeaderPad(macho_file, .{
- .segments = macho_file.segments.items,
- .dylibs = macho_file.dylibs.items,
- .referenced_dylibs = macho_file.referenced_dylibs.keys(),
- })
- else
- 0;
- try allocateSegment(macho_file, @as(u8, @intCast(segment_index)), base_size);
- }
-}
-
-fn getSegmentAllocBase(macho_file: *MachO, segment_index: u8) struct { vmaddr: u64, fileoff: u64 } {
- if (segment_index > 0) {
- const prev_segment = macho_file.segments.items[segment_index - 1];
- return .{
- .vmaddr = prev_segment.vmaddr + prev_segment.vmsize,
- .fileoff = prev_segment.fileoff + prev_segment.filesize,
- };
- }
- return .{ .vmaddr = 0, .fileoff = 0 };
-}
-
-fn allocateSegment(macho_file: *MachO, segment_index: u8, init_size: u64) !void {
- const target = macho_file.base.comp.root_mod.resolved_target.result;
- const segment = &macho_file.segments.items[segment_index];
-
- if (mem.eql(u8, segment.segName(), "__PAGEZERO")) return; // allocated upon creation
-
- const base = getSegmentAllocBase(macho_file, segment_index);
- segment.vmaddr = base.vmaddr;
- segment.fileoff = base.fileoff;
- segment.filesize = init_size;
- segment.vmsize = init_size;
-
- // Allocate the sections according to their alignment at the beginning of the segment.
- const indexes = macho_file.getSectionIndexes(segment_index);
- var start = init_size;
-
- const slice = macho_file.sections.slice();
- for (slice.items(.header)[indexes.start..indexes.end], 0..) |*header, sect_id| {
- const alignment = try math.powi(u32, 2, header.@"align");
- const start_aligned = mem.alignForward(u64, start, alignment);
- const n_sect = @as(u8, @intCast(indexes.start + sect_id + 1));
-
- header.offset = if (header.isZerofill())
- 0
- else
- @as(u32, @intCast(segment.fileoff + start_aligned));
- header.addr = segment.vmaddr + start_aligned;
-
- if (slice.items(.first_atom_index)[indexes.start + sect_id]) |first_atom_index| {
- var atom_index = first_atom_index;
-
- log.debug("allocating local symbols in sect({d}, '{s},{s}')", .{
- n_sect,
- header.segName(),
- header.sectName(),
- });
-
- while (true) {
- const atom = macho_file.getAtom(atom_index);
- const sym = macho_file.getSymbolPtr(atom.getSymbolWithLoc());
- sym.n_value += header.addr;
- sym.n_sect = n_sect;
-
- log.debug(" ATOM(%{d}, '{s}') @{x}", .{
- atom.sym_index,
- macho_file.getSymbolName(atom.getSymbolWithLoc()),
- sym.n_value,
- });
-
- if (atom.getFile() != null) {
- // Update each symbol contained within the atom
- var it = Atom.getInnerSymbolsIterator(macho_file, atom_index);
- while (it.next()) |sym_loc| {
- const inner_sym = macho_file.getSymbolPtr(sym_loc);
- inner_sym.n_value = sym.n_value + Atom.calcInnerSymbolOffset(
- macho_file,
- atom_index,
- sym_loc.sym_index,
- );
- inner_sym.n_sect = n_sect;
- }
-
- // If there is a section alias, update it now too
- if (Atom.getSectionAlias(macho_file, atom_index)) |sym_loc| {
- const alias = macho_file.getSymbolPtr(sym_loc);
- alias.n_value = sym.n_value;
- alias.n_sect = n_sect;
- }
- }
-
- if (atom.next_index) |next_index| {
- atom_index = next_index;
- } else break;
- }
- }
-
- start = start_aligned + header.size;
-
- if (!header.isZerofill()) {
- segment.filesize = start;
- }
- segment.vmsize = start;
- }
-
- const page_size = MachO.getPageSize(target.cpu.arch);
- segment.filesize = mem.alignForward(u64, segment.filesize, page_size);
- segment.vmsize = mem.alignForward(u64, segment.vmsize, page_size);
-}
-
-const std = @import("std");
-const build_options = @import("build_options");
-const assert = std.debug.assert;
-const dwarf = std.dwarf;
-const fs = std.fs;
-const log = std.log.scoped(.link);
-const macho = std.macho;
-const math = std.math;
-const mem = std.mem;
-
-const aarch64 = @import("../../arch/aarch64/bits.zig");
-const calcUuid = @import("uuid.zig").calcUuid;
-const dead_strip = @import("dead_strip.zig");
-const eh_frame = @import("eh_frame.zig");
-const fat = @import("fat.zig");
-const link = @import("../../link.zig");
-const load_commands = @import("load_commands.zig");
-const stubs = @import("stubs.zig");
-const thunks = @import("thunks.zig");
-const trace = @import("../../tracy.zig").trace;
-
-const Allocator = mem.Allocator;
-const Archive = @import("Archive.zig");
-const Atom = @import("Atom.zig");
-const Cache = std.Build.Cache;
-const CodeSignature = @import("CodeSignature.zig");
-const Compilation = @import("../../Compilation.zig");
-const Dylib = @import("Dylib.zig");
-const MachO = @import("../MachO.zig");
-const Md5 = std.crypto.hash.Md5;
-const LibStub = @import("../tapi.zig").LibStub;
-const Object = @import("Object.zig");
-const Platform = load_commands.Platform;
-const Section = MachO.Section;
-const SymbolWithLoc = MachO.SymbolWithLoc;
-const TableSection = @import("../table_section.zig").TableSection;
-const Trie = @import("Trie.zig");
-const UnwindInfo = @import("UnwindInfo.zig");
CMakeLists.txt
@@ -603,20 +603,24 @@ set(ZIG_STAGE2_SOURCES
"${CMAKE_SOURCE_DIR}/src/link/MachO/DebugSymbols.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/DwarfInfo.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/Dylib.zig"
+ "${CMAKE_SOURCE_DIR}/src/link/MachO/InternalObject.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/Object.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/Relocation.zig"
- "${CMAKE_SOURCE_DIR}/src/link/MachO/Trie.zig"
+ "${CMAKE_SOURCE_DIR}/src/link/MachO/Symbol.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/UnwindInfo.zig"
+ "${CMAKE_SOURCE_DIR}/src/link/MachO/dead_strip.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/dyld_info/bind.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/dyld_info/Rebase.zig"
- "${CMAKE_SOURCE_DIR}/src/link/MachO/dead_strip.zig"
+ "${CMAKE_SOURCE_DIR}/src/link/MachO/dyld_info/Trie.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/eh_frame.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/fat.zig"
+ "${CMAKE_SOURCE_DIR}/src/link/MachO/file.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/hasher.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/load_commands.zig"
+ "${CMAKE_SOURCE_DIR}/src/link/MachO/relocatable.zig"
+ "${CMAKE_SOURCE_DIR}/src/link/MachO/synthetic.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/thunks.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/uuid.zig"
- "${CMAKE_SOURCE_DIR}/src/link/MachO/zld.zig"
"${CMAKE_SOURCE_DIR}/src/link/Plan9.zig"
"${CMAKE_SOURCE_DIR}/src/link/Plan9/aout.zig"
"${CMAKE_SOURCE_DIR}/src/link/Wasm.zig"