Commit 86ab6ca56c
Changed files (5)
src
link
src/link/MachO/reloc/aarch64.zig
@@ -10,6 +10,7 @@ const reloc = @import("../reloc.zig");
const Allocator = mem.Allocator;
const Relocation = reloc.Relocation;
+const Symbol = @import("../Symbol.zig");
pub const Branch = struct {
base: Relocation,
@@ -188,6 +189,7 @@ pub const Parser = struct {
it: *reloc.RelocIterator,
code: []u8,
parsed: std.ArrayList(*Relocation),
+ symbols: []*Symbol,
addend: ?u32 = null,
subtractor: ?Relocation.Target = null,
@@ -273,7 +275,7 @@ pub const Parser = struct {
var branch = try parser.allocator.create(Branch);
errdefer parser.allocator.destroy(branch);
- const target = Relocation.Target.from_reloc(rel);
+ const target = Relocation.Target.from_reloc(rel, parser.symbols);
branch.* = .{
.base = .{
@@ -294,7 +296,7 @@ pub const Parser = struct {
assert(rel.r_length == 2);
const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type);
- const target = Relocation.Target.from_reloc(rel);
+ const target = Relocation.Target.from_reloc(rel, parser.symbols);
const offset = @intCast(u32, rel.r_address);
const inst = parser.code[offset..][0..4];
@@ -400,7 +402,7 @@ pub const Parser = struct {
aarch64.Instruction.load_store_register,
), inst) };
}
- const target = Relocation.Target.from_reloc(rel);
+ const target = Relocation.Target.from_reloc(rel, parser.symbols);
var page_off = try parser.allocator.create(PageOff);
errdefer parser.allocator.destroy(page_off);
@@ -437,7 +439,7 @@ pub const Parser = struct {
), inst);
assert(parsed_inst.size == 3);
- const target = Relocation.Target.from_reloc(rel);
+ const target = Relocation.Target.from_reloc(rel, parser.symbols);
var page_off = try parser.allocator.create(GotPageOff);
errdefer parser.allocator.destroy(page_off);
@@ -496,7 +498,7 @@ pub const Parser = struct {
}
};
- const target = Relocation.Target.from_reloc(rel);
+ const target = Relocation.Target.from_reloc(rel, parser.symbols);
var page_off = try parser.allocator.create(TlvpPageOff);
errdefer parser.allocator.destroy(page_off);
@@ -531,7 +533,7 @@ pub const Parser = struct {
assert(rel.r_pcrel == 0);
assert(parser.subtractor == null);
- parser.subtractor = Relocation.Target.from_reloc(rel);
+ parser.subtractor = Relocation.Target.from_reloc(rel, parser.symbols);
// Verify SUBTRACTOR is followed by UNSIGNED.
const next = @intToEnum(macho.reloc_type_arm64, parser.it.peek().r_type);
@@ -554,7 +556,7 @@ pub const Parser = struct {
var unsigned = try parser.allocator.create(reloc.Unsigned);
errdefer parser.allocator.destroy(unsigned);
- const target = Relocation.Target.from_reloc(rel);
+ const target = Relocation.Target.from_reloc(rel, parser.symbols);
const is_64bit: bool = switch (rel.r_length) {
3 => true,
2 => false,
src/link/MachO/reloc/x86_64.zig
@@ -9,6 +9,7 @@ const reloc = @import("../reloc.zig");
const Allocator = mem.Allocator;
const Relocation = reloc.Relocation;
+const Symbol = @import("../Symbol.zig");
pub const Branch = struct {
base: Relocation,
@@ -95,6 +96,7 @@ pub const Parser = struct {
it: *reloc.RelocIterator,
code: []u8,
parsed: std.ArrayList(*Relocation),
+ symbols: []*Symbol,
subtractor: ?Relocation.Target = null,
pub fn deinit(parser: *Parser) void {
@@ -145,7 +147,7 @@ pub const Parser = struct {
var branch = try parser.allocator.create(Branch);
errdefer parser.allocator.destroy(branch);
- const target = Relocation.Target.from_reloc(rel);
+ const target = Relocation.Target.from_reloc(rel, parser.symbols);
branch.* = .{
.base = .{
@@ -165,7 +167,7 @@ pub const Parser = struct {
assert(rel.r_length == 2);
const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type);
- const target = Relocation.Target.from_reloc(rel);
+ const target = Relocation.Target.from_reloc(rel, parser.symbols);
const is_extern = rel.r_extern == 1;
const offset = @intCast(u32, rel.r_address);
@@ -211,7 +213,7 @@ pub const Parser = struct {
const offset = @intCast(u32, rel.r_address);
const inst = parser.code[offset..][0..4];
- const target = Relocation.Target.from_reloc(rel);
+ const target = Relocation.Target.from_reloc(rel, parser.symbols);
var got_load = try parser.allocator.create(GotLoad);
errdefer parser.allocator.destroy(got_load);
@@ -237,7 +239,7 @@ pub const Parser = struct {
const offset = @intCast(u32, rel.r_address);
const inst = parser.code[offset..][0..4];
- const target = Relocation.Target.from_reloc(rel);
+ const target = Relocation.Target.from_reloc(rel, parser.symbols);
var got = try parser.allocator.create(Got);
errdefer parser.allocator.destroy(got);
@@ -263,7 +265,7 @@ pub const Parser = struct {
const offset = @intCast(u32, rel.r_address);
const inst = parser.code[offset..][0..4];
- const target = Relocation.Target.from_reloc(rel);
+ const target = Relocation.Target.from_reloc(rel, parser.symbols);
var tlv = try parser.allocator.create(Tlv);
errdefer parser.allocator.destroy(tlv);
@@ -288,7 +290,7 @@ pub const Parser = struct {
assert(rel.r_pcrel == 0);
assert(parser.subtractor == null);
- parser.subtractor = Relocation.Target.from_reloc(rel);
+ parser.subtractor = Relocation.Target.from_reloc(rel, parser.symbols);
// Verify SUBTRACTOR is followed by UNSIGNED.
const next = @intToEnum(macho.reloc_type_x86_64, parser.it.peek().r_type);
@@ -311,7 +313,7 @@ pub const Parser = struct {
var unsigned = try parser.allocator.create(reloc.Unsigned);
errdefer parser.allocator.destroy(unsigned);
- const target = Relocation.Target.from_reloc(rel);
+ const target = Relocation.Target.from_reloc(rel, parser.symbols);
const is_64bit: bool = switch (rel.r_length) {
3 => true,
2 => false,
src/link/MachO/Object.zig
@@ -43,17 +43,13 @@ dwarf_debug_str_index: ?u16 = null,
dwarf_debug_line_index: ?u16 = null,
dwarf_debug_ranges_index: ?u16 = null,
-symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{},
-strtab: std.ArrayListUnmanaged(u8) = .{},
+symbols: std.ArrayListUnmanaged(*Symbol) = .{},
+initializers: std.ArrayListUnmanaged(*Symbol) = .{},
+data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},
-locals: std.StringArrayHashMapUnmanaged(Symbol) = .{},
-stabs: std.ArrayListUnmanaged(Stab) = .{},
tu_path: ?[]const u8 = null,
tu_mtime: ?u64 = null,
-initializers: std.ArrayListUnmanaged(CppStatic) = .{},
-data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},
-
pub const Section = struct {
inner: macho.section_64,
code: []u8,
@@ -71,23 +67,6 @@ pub const Section = struct {
}
};
-const CppStatic = struct {
- symbol: u32,
- target_addr: u64,
-};
-
-const Stab = struct {
- tag: Tag,
- symbol: u32,
- size: ?u64 = null,
-
- const Tag = enum {
- function,
- global,
- static,
- };
-};
-
const DebugInfo = struct {
inner: dwarf.DwarfInfo,
debug_info: []u8,
@@ -169,14 +148,12 @@ pub fn deinit(self: *Object) void {
}
self.sections.deinit(self.allocator);
- for (self.locals.items()) |*entry| {
- entry.value.deinit(self.allocator);
+ for (self.symbols.items) |sym| {
+ sym.deinit(self.allocator);
+ self.allocator.destroy(sym);
}
- self.locals.deinit(self.allocator);
+ self.symbols.deinit(self.allocator);
- self.symtab.deinit(self.allocator);
- self.strtab.deinit(self.allocator);
- self.stabs.deinit(self.allocator);
self.data_in_code_entries.deinit(self.allocator);
self.initializers.deinit(self.allocator);
@@ -222,9 +199,9 @@ pub fn parse(self: *Object) !void {
}
try self.readLoadCommands(reader);
+ try self.parseSymbols();
try self.parseSections();
- if (self.symtab_cmd_index != null) try self.parseSymtab();
- if (self.data_in_code_cmd_index != null) try self.readDataInCode();
+ try self.parseDataInCode();
try self.parseInitializers();
try self.parseDebugInfo();
}
@@ -298,9 +275,10 @@ pub fn readLoadCommands(self: *Object, reader: anytype) !void {
}
pub fn parseSections(self: *Object) !void {
- log.debug("parsing sections in {s}", .{self.name.?});
const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
+ log.debug("parsing sections in {s}", .{self.name.?});
+
try self.sections.ensureCapacity(self.allocator, seg.sections.items.len);
for (seg.sections.items) |sect| {
@@ -327,6 +305,7 @@ pub fn parseSections(self: *Object) !void {
self.arch.?,
section.code,
mem.bytesAsSlice(macho.relocation_info, raw_relocs),
+ self.symbols.items,
);
}
@@ -344,60 +323,70 @@ pub fn parseInitializers(self: *Object) !void {
const relocs = section.relocs orelse unreachable;
try self.initializers.ensureCapacity(self.allocator, relocs.len);
for (relocs) |rel| {
- self.initializers.appendAssumeCapacity(.{
- .symbol = rel.target.symbol,
- .target_addr = undefined,
- });
+ self.initializers.appendAssumeCapacity(rel.target.symbol);
}
- mem.reverse(CppStatic, self.initializers.items);
-
- for (self.initializers.items) |initializer| {
- const sym = self.symtab.items[initializer.symbol];
- const sym_name = self.getString(sym.n_strx);
- log.debug(" | {s}", .{sym_name});
- }
+ mem.reverse(*Symbol, self.initializers.items);
}
-pub fn parseSymtab(self: *Object) !void {
-const symtab_cmd = self.load_commands.items[self.symtab_cmd_index.?].Symtab;
+/// Reads the symbol table and string table described by the symtab load command
+/// and appends one heap-allocated symbol per nlist entry to `self.symbols`.
+/// Entries are appended in symbol table order because relocation targets are
+/// resolved by indexing this list with `r_symbolnum`.
+/// Does nothing when the object has no symtab load command.
+/// Section-defined entries become `Symbol.Regular`; all others become `Symbol.Unresolved`.
+/// Each symbol owns a duplicate of its name.
+/// Returns `error.HandleStabsInObjects` if a stab entry is encountered; allocation
+/// and file read errors are propagated.
+pub fn parseSymbols(self: *Object) !void {
+const index = self.symtab_cmd_index orelse return;
+const symtab_cmd = self.load_commands.items[index].Symtab;
var symtab = try self.allocator.alloc(u8, @sizeOf(macho.nlist_64) * symtab_cmd.nsyms);
defer self.allocator.free(symtab);
-
_ = try self.file.?.preadAll(symtab, symtab_cmd.symoff);
const slice = @alignCast(@alignOf(macho.nlist_64), mem.bytesAsSlice(macho.nlist_64, symtab));
-try self.symtab.appendSlice(self.allocator, slice);
var strtab = try self.allocator.alloc(u8, symtab_cmd.strsize);
defer self.allocator.free(strtab);
-
_ = try self.file.?.preadAll(strtab, symtab_cmd.stroff);
-try self.strtab.appendSlice(self.allocator, strtab);
-for (self.symtab.items) |sym, sym_id| {
-if (Symbol.isStab(sym) or Symbol.isUndef(sym)) continue;
+// Reserve room up front so that storing a freshly created symbol cannot fail
+// (a failing append would otherwise leak the symbol and its name).
+try self.symbols.ensureCapacity(self.allocator, self.symbols.items.len + slice.len);
+
+for (slice) |sym| {
+if (Symbol.isStab(sym)) {
+log.err("TODO handle stabs embedded within object files", .{});
+return error.HandleStabsInObjects;
+}
-const sym_name = self.getString(sym.n_strx);
-const tag: Symbol.Tag = tag: {
-if (Symbol.isLocal(sym)) {
-if (self.arch.? == .aarch64 and mem.startsWith(u8, sym_name, "l")) continue;
-break :tag .local;
-}
-if (Symbol.isWeakDef(sym)) {
-break :tag .weak;
+// NOTE: n_strx is not bounds-checked against strtab here.
+const sym_name = mem.spanZ(@ptrCast([*:0]const u8, strtab.ptr + sym.n_strx));
+const name = try self.allocator.dupe(u8, sym_name);
+// The name is only handed over once the symbol is stored; release it if
+// creating the symbol below fails.
+errdefer self.allocator.free(name);
+
+const symbol: *Symbol = symbol: {
+if (Symbol.isSect(sym)) {
+const linkage: Symbol.Regular.Linkage = linkage: {
+if (!Symbol.isExt(sym)) break :linkage .translation_unit;
+if (Symbol.isWeakDef(sym) or Symbol.isPext(sym)) break :linkage .linkage_unit;
+break :linkage .global;
+};
+const regular = try self.allocator.create(Symbol.Regular);
+errdefer self.allocator.destroy(regular);
+regular.* = .{
+.base = .{
+.@"type" = .regular,
+.name = name,
+},
+.linkage = linkage,
+.address = sym.n_value,
+.section = sym.n_sect - 1,
+.weak_ref = Symbol.isWeakRef(sym),
+.file = self,
+};
+break :symbol &regular.base;
}
-break :tag .strong;
+
+const undef = try self.allocator.create(Symbol.Unresolved);
+errdefer self.allocator.destroy(undef);
+undef.* = .{
+.base = .{
+.@"type" = .unresolved,
+.name = name,
+},
+.file = self,
+};
+break :symbol &undef.base;
};
-const name = try self.allocator.dupe(u8, sym_name);
-try self.locals.putNoClobber(self.allocator, name, .{
-.tag = tag,
-.name = name,
-.address = 0,
-.section = 0,
-.index = @intCast(u32, sym_id),
-});
+self.symbols.appendAssumeCapacity(symbol);
}
}
@@ -429,38 +418,31 @@ pub fn parseDebugInfo(self: *Object) !void {
break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000));
};
- for (self.locals.items()) |entry, index| {
- const local = entry.value;
- const source_sym = self.symtab.items[local.index.?];
- const size = blk: for (debug_info.inner.func_list.items) |func| {
- if (func.pc_range) |range| {
- if (source_sym.n_value >= range.start and source_sym.n_value < range.end) {
- break :blk range.end - range.start;
+ for (self.symbols.items) |sym| {
+ if (sym.cast(Symbol.Regular)) |reg| {
+ const size: u64 = blk: for (debug_info.inner.func_list.items) |func| {
+ if (func.pc_range) |range| {
+ if (reg.address >= range.start and reg.address < range.end) {
+ break :blk range.end - range.start;
+ }
}
- }
- } else null;
- const tag: Stab.Tag = tag: {
- if (size != null) break :tag .function;
- switch (local.tag) {
- .weak, .strong => break :tag .global,
- else => break :tag .static,
- }
- };
-
- try self.stabs.append(self.allocator, .{
- .tag = tag,
- .size = size,
- .symbol = @intCast(u32, index),
- });
+ } else 0;
+
+ reg.stab = .{
+ .kind = kind: {
+ if (size > 0) break :kind .function;
+ switch (reg.linkage) {
+ .translation_unit => break :kind .static,
+ else => break :kind .global,
+ }
+ },
+ .size = size,
+ };
+ }
}
}
-pub fn getString(self: *const Object, str_off: u32) []const u8 {
- assert(str_off < self.strtab.items.len);
- return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + str_off));
-}
-
-pub fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 {
+fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 {
const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
const sect = seg.sections.items[index];
var buffer = try allocator.alloc(u8, sect.size);
@@ -468,7 +450,7 @@ pub fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 {
return buffer;
}
-pub fn readDataInCode(self: *Object) !void {
+pub fn parseDataInCode(self: *Object) !void {
const index = self.data_in_code_cmd_index orelse return;
const data_in_code = self.load_commands.items[index].LinkeditData;
src/link/MachO/reloc.zig
@@ -10,6 +10,7 @@ const aarch64 = @import("reloc/aarch64.zig");
const x86_64 = @import("reloc/x86_64.zig");
const Allocator = mem.Allocator;
+const Symbol = @import("Symbol.zig");
pub const Relocation = struct {
@"type": Type,
@@ -75,12 +76,12 @@ pub const Relocation = struct {
};
pub const Target = union(enum) {
- symbol: u32,
+ symbol: *Symbol,
section: u16,
- pub fn from_reloc(reloc: macho.relocation_info) Target {
+ pub fn from_reloc(reloc: macho.relocation_info, symbols: []*Symbol) Target {
return if (reloc.r_extern == 1) .{
- .symbol = reloc.r_symbolnum,
+ .symbol = symbols[reloc.r_symbolnum],
} else .{
.section = @intCast(u16, reloc.r_symbolnum - 1),
};
@@ -136,6 +137,7 @@ pub fn parse(
arch: std.Target.Cpu.Arch,
code: []u8,
relocs: []const macho.relocation_info,
+ symbols: []*Symbol,
) ![]*Relocation {
var it = RelocIterator{
.buffer = relocs,
@@ -148,6 +150,7 @@ pub fn parse(
.it = &it,
.code = code,
.parsed = std.ArrayList(*Relocation).init(allocator),
+ .symbols = symbols,
};
defer parser.deinit();
try parser.parse();
@@ -160,6 +163,7 @@ pub fn parse(
.it = &it,
.code = code,
.parsed = std.ArrayList(*Relocation).init(allocator),
+ .symbols = symbols,
};
defer parser.deinit();
try parser.parse();
src/link/MachO/Symbol.zig
@@ -2,31 +2,93 @@ const Symbol = @This();
const std = @import("std");
const macho = std.macho;
+const mem = std.mem;
-const Allocator = std.mem.Allocator;
+const Allocator = mem.Allocator;
+const Object = @import("Object.zig");
-pub const Tag = enum {
- local,
- weak,
- strong,
- import,
- undef,
+pub const Type = enum {
+regular, // tag of Regular (Regular.base_type)
+proxy, // tag of Proxy (Proxy.base_type)
+unresolved, // tag of Unresolved (Unresolved.base_type)
};
-tag: Tag,
+/// Symbol type.
+@"type": Type,
+
+/// Symbol name. Owned slice.
name: []u8,
-address: u64,
-section: u8,
-/// Index of file where to locate this symbol.
-/// Depending on context, this is either an object file, or a dylib.
-file: ?u16 = null,
+pub const Regular = struct {
+base: Symbol, // embedded header; Symbol.cast maps a pointer to this field back to the Regular
+
+/// Linkage of the definition; one of the Linkage values declared below.
+linkage: Linkage,
+
+/// Symbol address; Object.parseSymbols fills it from the nlist n_value field.
+address: u64,
+
+/// Zero-based ID of the section where the symbol resides (Object.parseSymbols stores n_sect - 1).
+section: u8,
+
+/// Whether the symbol is a weak ref, i.e. N_WEAK_REF is set in n_desc (see isWeakRef).
+weak_ref: bool,
+
+/// Object file where to locate this symbol; Symbol.deinit does not free it.
+file: *Object,
+
+/// Debug stab if defined; null until Object.parseDebugInfo assigns it.
+stab: ?struct {
+/// Stab kind; Object.parseDebugInfo picks .function when size > 0, otherwise .static or .global by linkage.
+kind: enum {
+function,
+global,
+static,
+},
-/// Index of this symbol within the file's symbol table.
-index: ?u32 = null,
+/// Size of the stab: length of the matching function pc range, or 0 when none matched.
+size: u64,
+} = null,
-pub fn deinit(self: *Symbol, allocator: *Allocator) void {
-allocator.free(self.name);
+pub const base_type: Symbol.Type = .regular; // tag Symbol.cast compares against
+
+pub const Linkage = enum {
+translation_unit,
+linkage_unit,
+global,
+};
+};
+
+pub const Proxy = struct {
+base: Symbol, // embedded header; Symbol.cast maps a pointer to this field back to the Proxy
+
+/// Dylib ordinal (presumably of the dylib that provides this symbol; confirm at the use site).
+dylib: u16,
+
+pub const base_type: Symbol.Type = .proxy; // tag Symbol.cast compares against
+};
+
+pub const Unresolved = struct {
+base: Symbol, // embedded header; Symbol.cast maps a pointer to this field back to the Unresolved
+
+/// Symbol this one is an alias of, if any; defaults to null.
+alias: ?*Symbol = null,
+
+/// Object file where this symbol was referenced; Symbol.deinit does not free it.
+file: *Object,
+
+pub const base_type: Symbol.Type = .unresolved; // tag Symbol.cast compares against
+};
+
+pub fn deinit(base: *Symbol, allocator: *Allocator) void { // releases the memory owned by the base symbol
+allocator.free(base.name); // frees only the owned name slice; the struct embedding `base` is not destroyed here
+}
+
+/// Downcasts `base` to the concrete symbol struct `T` that embeds it as its `base` field.
+/// Returns null when `base.@"type"` differs from `T.base_type`.
+pub fn cast(base: *Symbol, comptime T: type) ?*T {
+return if (base.@"type" == T.base_type)
+@fieldParentPtr(T, "base", base)
+else
+null;
}
pub fn isStab(sym: macho.nlist_64) bool {
@@ -55,17 +117,6 @@ pub fn isWeakDef(sym: macho.nlist_64) bool {
return (sym.n_desc & macho.N_WEAK_DEF) != 0;
}
-/// Symbol is local if it is defined and not an extern.
-pub fn isLocal(sym: macho.nlist_64) bool {
- return isSect(sym) and !isExt(sym);
-}
-
-/// Symbol is global if it is defined and an extern.
-pub fn isGlobal(sym: macho.nlist_64) bool {
- return isSect(sym) and isExt(sym);
-}
-
-/// Symbol is undefined if it is not defined and an extern.
-pub fn isUndef(sym: macho.nlist_64) bool {
- return isUndf(sym) and isExt(sym);
+pub fn isWeakRef(sym: macho.nlist_64) bool { // reports whether the nlist entry is marked as a weak reference
+return (sym.n_desc & macho.N_WEAK_REF) != 0; // true iff the N_WEAK_REF bit is set in n_desc
}