Commit 86ab6ca56c

Jakub Konka <kubkon@jakubkonka.com>
2021-05-02 23:40:08
zld: rewrite Object to include pointers to Symbols
1 parent b6be28d
Changed files (5)
src/link/MachO/reloc/aarch64.zig
@@ -10,6 +10,7 @@ const reloc = @import("../reloc.zig");
 
 const Allocator = mem.Allocator;
 const Relocation = reloc.Relocation;
+const Symbol = @import("../Symbol.zig");
 
 pub const Branch = struct {
     base: Relocation,
@@ -188,6 +189,7 @@ pub const Parser = struct {
     it: *reloc.RelocIterator,
     code: []u8,
     parsed: std.ArrayList(*Relocation),
+    symbols: []*Symbol,
     addend: ?u32 = null,
     subtractor: ?Relocation.Target = null,
 
@@ -273,7 +275,7 @@ pub const Parser = struct {
         var branch = try parser.allocator.create(Branch);
         errdefer parser.allocator.destroy(branch);
 
-        const target = Relocation.Target.from_reloc(rel);
+        const target = Relocation.Target.from_reloc(rel, parser.symbols);
 
         branch.* = .{
             .base = .{
@@ -294,7 +296,7 @@ pub const Parser = struct {
         assert(rel.r_length == 2);
 
         const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type);
-        const target = Relocation.Target.from_reloc(rel);
+        const target = Relocation.Target.from_reloc(rel, parser.symbols);
 
         const offset = @intCast(u32, rel.r_address);
         const inst = parser.code[offset..][0..4];
@@ -400,7 +402,7 @@ pub const Parser = struct {
                 aarch64.Instruction.load_store_register,
             ), inst) };
         }
-        const target = Relocation.Target.from_reloc(rel);
+        const target = Relocation.Target.from_reloc(rel, parser.symbols);
 
         var page_off = try parser.allocator.create(PageOff);
         errdefer parser.allocator.destroy(page_off);
@@ -437,7 +439,7 @@ pub const Parser = struct {
         ), inst);
         assert(parsed_inst.size == 3);
 
-        const target = Relocation.Target.from_reloc(rel);
+        const target = Relocation.Target.from_reloc(rel, parser.symbols);
 
         var page_off = try parser.allocator.create(GotPageOff);
         errdefer parser.allocator.destroy(page_off);
@@ -496,7 +498,7 @@ pub const Parser = struct {
             }
         };
 
-        const target = Relocation.Target.from_reloc(rel);
+        const target = Relocation.Target.from_reloc(rel, parser.symbols);
 
         var page_off = try parser.allocator.create(TlvpPageOff);
         errdefer parser.allocator.destroy(page_off);
@@ -531,7 +533,7 @@ pub const Parser = struct {
         assert(rel.r_pcrel == 0);
         assert(parser.subtractor == null);
 
-        parser.subtractor = Relocation.Target.from_reloc(rel);
+        parser.subtractor = Relocation.Target.from_reloc(rel, parser.symbols);
 
         // Verify SUBTRACTOR is followed by UNSIGNED.
         const next = @intToEnum(macho.reloc_type_arm64, parser.it.peek().r_type);
@@ -554,7 +556,7 @@ pub const Parser = struct {
         var unsigned = try parser.allocator.create(reloc.Unsigned);
         errdefer parser.allocator.destroy(unsigned);
 
-        const target = Relocation.Target.from_reloc(rel);
+        const target = Relocation.Target.from_reloc(rel, parser.symbols);
         const is_64bit: bool = switch (rel.r_length) {
             3 => true,
             2 => false,
src/link/MachO/reloc/x86_64.zig
@@ -9,6 +9,7 @@ const reloc = @import("../reloc.zig");
 
 const Allocator = mem.Allocator;
 const Relocation = reloc.Relocation;
+const Symbol = @import("../Symbol.zig");
 
 pub const Branch = struct {
     base: Relocation,
@@ -95,6 +96,7 @@ pub const Parser = struct {
     it: *reloc.RelocIterator,
     code: []u8,
     parsed: std.ArrayList(*Relocation),
+    symbols: []*Symbol,
     subtractor: ?Relocation.Target = null,
 
     pub fn deinit(parser: *Parser) void {
@@ -145,7 +147,7 @@ pub const Parser = struct {
         var branch = try parser.allocator.create(Branch);
         errdefer parser.allocator.destroy(branch);
 
-        const target = Relocation.Target.from_reloc(rel);
+        const target = Relocation.Target.from_reloc(rel, parser.symbols);
 
         branch.* = .{
             .base = .{
@@ -165,7 +167,7 @@ pub const Parser = struct {
         assert(rel.r_length == 2);
 
         const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type);
-        const target = Relocation.Target.from_reloc(rel);
+        const target = Relocation.Target.from_reloc(rel, parser.symbols);
         const is_extern = rel.r_extern == 1;
 
         const offset = @intCast(u32, rel.r_address);
@@ -211,7 +213,7 @@ pub const Parser = struct {
 
         const offset = @intCast(u32, rel.r_address);
         const inst = parser.code[offset..][0..4];
-        const target = Relocation.Target.from_reloc(rel);
+        const target = Relocation.Target.from_reloc(rel, parser.symbols);
 
         var got_load = try parser.allocator.create(GotLoad);
         errdefer parser.allocator.destroy(got_load);
@@ -237,7 +239,7 @@ pub const Parser = struct {
 
         const offset = @intCast(u32, rel.r_address);
         const inst = parser.code[offset..][0..4];
-        const target = Relocation.Target.from_reloc(rel);
+        const target = Relocation.Target.from_reloc(rel, parser.symbols);
 
         var got = try parser.allocator.create(Got);
         errdefer parser.allocator.destroy(got);
@@ -263,7 +265,7 @@ pub const Parser = struct {
 
         const offset = @intCast(u32, rel.r_address);
         const inst = parser.code[offset..][0..4];
-        const target = Relocation.Target.from_reloc(rel);
+        const target = Relocation.Target.from_reloc(rel, parser.symbols);
 
         var tlv = try parser.allocator.create(Tlv);
         errdefer parser.allocator.destroy(tlv);
@@ -288,7 +290,7 @@ pub const Parser = struct {
         assert(rel.r_pcrel == 0);
         assert(parser.subtractor == null);
 
-        parser.subtractor = Relocation.Target.from_reloc(rel);
+        parser.subtractor = Relocation.Target.from_reloc(rel, parser.symbols);
 
         // Verify SUBTRACTOR is followed by UNSIGNED.
         const next = @intToEnum(macho.reloc_type_x86_64, parser.it.peek().r_type);
@@ -311,7 +313,7 @@ pub const Parser = struct {
         var unsigned = try parser.allocator.create(reloc.Unsigned);
         errdefer parser.allocator.destroy(unsigned);
 
-        const target = Relocation.Target.from_reloc(rel);
+        const target = Relocation.Target.from_reloc(rel, parser.symbols);
         const is_64bit: bool = switch (rel.r_length) {
             3 => true,
             2 => false,
src/link/MachO/Object.zig
@@ -43,17 +43,13 @@ dwarf_debug_str_index: ?u16 = null,
 dwarf_debug_line_index: ?u16 = null,
 dwarf_debug_ranges_index: ?u16 = null,
 
-symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{},
-strtab: std.ArrayListUnmanaged(u8) = .{},
+symbols: std.ArrayListUnmanaged(*Symbol) = .{},
+initializers: std.ArrayListUnmanaged(*Symbol) = .{},
+data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},
 
-locals: std.StringArrayHashMapUnmanaged(Symbol) = .{},
-stabs: std.ArrayListUnmanaged(Stab) = .{},
 tu_path: ?[]const u8 = null,
 tu_mtime: ?u64 = null,
 
-initializers: std.ArrayListUnmanaged(CppStatic) = .{},
-data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},
-
 pub const Section = struct {
     inner: macho.section_64,
     code: []u8,
@@ -71,23 +67,6 @@ pub const Section = struct {
     }
 };
 
-const CppStatic = struct {
-    symbol: u32,
-    target_addr: u64,
-};
-
-const Stab = struct {
-    tag: Tag,
-    symbol: u32,
-    size: ?u64 = null,
-
-    const Tag = enum {
-        function,
-        global,
-        static,
-    };
-};
-
 const DebugInfo = struct {
     inner: dwarf.DwarfInfo,
     debug_info: []u8,
@@ -169,14 +148,12 @@ pub fn deinit(self: *Object) void {
     }
     self.sections.deinit(self.allocator);
 
-    for (self.locals.items()) |*entry| {
-        entry.value.deinit(self.allocator);
+    for (self.symbols.items) |sym| {
+        sym.deinit(self.allocator);
+        self.allocator.destroy(sym);
     }
-    self.locals.deinit(self.allocator);
+    self.symbols.deinit(self.allocator);
 
-    self.symtab.deinit(self.allocator);
-    self.strtab.deinit(self.allocator);
-    self.stabs.deinit(self.allocator);
     self.data_in_code_entries.deinit(self.allocator);
     self.initializers.deinit(self.allocator);
 
@@ -222,9 +199,9 @@ pub fn parse(self: *Object) !void {
     }
 
     try self.readLoadCommands(reader);
+    try self.parseSymbols();
     try self.parseSections();
-    if (self.symtab_cmd_index != null) try self.parseSymtab();
-    if (self.data_in_code_cmd_index != null) try self.readDataInCode();
+    try self.parseDataInCode();
     try self.parseInitializers();
     try self.parseDebugInfo();
 }
@@ -298,9 +275,10 @@ pub fn readLoadCommands(self: *Object, reader: anytype) !void {
 }
 
 pub fn parseSections(self: *Object) !void {
-    log.debug("parsing sections in {s}", .{self.name.?});
     const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
 
+    log.debug("parsing sections in {s}", .{self.name.?});
+
     try self.sections.ensureCapacity(self.allocator, seg.sections.items.len);
 
     for (seg.sections.items) |sect| {
@@ -327,6 +305,7 @@ pub fn parseSections(self: *Object) !void {
                 self.arch.?,
                 section.code,
                 mem.bytesAsSlice(macho.relocation_info, raw_relocs),
+                self.symbols.items,
             );
         }
 
@@ -344,60 +323,70 @@ pub fn parseInitializers(self: *Object) !void {
     const relocs = section.relocs orelse unreachable;
     try self.initializers.ensureCapacity(self.allocator, relocs.len);
     for (relocs) |rel| {
-        self.initializers.appendAssumeCapacity(.{
-            .symbol = rel.target.symbol,
-            .target_addr = undefined,
-        });
+        self.initializers.appendAssumeCapacity(rel.target.symbol);
     }
 
-    mem.reverse(CppStatic, self.initializers.items);
-
-    for (self.initializers.items) |initializer| {
-        const sym = self.symtab.items[initializer.symbol];
-        const sym_name = self.getString(sym.n_strx);
-        log.debug("    | {s}", .{sym_name});
-    }
+    mem.reverse(*Symbol, self.initializers.items);
 }
 
-pub fn parseSymtab(self: *Object) !void {
-    const symtab_cmd = self.load_commands.items[self.symtab_cmd_index.?].Symtab;
+pub fn parseSymbols(self: *Object) !void {
+    const index = self.symtab_cmd_index orelse return;
+    const symtab_cmd = self.load_commands.items[index].Symtab;
 
     var symtab = try self.allocator.alloc(u8, @sizeOf(macho.nlist_64) * symtab_cmd.nsyms);
     defer self.allocator.free(symtab);
-
     _ = try self.file.?.preadAll(symtab, symtab_cmd.symoff);
     const slice = @alignCast(@alignOf(macho.nlist_64), mem.bytesAsSlice(macho.nlist_64, symtab));
-    try self.symtab.appendSlice(self.allocator, slice);
 
     var strtab = try self.allocator.alloc(u8, symtab_cmd.strsize);
     defer self.allocator.free(strtab);
-
     _ = try self.file.?.preadAll(strtab, symtab_cmd.stroff);
-    try self.strtab.appendSlice(self.allocator, strtab);
 
-    for (self.symtab.items) |sym, sym_id| {
-        if (Symbol.isStab(sym) or Symbol.isUndef(sym)) continue;
+    for (slice) |sym| {
+        if (Symbol.isStab(sym)) {
+            log.err("TODO handle stabs embedded within object files", .{});
+            return error.HandleStabsInObjects;
+        }
 
-        const sym_name = self.getString(sym.n_strx);
-        const tag: Symbol.Tag = tag: {
-            if (Symbol.isLocal(sym)) {
-                if (self.arch.? == .aarch64 and mem.startsWith(u8, sym_name, "l")) continue;
-                break :tag .local;
-            }
-            if (Symbol.isWeakDef(sym)) {
-                break :tag .weak;
+        const sym_name = mem.spanZ(@ptrCast([*:0]const u8, strtab.ptr + sym.n_strx));
+        const name = try self.allocator.dupe(u8, sym_name);
+
+        const symbol: *Symbol = symbol: {
+            if (Symbol.isSect(sym)) {
+                const linkage: Symbol.Regular.Linkage = linkage: {
+                    if (!Symbol.isExt(sym)) break :linkage .translation_unit;
+                    if (Symbol.isWeakDef(sym) or Symbol.isPext(sym)) break :linkage .linkage_unit;
+                    break :linkage .global;
+                };
+                const regular = try self.allocator.create(Symbol.Regular);
+                errdefer self.allocator.destroy(regular);
+                regular.* = .{
+                    .base = .{
+                        .@"type" = .regular,
+                        .name = name,
+                    },
+                    .linkage = linkage, // as classified above; parseDebugInfo reads it to pick the stab kind
+                    .address = sym.n_value,
+                    .section = sym.n_sect - 1,
+                    .weak_ref = Symbol.isWeakRef(sym),
+                    .file = self,
+                };
+                break :symbol &regular.base;
             }
-            break :tag .strong;
+
+            const undef = try self.allocator.create(Symbol.Unresolved);
+            errdefer self.allocator.destroy(undef);
+            undef.* = .{
+                .base = .{
+                    .@"type" = .unresolved,
+                    .name = name,
+                },
+                .file = self,
+            };
+            break :symbol &undef.base;
         };
-        const name = try self.allocator.dupe(u8, sym_name);
 
-        try self.locals.putNoClobber(self.allocator, name, .{
-            .tag = tag,
-            .name = name,
-            .address = 0,
-            .section = 0,
-            .index = @intCast(u32, sym_id),
-        });
+        try self.symbols.append(self.allocator, symbol);
     }
 }
 
@@ -429,38 +418,31 @@ pub fn parseDebugInfo(self: *Object) !void {
         break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000));
     };
 
-    for (self.locals.items()) |entry, index| {
-        const local = entry.value;
-        const source_sym = self.symtab.items[local.index.?];
-        const size = blk: for (debug_info.inner.func_list.items) |func| {
-            if (func.pc_range) |range| {
-                if (source_sym.n_value >= range.start and source_sym.n_value < range.end) {
-                    break :blk range.end - range.start;
+    for (self.symbols.items) |sym| {
+        if (sym.cast(Symbol.Regular)) |reg| {
+            const size: u64 = blk: for (debug_info.inner.func_list.items) |func| {
+                if (func.pc_range) |range| {
+                    if (reg.address >= range.start and reg.address < range.end) {
+                        break :blk range.end - range.start;
+                    }
                 }
-            }
-        } else null;
-        const tag: Stab.Tag = tag: {
-            if (size != null) break :tag .function;
-            switch (local.tag) {
-                .weak, .strong => break :tag .global,
-                else => break :tag .static,
-            }
-        };
-
-        try self.stabs.append(self.allocator, .{
-            .tag = tag,
-            .size = size,
-            .symbol = @intCast(u32, index),
-        });
+            } else 0;
+
+            reg.stab = .{
+                .kind = kind: {
+                    if (size > 0) break :kind .function;
+                    switch (reg.linkage) {
+                        .translation_unit => break :kind .static,
+                        else => break :kind .global,
+                    }
+                },
+                .size = size,
+            };
+        }
     }
 }
 
-pub fn getString(self: *const Object, str_off: u32) []const u8 {
-    assert(str_off < self.strtab.items.len);
-    return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + str_off));
-}
-
-pub fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 {
+fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 {
     const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
     const sect = seg.sections.items[index];
     var buffer = try allocator.alloc(u8, sect.size);
@@ -468,7 +450,7 @@ pub fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 {
     return buffer;
 }
 
-pub fn readDataInCode(self: *Object) !void {
+pub fn parseDataInCode(self: *Object) !void {
     const index = self.data_in_code_cmd_index orelse return;
     const data_in_code = self.load_commands.items[index].LinkeditData;
 
src/link/MachO/reloc.zig
@@ -10,6 +10,7 @@ const aarch64 = @import("reloc/aarch64.zig");
 const x86_64 = @import("reloc/x86_64.zig");
 
 const Allocator = mem.Allocator;
+const Symbol = @import("Symbol.zig");
 
 pub const Relocation = struct {
     @"type": Type,
@@ -75,12 +76,12 @@ pub const Relocation = struct {
     };
 
     pub const Target = union(enum) {
-        symbol: u32,
+        symbol: *Symbol,
         section: u16,
 
-        pub fn from_reloc(reloc: macho.relocation_info) Target {
+        pub fn from_reloc(reloc: macho.relocation_info, symbols: []*Symbol) Target {
             return if (reloc.r_extern == 1) .{
-                .symbol = reloc.r_symbolnum,
+                .symbol = symbols[reloc.r_symbolnum],
             } else .{
                 .section = @intCast(u16, reloc.r_symbolnum - 1),
             };
@@ -136,6 +137,7 @@ pub fn parse(
     arch: std.Target.Cpu.Arch,
     code: []u8,
     relocs: []const macho.relocation_info,
+    symbols: []*Symbol,
 ) ![]*Relocation {
     var it = RelocIterator{
         .buffer = relocs,
@@ -148,6 +150,7 @@ pub fn parse(
                 .it = &it,
                 .code = code,
                 .parsed = std.ArrayList(*Relocation).init(allocator),
+                .symbols = symbols,
             };
             defer parser.deinit();
             try parser.parse();
@@ -160,6 +163,7 @@ pub fn parse(
                 .it = &it,
                 .code = code,
                 .parsed = std.ArrayList(*Relocation).init(allocator),
+                .symbols = symbols,
             };
             defer parser.deinit();
             try parser.parse();
src/link/MachO/Symbol.zig
@@ -2,31 +2,93 @@ const Symbol = @This();
 
 const std = @import("std");
 const macho = std.macho;
+const mem = std.mem;
 
-const Allocator = std.mem.Allocator;
+const Allocator = mem.Allocator;
+const Object = @import("Object.zig");
 
-pub const Tag = enum {
-    local,
-    weak,
-    strong,
-    import,
-    undef,
+pub const Type = enum {
+    regular,
+    proxy,
+    unresolved,
 };
 
-tag: Tag,
+/// Symbol type.
+@"type": Type,
+
+/// Symbol name. Owned slice.
 name: []u8,
-address: u64,
-section: u8,
 
-/// Index of file where to locate this symbol.
-/// Depending on context, this is either an object file, or a dylib.
-file: ?u16 = null,
+pub const Regular = struct {
+    base: Symbol,
+
+    /// Linkage type.
+    linkage: Linkage,
+
+    /// Symbol address.
+    address: u64,
+
+    /// Section ID where the symbol resides.
+    section: u8,
+
+    /// Whether the symbol is a weak ref.
+    weak_ref: bool,
+
+    /// File where to locate this symbol.
+    file: *Object,
+
+    /// Debug stab if defined.
+    stab: ?struct {
+        /// Stab kind
+        kind: enum {
+            function,
+            global,
+            static,
+        },
 
-/// Index of this symbol within the file's symbol table.
-index: ?u32 = null,
+        /// Size of the stab.
+        size: u64,
+    } = null,
 
-pub fn deinit(self: *Symbol, allocator: *Allocator) void {
-    allocator.free(self.name);
+    pub const base_type: Symbol.Type = .regular;
+
+    pub const Linkage = enum {
+        translation_unit,
+        linkage_unit,
+        global,
+    };
+};
+
+pub const Proxy = struct {
+    base: Symbol,
+
+    /// Dylib ordinal.
+    dylib: u16,
+
+    pub const base_type: Symbol.Type = .proxy;
+};
+
+pub const Unresolved = struct {
+    base: Symbol,
+
+    /// Symbol this one is an alias of, if any.
+    alias: ?*Symbol = null,
+
+    /// File where this symbol was referenced.
+    file: *Object,
+
+    pub const base_type: Symbol.Type = .unresolved;
+};
+
+pub fn deinit(base: *Symbol, allocator: *Allocator) void {
+    allocator.free(base.name);
+}
+
+pub fn cast(base: *Symbol, comptime T: type) ?*T {
+    if (base.@"type" != T.base_type) {
+        return null;
+    }
+    return @fieldParentPtr(T, "base", base);
 }
 
 pub fn isStab(sym: macho.nlist_64) bool {
@@ -55,17 +117,6 @@ pub fn isWeakDef(sym: macho.nlist_64) bool {
     return (sym.n_desc & macho.N_WEAK_DEF) != 0;
 }
 
-/// Symbol is local if it is defined and not an extern.
-pub fn isLocal(sym: macho.nlist_64) bool {
-    return isSect(sym) and !isExt(sym);
-}
-
-/// Symbol is global if it is defined and an extern.
-pub fn isGlobal(sym: macho.nlist_64) bool {
-    return isSect(sym) and isExt(sym);
-}
-
-/// Symbol is undefined if it is not defined and an extern.
-pub fn isUndef(sym: macho.nlist_64) bool {
-    return isUndf(sym) and isExt(sym);
+pub fn isWeakRef(sym: macho.nlist_64) bool {
+    return (sym.n_desc & macho.N_WEAK_REF) != 0;
 }