Commit 988b184d03

Jakub Konka <kubkon@jakubkonka.com>
2021-03-26 09:40:01
zld: redo symbol resolution in objects
Store only globals and undefs at the linker level, while all locals stay scoped to the actual object file they were defined in. This is fine since the relocations referencing locals will always be resolved first using the local symbol table before checking for the reference within the linker's global symbol table. This also paves the way for proper symbol resolution from within static and dynamic libraries.
1 parent 262e09c
src/link/MachO/Archive.zig
@@ -98,7 +98,6 @@ pub fn deinit(self: *Archive) void {
         entry.value.deinit(self.allocator);
     }
     self.toc.deinit(self.allocator);
-    self.file.close();
 }
 
 /// Caller owns the returned Archive instance and is responsible for calling
@@ -131,20 +130,12 @@ pub fn initFromFile(allocator: *Allocator, arch: std.Target.Cpu.Arch, ar_name: [
         .name = name,
     };
 
-    var object_offsets = try self.readTableOfContents(reader);
-    defer self.allocator.free(object_offsets);
-
-    var i: usize = 1;
-    while (i < object_offsets.len) : (i += 1) {
-        const offset = object_offsets[i];
-        try reader.context.seekTo(offset);
-        try self.readObject(arch, ar_name, reader);
-    }
+    try self.parseTableOfContents(reader);
 
     return self;
 }
 
-fn readTableOfContents(self: *Archive, reader: anytype) ![]u32 {
+fn parseTableOfContents(self: *Archive, reader: anytype) !void {
     const symtab_size = try reader.readIntLittle(u32);
     var symtab = try self.allocator.alloc(u8, symtab_size);
     defer self.allocator.free(symtab);
@@ -158,10 +149,6 @@ fn readTableOfContents(self: *Archive, reader: anytype) ![]u32 {
     var symtab_stream = std.io.fixedBufferStream(symtab);
     var symtab_reader = symtab_stream.reader();
 
-    var object_offsets = std.ArrayList(u32).init(self.allocator);
-    try object_offsets.append(0);
-    var last: usize = 0;
-
     while (true) {
         const n_strx = symtab_reader.readIntLittle(u32) catch |err| switch (err) {
             error.EndOfStream => break,
@@ -179,19 +166,7 @@ fn readTableOfContents(self: *Archive, reader: anytype) ![]u32 {
         }
 
         try res.entry.value.append(self.allocator, object_offset);
-
-        // TODO This will go once we properly use archive's TOC to pick
-        // an object which defines a missing symbol rather than pasting in
-        // all of the objects always.
-        // Here, we assume that symbols are NOT sorted in any way, and
-        // they point to objects in sequence.
-        if (object_offsets.items[last] != object_offset) {
-            try object_offsets.append(object_offset);
-            last += 1;
-        }
     }
-
-    return object_offsets.toOwnedSlice();
 }
 
 fn readObject(self: *Archive, arch: std.Target.Cpu.Arch, ar_name: []const u8, reader: anytype) !void {
src/link/MachO/Object.zig
@@ -9,6 +9,7 @@ const macho = std.macho;
 const mem = std.mem;
 
 const Allocator = mem.Allocator;
+const Symbol = @import("Symbol.zig");
 const parseName = @import("Zld.zig").parseName;
 
 usingnamespace @import("commands.zig");
@@ -36,7 +37,7 @@ dwarf_debug_str_index: ?u16 = null,
 dwarf_debug_line_index: ?u16 = null,
 dwarf_debug_ranges_index: ?u16 = null,
 
-symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{},
+symtab: std.ArrayListUnmanaged(Symbol) = .{},
 strtab: std.ArrayListUnmanaged(u8) = .{},
 
 data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},
@@ -53,7 +54,6 @@ pub fn deinit(self: *Object) void {
     if (self.ar_name) |v| {
         self.allocator.free(v);
     }
-    self.file.close();
 }
 
 /// Caller owns the returned Object instance and is responsible for calling
@@ -89,21 +89,9 @@ pub fn initFromFile(allocator: *Allocator, arch: std.Target.Cpu.Arch, name: []co
     };
 
     try self.readLoadCommands(reader, .{});
-
-    if (self.symtab_cmd_index != null) {
-        try self.readSymtab();
-        try self.readStrtab();
-    }
-
+    if (self.symtab_cmd_index != null) try self.parseSymtab();
     if (self.data_in_code_cmd_index != null) try self.readDataInCode();
 
-    log.debug("\n\n", .{});
-    log.debug("{s} defines symbols", .{self.name});
-    for (self.symtab.items) |sym| {
-        const symname = self.getString(sym.n_strx);
-        log.debug("'{s}': {}", .{ symname, sym });
-    }
-
     return self;
 }
 
@@ -174,25 +162,33 @@ pub fn readLoadCommands(self: *Object, reader: anytype, offset: ReadOffset) !voi
     }
 }
 
-pub fn readSymtab(self: *Object) !void {
+/// Reads the symbol table and the string table described by the `Symtab`
+/// load command from `self.file` into `self.symtab` and `self.strtab`.
+/// `self.symtab_cmd_index` must be non-null (it is unwrapped with `.?`).
+/// Returns `error.InputOutput` if the file ends before either table does.
+pub fn parseSymtab(self: *Object) !void {
     const symtab_cmd = self.load_commands.items[self.symtab_cmd_index.?].Symtab;
-    var buffer = try self.allocator.alloc(u8, @sizeOf(macho.nlist_64) * symtab_cmd.nsyms);
-    defer self.allocator.free(buffer);
-    _ = try self.file.preadAll(buffer, symtab_cmd.symoff);
+
+    var symtab = try self.allocator.alloc(u8, @sizeOf(macho.nlist_64) * symtab_cmd.nsyms);
+    defer self.allocator.free(symtab);
+
+    // `preadAll` reports how many bytes it actually read; a short read means
+    // the tail of `symtab` is uninitialized and must not be parsed as symbols.
+    const symtab_amt = try self.file.preadAll(symtab, symtab_cmd.symoff);
+    if (symtab_amt != symtab.len) return error.InputOutput;
     try self.symtab.ensureCapacity(self.allocator, symtab_cmd.nsyms);
-    // TODO this align case should not be needed.
-    // Probably a bug in stage1.
-    const slice = @alignCast(@alignOf(macho.nlist_64), mem.bytesAsSlice(macho.nlist_64, buffer));
-    self.symtab.appendSliceAssumeCapacity(slice);
-}
 
-pub fn readStrtab(self: *Object) !void {
-    const symtab_cmd = self.load_commands.items[self.symtab_cmd_index.?].Symtab;
-    var buffer = try self.allocator.alloc(u8, symtab_cmd.strsize);
-    defer self.allocator.free(buffer);
-    _ = try self.file.preadAll(buffer, symtab_cmd.stroff);
-    try self.strtab.ensureCapacity(self.allocator, symtab_cmd.strsize);
-    self.strtab.appendSliceAssumeCapacity(buffer);
+    var stream = std.io.fixedBufferStream(symtab);
+    var reader = stream.reader();
+
+    // Wrap every raw `nlist_64` entry in a `Symbol`; capacity for all
+    // `nsyms` entries was reserved above.
+    while (true) {
+        const symbol = reader.readStruct(macho.nlist_64) catch |err| switch (err) {
+            error.EndOfStream => break,
+            else => |e| return e,
+        };
+        self.symtab.appendAssumeCapacity(.{
+            .inner = symbol,
+        });
+    }
+
+    var strtab = try self.allocator.alloc(u8, symtab_cmd.strsize);
+    defer self.allocator.free(strtab);
+
+    // Same short-read guard as for the symbol table.
+    const strtab_amt = try self.file.preadAll(strtab, symtab_cmd.stroff);
+    if (strtab_amt != strtab.len) return error.InputOutput;
+    try self.strtab.appendSlice(self.allocator, strtab);
 }
 
 pub fn getString(self: *const Object, str_off: u32) []const u8 {
src/link/MachO/Symbol.zig
@@ -0,0 +1,55 @@
+//! Linker-side view of a single MachO symbol: the raw `nlist_64` entry plus
+//! optional bookkeeping about the file and symbol-table slot it came from.
+const Symbol = @This();
+
+const std = @import("std");
+const macho = std.macho;
+
+/// MachO representation of this symbol.
+inner: macho.nlist_64,
+
+/// Index of file where to locate this symbol.
+/// Depending on context, this is either an object file, or a dylib.
+file: ?u16 = null,
+
+/// Index of this symbol within the file's symbol table.
+index: ?u32 = null,
+
+/// Returns true if any `N_STAB` bit is set in `n_type`.
+pub fn isStab(self: Symbol) bool {
+    return (macho.N_STAB & self.inner.n_type) != 0;
+}
+
+/// Returns true if the `N_PEXT` bit is set in `n_type`.
+pub fn isPext(self: Symbol) bool {
+    return (macho.N_PEXT & self.inner.n_type) != 0;
+}
+
+/// Returns true if the `N_EXT` bit is set in `n_type`.
+pub fn isExt(self: Symbol) bool {
+    return (macho.N_EXT & self.inner.n_type) != 0;
+}
+
+/// Returns true if the `N_TYPE` bits of `n_type` equal `N_SECT`.
+pub fn isSect(self: Symbol) bool {
+    return (macho.N_TYPE & self.inner.n_type) == macho.N_SECT;
+}
+
+/// Returns true if the `N_TYPE` bits of `n_type` equal `N_UNDF`.
+pub fn isUndf(self: Symbol) bool {
+    return (macho.N_TYPE & self.inner.n_type) == macho.N_UNDF;
+}
+
+/// Returns true if the `N_WEAK_DEF` bit is set in `n_desc`.
+pub fn isWeakDef(self: Symbol) bool {
+    // `n_desc` is a set of flags, so test the bit instead of comparing for
+    // equality; otherwise a weak definition carrying any other flag is missed.
+    return (self.inner.n_desc & macho.N_WEAK_DEF) != 0;
+}
+
+/// Symbol is local if it is either a stab or it is defined and not an extern.
+pub fn isLocal(self: Symbol) bool {
+    return self.isStab() or (self.isSect() and !self.isExt());
+}
+
+/// Symbol is global if it is defined and an extern.
+pub fn isGlobal(self: Symbol) bool {
+    return self.isSect() and self.isExt();
+}
+
+/// Symbol is undefined if it is not defined and an extern.
+pub fn isUndef(self: Symbol) bool {
+    return self.isUndf() and self.isExt();
+}
src/link/MachO/Zld.zig
@@ -13,9 +13,10 @@ const log = std.log.scoped(.zld);
 const aarch64 = @import("../../codegen/aarch64.zig");
 
 const Allocator = mem.Allocator;
-const CodeSignature = @import("CodeSignature.zig");
 const Archive = @import("Archive.zig");
+const CodeSignature = @import("CodeSignature.zig");
 const Object = @import("Object.zig");
+const Symbol = @import("Symbol.zig");
 const Trie = @import("Trie.zig");
 
 usingnamespace @import("commands.zig");
@@ -28,10 +29,8 @@ page_size: ?u16 = null,
 file: ?fs.File = null,
 out_path: ?[]const u8 = null,
 
-// TODO Eventually, we will want to keep track of the  archives themselves to be able to exclude objects
-// contained within from landing in the final artifact. For now however, since we don't optimise the binary
-// at all, we just move all objects from the archives into the final artifact.
 objects: std.ArrayListUnmanaged(Object) = .{},
+archives: std.ArrayListUnmanaged(Archive) = .{},
 
 load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
 
@@ -74,17 +73,19 @@ la_symbol_ptr_section_index: ?u16 = null,
 data_section_index: ?u16 = null,
 bss_section_index: ?u16 = null,
 
-locals: std.StringArrayHashMapUnmanaged(std.ArrayListUnmanaged(Symbol)) = .{},
-exports: std.StringArrayHashMapUnmanaged(macho.nlist_64) = .{},
-nonlazy_imports: std.StringArrayHashMapUnmanaged(Import) = .{},
-lazy_imports: std.StringArrayHashMapUnmanaged(Import) = .{},
-tlv_bootstrap: ?Import = null,
-threadlocal_offsets: std.ArrayListUnmanaged(u64) = .{},
-local_rebases: std.ArrayListUnmanaged(Pointer) = .{},
-nonlazy_pointers: std.StringArrayHashMapUnmanaged(GotEntry) = .{},
-
+globals: std.StringArrayHashMapUnmanaged(Symbol) = .{},
+undefs: std.StringArrayHashMapUnmanaged(Symbol) = .{},
 strtab: std.ArrayListUnmanaged(u8) = .{},
 
+// locals: std.StringArrayHashMapUnmanaged(std.ArrayListUnmanaged(Symbol)) = .{},
+// exports: std.StringArrayHashMapUnmanaged(macho.nlist_64) = .{},
+// nonlazy_imports: std.StringArrayHashMapUnmanaged(Import) = .{},
+// lazy_imports: std.StringArrayHashMapUnmanaged(Import) = .{},
+// tlv_bootstrap: ?Import = null,
+// threadlocal_offsets: std.ArrayListUnmanaged(u64) = .{},
+// local_rebases: std.ArrayListUnmanaged(Pointer) = .{},
+// nonlazy_pointers: std.StringArrayHashMapUnmanaged(GotEntry) = .{},
+
 stub_helper_stubs_start_off: ?u64 = null,
 
 mappings: std.AutoHashMapUnmanaged(MappingKey, SectionMapping) = .{},
@@ -112,18 +113,6 @@ const SectionMapping = struct {
     offset: u32,
 };
 
-const Symbol = struct {
-    inner: macho.nlist_64,
-    tt: Type,
-    object_id: u16,
-
-    const Type = enum {
-        Local,
-        WeakGlobal,
-        Global,
-    };
-};
-
 const DebugInfo = struct {
     inner: dwarf.DwarfInfo,
     debug_info: []u8,
@@ -188,17 +177,6 @@ const DebugInfo = struct {
     }
 };
 
-pub const Import = struct {
-    /// MachO symbol table entry.
-    symbol: macho.nlist_64,
-
-    /// Id of the dynamic library where the specified entries can be found.
-    dylib_ordinal: i64,
-
-    /// Index of this import within the import list.
-    index: u32,
-};
-
 /// Default path to dyld
 /// TODO instead of hardcoding it, we should probably look through some env vars and search paths
 /// instead but this will do for now.
@@ -218,40 +196,42 @@ pub fn init(allocator: *Allocator) Zld {
 }
 
 pub fn deinit(self: *Zld) void {
-    self.threadlocal_offsets.deinit(self.allocator);
-    self.strtab.deinit(self.allocator);
-    self.local_rebases.deinit(self.allocator);
-    for (self.lazy_imports.items()) |*entry| {
-        self.allocator.free(entry.key);
+    for (self.load_commands.items) |*lc| {
+        lc.deinit(self.allocator);
     }
-    self.lazy_imports.deinit(self.allocator);
-    for (self.nonlazy_imports.items()) |*entry| {
-        self.allocator.free(entry.key);
+    self.load_commands.deinit(self.allocator);
+
+    for (self.objects.items) |*object| {
+        object.deinit();
     }
-    self.nonlazy_imports.deinit(self.allocator);
-    for (self.nonlazy_pointers.items()) |*entry| {
-        self.allocator.free(entry.key);
+    self.objects.deinit(self.allocator);
+
+    for (self.archives.items) |*archive| {
+        archive.deinit();
     }
-    self.nonlazy_pointers.deinit(self.allocator);
-    for (self.exports.items()) |*entry| {
+    self.archives.deinit(self.allocator);
+
+    self.mappings.deinit(self.allocator);
+    self.unhandled_sections.deinit(self.allocator);
+
+    for (self.globals.items()) |*entry| {
         self.allocator.free(entry.key);
     }
-    self.exports.deinit(self.allocator);
-    for (self.locals.items()) |*entry| {
+    self.globals.deinit(self.allocator);
+
+    for (self.undefs.items()) |*entry| {
         self.allocator.free(entry.key);
-        entry.value.deinit(self.allocator);
     }
-    self.locals.deinit(self.allocator);
+    self.undefs.deinit(self.allocator);
+}
+
+pub fn closeFiles(self: *Zld) void {
     for (self.objects.items) |*object| {
-        object.deinit();
+        object.file.close();
     }
-    self.objects.deinit(self.allocator);
-    for (self.load_commands.items) |*lc| {
-        lc.deinit(self.allocator);
+    for (self.archives.items) |*archive| {
+        archive.file.close();
     }
-    self.load_commands.deinit(self.allocator);
-    self.mappings.deinit(self.allocator);
-    self.unhandled_sections.deinit(self.allocator);
     if (self.file) |*f| f.close();
 }
 
@@ -292,16 +272,15 @@ pub fn link(self: *Zld, files: []const []const u8, out_path: []const u8) !void {
 
     try self.populateMetadata();
     try self.parseInputFiles(files);
-    try self.sortSections();
-    try self.resolveImports();
-    try self.allocateTextSegment();
-    try self.allocateDataConstSegment();
-    try self.allocateDataSegment();
-    self.allocateLinkeditSegment();
-    try self.writeStubHelperCommon();
-    try self.resolveSymbols();
-    try self.doRelocs();
-    try self.flush();
+    self.printSymtab();
+    // try self.sortSections();
+    // try self.allocateTextSegment();
+    // try self.allocateDataConstSegment();
+    // try self.allocateDataSegment();
+    // self.allocateLinkeditSegment();
+    // try self.writeStubHelperCommon();
+    // try self.doRelocs();
+    // try self.flush();
 }
 
 fn parseInputFiles(self: *Zld, files: []const []const u8) !void {
@@ -315,7 +294,7 @@ fn parseInputFiles(self: *Zld, files: []const []const u8) !void {
             };
             const index = @intCast(u16, self.objects.items.len);
             try self.objects.append(self.allocator, object);
-            try self.updateMetadata(index);
+            try self.resolveSymbols(index);
             continue;
         }
 
@@ -324,12 +303,7 @@ fn parseInputFiles(self: *Zld, files: []const []const u8) !void {
                 error.NotArchive => break :try_archive,
                 else => |e| return e,
             };
-            defer archive.deinit();
-            while (archive.objects.popOrNull()) |object| {
-                const index = @intCast(u16, self.objects.items.len);
-                try self.objects.append(self.allocator, object);
-                try self.updateMetadata(index);
-            }
+            try self.archives.append(self.allocator, archive);
             continue;
         }
 
@@ -798,94 +772,6 @@ fn sortSections(self: *Zld) !void {
     }
 }
 
-fn resolveImports(self: *Zld) !void {
-    var imports = std.StringArrayHashMap(bool).init(self.allocator);
-    defer imports.deinit();
-
-    for (self.objects.items) |object| {
-        for (object.symtab.items) |sym| {
-            if (isLocal(&sym)) continue;
-
-            const name = object.getString(sym.n_strx);
-            const res = try imports.getOrPut(name);
-            if (isExport(&sym)) {
-                res.entry.value = false;
-                continue;
-            }
-            if (res.found_existing and !res.entry.value)
-                continue;
-            res.entry.value = true;
-        }
-    }
-
-    for (imports.items()) |entry| {
-        if (!entry.value) continue;
-
-        const sym_name = entry.key;
-        const n_strx = try self.makeString(sym_name);
-        var new_sym: macho.nlist_64 = .{
-            .n_strx = n_strx,
-            .n_type = macho.N_UNDF | macho.N_EXT,
-            .n_value = 0,
-            .n_desc = macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY | macho.N_SYMBOL_RESOLVER,
-            .n_sect = 0,
-        };
-        var key = try self.allocator.dupe(u8, sym_name);
-        // TODO handle symbol resolution from non-libc dylibs.
-        const dylib_ordinal = 1;
-
-        // TODO need to rework this. Perhaps should create a set of all possible libc
-        // symbols which are expected to be nonlazy?
-        if (mem.eql(u8, sym_name, "___stdoutp") or
-            mem.eql(u8, sym_name, "___stderrp") or
-            mem.eql(u8, sym_name, "___stdinp") or
-            mem.eql(u8, sym_name, "___stack_chk_guard") or
-            mem.eql(u8, sym_name, "_environ") or
-            mem.eql(u8, sym_name, "__DefaultRuneLocale") or
-            mem.eql(u8, sym_name, "_mach_task_self_"))
-        {
-            log.debug("writing nonlazy symbol '{s}'", .{sym_name});
-            const index = @intCast(u32, self.nonlazy_imports.items().len);
-            try self.nonlazy_imports.putNoClobber(self.allocator, key, .{
-                .symbol = new_sym,
-                .dylib_ordinal = dylib_ordinal,
-                .index = index,
-            });
-        } else if (mem.eql(u8, sym_name, "__tlv_bootstrap")) {
-            log.debug("writing threadlocal symbol '{s}'", .{sym_name});
-            self.tlv_bootstrap = .{
-                .symbol = new_sym,
-                .dylib_ordinal = dylib_ordinal,
-                .index = 0,
-            };
-        } else {
-            log.debug("writing lazy symbol '{s}'", .{sym_name});
-            const index = @intCast(u32, self.lazy_imports.items().len);
-            try self.lazy_imports.putNoClobber(self.allocator, key, .{
-                .symbol = new_sym,
-                .dylib_ordinal = dylib_ordinal,
-                .index = index,
-            });
-        }
-    }
-
-    const n_strx = try self.makeString("dyld_stub_binder");
-    const name = try self.allocator.dupe(u8, "dyld_stub_binder");
-    log.debug("writing nonlazy symbol 'dyld_stub_binder'", .{});
-    const index = @intCast(u32, self.nonlazy_imports.items().len);
-    try self.nonlazy_imports.putNoClobber(self.allocator, name, .{
-        .symbol = .{
-            .n_strx = n_strx,
-            .n_type = std.macho.N_UNDF | std.macho.N_EXT,
-            .n_sect = 0,
-            .n_desc = std.macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY | std.macho.N_SYMBOL_RESOLVER,
-            .n_value = 0,
-        },
-        .dylib_ordinal = 1,
-        .index = index,
-    });
-}
-
 fn allocateTextSegment(self: *Zld) !void {
     const seg = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
     const nexterns = @intCast(u32, self.lazy_imports.items().len);
@@ -1267,90 +1153,49 @@ fn writeStubInStubHelper(self: *Zld, index: u32) !void {
     try self.file.?.pwriteAll(code, stub_off);
 }
 
-fn resolveSymbols(self: *Zld) !void {
-    for (self.objects.items) |object, object_id| {
-        const seg = object.load_commands.items[object.segment_cmd_index.?].Segment;
-        log.debug("\n\n", .{});
-        log.debug("resolving symbols in {s}", .{object.name});
-
-        for (object.symtab.items) |sym| {
-            if (isImport(&sym)) continue;
-
-            const sym_name = object.getString(sym.n_strx);
-            const out_name = try self.allocator.dupe(u8, sym_name);
-            const locs = try self.locals.getOrPut(self.allocator, out_name);
-            defer {
-                if (locs.found_existing) self.allocator.free(out_name);
-            }
-
-            if (!locs.found_existing) {
-                locs.entry.value = .{};
-            }
-
-            const tt: Symbol.Type = blk: {
-                if (isLocal(&sym)) {
-                    break :blk .Local;
-                } else if (isWeakDef(&sym)) {
-                    break :blk .WeakGlobal;
-                } else {
-                    break :blk .Global;
-                }
+/// Merges the non-local symbols of the object at `object_id` into the
+/// linker-level `globals` and `undefs` tables.
+///
+/// * Local symbols are skipped; they stay scoped to their object file.
+/// * A defined extern that is not yet known is inserted into `globals` and any
+///   pending entry of the same name is dropped from `undefs`.
+/// * A defined extern that is already known is ignored when the new symbol is
+///   weak, replaces the existing one when that one is weak, and otherwise
+///   fails with `error.MultipleSymbolDefinitions`.
+/// * An undefined extern is recorded in `undefs` unless it is already defined
+///   or already recorded.
+/// * Any other symbol kind fails with `error.UnhandledSymbolType`.
+///
+/// Keys of both tables are heap-allocated copies of the symbol name.
+fn resolveSymbols(self: *Zld, object_id: u16) !void {
+    const object = self.objects.items[object_id];
+    log.warn("resolving symbols in '{s}'", .{object.name});
+
+    for (object.symtab.items) |sym, sym_id| {
+        if (sym.isLocal()) continue; // If symbol is local to CU, we don't put it in the global symbol table.
+
+        const sym_name = object.getString(sym.inner.n_strx);
+        if (sym.isGlobal()) {
+            const global = self.globals.getEntry(sym_name) orelse {
+                const name = try self.allocator.dupe(u8, sym_name);
+                try self.globals.putNoClobber(self.allocator, name, .{
+                    .inner = sym.inner,
+                    .file = object_id,
+                    .index = @intCast(u32, sym_id),
+                });
+                // The undef entry owns its duplicated key; free it once the
+                // entry is removed so the name does not leak.
+                if (self.undefs.swapRemove(sym_name)) |undef| self.allocator.free(undef.key);
+                continue;
             };
-            if (tt == .Global) {
-                for (locs.entry.value.items) |ss| {
-                    if (ss.tt == .Global) {
-                        log.debug("symbol already defined '{s}'", .{sym_name});
-                        continue;
-                        // log.err("symbol '{s}' defined multiple times: {}", .{ sym_name, sym });
-                        // return error.MultipleSymbolDefinitions;
-                    }
-                }
-            }
 
-            const source_sect_id = sym.n_sect - 1;
-            const target_mapping = self.mappings.get(.{
-                .object_id = @intCast(u16, object_id),
-                .source_sect_id = source_sect_id,
-            }) orelse {
-                if (self.unhandled_sections.get(.{
-                    .object_id = @intCast(u16, object_id),
-                    .source_sect_id = source_sect_id,
-                }) != null) continue;
+            if (sym.isWeakDef()) continue; // If symbol is weak, nothing to do.
+            if (!global.value.isWeakDef()) { // If both symbols are strong, we have a collision.
+                log.err("symbol '{s}' defined multiple times", .{sym_name});
+                return error.MultipleSymbolDefinitions;
+            }
 
-                log.err("section not mapped for symbol '{s}': {}", .{ sym_name, sym });
-                return error.SectionNotMappedForSymbol;
+            global.value = .{
+                .inner = sym.inner,
+                .file = object_id,
+                .index = @intCast(u32, sym_id),
             };
-            const source_sect = seg.sections.items[source_sect_id];
-            const target_seg = self.load_commands.items[target_mapping.target_seg_id].Segment;
-            const target_sect = target_seg.sections.items[target_mapping.target_sect_id];
-            const target_addr = target_sect.addr + target_mapping.offset;
-            const n_value = sym.n_value - source_sect.addr + target_addr;
-
-            log.debug("resolving '{s}':{} as {s} symbol at 0x{x}", .{ sym_name, sym, tt, n_value });
-
-            // TODO there might be a more generic way of doing this.
-            var n_sect: u16 = 0;
-            for (self.load_commands.items) |cmd, cmd_id| {
-                if (cmd != .Segment) break;
-                if (cmd_id == target_mapping.target_seg_id) {
-                    n_sect += target_mapping.target_sect_id + 1;
-                    break;
-                }
-                n_sect += @intCast(u16, cmd.Segment.sections.items.len);
-            }
+        } else if (sym.isUndef()) {
+            if (self.globals.contains(sym_name)) continue; // Nothing to do if we already found a definition.
+            if (self.undefs.contains(sym_name)) continue; // No need to reinsert the undef ref.
 
-            const n_strx = try self.makeString(sym_name);
-            try locs.entry.value.append(self.allocator, .{
-                .inner = .{
-                    .n_strx = n_strx,
-                    .n_value = n_value,
-                    .n_type = macho.N_SECT,
-                    .n_desc = sym.n_desc,
-                    .n_sect = @intCast(u8, n_sect),
-                },
-                .tt = tt,
-                .object_id = @intCast(u16, object_id),
+            const name = try self.allocator.dupe(u8, sym_name);
+            try self.undefs.putNoClobber(self.allocator, name, .{
+                .inner = sym.inner,
             });
+        } else {
+            // Oh no, unhandled symbol type, report back to the user.
+            log.err("unhandled symbol type for symbol {any}", .{sym});
+            return error.UnhandledSymbolType;
         }
     }
 }
@@ -3175,7 +3020,6 @@ fn writeCodeSignature(self: *Zld) !void {
     try code_sig.write(stream.writer());
 
     log.debug("writing code signature from 0x{x} to 0x{x}", .{ code_sig_cmd.dataoff, code_sig_cmd.dataoff + buffer.len });
-
     try self.file.?.pwriteAll(buffer, code_sig_cmd.dataoff);
 }
 
@@ -3261,34 +3105,19 @@ pub fn parseName(name: *const [16]u8) []const u8 {
     return name[0..len];
 }
 
-fn isLocal(sym: *const macho.nlist_64) callconv(.Inline) bool {
-    if (isExtern(sym)) return false;
-    const tt = macho.N_TYPE & sym.n_type;
-    return tt == macho.N_SECT;
-}
-
-fn isExport(sym: *const macho.nlist_64) callconv(.Inline) bool {
-    if (!isExtern(sym)) return false;
-    const tt = macho.N_TYPE & sym.n_type;
-    return tt == macho.N_SECT;
-}
-
-fn isImport(sym: *const macho.nlist_64) callconv(.Inline) bool {
-    if (!isExtern(sym)) return false;
-    const tt = macho.N_TYPE & sym.n_type;
-    return tt == macho.N_UNDF;
-}
-
-fn isExtern(sym: *const macho.nlist_64) callconv(.Inline) bool {
-    if ((sym.n_type & macho.N_EXT) == 0) return false;
-    return (sym.n_type & macho.N_PEXT) == 0;
-}
-
-fn isWeakDef(sym: *const macho.nlist_64) callconv(.Inline) bool {
-    return (sym.n_desc & macho.N_WEAK_DEF) != 0;
-}
-
 fn aarch64IsArithmetic(inst: *const [4]u8) callconv(.Inline) bool {
     const group_decode = @truncate(u5, inst[3]);
     return ((group_decode >> 2) == 4);
 }
+
+/// Debugging aid: logs every entry of the linker-level `globals` table and
+/// then every entry of the `undefs` table at warning level, one line each.
+fn printSymtab(self: Zld) void {
+    log.warn("globals", .{});
+    for (self.globals.items()) |global| {
+        log.warn("    | {s} => {any}", .{ global.key, global.value });
+    }
+
+    log.warn("undefs", .{});
+    for (self.undefs.items()) |undef| {
+        log.warn("    | {s} => {any}", .{ undef.key, undef.value });
+    }
+}
src/link/MachO.zig
@@ -658,7 +658,10 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void {
 
         if (use_zld) {
             var zld = Zld.init(self.base.allocator);
-            defer zld.deinit();
+            defer {
+                zld.closeFiles();
+                zld.deinit();
+            }
             zld.arch = target.cpu.arch;
 
             var input_files = std.ArrayList([]const u8).init(self.base.allocator);
CMakeLists.txt
@@ -569,6 +569,7 @@ set(ZIG_STAGE2_SOURCES
     "${CMAKE_SOURCE_DIR}/src/link/MachO/CodeSignature.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/DebugSymbols.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/Object.zig"
+    "${CMAKE_SOURCE_DIR}/src/link/MachO/Symbol.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/Trie.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/Zld.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig"