Commit 0c171afab0

Jakub Konka <kubkon@jakubkonka.com>
2024-01-10 19:39:40
macho: parse an input object file!
1 parent 7588eec
src/link/MachO/Atom.zig
@@ -38,7 +38,7 @@ unwind_records: Loc = .{},
 flags: Flags = .{},
 
 pub fn getName(self: Atom, macho_file: *MachO) [:0]const u8 {
-    return macho_file.string_intern.getAssumeExists(self.name);
+    return macho_file.strings.getAssumeExists(self.name);
 }
 
 pub fn getFile(self: Atom, macho_file: *MachO) File {
src/link/MachO/Dylib.zig
@@ -431,7 +431,7 @@ pub fn initSymbols(self: *Dylib, macho_file: *MachO) !void {
 
     for (self.exports.items(.name)) |noff| {
         const name = self.getString(noff);
-        const off = try macho_file.string_intern.insert(gpa, name);
+        const off = try macho_file.strings.insert(gpa, name);
         const gop = try macho_file.getOrCreateGlobal(off);
         self.symbols.addOneAssumeCapacity().* = gop.index;
     }
src/link/MachO/eh_frame.zig
@@ -155,10 +155,10 @@ pub const Fde = struct {
         const pc_begin = std.mem.readInt(i64, data[8..][0..8], .little);
         const taddr: u64 = @intCast(@as(i64, @intCast(sect.addr + fde.offset + 8)) + pc_begin);
         fde.atom = object.findAtom(taddr) orelse {
-            macho_file.base.fatal("{}: {s},{s}: 0x{x}: invalid function reference in FDE", .{
-                object.fmtPath(), sect.segName(), sect.sectName(), fde.offset + 8,
+            try macho_file.reportParseError2(object.index, "{s},{s}: 0x{x}: invalid function reference in FDE", .{
+                sect.segName(), sect.sectName(), fde.offset + 8,
             });
-            return error.ParseFailed;
+            return error.MalformedObject;
         };
         const atom = fde.getAtom(macho_file);
         fde.atom_offset = @intCast(taddr - atom.getInputAddress(macho_file));
@@ -172,11 +172,10 @@ pub const Fde = struct {
         if (cie_index) |cie| {
             fde.cie = cie;
         } else {
-            macho_file.base.fatal("{}: no matching CIE found for FDE at offset {x}", .{
-                object.fmtPath(),
+            try macho_file.reportParseError2(object.index, "no matching CIE found for FDE at offset {x}", .{
                 fde.offset,
             });
-            return error.ParseFailed;
+            return error.MalformedObject;
         }
 
         const cie = fde.getCie(macho_file);
@@ -194,10 +193,10 @@ pub const Fde = struct {
             };
             const lsda_addr: u64 = @intCast(@as(i64, @intCast(sect.addr + fde.offset + fde.lsda_ptr_offset)) + lsda_ptr);
             fde.lsda = object.findAtom(lsda_addr) orelse {
-                macho_file.base.fatal("{}: {s},{s}: 0x{x}: invalid LSDA reference in FDE", .{
-                    object.fmtPath(), sect.segName(), sect.sectName(), fde.offset + fde.lsda_ptr_offset,
+                try macho_file.reportParseError2(object.index, "{s},{s}: 0x{x}: invalid LSDA reference in FDE", .{
+                    sect.segName(), sect.sectName(), fde.offset + fde.lsda_ptr_offset,
                 });
-                return error.ParseFailed;
+                return error.MalformedObject;
             };
             const lsda_atom = fde.getLsdaAtom(macho_file).?;
             fde.lsda_offset = @intCast(lsda_addr - lsda_atom.getInputAddress(macho_file));
src/link/MachO/Object.zig
@@ -31,6 +31,15 @@ num_weak_bind_relocs: u32 = 0,
 
 output_symtab_ctx: MachO.SymtabCtx = .{},
 
+pub fn isObject(path: []const u8) !bool {
+    const file = try std.fs.cwd().openFile(path, .{});
+    defer file.close();
+    const reader = file.reader();
+    const header = reader.readStruct(macho.mach_header_64) catch return false;
+    if (header.magic != macho.MH_MAGIC_64) return false;
+    return header.filetype == macho.MH_OBJECT;
+}
+
 pub fn deinit(self: *Object, allocator: Allocator) void {
     for (self.sections.items(.relocs), self.sections.items(.subsections)) |*relocs, *sub| {
         relocs.deinit(allocator);
@@ -55,12 +63,25 @@ pub fn parse(self: *Object, macho_file: *MachO) !void {
     const tracy = trace(@src());
     defer tracy.end();
 
-    const gpa = macho_file.base.allocator;
+    const gpa = macho_file.base.comp.gpa;
     var stream = std.io.fixedBufferStream(self.data);
     const reader = stream.reader();
 
     self.header = try reader.readStruct(macho.mach_header_64);
 
+    const this_cpu_arch: std.Target.Cpu.Arch = switch (self.header.?.cputype) {
+        macho.CPU_TYPE_ARM64 => .aarch64,
+        macho.CPU_TYPE_X86_64 => .x86_64,
+        else => |x| {
+            try macho_file.reportParseError2(self.index, "unknown cpu architecture: {d}", .{x});
+            return error.InvalidCpuArch;
+        },
+    };
+    if (macho_file.getTarget().cpu.arch != this_cpu_arch) {
+        try macho_file.reportParseError2(self.index, "invalid cpu architecture: {s}", .{@tagName(this_cpu_arch)});
+        return error.InvalidCpuArch;
+    }
+
     if (self.getLoadCommand(.SEGMENT_64)) |lc| {
         const sections = lc.getSections();
         try self.sections.ensureUnusedCapacity(gpa, sections.len);
@@ -146,6 +167,20 @@ pub fn parse(self: *Object, macho_file: *MachO) !void {
     }
 
     self.initPlatform();
+
+    if (self.platform) |platform| {
+        if (!macho_file.platform.eqlTarget(platform)) {
+            try macho_file.reportParseError2(self.index, "invalid platform: {}", .{
+                platform.fmtTarget(macho_file.getTarget().cpu.arch),
+            });
+            return error.InvalidTarget;
+        }
+        if (macho_file.platform.version.order(platform.version) == .lt) {
+            try macho_file.reportParseError2(self.index, "object file built for newer platform: {}", .{platform});
+            return error.InvalidTarget;
+        }
+    }
+
     try self.initDwarfInfo(macho_file);
 
     for (self.atoms.items) |atom_index| {
@@ -175,7 +210,7 @@ inline fn isLiteral(sect: macho.section_64) bool {
 fn initSubsections(self: *Object, nlists: anytype, macho_file: *MachO) !void {
     const tracy = trace(@src());
     defer tracy.end();
-    const gpa = macho_file.base.allocator;
+    const gpa = macho_file.base.comp.gpa;
     const slice = self.sections.slice();
     for (slice.items(.header), slice.items(.subsections), 0..) |sect, *subsections, n_sect| {
         if (isLiteral(sect)) continue;
@@ -243,7 +278,7 @@ fn initSubsections(self: *Object, nlists: anytype, macho_file: *MachO) !void {
 fn initSections(self: *Object, nlists: anytype, macho_file: *MachO) !void {
     const tracy = trace(@src());
     defer tracy.end();
-    const gpa = macho_file.base.allocator;
+    const gpa = macho_file.base.comp.gpa;
     const slice = self.sections.slice();
 
     try self.atoms.ensureUnusedCapacity(gpa, self.sections.items(.header).len);
@@ -299,12 +334,12 @@ const AddAtomArgs = struct {
 };
 
 fn addAtom(self: *Object, args: AddAtomArgs, macho_file: *MachO) !Atom.Index {
-    const gpa = macho_file.base.allocator;
+    const gpa = macho_file.base.comp.gpa;
     const atom_index = try macho_file.addAtom();
     const atom = macho_file.getAtom(atom_index).?;
     atom.file = self.index;
     atom.atom_index = atom_index;
-    atom.name = try macho_file.string_intern.insert(gpa, args.name);
+    atom.name = try macho_file.strings.insert(gpa, args.name);
     atom.n_sect = args.n_sect;
     atom.size = args.size;
     atom.alignment = args.alignment;
@@ -319,7 +354,7 @@ fn initLiteralSections(self: *Object, macho_file: *MachO) !void {
     // TODO here we should split into equal-sized records, hash the contents, and then
     // deduplicate - ICF.
     // For now, we simply cover each literal section with one large atom.
-    const gpa = macho_file.base.allocator;
+    const gpa = macho_file.base.comp.gpa;
     const slice = self.sections.slice();
 
     try self.atoms.ensureUnusedCapacity(gpa, self.sections.items(.header).len);
@@ -401,10 +436,10 @@ fn linkNlistToAtom(self: *Object, macho_file: *MachO) !void {
             if (self.findAtomInSection(nlist.n_value, nlist.n_sect - 1)) |atom_index| {
                 atom.* = atom_index;
             } else {
-                macho_file.base.fatal("{}: symbol {s} not attached to any (sub)section", .{
-                    self.fmtPath(), self.getString(nlist.n_strx),
+                try macho_file.reportParseError2(self.index, "symbol {s} not attached to any (sub)section", .{
+                    self.getString(nlist.n_strx),
                 });
-                return error.ParseFailed;
+                return error.MalformedObject;
             }
         }
     }
@@ -413,7 +448,7 @@ fn linkNlistToAtom(self: *Object, macho_file: *MachO) !void {
 fn initSymbols(self: *Object, macho_file: *MachO) !void {
     const tracy = trace(@src());
     defer tracy.end();
-    const gpa = macho_file.base.allocator;
+    const gpa = macho_file.base.comp.gpa;
     const slice = self.symtab.slice();
 
     try self.symbols.ensureUnusedCapacity(gpa, slice.items(.nlist).len);
@@ -421,7 +456,7 @@ fn initSymbols(self: *Object, macho_file: *MachO) !void {
     for (slice.items(.nlist), slice.items(.atom), 0..) |nlist, atom_index, i| {
         if (nlist.ext()) {
             const name = self.getString(nlist.n_strx);
-            const off = try macho_file.string_intern.insert(gpa, name);
+            const off = try macho_file.strings.insert(gpa, name);
             const gop = try macho_file.getOrCreateGlobal(off);
             self.symbols.addOneAssumeCapacity().* = gop.index;
             continue;
@@ -433,7 +468,7 @@ fn initSymbols(self: *Object, macho_file: *MachO) !void {
         const name = self.getString(nlist.n_strx);
         symbol.* = .{
             .value = nlist.n_value,
-            .name = try macho_file.string_intern.insert(gpa, name),
+            .name = try macho_file.strings.insert(gpa, name),
             .nlist_idx = @intCast(i),
             .atom = 0,
             .file = self.index,
@@ -482,7 +517,7 @@ fn initSymbolStabs(self: *Object, nlists: anytype, macho_file: *MachO) !void {
 
     if (start == end) return;
 
-    const gpa = macho_file.base.allocator;
+    const gpa = macho_file.base.comp.gpa;
     const syms = self.symtab.items(.nlist);
     const sym_lookup = SymbolLookup{ .ctx = self, .entries = nlists };
 
@@ -490,11 +525,10 @@ fn initSymbolStabs(self: *Object, nlists: anytype, macho_file: *MachO) !void {
     while (i < end) : (i += 1) {
         const open = syms[i];
         if (open.n_type != macho.N_SO) {
-            macho_file.base.fatal("{}: unexpected symbol stab type 0x{x} as the first entry", .{
-                self.fmtPath(),
+            try macho_file.reportParseError2(self.index, "unexpected symbol stab type 0x{x} as the first entry", .{
                 open.n_type,
             });
-            return error.ParseFailed;
+            return error.MalformedObject;
         }
 
         while (i < end and syms[i].n_type == macho.N_SO and syms[i].n_sect != 0) : (i += 1) {}
@@ -522,11 +556,10 @@ fn initSymbolStabs(self: *Object, nlists: anytype, macho_file: *MachO) !void {
                     stab.symbol = sym_lookup.find(nlist.n_value);
                 },
                 else => {
-                    macho_file.base.fatal("{}: unhandled symbol stab type 0x{x}", .{
-                        self.fmtPath(),
+                    try macho_file.reportParseError2(self.index, "unhandled symbol stab type 0x{x}", .{
                         nlist.n_type,
                     });
-                    return error.ParseFailed;
+                    return error.MalformedObject;
                 },
             }
             try sf.stabs.append(gpa, stab);
@@ -548,7 +581,7 @@ fn sortAtoms(self: *Object, macho_file: *MachO) !void {
 fn initRelocs(self: *Object, macho_file: *MachO) !void {
     const tracy = trace(@src());
     defer tracy.end();
-    const cpu_arch = macho_file.options.cpu_arch.?;
+    const cpu_arch = macho_file.getTarget().cpu.arch;
     const slice = self.sections.slice();
 
     for (slice.items(.header), slice.items(.relocs), 0..) |sect, *out, n_sect| {
@@ -589,7 +622,7 @@ fn initRelocs(self: *Object, macho_file: *MachO) !void {
 fn initEhFrameRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void {
     const tracy = trace(@src());
     defer tracy.end();
-    const gpa = macho_file.base.allocator;
+    const gpa = macho_file.base.comp.gpa;
     const nlists = self.symtab.items(.nlist);
     const slice = self.sections.slice();
     const sect = slice.items(.header)[sect_id];
@@ -667,10 +700,10 @@ fn initEhFrameRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void {
                 const cie = for (self.cies.items) |*cie| {
                     if (cie.offset <= rel.offset and rel.offset < cie.offset + cie.getSize()) break cie;
                 } else {
-                    macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{
-                        self.fmtPath(), sect.segName(), sect.sectName(), rel.offset,
+                    try macho_file.reportParseError2(self.index, "{s},{s}: 0x{x}: bad relocation", .{
+                        sect.segName(), sect.sectName(), rel.offset,
                     });
-                    return error.ParseFailed;
+                    return error.MalformedObject;
                 };
                 cie.personality = .{ .index = @intCast(rel.target), .offset = rel.offset - cie.offset };
             },
@@ -695,7 +728,7 @@ fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void {
         }
     };
 
-    const gpa = macho_file.base.allocator;
+    const gpa = macho_file.base.comp.gpa;
     const data = self.getSectionData(sect_id);
     const nrecs = @divExact(data.len, @sizeOf(macho.compact_unwind_entry));
     const recs = @as([*]align(1) const macho.compact_unwind_entry, @ptrCast(data.ptr))[0..nrecs];
@@ -722,10 +755,10 @@ fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void {
 
         for (relocs[reloc_start..reloc_idx]) |rel| {
             if (rel.type != .unsigned or rel.meta.length != 3) {
-                macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{
-                    self.fmtPath(), header.segName(), header.sectName(), rel.offset,
+                try macho_file.reportParseError2(self.index, "{s},{s}: 0x{x}: bad relocation", .{
+                    header.segName(), header.sectName(), rel.offset,
                 });
-                return error.ParseFailed;
+                return error.MalformedObject;
             }
             assert(rel.type == .unsigned and rel.meta.length == 3); // TODO error
             const offset = rel.offset - rec_start;
@@ -740,10 +773,10 @@ fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void {
                         const atom = out.getAtom(macho_file);
                         out.atom_offset = @intCast(rec.rangeStart - atom.getInputAddress(macho_file));
                     } else {
-                        macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{
-                            self.fmtPath(), header.segName(), header.sectName(), rel.offset,
+                        try macho_file.reportParseError2(self.index, "{s},{s}: 0x{x}: bad relocation", .{
+                            header.segName(), header.sectName(), rel.offset,
                         });
-                        return error.ParseFailed;
+                        return error.MalformedObject;
                     },
                 },
                 16 => switch (rel.tag) { // personality function
@@ -753,10 +786,10 @@ fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void {
                     .local => if (sym_lookup.find(rec.personalityFunction)) |sym_index| {
                         out.personality = sym_index;
                     } else {
-                        macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{
-                            self.fmtPath(), header.segName(), header.sectName(), rel.offset,
+                        try macho_file.reportParseError2(self.index, "{s},{s}: 0x{x}: bad relocation", .{
+                            header.segName(), header.sectName(), rel.offset,
                         });
-                        return error.ParseFailed;
+                        return error.MalformedObject;
                     },
                 },
                 24 => switch (rel.tag) { // lsda
@@ -769,10 +802,10 @@ fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void {
                         const atom = out.getLsdaAtom(macho_file).?;
                         out.lsda_offset = @intCast(rec.lsda - atom.getInputAddress(macho_file));
                     } else {
-                        macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{
-                            self.fmtPath(), header.segName(), header.sectName(), rel.offset,
+                        try macho_file.reportParseError2(self.index, "{s},{s}: 0x{x}: bad relocation", .{
+                            header.segName(), header.sectName(), rel.offset,
                         });
-                        return error.ParseFailed;
+                        return error.MalformedObject;
                     },
                 },
                 else => {},
@@ -780,7 +813,7 @@ fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void {
         }
     }
 
-    if (!macho_file.options.relocatable) try self.synthesiseNullUnwindRecords(macho_file);
+    if (!macho_file.base.isObject()) try self.synthesiseNullUnwindRecords(macho_file);
 
     const sortFn = struct {
         fn sortFn(ctx: *MachO, lhs_index: UnwindInfo.Record.Index, rhs_index: UnwindInfo.Record.Index) bool {
@@ -818,7 +851,7 @@ fn synthesiseNullUnwindRecords(self: *Object, macho_file: *MachO) !void {
 
     const Superposition = struct { atom: Atom.Index, size: u64, cu: ?UnwindInfo.Record.Index = null, fde: ?Fde.Index = null };
 
-    const gpa = macho_file.base.allocator;
+    const gpa = macho_file.base.comp.gpa;
     var superposition = std.AutoArrayHashMap(u64, Superposition).init(gpa);
     defer superposition.deinit();
 
@@ -875,7 +908,7 @@ fn synthesiseNullUnwindRecords(self: *Object, macho_file: *MachO) !void {
                 rec.atom_offset = fde.atom_offset;
                 rec.fde = fde_index;
                 rec.file = fde.file;
-                switch (macho_file.options.cpu_arch.?) {
+                switch (macho_file.getTarget().cpu.arch) {
                     .x86_64 => rec.enc.setMode(macho.UNWIND_X86_64_MODE.DWARF),
                     .aarch64 => rec.enc.setMode(macho.UNWIND_ARM64_MODE.DWARF),
                     else => unreachable,
@@ -907,7 +940,7 @@ fn initPlatform(self: *Object) void {
             .VERSION_MIN_IPHONEOS,
             .VERSION_MIN_TVOS,
             .VERSION_MIN_WATCHOS,
-            => break MachO.Options.Platform.fromLoadCommand(cmd),
+            => break MachO.Platform.fromLoadCommand(cmd),
             else => {},
         }
     } else null;
@@ -921,7 +954,7 @@ fn initDwarfInfo(self: *Object, macho_file: *MachO) !void {
     const tracy = trace(@src());
     defer tracy.end();
 
-    const gpa = macho_file.base.allocator;
+    const gpa = macho_file.base.comp.gpa;
 
     var debug_info_index: ?usize = null;
     var debug_abbrev_index: ?usize = null;
@@ -942,8 +975,8 @@ fn initDwarfInfo(self: *Object, macho_file: *MachO) !void {
         .debug_str = if (debug_str_index) |index| self.getSectionData(@intCast(index)) else "",
     };
     dwarf_info.init(gpa) catch {
-        macho_file.base.fatal("{}: invalid __DWARF info found", .{self.fmtPath()});
-        return error.ParseFailed;
+        try macho_file.reportParseError2(self.index, "invalid __DWARF info found", .{});
+        return error.MalformedObject;
     };
     self.dwarf_info = dwarf_info;
 }
@@ -1060,7 +1093,7 @@ pub fn scanRelocs(self: Object, macho_file: *MachO) !void {
 pub fn convertTentativeDefinitions(self: *Object, macho_file: *MachO) !void {
     const tracy = trace(@src());
     defer tracy.end();
-    const gpa = macho_file.base.allocator;
+    const gpa = macho_file.base.comp.gpa;
 
     for (self.symbols.items, 0..) |index, i| {
         const sym = macho_file.getSymbol(index);
@@ -1079,7 +1112,7 @@ pub fn convertTentativeDefinitions(self: *Object, macho_file: *MachO) !void {
         defer gpa.free(name);
         const atom = macho_file.getAtom(atom_index).?;
         atom.atom_index = atom_index;
-        atom.name = try macho_file.string_intern.insert(gpa, name);
+        atom.name = try macho_file.strings.insert(gpa, name);
         atom.file = self.index;
         atom.size = nlist.n_value;
         atom.alignment = (nlist.n_desc >> 8) & 0x0f;
@@ -1130,7 +1163,7 @@ pub fn calcSymtabSize(self: *Object, macho_file: *MachO) !void {
         const name = sym.getName(macho_file);
         // TODO in -r mode, we actually want to merge symbol names and emit only one
         // work it out when emitting relocs
-        if (name.len > 0 and (name[0] == 'L' or name[0] == 'l') and !macho_file.options.relocatable) continue;
+        if (name.len > 0 and (name[0] == 'L' or name[0] == 'l') and !macho_file.base.isObject()) continue;
         sym.flags.output_symtab = true;
         if (sym.isLocal()) {
             try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nlocals }, macho_file);
@@ -1171,7 +1204,7 @@ pub fn calcStabsSize(self: *Object, macho_file: *MachO) void {
             const file = sym.getFile(macho_file) orelse continue;
             if (file.getIndex() != self.index) continue;
             if (!sym.flags.output_symtab) continue;
-            if (macho_file.options.relocatable) {
+            if (macho_file.base.isObject()) {
                 const name = sym.getName(macho_file);
                 if (name.len > 0 and (name[0] == 'L' or name[0] == 'l')) continue;
             }
@@ -1329,7 +1362,7 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO) void {
             const file = sym.getFile(macho_file) orelse continue;
             if (file.getIndex() != self.index) continue;
             if (!sym.flags.output_symtab) continue;
-            if (macho_file.options.relocatable) {
+            if (macho_file.base.isObject()) {
                 const name = sym.getName(macho_file);
                 if (name.len > 0 and (name[0] == 'L' or name[0] == 'l')) continue;
             }
@@ -1747,7 +1780,7 @@ const x86_64 = struct {
         out: *std.ArrayListUnmanaged(Relocation),
         macho_file: *MachO,
     ) !void {
-        const gpa = macho_file.base.allocator;
+        const gpa = macho_file.base.comp.gpa;
 
         const relocs = @as(
             [*]align(1) const macho.relocation_info,
@@ -1783,10 +1816,10 @@ const x86_64 = struct {
                 else
                     addend;
                 const target = self.findAtomInSection(@intCast(taddr), @intCast(nsect)) orelse {
-                    macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{
-                        self.fmtPath(), sect.segName(), sect.sectName(), rel.r_address,
+                    try macho_file.reportParseError2(self.index, "{s},{s}: 0x{x}: bad relocation", .{
+                        sect.segName(), sect.sectName(), rel.r_address,
                     });
-                    return error.ParseFailed;
+                    return error.MalformedObject;
                 };
                 addend = taddr - @as(i64, @intCast(macho_file.getAtom(target).?.getInputAddress(macho_file)));
                 break :blk target;
@@ -1796,34 +1829,38 @@ const x86_64 = struct {
                 @as(macho.reloc_type_x86_64, @enumFromInt(relocs[i - 1].r_type)) == .X86_64_RELOC_SUBTRACTOR)
             blk: {
                 if (rel_type != .X86_64_RELOC_UNSIGNED) {
-                    macho_file.base.fatal("{}: {s},{s}: 0x{x}: X86_64_RELOC_SUBTRACTOR followed by {s}", .{
-                        self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type),
+                    try macho_file.reportParseError2(self.index, "{s},{s}: 0x{x}: X86_64_RELOC_SUBTRACTOR followed by {s}", .{
+                        sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type),
                     });
-                    return error.ParseFailed;
+                    return error.MalformedObject;
                 }
                 break :blk true;
             } else false;
 
             const @"type": Relocation.Type = validateRelocType(rel, rel_type) catch |err| {
                 switch (err) {
-                    error.Pcrel => macho_file.base.fatal(
-                        "{}: {s},{s}: 0x{x}: PC-relative {s} relocation",
-                        .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) },
+                    error.Pcrel => try macho_file.reportParseError2(
+                        self.index,
+                        "{s},{s}: 0x{x}: PC-relative {s} relocation",
+                        .{ sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) },
                     ),
-                    error.NonPcrel => macho_file.base.fatal(
-                        "{}: {s},{s}: 0x{x}: non-PC-relative {s} relocation",
-                        .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) },
+                    error.NonPcrel => try macho_file.reportParseError2(
+                        self.index,
+                        "{s},{s}: 0x{x}: non-PC-relative {s} relocation",
+                        .{ sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) },
                     ),
-                    error.InvalidLength => macho_file.base.fatal(
-                        "{}: {s},{s}: 0x{x}: invalid length of {d} in {s} relocation",
-                        .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @as(u8, 1) << rel.r_length, @tagName(rel_type) },
+                    error.InvalidLength => try macho_file.reportParseError2(
+                        self.index,
+                        "{s},{s}: 0x{x}: invalid length of {d} in {s} relocation",
+                        .{ sect.segName(), sect.sectName(), rel_offset, @as(u8, 1) << rel.r_length, @tagName(rel_type) },
                     ),
-                    error.NonExtern => macho_file.base.fatal(
-                        "{}: {s},{s}: 0x{x}: non-extern target in {s} relocation",
-                        .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) },
+                    error.NonExtern => try macho_file.reportParseError2(
+                        self.index,
+                        "{s},{s}: 0x{x}: non-extern target in {s} relocation",
+                        .{ sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) },
                     ),
                 }
-                return error.ParseFailed;
+                return error.MalformedObject;
             };
 
             out.appendAssumeCapacity(.{
@@ -1899,7 +1936,7 @@ const aarch64 = struct {
         out: *std.ArrayListUnmanaged(Relocation),
         macho_file: *MachO,
     ) !void {
-        const gpa = macho_file.base.allocator;
+        const gpa = macho_file.base.comp.gpa;
 
         const relocs = @as(
             [*]align(1) const macho.relocation_info,
@@ -1921,20 +1958,21 @@ const aarch64 = struct {
                     addend = rel.r_symbolnum;
                     i += 1;
                     if (i >= relocs.len) {
-                        macho_file.base.fatal("{}: {s},{s}: 0x{x}: unterminated ARM64_RELOC_ADDEND", .{
-                            self.fmtPath(), sect.segName(), sect.sectName(), rel_offset,
+                        try macho_file.reportParseError2(self.index, "{s},{s}: 0x{x}: unterminated ARM64_RELOC_ADDEND", .{
+                            sect.segName(), sect.sectName(), rel_offset,
                         });
-                        return error.ParseFailed;
+                        return error.MalformedObject;
                     }
                     rel = relocs[i];
                     switch (@as(macho.reloc_type_arm64, @enumFromInt(rel.r_type))) {
                         .ARM64_RELOC_PAGE21, .ARM64_RELOC_PAGEOFF12 => {},
                         else => |x| {
-                            macho_file.base.fatal(
-                                "{}: {s},{s}: 0x{x}: ARM64_RELOC_ADDEND followed by {s}",
-                                .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(x) },
+                            try macho_file.reportParseError2(
+                                self.index,
+                                "{s},{s}: 0x{x}: ARM64_RELOC_ADDEND followed by {s}",
+                                .{ sect.segName(), sect.sectName(), rel_offset, @tagName(x) },
                             );
-                            return error.ParseFailed;
+                            return error.MalformedObject;
                         },
                     }
                 },
@@ -1958,10 +1996,10 @@ const aarch64 = struct {
                 else
                     addend;
                 const target = self.findAtomInSection(@intCast(taddr), @intCast(nsect)) orelse {
-                    macho_file.base.fatal("{}: {s},{s}: 0x{x}: bad relocation", .{
-                        self.fmtPath(), sect.segName(), sect.sectName(), rel.r_address,
+                    try macho_file.reportParseError2(self.index, "{s},{s}: 0x{x}: bad relocation", .{
+                        sect.segName(), sect.sectName(), rel.r_address,
                     });
-                    return error.ParseFailed;
+                    return error.MalformedObject;
                 };
                 addend = taddr - @as(i64, @intCast(macho_file.getAtom(target).?.getInputAddress(macho_file)));
                 break :blk target;
@@ -1971,34 +2009,38 @@ const aarch64 = struct {
                 @as(macho.reloc_type_arm64, @enumFromInt(relocs[i - 1].r_type)) == .ARM64_RELOC_SUBTRACTOR)
             blk: {
                 if (rel_type != .ARM64_RELOC_UNSIGNED) {
-                    macho_file.base.fatal("{}: {s},{s}: 0x{x}: ARM64_RELOC_SUBTRACTOR followed by {s}", .{
-                        self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type),
+                    try macho_file.reportParseError2(self.index, "{s},{s}: 0x{x}: ARM64_RELOC_SUBTRACTOR followed by {s}", .{
+                        sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type),
                     });
-                    return error.ParseFailed;
+                    return error.MalformedObject;
                 }
                 break :blk true;
             } else false;
 
             const @"type": Relocation.Type = validateRelocType(rel, rel_type) catch |err| {
                 switch (err) {
-                    error.Pcrel => macho_file.base.fatal(
-                        "{}: {s},{s}: 0x{x}: PC-relative {s} relocation",
-                        .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) },
+                    error.Pcrel => try macho_file.reportParseError2(
+                        self.index,
+                        "{s},{s}: 0x{x}: PC-relative {s} relocation",
+                        .{ sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) },
                     ),
-                    error.NonPcrel => macho_file.base.fatal(
-                        "{}: {s},{s}: 0x{x}: non-PC-relative {s} relocation",
-                        .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) },
+                    error.NonPcrel => try macho_file.reportParseError2(
+                        self.index,
+                        "{s},{s}: 0x{x}: non-PC-relative {s} relocation",
+                        .{ sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) },
                     ),
-                    error.InvalidLength => macho_file.base.fatal(
-                        "{}: {s},{s}: 0x{x}: invalid length of {d} in {s} relocation",
-                        .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @as(u8, 1) << rel.r_length, @tagName(rel_type) },
+                    error.InvalidLength => try macho_file.reportParseError2(
+                        self.index,
+                        "{s},{s}: 0x{x}: invalid length of {d} in {s} relocation",
+                        .{ sect.segName(), sect.sectName(), rel_offset, @as(u8, 1) << rel.r_length, @tagName(rel_type) },
                     ),
-                    error.NonExtern => macho_file.base.fatal(
-                        "{}: {s},{s}: 0x{x}: non-extern target in {s} relocation",
-                        .{ self.fmtPath(), sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) },
+                    error.NonExtern => try macho_file.reportParseError2(
+                        self.index,
+                        "{s},{s}: 0x{x}: non-extern target in {s} relocation",
+                        .{ sect.segName(), sect.sectName(), rel_offset, @tagName(rel_type) },
                     ),
                 }
-                return error.ParseFailed;
+                return error.MalformedObject;
             };
 
             out.appendAssumeCapacity(.{
src/link/MachO/Symbol.zig
@@ -55,7 +55,7 @@ pub fn weakRef(symbol: Symbol, macho_file: *MachO) bool {
 }
 
 pub fn getName(symbol: Symbol, macho_file: *MachO) [:0]const u8 {
-    return macho_file.string_intern.getAssumeExists(symbol.name);
+    return macho_file.strings.getAssumeExists(symbol.name);
 }
 
 pub fn getAtom(symbol: Symbol, macho_file: *MachO) ?*Atom {
src/link/MachO/synthetic.zig
@@ -8,7 +8,7 @@ pub const GotSection = struct {
     }
 
     pub fn addSymbol(got: *GotSection, sym_index: Symbol.Index, macho_file: *MachO) !void {
-        const gpa = macho_file.base.allocator;
+        const gpa = macho_file.base.comp.gpa;
         const index = @as(Index, @intCast(got.symbols.items.len));
         const entry = try got.symbols.addOne(gpa);
         entry.* = sym_index;
@@ -29,7 +29,7 @@ pub const GotSection = struct {
     pub fn addDyldRelocs(got: GotSection, macho_file: *MachO) !void {
         const tracy = trace(@src());
         defer tracy.end();
-        const gpa = macho_file.base.allocator;
+        const gpa = macho_file.base.comp.gpa;
         const seg_id = macho_file.sections.items(.segment_id)[macho_file.got_sect_index.?];
         const seg = macho_file.segments.items[seg_id];
 
@@ -111,7 +111,7 @@ pub const StubsSection = struct {
     }
 
     pub fn addSymbol(stubs: *StubsSection, sym_index: Symbol.Index, macho_file: *MachO) !void {
-        const gpa = macho_file.base.allocator;
+        const gpa = macho_file.base.comp.gpa;
         const index = @as(Index, @intCast(stubs.symbols.items.len));
         const entry = try stubs.symbols.addOne(gpa);
         entry.* = sym_index;
@@ -133,7 +133,7 @@ pub const StubsSection = struct {
     pub fn write(stubs: StubsSection, macho_file: *MachO, writer: anytype) !void {
         const tracy = trace(@src());
         defer tracy.end();
-        const cpu_arch = macho_file.options.cpu_arch.?;
+        const cpu_arch = macho_file.getTarget().cpu.arch;
         const laptr_sect = macho_file.sections.items(.header)[macho_file.la_symbol_ptr_sect_index.?];
 
         for (stubs.symbols.items, 0..) |sym_index, idx| {
@@ -213,7 +213,7 @@ pub const StubsHelperSection = struct {
         const tracy = trace(@src());
         defer tracy.end();
         _ = stubs_helper;
-        const cpu_arch = macho_file.options.cpu_arch.?;
+        const cpu_arch = macho_file.getTarget().cpu.arch;
         var s: usize = preambleSize(cpu_arch);
         for (macho_file.stubs.symbols.items) |sym_index| {
             const sym = macho_file.getSymbol(sym_index);
@@ -230,7 +230,7 @@ pub const StubsHelperSection = struct {
 
         try stubs_helper.writePreamble(macho_file, writer);
 
-        const cpu_arch = macho_file.options.cpu_arch.?;
+        const cpu_arch = macho_file.getTarget().cpu.arch;
         const sect = macho_file.sections.items(.header)[macho_file.stubs_helper_sect_index.?];
         const preamble_size = preambleSize(cpu_arch);
         const entry_size = entrySize(cpu_arch);
@@ -272,7 +272,7 @@ pub const StubsHelperSection = struct {
 
     fn writePreamble(stubs_helper: StubsHelperSection, macho_file: *MachO, writer: anytype) !void {
         _ = stubs_helper;
-        const cpu_arch = macho_file.options.cpu_arch.?;
+        const cpu_arch = macho_file.getTarget().cpu.arch;
         const sect = macho_file.sections.items(.header)[macho_file.stubs_helper_sect_index.?];
         const dyld_private_addr = target: {
             const sym = macho_file.getSymbol(macho_file.dyld_private_index.?);
@@ -331,7 +331,7 @@ pub const LaSymbolPtrSection = struct {
         const tracy = trace(@src());
         defer tracy.end();
         _ = laptr;
-        const gpa = macho_file.base.allocator;
+        const gpa = macho_file.base.comp.gpa;
 
         const sect = macho_file.sections.items(.header)[macho_file.la_symbol_ptr_sect_index.?];
         const seg_id = macho_file.sections.items(.segment_id)[macho_file.la_symbol_ptr_sect_index.?];
@@ -371,7 +371,7 @@ pub const LaSymbolPtrSection = struct {
         const tracy = trace(@src());
         defer tracy.end();
         _ = laptr;
-        const cpu_arch = macho_file.options.cpu_arch.?;
+        const cpu_arch = macho_file.getTarget().cpu.arch;
         const sect = macho_file.sections.items(.header)[macho_file.stubs_helper_sect_index.?];
         for (macho_file.stubs.symbols.items, 0..) |sym_index, idx| {
             const sym = macho_file.getSymbol(sym_index);
@@ -397,7 +397,7 @@ pub const TlvPtrSection = struct {
     }
 
     pub fn addSymbol(tlv: *TlvPtrSection, sym_index: Symbol.Index, macho_file: *MachO) !void {
-        const gpa = macho_file.base.allocator;
+        const gpa = macho_file.base.comp.gpa;
         const index = @as(Index, @intCast(tlv.symbols.items.len));
         const entry = try tlv.symbols.addOne(gpa);
         entry.* = sym_index;
@@ -418,7 +418,7 @@ pub const TlvPtrSection = struct {
     pub fn addDyldRelocs(tlv: TlvPtrSection, macho_file: *MachO) !void {
         const tracy = trace(@src());
         defer tracy.end();
-        const gpa = macho_file.base.allocator;
+        const gpa = macho_file.base.comp.gpa;
         const seg_id = macho_file.sections.items(.segment_id)[macho_file.tlv_ptr_sect_index.?];
         const seg = macho_file.segments.items[seg_id];
 
@@ -510,7 +510,7 @@ pub const ObjcStubsSection = struct {
     }
 
     pub fn addSymbol(objc: *ObjcStubsSection, sym_index: Symbol.Index, macho_file: *MachO) !void {
-        const gpa = macho_file.base.allocator;
+        const gpa = macho_file.base.comp.gpa;
         const index = @as(Index, @intCast(objc.symbols.items.len));
         const entry = try objc.symbols.addOne(gpa);
         entry.* = sym_index;
@@ -521,11 +521,11 @@ pub const ObjcStubsSection = struct {
     pub fn getAddress(objc: ObjcStubsSection, index: Index, macho_file: *MachO) u64 {
         assert(index < objc.symbols.items.len);
         const header = macho_file.sections.items(.header)[macho_file.objc_stubs_sect_index.?];
-        return header.addr + index * entrySize(macho_file.options.cpu_arch.?);
+        return header.addr + index * entrySize(macho_file.getTarget().cpu.arch);
     }
 
     pub fn size(objc: ObjcStubsSection, macho_file: *MachO) usize {
-        return objc.symbols.items.len * entrySize(macho_file.options.cpu_arch.?);
+        return objc.symbols.items.len * entrySize(macho_file.getTarget().cpu.arch);
     }
 
     pub fn write(objc: ObjcStubsSection, macho_file: *MachO, writer: anytype) !void {
@@ -535,7 +535,7 @@ pub const ObjcStubsSection = struct {
         for (objc.symbols.items, 0..) |sym_index, idx| {
             const sym = macho_file.getSymbol(sym_index);
             const addr = objc.getAddress(@intCast(idx), macho_file);
-            switch (macho_file.options.cpu_arch.?) {
+            switch (macho_file.getTarget().cpu.arch) {
                 .x86_64 => {
                     try writer.writeAll(&.{ 0x48, 0x8b, 0x35 });
                     {
@@ -654,12 +654,12 @@ pub const WeakBindSection = bind.WeakBind;
 pub const LazyBindSection = bind.LazyBind;
 pub const ExportTrieSection = Trie;
 
-const aarch64 = @import("../aarch64.zig");
+const aarch64 = @import("../../arch/aarch64/bits.zig");
 const assert = std.debug.assert;
 const bind = @import("dyld_info/bind.zig");
 const math = std.math;
 const std = @import("std");
-const trace = @import("../tracy.zig").trace;
+const trace = @import("../../tracy.zig").trace;
 
 const Allocator = std.mem.Allocator;
 const MachO = @import("../MachO.zig");
src/link/MachO/UnwindInfo.zig
@@ -372,7 +372,7 @@ pub const Encoding = extern struct {
 
     pub fn isDwarf(enc: Encoding, macho_file: *MachO) bool {
         const mode = enc.getMode();
-        return switch (macho_file.options.cpu_arch.?) {
+        return switch (macho_file.getTarget().cpu.arch) {
             .aarch64 => @as(macho.UNWIND_ARM64_MODE, @enumFromInt(mode)) == .DWARF,
             .x86_64 => @as(macho.UNWIND_X86_64_MODE, @enumFromInt(mode)) == .DWARF,
             else => unreachable,
src/link/MachO.zig
@@ -1,4 +1,4 @@
-base: File,
+base: link.File,
 
 /// If this is not null, an object file is created by LLVM and emitted to zcu_object_sub_path.
 llvm_object: ?*LlvmObject = null,
@@ -6,6 +6,27 @@ llvm_object: ?*LlvmObject = null,
 /// Debug symbols bundle (or dSym).
 d_sym: ?DebugSymbols = null,
 
+/// A list of all input files.
+/// Index of each input file also encodes the priority or precedence of one input file
+/// over another.
+files: std.MultiArrayList(File.Entry) = .{},
+internal_object: ?File.Index = null,
+objects: std.ArrayListUnmanaged(File.Index) = .{},
+dylibs: std.ArrayListUnmanaged(File.Index) = .{},
+
+segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{},
+sections: std.MultiArrayList(Section) = .{},
+
+symbols: std.ArrayListUnmanaged(Symbol) = .{},
+symbols_extra: std.ArrayListUnmanaged(u32) = .{},
+globals: std.AutoHashMapUnmanaged(u32, Symbol.Index) = .{},
+/// This table will be populated after `scanRelocs` has run.
+/// Key is symbol index.
+undefs: std.AutoHashMapUnmanaged(Symbol.Index, std.ArrayListUnmanaged(Atom.Index)) = .{},
+/// Global symbols we need to resolve for the link to succeed.
+undefined_symbols: std.ArrayListUnmanaged(Symbol.Index) = .{},
+boundary_symbols: std.ArrayListUnmanaged(Symbol.Index) = .{},
+
 dyld_info_cmd: macho.dyld_info_command = .{},
 symtab_cmd: macho.symtab_command = .{},
 dysymtab_cmd: macho.dysymtab_command = .{},
@@ -14,36 +35,46 @@ data_in_code_cmd: macho.linkedit_data_command = .{ .cmd = .DATA_IN_CODE },
 uuid_cmd: macho.uuid_command = .{ .uuid = [_]u8{0} ** 16 },
 codesig_cmd: macho.linkedit_data_command = .{ .cmd = .CODE_SIGNATURE },
 
-segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{},
-sections: std.MultiArrayList(Section) = .{},
-
-pagezero_segment_cmd_index: ?u8 = null,
-header_segment_cmd_index: ?u8 = null,
-text_segment_cmd_index: ?u8 = null,
-data_const_segment_cmd_index: ?u8 = null,
-data_segment_cmd_index: ?u8 = null,
-linkedit_segment_cmd_index: ?u8 = null,
-
-text_section_index: ?u8 = null,
-data_const_section_index: ?u8 = null,
-data_section_index: ?u8 = null,
-bss_section_index: ?u8 = null,
-thread_vars_section_index: ?u8 = null,
-thread_data_section_index: ?u8 = null,
-thread_bss_section_index: ?u8 = null,
-eh_frame_section_index: ?u8 = null,
-unwind_info_section_index: ?u8 = null,
-stubs_section_index: ?u8 = null,
-stub_helper_section_index: ?u8 = null,
-got_section_index: ?u8 = null,
-la_symbol_ptr_section_index: ?u8 = null,
-tlv_ptr_section_index: ?u8 = null,
-
-strtab: StringTable = .{},
+pagezero_seg_index: ?u8 = null,
+text_seg_index: ?u8 = null,
+linkedit_seg_index: ?u8 = null,
+data_sect_index: ?u8 = null,
+got_sect_index: ?u8 = null,
+stubs_sect_index: ?u8 = null,
+stubs_helper_sect_index: ?u8 = null,
+la_symbol_ptr_sect_index: ?u8 = null,
+tlv_ptr_sect_index: ?u8 = null,
+eh_frame_sect_index: ?u8 = null,
+unwind_info_sect_index: ?u8 = null,
+objc_stubs_sect_index: ?u8 = null,
 
 /// List of atoms that are either synthetic or map directly to the Zig source program.
 atoms: std.ArrayListUnmanaged(Atom) = .{},
-
+thunks: std.ArrayListUnmanaged(Thunk) = .{},
+unwind_records: std.ArrayListUnmanaged(UnwindInfo.Record) = .{},
+
+/// String interning table
+strings: StringTable = .{},
+
+/// Output synthetic sections
+symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{},
+strtab: std.ArrayListUnmanaged(u8) = .{},
+indsymtab: Indsymtab = .{},
+got: GotSection = .{},
+stubs: StubsSection = .{},
+stubs_helper: StubsHelperSection = .{},
+objc_stubs: ObjcStubsSection = .{},
+la_symbol_ptr: LaSymbolPtrSection = .{},
+tlv_ptr: TlvPtrSection = .{},
+rebase: RebaseSection = .{},
+bind: BindSection = .{},
+weak_bind: WeakBindSection = .{},
+lazy_bind: LazyBindSection = .{},
+export_trie: ExportTrieSection = .{},
+unwind_info: UnwindInfo = .{},
+
+/// Options
+/// SDK layout
 sdk_layout: ?SdkLayout,
 /// Size of the __PAGEZERO segment.
 pagezero_vmsize: ?u64,
@@ -62,6 +93,8 @@ entitlements: ?[]const u8,
 compatibility_version: ?std.SemanticVersion,
 /// Entry name
 entry_name: ?[]const u8,
+platform: Platform,
+sdk_version: ?std.SemanticVersion,
 
 /// Hot-code swapping state.
 hot_state: if (is_hot_update_compatible) HotUpdateState else struct {} = .{},
@@ -144,6 +177,8 @@ pub fn createEmpty(
             .enabled => default_entry_symbol_name,
             .named => |name| name,
         },
+        .platform = Platform.fromTarget(target),
+        .sdk_version = if (options.darwin_sdk_layout) |layout| inferSdkVersion(comp, layout) else null,
     };
     if (use_llvm and comp.config.have_zcu) {
         self.llvm_object = try LlvmObject.create(arena, comp);
@@ -156,9 +191,16 @@ pub fn createEmpty(
         .mode = link.File.determineMode(false, output_mode, link_mode),
     });
 
-    // Index 0 is always a null symbol.
-    // try self.locals.append(gpa, null_sym);
-    try self.strtab.buffer.append(gpa, 0);
+    // Append null file
+    try self.files.append(gpa, .null);
+    // Atom at index 0 is reserved as null atom
+    try self.atoms.append(gpa, .{});
+    // Append empty string to string tables
+    try self.strings.buffer.append(gpa, 0);
+    try self.strtab.append(gpa, 0);
+    // Append null symbols
+    try self.symbols.append(gpa, .{});
+    try self.symbols_extra.append(gpa, 0);
 
     // TODO: init
 
@@ -208,8 +250,71 @@ pub fn open(
     return createEmpty(arena, comp, emit, options);
 }
 
+pub fn deinit(self: *MachO) void {
+    const gpa = self.base.comp.gpa;
+
+    if (self.llvm_object) |llvm_object| llvm_object.deinit();
+
+    if (self.d_sym) |*d_sym| {
+        d_sym.deinit();
+    }
+
+    for (self.files.items(.tags), self.files.items(.data)) |tag, *data| switch (tag) {
+        .null => {},
+        .internal => data.internal.deinit(gpa),
+        .object => data.object.deinit(gpa),
+        .dylib => data.dylib.deinit(gpa),
+    };
+    self.files.deinit(gpa);
+    self.objects.deinit(gpa);
+    self.dylibs.deinit(gpa);
+
+    self.segments.deinit(gpa);
+    for (self.sections.items(.atoms)) |*list| {
+        list.deinit(gpa);
+    }
+    self.sections.deinit(gpa);
+
+    self.symbols.deinit(gpa);
+    self.symbols_extra.deinit(gpa);
+    self.globals.deinit(gpa);
+    {
+        var it = self.undefs.iterator();
+        while (it.next()) |entry| {
+            entry.value_ptr.deinit(gpa);
+        }
+        self.undefs.deinit(gpa);
+    }
+    self.undefined_symbols.deinit(gpa);
+    self.boundary_symbols.deinit(gpa);
+
+    self.strings.deinit(gpa);
+    self.symtab.deinit(gpa);
+    self.strtab.deinit(gpa);
+    self.got.deinit(gpa);
+    self.stubs.deinit(gpa);
+    self.objc_stubs.deinit(gpa);
+    self.tlv_ptr.deinit(gpa);
+    self.rebase.deinit(gpa);
+    self.bind.deinit(gpa);
+    self.weak_bind.deinit(gpa);
+    self.lazy_bind.deinit(gpa);
+    self.export_trie.deinit(gpa);
+    self.unwind_info.deinit(gpa);
+
+    self.atoms.deinit(gpa);
+    for (self.thunks.items) |*thunk| {
+        thunk.deinit(gpa);
+    }
+    self.thunks.deinit(gpa);
+    self.unwind_records.deinit(gpa);
+}
+
 pub fn flush(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node) link.File.FlushError!void {
-    // TODO: what else should we do in flush? Is it actually needed at all?
+    // TODO: I think this is just a temp and can be removed once we can emit static archives
+    if (self.base.isStaticLib() and build_options.have_llvm) {
+        return self.base.linkAsArchive(arena, prog_node);
+    }
     try self.flushModule(arena, prog_node);
 }
 
@@ -219,10 +324,11 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node
 
     const comp = self.base.comp;
     const gpa = comp.gpa;
-    _ = gpa;
 
     if (self.llvm_object) |llvm_object| {
         try self.base.emitLlvmObject(arena, llvm_object, prog_node);
+        // TODO: I think this is just a temp and can be removed once we can emit static archives
+        if (self.base.isStaticLib() and build_options.have_llvm) return;
     }
 
     var sub_prog_node = prog_node.start("MachO Flush", 0);
@@ -240,11 +346,55 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node
             break :blk path;
         }
     } else null;
-    _ = module_obj_path;
 
     // --verbose-link
     if (comp.verbose_link) try self.dumpArgv(comp);
 
+    if (self.base.isStaticLib()) return self.flushStaticLib(comp, module_obj_path);
+    if (self.base.isObject()) return self.flushObject(comp, module_obj_path);
+
+    var positionals = std.ArrayList(Compilation.LinkObject).init(gpa);
+    defer positionals.deinit();
+
+    try positionals.ensureUnusedCapacity(comp.objects.len);
+    positionals.appendSliceAssumeCapacity(comp.objects);
+
+    // This is a set of object files emitted by clang in a single `build-exe` invocation.
+    // For instance, the implicit `a.o` as compiled by `zig build-exe a.c` will end up
+    // in this set.
+    try positionals.ensureUnusedCapacity(comp.c_object_table.keys().len);
+    for (comp.c_object_table.keys()) |key| {
+        positionals.appendAssumeCapacity(.{ .path = key.status.success.object_path });
+    }
+
+    if (module_obj_path) |path| try positionals.append(.{ .path = path });
+
+    // rpaths
+    var rpath_table = std.StringArrayHashMap(void).init(gpa);
+    defer rpath_table.deinit();
+    try rpath_table.ensureUnusedCapacity(self.base.rpath_list.len);
+
+    for (self.base.rpath_list) |rpath| {
+        _ = rpath_table.putAssumeCapacity(rpath, {});
+    }
+
+    for (positionals.items) |obj| {
+        self.parsePositional(obj.path, obj.must_link) catch |err| switch (err) {
+            error.MalformedObject,
+            error.MalformedArchive,
+            error.InvalidCpuArch,
+            error.InvalidTarget,
+            => continue, // already reported
+            else => |e| try self.reportParseError(
+                obj.path,
+                "unexpected error: parsing input file failed with error {s}",
+                .{@errorName(e)},
+            ),
+        };
+    }
+
+    state_log.debug("{}", .{self.dumpState()});
+
     @panic("TODO");
 }
 
@@ -255,7 +405,6 @@ fn dumpArgv(self: *MachO, comp: *Compilation) !void {
     defer arena_allocator.deinit();
     const arena = arena_allocator.allocator();
 
-    const target = self.base.comp.root_mod.resolved_target.result;
     const directory = self.base.emit.directory;
     const full_out_path = try directory.join(arena, &[_][]const u8{self.base.emit.sub_path});
     const module_obj_path: ?[]const u8 = if (self.base.zcu_object_sub_path) |path| blk: {
@@ -309,18 +458,14 @@ fn dumpArgv(self: *MachO, comp: *Compilation) !void {
             }
         }
 
-        {
-            const platform = Platform.fromTarget(target);
-            try argv.append("-platform_version");
-            try argv.append(@tagName(platform.os_tag));
-            try argv.append(try std.fmt.allocPrint(arena, "{}", .{platform.version}));
-
-            const sdk_version: ?std.SemanticVersion = self.inferSdkVersion();
-            if (sdk_version) |ver| {
-                try argv.append(try std.fmt.allocPrint(arena, "{d}.{d}", .{ ver.major, ver.minor }));
-            } else {
-                try argv.append(try std.fmt.allocPrint(arena, "{}", .{platform.version}));
-            }
+        try argv.append("-platform_version");
+        try argv.append(@tagName(self.platform.os_tag));
+        try argv.append(try std.fmt.allocPrint(arena, "{}", .{self.platform.version}));
+
+        if (self.sdk_version) |ver| {
+            try argv.append(try std.fmt.allocPrint(arena, "{d}.{d}", .{ ver.major, ver.minor }));
+        } else {
+            try argv.append(try std.fmt.allocPrint(arena, "{}", .{self.platform.version}));
         }
 
         if (comp.sysroot) |syslibroot| {
@@ -419,6 +564,26 @@ fn dumpArgv(self: *MachO, comp: *Compilation) !void {
     Compilation.dump_argv(argv.items);
 }
 
+fn flushStaticLib(self: *MachO, comp: *Compilation, module_obj_path: ?[]const u8) link.File.FlushError!void {
+    _ = comp;
+    _ = module_obj_path;
+
+    var err = try self.addErrorWithNotes(0);
+    try err.addMsg(self, "TODO implement flushStaticLib", .{});
+
+    return error.FlushFailure;
+}
+
+fn flushObject(self: *MachO, comp: *Compilation, module_obj_path: ?[]const u8) link.File.FlushError!void {
+    _ = comp;
+    _ = module_obj_path;
+
+    var err = try self.addErrorWithNotes(0);
+    try err.addMsg(self, "TODO implement flushObject", .{});
+
+    return error.FlushFailure;
+}
+
 /// XNU starting with Big Sur running on arm64 is caching inodes of running binaries.
 /// Any change to the binary will effectively invalidate the kernel's cache
 /// resulting in a SIGKILL on each subsequent run. Since when doing incremental
@@ -518,132 +683,60 @@ fn accessLibPath(
 }
 
 const ParseError = error{
-    UnknownFileType,
+    MalformedObject,
+    MalformedArchive,
+    NotLibStub,
+    InvalidCpuArch,
     InvalidTarget,
     InvalidTargetFatLibrary,
-    DylibAlreadyExists,
     IncompatibleDylibVersion,
     OutOfMemory,
     Overflow,
     InputOutput,
-    MalformedArchive,
-    NotLibStub,
     EndOfStream,
     FileSystem,
     NotSupported,
 } || std.os.SeekError || std.fs.File.OpenError || std.fs.File.ReadError || tapi.TapiError;
 
-pub fn parsePositional(
-    self: *MachO,
-    file: std.fs.File,
-    path: []const u8,
-    must_link: bool,
-    dependent_libs: anytype,
-    ctx: *ParseErrorCtx,
-) ParseError!void {
+fn parsePositional(self: *MachO, path: []const u8, must_link: bool) ParseError!void {
     const tracy = trace(@src());
     defer tracy.end();
+    if (try Object.isObject(path)) {
+        try self.parseObject(path);
+    } else {
+        try self.parseLibrary(.{ .path = path }, must_link);
+    }
+}
 
+fn parseLibrary(self: *MachO, lib: SystemLib, must_link: bool) ParseError!void {
     _ = self;
-    _ = file;
-    _ = path;
+    _ = lib;
     _ = must_link;
-    _ = dependent_libs;
-    _ = ctx;
 }
 
-pub fn deinit(self: *MachO) void {
-    const gpa = self.base.comp.gpa;
-
-    if (self.llvm_object) |llvm_object| llvm_object.deinit();
-
-    if (self.d_sym) |*d_sym| {
-        d_sym.deinit();
-    }
-
-    self.strtab.deinit(gpa);
-
-    self.segments.deinit(gpa);
-
-    for (self.sections.items(.free_list)) |*list| {
-        list.deinit(gpa);
-    }
-    self.sections.deinit(gpa);
-}
+fn parseObject(self: *MachO, path: []const u8) ParseError!void {
+    const tracy = trace(@src());
+    defer tracy.end();
 
-fn freeAtom(self: *MachO, atom_index: Atom.Index) void {
     const gpa = self.base.comp.gpa;
-    log.debug("freeAtom {d}", .{atom_index});
-
-    // Remove any relocs and base relocs associated with this Atom
-    Atom.freeRelocations(self, atom_index);
-
-    const atom = self.getAtom(atom_index);
-    const sect_id = atom.getSymbol(self).n_sect - 1;
-    const free_list = &self.sections.items(.free_list)[sect_id];
-    var already_have_free_list_node = false;
-    {
-        var i: usize = 0;
-        // TODO turn free_list into a hash map
-        while (i < free_list.items.len) {
-            if (free_list.items[i] == atom_index) {
-                _ = free_list.swapRemove(i);
-                continue;
-            }
-            if (free_list.items[i] == atom.prev_index) {
-                already_have_free_list_node = true;
-            }
-            i += 1;
-        }
-    }
-
-    const maybe_last_atom_index = &self.sections.items(.last_atom_index)[sect_id];
-    if (maybe_last_atom_index.*) |last_atom_index| {
-        if (last_atom_index == atom_index) {
-            if (atom.prev_index) |prev_index| {
-                // TODO shrink the section size here
-                maybe_last_atom_index.* = prev_index;
-            } else {
-                maybe_last_atom_index.* = null;
-            }
-        }
-    }
-
-    if (atom.prev_index) |prev_index| {
-        const prev = self.getAtomPtr(prev_index);
-        prev.next_index = atom.next_index;
-
-        if (!already_have_free_list_node and prev.*.freeListEligible(self)) {
-            // The free list is heuristics, it doesn't have to be perfect, so we can ignore
-            // the OOM here.
-            free_list.append(gpa, prev_index) catch {};
-        }
-    } else {
-        self.getAtomPtr(atom_index).prev_index = null;
-    }
-
-    if (atom.next_index) |next_index| {
-        self.getAtomPtr(next_index).prev_index = atom.prev_index;
-    } else {
-        self.getAtomPtr(atom_index).next_index = null;
-    }
-
-    // Appending to free lists is allowed to fail because the free lists are heuristics based anyway.
-    const sym_index = atom.getSymbolIndex().?;
-
-    self.locals_free_list.append(gpa, sym_index) catch {};
-
-    // Try freeing GOT atom if this decl had one
-    self.got_table.freeEntry(gpa, .{ .sym_index = sym_index });
-
-    if (self.d_sym) |*d_sym| {
-        d_sym.swapRemoveRelocs(sym_index);
-    }
-
-    self.locals.items[sym_index].n_type = 0;
-    _ = self.atom_by_index_table.remove(sym_index);
-    log.debug("  adding local symbol index {d} to free list", .{sym_index});
-    self.getAtomPtr(atom_index).sym_index = 0;
+    const file = try std.fs.cwd().openFile(path, .{});
+    defer file.close();
+    const mtime: u64 = mtime: {
+        const stat = file.stat() catch break :mtime 0;
+        break :mtime @as(u64, @intCast(@divFloor(stat.mtime, 1_000_000_000)));
+    };
+    const data = try file.readToEndAlloc(gpa, std.math.maxInt(u32));
+    const index = @as(File.Index, @intCast(try self.files.addOne(gpa)));
+    self.files.set(index, .{ .object = .{
+        .path = try gpa.dupe(u8, path),
+        .mtime = mtime,
+        .data = data,
+        .index = index,
+    } });
+    try self.objects.append(gpa, index);
+
+    const object = self.getFile(index).?.object;
+    try object.parse(self);
 }
 
 fn shrinkAtom(self: *MachO, atom_index: Atom.Index, new_block_size: u64) void {
@@ -716,7 +809,7 @@ pub fn updateDecl(self: *MachO, mod: *Module, decl_index: InternPool.DeclIndex)
 
 fn updateLazySymbolAtom(
     self: *MachO,
-    sym: File.LazySymbol,
+    sym: link.File.LazySymbol,
     atom_index: Atom.Index,
     section_index: u8,
 ) !void {
@@ -727,7 +820,7 @@ fn updateLazySymbolAtom(
     @panic("TODO updateLazySymbolAtom");
 }
 
-pub fn getOrCreateAtomForLazySymbol(self: *MachO, sym: File.LazySymbol) !Atom.Index {
+pub fn getOrCreateAtomForLazySymbol(self: *MachO, sym: link.File.LazySymbol) !Atom.Index {
     _ = self;
     _ = sym;
     @panic("TODO getOrCreateAtomForLazySymbol");
@@ -763,7 +856,7 @@ pub fn updateExports(
     mod: *Module,
     exported: Module.Exported,
     exports: []const *Module.Export,
-) File.UpdateExportsError!void {
+) link.File.UpdateExportsError!void {
     if (build_options.skip_non_native and builtin.object_format != .macho) {
         @panic("Attempted to compile for object format that was disabled by build configuration");
     }
@@ -795,7 +888,7 @@ pub fn freeDecl(self: *MachO, decl_index: InternPool.DeclIndex) void {
     @panic("TODO freeDecl");
 }
 
-pub fn getDeclVAddr(self: *MachO, decl_index: InternPool.DeclIndex, reloc_info: File.RelocInfo) !u64 {
+pub fn getDeclVAddr(self: *MachO, decl_index: InternPool.DeclIndex, reloc_info: link.File.RelocInfo) !u64 {
     assert(self.llvm_object == null);
     _ = decl_index;
     _ = reloc_info;
@@ -872,94 +965,224 @@ fn findFreeSpace(self: *MachO, object_size: u64, min_alignment: u32) u64 {
     return start;
 }
 
+pub fn getTarget(self: MachO) std.Target {
+    return self.base.comp.root_mod.resolved_target.result;
+}
+
 pub fn makeStaticString(bytes: []const u8) [16]u8 {
     var buf = [_]u8{0} ** 16;
     @memcpy(buf[0..bytes.len], bytes);
     return buf;
 }
 
-pub const ParseErrorCtx = struct {
-    arena_allocator: std.heap.ArenaAllocator,
-    detected_dylib_id: struct {
-        parent: u16,
-        required_version: u32,
-        found_version: u32,
-    },
-    detected_targets: std.ArrayList([]const u8),
+pub fn getFile(self: *MachO, index: File.Index) ?File {
+    const tag = self.files.items(.tags)[index];
+    return switch (tag) {
+        .null => null,
+        .internal => .{ .internal = &self.files.items(.data)[index].internal },
+        .object => .{ .object = &self.files.items(.data)[index].object },
+        .dylib => .{ .dylib = &self.files.items(.data)[index].dylib },
+    };
+}
 
-    pub fn init(gpa: Allocator) ParseErrorCtx {
-        return .{
-            .arena_allocator = std.heap.ArenaAllocator.init(gpa),
-            .detected_dylib_id = undefined,
-            .detected_targets = std.ArrayList([]const u8).init(gpa),
+pub fn getInternalObject(self: *MachO) ?*InternalObject {
+    const index = self.internal_object orelse return null;
+    return self.getFile(index).?.internal;
+}
+
+pub fn addAtom(self: *MachO) error{OutOfMemory}!Atom.Index {
+    const index = @as(Atom.Index, @intCast(self.atoms.items.len));
+    const atom = try self.atoms.addOne(self.base.comp.gpa);
+    atom.* = .{};
+    return index;
+}
+
+pub fn getAtom(self: *MachO, index: Atom.Index) ?*Atom {
+    if (index == 0) return null;
+    assert(index < self.atoms.items.len);
+    return &self.atoms.items[index];
+}
+
+pub fn addSymbol(self: *MachO) !Symbol.Index {
+    const index = @as(Symbol.Index, @intCast(self.symbols.items.len));
+    const symbol = try self.symbols.addOne(self.base.comp.gpa);
+    symbol.* = .{};
+    return index;
+}
+
+pub fn getSymbol(self: *MachO, index: Symbol.Index) *Symbol {
+    assert(index < self.symbols.items.len);
+    return &self.symbols.items[index];
+}
+
+pub fn addSymbolExtra(self: *MachO, extra: Symbol.Extra) !u32 {
+    const fields = @typeInfo(Symbol.Extra).Struct.fields;
+    try self.symbols_extra.ensureUnusedCapacity(self.base.comp.gpa, fields.len);
+    return self.addSymbolExtraAssumeCapacity(extra);
+}
+
+pub fn addSymbolExtraAssumeCapacity(self: *MachO, extra: Symbol.Extra) u32 {
+    const index = @as(u32, @intCast(self.symbols_extra.items.len));
+    const fields = @typeInfo(Symbol.Extra).Struct.fields;
+    inline for (fields) |field| {
+        self.symbols_extra.appendAssumeCapacity(switch (field.type) {
+            u32 => @field(extra, field.name),
+            else => @compileError("bad field type"),
+        });
+    }
+    return index;
+}
+
+pub fn getSymbolExtra(self: MachO, index: u32) ?Symbol.Extra {
+    if (index == 0) return null;
+    const fields = @typeInfo(Symbol.Extra).Struct.fields;
+    var i: usize = index;
+    var result: Symbol.Extra = undefined;
+    inline for (fields) |field| {
+        @field(result, field.name) = switch (field.type) {
+            u32 => self.symbols_extra.items[i],
+            else => @compileError("bad field type"),
         };
+        i += 1;
     }
+    return result;
+}
 
-    pub fn deinit(ctx: *ParseErrorCtx) void {
-        ctx.arena_allocator.deinit();
-        ctx.detected_targets.deinit();
+pub fn setSymbolExtra(self: *MachO, index: u32, extra: Symbol.Extra) void {
+    assert(index > 0);
+    const fields = @typeInfo(Symbol.Extra).Struct.fields;
+    inline for (fields, 0..) |field, i| {
+        self.symbols_extra.items[index + i] = switch (field.type) {
+            u32 => @field(extra, field.name),
+            else => @compileError("bad field type"),
+        };
     }
+}
+
+const GetOrCreateGlobalResult = struct {
+    found_existing: bool,
+    index: Symbol.Index,
+};
 
-    pub fn arena(ctx: *ParseErrorCtx) Allocator {
-        return ctx.arena_allocator.allocator();
+pub fn getOrCreateGlobal(self: *MachO, off: u32) !GetOrCreateGlobalResult {
+    const gpa = self.base.comp.gpa;
+    const gop = try self.globals.getOrPut(gpa, off);
+    if (!gop.found_existing) {
+        const index = try self.addSymbol();
+        const global = self.getSymbol(index);
+        global.name = off;
+        gop.value_ptr.* = index;
+    }
+    return .{
+        .found_existing = gop.found_existing,
+        .index = gop.value_ptr.*,
+    };
+}
+
+pub fn getGlobalByName(self: *MachO, name: []const u8) ?Symbol.Index {
+    const off = self.strings.getOffset(name) orelse return null;
+    return self.globals.get(off);
+}
+
+pub fn addUnwindRecord(self: *MachO) !UnwindInfo.Record.Index {
+    const index = @as(UnwindInfo.Record.Index, @intCast(self.unwind_records.items.len));
+    const rec = try self.unwind_records.addOne(self.base.comp.gpa);
+    rec.* = .{};
+    return index;
+}
+
+pub fn getUnwindRecord(self: *MachO, index: UnwindInfo.Record.Index) *UnwindInfo.Record {
+    assert(index < self.unwind_records.items.len);
+    return &self.unwind_records.items[index];
+}
+
+pub fn addThunk(self: *MachO) !Thunk.Index {
+    const index = @as(Thunk.Index, @intCast(self.thunks.items.len));
+    const thunk = try self.thunks.addOne(self.base.comp.gpa);
+    thunk.* = .{};
+    return index;
+}
+
+pub fn getThunk(self: *MachO, index: Thunk.Index) *Thunk {
+    assert(index < self.thunks.items.len);
+    return &self.thunks.items[index];
+}
+
+pub fn eatPrefix(path: []const u8, prefix: []const u8) ?[]const u8 {
+    if (mem.startsWith(u8, path, prefix)) return path[prefix.len..];
+    return null;
+}
+
+const ErrorWithNotes = struct {
+    /// Allocated index in comp.link_errors array.
+    index: usize,
+
+    /// Next available note slot.
+    note_slot: usize = 0,
+
+    pub fn addMsg(
+        err: ErrorWithNotes,
+        macho_file: *MachO,
+        comptime format: []const u8,
+        args: anytype,
+    ) error{OutOfMemory}!void {
+        const comp = macho_file.base.comp;
+        const gpa = comp.gpa;
+        const err_msg = &comp.link_errors.items[err.index];
+        err_msg.msg = try std.fmt.allocPrint(gpa, format, args);
+    }
+
+    pub fn addNote(
+        err: *ErrorWithNotes,
+        macho_file: *MachO,
+        comptime format: []const u8,
+        args: anytype,
+    ) error{OutOfMemory}!void {
+        const comp = macho_file.base.comp;
+        const gpa = comp.gpa;
+        const err_msg = &comp.link_errors.items[err.index];
+        assert(err.note_slot < err_msg.notes.len);
+        err_msg.notes[err.note_slot] = .{ .msg = try std.fmt.allocPrint(gpa, format, args) };
+        err.note_slot += 1;
     }
 };
 
-pub fn handleAndReportParseError(
+pub fn addErrorWithNotes(self: *MachO, note_count: usize) error{OutOfMemory}!ErrorWithNotes {
+    const comp = self.base.comp;
+    const gpa = comp.gpa;
+    try comp.link_errors.ensureUnusedCapacity(gpa, 1);
+    return self.addErrorWithNotesAssumeCapacity(note_count);
+}
+
+fn addErrorWithNotesAssumeCapacity(self: *MachO, note_count: usize) error{OutOfMemory}!ErrorWithNotes {
+    const comp = self.base.comp;
+    const gpa = comp.gpa;
+    const index = comp.link_errors.items.len;
+    const err = comp.link_errors.addOneAssumeCapacity();
+    err.* = .{ .msg = undefined, .notes = try gpa.alloc(link.File.ErrorMsg, note_count) };
+    return .{ .index = index };
+}
+
+pub fn reportParseError(
     self: *MachO,
     path: []const u8,
-    err: ParseError,
-    ctx: *const ParseErrorCtx,
+    comptime format: []const u8,
+    args: anytype,
 ) error{OutOfMemory}!void {
-    const target = self.base.comp.root_mod.resolved_target.result;
-    const gpa = self.base.comp.gpa;
-    const cpu_arch = target.cpu.arch;
-    switch (err) {
-        error.DylibAlreadyExists => {},
-        error.IncompatibleDylibVersion => {
-            const parent = &self.dylibs.items[ctx.detected_dylib_id.parent];
-            try self.reportDependencyError(
-                if (parent.id) |id| id.name else parent.path,
-                path,
-                "incompatible dylib version: expected at least '{}', but found '{}'",
-                .{
-                    load_commands.appleVersionToSemanticVersion(ctx.detected_dylib_id.required_version),
-                    load_commands.appleVersionToSemanticVersion(ctx.detected_dylib_id.found_version),
-                },
-            );
-        },
-        error.UnknownFileType => try self.reportParseError(path, "unknown file type", .{}),
-        error.InvalidTarget, error.InvalidTargetFatLibrary => {
-            var targets_string = std.ArrayList(u8).init(gpa);
-            defer targets_string.deinit();
-
-            if (ctx.detected_targets.items.len > 1) {
-                try targets_string.writer().writeAll("(");
-                for (ctx.detected_targets.items) |t| {
-                    try targets_string.writer().print("{s}, ", .{t});
-                }
-                try targets_string.resize(targets_string.items.len - 2);
-                try targets_string.writer().writeAll(")");
-            } else {
-                try targets_string.writer().writeAll(ctx.detected_targets.items[0]);
-            }
+    var err = try self.addErrorWithNotes(1);
+    try err.addMsg(self, format, args);
+    try err.addNote(self, "while parsing {s}", .{path});
+}
 
-            switch (err) {
-                error.InvalidTarget => try self.reportParseError(
-                    path,
-                    "invalid target: expected '{}', but found '{s}'",
-                    .{ Platform.fromTarget(target).fmtTarget(cpu_arch), targets_string.items },
-                ),
-                error.InvalidTargetFatLibrary => try self.reportParseError(
-                    path,
-                    "invalid architecture in universal library: expected '{s}', but found '{s}'",
-                    .{ @tagName(cpu_arch), targets_string.items },
-                ),
-                else => unreachable,
-            }
-        },
-        else => |e| try self.reportParseError(path, "{s}: parsing object failed", .{@errorName(e)}),
-    }
+pub fn reportParseError2(
+    self: *MachO,
+    file_index: File.Index,
+    comptime format: []const u8,
+    args: anytype,
+) error{OutOfMemory}!void {
+    var err = try self.addErrorWithNotes(1);
+    try err.addMsg(self, format, args);
+    try err.addNote(self, "while parsing {}", .{self.getFile(file_index).?.fmtPath()});
 }
 
 fn reportMissingLibraryError(
@@ -968,18 +1191,11 @@ fn reportMissingLibraryError(
     comptime format: []const u8,
     args: anytype,
 ) error{OutOfMemory}!void {
-    const comp = self.base.comp;
-    const gpa = comp.gpa;
-    try comp.link_errors.ensureUnusedCapacity(gpa, 1);
-    const notes = try gpa.alloc(File.ErrorMsg, checked_paths.len);
-    errdefer gpa.free(notes);
-    for (checked_paths, notes) |path, *note| {
-        note.* = .{ .msg = try std.fmt.allocPrint(gpa, "tried {s}", .{path}) };
+    var err = try self.addErrorWithNotes(checked_paths.len);
+    try err.addMsg(self, format, args);
+    for (checked_paths) |path| {
+        try err.addNote(self, "tried {s}", .{path});
     }
-    comp.link_errors.appendAssumeCapacity(.{
-        .msg = try std.fmt.allocPrint(gpa, format, args),
-        .notes = notes,
-    });
 }
 
 fn reportDependencyError(
@@ -992,7 +1208,7 @@ fn reportDependencyError(
     const comp = self.base.comp;
     const gpa = comp.gpa;
     try comp.link_errors.ensureUnusedCapacity(gpa, 1);
-    var notes = try std.ArrayList(File.ErrorMsg).initCapacity(gpa, 2);
+    var notes = try std.ArrayList(link.File.ErrorMsg).initCapacity(gpa, 2);
     defer notes.deinit();
     if (path) |p| {
         notes.appendAssumeCapacity(.{ .msg = try std.fmt.allocPrint(gpa, "while parsing {s}", .{p}) });
@@ -1004,42 +1220,6 @@ fn reportDependencyError(
     });
 }
 
-pub fn reportParseError(
-    self: *MachO,
-    path: []const u8,
-    comptime format: []const u8,
-    args: anytype,
-) error{OutOfMemory}!void {
-    const comp = self.base.comp;
-    const gpa = comp.gpa;
-    try comp.link_errors.ensureUnusedCapacity(gpa, 1);
-    var notes = try gpa.alloc(File.ErrorMsg, 1);
-    errdefer gpa.free(notes);
-    notes[0] = .{ .msg = try std.fmt.allocPrint(gpa, "while parsing {s}", .{path}) };
-    comp.link_errors.appendAssumeCapacity(.{
-        .msg = try std.fmt.allocPrint(gpa, format, args),
-        .notes = notes,
-    });
-}
-
-pub fn reportUnresolvedBoundarySymbol(
-    self: *MachO,
-    sym_name: []const u8,
-    comptime format: []const u8,
-    args: anytype,
-) error{OutOfMemory}!void {
-    const comp = self.base.comp;
-    const gpa = comp.gpa;
-    try comp.link_errors.ensureUnusedCapacity(gpa, 1);
-    var notes = try gpa.alloc(File.ErrorMsg, 1);
-    errdefer gpa.free(notes);
-    notes[0] = .{ .msg = try std.fmt.allocPrint(gpa, "while resolving {s}", .{sym_name}) };
-    comp.link_errors.appendAssumeCapacity(.{
-        .msg = try std.fmt.allocPrint(gpa, format, args),
-        .notes = notes,
-    });
-}
-
 pub fn reportUndefined(self: *MachO) error{OutOfMemory}!void {
     const comp = self.base.comp;
     const gpa = comp.gpa;
@@ -1050,7 +1230,7 @@ pub fn reportUndefined(self: *MachO) error{OutOfMemory}!void {
         const global = self.globals.items[global_index];
         const sym_name = self.getSymbolName(global);
 
-        var notes = try std.ArrayList(File.ErrorMsg).initCapacity(gpa, 1);
+        var notes = try std.ArrayList(link.File.ErrorMsg).initCapacity(gpa, 1);
         defer notes.deinit();
 
         if (global.getFile()) |file| {
@@ -1060,7 +1240,7 @@ pub fn reportUndefined(self: *MachO) error{OutOfMemory}!void {
             notes.appendAssumeCapacity(.{ .msg = note });
         }
 
-        var err_msg = File.ErrorMsg{
+        var err_msg = link.File.ErrorMsg{
             .msg = try std.fmt.allocPrint(gpa, "undefined reference to symbol {s}", .{sym_name}),
         };
         err_msg.notes = try notes.toOwnedSlice();
@@ -1164,6 +1344,145 @@ pub fn ptraceDetach(self: *MachO, pid: std.os.pid_t) !void {
     self.hot_state.mach_task = null;
 }
 
+pub fn dumpState(self: *MachO) std.fmt.Formatter(fmtDumpState) {
+    return .{ .data = self };
+}
+
+fn fmtDumpState(
+    self: *MachO,
+    comptime unused_fmt_string: []const u8,
+    options: std.fmt.FormatOptions,
+    writer: anytype,
+) !void {
+    _ = options;
+    _ = unused_fmt_string;
+    for (self.objects.items) |index| {
+        const object = self.getFile(index).?.object;
+        try writer.print("object({d}) : {} : has_debug({})", .{
+            index,
+            object.fmtPath(),
+            object.hasDebugInfo(),
+        });
+        if (!object.alive) try writer.writeAll(" : ([*])");
+        try writer.writeByte('\n');
+        try writer.print("{}{}{}{}{}\n", .{
+            object.fmtAtoms(self),
+            object.fmtCies(self),
+            object.fmtFdes(self),
+            object.fmtUnwindRecords(self),
+            object.fmtSymtab(self),
+        });
+    }
+    // for (self.dylibs.items) |index| {
+    //     const dylib = self.getFile(index).?.dylib;
+    //     try writer.print("dylib({d}) : {s} : needed({}) : weak({})", .{
+    //         index,
+    //         dylib.path,
+    //         dylib.needed,
+    //         dylib.weak,
+    //     });
+    //     if (!dylib.isAlive(self)) try writer.writeAll(" : ([*])");
+    //     try writer.writeByte('\n');
+    //     try writer.print("{}\n", .{dylib.fmtSymtab(self)});
+    // }
+    if (self.getInternalObject()) |internal| {
+        try writer.print("internal({d}) : internal\n", .{internal.index});
+        try writer.print("{}{}\n", .{ internal.fmtAtoms(self), internal.fmtSymtab(self) });
+    }
+    try writer.writeAll("thunks\n");
+    for (self.thunks.items, 0..) |thunk, index| {
+        try writer.print("thunk({d}) : {}\n", .{ index, thunk.fmt(self) });
+    }
+    try writer.print("stubs\n{}\n", .{self.stubs.fmt(self)});
+    try writer.print("objc_stubs\n{}\n", .{self.objc_stubs.fmt(self)});
+    try writer.print("got\n{}\n", .{self.got.fmt(self)});
+    try writer.print("tlv_ptr\n{}\n", .{self.tlv_ptr.fmt(self)});
+    try writer.writeByte('\n');
+    try writer.print("sections\n{}\n", .{self.fmtSections()});
+    try writer.print("segments\n{}\n", .{self.fmtSegments()});
+}
+
+fn fmtSections(self: *MachO) std.fmt.Formatter(formatSections) {
+    return .{ .data = self };
+}
+
+fn formatSections(
+    self: *MachO,
+    comptime unused_fmt_string: []const u8,
+    options: std.fmt.FormatOptions,
+    writer: anytype,
+) !void {
+    _ = options;
+    _ = unused_fmt_string;
+    const slice = self.sections.slice();
+    for (slice.items(.header), slice.items(.segment_id), 0..) |header, seg_id, i| {
+        try writer.print("sect({d}) : seg({d}) : {s},{s} : @{x} ({x}) : align({x}) : size({x})\n", .{
+            i,               seg_id,      header.segName(), header.sectName(), header.offset, header.addr,
+            header.@"align", header.size,
+        });
+    }
+}
+
+fn fmtSegments(self: *MachO) std.fmt.Formatter(formatSegments) {
+    return .{ .data = self };
+}
+
+fn formatSegments(
+    self: *MachO,
+    comptime unused_fmt_string: []const u8,
+    options: std.fmt.FormatOptions,
+    writer: anytype,
+) !void {
+    _ = options;
+    _ = unused_fmt_string;
+    for (self.segments.items, 0..) |seg, i| {
+        try writer.print("seg({d}) : {s} : @{x}-{x} ({x}-{x})\n", .{
+            i,           seg.segName(),              seg.vmaddr, seg.vmaddr + seg.vmsize,
+            seg.fileoff, seg.fileoff + seg.filesize,
+        });
+    }
+}
+
+pub fn fmtSectType(tt: u8) std.fmt.Formatter(formatSectType) {
+    return .{ .data = tt };
+}
+
+fn formatSectType(
+    tt: u8,
+    comptime unused_fmt_string: []const u8,
+    options: std.fmt.FormatOptions,
+    writer: anytype,
+) !void {
+    _ = options;
+    _ = unused_fmt_string;
+    const name = switch (tt) {
+        macho.S_REGULAR => "REGULAR",
+        macho.S_ZEROFILL => "ZEROFILL",
+        macho.S_CSTRING_LITERALS => "CSTRING_LITERALS",
+        macho.S_4BYTE_LITERALS => "4BYTE_LITERALS",
+        macho.S_8BYTE_LITERALS => "8BYTE_LITERALS",
+        macho.S_16BYTE_LITERALS => "16BYTE_LITERALS",
+        macho.S_LITERAL_POINTERS => "LITERAL_POINTERS",
+        macho.S_NON_LAZY_SYMBOL_POINTERS => "NON_LAZY_SYMBOL_POINTERS",
+        macho.S_LAZY_SYMBOL_POINTERS => "LAZY_SYMBOL_POINTERS",
+        macho.S_SYMBOL_STUBS => "SYMBOL_STUBS",
+        macho.S_MOD_INIT_FUNC_POINTERS => "MOD_INIT_FUNC_POINTERS",
+        macho.S_MOD_TERM_FUNC_POINTERS => "MOD_TERM_FUNC_POINTERS",
+        macho.S_COALESCED => "COALESCED",
+        macho.S_GB_ZEROFILL => "GB_ZEROFILL",
+        macho.S_INTERPOSING => "INTERPOSING",
+        macho.S_DTRACE_DOF => "DTRACE_DOF",
+        macho.S_THREAD_LOCAL_REGULAR => "THREAD_LOCAL_REGULAR",
+        macho.S_THREAD_LOCAL_ZEROFILL => "THREAD_LOCAL_ZEROFILL",
+        macho.S_THREAD_LOCAL_VARIABLES => "THREAD_LOCAL_VARIABLES",
+        macho.S_THREAD_LOCAL_VARIABLE_POINTERS => "THREAD_LOCAL_VARIABLE_POINTERS",
+        macho.S_THREAD_LOCAL_INIT_FUNCTION_POINTERS => "THREAD_LOCAL_INIT_FUNCTION_POINTERS",
+        macho.S_INIT_FUNC_OFFSETS => "INIT_FUNC_OFFSETS",
+        else => |x| return writer.print("UNKNOWN({x})", .{x}),
+    };
+    try writer.print("{s}", .{name});
+}
+
 const is_hot_update_compatible = switch (builtin.target.os.tag) {
     .macos => true,
     else => false,
@@ -1171,32 +1490,14 @@ const is_hot_update_compatible = switch (builtin.target.os.tag) {
 
 const default_entry_symbol_name = "_main";
 
-pub const base_tag: File.Tag = File.Tag.macho;
+pub const base_tag: link.File.Tag = link.File.Tag.macho;
 pub const N_DEAD: u16 = @as(u16, @bitCast(@as(i16, -1)));
 pub const N_BOUNDARY: u16 = @as(u16, @bitCast(@as(i16, -2)));
 
-pub const Section = struct {
+const Section = struct {
     header: macho.section_64,
-    segment_index: u8,
-    first_atom_index: ?Atom.Index = null,
-    last_atom_index: ?Atom.Index = null,
-
-    /// A list of atoms that have surplus capacity. This list can have false
-    /// positives, as functions grow and shrink over time, only sometimes being added
-    /// or removed from the freelist.
-    ///
-    /// An atom has surplus capacity when its overcapacity value is greater than
-    /// padToIdeal(minimum_atom_size). That is, when it has so
-    /// much extra capacity, that we could fit a small new symbol in it, itself with
-    /// ideal_capacity or more.
-    ///
-    /// Ideal capacity is defined by size + (size / ideal_factor).
-    ///
-    /// Overcapacity is measured by actual_capacity - ideal_capacity. Note that
-    /// overcapacity can be negative. A simple way to have negative overcapacity is to
-    /// allocate a fresh atom, which will have ideal capacity, and then grow it
-    /// by 1 byte. It will then have -1 overcapacity.
-    free_list: std.ArrayListUnmanaged(Atom.Index) = .{},
+    segment_id: u8,
+    atoms: std.ArrayListUnmanaged(Atom.Index) = .{},
 };
 
 const HotUpdateState = struct {
@@ -1385,15 +1686,13 @@ pub inline fn appleVersionToSemanticVersion(version: u32) std.SemanticVersion {
     };
 }
 
-fn inferSdkVersion(self: *MachO) ?std.SemanticVersion {
-    const comp = self.base.comp;
+fn inferSdkVersion(comp: *Compilation, sdk_layout: SdkLayout) ?std.SemanticVersion {
     const gpa = comp.gpa;
 
     var arena_allocator = std.heap.ArenaAllocator.init(gpa);
     defer arena_allocator.deinit();
     const arena = arena_allocator.allocator();
 
-    const sdk_layout = self.sdk_layout orelse return null;
     const sdk_dir = switch (sdk_layout) {
         .sdk => comp.sysroot.?,
         .vendored => std.fs.path.join(arena, &.{ comp.zig_lib_directory.path.?, "libc", "darwin" }) catch return null,
@@ -1402,6 +1701,7 @@ fn inferSdkVersion(self: *MachO) ?std.SemanticVersion {
         return parseSdkVersion(ver);
     } else |_| {
         // Read from settings should always succeed when vendored.
+        // TODO: convert to fatal linker error
         if (sdk_layout == .vendored) @panic("zig installation bug: unable to parse SDK version");
     }
 
@@ -1470,6 +1770,15 @@ pub const default_pagezero_vmsize: u64 = 0x100000000;
 /// potential future extensions.
 pub const default_headerpad_size: u32 = 0x1000;
 
+const SystemLib = struct {
+    path: []const u8,
+    needed: bool = false,
+    weak: bool = false,
+    hidden: bool = false,
+    reexport: bool = false,
+    must_link: bool = false,
+};
+
 const MachO = @This();
 
 const std = @import("std");
@@ -1479,6 +1788,7 @@ const assert = std.debug.assert;
 const dwarf = std.dwarf;
 const fs = std.fs;
 const log = std.log.scoped(.link);
+const state_log = std.log.scoped(.link_state);
 const macho = std.macho;
 const math = std.math;
 const mem = std.mem;
@@ -1488,6 +1798,7 @@ const aarch64 = @import("../arch/aarch64/bits.zig");
 const calcUuid = @import("MachO/uuid.zig").calcUuid;
 const codegen = @import("../codegen.zig");
 const dead_strip = @import("MachO/dead_strip.zig");
+const eh_frame = @import("MachO/eh_frame.zig");
 const fat = @import("MachO/fat.zig");
 const link = @import("../link.zig");
 const llvm_backend = @import("../codegen/llvm.zig");
@@ -1496,12 +1807,14 @@ const tapi = @import("tapi.zig");
 const target_util = @import("../target.zig");
 const thunks = @import("MachO/thunks.zig");
 const trace = @import("../tracy.zig").trace;
+const synthetic = @import("MachO/synthetic.zig");
 
 const Air = @import("../Air.zig");
 const Alignment = Atom.Alignment;
 const Allocator = mem.Allocator;
 const Archive = @import("MachO/Archive.zig");
 pub const Atom = @import("MachO/Atom.zig");
+const BindSection = synthetic.BindSection;
 const Cache = std.Build.Cache;
 const CodeSignature = @import("MachO/CodeSignature.zig");
 const Compilation = @import("../Compilation.zig");
@@ -1509,17 +1822,29 @@ pub const DebugSymbols = @import("MachO/DebugSymbols.zig");
 const Dwarf = File.Dwarf;
 const DwarfInfo = @import("MachO/DwarfInfo.zig");
 const Dylib = @import("MachO/Dylib.zig");
-const File = link.File;
+const ExportTrieSection = synthetic.ExportTrieSection;
+const File = @import("MachO/file.zig").File;
+const GotSection = synthetic.GotSection;
+const Indsymtab = synthetic.Indsymtab;
+const InternalObject = @import("MachO/InternalObject.zig");
+const ObjcStubsSection = synthetic.ObjcStubsSection;
 const Object = @import("MachO/Object.zig");
+const LazyBindSection = synthetic.LazyBindSection;
+const LaSymbolPtrSection = synthetic.LaSymbolPtrSection;
 const LibStub = tapi.LibStub;
 const Liveness = @import("../Liveness.zig");
 const LlvmObject = @import("../codegen/llvm.zig").Object;
 const Md5 = std.crypto.hash.Md5;
 const Module = @import("../Module.zig");
 const InternPool = @import("../InternPool.zig");
+const RebaseSection = synthetic.RebaseSection;
 const Relocation = @import("MachO/Relocation.zig");
 const StringTable = @import("StringTable.zig");
-const TableSection = @import("table_section.zig").TableSection;
-const Type = @import("../type.zig").Type;
+const StubsSection = synthetic.StubsSection;
+const StubsHelperSection = synthetic.StubsHelperSection;
+const Symbol = @import("MachO/Symbol.zig");
+const Thunk = thunks.Thunk;
+const TlvPtrSection = synthetic.TlvPtrSection;
 const TypedValue = @import("../TypedValue.zig");
-const Value = @import("../value.zig").Value;
+const UnwindInfo = @import("MachO/UnwindInfo.zig");
+const WeakBindSection = synthetic.WeakBindSection;