Commit 6337ce16ae

Jakub Konka <kubkon@jakubkonka.com>
2024-01-28 00:07:01
macho: do not allocate input files in full
1 parent 190ea02
src/link/MachO/Archive.zig
@@ -1,6 +1,3 @@
-path: []const u8,
-data: []const u8,
-
 objects: std.ArrayListUnmanaged(Object) = .{},
 
 // Archive files start with the ARMAG identifying string.  Then follows a
@@ -73,62 +70,73 @@ pub fn isArchive(path: []const u8, fat_arch: ?fat.Arch) !bool {
 }
 
 pub fn deinit(self: *Archive, allocator: Allocator) void {
-    allocator.free(self.data);
-    allocator.free(self.path);
     self.objects.deinit(allocator);
 }
 
-pub fn parse(self: *Archive, macho_file: *MachO) !void {
+pub fn parse(self: *Archive, macho_file: *MachO, path: []const u8, file: std.fs.File, fat_arch: ?fat.Arch) !void {
     const gpa = macho_file.base.comp.gpa;
 
     var arena = std.heap.ArenaAllocator.init(gpa);
     defer arena.deinit();
 
-    var stream = std.io.fixedBufferStream(self.data);
-    const reader = stream.reader();
-    _ = try reader.readBytesNoEof(SARMAG);
+    const offset = if (fat_arch) |ar| ar.offset else 0;
+    const size = if (fat_arch) |ar| ar.size else (try file.stat()).size;
+    try file.seekTo(offset);
+
+    const reader = file.reader();
+    _ = try reader.readBytesNoEof(Archive.SARMAG);
 
+    var pos: usize = Archive.SARMAG;
     while (true) {
-        if (stream.pos >= self.data.len) break;
-        if (!mem.isAligned(stream.pos, 2)) stream.pos += 1;
+        if (pos >= size) break;
+        if (!mem.isAligned(pos, 2)) {
+            try file.seekBy(1);
+            pos += 1;
+        }
 
         const hdr = try reader.readStruct(ar_hdr);
+        pos += @sizeOf(ar_hdr);
 
         if (!mem.eql(u8, &hdr.ar_fmag, ARFMAG)) {
-            try macho_file.reportParseError(self.path, "invalid header delimiter: expected '{s}', found '{s}'", .{
+            try macho_file.reportParseError(path, "invalid header delimiter: expected '{s}', found '{s}'", .{
                 std.fmt.fmtSliceEscapeLower(ARFMAG), std.fmt.fmtSliceEscapeLower(&hdr.ar_fmag),
             });
             return error.MalformedArchive;
         }
 
-        var size = try hdr.size();
+        var hdr_size = try hdr.size();
         const name = name: {
             if (hdr.name()) |n| break :name n;
             if (try hdr.nameLength()) |len| {
-                size -= len;
+                hdr_size -= len;
                 const buf = try arena.allocator().alloc(u8, len);
                 try reader.readNoEof(buf);
+                pos += len;
                 const actual_len = mem.indexOfScalar(u8, buf, @as(u8, 0)) orelse len;
                 break :name buf[0..actual_len];
             }
             unreachable;
         };
         defer {
-            _ = stream.seekBy(size) catch {};
+            _ = file.seekBy(hdr_size) catch {};
+            pos += hdr_size;
         }
 
         if (mem.eql(u8, name, "__.SYMDEF") or mem.eql(u8, name, "__.SYMDEF SORTED")) continue;
 
         const object = Object{
-            .archive = try gpa.dupe(u8, self.path),
+            .archive = .{
+                .path = try gpa.dupe(u8, path),
+                .offset = offset + pos,
+            },
             .path = try gpa.dupe(u8, name),
-            .data = try gpa.dupe(u8, self.data[stream.pos..][0..size]),
+            .file = try std.fs.cwd().openFile(path, .{}),
             .index = undefined,
             .alive = false,
             .mtime = hdr.date() catch 0,
         };
 
-        log.debug("extracting object '{s}' from archive '{s}'", .{ object.path, self.path });
+        log.debug("extracting object '{s}' from archive '{s}'", .{ object.path, path });
 
         try self.objects.append(gpa, object);
     }
src/link/MachO/Atom.zig
@@ -43,7 +43,11 @@ prev_index: Index = 0,
 next_index: Index = 0,
 
 pub fn getName(self: Atom, macho_file: *MachO) [:0]const u8 {
-    return macho_file.strings.getAssumeExists(self.name);
+    return switch (self.getFile(macho_file)) {
+        .dylib => unreachable,
+        .zig_object => |x| x.strtab.getAssumeExists(self.name),
+        inline else => |x| x.getString(self.name),
+    };
 }
 
 pub fn getFile(self: Atom, macho_file: *MachO) File {
@@ -52,17 +56,17 @@ pub fn getFile(self: Atom, macho_file: *MachO) File {
 
 pub fn getRelocs(self: Atom, macho_file: *MachO) []const Relocation {
     return switch (self.getFile(macho_file)) {
-        .zig_object => |x| x.getAtomRelocs(self),
-        .object => |x| x.getAtomRelocs(self),
-        else => unreachable,
+        .dylib => unreachable,
+        inline else => |x| x.getAtomRelocs(self),
     };
 }
 
 pub fn getInputSection(self: Atom, macho_file: *MachO) macho.section_64 {
     return switch (self.getFile(macho_file)) {
+        .dylib => unreachable,
         .zig_object => |x| x.getInputSection(self, macho_file),
         .object => |x| x.sections.items(.header)[self.n_sect],
-        else => unreachable,
+        .internal => |x| x.sections.items(.header)[self.n_sect],
     };
 }
 
src/link/MachO/DwarfInfo.zig
@@ -1,15 +1,17 @@
-debug_info: []const u8,
-debug_abbrev: []const u8,
-debug_str: []const u8,
-
 /// Abbreviation table indexed by offset in the .debug_abbrev bytestream
 abbrev_tables: std.AutoArrayHashMapUnmanaged(u64, AbbrevTable) = .{},
 /// List of compile units as they appear in the .debug_info bytestream
 compile_units: std.ArrayListUnmanaged(CompileUnit) = .{},
-
-pub fn init(dw: *DwarfInfo, allocator: Allocator) !void {
-    try dw.parseAbbrevTables(allocator);
-    try dw.parseCompileUnits(allocator);
+/// Owned copy of the `.debug_str` bytes passed to `init`; `getString` offsets index into it
+strtab: std.ArrayListUnmanaged(u8) = .{},
+/// Attribute value bytes copied out of `.debug_info`; each `Die` value is an (index, len) range into it
+di_data: std.ArrayListUnmanaged(u8) = .{},
+
+pub fn init(dw: *DwarfInfo, allocator: Allocator, di: DebugInfo) !void {
+    try dw.strtab.ensureTotalCapacityPrecise(allocator, di.debug_str.len);
+    dw.strtab.appendSliceAssumeCapacity(di.debug_str);
+    try dw.parseAbbrevTables(allocator, di);
+    try dw.parseCompileUnits(allocator, di);
 }
 
 pub fn deinit(dw: *DwarfInfo, allocator: Allocator) void {
@@ -18,18 +20,27 @@ pub fn deinit(dw: *DwarfInfo, allocator: Allocator) void {
         cu.deinit(allocator);
     }
     dw.compile_units.deinit(allocator);
+    dw.strtab.deinit(allocator);
+    dw.di_data.deinit(allocator);
+}
+
+fn appendDiData(dw: *DwarfInfo, allocator: Allocator, values: []const u8) error{OutOfMemory}!u32 {
+    const index: u32 = @intCast(dw.di_data.items.len);
+    try dw.di_data.ensureUnusedCapacity(allocator, values.len);
+    dw.di_data.appendSliceAssumeCapacity(values);
+    return index;
 }
 
 fn getString(dw: DwarfInfo, off: usize) [:0]const u8 {
-    assert(off < dw.debug_str.len);
-    return mem.sliceTo(@as([*:0]const u8, @ptrCast(dw.debug_str.ptr + off)), 0);
+    assert(off < dw.strtab.items.len);
+    return mem.sliceTo(@as([*:0]const u8, @ptrCast(dw.strtab.items.ptr + off)), 0);
 }
 
-fn parseAbbrevTables(dw: *DwarfInfo, allocator: Allocator) !void {
+fn parseAbbrevTables(dw: *DwarfInfo, allocator: Allocator, di: DebugInfo) !void {
     const tracy = trace(@src());
     defer tracy.end();
 
-    const debug_abbrev = dw.debug_abbrev;
+    const debug_abbrev = di.debug_abbrev;
     var stream = std.io.fixedBufferStream(debug_abbrev);
     var creader = std.io.countingReader(stream.reader());
     const reader = creader.reader();
@@ -77,11 +88,11 @@ fn parseAbbrevTables(dw: *DwarfInfo, allocator: Allocator) !void {
     }
 }
 
-fn parseCompileUnits(dw: *DwarfInfo, allocator: Allocator) !void {
+fn parseCompileUnits(dw: *DwarfInfo, allocator: Allocator, di: DebugInfo) !void {
     const tracy = trace(@src());
     defer tracy.end();
 
-    const debug_info = dw.debug_info;
+    const debug_info = di.debug_info;
     var stream = std.io.fixedBufferStream(debug_info);
     var creader = std.io.countingReader(stream.reader());
     const reader = creader.reader();
@@ -107,7 +118,7 @@ fn parseCompileUnits(dw: *DwarfInfo, allocator: Allocator) !void {
         cu.header.address_size = try reader.readInt(u8, .little);
 
         const table = dw.abbrev_tables.get(cu.header.debug_abbrev_offset).?;
-        try dw.parseDie(allocator, cu, table, null, &creader);
+        try dw.parseDie(allocator, cu, table, di, null, &creader);
     }
 }
 
@@ -116,6 +127,7 @@ fn parseDie(
     allocator: Allocator,
     cu: *CompileUnit,
     table: AbbrevTable,
+    di: DebugInfo,
     parent: ?u32,
     creader: anytype,
 ) anyerror!void {
@@ -140,19 +152,20 @@ fn parseDie(
         }
 
         const decl = table.decls.get(code) orelse return error.MalformedDwarf; // TODO better errors
-        const data = dw.debug_info;
+        const data = di.debug_info;
         try cu.diePtr(die).values.ensureTotalCapacityPrecise(allocator, decl.attrs.values().len);
 
         for (decl.attrs.values()) |attr| {
             const start = std.math.cast(usize, creader.bytes_read) orelse return error.Overflow;
             try advanceByFormSize(cu, attr.form, creader);
             const end = std.math.cast(usize, creader.bytes_read) orelse return error.Overflow;
-            cu.diePtr(die).values.appendAssumeCapacity(data[start..end]);
+            const index = try dw.appendDiData(allocator, data[start..end]);
+            cu.diePtr(die).values.appendAssumeCapacity(.{ .index = index, .len = @intCast(end - start) });
         }
 
         if (decl.children) {
             // Open scope
-            try dw.parseDie(allocator, cu, table, die, creader);
+            try dw.parseDie(allocator, cu, table, di, die, creader);
         }
     }
 }
@@ -340,7 +353,7 @@ pub const CompileUnit = struct {
 
 pub const Die = struct {
     code: Code,
-    values: std.ArrayListUnmanaged([]const u8) = .{},
+    values: std.ArrayListUnmanaged(struct { index: u32, len: u32 }) = .{},
     children: std.ArrayListUnmanaged(Die.Index) = .{},
 
     pub fn deinit(die: *Die, gpa: Allocator) void {
@@ -354,7 +367,7 @@ pub const Die = struct {
         const index = decl.attrs.getIndex(at) orelse return null;
         const attr = decl.attrs.values()[index];
         const value = die.values.items[index];
-        return .{ .attr = attr, .bytes = value };
+        return .{ .attr = attr, .bytes = ctx.di_data.items[value.index..][0..value.len] };
     }
 
     pub const Index = u32;
@@ -458,6 +471,12 @@ pub const Format = enum {
     dwarf64,
 };
 
+const DebugInfo = struct {
+    debug_info: []const u8,
+    debug_abbrev: []const u8,
+    debug_str: []const u8,
+};
+
 const assert = std.debug.assert;
 const dwarf = std.dwarf;
 const leb = std.leb;
src/link/MachO/Dylib.zig
@@ -1,8 +1,6 @@
 path: []const u8,
-data: []const u8,
 index: File.Index,
 
-header: ?macho.mach_header_64 = null,
 exports: std.MultiArrayList(Export) = .{},
 strtab: std.ArrayListUnmanaged(u8) = .{},
 id: ?Id = null,
@@ -34,7 +32,6 @@ pub fn isDylib(path: []const u8, fat_arch: ?fat.Arch) !bool {
 }
 
 pub fn deinit(self: *Dylib, allocator: Allocator) void {
-    allocator.free(self.data);
     allocator.free(self.path);
     self.exports.deinit(allocator);
     self.strtab.deinit(allocator);
@@ -44,22 +41,29 @@ pub fn deinit(self: *Dylib, allocator: Allocator) void {
         id.deinit(allocator);
     }
     self.dependents.deinit(allocator);
+    for (self.rpaths.keys()) |rpath| {
+        allocator.free(rpath);
+    }
     self.rpaths.deinit(allocator);
 }
 
-pub fn parse(self: *Dylib, macho_file: *MachO) !void {
+pub fn parse(self: *Dylib, macho_file: *MachO, file: std.fs.File, fat_arch: ?fat.Arch) !void {
     const tracy = trace(@src());
     defer tracy.end();
 
     const gpa = macho_file.base.comp.gpa;
-    var stream = std.io.fixedBufferStream(self.data);
-    const reader = stream.reader();
+    const offset = if (fat_arch) |ar| ar.offset else 0;
 
     log.debug("parsing dylib from binary", .{});
 
-    self.header = try reader.readStruct(macho.mach_header_64);
+    var header_buffer: [@sizeOf(macho.mach_header_64)]u8 = undefined;
+    {
+        const amt = try file.preadAll(&header_buffer, offset);
+        if (amt != @sizeOf(macho.mach_header_64)) return error.InputOutput;
+    }
+    const header = @as(*align(1) const macho.mach_header_64, @ptrCast(&header_buffer)).*;
 
-    const this_cpu_arch: std.Target.Cpu.Arch = switch (self.header.?.cputype) {
+    const this_cpu_arch: std.Target.Cpu.Arch = switch (header.cputype) {
         macho.CPU_TYPE_ARM64 => .aarch64,
         macho.CPU_TYPE_X86_64 => .x86_64,
         else => |x| {
@@ -72,39 +76,60 @@ pub fn parse(self: *Dylib, macho_file: *MachO) !void {
         return error.InvalidCpuArch;
     }
 
-    const lc_id = self.getLoadCommand(.ID_DYLIB) orelse {
-        try macho_file.reportParseError2(self.index, "missing LC_ID_DYLIB load command", .{});
-        return error.MalformedDylib;
-    };
-    self.id = try Id.fromLoadCommand(gpa, lc_id.cast(macho.dylib_command).?, lc_id.getDylibPathName());
+    const lc_buffer = try gpa.alloc(u8, header.sizeofcmds);
+    defer gpa.free(lc_buffer);
+    {
+        const amt = try file.preadAll(lc_buffer, offset + @sizeOf(macho.mach_header_64));
+        if (amt != lc_buffer.len) return error.InputOutput;
+    }
 
     var it = LoadCommandIterator{
-        .ncmds = self.header.?.ncmds,
-        .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds],
+        .ncmds = header.ncmds,
+        .buffer = lc_buffer,
     };
     while (it.next()) |cmd| switch (cmd.cmd()) {
-        .REEXPORT_DYLIB => if (self.header.?.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0) {
+        .ID_DYLIB => {
+            self.id = try Id.fromLoadCommand(gpa, cmd.cast(macho.dylib_command).?, cmd.getDylibPathName());
+        },
+        .REEXPORT_DYLIB => if (header.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0) {
             const id = try Id.fromLoadCommand(gpa, cmd.cast(macho.dylib_command).?, cmd.getDylibPathName());
             try self.dependents.append(gpa, id);
         },
         .DYLD_INFO_ONLY => {
             const dyld_cmd = cmd.cast(macho.dyld_info_command).?;
-            const data = self.data[dyld_cmd.export_off..][0..dyld_cmd.export_size];
+            const data = try gpa.alloc(u8, dyld_cmd.export_size);
+            defer gpa.free(data);
+            const amt = try file.preadAll(data, dyld_cmd.export_off + offset);
+            if (amt != data.len) return error.InputOutput;
             try self.parseTrie(data, macho_file);
         },
         .DYLD_EXPORTS_TRIE => {
             const ld_cmd = cmd.cast(macho.linkedit_data_command).?;
-            const data = self.data[ld_cmd.dataoff..][0..ld_cmd.datasize];
+            const data = try gpa.alloc(u8, ld_cmd.datasize);
+            defer gpa.free(data);
+            const amt = try file.preadAll(data, ld_cmd.dataoff + offset);
+            if (amt != data.len) return error.InputOutput;
             try self.parseTrie(data, macho_file);
         },
         .RPATH => {
             const path = cmd.getRpathPathName();
-            try self.rpaths.put(gpa, path, {});
+            try self.rpaths.put(gpa, try gpa.dupe(u8, path), {});
+        },
+        .BUILD_VERSION,
+        .VERSION_MIN_MACOSX,
+        .VERSION_MIN_IPHONEOS,
+        .VERSION_MIN_TVOS,
+        .VERSION_MIN_WATCHOS,
+        => {
+            self.platform = MachO.Platform.fromLoadCommand(cmd);
         },
         else => {},
     };
 
-    self.initPlatform();
+    if (self.id == null) {
+        try macho_file.reportParseError2(self.index, "missing LC_ID_DYLIB load command", .{});
+        return error.MalformedDylib;
+    }
 
     if (self.platform) |platform| {
         if (!macho_file.platform.eqlTarget(platform)) {
@@ -168,7 +193,7 @@ const TrieIterator = struct {
 
 pub fn addExport(self: *Dylib, allocator: Allocator, name: []const u8, flags: Export.Flags) !void {
     try self.exports.append(allocator, .{
-        .name = try self.insertString(allocator, name),
+        .name = try self.addString(allocator, name),
         .flags = flags,
     });
 }
@@ -479,24 +504,6 @@ pub fn initSymbols(self: *Dylib, macho_file: *MachO) !void {
     }
 }
 
-fn initPlatform(self: *Dylib) void {
-    var it = LoadCommandIterator{
-        .ncmds = self.header.?.ncmds,
-        .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds],
-    };
-    self.platform = while (it.next()) |cmd| {
-        switch (cmd.cmd()) {
-            .BUILD_VERSION,
-            .VERSION_MIN_MACOSX,
-            .VERSION_MIN_IPHONEOS,
-            .VERSION_MIN_TVOS,
-            .VERSION_MIN_WATCHOS,
-            => break MachO.Platform.fromLoadCommand(cmd),
-            else => {},
-        }
-    } else null;
-}
-
 pub fn resolveSymbols(self: *Dylib, macho_file: *MachO) void {
     const tracy = trace(@src());
     defer tracy.end();
@@ -526,8 +533,10 @@ pub fn resetGlobals(self: *Dylib, macho_file: *MachO) void {
     for (self.symbols.items) |sym_index| {
         const sym = macho_file.getSymbol(sym_index);
         const name = sym.name;
+        const global = sym.flags.global;
         sym.* = .{};
         sym.name = name;
+        sym.flags.global = global;
     }
 }
 
@@ -589,17 +598,7 @@ pub inline fn getUmbrella(self: Dylib, macho_file: *MachO) *Dylib {
     return macho_file.getFile(self.umbrella).?.dylib;
 }
 
-fn getLoadCommand(self: Dylib, lc: macho.LC) ?LoadCommandIterator.LoadCommand {
-    var it = LoadCommandIterator{
-        .ncmds = self.header.?.ncmds,
-        .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds],
-    };
-    while (it.next()) |cmd| {
-        if (cmd.cmd() == lc) return cmd;
-    } else return null;
-}
-
-fn insertString(self: *Dylib, allocator: Allocator, name: []const u8) !u32 {
+fn addString(self: *Dylib, allocator: Allocator, name: []const u8) !u32 {
     const off = @as(u32, @intCast(self.strtab.items.len));
     try self.strtab.writer(allocator).print("{s}\x00", .{name});
     return off;
src/link/MachO/InternalObject.zig
@@ -3,6 +3,7 @@ index: File.Index,
 sections: std.MultiArrayList(Section) = .{},
 atoms: std.ArrayListUnmanaged(Atom.Index) = .{},
 symbols: std.ArrayListUnmanaged(Symbol.Index) = .{},
+strtab: std.ArrayListUnmanaged(u8) = .{},
 
 objc_methnames: std.ArrayListUnmanaged(u8) = .{},
 objc_selrefs: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64),
@@ -16,6 +17,7 @@ pub fn deinit(self: *InternalObject, allocator: Allocator) void {
     self.sections.deinit(allocator);
     self.atoms.deinit(allocator);
     self.symbols.deinit(allocator);
+    self.strtab.deinit(allocator);
     self.objc_methnames.deinit(allocator);
 }
 
@@ -26,7 +28,11 @@ pub fn addSymbol(self: *InternalObject, name: [:0]const u8, macho_file: *MachO)
     const gop = try macho_file.getOrCreateGlobal(off);
     self.symbols.addOneAssumeCapacity().* = gop.index;
     const sym = macho_file.getSymbol(gop.index);
-    sym.* = .{ .name = off, .file = self.index };
+    sym.file = self.index;
+    sym.value = 0;
+    sym.atom = 0;
+    sym.nlist_idx = 0;
+    sym.flags = .{ .global = true };
     return gop.index;
 }
 
@@ -45,7 +51,7 @@ fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_fil
     defer gpa.free(name);
     const atom = macho_file.getAtom(atom_index).?;
     atom.atom_index = atom_index;
-    atom.name = try macho_file.strings.insert(gpa, name);
+    atom.name = try self.addString(gpa, name);
     atom.file = self.index;
     atom.size = methname.len + 1;
     atom.alignment = .@"1";
@@ -79,7 +85,7 @@ fn addObjcSelrefsSection(
     defer gpa.free(name);
     const atom = macho_file.getAtom(atom_index).?;
     atom.atom_index = atom_index;
-    atom.name = try macho_file.strings.insert(gpa, name);
+    atom.name = try self.addString(gpa, name);
     atom.file = self.index;
     atom.size = @sizeOf(u64);
     atom.alignment = .@"8";
@@ -158,16 +164,36 @@ fn addSection(self: *InternalObject, allocator: Allocator, segname: []const u8,
     return n_sect;
 }
 
-pub fn getSectionData(self: *const InternalObject, index: u32) []const u8 {
+pub fn getAtomData(self: *const InternalObject, atom: Atom, buffer: []u8) !void {
+    assert(buffer.len == atom.size);
     const slice = self.sections.slice();
-    assert(index < slice.items(.header).len);
-    const sect = slice.items(.header)[index];
-    const extra = slice.items(.extra)[index];
-    if (extra.is_objc_methname) {
-        return self.objc_methnames.items[sect.offset..][0..sect.size];
-    } else if (extra.is_objc_selref) {
-        return &self.objc_selrefs;
-    } else @panic("ref to non-existent section");
+    const sect = slice.items(.header)[atom.n_sect];
+    const extra = slice.items(.extra)[atom.n_sect];
+    const data = if (extra.is_objc_methname)
+        self.objc_methnames.items[sect.offset..][0..sect.size]
+    else if (extra.is_objc_selref)
+        &self.objc_selrefs
+    else
+        @panic("ref to non-existent section");
+    @memcpy(buffer, data[atom.off..][0..atom.size]);
+}
+
+pub fn getAtomRelocs(self: *const InternalObject, atom: Atom) []const Relocation {
+    const relocs = self.sections.items(.relocs)[atom.n_sect];
+    return relocs.items[atom.relocs.pos..][0..atom.relocs.len];
+}
+
+fn addString(self: *InternalObject, allocator: Allocator, name: [:0]const u8) error{OutOfMemory}!u32 {
+    const off: u32 = @intCast(self.strtab.items.len);
+    try self.strtab.ensureUnusedCapacity(allocator, name.len + 1);
+    self.strtab.appendSliceAssumeCapacity(name);
+    self.strtab.appendAssumeCapacity(0);
+    return off;
+}
+
+pub fn getString(self: InternalObject, off: u32) [:0]const u8 {
+    assert(off < self.strtab.items.len);
+    return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.items.ptr + off)), 0);
 }
 
 pub fn asFile(self: *InternalObject) File {
src/link/MachO/Object.zig
@@ -1,13 +1,13 @@
-archive: ?[]const u8 = null,
+archive: ?Archive = null,
 path: []const u8,
+file: std.fs.File,
 mtime: u64,
-data: []const u8,
 index: File.Index,
 
 header: ?macho.mach_header_64 = null,
 sections: std.MultiArrayList(Section) = .{},
 symtab: std.MultiArrayList(Nlist) = .{},
-strtab: []const u8 = &[0]u8{},
+strtab: std.ArrayListUnmanaged(u8) = .{},
 
 symbols: std.ArrayListUnmanaged(Symbol.Index) = .{},
 atoms: std.ArrayListUnmanaged(Atom.Index) = .{},
@@ -22,6 +22,7 @@ cies: std.ArrayListUnmanaged(Cie) = .{},
 fdes: std.ArrayListUnmanaged(Fde) = .{},
 eh_frame_data: std.ArrayListUnmanaged(u8) = .{},
 unwind_records: std.ArrayListUnmanaged(UnwindInfo.Record.Index) = .{},
+data_in_code: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},
 
 alive: bool = true,
 hidden: bool = false,
@@ -29,6 +30,11 @@ hidden: bool = false,
 dynamic_relocs: MachO.DynamicRelocs = .{},
 output_symtab_ctx: MachO.SymtabCtx = .{},
 
+const Archive = struct {
+    path: []const u8,
+    offset: u64,
+};
+
 pub fn isObject(path: []const u8) !bool {
     const file = try std.fs.cwd().openFile(path, .{});
     defer file.close();
@@ -37,12 +43,16 @@ pub fn isObject(path: []const u8) !bool {
 }
 
 pub fn deinit(self: *Object, allocator: Allocator) void {
+    self.file.close();
+    if (self.archive) |*ar| allocator.free(ar.path);
+    allocator.free(self.path);
     for (self.sections.items(.relocs), self.sections.items(.subsections)) |*relocs, *sub| {
         relocs.deinit(allocator);
         sub.deinit(allocator);
     }
     self.sections.deinit(allocator);
     self.symtab.deinit(allocator);
+    self.strtab.deinit(allocator);
     self.symbols.deinit(allocator);
     self.atoms.deinit(allocator);
     self.cies.deinit(allocator);
@@ -54,7 +64,7 @@ pub fn deinit(self: *Object, allocator: Allocator) void {
         sf.stabs.deinit(allocator);
     }
     self.stab_files.deinit(allocator);
-    allocator.free(self.data);
+    self.data_in_code.deinit(allocator);
 }
 
 pub fn parse(self: *Object, macho_file: *MachO) !void {
@@ -62,10 +72,14 @@ pub fn parse(self: *Object, macho_file: *MachO) !void {
     defer tracy.end();
 
     const gpa = macho_file.base.comp.gpa;
-    var stream = std.io.fixedBufferStream(self.data);
-    const reader = stream.reader();
+    const offset = if (self.archive) |ar| ar.offset else 0;
 
-    self.header = try reader.readStruct(macho.mach_header_64);
+    var header_buffer: [@sizeOf(macho.mach_header_64)]u8 = undefined;
+    {
+        const amt = try self.file.preadAll(&header_buffer, offset);
+        if (amt != @sizeOf(macho.mach_header_64)) return error.InputOutput;
+    }
+    self.header = @as(*align(1) const macho.mach_header_64, @ptrCast(&header_buffer)).*;
 
     const this_cpu_arch: std.Target.Cpu.Arch = switch (self.header.?.cputype) {
         macho.CPU_TYPE_ARM64 => .aarch64,
@@ -80,35 +94,79 @@ pub fn parse(self: *Object, macho_file: *MachO) !void {
         return error.InvalidCpuArch;
     }
 
-    if (self.getLoadCommand(.SEGMENT_64)) |lc| {
-        const sections = lc.getSections();
-        try self.sections.ensureUnusedCapacity(gpa, sections.len);
-        for (sections) |sect| {
-            const index = try self.sections.addOne(gpa);
-            self.sections.set(index, .{ .header = sect });
-
-            if (mem.eql(u8, sect.sectName(), "__eh_frame")) {
-                self.eh_frame_sect_index = @intCast(index);
-            } else if (mem.eql(u8, sect.sectName(), "__compact_unwind")) {
-                self.compact_unwind_sect_index = @intCast(index);
-            }
-        }
-    }
-    if (self.getLoadCommand(.SYMTAB)) |lc| {
-        const cmd = lc.cast(macho.symtab_command).?;
-        self.strtab = self.data[cmd.stroff..][0..cmd.strsize];
-
-        const symtab = @as([*]align(1) const macho.nlist_64, @ptrCast(self.data.ptr + cmd.symoff))[0..cmd.nsyms];
-        try self.symtab.ensureUnusedCapacity(gpa, symtab.len);
-        for (symtab) |nlist| {
-            self.symtab.appendAssumeCapacity(.{
-                .nlist = nlist,
-                .atom = 0,
-                .size = 0,
-            });
-        }
+    const lc_buffer = try gpa.alloc(u8, self.header.?.sizeofcmds);
+    defer gpa.free(lc_buffer);
+    {
+        const amt = try self.file.preadAll(lc_buffer, offset + @sizeOf(macho.mach_header_64));
+        if (amt != self.header.?.sizeofcmds) return error.InputOutput;
     }
 
+    var it = LoadCommandIterator{
+        .ncmds = self.header.?.ncmds,
+        .buffer = lc_buffer,
+    };
+    while (it.next()) |lc| switch (lc.cmd()) {
+        .SEGMENT_64 => {
+            const sections = lc.getSections();
+            try self.sections.ensureUnusedCapacity(gpa, sections.len);
+            for (sections) |sect| {
+                const index = try self.sections.addOne(gpa);
+                self.sections.set(index, .{ .header = sect });
+
+                if (mem.eql(u8, sect.sectName(), "__eh_frame")) {
+                    self.eh_frame_sect_index = @intCast(index);
+                } else if (mem.eql(u8, sect.sectName(), "__compact_unwind")) {
+                    self.compact_unwind_sect_index = @intCast(index);
+                }
+            }
+        },
+        .SYMTAB => {
+            const cmd = lc.cast(macho.symtab_command).?;
+            try self.strtab.resize(gpa, cmd.strsize);
+            {
+                const amt = try self.file.preadAll(self.strtab.items, cmd.stroff + offset);
+                if (amt != self.strtab.items.len) return error.InputOutput;
+            }
+
+            const symtab_buffer = try gpa.alloc(u8, cmd.nsyms * @sizeOf(macho.nlist_64));
+            defer gpa.free(symtab_buffer);
+            {
+                const amt = try self.file.preadAll(symtab_buffer, cmd.symoff + offset);
+                if (amt != symtab_buffer.len) return error.InputOutput;
+            }
+            const symtab = @as([*]align(1) const macho.nlist_64, @ptrCast(symtab_buffer.ptr))[0..cmd.nsyms];
+            try self.symtab.ensureUnusedCapacity(gpa, symtab.len);
+            for (symtab) |nlist| {
+                self.symtab.appendAssumeCapacity(.{
+                    .nlist = nlist,
+                    .atom = 0,
+                    .size = 0,
+                });
+            }
+        },
+        .DATA_IN_CODE => {
+            const cmd = lc.cast(macho.linkedit_data_command).?;
+            const buffer = try gpa.alloc(u8, cmd.datasize);
+            defer gpa.free(buffer);
+            {
+                const amt = try self.file.preadAll(buffer, offset + cmd.dataoff);
+                if (amt != buffer.len) return error.InputOutput;
+            }
+            const ndice = @divExact(cmd.datasize, @sizeOf(macho.data_in_code_entry));
+            const dice = @as([*]align(1) const macho.data_in_code_entry, @ptrCast(buffer.ptr))[0..ndice];
+            try self.data_in_code.appendUnalignedSlice(gpa, dice);
+        },
+        .BUILD_VERSION,
+        .VERSION_MIN_MACOSX,
+        .VERSION_MIN_IPHONEOS,
+        .VERSION_MIN_TVOS,
+        .VERSION_MIN_WATCHOS,
+        => if (self.platform == null) {
+            self.platform = MachO.Platform.fromLoadCommand(lc);
+        },
+        else => {},
+    };
+
     const NlistIdx = struct {
         nlist: macho.nlist_64,
         idx: usize,
@@ -170,8 +228,6 @@ pub fn parse(self: *Object, macho_file: *MachO) !void {
         try self.parseUnwindRecords(macho_file);
     }
 
-    self.initPlatform();
-
     if (self.platform) |platform| {
         if (!macho_file.platform.eqlTarget(platform)) {
             try macho_file.reportParseError2(self.index, "invalid platform: {}", .{
@@ -237,7 +293,7 @@ fn initSubsections(self: *Object, nlists: anytype, macho_file: *MachO) !void {
             defer gpa.free(name);
             const size = if (nlist_start == nlist_end) sect.size else nlists[nlist_start].nlist.n_value - sect.addr;
             const atom_index = try self.addAtom(.{
-                .name = name,
+                .name = try self.addString(gpa, name),
                 .n_sect = @intCast(n_sect),
                 .off = 0,
                 .size = size,
@@ -267,7 +323,7 @@ fn initSubsections(self: *Object, nlists: anytype, macho_file: *MachO) !void {
             else
                 sect.@"align";
             const atom_index = try self.addAtom(.{
-                .name = self.getString(nlist.nlist.n_strx),
+                .name = nlist.nlist.n_strx,
                 .n_sect = @intCast(n_sect),
                 .off = nlist.nlist.n_value - sect.addr,
                 .size = size,
@@ -300,7 +356,7 @@ fn initSections(self: *Object, nlists: anytype, macho_file: *MachO) !void {
         defer gpa.free(name);
 
         const atom_index = try self.addAtom(.{
-            .name = name,
+            .name = try self.addString(gpa, name),
             .n_sect = @intCast(n_sect),
             .off = 0,
             .size = sect.size,
@@ -336,7 +392,7 @@ fn initSections(self: *Object, nlists: anytype, macho_file: *MachO) !void {
 }
 
 const AddAtomArgs = struct {
-    name: [:0]const u8,
+    name: u32,
     n_sect: u8,
     off: u64,
     size: u64,
@@ -349,7 +405,7 @@ fn addAtom(self: *Object, args: AddAtomArgs, macho_file: *MachO) !Atom.Index {
     const atom = macho_file.getAtom(atom_index).?;
     atom.file = self.index;
     atom.atom_index = atom_index;
-    atom.name = try macho_file.strings.insert(gpa, args.name);
+    atom.name = args.name;
     atom.n_sect = args.n_sect;
     atom.size = args.size;
     atom.alignment = Atom.Alignment.fromLog2Units(args.alignment);
@@ -376,7 +432,7 @@ fn initLiteralSections(self: *Object, macho_file: *MachO) !void {
         defer gpa.free(name);
 
         const atom_index = try self.addAtom(.{
-            .name = name,
+            .name = try self.addString(gpa, name),
             .n_sect = @intCast(n_sect),
             .off = 0,
             .size = sect.size,
@@ -475,10 +531,9 @@ fn initSymbols(self: *Object, macho_file: *MachO) !void {
         const index = try macho_file.addSymbol();
         self.symbols.appendAssumeCapacity(index);
         const symbol = macho_file.getSymbol(index);
-        const name = self.getString(nlist.n_strx);
         symbol.* = .{
             .value = nlist.n_value,
-            .name = try macho_file.strings.insert(gpa, name),
+            .name = nlist.n_strx,
             .nlist_idx = @intCast(i),
             .atom = 0,
             .file = self.index,
@@ -638,7 +693,10 @@ fn initEhFrameRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void {
     const sect = slice.items(.header)[sect_id];
     const relocs = slice.items(.relocs)[sect_id];
 
-    const data = try self.getSectionData(sect_id);
+    // TODO: read into buffer directly
+    const data = try self.getSectionData(gpa, sect_id);
+    defer gpa.free(data);
+
     try self.eh_frame_data.ensureTotalCapacityPrecise(gpa, data.len);
     self.eh_frame_data.appendSliceAssumeCapacity(data);
 
@@ -739,7 +797,8 @@ fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void {
     };
 
     const gpa = macho_file.base.comp.gpa;
-    const data = try self.getSectionData(sect_id);
+    const data = try self.getSectionData(gpa, sect_id);
+    defer gpa.free(data);
     const nrecs = @divExact(data.len, @sizeOf(macho.compact_unwind_entry));
     const recs = @as([*]align(1) const macho.compact_unwind_entry, @ptrCast(data.ptr))[0..nrecs];
     const sym_lookup = SymbolLookup{ .ctx = self };
@@ -934,24 +993,6 @@ fn parseUnwindRecords(self: *Object, macho_file: *MachO) !void {
     }
 }
 
-fn initPlatform(self: *Object) void {
-    var it = LoadCommandIterator{
-        .ncmds = self.header.?.ncmds,
-        .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds],
-    };
-    self.platform = while (it.next()) |cmd| {
-        switch (cmd.cmd()) {
-            .BUILD_VERSION,
-            .VERSION_MIN_MACOSX,
-            .VERSION_MIN_IPHONEOS,
-            .VERSION_MIN_TVOS,
-            .VERSION_MIN_WATCHOS,
-            => break MachO.Platform.fromLoadCommand(cmd),
-            else => {},
-        }
-    } else null;
-}
-
 /// Currently, we only check if a compile unit for this input object file exists
 /// and record that so that we can emit symbol stabs.
 /// TODO in the future, we want parse debug info and debug line sections so that
@@ -975,12 +1016,20 @@ fn initDwarfInfo(self: *Object, macho_file: *MachO) !void {
 
     if (debug_info_index == null or debug_abbrev_index == null) return;
 
-    var dwarf_info = DwarfInfo{
-        .debug_info = try self.getSectionData(@intCast(debug_info_index.?)),
-        .debug_abbrev = try self.getSectionData(@intCast(debug_abbrev_index.?)),
-        .debug_str = if (debug_str_index) |index| try self.getSectionData(@intCast(index)) else "",
-    };
-    dwarf_info.init(gpa) catch {
+    const debug_info = try self.getSectionData(gpa, @intCast(debug_info_index.?));
+    defer gpa.free(debug_info);
+    const debug_abbrev = try self.getSectionData(gpa, @intCast(debug_abbrev_index.?));
+    defer gpa.free(debug_abbrev);
+    const debug_str = if (debug_str_index) |index| try self.getSectionData(gpa, @intCast(index)) else &[0]u8{};
+    defer gpa.free(debug_str);
+
+    var dwarf_info = DwarfInfo{};
+    errdefer dwarf_info.deinit(gpa);
+    dwarf_info.init(gpa, .{
+        .debug_info = debug_info,
+        .debug_abbrev = debug_abbrev,
+        .debug_str = debug_str,
+    }) catch {
         try macho_file.reportParseError2(self.index, "invalid __DWARF info found", .{});
         return error.MalformedObject;
     };
@@ -1049,8 +1098,10 @@ pub fn resetGlobals(self: *Object, macho_file: *MachO) void {
         if (!self.symtab.items(.nlist)[nlist_idx].ext()) continue;
         const sym = macho_file.getSymbol(sym_index);
         const name = sym.name;
+        const global = sym.flags.global;
         sym.* = .{};
         sym.name = name;
+        sym.flags.global = global;
     }
 }
 
@@ -1137,7 +1188,7 @@ pub fn convertTentativeDefinitions(self: *Object, macho_file: *MachO) !void {
         defer gpa.free(name);
         const atom = macho_file.getAtom(atom_index).?;
         atom.atom_index = atom_index;
-        atom.name = try macho_file.strings.insert(gpa, name);
+        atom.name = try self.addString(gpa, name);
         atom.file = self.index;
         atom.size = nlist.n_value;
         atom.alignment = Atom.Alignment.fromLog2Units((nlist.n_desc >> 8) & 0x0f);
@@ -1151,6 +1202,7 @@ pub fn convertTentativeDefinitions(self: *Object, macho_file: *MachO) !void {
 
         sym.value = 0;
         sym.atom = atom_index;
+        sym.flags.global = true;
         sym.flags.weak = false;
         sym.flags.weak_ref = false;
         sym.flags.tentative = false;
@@ -1219,8 +1271,8 @@ pub fn calcStabsSize(self: *Object, macho_file: *MachO) error{Overflow}!void {
         self.output_symtab_ctx.strsize += @as(u32, @intCast(comp_dir.len + 1)); // comp_dir
         self.output_symtab_ctx.strsize += @as(u32, @intCast(tu_name.len + 1)); // tu_name
 
-        if (self.archive) |path| {
-            self.output_symtab_ctx.strsize += @as(u32, @intCast(path.len + 1 + self.path.len + 1 + 1));
+        if (self.archive) |ar| {
+            self.output_symtab_ctx.strsize += @as(u32, @intCast(ar.path.len + 1 + self.path.len + 1 + 1));
         } else {
             self.output_symtab_ctx.strsize += @as(u32, @intCast(self.path.len + 1));
         }
@@ -1365,8 +1417,8 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO) error{Overflow}!void
         index += 1;
         // N_OSO path
         n_strx = @as(u32, @intCast(macho_file.strtab.items.len));
-        if (self.archive) |path| {
-            macho_file.strtab.appendSliceAssumeCapacity(path);
+        if (self.archive) |ar| {
+            macho_file.strtab.appendSliceAssumeCapacity(ar.path);
             macho_file.strtab.appendAssumeCapacity('(');
             macho_file.strtab.appendSliceAssumeCapacity(self.path);
             macho_file.strtab.appendAssumeCapacity(')');
@@ -1532,30 +1584,25 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO) error{Overflow}!void
     }
 }
 
-fn getLoadCommand(self: Object, lc: macho.LC) ?LoadCommandIterator.LoadCommand {
-    var it = LoadCommandIterator{
-        .ncmds = self.header.?.ncmds,
-        .buffer = self.data[@sizeOf(macho.mach_header_64)..][0..self.header.?.sizeofcmds],
-    };
-    while (it.next()) |cmd| {
-        if (cmd.cmd() == lc) return cmd;
-    } else return null;
-}
-
-pub fn getSectionData(self: *const Object, index: u32) error{Overflow}![]const u8 {
+/// Reads the contents of section `index` from the input file into a buffer
+/// allocated with `allocator`. Caller owns the returned memory and must free it
+/// with the same allocator.
+/// Returns `error.Overflow` if the section size does not fit in `usize` and
+/// `error.InputOutput` if fewer bytes than the section size could be read.
+fn getSectionData(self: *const Object, allocator: Allocator, index: u32) ![]u8 {
     const slice = self.sections.slice();
     assert(index < slice.items(.header).len);
     const sect = slice.items(.header)[index];
-    const off = math.cast(usize, sect.offset) orelse return error.Overflow;
-    const size = math.cast(usize, sect.size) orelse return error.Overflow;
-    return self.data[off..][0..size];
+    // Zero unless this object carries archive info; presumably the member's
+    // position inside the archive file (confirm against the archive parser).
+    const offset = if (self.archive) |ar| ar.offset else 0;
+    // Keep the checked narrowing the in-memory version had: the section size is
+    // wider than `usize` on 32-bit hosts and must not be passed to `alloc` as-is.
+    const size = std.math.cast(usize, sect.size) orelse return error.Overflow;
+    const buffer = try allocator.alloc(u8, size);
+    errdefer allocator.free(buffer);
+    const amt = try self.file.preadAll(buffer, sect.offset + offset);
+    if (amt != buffer.len) return error.InputOutput;
+    return buffer;
 }
 
-pub fn getAtomData(self: *const Object, atom: Atom) error{Overflow}![]const u8 {
-    const data = try self.getSectionData(atom.n_sect);
-    const off = math.cast(usize, atom.off) orelse return error.Overflow;
-    const size = math.cast(usize, atom.size) orelse return error.Overflow;
-    return data[off..][0..size];
+/// Reads the bytes of `atom` from the input file directly into `buffer`.
+/// `buffer.len` must equal `atom.size` (asserted).
+/// Returns `error.InputOutput` if fewer than `buffer.len` bytes could be read.
+pub fn getAtomData(self: *const Object, atom: Atom, buffer: []u8) !void {
+    assert(buffer.len == atom.size);
+    const slice = self.sections.slice();
+    // Zero unless this object carries archive info; presumably the member's
+    // position inside the archive file (confirm against the archive parser).
+    const offset = if (self.archive) |ar| ar.offset else 0;
+    const sect = slice.items(.header)[atom.n_sect];
+    // Read position: archive offset + section's file offset + atom's offset
+    // within its section.
+    const amt = try self.file.preadAll(buffer, sect.offset + offset + atom.off);
+    if (amt != buffer.len) return error.InputOutput;
 }
 
 pub fn getAtomRelocs(self: *const Object, atom: Atom) []const Relocation {
@@ -1563,9 +1610,17 @@ pub fn getAtomRelocs(self: *const Object, atom: Atom) []const Relocation {
     return relocs.items[atom.relocs.pos..][0..atom.relocs.len];
 }
 
-fn getString(self: Object, off: u32) [:0]const u8 {
-    assert(off < self.strtab.len);
-    return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.ptr + off)), 0);
+/// Appends `name` followed by a terminating 0 byte to this object's `strtab`
+/// and returns the byte offset at which the string starts.
+/// No lookup for an existing copy of `name` is done here.
+fn addString(self: *Object, allocator: Allocator, name: [:0]const u8) error{OutOfMemory}!u32 {
+    const off: u32 = @intCast(self.strtab.items.len);
+    // Reserve room for the bytes and the terminator up front so that the two
+    // appends below cannot fail.
+    try self.strtab.ensureUnusedCapacity(allocator, name.len + 1);
+    self.strtab.appendSliceAssumeCapacity(name);
+    self.strtab.appendAssumeCapacity(0);
+    return off;
+}
+
+/// Returns the 0-terminated string that starts at byte offset `off` in this
+/// object's `strtab`. `off` must lie within the table (asserted).
+/// The result points into `strtab.items`, so it should not be held across a
+/// call that may grow the table (e.g. `addString`).
+pub fn getString(self: Object, off: u32) [:0]const u8 {
+    assert(off < self.strtab.items.len);
+    return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.items.ptr + off)), 0);
 }
 
 pub fn hasUnwindRecords(self: Object) bool {
@@ -1600,15 +1655,8 @@ pub fn hasObjc(self: Object) bool {
     return false;
 }
 
-pub fn getDataInCode(self: Object) []align(1) const macho.data_in_code_entry {
-    const lc = self.getLoadCommand(.DATA_IN_CODE) orelse return &[0]macho.data_in_code_entry{};
-    const cmd = lc.cast(macho.linkedit_data_command).?;
-    const ndice = @divExact(cmd.datasize, @sizeOf(macho.data_in_code_entry));
-    const dice = @as(
-        [*]align(1) const macho.data_in_code_entry,
-        @ptrCast(self.data.ptr + cmd.dataoff),
-    )[0..ndice];
-    return dice;
+/// Returns the data-in-code entries stored in `self.data_in_code`.
+/// The slice is a view into that list; nothing is read or allocated here.
+pub fn getDataInCode(self: Object) []const macho.data_in_code_entry {
+    return self.data_in_code.items;
 }
 
 pub inline fn hasSubsections(self: Object) bool {
@@ -1762,8 +1810,8 @@ fn formatPath(
 ) !void {
     _ = unused_fmt_string;
     _ = options;
-    if (object.archive) |path| {
-        try writer.writeAll(path);
+    if (object.archive) |ar| {
+        try writer.writeAll(ar.path);
         try writer.writeByte('(');
         try writer.writeAll(object.path);
         try writer.writeByte(')');
@@ -1831,11 +1879,17 @@ const x86_64 = struct {
     ) !void {
         const gpa = macho_file.base.comp.gpa;
 
-        const relocs = @as(
-            [*]align(1) const macho.relocation_info,
-            @ptrCast(self.data.ptr + sect.reloff),
-        )[0..sect.nreloc];
-        const code = try self.getSectionData(@intCast(n_sect));
+        const offset = if (self.archive) |ar| ar.offset else 0;
+        const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info));
+        defer gpa.free(relocs_buffer);
+        {
+            const amt = try self.file.preadAll(relocs_buffer, sect.reloff + offset);
+            if (amt != relocs_buffer.len) return error.InputOutput;
+        }
+        const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc];
+
+        const code = try self.getSectionData(gpa, @intCast(n_sect));
+        defer gpa.free(code);
 
         try out.ensureTotalCapacityPrecise(gpa, relocs.len);
 
@@ -1987,11 +2041,17 @@ const aarch64 = struct {
     ) !void {
         const gpa = macho_file.base.comp.gpa;
 
-        const relocs = @as(
-            [*]align(1) const macho.relocation_info,
-            @ptrCast(self.data.ptr + sect.reloff),
-        )[0..sect.nreloc];
-        const code = try self.getSectionData(@intCast(n_sect));
+        const offset = if (self.archive) |ar| ar.offset else 0;
+        const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info));
+        defer gpa.free(relocs_buffer);
+        {
+            const amt = try self.file.preadAll(relocs_buffer, sect.reloff + offset);
+            if (amt != relocs_buffer.len) return error.InputOutput;
+        }
+        const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc];
+
+        const code = try self.getSectionData(gpa, @intCast(n_sect));
+        defer gpa.free(code);
 
         try out.ensureTotalCapacityPrecise(gpa, relocs.len);
 
src/link/MachO/relocatable.zig
@@ -290,8 +290,7 @@ fn writeAtoms(macho_file: *MachO) !void {
             assert(atom.flags.alive);
             const off = math.cast(usize, atom.value - header.addr) orelse return error.Overflow;
             const atom_size = math.cast(usize, atom.size) orelse return error.Overflow;
-            const atom_data = try atom.getFile(macho_file).object.getAtomData(atom.*);
-            @memcpy(code[off..][0..atom_size], atom_data);
+            try atom.getFile(macho_file).object.getAtomData(atom.*, code[off..][0..atom_size]);
             try atom.writeRelocs(macho_file, code[off..][0..atom_size], &relocs);
         }
 
src/link/MachO/Symbol.zig
@@ -55,7 +55,12 @@ pub fn weakRef(symbol: Symbol, macho_file: *MachO) bool {
 }
 
+/// Returns the name of `symbol`.
+/// When `symbol.flags.global` is set, `symbol.name` is looked up in
+/// `macho_file.strings`; otherwise it is resolved through the string table of
+/// the file that owns the symbol.
 pub fn getName(symbol: Symbol, macho_file: *MachO) [:0]const u8 {
-    return macho_file.strings.getAssumeExists(symbol.name);
+    if (symbol.flags.global) return macho_file.strings.getAssumeExists(symbol.name);
+    // Non-global symbol: the owning file must exist (`.?`) and holds the name.
+    return switch (symbol.getFile(macho_file).?) {
+        .dylib => unreachable, // There are no local symbols for dylibs
+        .zig_object => |x| x.strtab.getAssumeExists(symbol.name),
+        inline else => |x| x.getString(symbol.name),
+    };
 }
 
 pub fn getAtom(symbol: Symbol, macho_file: *MachO) ?*Atom {
@@ -341,6 +346,11 @@ pub const Flags = packed struct {
     /// Whether the symbol is exported at runtime.
     @"export": bool = false,
 
+    /// Whether the symbol is effectively an extern and takes part in global
+    /// symbol resolution. If so, its name is stored in the global string
+    /// interning table.
+    global: bool = false,
+
     /// Whether this symbol is weak.
     weak: bool = false,
 
src/link/MachO/ZigObject.zig
@@ -3,6 +3,7 @@ path: []const u8,
 index: File.Index,
 
 symtab: std.MultiArrayList(Nlist) = .{},
+strtab: StringTable = .{},
 
 symbols: std.ArrayListUnmanaged(Symbol.Index) = .{},
 atoms: std.ArrayListUnmanaged(Atom.Index) = .{},
@@ -52,10 +53,12 @@ pub fn init(self: *ZigObject, macho_file: *MachO) !void {
     const gpa = comp.gpa;
 
     try self.atoms.append(gpa, 0); // null input section
+    try self.strtab.buffer.append(gpa, 0);
 }
 
 pub fn deinit(self: *ZigObject, allocator: Allocator) void {
     self.symtab.deinit(allocator);
+    self.strtab.deinit(allocator);
     self.symbols.deinit(allocator);
     self.atoms.deinit(allocator);
     self.globals_lookup.deinit(allocator);
@@ -136,37 +139,24 @@ pub fn addAtom(self: *ZigObject, macho_file: *MachO) !Symbol.Index {
     return symbol_index;
 }
 
-/// Caller owns the memory.
-pub fn getAtomDataAlloc(
-    self: ZigObject,
-    macho_file: *MachO,
-    allocator: Allocator,
-    atom: Atom,
-) ![]u8 {
+/// Fills `buffer` with the contents of `atom`, which must belong to this
+/// ZigObject and live in a non-zerofill output section (both asserted).
+/// `buffer.len` must equal `atom.size` (asserted).
+/// Returns `error.InputOutput` if the data has to be read from the output file
+/// and fewer than `buffer.len` bytes are available.
+pub fn getAtomData(self: ZigObject, macho_file: *MachO, atom: Atom, buffer: []u8) !void {
     assert(atom.file == self.index);
+    assert(atom.size == buffer.len);
     const sect = macho_file.sections.items(.header)[atom.out_n_sect];
     assert(!sect.isZerofill());
 
     switch (sect.type()) {
         macho.S_THREAD_LOCAL_REGULAR => {
             const tlv = self.tlv_initializers.get(atom.atom_index).?;
-            const data = try allocator.dupe(u8, tlv.data);
-            return data;
+            // Copied from the recorded TLV initializer rather than from the file.
+            @memcpy(buffer, tlv.data);
         },
         macho.S_THREAD_LOCAL_VARIABLES => {
-            const size = std.math.cast(usize, atom.size) orelse return error.Overflow;
-            const data = try allocator.alloc(u8, size);
-            @memset(data, 0);
-            return data;
+            // These atoms are handed out as all zero bytes.
+            @memset(buffer, 0);
         },
         else => {
             const file_offset = sect.offset + atom.value - sect.addr;
-            const size = std.math.cast(usize, atom.size) orelse return error.Overflow;
-            const data = try allocator.alloc(u8, size);
-            errdefer allocator.free(data);
-            const amt = try macho_file.base.file.?.preadAll(data, file_offset);
-            if (amt != data.len) return error.InputOutput;
-            return data;
+            // Everything else is read back from the linker's own output file.
+            const amt = try macho_file.base.file.?.preadAll(buffer, file_offset);
+            if (amt != buffer.len) return error.InputOutput;
         },
     }
 }
@@ -242,8 +232,10 @@ pub fn resetGlobals(self: *ZigObject, macho_file: *MachO) void {
         if (!self.symtab.items(.nlist)[nlist_idx].ext()) continue;
         const sym = macho_file.getSymbol(sym_index);
         const name = sym.name;
+        const global = sym.flags.global;
         sym.* = .{};
         sym.name = name;
+        sym.flags.global = global;
     }
 }
 
@@ -686,7 +678,7 @@ fn updateDeclCode(
     sym.out_n_sect = sect_index;
     atom.out_n_sect = sect_index;
 
-    sym.name = try macho_file.strings.insert(gpa, decl_name);
+    sym.name = try self.strtab.insert(gpa, decl_name);
     atom.flags.alive = true;
     atom.name = sym.name;
     nlist.n_strx = sym.name;
@@ -796,7 +788,7 @@ fn createTlvInitializer(
     atom.out_n_sect = sect_index;
 
     sym.value = 0;
-    sym.name = try macho_file.strings.insert(gpa, sym_name);
+    sym.name = try self.strtab.insert(gpa, sym_name);
     atom.flags.alive = true;
     atom.name = sym.name;
     nlist.n_strx = sym.name;
@@ -849,7 +841,7 @@ fn createTlvDescriptor(
     atom.out_n_sect = sect_index;
 
     sym.value = 0;
-    sym.name = try macho_file.strings.insert(gpa, name);
+    sym.name = try self.strtab.insert(gpa, name);
     atom.flags.alive = true;
     atom.name = sym.name;
     nlist.n_strx = sym.name;
@@ -1019,7 +1011,7 @@ fn lowerConst(
     };
 
     const sym = macho_file.getSymbol(sym_index);
-    const name_str_index = try macho_file.strings.insert(gpa, name);
+    const name_str_index = try self.strtab.insert(gpa, name);
     sym.name = name_str_index;
     sym.out_n_sect = output_section_index;
 
@@ -1110,7 +1102,7 @@ pub fn updateExports(
         }
 
         const exp_name = mod.intern_pool.stringToSlice(exp.opts.name);
-        const global_nlist_index = if (metadata.@"export"(self, macho_file, exp_name)) |exp_index|
+        const global_nlist_index = if (metadata.@"export"(self, exp_name)) |exp_index|
             exp_index.*
         else blk: {
             const global_nlist_index = try self.getGlobalSymbol(macho_file, exp_name, null);
@@ -1159,7 +1151,7 @@ fn updateLazySymbol(
             lazy_sym.ty.fmt(mod),
         });
         defer gpa.free(name);
-        break :blk try macho_file.strings.insert(gpa, name);
+        break :blk try self.strtab.insert(gpa, name);
     };
 
     const src = if (lazy_sym.ty.getOwnerDeclOrNull(mod)) |owner_decl|
@@ -1247,7 +1239,7 @@ pub fn deleteDeclExport(
 
     const mod = macho_file.base.comp.module.?;
     const exp_name = mod.intern_pool.stringToSlice(name);
-    const nlist_index = metadata.@"export"(self, macho_file, exp_name) orelse return;
+    const nlist_index = metadata.@"export"(self, exp_name) orelse return;
 
     log.debug("deleting export '{s}'", .{exp_name});
 
@@ -1268,7 +1260,7 @@ pub fn getGlobalSymbol(self: *ZigObject, macho_file: *MachO, name: []const u8, l
     const gpa = macho_file.base.comp.gpa;
     const sym_name = try std.fmt.allocPrint(gpa, "_{s}", .{name});
     defer gpa.free(sym_name);
-    const off = try macho_file.strings.insert(gpa, sym_name);
+    const off = try self.strtab.insert(gpa, sym_name);
     const lookup_gop = try self.globals_lookup.getOrPut(gpa, off);
     if (!lookup_gop.found_existing) {
         const nlist_index = try self.addNlist(gpa);
@@ -1406,10 +1398,10 @@ const DeclMetadata = struct {
     /// A list of all exports aliases of this Decl.
     exports: std.ArrayListUnmanaged(Symbol.Index) = .{},
 
-    fn @"export"(m: DeclMetadata, zig_object: *ZigObject, macho_file: *MachO, name: []const u8) ?*u32 {
+    fn @"export"(m: DeclMetadata, zig_object: *ZigObject, name: []const u8) ?*u32 {
         for (m.exports.items) |*exp| {
             const nlist = zig_object.symtab.items(.nlist)[exp.*];
-            const exp_name = macho_file.strings.getAssumeExists(nlist.n_strx);
+            const exp_name = zig_object.strtab.getAssumeExists(nlist.n_strx);
             if (mem.eql(u8, name, exp_name)) return exp;
         }
         return null;
src/link/MachO.zig
@@ -610,7 +610,10 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node
             if (mem.indexOf(u8, sect.segName(), "ZIG") == null) continue; // Non-Zig sections are handled separately
             // TODO: we will resolve and write ZigObject's TLS data twice:
             // once here, and once in writeAtoms
-            const code = zo.getAtomDataAlloc(self, gpa, atom.*) catch |err| switch (err) {
+            const atom_size = math.cast(usize, atom.size) orelse return error.Overflow;
+            const code = try gpa.alloc(u8, atom_size);
+            defer gpa.free(code);
+            zo.getAtomData(self, atom.*, code) catch |err| switch (err) {
                 error.InputOutput => {
                     try self.reportUnexpectedError("fetching code for '{s}' failed", .{
                         atom.getName(self),
@@ -625,7 +628,6 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node
                     return error.FlushFailure;
                 },
             };
-            defer gpa.free(code);
             const file_offset = sect.offset + atom.value - sect.addr;
             atom.resolveRelocs(self, code) catch |err| switch (err) {
                 error.ResolveFailed => has_resolve_error = true,
@@ -974,17 +976,15 @@ fn parseObject(self: *MachO, path: []const u8) ParseError!void {
 
     const gpa = self.base.comp.gpa;
     const file = try std.fs.cwd().openFile(path, .{});
-    defer file.close();
     const mtime: u64 = mtime: {
         const stat = file.stat() catch break :mtime 0;
         break :mtime @as(u64, @intCast(@divFloor(stat.mtime, 1_000_000_000)));
     };
-    const data = try file.readToEndAlloc(gpa, std.math.maxInt(u32));
     const index = @as(File.Index, @intCast(try self.files.addOne(gpa)));
     self.files.set(index, .{ .object = .{
         .path = try gpa.dupe(u8, path),
+        .file = file,
         .mtime = mtime,
-        .data = data,
         .index = index,
     } });
     try self.objects.append(gpa, index);
@@ -1013,17 +1013,9 @@ fn parseArchive(self: *MachO, lib: SystemLib, must_link: bool, fat_arch: ?fat.Ar
     const file = try std.fs.cwd().openFile(lib.path, .{});
     defer file.close();
 
-    const data = if (fat_arch) |arch| blk: {
-        try file.seekTo(arch.offset);
-        const data = try gpa.alloc(u8, arch.size);
-        const nread = try file.readAll(data);
-        if (nread != arch.size) return error.InputOutput;
-        break :blk data;
-    } else try file.readToEndAlloc(gpa, std.math.maxInt(u32));
-
-    var archive = Archive{ .path = try gpa.dupe(u8, lib.path), .data = data };
+    var archive = Archive{};
     defer archive.deinit(gpa);
-    try archive.parse(self);
+    try archive.parse(self, lib.path, file, fat_arch);
 
     var has_parse_error = false;
     for (archive.objects.items) |extracted| {
@@ -1058,18 +1050,9 @@ fn parseDylib(self: *MachO, lib: SystemLib, explicit: bool, fat_arch: ?fat.Arch)
     const file = try std.fs.cwd().openFile(lib.path, .{});
     defer file.close();
 
-    const data = if (fat_arch) |arch| blk: {
-        try file.seekTo(arch.offset);
-        const data = try gpa.alloc(u8, arch.size);
-        const nread = try file.readAll(data);
-        if (nread != arch.size) return error.InputOutput;
-        break :blk data;
-    } else try file.readToEndAlloc(gpa, std.math.maxInt(u32));
-
     const index = @as(File.Index, @intCast(try self.files.addOne(gpa)));
     self.files.set(index, .{ .dylib = .{
         .path = try gpa.dupe(u8, lib.path),
-        .data = data,
         .index = index,
         .needed = lib.needed,
         .weak = lib.weak,
@@ -1077,7 +1060,7 @@ fn parseDylib(self: *MachO, lib: SystemLib, explicit: bool, fat_arch: ?fat.Arch)
         .explicit = explicit,
     } });
     const dylib = &self.files.items(.data)[index].dylib;
-    try dylib.parse(self);
+    try dylib.parse(self, file, fat_arch);
 
     try self.dylibs.append(gpa, index);
 
@@ -1098,7 +1081,6 @@ fn parseTbd(self: *MachO, lib: SystemLib, explicit: bool) ParseError!File.Index
     const index = @as(File.Index, @intCast(try self.files.addOne(gpa)));
     self.files.set(index, .{ .dylib = .{
         .path = try gpa.dupe(u8, lib.path),
-        .data = &[0]u8{},
         .index = index,
         .needed = lib.needed,
         .weak = lib.weak,
@@ -1404,6 +1386,8 @@ pub fn resolveSymbols(self: *MachO) !void {
         const index = self.objects.items[i];
         if (!self.getFile(index).?.object.alive) {
             _ = self.objects.orderedRemove(i);
+            self.files.items(.data)[index].object.deinit(self.base.comp.gpa);
+            self.files.set(index, .null);
         } else i += 1;
     }
 
@@ -1511,18 +1495,13 @@ fn createObjcSections(self: *MachO) !void {
     }
 
     for (objc_msgsend_syms.keys()) |sym_index| {
+        const internal = self.getInternalObject().?;
         const sym = self.getSymbol(sym_index);
-        sym.value = 0;
-        sym.atom = 0;
-        sym.nlist_idx = 0;
-        sym.file = self.internal_object.?;
-        sym.flags = .{};
+        _ = try internal.addSymbol(sym.getName(self), self);
         sym.visibility = .hidden;
-        const object = self.getInternalObject().?;
         const name = eatPrefix(sym.getName(self), "_objc_msgSend$").?;
-        const selrefs_index = try object.addObjcMsgsendSections(name, self);
+        const selrefs_index = try internal.addObjcMsgsendSections(name, self);
         try sym.addExtra(.{ .objc_selrefs = selrefs_index }, self);
-        try object.symbols.append(gpa, sym_index);
     }
 }
 
@@ -1659,6 +1638,8 @@ fn deadStripDylibs(self: *MachO) void {
         const index = self.dylibs.items[i];
         if (!self.getFile(index).?.dylib.isAlive(self)) {
             _ = self.dylibs.orderedRemove(i);
+            self.files.items(.data)[index].dylib.deinit(self.base.comp.gpa);
+            self.files.set(index, .null);
         } else i += 1;
     }
 }
@@ -2609,13 +2590,13 @@ fn writeAtoms(self: *MachO) !void {
             const atom = self.getAtom(atom_index).?;
             assert(atom.flags.alive);
             const off = math.cast(usize, atom.value - header.addr) orelse return error.Overflow;
-            const data = switch (atom.getFile(self)) {
-                .object => |x| try x.getAtomData(atom.*),
-                .zig_object => |x| try x.getAtomDataAlloc(self, arena.allocator(), atom.*),
-                else => unreachable,
-            };
             const atom_size = math.cast(usize, atom.size) orelse return error.Overflow;
-            @memcpy(buffer[off..][0..atom_size], data);
+            switch (atom.getFile(self)) {
+                .internal => |x| try x.getAtomData(atom.*, buffer[off..][0..atom_size]),
+                .object => |x| try x.getAtomData(atom.*, buffer[off..][0..atom_size]),
+                .zig_object => |x| try x.getAtomData(self, atom.*, buffer[off..][0..atom_size]),
+                else => unreachable,
+            }
             atom.resolveRelocs(self, buffer[off..][0..atom_size]) catch |err| switch (err) {
                 error.ResolveFailed => has_resolve_error = true,
                 else => |e| return e,
@@ -3734,6 +3715,7 @@ pub fn getOrCreateGlobal(self: *MachO, off: u32) !GetOrCreateGlobalResult {
         const index = try self.addSymbol();
         const global = self.getSymbol(index);
         global.name = off;
+        global.flags.global = true;
         gop.value_ptr.* = index;
     }
     return .{