Commit db6addf31a

Jakub Konka <kubkon@jakubkonka.com>
2024-02-06 11:17:00
macho: store open file descriptors in a global array
1 parent 7bd8b35
Changed files (5)
src/link/MachO/Archive.zig
@@ -73,24 +73,25 @@ pub fn deinit(self: *Archive, allocator: Allocator) void {
     self.objects.deinit(allocator);
 }
 
-pub fn parse(self: *Archive, macho_file: *MachO, path: []const u8, file: std.fs.File, fat_arch: ?fat.Arch) !void {
+pub fn parse(self: *Archive, macho_file: *MachO, path: []const u8, handle_index: File.HandleIndex, fat_arch: ?fat.Arch) !void {
     const gpa = macho_file.base.comp.gpa;
 
     var arena = std.heap.ArenaAllocator.init(gpa);
     defer arena.deinit();
 
+    const handle = macho_file.getFileHandle(handle_index);
     const offset = if (fat_arch) |ar| ar.offset else 0;
-    const size = if (fat_arch) |ar| ar.size else (try file.stat()).size;
-    try file.seekTo(offset);
+    const size = if (fat_arch) |ar| ar.size else (try handle.stat()).size;
+    try handle.seekTo(offset);
 
-    const reader = file.reader();
+    const reader = handle.reader();
     _ = try reader.readBytesNoEof(Archive.SARMAG);
 
     var pos: usize = Archive.SARMAG;
     while (true) {
         if (pos >= size) break;
         if (!mem.isAligned(pos, 2)) {
-            try file.seekBy(1);
+            try handle.seekBy(1);
             pos += 1;
         }
 
@@ -118,7 +119,7 @@ pub fn parse(self: *Archive, macho_file: *MachO, path: []const u8, file: std.fs.
             unreachable;
         };
         defer {
-            _ = file.seekBy(hdr_size) catch {};
+            _ = handle.seekBy(hdr_size) catch {};
             pos += hdr_size;
         }
 
@@ -130,7 +131,7 @@ pub fn parse(self: *Archive, macho_file: *MachO, path: []const u8, file: std.fs.
                 .offset = offset + pos,
             },
             .path = try gpa.dupe(u8, name),
-            .file = try std.fs.cwd().openFile(path, .{}),
+            .file_handle = handle_index,
             .index = undefined,
             .alive = false,
             .mtime = hdr.date() catch 0,
@@ -150,5 +151,6 @@ const std = @import("std");
 
 const Allocator = mem.Allocator;
 const Archive = @This();
+const File = @import("file.zig").File;
 const MachO = @import("../MachO.zig");
 const Object = @import("Object.zig");
src/link/MachO/Atom.zig
@@ -58,7 +58,7 @@ pub fn getData(self: Atom, macho_file: *MachO, buffer: []u8) !void {
     assert(buffer.len == self.size);
     switch (self.getFile(macho_file)) {
         .internal => |x| try x.getAtomData(self, buffer),
-        .object => |x| try x.getAtomData(self, buffer),
+        .object => |x| try x.getAtomData(macho_file, self, buffer),
         .zig_object => |x| try x.getAtomData(macho_file, self, buffer),
         else => unreachable,
     }
src/link/MachO/file.zig
@@ -105,6 +105,9 @@ pub const File = union(enum) {
         object: Object,
         dylib: Dylib,
     };
+
+    pub const Handle = std.fs.File;
+    pub const HandleIndex = Index;
 };
 
 const macho = std.macho;
src/link/MachO/Object.zig
@@ -1,6 +1,6 @@
 archive: ?Archive = null,
 path: []const u8,
-file: std.fs.File,
+file_handle: File.HandleIndex,
 mtime: u64,
 index: File.Index,
 
@@ -43,7 +43,6 @@ pub fn isObject(path: []const u8) !bool {
 }
 
 pub fn deinit(self: *Object, allocator: Allocator) void {
-    self.file.close();
     if (self.archive) |*ar| allocator.free(ar.path);
     allocator.free(self.path);
     for (self.sections.items(.relocs), self.sections.items(.subsections)) |*relocs, *sub| {
@@ -73,10 +72,11 @@ pub fn parse(self: *Object, macho_file: *MachO) !void {
 
     const gpa = macho_file.base.comp.gpa;
     const offset = if (self.archive) |ar| ar.offset else 0;
+    const handle = macho_file.getFileHandle(self.file_handle);
 
     var header_buffer: [@sizeOf(macho.mach_header_64)]u8 = undefined;
     {
-        const amt = try self.file.preadAll(&header_buffer, offset);
+        const amt = try handle.preadAll(&header_buffer, offset);
         if (amt != @sizeOf(macho.mach_header_64)) return error.InputOutput;
     }
     self.header = @as(*align(1) const macho.mach_header_64, @ptrCast(&header_buffer)).*;
@@ -97,7 +97,7 @@ pub fn parse(self: *Object, macho_file: *MachO) !void {
     const lc_buffer = try gpa.alloc(u8, self.header.?.sizeofcmds);
     defer gpa.free(lc_buffer);
     {
-        const amt = try self.file.preadAll(lc_buffer, offset + @sizeOf(macho.mach_header_64));
+        const amt = try handle.preadAll(lc_buffer, offset + @sizeOf(macho.mach_header_64));
         if (amt != self.header.?.sizeofcmds) return error.InputOutput;
     }
 
@@ -124,14 +124,14 @@ pub fn parse(self: *Object, macho_file: *MachO) !void {
             const cmd = lc.cast(macho.symtab_command).?;
             try self.strtab.resize(gpa, cmd.strsize);
             {
-                const amt = try self.file.preadAll(self.strtab.items, cmd.stroff + offset);
+                const amt = try handle.preadAll(self.strtab.items, cmd.stroff + offset);
                 if (amt != self.strtab.items.len) return error.InputOutput;
             }
 
             const symtab_buffer = try gpa.alloc(u8, cmd.nsyms * @sizeOf(macho.nlist_64));
             defer gpa.free(symtab_buffer);
             {
-                const amt = try self.file.preadAll(symtab_buffer, cmd.symoff + offset);
+                const amt = try handle.preadAll(symtab_buffer, cmd.symoff + offset);
                 if (amt != symtab_buffer.len) return error.InputOutput;
             }
             const symtab = @as([*]align(1) const macho.nlist_64, @ptrCast(symtab_buffer.ptr))[0..cmd.nsyms];
@@ -149,7 +149,7 @@ pub fn parse(self: *Object, macho_file: *MachO) !void {
             const buffer = try gpa.alloc(u8, cmd.datasize);
             defer gpa.free(buffer);
             {
-                const amt = try self.file.preadAll(buffer, offset + cmd.dataoff);
+                const amt = try handle.preadAll(buffer, offset + cmd.dataoff);
                 if (amt != buffer.len) return error.InputOutput;
             }
             const ndice = @divExact(cmd.datasize, @sizeOf(macho.data_in_code_entry));
@@ -697,7 +697,7 @@ fn initEhFrameRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void {
     const relocs = slice.items(.relocs)[sect_id];
 
     // TODO: read into buffer directly
-    const data = try self.getSectionData(gpa, sect_id);
+    const data = try self.getSectionData(sect_id, macho_file);
     defer gpa.free(data);
 
     try self.eh_frame_data.ensureTotalCapacityPrecise(gpa, data.len);
@@ -800,7 +800,7 @@ fn initUnwindRecords(self: *Object, sect_id: u8, macho_file: *MachO) !void {
     };
 
     const gpa = macho_file.base.comp.gpa;
-    const data = try self.getSectionData(gpa, sect_id);
+    const data = try self.getSectionData(sect_id, macho_file);
     defer gpa.free(data);
     const nrecs = @divExact(data.len, @sizeOf(macho.compact_unwind_entry));
     const recs = @as([*]align(1) const macho.compact_unwind_entry, @ptrCast(data.ptr))[0..nrecs];
@@ -1019,11 +1019,11 @@ fn initDwarfInfo(self: *Object, macho_file: *MachO) !void {
 
     if (debug_info_index == null or debug_abbrev_index == null) return;
 
-    const debug_info = try self.getSectionData(gpa, @intCast(debug_info_index.?));
+    const debug_info = try self.getSectionData(@intCast(debug_info_index.?), macho_file);
     defer gpa.free(debug_info);
-    const debug_abbrev = try self.getSectionData(gpa, @intCast(debug_abbrev_index.?));
+    const debug_abbrev = try self.getSectionData(@intCast(debug_abbrev_index.?), macho_file);
     defer gpa.free(debug_abbrev);
-    const debug_str = if (debug_str_index) |index| try self.getSectionData(gpa, @intCast(index)) else &[0]u8{};
+    const debug_str = if (debug_str_index) |index| try self.getSectionData(@intCast(index), macho_file) else &[0]u8{};
     defer gpa.free(debug_str);
 
     var dwarf_info = DwarfInfo{};
@@ -1589,25 +1589,28 @@ pub fn writeStabs(self: *const Object, macho_file: *MachO, ctx: anytype) error{O
     }
 }
 
-fn getSectionData(self: *const Object, allocator: Allocator, index: u32) ![]u8 {
+fn getSectionData(self: *const Object, index: u32, macho_file: *MachO) ![]u8 { // reads section `index` into a new buffer
+    const gpa = macho_file.base.comp.gpa; // the returned buffer is allocated with this allocator
     const slice = self.sections.slice();
     assert(index < slice.items(.header).len);
     const sect = slice.items(.header)[index];
+    const handle = macho_file.getFileHandle(self.file_handle); // handle is looked up by index in macho_file, not stored on the Object
     const offset = if (self.archive) |ar| ar.offset else 0;
     const size = math.cast(usize, sect.size) orelse return error.Overflow;
-    const buffer = try allocator.alloc(u8, size);
-    errdefer allocator.free(buffer);
-    const amt = try self.file.preadAll(buffer, sect.offset + offset);
+    const buffer = try gpa.alloc(u8, size);
+    errdefer gpa.free(buffer); // released only on the error returns below; on success the caller gets the buffer
+    const amt = try handle.preadAll(buffer, sect.offset + offset); // read at the section offset plus the archive offset (0 without an archive)
     if (amt != buffer.len) return error.InputOutput;
     return buffer;
 }
 
-pub fn getAtomData(self: *const Object, atom: Atom, buffer: []u8) !void {
+pub fn getAtomData(self: *const Object, macho_file: *MachO, atom: Atom, buffer: []u8) !void { // fills `buffer` with the atom's bytes read from the input file
     assert(buffer.len == atom.size);
     const slice = self.sections.slice();
+    const handle = macho_file.getFileHandle(self.file_handle); // handle is looked up by index in macho_file
     const offset = if (self.archive) |ar| ar.offset else 0;
     const sect = slice.items(.header)[atom.n_sect];
-    const amt = try self.file.preadAll(buffer, sect.offset + offset + atom.off);
+    const amt = try handle.preadAll(buffer, sect.offset + offset + atom.off); // atom.off is added to the section's file offset
     if (amt != buffer.len) return error.InputOutput;
 }
 
@@ -1885,16 +1888,17 @@ const x86_64 = struct {
     ) !void {
         const gpa = macho_file.base.comp.gpa;
 
+        const handle = macho_file.getFileHandle(self.file_handle);
         const offset = if (self.archive) |ar| ar.offset else 0;
         const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info));
         defer gpa.free(relocs_buffer);
         {
-            const amt = try self.file.preadAll(relocs_buffer, sect.reloff + offset);
+            const amt = try handle.preadAll(relocs_buffer, sect.reloff + offset);
             if (amt != relocs_buffer.len) return error.InputOutput;
         }
         const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc];
 
-        const code = try self.getSectionData(gpa, @intCast(n_sect));
+        const code = try self.getSectionData(@intCast(n_sect), macho_file);
         defer gpa.free(code);
 
         try out.ensureTotalCapacityPrecise(gpa, relocs.len);
@@ -2047,16 +2051,17 @@ const aarch64 = struct {
     ) !void {
         const gpa = macho_file.base.comp.gpa;
 
+        const handle = macho_file.getFileHandle(self.file_handle);
         const offset = if (self.archive) |ar| ar.offset else 0;
         const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info));
         defer gpa.free(relocs_buffer);
         {
-            const amt = try self.file.preadAll(relocs_buffer, sect.reloff + offset);
+            const amt = try handle.preadAll(relocs_buffer, sect.reloff + offset);
             if (amt != relocs_buffer.len) return error.InputOutput;
         }
         const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc];
 
-        const code = try self.getSectionData(gpa, @intCast(n_sect));
+        const code = try self.getSectionData(@intCast(n_sect), macho_file);
         defer gpa.free(code);
 
         try out.ensureTotalCapacityPrecise(gpa, relocs.len);
src/link/MachO.zig
@@ -10,6 +10,10 @@ d_sym: ?DebugSymbols = null,
 /// Index of each input file also encodes the priority or precedence of one input file
 /// over another.
 files: std.MultiArrayList(File.Entry) = .{},
+/// Long-lived list of all file descriptors.
+/// We store them globally rather than per actual File so that we can re-use
+/// a single file handle for all object files within an archive.
+file_handles: std.ArrayListUnmanaged(File.Handle) = .{},
 zig_object: ?File.Index = null,
 internal_object: ?File.Index = null,
 objects: std.ArrayListUnmanaged(File.Index) = .{},
@@ -315,6 +319,11 @@ pub fn deinit(self: *MachO) void {
         d_sym.deinit();
     }
 
+    for (self.file_handles.items) |handle| {
+        handle.close();
+    }
+    self.file_handles.deinit(gpa);
+
     for (self.files.items(.tags), self.files.items(.data)) |tag, *data| switch (tag) {
         .null => {},
         .zig_object => data.zig_object.deinit(gpa),
@@ -394,8 +403,6 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node
     sub_prog_node.activate();
     defer sub_prog_node.end();
 
-    const target = comp.root_mod.resolved_target.result;
-    _ = target;
     const directory = self.base.emit.directory;
     const full_out_path = try directory.join(arena, &[_][]const u8{self.base.emit.sub_path});
     const module_obj_path: ?[]const u8 = if (self.base.zcu_object_sub_path) |path| blk: {
@@ -985,6 +992,8 @@ fn parseObject(self: *MachO, path: []const u8) ParseError!void {
 
     const gpa = self.base.comp.gpa;
     const file = try std.fs.cwd().openFile(path, .{});
+    errdefer file.close();
+    const handle = try self.addFileHandle(file);
     const mtime: u64 = mtime: {
         const stat = file.stat() catch break :mtime 0;
         break :mtime @as(u64, @intCast(@divFloor(stat.mtime, 1_000_000_000)));
@@ -992,7 +1001,7 @@ fn parseObject(self: *MachO, path: []const u8) ParseError!void {
     const index = @as(File.Index, @intCast(try self.files.addOne(gpa)));
     self.files.set(index, .{ .object = .{
         .path = try gpa.dupe(u8, path),
-        .file = file,
+        .file_handle = handle,
         .mtime = mtime,
         .index = index,
     } });
@@ -1020,11 +1029,12 @@ fn parseArchive(self: *MachO, lib: SystemLib, must_link: bool, fat_arch: ?fat.Ar
     const gpa = self.base.comp.gpa;
 
     const file = try std.fs.cwd().openFile(lib.path, .{});
-    defer file.close();
+    errdefer file.close();
+    const handle = try self.addFileHandle(file);
 
     var archive = Archive{};
     defer archive.deinit(gpa);
-    try archive.parse(self, lib.path, file, fat_arch);
+    try archive.parse(self, lib.path, handle, fat_arch);
 
     var has_parse_error = false;
     for (archive.objects.items) |extracted| {
@@ -3796,6 +3806,19 @@ pub fn getInternalObject(self: *MachO) ?*InternalObject {
     return self.getFile(index).?.internal;
 }
 
+/// Registers `file` in the global `file_handles` list and returns the index it was stored at.
+/// On allocation failure nothing is stored and `file` is left open for the caller to close.
+pub fn addFileHandle(self: *MachO, file: std.fs.File) !File.HandleIndex {
+    const next_index: File.HandleIndex = @intCast(self.file_handles.items.len);
+    try self.file_handles.append(self.base.comp.gpa, file);
+    return next_index;
+}
+
+pub fn getFileHandle(self: MachO, index: File.HandleIndex) File.Handle { // returns the handle stored at `index`
+    assert(index < self.file_handles.items.len); // an out-of-range index trips this assert; no error is returned
+    return self.file_handles.items[index]; // returned by value; the entry itself stays in `file_handles`
+}
+
 pub fn addAtom(self: *MachO) error{OutOfMemory}!Atom.Index {
     const index = @as(Atom.Index, @intCast(self.atoms.items.len));
     const atom = try self.atoms.addOne(self.base.comp.gpa);
@@ -4616,7 +4639,6 @@ const Cache = std.Build.Cache;
 const CodeSignature = @import("MachO/CodeSignature.zig");
 const Compilation = @import("../Compilation.zig");
 pub const DebugSymbols = @import("MachO/DebugSymbols.zig");
-const Dwarf = File.Dwarf;
 const DwarfInfo = @import("MachO/DwarfInfo.zig");
 const Dylib = @import("MachO/Dylib.zig");
 const ExportTrieSection = synthetic.ExportTrieSection;