Commit 0b15ba8334

Jakub Konka <kubkon@jakubkonka.com>
2021-07-31 16:01:02
macho: don't allocate Dylib on the heap
instead, immediately transfer ownership to MachO struct. Also, revert back to try-ok-fail parsing approach of objects, archives, and dylibs. It seems easier to try and fail than check if the file *is* of a certain type given that a dylib may be a stub and parsing yaml twice in a row seems very wasteful. Hint for the future: if we optimise yaml/TAPI parsing, this approach may be rethought!
1 parent f023cda
Changed files (4)
src/link/MachO/Archive.zig
@@ -104,38 +104,52 @@ pub fn deinit(self: *Archive, allocator: *Allocator) void {
     allocator.free(self.name);
 }
 
-pub fn isArchive(file: fs.File, arch: Arch) !bool {
-    const Internal = struct {
-        fn isArchive(reader: anytype, a: Arch) !bool {
-            const offset = try fat.getLibraryOffset(reader, a);
-            try reader.context.seekTo(offset);
-            const magic = try reader.readBytesNoEof(SARMAG);
-            if (!mem.eql(u8, &magic, ARMAG)) return false;
-            const header = try reader.readStruct(ar_hdr);
-            return mem.eql(u8, &header.ar_fmag, ARFMAG);
-        }
+pub fn createAndParseFromPath(allocator: *Allocator, arch: Arch, path: []const u8) !?Archive {
+    const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) {
+        error.FileNotFound => return null,
+        else => |e| return e,
+    };
+    errdefer file.close();
+
+    const name = try allocator.dupe(u8, path);
+    errdefer allocator.free(name);
+
+    var archive = Archive{
+        .name = name,
+        .file = file,
     };
-    const is_archive = if (Internal.isArchive(file.reader(), arch)) |res|
-        res
-    else |err| switch (err) {
-        error.EndOfStream => false,
-        error.MismatchedCpuArchitecture => true, // TODO maybe this check should be done differently?
+
+    archive.parse(allocator, arch) catch |err| switch (err) {
+        error.EndOfStream, error.NotArchive => {
+            archive.deinit(allocator);
+            return null;
+        },
         else => |e| return e,
     };
-    try file.seekTo(0);
-    return is_archive;
+
+    return archive;
 }
 
 pub fn parse(self: *Archive, allocator: *Allocator, arch: Arch) !void {
-    self.library_offset = try fat.getLibraryOffset(self.file.reader(), arch);
-    try self.file.seekTo(self.library_offset);
     const reader = self.file.reader();
+    self.library_offset = try fat.getLibraryOffset(reader, arch);
+    try self.file.seekTo(self.library_offset);
+
     const magic = try reader.readBytesNoEof(SARMAG);
+    if (!mem.eql(u8, &magic, ARMAG)) {
+        log.debug("invalid magic: expected '{s}', found '{s}'", .{ ARMAG, magic });
+        return error.NotArchive;
+    }
+
     self.header = try reader.readStruct(ar_hdr);
-    var embedded_name = try parseName(allocator, self.header.?, reader);
-    defer allocator.free(embedded_name);
+    if (!mem.eql(u8, &self.header.?.ar_fmag, ARFMAG)) {
+        log.debug("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, self.header.?.ar_fmag });
+        return error.NotArchive;
+    }
 
+    var embedded_name = try parseName(allocator, self.header.?, reader);
     log.debug("parsing archive '{s}' at '{s}'", .{ embedded_name, self.name });
+    defer allocator.free(embedded_name);
 
     try self.parseTableOfContents(allocator, reader);
     try reader.context.seekTo(0);
src/link/MachO/Dylib.zig
@@ -148,20 +148,17 @@ pub fn createAndParseFromPath(
     arch: Arch,
     path: []const u8,
     opts: CreateOpts,
-) Error!?[]*Dylib {
+) Error!?[]Dylib {
     const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) {
         error.FileNotFound => return null,
         else => |e| return e,
     };
     errdefer file.close();
 
-    const dylib = try allocator.create(Dylib);
-    errdefer allocator.destroy(dylib);
-
     const name = try allocator.dupe(u8, path);
     errdefer allocator.free(name);
 
-    dylib.* = .{
+    var dylib = Dylib{
         .name = name,
         .file = file,
     };
@@ -172,7 +169,6 @@ pub fn createAndParseFromPath(
 
             var lib_stub = LibStub.loadFromFile(allocator, file) catch {
                 dylib.deinit(allocator);
-                allocator.destroy(dylib);
                 return null;
             };
             defer lib_stub.deinit();
@@ -191,12 +187,11 @@ pub fn createAndParseFromPath(
 
             // TODO maybe this should be an error and facilitate auto-cleanup?
             dylib.deinit(allocator);
-            allocator.destroy(dylib);
             return null;
         }
     }
 
-    var dylibs = std.ArrayList(*Dylib).init(allocator);
+    var dylibs = std.ArrayList(Dylib).init(allocator);
     defer dylibs.deinit();
 
     try dylibs.append(dylib);
@@ -449,7 +444,7 @@ pub fn parseDependentLibs(
     self: *Dylib,
     allocator: *Allocator,
     arch: Arch,
-    out: *std.ArrayList(*Dylib),
+    out: *std.ArrayList(Dylib),
     syslibroot: ?[]const u8,
 ) !void {
     outer: for (self.dependent_libs.items) |id| {
@@ -489,6 +484,7 @@ pub fn parseDependentLibs(
             )) orelse {
                 continue;
             };
+            defer allocator.free(dylibs);
 
             try out.appendSlice(dylibs);
 
src/link/MachO/Object.zig
@@ -154,29 +154,47 @@ pub fn deinit(self: *Object, allocator: *Allocator) void {
     }
 }
 
-pub fn isObject(file: fs.File) !bool {
-    const Internal = struct {
-        fn isObject(reader: anytype) !bool {
-            const header = try reader.readStruct(macho.mach_header_64);
-            return header.filetype == macho.MH_OBJECT;
-        }
+pub fn createAndParseFromPath(allocator: *Allocator, arch: Arch, path: []const u8) !?Object {
+    const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) {
+        error.FileNotFound => return null,
+        else => |e| return e,
     };
-    const is_object = if (Internal.isObject(file.reader())) |res|
-        res
-    else |err| switch (err) {
-        error.EndOfStream => false,
+    errdefer file.close();
+
+    const name = try allocator.dupe(u8, path);
+    errdefer allocator.free(name);
+
+    var object = Object{
+        .name = name,
+        .file = file,
+    };
+
+    object.parse(allocator, arch) catch |err| switch (err) {
+        error.EndOfStream, error.NotObject => {
+            object.deinit(allocator);
+            return null;
+        },
         else => |e| return e,
     };
-    try file.seekTo(0);
-    return is_object;
+
+    return object;
 }
 
 pub fn parse(self: *Object, allocator: *Allocator, arch: Arch) !void {
-    var reader = self.file.reader();
+    const reader = self.file.reader();
     if (self.file_offset) |offset| {
         try reader.context.seekTo(offset);
     }
+
     const header = try reader.readStruct(macho.mach_header_64);
+    if (header.filetype != macho.MH_OBJECT) {
+        log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{
+            macho.MH_OBJECT,
+            header.filetype,
+        });
+        return error.NotObject;
+    }
+
     const this_arch: Arch = switch (header.cputype) {
         macho.CPU_TYPE_ARM64 => .aarch64,
         macho.CPU_TYPE_X86_64 => .x86_64,
@@ -189,6 +207,7 @@ pub fn parse(self: *Object, allocator: *Allocator, arch: Arch) !void {
         log.err("mismatched cpu architecture: expected {s}, found {s}", .{ arch, this_arch });
         return error.MismatchedCpuArchitecture;
     }
+
     self.header = header;
 
     try self.readLoadCommands(allocator, reader);
src/link/MachO.zig
@@ -63,7 +63,7 @@ entry_addr: ?u64 = null,
 
 objects: std.ArrayListUnmanaged(Object) = .{},
 archives: std.ArrayListUnmanaged(Archive) = .{},
-dylibs: std.ArrayListUnmanaged(*Dylib) = .{},
+dylibs: std.ArrayListUnmanaged(Dylib) = .{},
 
 next_dylib_ordinal: u16 = 1,
 
@@ -994,25 +994,15 @@ fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const
             const path = try std.fs.realpath(file_name, &buffer);
             break :full_path try self.base.allocator.dupe(u8, path);
         };
-        const file = try fs.cwd().openFile(full_path, .{});
+        defer self.base.allocator.free(full_path);
 
-        if (try Object.isObject(file)) {
-            const object = try self.objects.addOne(self.base.allocator);
-            object.* = .{
-                .name = full_path,
-                .file = file,
-            };
-            try object.parse(self.base.allocator, arch);
+        if (try Object.createAndParseFromPath(self.base.allocator, arch, full_path)) |object| {
+            try self.objects.append(self.base.allocator, object);
             continue;
         }
 
-        if (try Archive.isArchive(file, arch)) {
-            const archive = try self.archives.addOne(self.base.allocator);
-            archive.* = .{
-                .name = full_path,
-                .file = file,
-            };
-            try archive.parse(self.base.allocator, arch);
+        if (try Archive.createAndParseFromPath(self.base.allocator, arch, full_path)) |archive| {
+            try self.archives.append(self.base.allocator, archive);
             continue;
         }
 
@@ -1024,8 +1014,6 @@ fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const
             continue;
         }
 
-        self.base.allocator.free(full_path);
-        file.close();
         log.warn("unknown filetype for positional input file: '{s}'", .{file_name});
     }
 }
@@ -1033,8 +1021,6 @@ fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const
 fn parseLibs(self: *MachO, libs: []const []const u8, syslibroot: ?[]const u8) !void {
     const arch = self.base.options.target.cpu.arch;
     for (libs) |lib| {
-        const file = try fs.cwd().openFile(lib, .{});
-
         if (try Dylib.createAndParseFromPath(self.base.allocator, arch, lib, .{
             .syslibroot = syslibroot,
         })) |dylibs| {
@@ -1043,17 +1029,11 @@ fn parseLibs(self: *MachO, libs: []const []const u8, syslibroot: ?[]const u8) !v
             continue;
         }
 
-        if (try Archive.isArchive(file, arch)) {
-            const archive = try self.archives.addOne(self.base.allocator);
-            archive.* = .{
-                .name = try self.base.allocator.dupe(u8, lib),
-                .file = file,
-            };
-            try archive.parse(self.base.allocator, arch);
+        if (try Archive.createAndParseFromPath(self.base.allocator, arch, lib)) |archive| {
+            try self.archives.append(self.base.allocator, archive);
             continue;
         }
 
-        file.close();
         log.warn("unknown filetype for a library: '{s}'", .{lib});
     }
 }
@@ -2351,17 +2331,17 @@ fn resolveSymbols(self: *MachO) !void {
         });
     }
 
-    var referenced = std.AutoHashMap(*Dylib, void).init(self.base.allocator);
+    var referenced = std.AutoHashMap(u16, void).init(self.base.allocator);
     defer referenced.deinit();
 
     loop: for (self.undefs.items) |sym| {
         if (symbolIsNull(sym)) continue;
 
         const sym_name = self.getString(sym.n_strx);
-        for (self.dylibs.items) |dylib| {
+        for (self.dylibs.items) |*dylib, id| {
             if (!dylib.symbols.contains(sym_name)) continue;
 
-            if (!referenced.contains(dylib)) {
+            if (!referenced.contains(@intCast(u16, id))) {
                 // Add LC_LOAD_DYLIB load command for each referenced dylib/stub.
                 dylib.ordinal = self.next_dylib_ordinal;
                 const dylib_id = dylib.id orelse unreachable;
@@ -2375,7 +2355,7 @@ fn resolveSymbols(self: *MachO) !void {
                 errdefer dylib_cmd.deinit(self.base.allocator);
                 try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd });
                 self.next_dylib_ordinal += 1;
-                try referenced.putNoClobber(dylib, {});
+                try referenced.putNoClobber(@intCast(u16, id), {});
             }
 
             const resolv = self.symbol_resolver.getPtr(sym.n_strx) orelse unreachable;
@@ -3365,9 +3345,8 @@ pub fn deinit(self: *MachO) void {
     }
     self.archives.deinit(self.base.allocator);
 
-    for (self.dylibs.items) |dylib| {
+    for (self.dylibs.items) |*dylib| {
         dylib.deinit(self.base.allocator);
-        self.base.allocator.destroy(dylib);
     }
     self.dylibs.deinit(self.base.allocator);