Commit 897a554109

Jakub Konka <kubkon@jakubkonka.com>
2024-02-07 11:37:33
macho: populate output archive symtab
1 parent 80cafad
src/link/MachO/Archive.zig
@@ -1,63 +1,5 @@
 objects: std.ArrayListUnmanaged(Object) = .{},
 
-// Archive files start with the ARMAG identifying string.  Then follows a
-// `struct ar_hdr', and as many bytes of member file data as its `ar_size'
-// member indicates, for each member file.
-/// String that begins an archive file.
-pub const ARMAG: *const [SARMAG:0]u8 = "!<arch>\n";
-/// Size of that string.
-pub const SARMAG: u4 = 8;
-
-/// String in ar_fmag at the end of each header.
-const ARFMAG: *const [2:0]u8 = "`\n";
-
-const ar_hdr = extern struct {
-    /// Member file name, sometimes / terminated.
-    ar_name: [16]u8,
-
-    /// File date, decimal seconds since Epoch.
-    ar_date: [12]u8,
-
-    /// User ID, in ASCII format.
-    ar_uid: [6]u8,
-
-    /// Group ID, in ASCII format.
-    ar_gid: [6]u8,
-
-    /// File mode, in ASCII octal.
-    ar_mode: [8]u8,
-
-    /// File size, in ASCII decimal.
-    ar_size: [10]u8,
-
-    /// Always contains ARFMAG.
-    ar_fmag: [2]u8,
-
-    fn date(self: ar_hdr) !u64 {
-        const value = mem.trimRight(u8, &self.ar_date, &[_]u8{@as(u8, 0x20)});
-        return std.fmt.parseInt(u64, value, 10);
-    }
-
-    fn size(self: ar_hdr) !u32 {
-        const value = mem.trimRight(u8, &self.ar_size, &[_]u8{@as(u8, 0x20)});
-        return std.fmt.parseInt(u32, value, 10);
-    }
-
-    fn name(self: *const ar_hdr) ?[]const u8 {
-        const value = &self.ar_name;
-        if (mem.startsWith(u8, value, "#1/")) return null;
-        const sentinel = mem.indexOfScalar(u8, value, '/') orelse value.len;
-        return value[0..sentinel];
-    }
-
-    fn nameLength(self: ar_hdr) !?u32 {
-        const value = &self.ar_name;
-        if (!mem.startsWith(u8, value, "#1/")) return null;
-        const trimmed = mem.trimRight(u8, self.ar_name["#1/".len..], &[_]u8{0x20});
-        return try std.fmt.parseInt(u32, trimmed, 10);
-    }
-};
-
 pub fn isArchive(path: []const u8, fat_arch: ?fat.Arch) !bool {
     const file = try std.fs.cwd().openFile(path, .{});
     defer file.close();
@@ -85,9 +27,9 @@ pub fn parse(self: *Archive, macho_file: *MachO, path: []const u8, handle_index:
     try handle.seekTo(offset);
 
     const reader = handle.reader();
-    _ = try reader.readBytesNoEof(Archive.SARMAG);
+    _ = try reader.readBytesNoEof(SARMAG);
 
-    var pos: usize = Archive.SARMAG;
+    var pos: usize = SARMAG;
     while (true) {
         if (pos >= size) break;
         if (!mem.isAligned(pos, 2)) {
@@ -123,7 +65,10 @@ pub fn parse(self: *Archive, macho_file: *MachO, path: []const u8, handle_index:
             pos += hdr_size;
         }
 
-        if (mem.eql(u8, name, "__.SYMDEF") or mem.eql(u8, name, "__.SYMDEF SORTED")) continue;
+        if (mem.eql(u8, name, SYMDEF) or
+            mem.eql(u8, name, SYMDEF64) or
+            mem.eql(u8, name, SYMDEF_SORTED) or
+            mem.eql(u8, name, SYMDEF64_SORTED)) continue;
 
         const object = Object{
             .archive = .{
@@ -143,6 +88,227 @@ pub fn parse(self: *Archive, macho_file: *MachO, path: []const u8, handle_index:
     }
 }
 
+pub fn writeHeader(
+    object_name: []const u8,
+    object_size: u32,
+    format: Format,
+    writer: anytype,
+) !void {
+    var hdr: ar_hdr = .{
+        .ar_name = undefined,
+        .ar_date = undefined,
+        .ar_uid = undefined,
+        .ar_gid = undefined,
+        .ar_mode = undefined,
+        .ar_size = undefined,
+        .ar_fmag = undefined,
+    };
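+    // The ar_hdr fields are fixed-width ASCII: space-fill the whole header, then seed
+    // each field with a leading '0'. ar_name, ar_size and ar_fmag are overwritten below.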
+    @memset(mem.asBytes(&hdr), 0x20);
+    inline for (@typeInfo(ar_hdr).Struct.fields) |field| {
+        var stream = std.io.fixedBufferStream(&@field(hdr, field.name));
+        stream.writer().print("0", .{}) catch unreachable;
+    }
+    @memcpy(&hdr.ar_fmag, ARFMAG);
+
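+    // BSD extended-name convention: the header's name field holds "#1/<len>" and the
+    // actual name (NUL-terminated, padded to the pointer width) is stored immediately
+    // after the header, so ar_size covers the embedded name plus the member data.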
+    const object_name_len = mem.alignForward(usize, object_name.len + 1, format.ptrWidth());
+    const total_object_size = object_size + object_name_len;
+
+    {
+        var stream = std.io.fixedBufferStream(&hdr.ar_name);
+        stream.writer().print("#1/{d}", .{object_name_len}) catch unreachable;
+    }
+    {
+        var stream = std.io.fixedBufferStream(&hdr.ar_size);
+        stream.writer().print("{d}", .{total_object_size}) catch unreachable;
+    }
+
+    try writer.writeAll(mem.asBytes(&hdr));
+    try writer.print("{s}\x00", .{object_name});
+
+    const padding = object_name_len - object_name.len - 1;
+    if (padding > 0) {
+        try writer.writeByteNTimes(0, padding);
+    }
+}
+
+// Archive files start with the ARMAG identifying string.  Then follows a
+// `struct ar_hdr', and as many bytes of member file data as its `ar_size'
+// member indicates, for each member file.
+/// String that begins an archive file.
+pub const ARMAG: *const [SARMAG:0]u8 = "!<arch>\n";
+/// Size of that string.
+pub const SARMAG: u4 = 8;
+
+/// String in ar_fmag at the end of each header.
+const ARFMAG: *const [2:0]u8 = "`\n";
+
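+// Special member names reserved for the archive symbol table (ranlib), in 32-bit,
+// 64-bit and name-sorted variants.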
+const SYMDEF = "__.SYMDEF";
+const SYMDEF64 = "__.SYMDEF_64";
+const SYMDEF_SORTED = "__.SYMDEF SORTED";
+const SYMDEF64_SORTED = "__.SYMDEF_64 SORTED";
+
+const ar_hdr = extern struct {
+    /// Member file name, sometimes / terminated.
+    ar_name: [16]u8,
+
+    /// File date, decimal seconds since Epoch.
+    ar_date: [12]u8,
+
+    /// User ID, in ASCII format.
+    ar_uid: [6]u8,
+
+    /// Group ID, in ASCII format.
+    ar_gid: [6]u8,
+
+    /// File mode, in ASCII octal.
+    ar_mode: [8]u8,
+
+    /// File size, in ASCII decimal.
+    ar_size: [10]u8,
+
+    /// Always contains ARFMAG.
+    ar_fmag: [2]u8,
+
+    fn date(self: ar_hdr) !u64 {
+        const value = mem.trimRight(u8, &self.ar_date, &[_]u8{@as(u8, 0x20)});
+        return std.fmt.parseInt(u64, value, 10);
+    }
+
+    fn size(self: ar_hdr) !u32 {
+        const value = mem.trimRight(u8, &self.ar_size, &[_]u8{@as(u8, 0x20)});
+        return std.fmt.parseInt(u32, value, 10);
+    }
+
+    fn name(self: *const ar_hdr) ?[]const u8 {
+        const value = &self.ar_name;
+        if (mem.startsWith(u8, value, "#1/")) return null;
+        const sentinel = mem.indexOfScalar(u8, value, '/') orelse value.len;
+        return value[0..sentinel];
+    }
+
+    fn nameLength(self: ar_hdr) !?u32 {
+        const value = &self.ar_name;
+        if (!mem.startsWith(u8, value, "#1/")) return null;
+        const trimmed = mem.trimRight(u8, self.ar_name["#1/".len..], &[_]u8{0x20});
+        return try std.fmt.parseInt(u32, trimmed, 10);
+    }
+};
+
+pub const ArSymtab = struct {
+    entries: std.ArrayListUnmanaged(Entry) = .{},
+    strtab: StringTable = .{},
+    format: Format = .p32,
+
+    pub fn deinit(ar: *ArSymtab, allocator: Allocator) void {
+        ar.entries.deinit(allocator);
+        ar.strtab.deinit(allocator);
+    }
+
+    pub fn sort(ar: *ArSymtab) void {
+        mem.sort(Entry, ar.entries.items, {}, Entry.lessThan);
+    }
+
+    pub fn size(ar: ArSymtab) usize {
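+        // Layout: symtab byte-size word, (name offset, file offset) pairs, strtab
+        // byte-size word, then the string table padded to the pointer width.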
+        const ptr_width = ar.format.ptrWidth();
+        return ptr_width + ar.entries.items.len * 2 * ptr_width + ptr_width + mem.alignForward(usize, ar.strtab.buffer.items.len, ptr_width);
+    }
+
+    pub fn write(ar: ArSymtab, macho_file: *MachO, writer: anytype) !void {
+        // Header
+        try writeHeader(SYMDEF, @intCast(ar.size()), ar.format, writer);
+        // Symtab size, in bytes
+        try ar.writeInt(ar.entries.items.len * 2 * ar.format.ptrWidth(), writer);
+        // Symtab entries
+        for (ar.entries.items) |entry| {
+            const file_off = switch (macho_file.getFile(entry.file).?) {
+                .zig_object => |x| x.output_ar_state.file_off,
+                .object => |x| x.output_ar_state.file_off,
+                else => unreachable,
+            };
+            // Name offset
+            try ar.writeInt(entry.off, writer);
+            // File offset
+            try ar.writeInt(file_off, writer);
+        }
+        // Strtab size
+        const strtab_size = mem.alignForward(u64, ar.strtab.buffer.items.len, ar.format.ptrWidth());
+        const padding = strtab_size - ar.strtab.buffer.items.len;
+        try ar.writeInt(strtab_size, writer);
+        // Strtab
+        try writer.writeAll(ar.strtab.buffer.items);
+        if (padding > 0) {
+            try writer.writeByteNTimes(0, padding);
+        }
+    }
+
+    fn writeInt(ar: ArSymtab, value: u64, writer: anytype) !void {
+        switch (ar.format) {
+            .p32 => try writer.writeInt(u32, std.math.cast(u32, value) orelse return error.Overflow, .little),
+            .p64 => try writer.writeInt(u64, value, .little),
+        }
+    }
+
+    const FormatContext = struct {
+        ar: ArSymtab,
+        macho_file: *MachO,
+    };
+
+    pub fn fmt(ar: ArSymtab, macho_file: *MachO) std.fmt.Formatter(format2) {
+        return .{ .data = .{ .ar = ar, .macho_file = macho_file } };
+    }
+
+    fn format2(
+        ctx: FormatContext,
+        comptime unused_fmt_string: []const u8,
+        options: std.fmt.FormatOptions,
+        writer: anytype,
+    ) !void {
+        _ = unused_fmt_string;
+        _ = options;
+        const ar = ctx.ar;
+        const macho_file = ctx.macho_file;
+        for (ar.entries.items, 0..) |entry, i| {
+            const name = ar.strtab.getAssumeExists(entry.off);
+            const file = macho_file.getFile(entry.file).?;
+            try writer.print("  {d}: {s} in file({d})({})\n", .{ i, name, entry.file, file.fmtPath() });
+        }
+    }
+
+    const Entry = struct {
+        /// Symbol name offset
+        off: u32,
+        /// Exporting file
+        file: File.Index,
+
+        pub fn lessThan(ctx: void, lhs: Entry, rhs: Entry) bool {
+            _ = ctx;
+            if (lhs.off == rhs.off) return lhs.file < rhs.file;
+            return lhs.off < rhs.off;
+        }
+    };
+};
+
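+/// Word size of the archive symbol table: __.SYMDEF members use 32-bit entries,
+/// __.SYMDEF_64 members use 64-bit entries.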
+pub const Format = enum {
+    p32,
+    p64,
+
+    fn ptrWidth(self: Format) usize {
+        return switch (self) {
+            .p32 => @as(usize, 4),
+            .p64 => 8,
+        };
+    }
+};
+
+pub const ArState = struct {
+    /// File offset of the ar_hdr describing the contributing
+    /// object in the archive.
+    file_off: u64 = 0,
+
+    /// Total size of the contributing object (excludes ar_hdr and long name with padding).
+    size: u64 = 0,
+};
+
 const fat = @import("fat.zig");
 const link = @import("../../link.zig");
 const log = std.log.scoped(.link);
@@ -155,3 +321,4 @@ const Archive = @This();
 const File = @import("file.zig").File;
 const MachO = @import("../MachO.zig");
 const Object = @import("Object.zig");
+const StringTable = @import("../StringTable.zig");
src/link/MachO/file.zig
@@ -175,6 +175,13 @@ pub const File = union(enum) {
         };
     }
 
+    pub fn updateArSymtab(file: File, ar_symtab: *Archive.ArSymtab, macho_file: *MachO) error{OutOfMemory}!void {
+        return switch (file) {
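+            // Dylibs and the internal object never become members of a static archive.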
+            .dylib, .internal => unreachable,
+            inline else => |x| x.updateArSymtab(ar_symtab, macho_file),
+        };
+    }
+
     pub fn calcSymtabSize(file: File, macho_file: *MachO) !void {
         return switch (file) {
             inline else => |x| x.calcSymtabSize(macho_file),
@@ -206,6 +213,7 @@ const macho = std.macho;
 const std = @import("std");
 
 const Allocator = std.mem.Allocator;
+const Archive = @import("Archive.zig");
 const Atom = @import("Atom.zig");
 const InternalObject = @import("InternalObject.zig");
 const MachO = @import("../MachO.zig");
src/link/MachO/Object.zig
@@ -1,4 +1,4 @@
-archive: ?Archive = null,
+archive: ?InArchive = null,
 path: []const u8,
 file_handle: File.HandleIndex,
 mtime: u64,
@@ -29,8 +29,9 @@ hidden: bool = false,
 
 dynamic_relocs: MachO.DynamicRelocs = .{},
 output_symtab_ctx: MachO.SymtabCtx = .{},
+output_ar_state: Archive.ArState = .{},
 
-const Archive = struct {
+const InArchive = struct {
     path: []const u8,
     offset: u64,
 };
@@ -1232,6 +1233,33 @@ fn addSection(self: *Object, allocator: Allocator, segname: []const u8, sectname
     return n_sect;
 }
 
+pub fn updateArSymtab(self: Object, ar_symtab: *Archive.ArSymtab, macho_file: *MachO) error{OutOfMemory}!void {
+    const gpa = macho_file.base.comp.gpa;
+    for (self.symtab.items(.nlist)) |nlist| {
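+        // Only exported symbols that are defined here (or tentative/common) are listed
+        // in the archive symbol table.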
+        if (!nlist.ext() or (nlist.undf() and !nlist.tentative())) continue;
+        const off = try ar_symtab.strtab.insert(gpa, self.getString(nlist.n_strx));
+        try ar_symtab.entries.append(gpa, .{ .off = off, .file = self.index });
+    }
+}
+
+pub fn updateArSize(self: *Object, macho_file: *MachO) !void {
+    const file = macho_file.getFileHandle(self.file_handle);
+    const size = (try file.stat()).size;
+    self.output_ar_state.size = size;
+}
+
+pub fn writeAr(self: Object, ar_format: Archive.Format, macho_file: *MachO, writer: anytype) !void {
+    // Header
+    try Archive.writeHeader(self.path, @intCast(self.output_ar_state.size), ar_format, writer);
+    // Data
+    const file = macho_file.getFileHandle(self.file_handle);
+    // TODO try using copyRangeAll
+    const gpa = macho_file.base.comp.gpa;
+    const data = try file.readToEndAlloc(gpa, self.output_ar_state.size);
+    defer gpa.free(data);
+    try writer.writeAll(data);
+}
+
 pub fn calcSymtabSize(self: *Object, macho_file: *MachO) !void {
     const tracy = trace(@src());
     defer tracy.end();
@@ -2241,6 +2269,7 @@ const trace = @import("../../tracy.zig").trace;
 const std = @import("std");
 
 const Allocator = mem.Allocator;
+const Archive = @import("Archive.zig");
 const Atom = @import("Atom.zig");
 const Cie = eh_frame.Cie;
 const DwarfInfo = @import("DwarfInfo.zig");
src/link/MachO/relocatable.zig
@@ -64,7 +64,9 @@ pub fn flushObject(macho_file: *MachO, comp: *Compilation, module_obj_path: ?[]c
     };
     off = allocateSectionsRelocs(macho_file, off);
 
-    state_log.debug("{}", .{macho_file.dumpState()});
+    if (build_options.enable_logging) {
+        state_log.debug("{}", .{macho_file.dumpState()});
+    }
 
     try macho_file.calcSymtabSize();
     try writeAtoms(macho_file);
@@ -111,6 +113,7 @@ pub fn flushStaticLib(macho_file: *MachO, comp: *Compilation, module_obj_path: ?
     // First, we flush relocatable object file generated with our backends.
     if (macho_file.getZigObject()) |zo| {
         zo.resolveSymbols(macho_file);
+        zo.asFile().markExportsRelocatable(macho_file);
         zo.asFile().claimUnresolvedRelocatable(macho_file);
         try macho_file.sortSections();
         try macho_file.addAtomsToSections();
@@ -126,7 +129,9 @@ pub fn flushStaticLib(macho_file: *MachO, comp: *Compilation, module_obj_path: ?
         };
         off = allocateSectionsRelocs(macho_file, off);
 
-        state_log.debug("{}", .{macho_file.dumpState()});
+        if (build_options.enable_logging) {
+            state_log.debug("{}", .{macho_file.dumpState()});
+        }
 
         try macho_file.calcSymtabSize();
         try writeAtoms(macho_file);
@@ -150,6 +155,26 @@ pub fn flushStaticLib(macho_file: *MachO, comp: *Compilation, module_obj_path: ?
         try zo.readFileContents(macho_file);
     }
 
+    var files = std.ArrayList(File.Index).init(gpa);
+    defer files.deinit();
+    try files.ensureTotalCapacityPrecise(macho_file.objects.items.len + 1);
+    if (macho_file.getZigObject()) |zo| files.appendAssumeCapacity(zo.index);
+    for (macho_file.objects.items) |index| files.appendAssumeCapacity(index);
+
+    // Update ar symtab from parsed objects
+    var ar_symtab: Archive.ArSymtab = .{};
+    defer ar_symtab.deinit(gpa);
+
+    for (files.items) |index| {
+        try macho_file.getFile(index).?.updateArSymtab(&ar_symtab, macho_file);
+    }
+
+    ar_symtab.sort();
+
+    if (build_options.enable_logging) {
+        state_log.debug("ar_symtab\n{}\n", .{ar_symtab.fmt(macho_file)});
+    }
+
     var err = try macho_file.addErrorWithNotes(0);
     try err.addMsg(macho_file, "TODO implement flushStaticLib", .{});
 
@@ -646,6 +671,7 @@ fn writeHeader(macho_file: *MachO, ncmds: usize, sizeofcmds: usize) !void {
 }
 
 const assert = std.debug.assert;
+const build_options = @import("build_options");
 const eh_frame = @import("eh_frame.zig");
 const link = @import("../../link.zig");
 const load_commands = @import("load_commands.zig");
@@ -657,6 +683,7 @@ const state_log = std.log.scoped(.link_state);
 const std = @import("std");
 const trace = @import("../../tracy.zig").trace;
 
+const Archive = @import("Archive.zig");
 const Atom = @import("Atom.zig");
 const Compilation = @import("../../Compilation.zig");
 const File = @import("file.zig").File;
src/link/MachO/ZigObject.zig
@@ -48,6 +48,7 @@ relocs: RelocationTable = .{},
 
 dynamic_relocs: MachO.DynamicRelocs = .{},
 output_symtab_ctx: MachO.SymtabCtx = .{},
+output_ar_state: Archive.ArState = .{},
 
 pub fn init(self: *ZigObject, macho_file: *MachO) !void {
     const comp = macho_file.base.comp;
@@ -297,6 +298,29 @@ pub fn readFileContents(self: *ZigObject, macho_file: *MachO) !void {
     if (amt != size) return error.InputOutput;
 }
 
+pub fn updateArSymtab(self: ZigObject, ar_symtab: *Archive.ArSymtab, macho_file: *MachO) error{OutOfMemory}!void {
+    const gpa = macho_file.base.comp.gpa;
+    for (self.symbols.items) |sym_index| {
+        const sym = macho_file.getSymbol(sym_index);
+        const file = sym.getFile(macho_file).?;
+        assert(file.getIndex() == self.index);
+        if (!sym.flags.@"export") continue;
+        const off = try ar_symtab.strtab.insert(gpa, sym.getName(macho_file));
+        try ar_symtab.entries.append(gpa, .{ .off = off, .file = self.index });
+    }
+}
+
+pub fn updateArSize(self: *ZigObject) void {
+    self.output_ar_state.size = self.data.items.len;
+}
+
+pub fn writeAr(self: ZigObject, ar_format: Archive.Format, writer: anytype) !void {
+    // Header
+    try Archive.writeHeader(self.path, @intCast(self.output_ar_state.size), ar_format, writer);
+    // Data
+    try writer.writeAll(self.data.items);
+}
+
 pub fn scanRelocs(self: *ZigObject, macho_file: *MachO) !void {
     for (self.atoms.items) |atom_index| {
         const atom = macho_file.getAtom(atom_index) orelse continue;
src/link/MachO.zig
@@ -606,7 +606,9 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node
     self.allocateSyntheticSymbols();
     try self.allocateLinkeditSegment();
 
-    state_log.debug("{}", .{self.dumpState()});
+    if (build_options.enable_logging) {
+        state_log.debug("{}", .{self.dumpState()});
+    }
 
     try self.initDyldInfoSections();