Commit 177b1b6bf9

Tom Maenan Read Cutting <readcuttingt@gmail.com>
2021-06-25 00:29:39
Add fat/universal dylib support to zig ld
With this change zig ld can link with dynamic libraries contained within a fat/universal file, i.e. a file that has multiple separate binaries embedded within it for multi-arch support (on macOS). Whilst zig can still only create single-architecture executables, the ability to link with fat libraries is useful for cases where they are the easiest (or only) option to link against.
1 parent 2d2a6ed
Changed files (6)
lib/std/elf.zig
@@ -429,7 +429,7 @@ pub fn ProgramHeaderIterator(ParseSource: anytype) type {
                 if (self.elf_header.endian == native_endian) return phdr;
 
                 // Convert fields to native endianness.
-                bswapAllFields(Elf64_Phdr, &phdr);
+                mem.bswapAllFields(Elf64_Phdr, &phdr);
                 return phdr;
             }
 
@@ -441,7 +441,7 @@ pub fn ProgramHeaderIterator(ParseSource: anytype) type {
             // ELF endianness does NOT match native endianness.
             if (self.elf_header.endian != native_endian) {
                 // Convert fields to native endianness.
-                bswapAllFields(Elf32_Phdr, &phdr);
+                mem.bswapAllFields(Elf32_Phdr, &phdr);
             }
 
             // Convert 32-bit header to 64-bit.
@@ -479,7 +479,7 @@ pub fn SectionHeaderIterator(ParseSource: anytype) type {
                 if (self.elf_header.endian == native_endian) return shdr;
 
                 // Convert fields to native endianness.
-                bswapAllFields(Elf64_Shdr, &shdr);
+                mem.bswapAllFields(Elf64_Shdr, &shdr);
                 return shdr;
             }
 
@@ -491,7 +491,7 @@ pub fn SectionHeaderIterator(ParseSource: anytype) type {
             // ELF endianness does NOT match native endianness.
             if (self.elf_header.endian != native_endian) {
                 // Convert fields to native endianness.
-                bswapAllFields(Elf32_Shdr, &shdr);
+                mem.bswapAllFields(Elf32_Shdr, &shdr);
             }
 
             // Convert 32-bit header to 64-bit.
@@ -531,26 +531,6 @@ pub fn int32(need_bswap: bool, int_32: anytype, comptime Int64: anytype) Int64 {
     }
 }
 
-pub fn bswapAllFields(comptime S: type, ptr: *S) void {
-    if (@typeInfo(S) != .Struct) @compileError("bswapAllFields expects a struct as the first argument");
-    inline for (std.meta.fields(S)) |f| {
-        @field(ptr, f.name) = @byteSwap(f.field_type, @field(ptr, f.name));
-    }
-}
-test "bswapAllFields" {
-    var s: Elf32_Chdr = .{
-        .ch_type = 0x12341234,
-        .ch_size = 0x56785678,
-        .ch_addralign = 0x12124242,
-    };
-    bswapAllFields(Elf32_Chdr, &s);
-    try std.testing.expectEqual(Elf32_Chdr{
-        .ch_type = 0x34123412,
-        .ch_size = 0x78567856,
-        .ch_addralign = 0x42421212,
-    }, s);
-}
-
 pub const EI_NIDENT = 16;
 
 pub const EI_CLASS = 4;
lib/std/macho.zig
@@ -24,6 +24,19 @@ pub const mach_header_64 = extern struct {
     reserved: u32,
 };
 
+pub const fat_header = extern struct {
+    magic: u32, // identifies a fat/universal file; see FAT_MAGIC and FAT_CIGAM
+    nfat_arch: u32, // number of fat_arch records that follow this header
+};
+
+pub const fat_arch = extern struct {
+    cputype: cpu_type_t, // CPU type of the embedded binary (e.g. CPU_TYPE_ARM64, CPU_TYPE_X86_64)
+    cpusubtype: cpu_subtype_t,
+    offset: u32, // file offset at which the embedded binary for this architecture starts
+    size: u32, // presumably the size in bytes of the embedded binary; not read by the linker code in this change
+    @"align": u32,
+};
+
 pub const load_command = extern struct {
     cmd: u32,
     cmdsize: u32,
@@ -1040,6 +1053,20 @@ pub const MH_APP_EXTENSION_SAFE = 0x02000000;
 /// The external symbols listed in the nlist symbol table do not include all the symbols listed in the dyld info.
 pub const MH_NLIST_OUTOFSYNC_WITH_DYLDINFO = 0x04000000;
 
+// Constants for the magic field of the fat_header
+
+/// the fat magic number
+pub const FAT_MAGIC = 0xcafebabe;
+
+/// NXSwapLong(FAT_MAGIC)
+pub const FAT_CIGAM = 0xbebafeca;
+
+/// the 64-bit fat magic number
+pub const FAT_MAGIC_64 = 0xcafebabf;
+
+/// NXSwapLong(FAT_MAGIC_64)
+pub const FAT_CIGAM_64 = 0xbfbafeca;
+
 /// The flags field of a section structure is separated into two parts a section
 /// type and section attributes.  The section types are mutually exclusive (it
 /// can only have one type) but the section attributes are not (it may have more
lib/std/mem.zig
@@ -1539,6 +1539,34 @@ test "writeIntBig and writeIntLittle" {
     try testing.expect(eql(u8, buf2[0..], &[_]u8{ 0xfc, 0xff }));
 }
 
+/// Reverses the byte order of every field of the struct pointed to by `ptr`, in place
+/// (changing each field's endianness). `S` must be a struct type, otherwise compilation fails.
+pub fn bswapAllFields(comptime S: type, ptr: *S) void {
+    if (@typeInfo(S) != .Struct) @compileError("bswapAllFields expects a struct as the first argument");
+    inline for (std.meta.fields(S)) |f| { // unrolled at compile time: one assignment per field
+        @field(ptr, f.name) = @byteSwap(f.field_type, @field(ptr, f.name));
+    }
+}
+
+test "bswapAllFields" {
+    const T = extern struct { // one field of each width: 1, 2 and 4 bytes
+        f0: u8,
+        f1: u16,
+        f2: u32,
+    };
+    var s = T{
+        .f0 = 0x12,
+        .f1 = 0x1234,
+        .f2 = 0x12345678,
+    };
+    bswapAllFields(T, &s); // swaps each field independently, in place
+    try std.testing.expectEqual(T{
+        .f0 = 0x12, // a single byte is unchanged by a byte swap
+        .f1 = 0x3412,
+        .f2 = 0x78563412,
+    }, s);
+}
+
 /// Returns an iterator that iterates over the slices of `buffer` that are not
 /// any of the bytes in `delimiter_bytes`.
 /// tokenize("   abc def    ghi  ", " ")
src/link/MachO/Dylib.zig
@@ -1,6 +1,7 @@
 const Dylib = @This();
 
 const std = @import("std");
+const builtin = std.builtin;
 const assert = std.debug.assert;
 const fs = std.fs;
 const fmt = std.fmt;
@@ -8,6 +9,7 @@ const log = std.log.scoped(.dylib);
 const macho = std.macho;
 const math = std.math;
 const mem = std.mem;
+const native_endian = builtin.target.cpu.arch.endian();
 
 const Allocator = mem.Allocator;
 const Arch = std.Target.Cpu.Arch;
@@ -26,6 +28,10 @@ syslibroot: ?[]const u8 = null,
 
 ordinal: ?u16 = null,
 
+// Offset within the file at which the dylib we link against starts. When linking
+// against a fat lib, the dylib for our architecture is embedded at this offset.
+library_offset: u64 = 0,
+
 load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
 
 symtab_cmd_index: ?u16 = null,
@@ -205,9 +211,45 @@ pub fn closeFile(self: Dylib) void {
     }
 }
 
+fn decodeArch(cputype: macho.cpu_type_t) !std.Target.Cpu.Arch {
+    const arch: Arch = switch (cputype) { // translate the Mach-O cputype constant into a target architecture
+        macho.CPU_TYPE_ARM64 => .aarch64,
+        macho.CPU_TYPE_X86_64 => .x86_64,
+        else => { // any other cputype is reported as an error; the caller decides whether that is fatal
+            return error.UnsupportedCpuArchitecture;
+        },
+    };
+    return arch;
+}
+
 pub fn parse(self: *Dylib) !void {
     log.debug("parsing shared library '{s}'", .{self.name.?});
 
+    self.library_offset = offset: {
+        const fat_header = try readFatStruct(self.file.?.reader(), macho.fat_header);
+        if (fat_header.magic != macho.FAT_MAGIC) break :offset 0;
+
+        var fat_arch_index: u32 = 0;
+        while (fat_arch_index < fat_header.nfat_arch) : (fat_arch_index += 1) {
+            const fat_arch = try readFatStruct(self.file.?.reader(), macho.fat_arch);
+            // If we come across an architecture that we do not know how to handle, that's
+            // fine because we can keep looking for one that might match.
+            const lib_arch = decodeArch(fat_arch.cputype) catch |err| switch (err) {
+                error.UnsupportedCpuArchitecture => continue,
+                else => |e| return e,
+            };
+            if (lib_arch == self.arch.?) {
+                // We have found a matching architecture!
+                break :offset fat_arch.offset;
+            }
+        } else {
+            log.err("Could not find matching cpu architecture in fat library: expected {s}", .{self.arch.?});
+            return error.MismatchedCpuArchitecture;
+        }
+    };
+
+    try self.file.?.seekTo(self.library_offset);
+
     var reader = self.file.?.reader();
     self.header = try reader.readStruct(macho.mach_header_64);
 
@@ -216,14 +258,14 @@ pub fn parse(self: *Dylib) !void {
         return error.NotDylib;
     }
 
-    const this_arch: std.Target.Cpu.Arch = switch (self.header.?.cputype) {
-        macho.CPU_TYPE_ARM64 => .aarch64,
-        macho.CPU_TYPE_X86_64 => .x86_64,
-        else => |value| {
-            log.err("unsupported cpu architecture 0x{x}", .{value});
-            return error.UnsupportedCpuArchitecture;
+    const this_arch: Arch = decodeArch(self.header.?.cputype) catch |err| switch (err) {
+        error.UnsupportedCpuArchitecture => |e| {
+            log.err("unsupported cpu architecture 0x{x}", .{self.header.?.cputype});
+            return e;
         },
+        else => |e| return e,
     };
+
     if (this_arch != self.arch.?) {
         log.err("mismatched cpu architecture: expected {s}, found {s}", .{ self.arch.?, this_arch });
         return error.MismatchedCpuArchitecture;
@@ -234,6 +276,16 @@ pub fn parse(self: *Dylib) !void {
     try self.parseSymbols();
 }
 
+fn readFatStruct(reader: anytype, comptime T: type) !T {
+    // Fat structures (fat_header & fat_arch) are always stored on disk in big-endian
+    // byte order, so read the raw struct and convert its fields to native endianness.
+    var res: T = try reader.readStruct(T);
+    if (native_endian != builtin.Endian.Big) { // on a big-endian host the fields already match the on-disk order
+        mem.bswapAllFields(T, &res);
+    }
+    return res;
+}
+
 fn readLoadCommands(self: *Dylib, reader: anytype) !void {
     try self.load_commands.ensureCapacity(self.allocator, self.header.?.ncmds);
 
@@ -285,12 +337,12 @@ fn parseSymbols(self: *Dylib) !void {
 
     var symtab = try self.allocator.alloc(u8, @sizeOf(macho.nlist_64) * symtab_cmd.nsyms);
     defer self.allocator.free(symtab);
-    _ = try self.file.?.preadAll(symtab, symtab_cmd.symoff);
+    _ = try self.file.?.preadAll(symtab, symtab_cmd.symoff + self.library_offset);
     const slice = @alignCast(@alignOf(macho.nlist_64), mem.bytesAsSlice(macho.nlist_64, symtab));
 
     var strtab = try self.allocator.alloc(u8, symtab_cmd.strsize);
     defer self.allocator.free(strtab);
-    _ = try self.file.?.preadAll(strtab, symtab_cmd.stroff);
+    _ = try self.file.?.preadAll(strtab, symtab_cmd.stroff + self.library_offset);
 
     for (slice) |sym| {
         const sym_name = mem.spanZ(@ptrCast([*:0]const u8, strtab.ptr + sym.n_strx));
src/link/MachO/Object.zig
@@ -247,18 +247,14 @@ pub fn parse(self: *Object) !void {
         try reader.context.seekTo(offset);
     }
 
-    self.header = try reader.readStruct(macho.mach_header_64);
-
-    if (self.header.?.filetype != macho.MH_OBJECT) {
-        log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{
-            macho.MH_OBJECT,
-            self.header.?.filetype,
-        });
+    const header = try reader.readStruct(macho.mach_header_64);
 
+    if (header.filetype != macho.MH_OBJECT) {
+        log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{ macho.MH_OBJECT, header.filetype });
         return error.NotObject;
     }
 
-    const this_arch: Arch = switch (self.header.?.cputype) {
+    const this_arch: Arch = switch (header.cputype) {
         macho.CPU_TYPE_ARM64 => .aarch64,
         macho.CPU_TYPE_X86_64 => .x86_64,
         else => |value| {
@@ -271,6 +267,8 @@ pub fn parse(self: *Object) !void {
         return error.MismatchedCpuArchitecture;
     }
 
+    self.header = header;
+
     try self.readLoadCommands(reader);
     try self.parseSymbols();
     try self.parseSections();
src/link/Elf.zig
@@ -1108,7 +1108,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation) !void {
                 for (buf) |*phdr, i| {
                     phdr.* = progHeaderTo32(self.program_headers.items[i]);
                     if (foreign_endian) {
-                        std.elf.bswapAllFields(elf.Elf32_Phdr, phdr);
+                        mem.bswapAllFields(elf.Elf32_Phdr, phdr);
                     }
                 }
                 try self.base.file.?.pwriteAll(mem.sliceAsBytes(buf), self.phdr_table_offset.?);
@@ -1120,7 +1120,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation) !void {
                 for (buf) |*phdr, i| {
                     phdr.* = self.program_headers.items[i];
                     if (foreign_endian) {
-                        std.elf.bswapAllFields(elf.Elf64_Phdr, phdr);
+                        mem.bswapAllFields(elf.Elf64_Phdr, phdr);
                     }
                 }
                 try self.base.file.?.pwriteAll(mem.sliceAsBytes(buf), self.phdr_table_offset.?);
@@ -1197,7 +1197,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation) !void {
                     shdr.* = sectHeaderTo32(self.sections.items[i]);
                     log.debug("writing section {}", .{shdr.*});
                     if (foreign_endian) {
-                        std.elf.bswapAllFields(elf.Elf32_Shdr, shdr);
+                        mem.bswapAllFields(elf.Elf32_Shdr, shdr);
                     }
                 }
                 try self.base.file.?.pwriteAll(mem.sliceAsBytes(buf), self.shdr_table_offset.?);
@@ -1210,7 +1210,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation) !void {
                     shdr.* = self.sections.items[i];
                     log.debug("writing section {}", .{shdr.*});
                     if (foreign_endian) {
-                        std.elf.bswapAllFields(elf.Elf64_Shdr, shdr);
+                        mem.bswapAllFields(elf.Elf64_Shdr, shdr);
                     }
                 }
                 try self.base.file.?.pwriteAll(mem.sliceAsBytes(buf), self.shdr_table_offset.?);
@@ -2740,14 +2740,14 @@ fn writeProgHeader(self: *Elf, index: usize) !void {
         .p32 => {
             var phdr = [1]elf.Elf32_Phdr{progHeaderTo32(self.program_headers.items[index])};
             if (foreign_endian) {
-                std.elf.bswapAllFields(elf.Elf32_Phdr, &phdr[0]);
+                mem.bswapAllFields(elf.Elf32_Phdr, &phdr[0]);
             }
             return self.base.file.?.pwriteAll(mem.sliceAsBytes(&phdr), offset);
         },
         .p64 => {
             var phdr = [1]elf.Elf64_Phdr{self.program_headers.items[index]};
             if (foreign_endian) {
-                std.elf.bswapAllFields(elf.Elf64_Phdr, &phdr[0]);
+                mem.bswapAllFields(elf.Elf64_Phdr, &phdr[0]);
             }
             return self.base.file.?.pwriteAll(mem.sliceAsBytes(&phdr), offset);
         },
@@ -2761,7 +2761,7 @@ fn writeSectHeader(self: *Elf, index: usize) !void {
             var shdr: [1]elf.Elf32_Shdr = undefined;
             shdr[0] = sectHeaderTo32(self.sections.items[index]);
             if (foreign_endian) {
-                std.elf.bswapAllFields(elf.Elf32_Shdr, &shdr[0]);
+                mem.bswapAllFields(elf.Elf32_Shdr, &shdr[0]);
             }
             const offset = self.shdr_table_offset.? + index * @sizeOf(elf.Elf32_Shdr);
             return self.base.file.?.pwriteAll(mem.sliceAsBytes(&shdr), offset);
@@ -2769,7 +2769,7 @@ fn writeSectHeader(self: *Elf, index: usize) !void {
         .p64 => {
             var shdr = [1]elf.Elf64_Shdr{self.sections.items[index]};
             if (foreign_endian) {
-                std.elf.bswapAllFields(elf.Elf64_Shdr, &shdr[0]);
+                mem.bswapAllFields(elf.Elf64_Shdr, &shdr[0]);
             }
             const offset = self.shdr_table_offset.? + index * @sizeOf(elf.Elf64_Shdr);
             return self.base.file.?.pwriteAll(mem.sliceAsBytes(&shdr), offset);
@@ -2867,7 +2867,7 @@ fn writeSymbol(self: *Elf, index: usize) !void {
                 },
             };
             if (foreign_endian) {
-                std.elf.bswapAllFields(elf.Elf32_Sym, &sym[0]);
+                mem.bswapAllFields(elf.Elf32_Sym, &sym[0]);
             }
             const off = syms_sect.sh_offset + @sizeOf(elf.Elf32_Sym) * index;
             try self.base.file.?.pwriteAll(mem.sliceAsBytes(sym[0..1]), off);
@@ -2875,7 +2875,7 @@ fn writeSymbol(self: *Elf, index: usize) !void {
         .p64 => {
             var sym = [1]elf.Elf64_Sym{self.local_symbols.items[index]};
             if (foreign_endian) {
-                std.elf.bswapAllFields(elf.Elf64_Sym, &sym[0]);
+                mem.bswapAllFields(elf.Elf64_Sym, &sym[0]);
             }
             const off = syms_sect.sh_offset + @sizeOf(elf.Elf64_Sym) * index;
             try self.base.file.?.pwriteAll(mem.sliceAsBytes(sym[0..1]), off);
@@ -2906,7 +2906,7 @@ fn writeAllGlobalSymbols(self: *Elf) !void {
                     .st_shndx = self.global_symbols.items[i].st_shndx,
                 };
                 if (foreign_endian) {
-                    std.elf.bswapAllFields(elf.Elf32_Sym, sym);
+                    mem.bswapAllFields(elf.Elf32_Sym, sym);
                 }
             }
             try self.base.file.?.pwriteAll(mem.sliceAsBytes(buf), global_syms_off);
@@ -2925,7 +2925,7 @@ fn writeAllGlobalSymbols(self: *Elf) !void {
                     .st_shndx = self.global_symbols.items[i].st_shndx,
                 };
                 if (foreign_endian) {
-                    std.elf.bswapAllFields(elf.Elf64_Sym, sym);
+                    mem.bswapAllFields(elf.Elf64_Sym, sym);
                 }
             }
             try self.base.file.?.pwriteAll(mem.sliceAsBytes(buf), global_syms_off);