Commit 3dff040ca5

Jakub Konka <kubkon@jakubkonka.com>
2023-01-21 13:26:27
macho: synthesise unwind records in absence of compact unwind section
Unlike Apple ld, we do not parse any DWARF CFI; we simply output DWARF-type unwind records.
1 parent 24f6c07
Changed files (3)
src/link/MachO/dead_strip.zig
@@ -238,16 +238,10 @@ fn mark(zld: *Zld, roots: AtomTable, alive: *AtomTable) !void {
         }
     }
 
-    for (zld.objects.items) |object, object_id| {
+    for (zld.objects.items) |_, object_id| {
         // Traverse unwind and eh_frame records noting if the source symbol has been marked, and if so,
         // marking all references as live.
-        // TODO I am currently assuming there will always be __unwind_info section emitted which implies
-        // we will not traverse __eh_frame in isolation. This however is only true for more recent versions
-        // of macOS so if there is a feature request to handle earlier versions of macOS, the following
-        // bit code needs updating as well.
-        if (object.hasUnwindRecords()) {
-            try markUnwindRecords(zld, @intCast(u32, object_id), alive);
-        }
+        try markUnwindRecords(zld, @intCast(u32, object_id), alive);
     }
 }
 
@@ -256,9 +250,23 @@ fn markUnwindRecords(zld: *Zld, object_id: u32, alive: *AtomTable) !void {
     const cpu_arch = zld.options.target.cpu.arch;
 
     const unwind_records = object.getUnwindRecords();
-    var it = object.getEhFrameRecordsIterator();
 
     for (object.exec_atoms.items) |atom_index| {
+        if (!object.hasUnwindRecords()) {
+            if (object.eh_frame_records_lookup.get(atom_index)) |fde_offset| {
+                const ptr = object.eh_frame_relocs_lookup.getPtr(fde_offset).?;
+                if (ptr.dead) continue; // already marked
+                if (!alive.contains(atom_index)) {
+                    // Mark dead and continue.
+                    ptr.dead = true;
+                } else {
+                    // Mark references live and continue.
+                    try markEhFrameRecord(zld, object_id, atom_index, alive);
+                }
+                continue;
+            }
+        }
+
         const record_id = object.unwind_records_lookup.get(atom_index) orelse continue;
         if (object.unwind_relocs_lookup[record_id].dead) continue; // already marked, nothing to do
         if (!alive.contains(atom_index)) {
@@ -272,61 +280,7 @@ fn markUnwindRecords(zld: *Zld, object_id: u32, alive: *AtomTable) !void {
 
         const record = unwind_records[record_id];
         if (UnwindInfo.UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) {
-            const fde_offset = object.eh_frame_records_lookup.get(atom_index).?;
-            it.seekTo(fde_offset);
-            const fde = (try it.next()).?;
-
-            const cie_ptr = fde.getCiePointer();
-            const cie_offset = fde_offset + 4 - cie_ptr;
-            it.seekTo(cie_offset);
-            const cie = (try it.next()).?;
-
-            switch (cpu_arch) {
-                .aarch64 => {
-                    // Mark FDE references which should include any referenced LSDA record
-                    const relocs = eh_frame.getRelocs(zld, object_id, fde_offset);
-                    for (relocs) |rel| {
-                        const target = UnwindInfo.parseRelocTarget(
-                            zld,
-                            object_id,
-                            rel,
-                            fde.data,
-                            @intCast(i32, fde_offset) + 4,
-                        );
-                        const target_sym = zld.getSymbol(target);
-                        if (!target_sym.undf()) blk: {
-                            const target_object = zld.objects.items[target.getFile().?];
-                            const target_atom_index = target_object.getAtomIndexForSymbol(target.sym_index) orelse
-                                break :blk;
-                            markLive(zld, target_atom_index, alive);
-                        }
-                    }
-                },
-                .x86_64 => {
-                    const lsda_ptr = try fde.getLsdaPointer(cie, .{
-                        .base_addr = object.eh_frame_sect.?.addr,
-                        .base_offset = fde_offset,
-                    });
-                    if (lsda_ptr) |lsda_address| {
-                        // Mark LSDA record as live
-                        const sym_index = object.getSymbolByAddress(lsda_address, null);
-                        const target_atom_index = object.getAtomIndexForSymbol(sym_index).?;
-                        markLive(zld, target_atom_index, alive);
-                    }
-                },
-                else => unreachable,
-            }
-
-            // Mark CIE references which should include any referenced personalities
-            // that are defined locally.
-            if (cie.getPersonalityPointerReloc(zld, object_id, cie_offset)) |target| {
-                const target_sym = zld.getSymbol(target);
-                if (!target_sym.undf()) {
-                    const target_object = zld.objects.items[target.getFile().?];
-                    const target_atom_index = target_object.getAtomIndexForSymbol(target.sym_index).?;
-                    markLive(zld, target_atom_index, alive);
-                }
-            }
+            try markEhFrameRecord(zld, object_id, atom_index, alive);
         } else {
             if (UnwindInfo.getPersonalityFunctionReloc(zld, object_id, record_id)) |rel| {
                 const target = UnwindInfo.parseRelocTarget(
@@ -360,6 +314,68 @@ fn markUnwindRecords(zld: *Zld, object_id: u32, alive: *AtomTable) !void {
     }
 }
 
+/// Marks as live every atom referenced by the `__eh_frame` records that belong to `atom_index`.
+///
+/// The FDE is found through `object.eh_frame_records_lookup`, and its CIE is located from the
+/// FDE's CIE pointer. The lookup is unwrapped with `.?`, so `atom_index` must have an FDE
+/// registered in the object identified by `object_id`.
+///
+/// * aarch64: the target of every relocation returned for the FDE is marked live, provided the
+///   target symbol is defined and maps to an atom.
+/// * x86_64: the LSDA pointer, when the FDE has one, is passed to `object.getSymbolByAddress`
+///   and the atom of the resulting symbol is marked live.
+/// * any other architecture is `unreachable`.
+///
+/// Finally, a personality function referenced by the CIE is marked live when its symbol is
+/// defined. Errors returned by the records iterator and by `getLsdaPointer` are propagated.
+fn markEhFrameRecord(zld: *Zld, object_id: u32, atom_index: AtomIndex, alive: *AtomTable) !void {
+    const cpu_arch = zld.options.target.cpu.arch;
+    const object = &zld.objects.items[object_id];
+    var it = object.getEhFrameRecordsIterator();
+
+    // Both the lookup and the iterator result are unwrapped: a missing FDE is treated as a bug.
+    const fde_offset = object.eh_frame_records_lookup.get(atom_index).?;
+    it.seekTo(fde_offset);
+    const fde = (try it.next()).?;
+
+    // The CIE offset is `fde_offset + 4` minus the CIE pointer.
+    // NOTE(review): presumably `+ 4` skips a 4-byte length field preceding the pointer — confirm.
+    const cie_ptr = fde.getCiePointer();
+    const cie_offset = fde_offset + 4 - cie_ptr;
+    it.seekTo(cie_offset);
+    const cie = (try it.next()).?;
+
+    switch (cpu_arch) {
+        .aarch64 => {
+            // Mark FDE references which should include any referenced LSDA record
+            const relocs = eh_frame.getRelocs(zld, object_id, fde_offset);
+            for (relocs) |rel| {
+                const target = UnwindInfo.parseRelocTarget(
+                    zld,
+                    object_id,
+                    rel,
+                    fde.data,
+                    @intCast(i32, fde_offset) + 4,
+                );
+                const target_sym = zld.getSymbol(target);
+                if (!target_sym.undf()) blk: {
+                    const target_object = zld.objects.items[target.getFile().?];
+                    // A defined target without an atom is skipped rather than treated as an error.
+                    const target_atom_index = target_object.getAtomIndexForSymbol(target.sym_index) orelse
+                        break :blk;
+                    markLive(zld, target_atom_index, alive);
+                }
+            }
+        },
+        .x86_64 => {
+            const lsda_ptr = try fde.getLsdaPointer(cie, .{
+                .base_addr = object.eh_frame_sect.?.addr,
+                .base_offset = fde_offset,
+            });
+            if (lsda_ptr) |lsda_address| {
+                // Mark LSDA record as live
+                const sym_index = object.getSymbolByAddress(lsda_address, null);
+                // Unlike the aarch64 path, a missing atom is not tolerated here (`.?`).
+                const target_atom_index = object.getAtomIndexForSymbol(sym_index).?;
+                markLive(zld, target_atom_index, alive);
+            }
+        },
+        else => unreachable,
+    }
+
+    // Mark CIE references which should include any referenced personalities
+    // that are defined locally.
+    if (cie.getPersonalityPointerReloc(zld, object_id, cie_offset)) |target| {
+        const target_sym = zld.getSymbol(target);
+        if (!target_sym.undf()) {
+            const target_object = zld.objects.items[target.getFile().?];
+            const target_atom_index = target_object.getAtomIndexForSymbol(target.sym_index).?;
+            markLive(zld, target_atom_index, alive);
+        }
+    }
+}
+
 fn prune(zld: *Zld, alive: AtomTable) void {
     log.debug("pruning dead atoms", .{});
     for (zld.objects.items) |*object| {
src/link/MachO/Object.zig
@@ -725,7 +725,18 @@ fn parseEhFrameSection(self: *Object, zld: *Zld, object_id: u32) !void {
 }
 
 fn parseUnwindInfo(self: *Object, zld: *Zld, object_id: u32) !void {
-    const sect = self.unwind_info_sect orelse return;
+    const sect = self.unwind_info_sect orelse {
+        // If it so happens that the object had `__eh_frame` section defined but no `__compact_unwind`,
+        // we will try fully synthesising unwind info records to somewhat match Apple ld's
+        // approach. However, we will only synthesise DWARF records and nothing more. For this reason,
+        // we still create the output `__TEXT,__unwind_info` section.
+        if (self.eh_frame_sect != null) {
+            if (zld.getSectionByName("__TEXT", "__unwind_info") == null) {
+                _ = try zld.initSection("__TEXT", "__unwind_info", .{});
+            }
+        }
+        return;
+    };
 
     log.debug("parsing unwind info in {s}", .{self.name});
 
src/link/MachO/UnwindInfo.zig
@@ -253,37 +253,13 @@ pub fn collect(info: *UnwindInfo, zld: *Zld) !void {
         try records.ensureUnusedCapacity(object.exec_atoms.items.len);
         try atom_indexes.ensureUnusedCapacity(object.exec_atoms.items.len);
 
-        var it = object.getEhFrameRecordsIterator();
-
         for (object.exec_atoms.items) |atom_index| {
             var record = if (object.unwind_records_lookup.get(atom_index)) |record_id| blk: {
                 if (object.unwind_relocs_lookup[record_id].dead) continue;
                 var record = unwind_records[record_id];
 
                 if (UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) {
-                    const fde_offset = object.eh_frame_records_lookup.get(atom_index).?;
-                    it.seekTo(fde_offset);
-                    const fde = (try it.next()).?;
-                    const cie_ptr = fde.getCiePointer();
-                    const cie_offset = fde_offset + 4 - cie_ptr;
-                    it.seekTo(cie_offset);
-                    const cie = (try it.next()).?;
-
-                    if (cie.getPersonalityPointerReloc(
-                        zld,
-                        @intCast(u32, object_id),
-                        cie_offset,
-                    )) |target| {
-                        const personality_index = info.getPersonalityFunction(target) orelse inner: {
-                            const personality_index = info.personalities_count;
-                            info.personalities[personality_index] = target;
-                            info.personalities_count += 1;
-                            break :inner personality_index;
-                        };
-
-                        record.personalityFunction = personality_index + 1;
-                        UnwindEncoding.setPersonalityIndex(&record.compactUnwindEncoding, personality_index + 1);
-                    }
+                    try info.collectPersonalityFromDwarf(zld, @intCast(u32, object_id), atom_index, &record);
                 } else {
                     if (getPersonalityFunctionReloc(
                         zld,
@@ -324,6 +300,21 @@ pub fn collect(info: *UnwindInfo, zld: *Zld) !void {
                 const atom = zld.getAtom(atom_index);
                 const sym = zld.getSymbol(atom.getSymbolWithLoc());
                 if (sym.n_desc == N_DEAD) continue;
+
+                if (!object.hasUnwindRecords()) {
+                    if (object.eh_frame_records_lookup.get(atom_index)) |fde_offset| {
+                        if (object.eh_frame_relocs_lookup.get(fde_offset).?.dead) continue;
+                        var record = nullRecord();
+                        try info.collectPersonalityFromDwarf(zld, @intCast(u32, object_id), atom_index, &record);
+                        switch (cpu_arch) {
+                            .aarch64 => UnwindEncoding.setMode(&record.compactUnwindEncoding, macho.UNWIND_ARM64_MODE.DWARF),
+                            .x86_64 => UnwindEncoding.setMode(&record.compactUnwindEncoding, macho.UNWIND_X86_64_MODE.DWARF),
+                            else => unreachable,
+                        }
+                        break :blk record;
+                    }
+                }
+
                 break :blk nullRecord();
             };
 
@@ -499,6 +490,40 @@ pub fn collect(info: *UnwindInfo, zld: *Zld) !void {
     }
 }
 
+/// Fills in the personality function of `record` from the `__eh_frame` data of `atom_index`.
+///
+/// The atom's FDE is found through `object.eh_frame_records_lookup` (unwrapped with `.?`, so
+/// the atom must have an FDE), and the CIE is located from the FDE's CIE pointer. When the CIE
+/// has a personality pointer relocation, its target is looked up with
+/// `info.getPersonalityFunction`, or appended to `info.personalities` if it is not known yet.
+/// The resulting index plus one is written to `record.personalityFunction` and into
+/// `record.compactUnwindEncoding`. Without such a relocation `record` is left untouched.
+///
+/// Errors returned by the records iterator are propagated.
+fn collectPersonalityFromDwarf(
+    info: *UnwindInfo,
+    zld: *Zld,
+    object_id: u32,
+    atom_index: u32,
+    record: *macho.compact_unwind_entry,
+) !void {
+    const object = &zld.objects.items[object_id];
+    var it = object.getEhFrameRecordsIterator();
+    const fde_offset = object.eh_frame_records_lookup.get(atom_index).?;
+    it.seekTo(fde_offset);
+    const fde = (try it.next()).?;
+    // The CIE offset is `fde_offset + 4` minus the CIE pointer read from the FDE.
+    const cie_ptr = fde.getCiePointer();
+    const cie_offset = fde_offset + 4 - cie_ptr;
+    it.seekTo(cie_offset);
+    const cie = (try it.next()).?;
+
+    // NOTE(review): `object_id` is already a `u32`, so the cast below is a no-op.
+    if (cie.getPersonalityPointerReloc(
+        zld,
+        @intCast(u32, object_id),
+        cie_offset,
+    )) |target| {
+        // Reuse the slot of an already registered personality, otherwise append a new one.
+        const personality_index = info.getPersonalityFunction(target) orelse inner: {
+            const personality_index = info.personalities_count;
+            // NOTE(review): no capacity check is visible here — confirm that
+            // `info.personalities` cannot be indexed out of bounds.
+            info.personalities[personality_index] = target;
+            info.personalities_count += 1;
+            break :inner personality_index;
+        };
+
+        // The stored value is the index plus one.
+        record.personalityFunction = personality_index + 1;
+        UnwindEncoding.setPersonalityIndex(&record.compactUnwindEncoding, personality_index + 1);
+    }
+}
+
 pub fn calcSectionSize(info: UnwindInfo, zld: *Zld) !void {
     const sect_id = zld.getSectionByName("__TEXT", "__unwind_info") orelse return;
     const sect = &zld.sections.items(.header)[sect_id];
@@ -766,40 +791,26 @@ fn getCommonEncoding(info: UnwindInfo, enc: macho.compact_unwind_encoding_t) ?u7
 }
 
 pub const UnwindEncoding = struct {
-    pub const UNWIND_X86_64_MODE = enum(u4) {
-        none = 0,
-        ebp_frame = 1,
-        stack_immd = 2,
-        stack_ind = 3,
-        dwarf = 4,
-    };
-
-    pub const UNWIND_ARM64_MODE = enum(u4) {
-        none = 0,
-        frameless = 2,
-        dwarf = 3,
-        frame = 4,
-    };
-
-    pub const UNWIND_MODE_MASK: u32 = 0x0F000000;
-    pub const UNWIND_PERSONALITY_INDEX_MASK: u32 = 0x30000000;
-    pub const UNWIND_HAS_LSDA_MASK: u32 = 0x40000000;
-
+    /// Returns the 4-bit unwind mode of `enc`: the bits selected by the mode mask, shifted
+    /// right by 24. The comptime assert checks that the arm64 and x86_64 mode masks are equal,
+    /// which is why the arm64 mask is used regardless of the target architecture.
     pub fn getMode(enc: macho.compact_unwind_encoding_t) u4 {
-        const mode = @truncate(u4, (enc & UNWIND_MODE_MASK) >> 24);
-        return mode;
+        comptime assert(macho.UNWIND_ARM64_MODE_MASK == macho.UNWIND_X86_64_MODE_MASK);
+        return @truncate(u4, (enc & macho.UNWIND_ARM64_MODE_MASK) >> 24);
     }
 
+    /// Reports whether the unwind mode of `enc` is the DWARF mode of `cpu_arch`.
+    /// Only aarch64 and x86_64 are handled; any other architecture is `unreachable`.
+    /// NOTE(review): `@intToEnum` expects `mode` to be a declared tag unless the enum is
+    /// non-exhaustive — confirm every encoding that reaches this function qualifies.
     pub fn isDwarf(enc: macho.compact_unwind_encoding_t, cpu_arch: std.Target.Cpu.Arch) bool {
-        switch (cpu_arch) {
-            .aarch64 => return @intToEnum(UNWIND_ARM64_MODE, getMode(enc)) == .dwarf,
-            .x86_64 => return @intToEnum(UNWIND_X86_64_MODE, getMode(enc)) == .dwarf,
+        const mode = getMode(enc);
+        return switch (cpu_arch) {
+            .aarch64 => @intToEnum(macho.UNWIND_ARM64_MODE, mode) == .DWARF,
+            .x86_64 => @intToEnum(macho.UNWIND_X86_64_MODE, mode) == .DWARF,
             else => unreachable,
-        }
+        };
+    }
+
+    /// Stores `mode` in the unwind-mode field of `enc` (the bits selected by the mode mask,
+    /// i.e. the field `getMode` reads back).
+    ///
+    /// `mode` is expected to be a tag of `macho.UNWIND_ARM64_MODE` or
+    /// `macho.UNWIND_X86_64_MODE`; its integer value is shifted left by 24.
+    /// The field is cleared before the new value is written, so the call replaces a mode that
+    /// is already present instead of OR-ing the two together. All other bits are preserved.
+    pub fn setMode(enc: *macho.compact_unwind_encoding_t, mode: anytype) void {
+        const mode_bits = @intCast(u32, @enumToInt(mode)) << 24;
+        // The arm64 and x86_64 mode masks are identical (see the assert in `getMode`).
+        enc.* = (enc.* & ~@as(u32, macho.UNWIND_ARM64_MODE_MASK)) | mode_bits;
     }
 
     pub fn hasLsda(enc: macho.compact_unwind_encoding_t) bool {
-        const has_lsda = @truncate(u1, (enc & UNWIND_HAS_LSDA_MASK) >> 31);
+        const has_lsda = @truncate(u1, (enc & macho.UNWIND_HAS_LSDA) >> 31);
         return has_lsda == 1;
     }
 
@@ -809,7 +820,7 @@ pub const UnwindEncoding = struct {
     }
 
+    /// Returns the 2-bit personality index of `enc`: the bits selected by
+    /// `macho.UNWIND_PERSONALITY_MASK`, shifted right by 28.
     pub fn getPersonalityIndex(enc: macho.compact_unwind_encoding_t) u2 {
-        const index = @truncate(u2, (enc & UNWIND_PERSONALITY_INDEX_MASK) >> 28);
+        const index = @truncate(u2, (enc & macho.UNWIND_PERSONALITY_MASK) >> 28);
         return index;
     }