Commit 9b0c555db4

Jakub Konka <kubkon@jakubkonka.com>
2022-10-28 11:47:58
macho: fix regression in dead strip for x86_64
Correctly calculate the encompassing atoms for local relocations (`r_extern == 0`).
1 parent 27bcd4e
Changed files (3)
src/link/MachO/dead_strip.zig
@@ -162,21 +162,6 @@ fn markLive(
         };
         const target_sym = zld.getSymbol(target);
 
-        if (rel.r_extern == 0) {
-            // We are pessimistic and mark all atoms within the target section as live.
-            // TODO: this can be improved by marking only the relevant atoms.
-            const sect_id = target_sym.n_sect;
-            const object = zld.objects.items[target.getFile().?];
-            for (object.atoms.items) |other_atom_index| {
-                const other_atom = zld.getAtom(other_atom_index);
-                const other_sym = zld.getSymbol(other_atom.getSymbolWithLoc());
-                if (other_sym.n_sect == sect_id) {
-                    markLive(zld, other_atom_index, alive, reverse_lookups);
-                }
-            }
-            continue;
-        }
-
         if (target_sym.undf()) continue;
         if (target.getFile() == null) {
             const target_sym_name = zld.getSymbolName(target);
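
Note: the block removed above pessimistically marked every atom in the target section as live whenever it saw a local relocation. That fallback is no longer needed because `parseRelocTarget` (see the `ZldAtom.zig` changes below) now resolves an `r_extern == 0` relocation to the specific symbol whose source address contains the target. A minimal sketch of that containment lookup, using a simplified stand-in for `lsearch` from `zld.zig` and made-up addresses:

```zig
const std = @import("std");

/// Simplified stand-in for zld.zig's lsearch: index of the first element that
/// satisfies the predicate, or haystack.len if none does.
fn lsearch(comptime T: type, haystack: []const T, predicate: anytype) usize {
    for (haystack) |item, i| {
        if (predicate.predicate(item)) return i;
    }
    return haystack.len;
}

test "find the atom containing a section-local target address" {
    // Source addresses of one section's symbols, sorted ascending; -1 marks
    // entries without an address, mirroring source_address_lookup.
    const addresses = [_]i64{ 0x0, 0x10, 0x40, -1 };
    const target_addr: i64 = 0x24; // falls inside the atom starting at 0x10

    const Predicate = struct {
        addr: i64,
        pub fn predicate(pred: @This(), other: i64) bool {
            return if (other == -1) true else other > pred.addr;
        }
    };

    const index = lsearch(i64, &addresses, Predicate{ .addr = target_addr });
    // The containing atom is the last one starting at or before the target.
    try std.testing.expect(index > 0);
    try std.testing.expectEqual(@as(usize, 1), index - 1);
}
```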
src/link/MachO/Object.zig
@@ -44,6 +44,10 @@ symtab: []macho.nlist_64 = undefined,
 /// Can be undefined as set together with in_symtab.
 source_symtab_lookup: []u32 = undefined,
 /// Can be undefined as set together with in_symtab.
+source_address_lookup: []i64 = undefined,
+/// Can be undefined as set together with in_symtab.
+source_section_index_lookup: []i64 = undefined,
+/// Can be undefined as set together with in_symtab.
 strtab_lookup: []u32 = undefined,
 /// Can be undefined as set together with in_symtab.
 atom_by_index_table: []AtomIndex = undefined,
@@ -58,6 +62,8 @@ pub fn deinit(self: *Object, gpa: Allocator) void {
     gpa.free(self.contents);
     if (self.in_symtab) |_| {
         gpa.free(self.source_symtab_lookup);
+        gpa.free(self.source_address_lookup);
+        gpa.free(self.source_section_index_lookup);
         gpa.free(self.strtab_lookup);
         gpa.free(self.symtab);
         gpa.free(self.atom_by_index_table);
@@ -116,6 +122,10 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch)
                 self.strtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len);
                 self.globals_lookup = try allocator.alloc(i64, self.in_symtab.?.len);
                 self.atom_by_index_table = try allocator.alloc(AtomIndex, self.in_symtab.?.len + nsects);
+                // This is wasteful, but we need to be able to look up a source symbol's address after
+                // sections have been stripped and allocated.
+                self.source_address_lookup = try allocator.alloc(i64, self.in_symtab.?.len);
+                self.source_section_index_lookup = try allocator.alloc(i64, nsects);
 
                 for (self.symtab) |*sym| {
                     sym.* = .{
@@ -129,6 +139,7 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch)
 
                 mem.set(i64, self.globals_lookup, -1);
                 mem.set(AtomIndex, self.atom_by_index_table, 0);
+                mem.set(i64, self.source_section_index_lookup, -1);
 
                 // You would expect that the symbol table is at least pre-sorted based on symbol's type:
                 // local < extern defined < undefined. Unfortunately, this is not guaranteed! For instance,
@@ -150,8 +161,13 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch)
                 for (sorted_all_syms.items) |sym_id, i| {
                     const sym = sym_id.getSymbol(self);
 
+                    if (sym.sect() and self.source_section_index_lookup[sym.n_sect - 1] == -1) {
+                        self.source_section_index_lookup[sym.n_sect - 1] = @intCast(i64, i);
+                    }
+
                     self.symtab[i] = sym;
                     self.source_symtab_lookup[i] = sym_id.index;
+                    self.source_address_lookup[i] = if (sym.undf()) -1 else @intCast(i64, sym.n_value);
 
                     const sym_name_len = mem.sliceTo(@ptrCast([*:0]const u8, self.in_strtab.?.ptr + sym.n_strx), 0).len + 1;
                     self.strtab_lookup[i] = @intCast(u32, sym_name_len);
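
The two new tables are filled while walking the already-sorted symbol list: `source_section_index_lookup[n_sect - 1]` remembers where each section's run of symbols begins, and `source_address_lookup[i]` keeps each defined symbol's original address (`-1` for undefined symbols). A self-contained sketch with a simplified stand-in for `macho.nlist_64` and invented addresses:

```zig
const std = @import("std");

test "populate per-object address and section-index lookups" {
    // Simplified stand-in for macho.nlist_64.
    const Nlist = struct { n_sect: u8, n_value: u64, undf: bool };

    // Assumed sorted by address, which in an object file also groups symbols by section.
    const sorted_syms = [_]Nlist{
        .{ .n_sect = 1, .n_value = 0x0, .undf = false },
        .{ .n_sect = 1, .n_value = 0x10, .undf = false },
        .{ .n_sect = 2, .n_value = 0x100, .undf = false },
        .{ .n_sect = 0, .n_value = 0, .undf = true }, // undefined import
    };
    const nsects = 2;

    var source_address_lookup: [sorted_syms.len]i64 = undefined;
    var source_section_index_lookup = [_]i64{-1} ** nsects;

    for (sorted_syms) |sym, i| {
        // The first defined symbol seen for a section marks the start of its run.
        if (!sym.undf and source_section_index_lookup[sym.n_sect - 1] == -1) {
            source_section_index_lookup[sym.n_sect - 1] = @intCast(i64, i);
        }
        // Original (source object) address of every defined symbol, -1 otherwise.
        source_address_lookup[i] = if (sym.undf) -1 else @intCast(i64, sym.n_value);
    }

    try std.testing.expectEqual(@as(i64, 0), source_section_index_lookup[0]);
    try std.testing.expectEqual(@as(i64, 2), source_section_index_lookup[1]);
    try std.testing.expectEqual(@as(i64, -1), source_address_lookup[3]);
}
```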
@@ -244,13 +260,12 @@ fn filterSymbolsBySection(symbols: []macho.nlist_64, n_sect: u8) struct {
     return .{ .index = @intCast(u32, index), .len = @intCast(u32, len) };
 }
 
-fn filterSymbolsByAddress(symbols: []macho.nlist_64, n_sect: u8, start_addr: u64, end_addr: u64) struct {
+fn filterSymbolsByAddress(symbols: []macho.nlist_64, start_addr: u64, end_addr: u64) struct {
     index: u32,
     len: u32,
 } {
     const Predicate = struct {
         addr: u64,
-        n_sect: u8,
 
         pub fn predicate(pred: @This(), symbol: macho.nlist_64) bool {
             return symbol.n_value >= pred.addr;
@@ -259,11 +274,9 @@ fn filterSymbolsByAddress(symbols: []macho.nlist_64, n_sect: u8, start_addr: u64
 
     const index = @import("zld.zig").lsearch(macho.nlist_64, symbols, Predicate{
         .addr = start_addr,
-        .n_sect = n_sect,
     });
     const len = @import("zld.zig").lsearch(macho.nlist_64, symbols[index..], Predicate{
         .addr = end_addr,
-        .n_sect = n_sect,
     });
 
     return .{ .index = @intCast(u32, index), .len = @intCast(u32, len) };
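
The `n_sect` parameter can go away because the predicate never actually consulted it (it only compares `n_value`), so dropping it changes no behavior. What remains is a pair of linear searches returning an `{ index, len }` window of symbols in `[start_addr, end_addr)`; a standalone sketch with fabricated symbols:

```zig
const std = @import("std");

// Minimal stand-in for the only nlist_64 field the filter looks at.
const Sym = struct { n_value: u64 };

/// Simplified stand-in for zld.zig's lsearch (see the earlier sketch).
fn lsearch(comptime T: type, haystack: []const T, predicate: anytype) usize {
    for (haystack) |item, i| {
        if (predicate.predicate(item)) return i;
    }
    return haystack.len;
}

/// Window of symbols whose addresses fall in [start_addr, end_addr). The input
/// is assumed to be sorted by address.
fn filterSymbolsByAddress(symbols: []const Sym, start_addr: u64, end_addr: u64) struct { index: u32, len: u32 } {
    const Predicate = struct {
        addr: u64,
        pub fn predicate(pred: @This(), symbol: Sym) bool {
            return symbol.n_value >= pred.addr;
        }
    };
    const index = lsearch(Sym, symbols, Predicate{ .addr = start_addr });
    const len = lsearch(Sym, symbols[index..], Predicate{ .addr = end_addr });
    return .{ .index = @intCast(u32, index), .len = @intCast(u32, len) };
}

test "symbols sharing one atom address form a single window" {
    const syms = [_]Sym{ .{ .n_value = 0x0 }, .{ .n_value = 0x10 }, .{ .n_value = 0x10 }, .{ .n_value = 0x20 } };
    const loc = filterSymbolsByAddress(&syms, 0x10, 0x11);
    try std.testing.expectEqual(@as(u32, 1), loc.index);
    try std.testing.expectEqual(@as(u32, 2), loc.len);
}
```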
@@ -412,12 +425,7 @@ pub fn splitIntoAtoms(self: *Object, zld: *Zld, object_id: u31) !void {
             while (next_sym_index < sect_start_index + sect_loc.len) {
                 const next_sym = symtab[next_sym_index];
                 const addr = next_sym.n_value;
-                const atom_loc = filterSymbolsByAddress(
-                    symtab[next_sym_index..],
-                    sect_id + 1,
-                    addr,
-                    addr + 1,
-                );
+                const atom_loc = filterSymbolsByAddress(symtab[next_sym_index..], addr, addr + 1);
                 assert(atom_loc.len > 0);
                 const atom_sym_index = atom_loc.index + next_sym_index;
                 const nsyms_trailing = atom_loc.len - 1;
src/link/MachO/ZldAtom.zig
@@ -181,6 +181,27 @@ const RelocContext = struct {
     base_offset: i32 = 0,
 };
 
+pub fn getRelocContext(zld: *Zld, atom_index: AtomIndex) RelocContext {
+    const atom = zld.getAtom(atom_index);
+    assert(atom.getFile() != null); // synthetic atoms do not have relocs
+
+    const object = zld.objects.items[atom.getFile().?];
+    if (object.getSourceSymbol(atom.sym_index)) |source_sym| {
+        const source_sect = object.getSourceSection(source_sym.n_sect - 1);
+        return .{
+            .base_addr = source_sect.addr,
+            .base_offset = @intCast(i32, source_sym.n_value - source_sect.addr),
+        };
+    }
+    const nbase = @intCast(u32, object.in_symtab.?.len);
+    const sect_id = @intCast(u16, atom.sym_index - nbase);
+    const source_sect = object.getSourceSection(sect_id);
+    return .{
+        .base_addr = source_sect.addr,
+        .base_offset = 0,
+    };
+}
+
 pub fn parseRelocTarget(
     zld: *Zld,
     atom_index: AtomIndex,
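
`getRelocContext` hoists the context computation that previously lived inside `resolveRelocs` (removed further down) so that `parseRelocTarget` can share it. `base_addr` is the source section's address and `base_offset` is the atom's offset into that section; since a relocation's `r_address` is section-relative, `r_address - base_offset` rebases it onto the atom's own code slice. A worked example with invented addresses:

```zig
const std = @import("std");

test "r_address is section-relative; base_offset rebases it onto the atom" {
    // Invented source-object layout.
    const source_sect_addr: u64 = 0x1000; // source section address
    const atom_sym_value: u64 = 0x1040;   // n_value of the atom's defining symbol

    // What getRelocContext computes for an atom backed by a real symbol.
    const base_addr = source_sect_addr;
    const base_offset = @intCast(i32, atom_sym_value - source_sect_addr);

    // The byte to patch, relative to the atom's own code slice:
    const r_address: i32 = 0x44; // section-relative relocation offset
    const rel_offset = @intCast(u32, r_address - base_offset);

    try std.testing.expectEqual(@as(u64, 0x1000), base_addr);
    try std.testing.expectEqual(@as(u32, 0x4), rel_offset);
}
```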
@@ -192,6 +213,52 @@ pub fn parseRelocTarget(
 
     if (rel.r_extern == 0) {
         const sect_id = @intCast(u8, rel.r_symbolnum - 1);
+        const ctx = getRelocContext(zld, atom_index);
+        const atom_code = getAtomCode(zld, atom_index);
+        const rel_offset = @intCast(u32, rel.r_address - ctx.base_offset);
+
+        const address_in_section = if (rel.r_pcrel == 0) blk: {
+            break :blk if (rel.r_length == 3)
+                mem.readIntLittle(i64, atom_code[rel_offset..][0..8])
+            else
+                mem.readIntLittle(i32, atom_code[rel_offset..][0..4]);
+        } else blk: {
+            const correction: u3 = switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) {
+                .X86_64_RELOC_SIGNED => 0,
+                .X86_64_RELOC_SIGNED_1 => 1,
+                .X86_64_RELOC_SIGNED_2 => 2,
+                .X86_64_RELOC_SIGNED_4 => 4,
+                else => unreachable,
+            };
+            const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]);
+            const target_address = @intCast(i64, ctx.base_addr) + rel.r_address + 4 + correction + addend;
+            break :blk target_address;
+        };
+
+        // Find containing atom
+        const Predicate = struct {
+            addr: i64,
+
+            pub fn predicate(pred: @This(), other: i64) bool {
+                return if (other == -1) true else other > pred.addr;
+            }
+        };
+
+        if (object.source_section_index_lookup[sect_id] > -1) {
+            const first_sym_index = @intCast(usize, object.source_section_index_lookup[sect_id]);
+            const target_sym_index = @import("zld.zig").lsearch(i64, object.source_address_lookup[first_sym_index..], Predicate{
+                .addr = address_in_section,
+            });
+
+            if (target_sym_index > 0) {
+                return SymbolWithLoc{
+                    .sym_index = @intCast(u32, first_sym_index + target_sym_index - 1),
+                    .file = atom.file,
+                };
+            }
+        }
+
+        // The address is not covered by any symbol in this section; fall back to the synthetic section alias atom.
         const sym_index = object.getSectionAliasSymbolIndex(sect_id);
         return SymbolWithLoc{ .sym_index = sym_index, .file = atom.file };
     }
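
For a local relocation the target is identified only by a section number, so the actual target address has to be recovered from the bytes at the relocation site: a non-pc-relative fixup already stores the absolute address within the source object, while a pc-relative `X86_64_RELOC_SIGNED_*` stores a displacement measured from the end of the instruction (the displacement's 4 bytes plus `correction` bytes of trailing immediate). A worked example of the pc-relative case, with made-up numbers:

```zig
const std = @import("std");

test "recovering the target address of a local X86_64_RELOC_SIGNED_4" {
    // Invented values for a pc-relative, r_extern == 0 relocation.
    const base_addr: i64 = 0x2000; // source address of the atom's section
    const r_address: i64 = 0x10; // relocation offset within that section
    const correction: i64 = 4; // trailing immediate width for SIGNED_4
    const addend: i64 = 0x30; // signed 32-bit displacement read from the code

    // The CPU adds the displacement to the end of the instruction, which lies
    // 4 (displacement) + correction bytes past the fixup; undoing that gives
    // the target's absolute address within the source object.
    const address_in_section = base_addr + r_address + 4 + correction + addend;
    try std.testing.expectEqual(@as(i64, 0x2048), address_in_section);
}
```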
@@ -405,29 +472,13 @@ pub fn resolveRelocs(
     const atom = zld.getAtom(atom_index);
     assert(atom.getFile() != null); // synthetic atoms do not have relocs
 
-    const object = zld.objects.items[atom.getFile().?];
-    const ctx: RelocContext = blk: {
-        if (object.getSourceSymbol(atom.sym_index)) |source_sym| {
-            const source_sect = object.getSourceSection(source_sym.n_sect - 1);
-            break :blk .{
-                .base_addr = source_sect.addr,
-                .base_offset = @intCast(i32, source_sym.n_value - source_sect.addr),
-            };
-        }
-        const nbase = @intCast(u32, object.in_symtab.?.len);
-        const sect_id = @intCast(u16, atom.sym_index - nbase);
-        const source_sect = object.getSourceSection(sect_id);
-        break :blk .{
-            .base_addr = source_sect.addr,
-            .base_offset = 0,
-        };
-    };
-
     log.debug("resolving relocations in ATOM(%{d}, '{s}')", .{
         atom.sym_index,
         zld.getSymbolName(atom.getSymbolWithLoc()),
     });
 
+    const ctx = getRelocContext(zld, atom_index);
+
     return switch (arch) {
         .aarch64 => resolveRelocsArm64(zld, atom_index, atom_code, atom_relocs, reverse_lookup, ctx),
         .x86_64 => resolveRelocsX86(zld, atom_index, atom_code, atom_relocs, reverse_lookup, ctx),
@@ -744,8 +795,11 @@ fn resolveRelocsArm64(
                     mem.readIntLittle(i32, atom_code[rel_offset..][0..4]);
 
                 if (rel.r_extern == 0) {
-                    const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr;
-                    ptr_addend -= @intCast(i64, target_sect_base_addr);
+                    const base_addr = if (target.sym_index > object.source_address_lookup.len)
+                        @intCast(i64, object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr)
+                    else
+                        object.source_address_lookup[target.sym_index];
+                    ptr_addend -= base_addr;
                 }
 
                 const result = blk: {
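
This hunk and the two x86_64 hunks below make the same change for `r_extern == 0`: the addend read from the code is an absolute address in the source object, and because `parseRelocTarget` now resolves the relocation to a specific symbol rather than the section alias, it has to be rebased against that symbol's source address (`source_address_lookup[target.sym_index]`) instead of the section base. Keeping the section base while `target_addr` points at a mid-section atom would over-shoot by the atom's offset within its section, which is the regression being fixed; the section base is kept only when the target is a synthetic section-alias symbol, whose index lies beyond the source symbol table. A worked sketch of the unsigned-pointer case with invented addresses:

```zig
const std = @import("std");

test "rebasing a local unsigned-pointer addend onto the containing atom" {
    // Invented source-object layout.
    const target_atom_source_addr: i64 = 0x140; // source_address_lookup[target.sym_index]
    const stored_addend: i64 = 0x148; // absolute source address baked into the code

    // Output address assigned to the target atom after dead stripping.
    const target_atom_output_addr: i64 = 0x100003000;

    // Subtracting the atom's own source address (not the section base) leaves
    // just the offset into that atom ...
    const offset_in_atom = stored_addend - target_atom_source_addr;
    // ... which is then re-applied on top of the atom's final address.
    const relocated = target_atom_output_addr + offset_in_atom;

    try std.testing.expectEqual(@as(i64, 0x8), offset_in_atom);
    try std.testing.expectEqual(@as(i64, 0x100003008), relocated);
}
```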
@@ -878,13 +932,12 @@ fn resolveRelocsX86(
                 var addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]) + correction;
 
                 if (rel.r_extern == 0) {
-                    // Note for the future self: when r_extern == 0, we should subtract correction from the
-                    // addend.
-                    const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr;
-                    // We need to add base_offset, i.e., offset of this atom wrt to the source
-                    // section. Otherwise, the addend will over-/under-shoot.
-                    addend += @intCast(i32, @intCast(i64, context.base_addr + rel_offset + 4) -
-                        @intCast(i64, target_sect_base_addr) + context.base_offset);
+                    const base_addr = if (target.sym_index > object.source_address_lookup.len)
+                        @intCast(i64, object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr)
+                    else
+                        object.source_address_lookup[target.sym_index];
+                    addend += @intCast(i32, @intCast(i64, context.base_addr) + rel.r_address + 4 -
+                        @intCast(i64, base_addr));
                 }
 
                 const adjusted_target_addr = @intCast(u64, @intCast(i64, target_addr) + addend);
@@ -902,8 +955,11 @@ fn resolveRelocsX86(
                     mem.readIntLittle(i32, atom_code[rel_offset..][0..4]);
 
                 if (rel.r_extern == 0) {
-                    const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr;
-                    addend -= @intCast(i64, target_sect_base_addr);
+                    const base_addr = if (target.sym_index > object.source_address_lookup.len)
+                        @intCast(i64, object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr)
+                    else
+                        object.source_address_lookup[target.sym_index];
+                    addend -= base_addr;
                 }
 
                 const result = blk: {