Commit 1be8621815

Jakub Konka <kubkon@jakubkonka.com>
2023-03-21 21:27:17
macho+zld: when finding by address, note the end-of-section symbols too
Previously, if we were looking for the very last symbol by address in some section, and the next symbol happened to also have the same address value but resided in a different section, we would keep searching and find the wrong symbol in the wrong section. This mechanism turns out to be vital for correct linking of Go binaries, where the runtime looks for specially crafted synthetic symbols that mark the beginning and end of each section. In this case, we had an unfortunate clash between the end of the PC-marked machine code section (`_runtime.etext`) and the beginning of read-only data (`_runtime.rodata`).
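Below is a minimal sketch (not the linker's code) of the failure mode and the fix. The `Sym` type, the symbol names and addresses, and the simplified `lsearch` are illustrative stand-ins for the real `source_address_lookup`, `Entry`, and `zld.lsearch` helpers used in `Object.zig`:

```zig
const std = @import("std");

const Entry = struct { start: u32 = 0, len: u32 = 0 };

const Sym = struct { name: []const u8, addr: i64, sect: u8 };

// Simplified linear search: index of the first symbol whose address is
// strictly greater than `addr`, standing in for how zld.lsearch is used
// with its address predicate in getSymbolByAddress.
fn lsearch(syms: []const Sym, addr: i64) usize {
    for (syms, 0..) |sym, i| {
        if (sym.addr > addr) return i;
    }
    return syms.len;
}

pub fn main() void {
    // `_runtime.etext` ends the text section at the exact address where
    // `_runtime.rodata` begins the next section.
    const symtab = [_]Sym{
        .{ .name = "_main", .addr = 0x1000, .sect = 1 },
        .{ .name = "_runtime.etext", .addr = 0x2000, .sect = 1 },
        .{ .name = "_runtime.rodata", .addr = 0x2000, .sect = 2 },
    };

    // Old behavior: only the first symbol index of section 1 was recorded,
    // so the search runs past the section boundary and 0x2000 resolves to
    // `_runtime.rodata`, a symbol in the wrong section.
    const unbounded = symtab[0..];
    const i_old = lsearch(unbounded, 0x2000) - 1;
    std.debug.print("unbounded: {s}\n", .{unbounded[i_old].name}); // _runtime.rodata

    // New behavior: the per-section Entry records both start and len, so
    // the slice handed to the search stops at the section boundary and
    // 0x2000 resolves to `_runtime.etext`.
    const lookup = Entry{ .start = 0, .len = 2 };
    const bounded = symtab[lookup.start..][0..lookup.len];
    const i_new = lsearch(bounded, 0x2000) - 1;
    std.debug.print("bounded:   {s}\n", .{bounded[i_new].name}); // _runtime.etext
}
```

This is what the `source_section_index_lookup: []Entry` change below provides: while sorting the symbol table, `parse` records an `Entry{ .start, .len }` per source section, and `getSymbolByAddress` then searches `source_address_lookup[lookup.start..][0..lookup.len]` instead of an open-ended slice.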
1 parent dc98009
Changed files (2)
src/link/MachO/Object.zig
@@ -50,7 +50,7 @@ reverse_symtab_lookup: []u32 = undefined,
 /// Can be undefined as set together with in_symtab.
 source_address_lookup: []i64 = undefined,
 /// Can be undefined as set together with in_symtab.
-source_section_index_lookup: []i64 = undefined,
+source_section_index_lookup: []Entry = undefined,
 /// Can be undefined as set together with in_symtab.
 strtab_lookup: []u32 = undefined,
 /// Can be undefined as set together with in_symtab.
@@ -58,7 +58,7 @@ atom_by_index_table: []AtomIndex = undefined,
 /// Can be undefined as set together with in_symtab.
 globals_lookup: []i64 = undefined,
 /// Can be undefined as set together with in_symtab.
-relocs_lookup: []RelocEntry = undefined,
+relocs_lookup: []Entry = undefined,
 
 /// All relocations, flattened and sorted by address descending
 /// per section.
@@ -81,11 +81,14 @@ unwind_info_sect_id: ?u8 = null,
 unwind_relocs_lookup: []Record = undefined,
 unwind_records_lookup: std.AutoHashMapUnmanaged(AtomIndex, u32) = .{},
 
-const RelocEntry = struct { start: u32, len: u32 };
+const Entry = struct {
+    start: u32 = 0,
+    len: u32 = 0,
+};
 
 const Record = struct {
     dead: bool,
-    reloc: RelocEntry,
+    reloc: Entry,
 };
 
 pub fn deinit(self: *Object, gpa: Allocator) void {
@@ -170,11 +173,11 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch)
     self.strtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len);
     self.globals_lookup = try allocator.alloc(i64, self.in_symtab.?.len);
     self.atom_by_index_table = try allocator.alloc(AtomIndex, self.in_symtab.?.len + nsects);
-    self.relocs_lookup = try allocator.alloc(RelocEntry, self.in_symtab.?.len + nsects);
+    self.relocs_lookup = try allocator.alloc(Entry, self.in_symtab.?.len + nsects);
     // This is wasteful but we need to be able to lookup source symbol address after stripping and
     // allocating of sections.
     self.source_address_lookup = try allocator.alloc(i64, self.in_symtab.?.len);
-    self.source_section_index_lookup = try allocator.alloc(i64, nsects);
+    self.source_section_index_lookup = try allocator.alloc(Entry, nsects);
 
     for (self.symtab) |*sym| {
         sym.* = .{
@@ -188,11 +191,8 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch)
 
     mem.set(i64, self.globals_lookup, -1);
     mem.set(AtomIndex, self.atom_by_index_table, 0);
-    mem.set(i64, self.source_section_index_lookup, -1);
-    mem.set(RelocEntry, self.relocs_lookup, .{
-        .start = 0,
-        .len = 0,
-    });
+    mem.set(Entry, self.source_section_index_lookup, .{});
+    mem.set(Entry, self.relocs_lookup, .{});
 
     // You would expect that the symbol table is at least pre-sorted based on symbol's type:
     // local < extern defined < undefined. Unfortunately, this is not guaranteed! For instance,
@@ -211,13 +211,25 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch)
     // is kind enough to specify the symbols in the correct order.
     sort.sort(SymbolAtIndex, sorted_all_syms.items, self, SymbolAtIndex.lessThan);
 
+    var prev_sect_id: u8 = 0;
+    var section_index_lookup: ?Entry = null;
     for (sorted_all_syms.items, 0..) |sym_id, i| {
         const sym = sym_id.getSymbol(self);
 
-        if (sym.sect() and self.source_section_index_lookup[sym.n_sect - 1] == -1) {
-            self.source_section_index_lookup[sym.n_sect - 1] = @intCast(i64, i);
+        if (section_index_lookup) |*lookup| {
+            if (sym.n_sect != prev_sect_id or sym.undf()) {
+                self.source_section_index_lookup[prev_sect_id - 1] = lookup.*;
+                section_index_lookup = null;
+            } else {
+                lookup.len += 1;
+            }
+        }
+        if (sym.sect() and section_index_lookup == null) {
+            section_index_lookup = .{ .start = @intCast(u32, i), .len = 1 };
         }
 
+        prev_sect_id = sym.n_sect;
+
         self.symtab[i] = sym;
         self.source_symtab_lookup[i] = sym_id.index;
         self.reverse_symtab_lookup[sym_id.index] = @intCast(u32, i);
@@ -234,13 +246,7 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch)
     self.unwind_info_sect_id = self.getSourceSectionIndexByName("__LD", "__compact_unwind");
     if (self.hasUnwindRecords()) {
         self.unwind_relocs_lookup = try allocator.alloc(Record, self.getUnwindRecords().len);
-        mem.set(Record, self.unwind_relocs_lookup, .{
-            .dead = true,
-            .reloc = .{
-                .start = 0,
-                .len = 0,
-            },
-        });
+        mem.set(Record, self.unwind_relocs_lookup, .{ .dead = true, .reloc = .{} });
     }
 }
 
@@ -620,7 +626,7 @@ fn filterRelocs(
     relocs: []align(1) const macho.relocation_info,
     start_addr: u64,
     end_addr: u64,
-) RelocEntry {
+) Entry {
     const Predicate = struct {
         addr: u64,
 
@@ -712,9 +718,9 @@ fn parseEhFrameSection(self: *Object, zld: *Zld, object_id: u32) !void {
 
     while (try it.next()) |record| {
         const offset = it.pos - record.getSize();
-        const rel_pos = switch (cpu_arch) {
+        const rel_pos: Entry = switch (cpu_arch) {
             .aarch64 => filterRelocs(relocs, offset, offset + record.getSize()),
-            .x86_64 => RelocEntry{ .start = 0, .len = 0 },
+            .x86_64 => .{},
             else => unreachable,
         };
         self.eh_frame_relocs_lookup.putAssumeCapacityNoClobber(offset, .{
@@ -990,13 +996,15 @@ pub fn getSymbolByAddress(self: Object, addr: u64, sect_hint: ?u8) u32 {
     };
 
     if (sect_hint) |sect_id| {
-        if (self.source_section_index_lookup[sect_id] > -1) {
-            const first_sym_index = @intCast(usize, self.source_section_index_lookup[sect_id]);
-            const target_sym_index = @import("zld.zig").lsearch(i64, self.source_address_lookup[first_sym_index..], Predicate{
-                .addr = @intCast(i64, addr),
-            });
+        if (self.source_section_index_lookup[sect_id].len > 0) {
+            const lookup = self.source_section_index_lookup[sect_id];
+            const target_sym_index = @import("zld.zig").lsearch(
+                i64,
+                self.source_address_lookup[lookup.start..][0..lookup.len],
+                Predicate{ .addr = @intCast(i64, addr) },
+            );
             if (target_sym_index > 0) {
-                return @intCast(u32, first_sym_index + target_sym_index - 1);
+                return @intCast(u32, lookup.start + target_sym_index - 1);
             }
         }
         return self.getSectionAliasSymbolIndex(sect_id);
src/link/MachO/ZldAtom.zig
@@ -790,10 +790,11 @@ fn resolveRelocsX86(
         const target = parseRelocTarget(zld, atom_index, rel);
         const rel_offset = @intCast(u32, rel.r_address - context.base_offset);
 
-        log.debug("  RELA({s}) @ {x} => %{d} in object({?})", .{
+        log.debug("  RELA({s}) @ {x} => %{d} ('{s}') in object({?})", .{
             @tagName(rel_type),
             rel.r_address,
             target.sym_index,
+            zld.getSymbolName(target),
             target.getFile(),
         });