Commit 0f2489d8fc

Jakub Konka <kubkon@jakubkonka.com>
2023-11-10 12:52:52
macho: resolve special section/segment boundary symbols
Boundary symbols have a special name prefix: * section$start$segname$sectname * section$stop$segname$sectname * segment$start$segname * segment$stop$segname and will resolve to either start or end of the respective section/segment if found. If not found, we return an error stating we couldn't find the requested section/segment rather than silently failing and resolving the address to 0 which seems to be the case with Apple's ld64.
1 parent 03adafd
Changed files (2)
src/link/MachO/dead_strip.zig
@@ -50,6 +50,7 @@ fn collectRoots(macho_file: *MachO, roots: *AtomTable) !void {
             for (macho_file.globals.items) |global| {
                 const sym = macho_file.getSymbol(global);
                 if (sym.undf()) continue;
+                if (sym.n_desc == MachO.N_BOUNDARY) continue;
 
                 if (global.getFile()) |file| {
                     try addRoot(macho_file, roots, file, global);
src/link/MachO.zig
@@ -1416,6 +1416,51 @@ pub fn allocateSpecialSymbols(self: *MachO) !void {
             seg.segName(),
         });
     }
+
+    for (self.globals.items) |global| {
+        const sym = self.getSymbolPtr(global);
+        if (sym.n_desc != N_BOUNDARY) continue;
+        if (self.getSectionBoundarySymbol(global)) |bsym| {
+            const sect_id = self.getSectionByName(bsym.segname, bsym.sectname) orelse {
+                try self.reportUnresolvedBoundarySymbol(self.getSymbolName(global), "section not found: {s},{s}", .{
+                    bsym.segname, bsym.sectname,
+                });
+                continue;
+            };
+            const sect = self.sections.items(.header)[sect_id];
+            sym.n_sect = sect_id + 1;
+            sym.n_value = switch (bsym.kind) {
+                .start => sect.addr,
+                .stop => sect.addr + sect.size,
+            };
+
+            log.debug("allocating {s} at @0x{x} sect({d})", .{
+                self.getSymbolName(global),
+                sym.n_value,
+                sym.n_sect,
+            });
+
+            continue;
+        }
+        if (self.getSegmentBoundarySymbol(global)) |bsym| {
+            const seg_id = self.getSegmentByName(bsym.segname) orelse {
+                try self.reportUnresolvedBoundarySymbol(self.getSymbolName(global), "segment not found: {s}", .{
+                    bsym.segname,
+                });
+
+                continue;
+            };
+            const seg = self.segments.items[seg_id];
+            sym.n_value = switch (bsym.kind) {
+                .start => seg.vmaddr,
+                .stop => seg.vmaddr + seg.vmsize,
+            };
+
+            log.debug("allocating {s} at @0x{x} ", .{ self.getSymbolName(global), sym.n_value });
+
+            continue;
+        }
+    }
 }
 
 const CreateAtomOpts = struct {
@@ -1442,6 +1487,7 @@ pub fn createTentativeDefAtoms(self: *MachO) !void {
         const sym = self.getSymbolPtr(global);
         if (!sym.tentative()) continue;
         if (sym.n_desc == N_DEAD) continue;
+        if (sym.n_desc == N_BOUNDARY) continue;
 
         log.debug("creating tentative definition for ATOM(%{d}, '{s}') in object({?})", .{
             global.sym_index, self.getSymbolName(global), global.file,
@@ -1630,6 +1676,13 @@ pub fn resolveSymbols(self: *MachO) !void {
     try self.createMhExecuteHeaderSymbol();
     try self.createDsoHandleSymbol();
     try self.resolveSymbolsAtLoading();
+
+    // Final stop, check if unresolved contain any of the special magic boundary symbols
+    // * section$start$
+    // * section$stop$
+    // * segment$start$
+    // * segment$stop$
+    try self.resolveBoundarySymbols();
 }
 
 fn resolveGlobalSymbol(self: *MachO, current: SymbolWithLoc) !void {
@@ -1845,6 +1898,34 @@ fn resolveSymbolsAtLoading(self: *MachO) !void {
     }
 }
 
+fn resolveBoundarySymbols(self: *MachO) !void {
+    var next_sym: usize = 0;
+    while (next_sym < self.unresolved.count()) {
+        const global_index = self.unresolved.keys()[next_sym];
+        const global = &self.globals.items[global_index];
+
+        if (self.getSectionBoundarySymbol(global.*) != null or self.getSegmentBoundarySymbol(global.*) != null) {
+            const sym_index = try self.allocateSymbol();
+            const sym_loc = SymbolWithLoc{ .sym_index = sym_index };
+            const sym = self.getSymbolPtr(sym_loc);
+            sym.* = .{
+                .n_strx = try self.strtab.insert(self.base.allocator, self.getSymbolName(global.*)),
+                .n_type = macho.N_SECT | macho.N_EXT,
+                .n_sect = 0,
+                .n_desc = N_BOUNDARY,
+                .n_value = 0,
+            };
+            if (global.getFile()) |file| {
+                const global_object = &self.objects.items[file];
+                global_object.globals_lookup[global.sym_index] = global_index;
+            }
+            global.* = sym_loc;
+            _ = self.unresolved.swapRemove(global_index);
+            continue;
+        }
+    }
+}
+
 pub fn deinit(self: *MachO) void {
     const gpa = self.base.allocator;
 
@@ -3565,6 +3646,7 @@ fn collectRebaseData(self: *MachO, rebase: *Rebase) !void {
             const atom = self.getAtom(atom_index);
             const sym = self.getSymbol(atom.getSymbolWithLoc());
             if (sym.n_desc == N_DEAD) continue;
+            if (sym.n_desc == N_BOUNDARY) continue;
 
             const sect_id = sym.n_sect - 1;
             const section = self.sections.items(.header)[sect_id];
@@ -3719,6 +3801,7 @@ fn collectBindData(self: *MachO, bind: anytype, raw_bindings: anytype) !void {
             const atom = self.getAtom(atom_index);
             const sym = self.getSymbol(atom.getSymbolWithLoc());
             if (sym.n_desc == N_DEAD) continue;
+            if (sym.n_desc == N_BOUNDARY) continue;
 
             const sect_id = sym.n_sect - 1;
             const section = self.sections.items(.header)[sect_id];
@@ -3819,6 +3902,7 @@ fn collectExportData(self: *MachO, trie: *Trie) !void {
         if (sym.undf()) continue;
         assert(sym.ext());
         if (sym.n_desc == N_DEAD) continue;
+        if (sym.n_desc == N_BOUNDARY) continue;
 
         const sym_name = self.getSymbolName(global);
         log.debug("  (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value });
@@ -3953,7 +4037,8 @@ const asc_u64 = std.sort.asc(u64);
 fn addSymbolToFunctionStarts(self: *MachO, sym_loc: SymbolWithLoc, addresses: *std.ArrayList(u64)) !void {
     const sym = self.getSymbol(sym_loc);
     if (sym.n_strx == 0) return;
-    if (sym.n_desc == MachO.N_DEAD) return;
+    if (sym.n_desc == N_DEAD) return;
+    if (sym.n_desc == N_BOUNDARY) return;
     if (self.symbolIsTemp(sym_loc)) return;
     try addresses.append(sym.n_value);
 }
@@ -4061,7 +4146,8 @@ pub fn writeDataInCode(self: *MachO) !void {
         for (object.exec_atoms.items) |atom_index| {
             const atom = self.getAtom(atom_index);
             const sym = self.getSymbol(atom.getSymbolWithLoc());
-            if (sym.n_desc == MachO.N_DEAD) continue;
+            if (sym.n_desc == N_DEAD) continue;
+            if (sym.n_desc == N_BOUNDARY) return;
 
             const source_addr = if (object.getSourceSymbol(atom.sym_index)) |source_sym|
                 source_sym.n_value
@@ -4119,7 +4205,8 @@ fn writeSymtabs(self: *MachO) !void {
 fn addLocalToSymtab(self: *MachO, sym_loc: SymbolWithLoc, locals: *std.ArrayList(macho.nlist_64)) !void {
     const sym = self.getSymbol(sym_loc);
     if (sym.n_strx == 0) return; // no name, skip
-    if (sym.n_desc == MachO.N_DEAD) return; // garbage-collected, skip
+    if (sym.n_desc == N_DEAD) return; // garbage-collected, skip
+    if (sym.n_desc == N_BOUNDARY) return; // boundary symbol, skip
     if (sym.ext()) return; // an export lands in its own symtab section, skip
     if (self.symbolIsTemp(sym_loc)) return; // local temp symbol, skip
     var out_sym = sym;
@@ -4157,6 +4244,7 @@ fn writeSymtab(self: *MachO) !SymtabCtx {
         const sym = self.getSymbol(global);
         if (sym.undf()) continue; // import, skip
         if (sym.n_desc == N_DEAD) continue;
+        if (sym.n_desc == N_BOUNDARY) continue;
         var out_sym = sym;
         out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(global));
         try exports.append(out_sym);
@@ -4172,6 +4260,7 @@ fn writeSymtab(self: *MachO) !SymtabCtx {
         if (sym.n_strx == 0) continue; // no name, skip
         if (!sym.undf()) continue; // not an import, skip
         if (sym.n_desc == N_DEAD) continue;
+        if (sym.n_desc == N_BOUNDARY) continue;
         const new_index = @as(u32, @intCast(imports.items.len));
         var out_sym = sym;
         out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(global));
@@ -4842,6 +4931,55 @@ pub fn getSymbolName(self: *const MachO, sym_with_loc: SymbolWithLoc) []const u8
     }
 }
 
+const BoundarySymbolKind = enum {
+    start,
+    stop,
+};
+
+const SectionBoundarySymbol = struct {
+    kind: BoundarySymbolKind,
+    segname: []const u8,
+    sectname: []const u8,
+};
+
+pub fn getSectionBoundarySymbol(self: *const MachO, sym_with_loc: SymbolWithLoc) ?SectionBoundarySymbol {
+    const sym_name = self.getSymbolName(sym_with_loc);
+    if (mem.startsWith(u8, sym_name, "section$")) {
+        const trailing = sym_name["section$".len..];
+        const kind: BoundarySymbolKind = kind: {
+            if (mem.startsWith(u8, trailing, "start$")) break :kind .start;
+            if (mem.startsWith(u8, trailing, "stop$")) break :kind .stop;
+            return null;
+        };
+        const names = trailing[@tagName(kind).len + 1 ..];
+        const sep_idx = mem.indexOf(u8, names, "$") orelse return null;
+        const segname = names[0..sep_idx];
+        const sectname = names[sep_idx + 1 ..];
+        return .{ .kind = kind, .segname = segname, .sectname = sectname };
+    }
+    return null;
+}
+
+const SegmentBoundarySymbol = struct {
+    kind: BoundarySymbolKind,
+    segname: []const u8,
+};
+
+pub fn getSegmentBoundarySymbol(self: *const MachO, sym_with_loc: SymbolWithLoc) ?SegmentBoundarySymbol {
+    const sym_name = self.getSymbolName(sym_with_loc);
+    if (mem.startsWith(u8, sym_name, "segment$")) {
+        const trailing = sym_name["segment$".len..];
+        const kind: BoundarySymbolKind = kind: {
+            if (mem.startsWith(u8, trailing, "start$")) break :kind .start;
+            if (mem.startsWith(u8, trailing, "stop$")) break :kind .stop;
+            return null;
+        };
+        const segname = trailing[@tagName(kind).len + 1 ..];
+        return .{ .kind = kind, .segname = segname };
+    }
+    return null;
+}
+
 /// Returns pointer to the global entry for `name` if one exists.
 pub fn getGlobalPtr(self: *MachO, name: []const u8) ?*SymbolWithLoc {
     const global_index = self.resolver.get(name) orelse return null;
@@ -5137,6 +5275,23 @@ pub fn reportParseError(
     });
 }
 
+pub fn reportUnresolvedBoundarySymbol(
+    self: *MachO,
+    sym_name: []const u8,
+    comptime format: []const u8,
+    args: anytype,
+) error{OutOfMemory}!void {
+    const gpa = self.base.allocator;
+    try self.misc_errors.ensureUnusedCapacity(gpa, 1);
+    var notes = try gpa.alloc(File.ErrorMsg, 1);
+    errdefer gpa.free(notes);
+    notes[0] = .{ .msg = try std.fmt.allocPrint(gpa, "while resolving {s}", .{sym_name}) };
+    self.misc_errors.appendAssumeCapacity(.{
+        .msg = try std.fmt.allocPrint(gpa, format, args),
+        .notes = notes,
+    });
+}
+
 pub fn reportUndefined(self: *MachO) error{OutOfMemory}!void {
     const gpa = self.base.allocator;
     const count = self.unresolved.count();
@@ -5340,7 +5495,8 @@ pub fn logSymtab(self: *MachO) void {
     for (self.globals.items, 0..) |global, i| {
         const sym = self.getSymbol(global);
         if (sym.undf()) continue;
-        if (sym.n_desc == MachO.N_DEAD) continue;
+        if (sym.n_desc == N_DEAD) continue;
+        if (sym.n_desc == N_BOUNDARY) continue;
         scoped_log.debug("    %{d}: {s} @{x} in sect({d}), {s} (def in object({?}))", .{
             i,
             self.getSymbolName(global),
@@ -5355,7 +5511,8 @@ pub fn logSymtab(self: *MachO) void {
     for (self.globals.items, 0..) |global, i| {
         const sym = self.getSymbol(global);
         if (!sym.undf()) continue;
-        if (sym.n_desc == MachO.N_DEAD) continue;
+        if (sym.n_desc == N_DEAD) continue;
+        if (sym.n_desc == N_BOUNDARY) continue;
         const ord = @divTrunc(sym.n_desc, macho.N_SYMBOL_RESOLVER);
         scoped_log.debug("    %{d}: {s} @{x} in ord({d}), {s}", .{
             i,
@@ -5466,6 +5623,7 @@ pub fn logAtom(self: *MachO, atom_index: Atom.Index, logger: anytype) void {
 
 pub const base_tag: File.Tag = File.Tag.macho;
 pub const N_DEAD: u16 = @as(u16, @bitCast(@as(i16, -1)));
+pub const N_BOUNDARY: u16 = @as(u16, @bitCast(@as(i16, -2)));
 
 /// Mode of operation of the linker.
 pub const Mode = enum {