Commit 7345976261

Jakub Konka <kubkon@jakubkonka.com>
2022-07-21 09:39:12
macho: sort subsection symbols by seniority
1 parent 39df241
Changed files (2)
src
src/link/MachO/Object.zig
@@ -197,11 +197,9 @@ const SymbolAtIndex = struct {
         return mem.sliceTo(@ptrCast([*:0]const u8, ctx.strtab.ptr + sym.n_strx), 0);
     }
 
+    /// Returns whether lhs is less than rhs by allocated address in object file.
+    /// Undefined symbols are pushed to the back (always evaluate to true).
     fn lessThan(ctx: Context, lhs_index: SymbolAtIndex, rhs_index: SymbolAtIndex) bool {
-        // We sort by type: defined < undefined, and
-        // afterwards by address in each group. Normally, dysymtab should
-        // be enough to guarantee the sort, but turns out not every compiler
-        // is kind enough to specify the symbols in the correct order.
         const lhs = lhs_index.getSymbol(ctx);
         const rhs = rhs_index.getSymbol(ctx);
         if (lhs.sect()) {
@@ -215,6 +213,29 @@ const SymbolAtIndex = struct {
             return false;
         }
     }
+
+    /// Returns whether lhs is less senior than rhs. The rules are:
+    /// 1. ext
+    /// 2. weak
+    /// 3. local
+    /// 4. temp (local starting with `l` prefix).
+    fn lessThanBySeniority(ctx: Context, lhs_index: SymbolAtIndex, rhs_index: SymbolAtIndex) bool {
+        const lhs = lhs_index.getSymbol(ctx);
+        const rhs = rhs_index.getSymbol(ctx);
+        if (!rhs.ext()) {
+            const lhs_name = lhs_index.getSymbolName(ctx);
+            return mem.startsWith(u8, lhs_name, "l") or mem.startsWith(u8, lhs_name, "L");
+        } else if (rhs.pext() or rhs.weakDef()) {
+            return !lhs.ext();
+        } else {
+            return false;
+        }
+    }
+
+    /// Like lessThanBySeniority but negated.
+    fn greaterThanBySeniority(ctx: Context, lhs_index: SymbolAtIndex, rhs_index: SymbolAtIndex) bool {
+        return !lessThanBySeniority(ctx, lhs_index, rhs_index);
+    }
 };
 
 fn filterSymbolsByAddress(
@@ -295,6 +316,10 @@ pub fn splitIntoAtomsOneShot(
         sorted_all_syms.appendAssumeCapacity(.{ .index = @intCast(u32, index) });
     }
 
+    // We sort by type: defined < undefined, and
+    // afterwards by address in each group. Normally, dysymtab should
+    // be enough to guarantee the sort, but turns out not every compiler
+    // is kind enough to specify the symbols in the correct order.
     sort.sort(SymbolAtIndex, sorted_all_syms.items, context, SymbolAtIndex.lessThan);
 
     // Well, shit, sometimes compilers skip the dysymtab load command altogether, meaning we
@@ -409,10 +434,18 @@ pub fn splitIntoAtomsOneShot(
                 );
                 next_sym_count += atom_syms.len;
 
+                // We want to bubble up the first externally defined symbol here.
                 assert(atom_syms.len > 0);
-                const sym_index = for (atom_syms) |atom_sym| {
-                    if (atom_sym.getSymbol(context).ext()) break atom_sym.index;
-                } else atom_syms[0].index;
+                var sorted_atom_syms = std.ArrayList(SymbolAtIndex).init(gpa);
+                defer sorted_atom_syms.deinit();
+                try sorted_atom_syms.appendSlice(atom_syms);
+                sort.sort(
+                    SymbolAtIndex,
+                    sorted_atom_syms.items,
+                    context,
+                    SymbolAtIndex.greaterThanBySeniority,
+                );
+
                 const atom_size = blk: {
                     const end_addr = if (next_sym_count < filtered_syms.len)
                         filtered_syms[next_sym_count].getSymbol(context).n_value
@@ -432,12 +465,12 @@ pub fn splitIntoAtomsOneShot(
                 const atom = try self.createAtomFromSubsection(
                     macho_file,
                     object_id,
-                    sym_index,
+                    sorted_atom_syms.items[0].index,
                     atom_size,
                     atom_align,
                     atom_code,
                     relocs,
-                    atom_syms[1..],
+                    sorted_atom_syms.items[1..],
                     match,
                     sect,
                     gc_roots,
@@ -552,12 +585,7 @@ fn createAtomFromSubsection(
     // the filtered symbols and note which symbol is contained within so that
     // we can properly allocate addresses down the line.
     // While we're at it, we need to update segment,section mapping of each symbol too.
-    try atom.contained.ensureTotalCapacity(gpa, indexes.len + 1);
-    atom.contained.appendAssumeCapacity(.{
-        .sym_index = sym_index,
-        .offset = 0,
-    });
-
+    try atom.contained.ensureTotalCapacity(gpa, indexes.len);
     for (indexes) |inner_sym_index| {
         const inner_sym = &self.symtab.items[inner_sym_index.index];
         inner_sym.n_sect = macho_file.getSectionOrdinal(match);
src/link/MachO.zig
@@ -2940,12 +2940,6 @@ fn createTentativeDefAtoms(self: *MachO) !void {
 
         if (global.file) |file| {
             const object = &self.objects.items[file];
-
-            try atom.contained.append(gpa, .{
-                .sym_index = global.sym_index,
-                .offset = 0,
-            });
-
             try object.managed_atoms.append(gpa, atom);
             try object.atom_by_index_table.putNoClobber(gpa, global.sym_index, atom);
         } else {
@@ -5594,6 +5588,7 @@ fn gcAtoms(self: *MachO, gc_roots: *std.AutoHashMap(*Atom, void)) !void {
 
                 const global = atom.getSymbolWithLoc();
                 const sym = atom.getSymbolPtr(self);
+                const match = self.getMatchingSectionFromOrdinal(sym.n_sect);
 
                 if (sym.n_desc == N_DESC_GCED) continue;
                 if (!sym.ext()) {
@@ -5605,15 +5600,6 @@ fn gcAtoms(self: *MachO, gc_roots: *std.AutoHashMap(*Atom, void)) !void {
                     } else continue;
                 }
 
-                loop = true;
-                const match = self.getMatchingSectionFromOrdinal(sym.n_sect);
-
-                // TODO don't dedup eh_frame info yet until we actually implement parsing unwind records
-                if (match.eql(.{
-                    .seg = self.text_segment_cmd_index,
-                    .sect = self.eh_frame_section_index,
-                })) continue;
-
                 self.logAtom(atom);
                 sym.n_desc = N_DESC_GCED;
                 self.removeAtomFromSection(atom, match);
@@ -5644,6 +5630,8 @@ fn gcAtoms(self: *MachO, gc_roots: *std.AutoHashMap(*Atom, void)) !void {
                     const tlv_ptr_sym = tlv_ptr_atom.getSymbolPtr(self);
                     tlv_ptr_sym.n_desc = N_DESC_GCED;
                 }
+
+                loop = true;
             }
         }
     }
@@ -6831,7 +6819,6 @@ pub fn generateSymbolStabs(
     };
     const tu_name = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.name);
     const tu_comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.comp_dir);
-    const source_symtab = object.getSourceSymtab();
 
     // Open scope
     try locals.ensureUnusedCapacity(3);
@@ -6857,71 +6844,27 @@ pub fn generateSymbolStabs(
         .n_value = object.mtime,
     });
 
+    var stabs_buf: [4]macho.nlist_64 = undefined;
+
     for (object.managed_atoms.items) |atom| {
+        const stabs = try self.generateSymbolStabsForSymbol(
+            atom.getSymbolWithLoc(),
+            debug_info,
+            &stabs_buf,
+        );
+        try locals.appendSlice(stabs);
+
         for (atom.contained.items) |sym_at_off| {
             const sym_loc = SymbolWithLoc{
                 .sym_index = sym_at_off.sym_index,
                 .file = atom.file,
             };
-            const sym = self.getSymbol(sym_loc);
-            const sym_name = self.getSymbolName(sym_loc);
-            if (sym.n_strx == 0) continue;
-            if (sym.n_desc == N_DESC_GCED) continue;
-            if (self.symbolIsTemp(sym_loc)) continue;
-            if (sym_at_off.sym_index >= source_symtab.len) continue; // synthetic, linker generated
-
-            const source_sym = source_symtab[sym_at_off.sym_index];
-            const size: ?u64 = size: {
-                if (source_sym.tentative()) break :size null;
-                for (debug_info.inner.func_list.items) |func| {
-                    if (func.pc_range) |range| {
-                        if (source_sym.n_value >= range.start and source_sym.n_value < range.end) {
-                            break :size range.end - range.start;
-                        }
-                    }
-                }
-                break :size null;
-            };
-
-            if (size) |ss| {
-                try locals.ensureUnusedCapacity(4);
-                locals.appendAssumeCapacity(.{
-                    .n_strx = 0,
-                    .n_type = macho.N_BNSYM,
-                    .n_sect = sym.n_sect,
-                    .n_desc = 0,
-                    .n_value = sym.n_value,
-                });
-                locals.appendAssumeCapacity(.{
-                    .n_strx = try self.strtab.insert(gpa, sym_name),
-                    .n_type = macho.N_FUN,
-                    .n_sect = sym.n_sect,
-                    .n_desc = 0,
-                    .n_value = sym.n_value,
-                });
-                locals.appendAssumeCapacity(.{
-                    .n_strx = 0,
-                    .n_type = macho.N_FUN,
-                    .n_sect = 0,
-                    .n_desc = 0,
-                    .n_value = ss,
-                });
-                locals.appendAssumeCapacity(.{
-                    .n_strx = 0,
-                    .n_type = macho.N_ENSYM,
-                    .n_sect = sym.n_sect,
-                    .n_desc = 0,
-                    .n_value = ss,
-                });
-            } else {
-                try locals.append(.{
-                    .n_strx = try self.strtab.insert(gpa, sym_name),
-                    .n_type = macho.N_STSYM,
-                    .n_sect = sym.n_sect,
-                    .n_desc = 0,
-                    .n_value = sym.n_value,
-                });
-            }
+            const contained_stabs = try self.generateSymbolStabsForSymbol(
+                sym_loc,
+                debug_info,
+                &stabs_buf,
+            );
+            try locals.appendSlice(contained_stabs);
         }
     }
 
@@ -6935,6 +6878,78 @@ pub fn generateSymbolStabs(
     });
 }
 
+fn generateSymbolStabsForSymbol(
+    self: *MachO,
+    sym_loc: SymbolWithLoc,
+    debug_info: DebugInfo,
+    buf: *[4]macho.nlist_64,
+) ![]const macho.nlist_64 {
+    const gpa = self.base.allocator;
+    const object = self.objects.items[sym_loc.file.?];
+    const source_symtab = object.getSourceSymtab();
+    const sym = self.getSymbol(sym_loc);
+    const sym_name = self.getSymbolName(sym_loc);
+
+    if (sym.n_strx == 0) return buf[0..0];
+    if (sym.n_desc == N_DESC_GCED) return buf[0..0];
+    if (self.symbolIsTemp(sym_loc)) return buf[0..0];
+    if (sym_loc.sym_index >= source_symtab.len) return buf[0..0]; // synthetic, linker generated
+
+    const source_sym = source_symtab[sym_loc.sym_index];
+    const size: ?u64 = size: {
+        if (source_sym.tentative()) break :size null;
+        for (debug_info.inner.func_list.items) |func| {
+            if (func.pc_range) |range| {
+                if (source_sym.n_value >= range.start and source_sym.n_value < range.end) {
+                    break :size range.end - range.start;
+                }
+            }
+        }
+        break :size null;
+    };
+
+    if (size) |ss| {
+        buf[0] = .{
+            .n_strx = 0,
+            .n_type = macho.N_BNSYM,
+            .n_sect = sym.n_sect,
+            .n_desc = 0,
+            .n_value = sym.n_value,
+        };
+        buf[1] = .{
+            .n_strx = try self.strtab.insert(gpa, sym_name),
+            .n_type = macho.N_FUN,
+            .n_sect = sym.n_sect,
+            .n_desc = 0,
+            .n_value = sym.n_value,
+        };
+        buf[2] = .{
+            .n_strx = 0,
+            .n_type = macho.N_FUN,
+            .n_sect = 0,
+            .n_desc = 0,
+            .n_value = ss,
+        };
+        buf[3] = .{
+            .n_strx = 0,
+            .n_type = macho.N_ENSYM,
+            .n_sect = sym.n_sect,
+            .n_desc = 0,
+            .n_value = ss,
+        };
+        return buf;
+    } else {
+        buf[0] = .{
+            .n_strx = try self.strtab.insert(gpa, sym_name),
+            .n_type = macho.N_STSYM,
+            .n_sect = sym.n_sect,
+            .n_desc = 0,
+            .n_value = sym.n_value,
+        };
+        return buf[0..1];
+    }
+}
+
 fn snapshotState(self: *MachO) !void {
     const emit = self.base.options.emit orelse {
         log.debug("no emit directory found; skipping snapshot...", .{});