Commit a089a6dc4f

Jakub Konka <kubkon@jakubkonka.com>
2022-07-18 12:03:06
macho: parse data-in-code when writing LINKEDIT segment
1 parent 2c184f9
Changed files (3)
src/link/MachO/Atom.zig
@@ -59,9 +59,6 @@ bindings: std.ArrayListUnmanaged(Binding) = .{},
 /// List of lazy bindings (cf bindings above).
 lazy_bindings: std.ArrayListUnmanaged(Binding) = .{},
 
-/// List of data-in-code entries. This is currently specific to x86_64 only.
-dices: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},
-
 /// Points to the previous and next neighbours
 next: ?*Atom,
 prev: ?*Atom,
@@ -147,7 +144,6 @@ pub const empty = Atom{
 };
 
 pub fn deinit(self: *Atom, allocator: Allocator) void {
-    self.dices.deinit(allocator);
     self.lazy_bindings.deinit(allocator);
     self.bindings.deinit(allocator);
     self.rebases.deinit(allocator);
@@ -157,7 +153,6 @@ pub fn deinit(self: *Atom, allocator: Allocator) void {
 }
 
 pub fn clearRetainingCapacity(self: *Atom) void {
-    self.dices.clearRetainingCapacity();
     self.lazy_bindings.clearRetainingCapacity();
     self.bindings.clearRetainingCapacity();
     self.rebases.clearRetainingCapacity();
src/link/MachO/Object.zig
@@ -177,7 +177,6 @@ pub fn parse(self: *Object, allocator: Allocator, target: std.Target) !void {
     }
 
     try self.parseSymtab(allocator);
-    self.parseDataInCode();
 }
 
 const Context = struct {
@@ -264,25 +263,6 @@ fn filterRelocs(
     return relocs[start..end];
 }
 
-fn filterDice(
-    dices: []const macho.data_in_code_entry,
-    start_addr: u64,
-    end_addr: u64,
-) []const macho.data_in_code_entry {
-    const Predicate = struct {
-        addr: u64,
-
-        pub fn predicate(self: @This(), dice: macho.data_in_code_entry) bool {
-            return dice.offset >= self.addr;
-        }
-    };
-
-    const start = MachO.findFirst(macho.data_in_code_entry, dices, 0, Predicate{ .addr = start_addr });
-    const end = MachO.findFirst(macho.data_in_code_entry, dices, start, Predicate{ .addr = end_addr });
-
-    return dices[start..end];
-}
-
 /// Splits object into atoms assuming one-shot linking mode.
 pub fn splitIntoAtomsOneShot(
     self: *Object,
@@ -378,15 +358,6 @@ pub fn splitIntoAtomsOneShot(
             context,
         );
 
-        macho_file.has_dices = macho_file.has_dices or blk: {
-            if (self.text_section_index) |index| {
-                if (index != id) break :blk false;
-                if (self.data_in_code_entries.len == 0) break :blk false;
-                break :blk true;
-            }
-            break :blk false;
-        };
-
         if (subsections_via_symbols and filtered_syms.len > 0) {
             // If the first nlist does not match the start of the section,
             // then we need to encapsulate the memory range [section start, first symbol)
@@ -574,19 +545,6 @@ fn createAtomFromSubsection(
         .base_offset = @intCast(i32, base_offset),
     });
 
-    if (macho_file.has_dices) {
-        const dices = filterDice(self.data_in_code_entries, sym.n_value, sym.n_value + size);
-        try atom.dices.ensureTotalCapacity(gpa, dices.len);
-
-        for (dices) |dice| {
-            atom.dices.appendAssumeCapacity(.{
-                .offset = dice.offset - (math.cast(u32, sym.n_value) orelse return error.Overflow),
-                .length = dice.length,
-                .kind = dice.kind,
-            });
-        }
-    }
-
     // Since this atom gets a helper local temporary symbol that didn't exist
     // in the object file which encompasses the entire section, we need to traverse
     // the filtered symbols and note which symbol is contained within so that
@@ -651,11 +609,11 @@ pub fn getSourceSymtab(self: Object) []const macho.nlist_64 {
     );
 }
 
-fn parseDataInCode(self: *Object) void {
-    const index = self.data_in_code_cmd_index orelse return;
+pub fn parseDataInCode(self: Object) ?[]const macho.data_in_code_entry {
+    const index = self.data_in_code_cmd_index orelse return null;
     const data_in_code = self.load_commands.items[index].linkedit_data;
     const raw_dice = self.contents[data_in_code.dataoff..][0..data_in_code.datasize];
-    self.data_in_code_entries = mem.bytesAsSlice(
+    return mem.bytesAsSlice(
         macho.data_in_code_entry,
         @alignCast(@alignOf(macho.data_in_code_entry), raw_dice),
     );
src/link/MachO.zig
@@ -187,7 +187,6 @@ error_flags: File.ErrorFlags = File.ErrorFlags{},
 
 load_commands_dirty: bool = false,
 sections_order_dirty: bool = false,
-has_dices: bool = false,
 
 /// A helper var to indicate if we are at the start of the incremental updates, or
 /// already somewhere further along the update-and-run chain.
@@ -6139,55 +6138,74 @@ fn writeFunctionStarts(self: *MachO) !void {
     self.load_commands_dirty = true;
 }
 
-fn writeDices(self: *MachO) !void {
-    if (!self.has_dices) return;
+fn filterDataInCode(
+    dices: []const macho.data_in_code_entry,
+    start_addr: u64,
+    end_addr: u64,
+) []const macho.data_in_code_entry {
+    const Predicate = struct {
+        addr: u64,
 
+        pub fn predicate(self: @This(), dice: macho.data_in_code_entry) bool {
+            return dice.offset >= self.addr;
+        }
+    };
+
+    const start = MachO.findFirst(macho.data_in_code_entry, dices, 0, Predicate{ .addr = start_addr });
+    const end = MachO.findFirst(macho.data_in_code_entry, dices, start, Predicate{ .addr = end_addr });
+
+    return dices[start..end];
+}
+
+fn writeDataInCode(self: *MachO) !void {
     const tracy = trace(@src());
     defer tracy.end();
 
-    var buf = std.ArrayList(u8).init(self.base.allocator);
-    defer buf.deinit();
+    var out_dice = std.ArrayList(macho.data_in_code_entry).init(self.base.allocator);
+    defer out_dice.deinit();
 
-    var atom: *Atom = self.atoms.get(.{
+    const text_sect = self.getSection(.{
         .seg = self.text_segment_cmd_index orelse return,
         .sect = self.text_section_index orelse return,
-    }) orelse return;
-
-    while (atom.prev) |prev| {
-        atom = prev;
-    }
-
-    const text_sect = self.getSection(.{
-        .seg = self.text_segment_cmd_index.?,
-        .sect = self.text_section_index.?,
     });
 
-    while (true) {
-        if (atom.dices.items.len > 0) {
+    for (self.objects.items) |object| {
+        const dice = object.parseDataInCode() orelse continue;
+        const source_symtab = object.getSourceSymtab();
+        try out_dice.ensureUnusedCapacity(dice.len);
+
+        for (object.managed_atoms.items) |atom| {
             const sym = atom.getSymbol(self);
-            const base_off = math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset) orelse return error.Overflow;
-
-            try buf.ensureUnusedCapacity(atom.dices.items.len * @sizeOf(macho.data_in_code_entry));
-            for (atom.dices.items) |dice| {
-                const rebased_dice = macho.data_in_code_entry{
-                    .offset = base_off + dice.offset,
-                    .length = dice.length,
-                    .kind = dice.kind,
-                };
-                buf.appendSliceAssumeCapacity(mem.asBytes(&rebased_dice));
+            if (sym.n_desc == N_DESC_GCED) continue;
+            if (atom.sym_index >= source_symtab.len) continue; // synthetic, linker generated
+
+            const match = self.getMatchingSectionFromOrdinal(sym.n_sect);
+            if (match.seg != self.text_segment_cmd_index.? and match.sect != self.text_section_index.?) {
+                continue;
             }
-        }
 
-        if (atom.next) |next| {
-            atom = next;
-        } else break;
+            const source_sym = source_symtab[atom.sym_index];
+            const source_addr = math.cast(u32, source_sym.n_value) orelse return error.Overflow;
+            const filtered_dice = filterDataInCode(dice, source_addr, source_addr + atom.size);
+            const base = math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset) orelse
+                return error.Overflow;
+
+            for (filtered_dice) |single| {
+                const offset = single.offset - source_addr + base;
+                out_dice.appendAssumeCapacity(.{
+                    .offset = offset,
+                    .length = single.length,
+                    .kind = single.kind,
+                });
+            }
+        }
     }
 
     const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment;
     const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].linkedit_data;
 
     const dataoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(u64));
-    const datasize = buf.items.len;
+    const datasize = out_dice.items.len * @sizeOf(macho.data_in_code_entry);
     dice_cmd.dataoff = @intCast(u32, dataoff);
     dice_cmd.datasize = @intCast(u32, datasize);
     seg.inner.filesize = dice_cmd.dataoff + dice_cmd.datasize - seg.inner.fileoff;
@@ -6197,7 +6215,7 @@ fn writeDices(self: *MachO) !void {
         dice_cmd.dataoff + dice_cmd.datasize,
     });
 
-    try self.base.file.?.pwriteAll(buf.items, dice_cmd.dataoff);
+    try self.base.file.?.pwriteAll(mem.sliceAsBytes(out_dice.items), dice_cmd.dataoff);
     self.load_commands_dirty = true;
 }
 
@@ -6392,7 +6410,7 @@ fn writeLinkeditSegment(self: *MachO) !void {
 
     try self.writeDyldInfoData();
     try self.writeFunctionStarts();
-    try self.writeDices();
+    try self.writeDataInCode();
     try self.writeSymtab();
     try self.writeStrtab();