Commit 54888c6f46

Jakub Konka <kubkon@jakubkonka.com>
2021-07-06 00:00:11
zld: create TextBlocks for tentative definitions
and fix the links in the `TextBlock`s linked list!
1 parent 51e334a
Changed files (3)
src/link/MachO/Object.zig
@@ -9,6 +9,7 @@ const log = std.log.scoped(.object);
 const macho = std.macho;
 const mem = std.mem;
 const reloc = @import("reloc.zig");
+const sort = std.sort;
 
 const Allocator = mem.Allocator;
 const Arch = std.Target.Cpu.Arch;
@@ -345,13 +346,15 @@ const TextBlockParser = struct {
     };
 
     fn lessThanBySeniority(context: SeniorityContext, lhs: NlistWithIndex, rhs: NlistWithIndex) bool {
-        const lreg = context.zld.locals.items[lhs.index].payload.regular;
-        const rreg = context.zld.locals.items[rhs.index].payload.regular;
+        const lsym = context.zld.locals.items[lhs.index];
+        const rsym = context.zld.locals.items[rhs.index];
+        const lreg = lsym.payload.regular;
+        const rreg = rsym.payload.regular;
 
         return switch (rreg.linkage) {
             .global => true,
             .linkage_unit => lreg.linkage == .translation_unit,
-            else => false,
+            else => lsym.isTemp(),
         };
     }
 
@@ -388,7 +391,7 @@ const TextBlockParser = struct {
 
         if (aliases.items.len > 1) {
             // Bubble-up senior symbol as the main link to the text block.
-            std.sort.sort(
+            sort.sort(
                 NlistWithIndex,
                 aliases.items,
                 SeniorityContext{ .zld = self.zld },
@@ -427,13 +430,12 @@ const TextBlockParser = struct {
         };
 
         self.index += 1;
-        block.print_this(self.zld);
 
         return block;
     }
 };
 
-pub fn parseTextBlocks(self: *Object, zld: *Zld) !?*TextBlock {
+pub fn parseTextBlocks(self: *Object, zld: *Zld) !void {
     const seg = self.load_commands.items[self.segment_cmd_index.?].Segment;
 
     log.warn("analysing {s}", .{self.name.?});
@@ -453,9 +455,7 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !?*TextBlock {
         });
     }
 
-    std.sort.sort(NlistWithIndex, sorted_nlists.items, {}, NlistWithIndex.lessThan);
-
-    var last_block: ?*TextBlock = null;
+    sort.sort(NlistWithIndex, sorted_nlists.items, {}, NlistWithIndex.lessThan);
 
     for (seg.sections.items) |sect, sect_id| {
         log.warn("putting section '{s},{s}' as a TextBlock", .{
@@ -496,24 +496,35 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !?*TextBlock {
                 };
 
                 while (try parser.next()) |block| {
-                    const sym = zld.locals.items[block.local_sym_index];
-                    if (sym.payload.regular.file) |file| {
-                        if (file != self) {
-                            log.warn("deduping definition of {s} in {s}", .{ sym.name, self.name.? });
-                            continue;
+                    {
+                        const sym = zld.locals.items[block.local_sym_index];
+                        const reg = &sym.payload.regular;
+                        if (reg.file) |file| {
+                            if (file != self) {
+                                log.warn("deduping definition of {s} in {s}", .{ sym.name, self.name.? });
+                                continue;
+                            }
                         }
+                        reg.segment_id = match.seg;
+                        reg.section_id = match.sect;
                     }
 
-                    block.segment_id = match.seg;
-                    block.section_id = match.sect;
+                    if (block.aliases) |aliases| {
+                        for (aliases) |alias| {
+                            const sym = zld.locals.items[alias];
+                            const reg = &sym.payload.regular;
+                            reg.segment_id = match.seg;
+                            reg.section_id = match.sect;
+                        }
+                    }
 
                     // TODO parse relocs
 
-                    if (last_block) |last| {
+                    if (zld.last_text_block) |last| {
                         last.next = block;
                         block.prev = last;
                     }
-                    last_block = block;
+                    zld.last_text_block = block;
                 }
 
                 break :next;
@@ -531,6 +542,8 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !?*TextBlock {
             symbol.payload = .{
                 .regular = .{
                     .linkage = .translation_unit,
+                    .segment_id = match.seg,
+                    .section_id = match.sect,
                     .file = self,
                 },
             };
@@ -545,21 +558,17 @@ pub fn parseTextBlocks(self: *Object, zld: *Zld) !?*TextBlock {
                 .code = code,
                 .size = sect.size,
                 .alignment = sect.@"align",
-                .segment_id = match.seg,
-                .section_id = match.sect,
             };
 
             // TODO parse relocs
 
-            if (last_block) |last| {
+            if (zld.last_text_block) |last| {
                 last.next = block;
                 block.prev = last;
             }
-            last_block = block;
+            zld.last_text_block = block;
         }
     }
-
-    return last_block;
 }
 
 pub fn parseInitializers(self: *Object) !void {
src/link/MachO/Symbol.zig
@@ -46,7 +46,7 @@ pub const Regular = struct {
     segment_id: u16 = 0,
 
     /// Section ID
-    section: u16 = 0,
+    section_id: u16 = 0,
 
     /// Whether the symbol is a weak ref.
     weak_ref: bool = false,
@@ -69,7 +69,8 @@ pub const Regular = struct {
         try std.fmt.format(writer, "Regular {{ ", .{});
         try std.fmt.format(writer, ".linkage = {s},  ", .{self.linkage});
         try std.fmt.format(writer, ".address = 0x{x}, ", .{self.address});
-        try std.fmt.format(writer, ".section = {}, ", .{self.section});
+        try std.fmt.format(writer, ".segment_id = {}, ", .{self.segment_id});
+        try std.fmt.format(writer, ".section_id = {}, ", .{self.section_id});
         if (self.weak_ref) {
             try std.fmt.format(writer, ".weak_ref, ", .{});
         }
@@ -170,6 +171,21 @@ pub fn new(allocator: *Allocator, name: []const u8) !*Symbol {
     return new_sym;
 }
 
+pub fn format(self: Symbol, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
+    _ = fmt;
+    _ = options;
+    try std.fmt.format(writer, "Symbol {{", .{});
+    try std.fmt.format(writer, ".name = {s}, ", .{self.name});
+    if (self.got_index) |got_index| {
+        try std.fmt.format(writer, ".got_index = {}, ", .{got_index});
+    }
+    if (self.stubs_index) |stubs_index| {
+        try std.fmt.format(writer, ".stubs_index = {}, ", .{stubs_index});
+    }
+    try std.fmt.format(writer, "{}, ", .{self.payload});
+    try std.fmt.format(writer, "}}", .{});
+}
+
 pub fn isTemp(symbol: Symbol) bool {
     switch (symbol.payload) {
         .regular => |regular| {
src/link/MachO/Zld.zig
@@ -107,11 +107,6 @@ locals: std.ArrayListUnmanaged(*Symbol) = .{},
 imports: std.ArrayListUnmanaged(*Symbol) = .{},
 globals: std.StringArrayHashMapUnmanaged(*Symbol) = .{},
 
-/// Offset into __DATA,__common section.
-/// Set if the linker found tentative definitions in any of the objects.
-tentative_defs_offset: u64 = 0,
-has_tentative_defs: bool = false,
-
 threadlocal_offsets: std.ArrayListUnmanaged(TlvOffset) = .{}, // TODO merge with Symbol abstraction
 local_rebases: std.ArrayListUnmanaged(Pointer) = .{},
 stubs: std.ArrayListUnmanaged(*Symbol) = .{},
@@ -145,8 +140,6 @@ pub const TextBlock = struct {
     relocs: ?[]*Relocation = null,
     size: u64,
     alignment: u32,
-    segment_id: u16 = 0,
-    section_id: u16 = 0,
     next: ?*TextBlock = null,
     prev: ?*TextBlock = null,
 
@@ -168,23 +161,21 @@ pub const TextBlock = struct {
 
     pub fn print_this(self: *const TextBlock, zld: *Zld) void {
         log.warn("TextBlock", .{});
-        log.warn("  | {}: '{s}'", .{ self.local_sym_index, zld.locals.items[self.local_sym_index].name });
+        log.warn("  | {}: {}", .{ self.local_sym_index, zld.locals.items[self.local_sym_index] });
         if (self.aliases) |aliases| {
             log.warn("  | Aliases:", .{});
             for (aliases) |index| {
-                log.warn("    | {}: '{s}'", .{ index, zld.locals.items[index].name });
+                log.warn("    | {}: {}", .{ index, zld.locals.items[index] });
             }
         }
         if (self.references) |references| {
             log.warn("  | References:", .{});
             for (references) |index| {
-                log.warn("    | {}: '{s}'", .{ index, zld.locals.items[index].name });
+                log.warn("    | {}: {}", .{ index, zld.locals.items[index] });
             }
         }
         log.warn("  | size = {}", .{self.size});
         log.warn("  | align = {}", .{self.alignment});
-        log.warn("  | segment_id = {}", .{self.segment_id});
-        log.warn("  | section_id = {}", .{self.section_id});
     }
 
     pub fn print(self: *const TextBlock, zld: *Zld) void {
@@ -300,7 +291,6 @@ pub fn link(self: *Zld, files: []const []const u8, output: Output, args: LinkArg
     // try self.allocateDataSegment();
     // self.allocateLinkeditSegment();
     // try self.allocateSymbols();
-    // try self.allocateTentativeSymbols();
     // try self.allocateProxyBindAddresses();
     // try self.flush();
 }
@@ -415,37 +405,6 @@ fn updateMetadata(self: *Zld) !void {
         }
     }
 
-    // Ensure we have __DATA,__common section if we have tentative definitions.
-    // Update size and alignment of __DATA,__common section.
-    if (self.has_tentative_defs) {
-        const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
-        const common_section_index = self.common_section_index orelse ind: {
-            self.common_section_index = @intCast(u16, data_seg.sections.items.len);
-            try data_seg.addSection(self.allocator, "__common", .{
-                .flags = macho.S_ZEROFILL,
-            });
-            break :ind self.common_section_index.?;
-        };
-        const common_sect = &data_seg.sections.items[common_section_index];
-
-        var max_align: u16 = 0;
-        var added_size: u64 = 0;
-        for (self.globals.values()) |sym| {
-            if (sym.payload != .tentative) continue;
-            max_align = math.max(max_align, sym.payload.tentative.alignment);
-            added_size += sym.payload.tentative.size;
-        }
-
-        common_sect.@"align" = math.max(common_sect.@"align", max_align);
-
-        const alignment = try math.powi(u32, 2, common_sect.@"align");
-        const offset = mem.alignForwardGeneric(u64, common_sect.size, alignment);
-        const size = mem.alignForwardGeneric(u64, added_size, alignment);
-
-        common_sect.size = offset + size;
-        self.tentative_defs_offset = offset;
-    }
-
     tlv_align: {
         const has_tlv =
             self.tlv_section_index != null or
@@ -1182,48 +1141,6 @@ fn allocateSymbols(self: *Zld) !void {
     }
 }
 
-fn allocateTentativeSymbols(self: *Zld) !void {
-    if (!self.has_tentative_defs) return;
-
-    const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
-    const common_sect = &data_seg.sections.items[self.common_section_index.?];
-
-    const alignment = try math.powi(u32, 2, common_sect.@"align");
-    var base_address: u64 = common_sect.addr + self.tentative_defs_offset;
-
-    log.debug("base address for tentative definitions 0x{x}", .{base_address});
-
-    // TODO there might be a more generic way of doing this.
-    var section: u8 = 0;
-    for (self.load_commands.items) |cmd, cmd_id| {
-        if (cmd != .Segment) break;
-        if (cmd_id == self.data_segment_cmd_index.?) {
-            section += @intCast(u8, self.common_section_index.?) + 1;
-            break;
-        }
-        section += @intCast(u8, cmd.Segment.sections.items.len);
-    }
-
-    // Convert tentative definitions into regular symbols.
-    for (self.globals.values()) |sym| {
-        if (sym.payload != .tentative) continue;
-
-        const address = mem.alignForwardGeneric(u64, base_address + sym.payload.tentative.size, alignment);
-
-        log.debug("tentative definition '{s}' allocated from 0x{x} to 0x{x}", .{ sym.name, base_address, address });
-
-        sym.payload = .{
-            .regular = .{
-                .linkage = .global,
-                .address = base_address,
-                .section = section,
-                .weak_ref = false,
-            },
-        };
-        base_address = address;
-    }
-}
-
 fn allocateProxyBindAddresses(self: *Zld) !void {
     for (self.objects.items) |object| {
         for (object.sections.items) |sect| {
@@ -1648,15 +1565,56 @@ fn resolveSymbols(self: *Zld) !void {
     }
 
     // Put any globally defined regular symbol as local.
-    // Mark if we need to allocate zerofill section for tentative definitions
+    // Convert any tentative definition into a regular symbol and allocate
+    // text blocks for each tentative definition.
     for (self.globals.values()) |symbol| {
         switch (symbol.payload) {
             .regular => |*reg| {
                 reg.local_sym_index = @intCast(u32, self.locals.items.len);
                 try self.locals.append(self.allocator, symbol);
             },
-            .tentative => {
-                self.has_tentative_defs = true;
+            .tentative => |tent| {
+                if (self.common_section_index == null) {
+                    const data_seg = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+                    self.common_section_index = @intCast(u16, data_seg.sections.items.len);
+                    try data_seg.addSection(self.allocator, "__common", .{
+                        .flags = macho.S_ZEROFILL,
+                    });
+                }
+
+                const size = tent.size;
+                const code = try self.allocator.alloc(u8, size);
+                mem.set(u8, code, 0);
+                const alignment = tent.alignment;
+                const local_sym_index = @intCast(u32, self.locals.items.len);
+
+                symbol.payload = .{
+                    .regular = .{
+                        .linkage = .global,
+                        .segment_id = self.data_segment_cmd_index.?,
+                        .section_id = self.common_section_index.?,
+                        .local_sym_index = local_sym_index,
+                    },
+                };
+                try self.locals.append(self.allocator, symbol);
+
+                const block = try self.allocator.create(TextBlock);
+                errdefer self.allocator.destroy(block);
+
+                block.* = .{
+                    .local_sym_index = local_sym_index,
+                    .code = code,
+                    .size = size,
+                    .alignment = alignment,
+                };
+
+                // TODO I'm not 100% sure about this yet, but I believe we should keep a separate list of
+                // TextBlocks per segment.
+                if (self.last_text_block) |last| {
+                    last.next = block;
+                    block.prev = last;
+                }
+                self.last_text_block = block;
             },
             else => {},
         }
@@ -1733,13 +1691,7 @@ fn resolveSymbols(self: *Zld) !void {
 
 fn parseTextBlocks(self: *Zld) !void {
     for (self.objects.items) |object| {
-        if (try object.parseTextBlocks(self)) |block| {
-            if (self.last_text_block) |last| {
-                last.next = block;
-                block.prev = last;
-            }
-            self.last_text_block = block;
-        }
+        try object.parseTextBlocks(self);
     }
 
     if (self.last_text_block) |block| {