Commit c575e3daa4

Jakub Konka <kubkon@jakubkonka.com>
2024-07-25 16:47:43
elf: resolve COMDATs in more parallel-friendly way
1 parent fa09276
src/link/Elf/Atom.zig
@@ -346,7 +346,10 @@ pub fn writeRelocs(self: Atom, elf_file: *Elf, out_relocs: *std.ArrayList(elf.El
                 r_sym = elf_file.sectionSymbolOutputSymtabIndex(msub.mergeSection(elf_file).output_section_index);
             } else {
                 r_addend += @intCast(target.address(.{}, elf_file));
-                r_sym = elf_file.sectionSymbolOutputSymtabIndex(target.outputShndx().?);
+                r_sym = if (target.outputShndx()) |osec|
+                    elf_file.sectionSymbolOutputSymtabIndex(osec)
+                else
+                    0;
             },
             else => {
                 r_sym = target.outputSymtabIndex(elf_file) orelse 0;
src/link/Elf/file.zig
@@ -137,6 +137,13 @@ pub const File = union(enum) {
         };
     }
 
+    pub fn comdatGroup(file: File, ind: Elf.ComdatGroup.Index) *Elf.ComdatGroup {
+        return switch (file) {
+            .linker_defined, .shared_object, .zig_object => unreachable,
+            .object => |x| x.comdatGroup(ind),
+        };
+    }
+
     pub fn symbol(file: File, ind: Symbol.Index) Symbol.Index {
         return switch (file) {
             .zig_object => |x| x.symbol(ind),
src/link/Elf/Object.zig
@@ -216,27 +216,41 @@ fn initAtoms(self: *Object, allocator: Allocator, handle: std.fs.File, elf_file:
                 const shndx = @as(u32, @intCast(i));
                 const group_raw_data = try self.preadShdrContentsAlloc(allocator, handle, shndx);
                 defer allocator.free(group_raw_data);
-                const group_nmembers = @divExact(group_raw_data.len, @sizeOf(u32));
+                const group_nmembers = math.divExact(usize, group_raw_data.len, @sizeOf(u32)) catch {
+                    try elf_file.reportParseError2(
+                        self.index,
+                        "corrupt section group: not evenly divisible ",
+                        .{},
+                    );
+                    return error.MalformedObject;
+                };
+                if (group_nmembers == 0) {
+                    try elf_file.reportParseError2(
+                        self.index,
+                        "corrupt section group: empty section",
+                        .{},
+                    );
+                    return error.MalformedObject;
+                }
                 const group_members = @as([*]align(1) const u32, @ptrCast(group_raw_data.ptr))[0..group_nmembers];
 
                 if (group_members[0] != elf.GRP_COMDAT) {
-                    // TODO convert into an error
-                    log.debug("{}: unknown SHT_GROUP format", .{self.fmtPath()});
-                    continue;
+                    try elf_file.reportParseError2(
+                        self.index,
+                        "corrupt section group: unknown SHT_GROUP format",
+                        .{},
+                    );
+                    return error.MalformedObject;
                 }
 
                 const group_start = @as(u32, @intCast(self.comdat_group_data.items.len));
                 try self.comdat_group_data.appendUnalignedSlice(allocator, group_members[1..]);
 
-                const gop = try elf_file.getOrCreateComdatGroupOwner(.{
-                    .off = group_signature,
-                    .file_index = self.index,
-                });
                 const comdat_group_index = try self.addComdatGroup(allocator);
                 const comdat_group = self.comdatGroup(comdat_group_index);
                 comdat_group.* = .{
-                    .owner = gop.index,
-                    .file = self.index,
+                    .signature_off = group_signature,
+                    .file_index = self.index,
                     .shndx = shndx,
                     .members_start = group_start,
                     .members_len = @intCast(group_nmembers - 1),
@@ -912,6 +926,37 @@ pub fn convertCommonSymbols(self: *Object, elf_file: *Elf) !void {
     }
 }
 
+pub fn resolveComdatGroups(self: *Object, elf_file: *Elf, table: anytype) !void {
+    for (self.comdat_groups.items, 0..) |*cg, cgi| {
+        const signature = cg.signature(elf_file);
+        const gop = try table.getOrPut(signature);
+        if (!gop.found_existing) {
+            gop.value_ptr.* = .{ .index = @intCast(cgi), .file = self.index };
+            continue;
+        }
+        const current = elf_file.comdatGroup(gop.value_ptr.*);
+        cg.alive = false;
+        if (self.index < current.file_index) {
+            current.alive = false;
+            cg.alive = true;
+            gop.value_ptr.* = .{ .index = @intCast(cgi), .file = self.index };
+        }
+    }
+}
+
+pub fn markComdatGroupsDead(self: *Object, elf_file: *Elf) void {
+    for (self.comdat_groups.items) |cg| {
+        if (cg.alive) continue;
+        for (cg.comdatGroupMembers(elf_file)) |shndx| {
+            const atom_index = self.atoms_indexes.items[shndx];
+            if (self.atom(atom_index)) |atom_ptr| {
+                atom_ptr.flags.alive = false;
+                atom_ptr.markFdesDead(elf_file);
+            }
+        }
+    }
+}
+
 pub fn initOutputSections(self: *Object, elf_file: *Elf) !void {
     for (self.atoms_indexes.items) |atom_index| {
         const atom_ptr = self.atom(atom_index) orelse continue;
@@ -1428,9 +1473,9 @@ fn formatComdatGroups(
     const elf_file = ctx.elf_file;
     try writer.writeAll("  COMDAT groups\n");
     for (object.comdat_groups.items, 0..) |cg, cg_index| {
-        const cg_owner = elf_file.comdatGroupOwner(cg.owner);
-        if (cg_owner.file != object.index) continue;
-        try writer.print("    COMDAT({d})\n", .{cg_index});
+        try writer.print("    COMDAT({d})", .{cg_index});
+        if (!cg.alive) try writer.writeAll(" : [*]");
+        try writer.writeByte('\n');
         const cg_members = cg.comdatGroupMembers(elf_file);
         for (cg_members) |shndx| {
             const atom_index = object.atoms_indexes.items[shndx];
src/link/Elf/relocatable.zig
@@ -190,7 +190,7 @@ pub fn flushObject(elf_file: *Elf, comp: *Compilation, module_obj_path: ?[]const
     // Now, we are ready to resolve the symbols across all input files.
     // We will first resolve the files in the ZigObject, next in the parsed
     // input Object files.
-    elf_file.resolveSymbols();
+    try elf_file.resolveSymbols();
     elf_file.markEhFrameAtomsDead();
     try elf_file.resolveMergeSections();
     try elf_file.addCommentString();
@@ -318,11 +318,8 @@ fn initComdatGroups(elf_file: *Elf) !void {
 
     for (elf_file.objects.items) |index| {
         const object = elf_file.file(index).?.object;
-
         for (object.comdat_groups.items, 0..) |cg, cg_index| {
-            const cg_owner = elf_file.comdatGroupOwner(cg.owner);
-            if (cg_owner.file != index) continue;
-
+            if (!cg.alive) continue;
             const cg_sec = try elf_file.comdat_group_sections.addOne(gpa);
             cg_sec.* = .{
                 .shndx = try elf_file.addSection(.{
src/link/Elf/synthetic_sections.zig
@@ -1672,12 +1672,6 @@ pub const ComdatGroupSection = struct {
     shndx: u32,
     cg_ref: Elf.Ref,
 
-    fn ownerFile(cgs: ComdatGroupSection, elf_file: *Elf) ?File {
-        const cg = cgs.comdatGroup(elf_file);
-        const cg_owner = elf_file.comdatGroupOwner(cg.owner);
-        return elf_file.file(cg_owner.file);
-    }
-
     fn comdatGroup(cgs: ComdatGroupSection, elf_file: *Elf) *Elf.ComdatGroup {
         const cg_file = elf_file.file(cgs.cg_ref.file).?;
         return cg_file.object.comdatGroup(cgs.cg_ref.index);
@@ -1685,7 +1679,7 @@ pub const ComdatGroupSection = struct {
 
     pub fn symbol(cgs: ComdatGroupSection, elf_file: *Elf) Symbol.Index {
         const cg = cgs.comdatGroup(elf_file);
-        const object = cgs.ownerFile(elf_file).?.object;
+        const object = cg.file(elf_file).object;
         const shdr = object.shdrs.items[cg.shndx];
         return object.symbols.items[shdr.sh_info];
     }
@@ -1698,7 +1692,7 @@ pub const ComdatGroupSection = struct {
 
     pub fn write(cgs: ComdatGroupSection, elf_file: *Elf, writer: anytype) !void {
         const cg = cgs.comdatGroup(elf_file);
-        const object = cgs.ownerFile(elf_file).?.object;
+        const object = cg.file(elf_file).object;
         const members = cg.comdatGroupMembers(elf_file);
         try writer.writeInt(u32, elf.GRP_COMDAT, .little);
         for (members) |shndx| {
src/link/Elf.zig
@@ -215,11 +215,8 @@ merge_subsections: std.ArrayListUnmanaged(MergeSubsection) = .{},
 /// Table of last atom index in a section and matching atom free list if any.
 last_atom_and_free_list_table: LastAtomAndFreeListTable = .{},
 
-comdat_groups_owners: std.ArrayListUnmanaged(ComdatGroupOwner) = .{},
-comdat_groups_table: std.ArrayHashMapUnmanaged(ComdatGroupKey, ComdatGroupOwner.Index, ComdatGroupContext, false) = .{},
-
 /// Global string table used to provide quick access to global symbol resolvers
-/// such as `resolver` and `comdat_groups_table`.
+/// such as `resolver`.
 strings: StringTable = .{},
 
 first_eflags: ?elf.Elf64_Word = null,
@@ -506,8 +503,6 @@ pub fn deinit(self: *Elf) void {
     }
     self.last_atom_and_free_list_table.deinit(gpa);
 
-    self.comdat_groups_owners.deinit(gpa);
-    self.comdat_groups_table.deinit(gpa);
     self.strings.deinit(gpa);
 
     self.got.deinit(gpa);
@@ -1305,7 +1300,7 @@ pub fn flushModule(self: *Elf, arena: Allocator, tid: Zcu.PerThread.Id, prog_nod
     // input Object files.
     // Any qualifing unresolved symbol will be upgraded to an absolute, weak
     // symbol for potential resolution at load-time.
-    self.resolveSymbols();
+    try self.resolveSymbols();
     self.markEhFrameAtomsDead();
     try self.resolveMergeSections();
 
@@ -1955,7 +1950,7 @@ fn accessLibPath(
 /// 4. Reset state of all resolved globals since we will redo this bit on the pruned set.
 /// 5. Remove references to dead objects/shared objects
 /// 6. Re-run symbol resolution on pruned objects and shared objects sets.
-pub fn resolveSymbols(self: *Elf) void {
+pub fn resolveSymbols(self: *Elf) !void {
     // Resolve symbols in the ZigObject. For now, we assume that it's always live.
     if (self.zigObjectPtr()) |zig_object| zig_object.asFile().resolveSymbols(self);
     // Resolve symbols on the set of all objects and shared objects (even if some are unneeded).
@@ -1986,32 +1981,17 @@ pub fn resolveSymbols(self: *Elf) void {
         } else i += 1;
     }
 
-    // Dedup comdat groups.
-    for (self.objects.items) |index| {
-        const object = self.file(index).?.object;
-        for (object.comdat_groups.items) |cg| {
-            const cg_owner = self.comdatGroupOwner(cg.owner);
-            const owner_file_index = if (self.file(cg_owner.file)) |file_ptr|
-                file_ptr.object.index
-            else
-                std.math.maxInt(File.Index);
-            cg_owner.file = @min(owner_file_index, index);
+    {
+        // Dedup comdat groups.
+        var table = std.StringHashMap(Ref).init(self.base.comp.gpa);
+        defer table.deinit();
+
+        for (self.objects.items) |index| {
+            try self.file(index).?.object.resolveComdatGroups(self, &table);
         }
-    }
 
-    for (self.objects.items) |index| {
-        const object = self.file(index).?.object;
-        for (object.comdat_groups.items) |cg| {
-            const cg_owner = self.comdatGroupOwner(cg.owner);
-            if (cg_owner.file != index) {
-                for (cg.comdatGroupMembers(self)) |shndx| {
-                    const atom_index = object.atoms_indexes.items[shndx];
-                    if (object.atom(atom_index)) |atom_ptr| {
-                        atom_ptr.flags.alive = false;
-                        atom_ptr.markFdesDead(self);
-                    }
-                }
-            }
+        for (self.objects.items) |index| {
+            self.file(index).?.object.markComdatGroupsDead(self);
         }
     }
 
@@ -5632,6 +5612,10 @@ pub fn atom(self: *Elf, ref: Ref) ?*Atom {
     return file_ptr.atom(ref.index);
 }
 
+pub fn comdatGroup(self: *Elf, ref: Ref) *ComdatGroup {
+    return self.file(ref.file).?.comdatGroup(ref.index);
+}
+
 /// Returns pointer-to-symbol described at sym_index.
 pub fn symbol(self: *Elf, sym_index: Symbol.Index) *Symbol {
     return &self.symbols.items[sym_index];
@@ -5737,31 +5721,6 @@ pub fn zigObjectPtr(self: *Elf) ?*ZigObject {
     return self.file(index).?.zig_object;
 }
 
-const GetOrCreateComdatGroupOwnerResult = struct {
-    found_existing: bool,
-    index: ComdatGroupOwner.Index,
-};
-
-pub fn getOrCreateComdatGroupOwner(self: *Elf, key: ComdatGroupKey) !GetOrCreateComdatGroupOwnerResult {
-    const gpa = self.base.comp.gpa;
-    const gop = try self.comdat_groups_table.getOrPutContext(gpa, key, .{ .elf_file = self });
-    if (!gop.found_existing) {
-        const index: ComdatGroupOwner.Index = @intCast(self.comdat_groups_owners.items.len);
-        const owner = try self.comdat_groups_owners.addOne(gpa);
-        owner.* = .{};
-        gop.value_ptr.* = index;
-    }
-    return .{
-        .found_existing = gop.found_existing,
-        .index = gop.value_ptr.*,
-    };
-}
-
-pub fn comdatGroupOwner(self: *Elf, index: ComdatGroupOwner.Index) *ComdatGroupOwner {
-    assert(index < self.comdat_groups_owners.items.len);
-    return &self.comdat_groups_owners.items[index];
-}
-
 pub fn addMergeSubsection(self: *Elf) !MergeSubsection.Index {
     const index: MergeSubsection.Index = @intCast(self.merge_subsections.items.len);
     const msec = try self.merge_subsections.addOne(self.base.comp.gpa);
@@ -6243,48 +6202,24 @@ const default_entry_addr = 0x8000000;
 
 pub const base_tag: link.File.Tag = .elf;
 
-const ComdatGroupKey = struct {
-    /// String table offset.
-    off: u32,
-
-    /// File index.
+pub const ComdatGroup = struct {
+    signature_off: u32,
     file_index: File.Index,
+    shndx: u32,
+    members_start: u32,
+    members_len: u32,
+    alive: bool = true,
 
-    pub fn get(key: ComdatGroupKey, elf_file: *Elf) [:0]const u8 {
-        const file_ptr = elf_file.file(key.file_index).?;
-        return file_ptr.getString(key.off);
-    }
-};
-
-const ComdatGroupContext = struct {
-    elf_file: *Elf,
-
-    pub fn eql(ctx: ComdatGroupContext, a: ComdatGroupKey, b: ComdatGroupKey, b_index: usize) bool {
-        _ = b_index;
-        const elf_file = ctx.elf_file;
-        return mem.eql(u8, a.get(elf_file), b.get(elf_file));
+    pub fn file(cg: ComdatGroup, elf_file: *Elf) File {
+        return elf_file.file(cg.file_index).?;
     }
 
-    pub fn hash(ctx: ComdatGroupContext, a: ComdatGroupKey) u32 {
-        return std.array_hash_map.hashString(a.get(ctx.elf_file));
+    pub fn signature(cg: ComdatGroup, elf_file: *Elf) [:0]const u8 {
+        return cg.file(elf_file).object.getString(cg.signature_off);
     }
-};
-
-const ComdatGroupOwner = struct {
-    file: File.Index = 0,
-
-    const Index = u32;
-};
-
-pub const ComdatGroup = struct {
-    owner: ComdatGroupOwner.Index,
-    file: File.Index,
-    shndx: u32,
-    members_start: u32,
-    members_len: u32,
 
     pub fn comdatGroupMembers(cg: ComdatGroup, elf_file: *Elf) []const u32 {
-        const object = elf_file.file(cg.file).?.object;
+        const object = cg.file(elf_file).object;
         return object.comdat_group_data.items[cg.members_start..][0..cg.members_len];
     }
 
test/link/elf.zig
@@ -404,9 +404,7 @@ fn testComdatElimination(b: *Build, opts: Options) *Step {
     main_o.linkLibCpp();
 
     {
-        const exe = addExecutable(b, opts, .{
-            .name = "main1",
-        });
+        const exe = addExecutable(b, opts, .{ .name = "main1" });
         exe.addObject(a_o);
         exe.addObject(main_o);
         exe.linkLibCpp();
@@ -431,9 +429,7 @@ fn testComdatElimination(b: *Build, opts: Options) *Step {
     }
 
     {
-        const exe = addExecutable(b, opts, .{
-            .name = "main2",
-        });
+        const exe = addExecutable(b, opts, .{ .name = "main2" });
         exe.addObject(main_o);
         exe.addObject(a_o);
         exe.linkLibCpp();
@@ -457,6 +453,42 @@ fn testComdatElimination(b: *Build, opts: Options) *Step {
         test_step.dependOn(&check.step);
     }
 
+    {
+        const c_o = addObject(b, opts, .{ .name = "c" });
+        c_o.addObject(main_o);
+        c_o.addObject(a_o);
+
+        const exe = addExecutable(b, opts, .{ .name = "main3" });
+        exe.addObject(c_o);
+        exe.linkLibCpp();
+
+        const run = addRunArtifact(exe);
+        run.expectStdOutEqual(
+            \\calling foo in main
+            \\calling foo in main
+            \\
+        );
+        test_step.dependOn(&run.step);
+    }
+
+    {
+        const d_o = addObject(b, opts, .{ .name = "d" });
+        d_o.addObject(a_o);
+        d_o.addObject(main_o);
+
+        const exe = addExecutable(b, opts, .{ .name = "main4" });
+        exe.addObject(d_o);
+        exe.linkLibCpp();
+
+        const run = addRunArtifact(exe);
+        run.expectStdOutEqual(
+            \\calling foo in a
+            \\calling foo in a
+            \\
+        );
+        test_step.dependOn(&run.step);
+    }
+
     return test_step;
 }