Commit 1a6d12ea92

Jakub Konka <kubkon@jakubkonka.com>
2023-09-12 23:27:11
elf: clean up and unify symbol ref handling in relocs
Also, this lets us re-enable proper undefined symbols tracking.
1 parent 9719fa7
src/link/Elf/Atom.zig
@@ -312,7 +312,7 @@ pub fn freeRelocs(self: Atom, elf_file: *Elf) void {
     zig_module.relocs.items[self.relocs_section_index].clearRetainingCapacity();
 }
 
-pub fn scanRelocs(self: Atom, elf_file: *Elf) !void {
+pub fn scanRelocs(self: Atom, elf_file: *Elf, undefs: anytype) !void {
     const file_ptr = elf_file.file(self.file_index).?;
     const rels = self.relocs(elf_file);
     var i: usize = 0;
@@ -322,11 +322,25 @@ pub fn scanRelocs(self: Atom, elf_file: *Elf) !void {
         if (rel.r_type() == elf.R_X86_64_NONE) continue;
 
         const symbol = switch (file_ptr) {
-            .zig_module => elf_file.symbol(rel.r_sym()),
+            .zig_module => |x| elf_file.symbol(x.symbol(rel.r_sym())),
             .object => |x| elf_file.symbol(x.symbols.items[rel.r_sym()]),
             else => unreachable,
         };
 
+        // Check for violation of One Definition Rule for COMDATs.
+        if (symbol.file(elf_file) == null) {
+            // TODO convert into an error
+            log.debug("{}: {s}: {s} refers to a discarded COMDAT section", .{
+                file_ptr.fmtPath(),
+                self.name(elf_file),
+                symbol.name(elf_file),
+            });
+            continue;
+        }
+
+        // Report an undefined symbol.
+        try self.reportUndefined(elf_file, symbol, rel, undefs);
+
         // While traversing relocations, mark symbols that require special handling such as
         // pointer indirection via GOT, or a stub trampoline via PLT.
         switch (rel.r_type()) {
@@ -363,6 +377,28 @@ pub fn scanRelocs(self: Atom, elf_file: *Elf) !void {
     }
 }
 
+// This function will report any undefined non-weak symbols that are not imports.
+fn reportUndefined(self: Atom, elf_file: *Elf, sym: *const Symbol, rel: elf.Elf64_Rela, undefs: anytype) !void {
+    const rel_esym = switch (elf_file.file(self.file_index).?) {
+        .zig_module => |x| x.elfSym(rel.r_sym()).*,
+        .object => |x| x.symtab[rel.r_sym()],
+        else => unreachable,
+    };
+    const esym = sym.elfSym(elf_file);
+    if (rel_esym.st_shndx == elf.SHN_UNDEF and
+        rel_esym.st_bind() == elf.STB_GLOBAL and
+        sym.esym_index > 0 and
+        !sym.flags.import and
+        esym.st_shndx == elf.SHN_UNDEF)
+    {
+        const gop = try undefs.getOrPut(sym.index);
+        if (!gop.found_existing) {
+            gop.value_ptr.* = std.ArrayList(Atom.Index).init(elf_file.base.allocator);
+        }
+        try gop.value_ptr.append(self.atom_index);
+    }
+}
+
 /// TODO mark relocs dirty
 pub fn resolveRelocs(self: Atom, elf_file: *Elf, code: []u8) !void {
     relocs_log.debug("0x{x}: {s}", .{ self.value, self.name(elf_file) });
@@ -376,7 +412,7 @@ pub fn resolveRelocs(self: Atom, elf_file: *Elf, code: []u8) !void {
         if (r_type == elf.R_X86_64_NONE) continue;
 
         const target = switch (file_ptr) {
-            .zig_module => elf_file.symbol(rel.r_sym()),
+            .zig_module => |x| elf_file.symbol(x.symbol(rel.r_sym())),
             .object => |x| elf_file.symbol(x.symbols.items[rel.r_sym()]),
             else => unreachable,
         };
@@ -564,3 +600,4 @@ const Allocator = std.mem.Allocator;
 const Atom = @This();
 const Elf = @import("../Elf.zig");
 const File = @import("file.zig").File;
+const Symbol = @import("Symbol.zig");
src/link/Elf/file.zig
@@ -23,7 +23,7 @@ pub const File = union(enum) {
         _ = unused_fmt_string;
         _ = options;
         switch (file) {
-            .zig_module => try writer.writeAll("(zig module)"),
+            .zig_module => |x| try writer.print("{s}", .{x.path}),
             .linker_defined => try writer.writeAll("(linker defined)"),
             .object => |x| try writer.print("{}", .{x.fmtPath()}),
             // .shared_object => |x| try writer.writeAll(x.path),
src/link/Elf/Object.zig
@@ -392,14 +392,14 @@ fn filterRelocs(
     return .{ .start = f_start, .len = f_len };
 }
 
-pub fn scanRelocs(self: *Object, elf_file: *Elf) !void {
+pub fn scanRelocs(self: *Object, elf_file: *Elf, undefs: anytype) !void {
     for (self.atoms.items) |atom_index| {
         const atom = elf_file.atom(atom_index) orelse continue;
         if (!atom.alive) continue;
         const shdr = atom.inputShdr(elf_file);
         if (shdr.sh_flags & elf.SHF_ALLOC == 0) continue;
         if (shdr.sh_type == elf.SHT_NOBITS) continue;
-        try atom.scanRelocs(elf_file);
+        try atom.scanRelocs(elf_file, undefs);
     }
 
     for (self.cies.items) |cie| {
src/link/Elf/Symbol.zig
@@ -43,7 +43,7 @@ pub fn isLocal(symbol: Symbol) bool {
     return !(symbol.flags.import or symbol.flags.@"export");
 }
 
-pub inline fn isIFunc(symbol: Symbol, elf_file: *Elf) bool {
+pub fn isIFunc(symbol: Symbol, elf_file: *Elf) bool {
     return symbol.type(elf_file) == elf.STT_GNU_IFUNC;
 }
 
@@ -69,12 +69,7 @@ pub fn file(symbol: Symbol, elf_file: *Elf) ?File {
 pub fn elfSym(symbol: Symbol, elf_file: *Elf) elf.Elf64_Sym {
     const file_ptr = symbol.file(elf_file).?;
     switch (file_ptr) {
-        .zig_module => |x| {
-            const is_global = symbol.esym_index & 0x10000000 != 0;
-            const esym_index = symbol.esym_index & 0x0fffffff;
-            if (is_global) return x.global_esyms.items[esym_index];
-            return x.local_esyms.items[esym_index];
-        },
+        .zig_module => |x| return x.elfSym(symbol.esym_index).*,
         .linker_defined => |x| return x.symtab.items[symbol.esym_index],
         .object => |x| return x.symtab[symbol.esym_index],
     }
src/link/Elf/ZigModule.zig
@@ -1,3 +1,8 @@
+//! ZigModule encapsulates the state of the incrementally compiled Zig module.
+//! It stores the associated input local and global symbols, allocated atoms,
+//! and any relocations that may have been emitted.
+//! Think about this as fake in-memory Object file for the Zig module.
+
 /// Path is owned by Module and lives as long as *Module.
 path: []const u8,
 index: File.Index,
@@ -41,7 +46,7 @@ pub fn addGlobalEsym(self: *ZigModule, allocator: Allocator) !Symbol.Index {
     const esym = self.global_esyms.addOneAssumeCapacity();
     esym.* = Elf.null_sym;
     esym.st_info = elf.STB_GLOBAL << 4;
-    return index;
+    return index | 0x10000000;
 }
 
 pub fn addAtom(self: *ZigModule, output_section_index: u16, elf_file: *Elf) !Symbol.Index {
@@ -135,11 +140,11 @@ pub fn claimUnresolved(self: *ZigModule, elf_file: *Elf) void {
     }
 }
 
-pub fn scanRelocs(self: *ZigModule, elf_file: *Elf) !void {
+pub fn scanRelocs(self: *ZigModule, elf_file: *Elf, undefs: anytype) !void {
     for (self.atoms.keys()) |atom_index| {
         const atom = elf_file.atom(atom_index) orelse continue;
         if (!atom.alive) continue;
-        try atom.scanRelocs(elf_file);
+        try atom.scanRelocs(elf_file, undefs);
     }
 }
 
@@ -197,6 +202,20 @@ pub fn writeSymtab(self: *ZigModule, elf_file: *Elf, ctx: anytype) void {
     }
 }
 
+pub fn symbol(self: *ZigModule, index: Symbol.Index) Symbol.Index {
+    const is_global = index & 0x10000000 != 0;
+    const actual_index = index & 0x0fffffff;
+    if (is_global) return self.global_symbols.items[actual_index];
+    return self.local_symbols.items[actual_index];
+}
+
+pub fn elfSym(self: *ZigModule, index: Symbol.Index) *elf.Elf64_Sym {
+    const is_global = index & 0x10000000 != 0;
+    const actual_index = index & 0x0fffffff;
+    if (is_global) return &self.global_esyms.items[actual_index];
+    return &self.local_esyms.items[actual_index];
+}
+
 pub fn locals(self: *ZigModule) []const Symbol.Index {
     return self.local_symbols.items;
 }
src/link/Elf.zig
@@ -6,6 +6,9 @@ ptr_width: PtrWidth,
 /// If this is not null, an object file is created by LLVM and linked with LLD afterwards.
 llvm_object: ?*LlvmObject = null,
 
+/// A list of all input files.
+/// Index of each input file also encodes the priority or precedence of one input file
+/// over another.
 files: std.MultiArrayList(File.Entry) = .{},
 zig_module_index: ?File.Index = null,
 linker_defined_index: ?File.Index = null,
@@ -47,6 +50,7 @@ shstrtab: StringTable(.strtab) = .{},
 /// .strtab buffer
 strtab: StringTable(.strtab) = .{},
 
+/// Representation of the GOT table as committed to the file.
 got: GotSection = .{},
 
 text_section_index: ?u16 = null,
@@ -86,10 +90,10 @@ rela_iplt_start_index: ?Symbol.Index = null,
 rela_iplt_end_index: ?Symbol.Index = null,
 start_stop_indexes: std.ArrayListUnmanaged(u32) = .{},
 
+/// An array of symbols parsed across all input files.
 symbols: std.ArrayListUnmanaged(Symbol) = .{},
 symbols_extra: std.ArrayListUnmanaged(u32) = .{},
 resolver: std.AutoArrayHashMapUnmanaged(u32, Symbol.Index) = .{},
-unresolved: std.AutoArrayHashMapUnmanaged(Symbol.Index, void) = .{},
 symbols_free_list: std.ArrayListUnmanaged(Symbol.Index) = .{},
 
 phdr_table_dirty: bool = false,
@@ -271,7 +275,6 @@ pub fn deinit(self: *Elf) void {
     self.symbols_free_list.deinit(gpa);
     self.got.deinit(gpa);
     self.resolver.deinit(gpa);
-    self.unresolved.deinit(gpa);
     self.start_stop_indexes.deinit(gpa);
 
     {
@@ -316,7 +319,7 @@ pub fn getDeclVAddr(self: *Elf, decl_index: Module.Decl.Index, reloc_info: link.
     const parent_atom = self.symbol(reloc_info.parent_atom_index).atom(self).?;
     try parent_atom.addReloc(self, .{
         .r_offset = reloc_info.offset,
-        .r_info = (@as(u64, @intCast(this_sym_index)) << 32) | elf.R_X86_64_64,
+        .r_info = (@as(u64, @intCast(this_sym.esym_index)) << 32) | elf.R_X86_64_64,
         .r_addend = reloc_info.addend,
     });
 
@@ -997,12 +1000,16 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node
     };
     _ = compiler_rt_path;
 
-    // Parse input files
+    // Here we will parse input positional and library files (if referenced).
+    // This will roughly match in any linker backend we support.
     var positionals = std.ArrayList(Compilation.LinkObject).init(gpa);
     defer positionals.deinit();
     try positionals.ensureUnusedCapacity(self.base.options.objects.len);
     positionals.appendSliceAssumeCapacity(self.base.options.objects);
 
+    // This is a set of object files emitted by clang in a single `build-exe` invocation.
+    // For instance, the implicit `a.o` as compiled by `zig build-exe a.c` will end up
+    // in this set.
     for (comp.c_object_table.keys()) |key| {
         try positionals.append(.{ .path = key.status.success.object_path });
     }
@@ -1016,6 +1023,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node
             try self.handleAndReportParseError(obj.path, err, &parse_ctx);
     }
 
+    // Handle any lazy symbols that were emitted by incremental compilation.
     if (self.lazy_syms.getPtr(.none)) |metadata| {
         // Most lazy symbols can be updated on first use, but
         // anyerror needs to wait for everything to be flushed.
@@ -1046,27 +1054,34 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node
         try dw.flushModule(module);
     }
 
+    // If we haven't already, create a linker-generated input file comprising of
+    // linker-defined synthetic symbols only such as `_DYNAMIC`, etc.
     if (self.linker_defined_index == null) {
         const index = @as(File.Index, @intCast(try self.files.addOne(gpa)));
         self.files.set(index, .{ .linker_defined = .{ .index = index } });
         self.linker_defined_index = index;
     }
-
-    // Symbol resolution happens here
     try self.addLinkerDefinedSymbols();
+
+    // Now, we are ready to resolve the symbols across all input files.
+    // We will first resolve the files in the ZigModule, next in the parsed
+    // input Object files.
+    // Any qualifing unresolved symbol will be upgraded to an absolute, weak
+    // symbol for potential resolution at load-time.
     self.resolveSymbols();
     self.markImportsExports();
     self.claimUnresolved();
 
-    // Scan and create missing synthetic entries such as GOT indirection
+    // Scan and create missing synthetic entries such as GOT indirection.
     try self.scanRelocs();
 
-    // Allocate atoms parsed from input object files
+    // Allocate atoms parsed from input object files, followed by allocating
+    // linker-defined synthetic symbols.
     try self.allocateObjects();
     self.allocateLinkerDefinedSymbols();
 
     // Beyond this point, everything has been allocated a virtual address and we can resolve
-    // the relocations.
+    // the relocations, and commit objects to file.
     if (self.zig_module_index) |index| {
         for (self.file(index).?.zig_module.atoms.keys()) |atom_index| {
             const atom_ptr = self.atom(atom_index).?;
@@ -1083,9 +1098,12 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node
     }
     try self.writeObjects();
 
+    // Generate and emit the symbol table.
     try self.updateSymtabSize();
     try self.writeSymtab();
 
+    // Dump the state for easy debugging.
+    // State can be dumped via `--debug-log link_state`.
     if (build_options.enable_logging) {
         state_log.debug("{}", .{self.dumpState()});
     }
@@ -1393,17 +1411,32 @@ fn claimUnresolved(self: *Elf) void {
     }
 }
 
+/// In scanRelocs we will go over all live atoms and scan their relocs.
+/// This will help us work out what synthetics to emit, GOT indirection, etc.
+/// This is also the point where we will report undefined symbols for any
+/// alloc sections.
 fn scanRelocs(self: *Elf) !void {
+    const gpa = self.base.allocator;
+
+    var undefs = std.AutoHashMap(Symbol.Index, std.ArrayList(Atom.Index)).init(gpa);
+    defer {
+        var it = undefs.iterator();
+        while (it.next()) |entry| {
+            entry.value_ptr.deinit();
+        }
+        undefs.deinit();
+    }
+
     if (self.zig_module_index) |index| {
         const zig_module = self.file(index).?.zig_module;
-        try zig_module.scanRelocs(self);
+        try zig_module.scanRelocs(self, &undefs);
     }
     for (self.objects.items) |index| {
         const object = self.file(index).?.object;
-        try object.scanRelocs(self);
+        try object.scanRelocs(self, &undefs);
     }
 
-    // try self.reportUndefined();
+    try self.reportUndefined(&undefs);
 
     for (self.symbols.items) |*sym| {
         if (sym.flags.needs_got) {
@@ -1433,8 +1466,10 @@ fn allocateObjects(self: *Elf) !void {
 
         for (object.globals()) |global_index| {
             const global = self.symbol(global_index);
+            const atom_ptr = global.atom(self) orelse continue;
+            if (!atom_ptr.alive) continue;
             if (global.file_index == index) {
-                global.value = global.atom(self).?.value;
+                global.value = atom_ptr.value;
             }
         }
     }
@@ -2829,21 +2864,23 @@ pub fn updateDeclExports(
         };
         const stt_bits: u8 = @as(u4, @truncate(decl_esym.st_info));
 
+        const name_off = try self.strtab.insert(gpa, exp_name);
         const sym_index = if (decl_metadata.@"export"(self, exp_name)) |exp_index| exp_index.* else blk: {
             const sym_index = try zig_module.addGlobalEsym(gpa);
-            _ = try zig_module.global_symbols.addOne(gpa);
+            const lookup_gop = try zig_module.globals_lookup.getOrPut(gpa, name_off);
+            const esym = zig_module.elfSym(sym_index);
+            esym.st_name = name_off;
+            lookup_gop.value_ptr.* = sym_index;
             try decl_metadata.exports.append(gpa, sym_index);
+            const gop = try self.getOrPutGlobal(name_off);
+            try zig_module.global_symbols.append(gpa, gop.index);
             break :blk sym_index;
         };
-        const name_off = try self.strtab.insert(gpa, exp_name);
-        const esym = &zig_module.global_esyms.items[sym_index];
+        const esym = &zig_module.global_esyms.items[sym_index & 0x0fffffff];
         esym.st_value = decl_sym.value;
         esym.st_shndx = decl_sym.atom_index;
         esym.st_info = (stb_bits << 4) | stt_bits;
         esym.st_name = name_off;
-
-        const gop = try self.getOrPutGlobal(name_off);
-        zig_module.global_symbols.items[sym_index] = gop.index;
     }
 }
 
@@ -3636,16 +3673,17 @@ pub fn getGlobalSymbol(self: *Elf, name: []const u8, lib_name: ?[]const u8) !u32
     _ = lib_name;
     const gpa = self.base.allocator;
     const off = try self.strtab.insert(gpa, name);
-    const gop = try self.getOrPutGlobal(off);
     const zig_module = self.file(self.zig_module_index.?).?.zig_module;
     const lookup_gop = try zig_module.globals_lookup.getOrPut(gpa, off);
     if (!lookup_gop.found_existing) {
         const esym_index = try zig_module.addGlobalEsym(gpa);
-        const esym = &zig_module.global_esyms.items[esym_index];
+        const esym = zig_module.elfSym(esym_index);
         esym.st_name = off;
         lookup_gop.value_ptr.* = esym_index;
+        const gop = try self.getOrPutGlobal(off);
+        try zig_module.global_symbols.append(gpa, gop.index);
     }
-    return gop.index;
+    return lookup_gop.value_ptr.*;
 }
 
 const GetOrCreateComdatGroupOwnerResult = struct {
@@ -3684,89 +3722,39 @@ pub fn comdatGroupOwner(self: *Elf, index: ComdatGroupOwner.Index) *ComdatGroupO
     return &self.comdat_groups_owners.items[index];
 }
 
-fn reportUndefined(self: *Elf) !void {
+fn reportUndefined(self: *Elf, undefs: anytype) !void {
     const gpa = self.base.allocator;
     const max_notes = 4;
 
-    try self.misc_errors.ensureUnusedCapacity(gpa, self.unresolved.keys().len);
-
-    const CollectStruct = struct {
-        notes: [max_notes]link.File.ErrorMsg = [_]link.File.ErrorMsg{.{ .msg = undefined }} ** max_notes,
-        notes_len: u3 = 0,
-        notes_count: usize = 0,
-    };
-
-    const collect: []CollectStruct = try gpa.alloc(CollectStruct, self.unresolved.keys().len);
-    defer gpa.free(collect);
-    @memset(collect, .{});
-
-    // Collect all references across all input files
-    if (self.zig_module_index) |index| {
-        const zig_module = self.file(index).?.zig_module;
-        for (zig_module.atoms.keys()) |atom_index| {
-            const atom_ptr = self.atom(atom_index).?;
-            if (!atom_ptr.alive) continue;
-
-            for (atom_ptr.relocs(self)) |rel| {
-                if (self.unresolved.getIndex(rel.r_sym())) |bin_index| {
-                    const note = try std.fmt.allocPrint(gpa, "referenced by {s}:{s}", .{
-                        zig_module.path,
-                        atom_ptr.name(self),
-                    });
-                    const bin = &collect[bin_index];
-                    if (bin.notes_len < max_notes) {
-                        bin.notes[bin.notes_len] = .{ .msg = note };
-                        bin.notes_len += 1;
-                    }
-                    bin.notes_count += 1;
-                }
-            }
-        }
-    }
-
-    for (self.objects.items) |index| {
-        const object = self.file(index).?.object;
-        for (object.atoms.items) |atom_index| {
-            const atom_ptr = self.atom(atom_index) orelse continue;
-            if (!atom_ptr.alive) continue;
+    try self.misc_errors.ensureUnusedCapacity(gpa, undefs.count());
 
-            for (atom_ptr.relocs(self)) |rel| {
-                const sym_index = object.symbols.items[rel.r_sym()];
-                if (self.unresolved.getIndex(sym_index)) |bin_index| {
-                    const note = try std.fmt.allocPrint(gpa, "referenced by {}:{s}", .{
-                        object.fmtPath(),
-                        atom_ptr.name(self),
-                    });
-                    const bin = &collect[bin_index];
-                    if (bin.notes_len < max_notes) {
-                        bin.notes[bin.notes_len] = .{ .msg = note };
-                        bin.notes_len += 1;
-                    }
-                    bin.notes_count += 1;
-                }
-            }
-        }
-    }
-
-    // Generate error notes
-    for (self.unresolved.keys(), 0..) |sym_index, bin_index| {
-        const collected = &collect[bin_index];
+    var it = undefs.iterator();
+    while (it.next()) |entry| {
+        const undef_index = entry.key_ptr.*;
+        const atoms = entry.value_ptr.*.items;
+        const nnotes = @min(atoms.len, max_notes);
 
         var notes = try std.ArrayList(link.File.ErrorMsg).initCapacity(gpa, max_notes + 1);
         defer notes.deinit();
 
-        for (collected.notes[0..collected.notes_len]) |note| {
-            notes.appendAssumeCapacity(note);
+        for (atoms[0..nnotes]) |atom_index| {
+            const atom_ptr = self.atom(atom_index).?;
+            const file_ptr = self.file(atom_ptr.file_index).?;
+            const note = try std.fmt.allocPrint(gpa, "referenced by {s}:{s}", .{
+                file_ptr.fmtPath(),
+                atom_ptr.name(self),
+            });
+            notes.appendAssumeCapacity(.{ .msg = note });
         }
 
-        if (collected.notes_count > max_notes) {
-            const remaining = collected.notes_count - max_notes;
+        if (atoms.len > max_notes) {
+            const remaining = atoms.len - max_notes;
             const note = try std.fmt.allocPrint(gpa, "referenced {d} more times", .{remaining});
             notes.appendAssumeCapacity(.{ .msg = note });
         }
 
         var err_msg = link.File.ErrorMsg{
-            .msg = try std.fmt.allocPrint(gpa, "undefined symbol: {s}", .{self.symbol(sym_index).name(self)}),
+            .msg = try std.fmt.allocPrint(gpa, "undefined symbol: {s}", .{self.symbol(undef_index).name(self)}),
         };
         err_msg.notes = try notes.toOwnedSlice();
 
@@ -3931,7 +3919,7 @@ const DeclMetadata = struct {
     fn @"export"(m: DeclMetadata, elf_file: *Elf, name: []const u8) ?*u32 {
         const zig_module = elf_file.file(elf_file.zig_module_index.?).?.zig_module;
         for (m.exports.items) |*exp| {
-            const exp_name = elf_file.strtab.getAssumeExists(zig_module.global_esyms.items[exp.*].st_name);
+            const exp_name = elf_file.strtab.getAssumeExists(zig_module.elfSym(exp.*).st_name);
             if (mem.eql(u8, name, exp_name)) return exp;
         }
         return null;