Commit 8a3ad3f620

Jakub Konka <kubkon@jakubkonka.com>
2023-04-18 14:04:42
elf: do not reserve a GOT slot for every Atom
1 parent 528b66f
Changed files (8)
src/arch/aarch64/CodeGen.zig
@@ -4290,6 +4290,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier
             if (self.bin_file.cast(link.File.Elf)) |elf_file| {
                 const atom_index = try elf_file.getOrCreateAtomForDecl(func.owner_decl);
                 const atom = elf_file.getAtom(atom_index);
+                _ = try atom.getOrCreateOffsetTableEntry(elf_file);
                 const got_addr = @intCast(u32, atom.getOffsetTableAddress(elf_file));
                 try self.genSetReg(Type.initTag(.usize), .x30, .{ .memory = got_addr });
             } else if (self.bin_file.cast(link.File.MachO)) |macho_file| {
src/arch/arm/CodeGen.zig
@@ -4270,6 +4270,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier
             if (self.bin_file.cast(link.File.Elf)) |elf_file| {
                 const atom_index = try elf_file.getOrCreateAtomForDecl(func.owner_decl);
                 const atom = elf_file.getAtom(atom_index);
+                _ = try atom.getOrCreateOffsetTableEntry(elf_file);
                 const got_addr = @intCast(u32, atom.getOffsetTableAddress(elf_file));
                 try self.genSetReg(Type.initTag(.usize), .lr, .{ .memory = got_addr });
             } else if (self.bin_file.cast(link.File.MachO)) |_| {
src/arch/riscv64/CodeGen.zig
@@ -1734,6 +1734,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier
                 const func = func_payload.data;
                 const atom_index = try elf_file.getOrCreateAtomForDecl(func.owner_decl);
                 const atom = elf_file.getAtom(atom_index);
+                _ = try atom.getOrCreateOffsetTableEntry(elf_file);
                 const got_addr = @intCast(u32, atom.getOffsetTableAddress(elf_file));
                 try self.genSetReg(Type.initTag(.usize), .ra, .{ .memory = got_addr });
                 _ = try self.addInst(.{
src/arch/sparc64/CodeGen.zig
@@ -1254,6 +1254,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier
                 const got_addr = if (self.bin_file.cast(link.File.Elf)) |elf_file| blk: {
                     const atom_index = try elf_file.getOrCreateAtomForDecl(func.owner_decl);
                     const atom = elf_file.getAtom(atom_index);
+                    _ = try atom.getOrCreateOffsetTableEntry(elf_file);
                     break :blk @intCast(u32, atom.getOffsetTableAddress(elf_file));
                 } else unreachable;
 
src/arch/x86_64/CodeGen.zig
@@ -5624,7 +5624,9 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier
 
             if (self.bin_file.cast(link.File.Elf)) |elf_file| {
                 const atom_index = try elf_file.getOrCreateAtomForDecl(func.owner_decl);
-                const got_addr = elf_file.getAtom(atom_index).getOffsetTableAddress(elf_file);
+                const atom = elf_file.getAtom(atom_index);
+                _ = try atom.getOrCreateOffsetTableEntry(elf_file);
+                const got_addr = atom.getOffsetTableAddress(elf_file);
                 try self.asmMemory(.call, Memory.sib(.qword, .{
                     .base = .ds,
                     .disp = @intCast(i32, got_addr),
@@ -5853,7 +5855,9 @@ fn airCmpLtErrorsLen(self: *Self, inst: Air.Inst.Index) !void {
             .{ .kind = .const_data, .ty = Type.anyerror },
             4, // dword alignment
         );
-        const got_addr = elf_file.getAtom(atom_index).getOffsetTableAddress(elf_file);
+        const atom = elf_file.getAtom(atom_index);
+        _ = try atom.getOrCreateOffsetTableEntry(elf_file);
+        const got_addr = atom.getOffsetTableAddress(elf_file);
         try self.asmRegisterMemory(.mov, addr_reg.to64(), Memory.sib(.qword, .{
             .base = .ds,
             .disp = @intCast(i32, got_addr),
@@ -8230,7 +8234,9 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void {
             .{ .kind = .const_data, .ty = Type.anyerror },
             4, // dword alignment
         );
-        const got_addr = elf_file.getAtom(atom_index).getOffsetTableAddress(elf_file);
+        const atom = elf_file.getAtom(atom_index);
+        _ = try atom.getOrCreateOffsetTableEntry(elf_file);
+        const got_addr = atom.getOffsetTableAddress(elf_file);
         try self.asmRegisterMemory(.mov, addr_reg.to64(), Memory.sib(.qword, .{
             .base = .ds,
             .disp = @intCast(i32, got_addr),
src/link/Elf/Atom.zig
@@ -14,9 +14,6 @@ const Elf = @import("../Elf.zig");
 /// offset table entry.
 local_sym_index: u32,
 
-/// This field is undefined for symbols with size = 0.
-offset_table_index: u32,
-
 /// Points to the previous and next neighbors, based on the `text_offset`.
 /// This can be used to find, for example, the capacity of this `TextBlock`.
 prev_index: ?Index,
@@ -48,13 +45,24 @@ pub fn getName(self: Atom, elf_file: *const Elf) []const u8 {
     return elf_file.getSymbolName(self.getSymbolIndex().?);
 }
 
+/// Returns the index of this Atom's entry in the Global Offset Table.
+/// When the Atom's symbol has no entry yet, a new one is allocated and the
+/// GOT entry count is flagged as dirty.
+pub fn getOrCreateOffsetTableEntry(self: Atom, elf_file: *Elf) !u32 {
+    const sym_index = self.getSymbolIndex().?;
+    const got_table = &elf_file.got_table;
+    return got_table.lookup.get(sym_index) orelse blk: {
+        const new_index = try got_table.allocateEntry(elf_file.base.allocator, sym_index);
+        elf_file.got_table_count_dirty = true;
+        break :blk new_index;
+    };
+}
+
+/// Returns the virtual address of this Atom's Global Offset Table entry.
+/// The Atom must have a symbol index and that symbol must already have an
+/// entry in `elf_file.got_table` (both optionals are unwrapped with `.?`).
 pub fn getOffsetTableAddress(self: Atom, elf_file: *Elf) u64 {
-    assert(self.getSymbolIndex() != null);
+    const sym_index = self.getSymbolIndex().?;
+    const got_entry_index = elf_file.got_table.lookup.get(sym_index).?;
     const target = elf_file.base.options.target;
     const ptr_bits = target.cpu.arch.ptrBitWidth();
     const ptr_bytes: u64 = @divExact(ptr_bits, 8);
     const got = elf_file.program_headers.items[elf_file.phdr_got_index.?];
-    return got.p_vaddr + self.offset_table_index * ptr_bytes;
+    // Entry address = GOT program header base + entry index * pointer size.
+    return got.p_vaddr + got_entry_index * ptr_bytes;
 }
 
 /// Returns how much room there is to grow in virtual address space.
src/link/Elf.zig
@@ -63,6 +63,88 @@ const Section = struct {
     free_list: std.ArrayListUnmanaged(Atom.Index) = .{},
 };
 
+/// Bookkeeping for a table-like section (used for the GOT via `got_table`)
+/// in which every slot refers to a symbol by its index.
+///
+/// `entries` holds the symbol index stored in each slot, `free_list` holds
+/// slots released by `freeEntry` that `allocateEntry` may hand out again, and
+/// `lookup` maps a symbol index back to the slot it currently occupies.
+const SectionTable = struct {
+    entries: std.ArrayListUnmanaged(SymIndex) = .{},
+    free_list: std.ArrayListUnmanaged(Index) = .{},
+    lookup: std.AutoHashMapUnmanaged(SymIndex, Index) = .{},
+
+    const SymIndex = u32;
+    const Index = u32;
+
+    /// Releases all memory owned by the table.
+    pub fn deinit(st: *ST, allocator: Allocator) void {
+        st.entries.deinit(allocator);
+        st.free_list.deinit(allocator);
+        st.lookup.deinit(allocator);
+    }
+
+    /// Assigns a slot to `target` and returns the slot index, reusing a
+    /// previously freed slot when one is available.
+    /// `target` must not already have an entry (asserted by the map insert).
+    /// On allocation failure the table contents are left unchanged.
+    pub fn allocateEntry(st: *ST, allocator: Allocator, target: SymIndex) !Index {
+        // Reserve capacity in both containers before mutating anything, so
+        // that a failed allocation cannot leak a slot popped from the free
+        // list or leave an entry without a matching lookup record.
+        try st.entries.ensureUnusedCapacity(allocator, 1);
+        try st.lookup.ensureUnusedCapacity(allocator, 1);
+        const index = blk: {
+            if (st.free_list.popOrNull()) |reused| {
+                log.debug("  (reusing entry index {d})", .{reused});
+                break :blk reused;
+            } else {
+                log.debug("  (allocating entry at index {d})", .{st.entries.items.len});
+                const fresh = @intCast(u32, st.entries.items.len);
+                _ = st.entries.addOneAssumeCapacity();
+                break :blk fresh;
+            }
+        };
+        st.entries.items[index] = target;
+        st.lookup.putAssumeCapacityNoClobber(target, index);
+        return index;
+    }
+
+    /// Releases the slot held by `target`, if any. Does nothing when `target`
+    /// has no entry.
+    pub fn freeEntry(st: *ST, allocator: Allocator, target: SymIndex) void {
+        const index = st.lookup.get(target) orelse return;
+        // Freeing must not fail: if the slot cannot be recorded for reuse it
+        // is simply never handed out again.
+        st.free_list.append(allocator, index) catch {};
+        // Reset the slot to symbol index 0.
+        st.entries.items[index] = 0;
+        _ = st.lookup.remove(target);
+    }
+
+    /// Data handed to `fmt` through `std.fmt.Formatter`.
+    const FormatContext = struct {
+        ctx: *Elf,
+        st: *const ST,
+    };
+
+    /// Writes one line per slot: the slot index, the slot's address computed
+    /// from the GOT program header's `p_vaddr`, and the symbol index stored
+    /// in the slot.
+    fn fmt(
+        ctx: FormatContext,
+        comptime unused_format_string: []const u8,
+        options: std.fmt.FormatOptions,
+        writer: anytype,
+    ) @TypeOf(writer).Error!void {
+        _ = options;
+        comptime assert(unused_format_string.len == 0);
+
+        const base_addr = ctx.ctx.program_headers.items[ctx.ctx.phdr_got_index.?].p_vaddr;
+        const target = ctx.ctx.base.options.target;
+        const ptr_bits = target.cpu.arch.ptrBitWidth();
+        const ptr_bytes: u64 = @divExact(ptr_bits, 8);
+
+        try writer.writeAll("SectionTable:\n");
+        for (ctx.st.entries.items, 0..) |entry, i| {
+            try writer.print("  {d}@{x} => local(%{d})\n", .{ i, base_addr + i * ptr_bytes, entry });
+        }
+    }
+
+    /// Guard that turns direct formatting of a `SectionTable` into a compile
+    /// error pointing at `fmtDebug`.
+    fn format(st: ST, comptime unused_format_string: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
+        _ = st;
+        _ = unused_format_string;
+        _ = options;
+        _ = writer;
+        @compileError("do not format SectionTable directly; use st.fmtDebug()");
+    }
+
+    /// Returns a formatter that prints the table using `ctx` to resolve slot
+    /// addresses. The formatter borrows `st` and `ctx`, so both must outlive it.
+    /// The receiver is a pointer because `FormatContext.st` is `*const ST`.
+    pub fn fmtDebug(st: *const ST, ctx: *Elf) std.fmt.Formatter(fmt) {
+        return .{ .data = .{
+            .ctx = ctx,
+            .st = st,
+        } };
+    }
+
+    const ST = @This();
+};
+
 const LazySymbolMetadata = struct {
     text_atom: ?Atom.Index = null,
     rodata_atom: ?Atom.Index = null,
@@ -148,17 +230,13 @@ global_symbols: std.ArrayListUnmanaged(elf.Elf64_Sym) = .{},
 
 local_symbol_free_list: std.ArrayListUnmanaged(u32) = .{},
 global_symbol_free_list: std.ArrayListUnmanaged(u32) = .{},
-offset_table_free_list: std.ArrayListUnmanaged(u32) = .{},
 
-/// Same order as in the file. The value is the absolute vaddr value.
-/// If the vaddr of the executable program header changes, the entire
-/// offset table needs to be rewritten.
-offset_table: std.ArrayListUnmanaged(u64) = .{},
+got_table: SectionTable = .{},
 
 phdr_table_dirty: bool = false,
 shdr_table_dirty: bool = false,
 shstrtab_dirty: bool = false,
-offset_table_count_dirty: bool = false,
+got_table_count_dirty: bool = false,
 
 debug_strtab_dirty: bool = false,
 debug_abbrev_section_dirty: bool = false,
@@ -329,8 +407,7 @@ pub fn deinit(self: *Elf) void {
     self.global_symbols.deinit(gpa);
     self.global_symbol_free_list.deinit(gpa);
     self.local_symbol_free_list.deinit(gpa);
-    self.offset_table_free_list.deinit(gpa);
-    self.offset_table.deinit(gpa);
+    self.got_table.deinit(gpa);
 
     {
         var it = self.decls.iterator();
@@ -1289,6 +1366,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node
     assert(!self.shdr_table_dirty);
     assert(!self.shstrtab_dirty);
     assert(!self.debug_strtab_dirty);
+    assert(!self.got_table_count_dirty);
 }
 
 fn linkWithLLD(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node) !void {
@@ -2168,7 +2246,7 @@ fn freeAtom(self: *Elf, atom_index: Atom.Index) void {
     _ = self.atom_by_index_table.remove(local_sym_index);
     self.getAtomPtr(atom_index).local_sym_index = 0;
 
-    self.offset_table_free_list.append(self.base.allocator, atom.offset_table_index) catch {};
+    self.got_table.freeEntry(gpa, local_sym_index);
 }
 
 fn shrinkAtom(self: *Elf, atom_index: Atom.Index, new_block_size: u64) void {
@@ -2191,11 +2269,9 @@ pub fn createAtom(self: *Elf) !Atom.Index {
     const atom_index = @intCast(Atom.Index, self.atoms.items.len);
     const atom = try self.atoms.addOne(gpa);
     const local_sym_index = try self.allocateLocalSymbol();
-    const offset_table_index = try self.allocateGotOffset();
     try self.atom_by_index_table.putNoClobber(gpa, local_sym_index, atom_index);
     atom.* = .{
         .local_sym_index = local_sym_index,
-        .offset_table_index = offset_table_index,
         .prev_index = null,
         .next_index = null,
     };
@@ -2352,26 +2428,6 @@ pub fn allocateLocalSymbol(self: *Elf) !u32 {
     return index;
 }
 
-pub fn allocateGotOffset(self: *Elf) !u32 {
-    try self.offset_table.ensureUnusedCapacity(self.base.allocator, 1);
-
-    const index = blk: {
-        if (self.offset_table_free_list.popOrNull()) |index| {
-            log.debug("  (reusing GOT offset at index {d})", .{index});
-            break :blk index;
-        } else {
-            log.debug("  (allocating GOT offset at index {d})", .{self.offset_table.items.len});
-            const index = @intCast(u32, self.offset_table.items.len);
-            _ = self.offset_table.addOneAssumeCapacity();
-            self.offset_table_count_dirty = true;
-            break :blk index;
-        }
-    };
-
-    self.offset_table.items[index] = 0;
-    return index;
-}
-
 fn freeUnnamedConsts(self: *Elf, decl_index: Module.Decl.Index) void {
     const unnamed_consts = self.unnamed_const_atoms.getPtr(decl_index) orelse return;
     for (unnamed_consts.items) |atom| {
@@ -2465,6 +2521,7 @@ fn updateDeclCode(self: *Elf, decl_index: Module.Decl.Index, code: []const u8, s
     const decl_metadata = self.decls.get(decl_index).?;
     const atom_index = decl_metadata.atom;
     const atom = self.getAtom(atom_index);
+    const local_sym_index = atom.getSymbolIndex().?;
 
     const shdr_index = decl_metadata.shdr;
     if (atom.getSymbol(self).st_size != 0 and self.base.child_pid == null) {
@@ -2485,8 +2542,9 @@ fn updateDeclCode(self: *Elf, decl_index: Module.Decl.Index, code: []const u8, s
                 local_sym.st_value = vaddr;
 
                 log.debug("  (writing new offset table entry)", .{});
-                self.offset_table.items[atom.offset_table_index] = vaddr;
-                try self.writeOffsetTableEntry(atom.offset_table_index);
+                const got_entry_index = self.got_table.lookup.get(local_sym_index).?;
+                self.got_table.entries.items[got_entry_index] = local_sym_index;
+                try self.writeOffsetTableEntry(got_entry_index);
             }
         } else if (code.len < local_sym.st_size) {
             self.shrinkAtom(atom_index, code.len);
@@ -2494,7 +2552,7 @@ fn updateDeclCode(self: *Elf, decl_index: Module.Decl.Index, code: []const u8, s
         local_sym.st_size = code.len;
 
         // TODO this write could be avoided if no fields of the symbol were changed.
-        try self.writeSymbol(atom.getSymbolIndex().?);
+        try self.writeSymbol(local_sym_index);
     } else {
         const local_sym = atom.getSymbolPtr(self);
         local_sym.* = .{
@@ -2509,12 +2567,12 @@ fn updateDeclCode(self: *Elf, decl_index: Module.Decl.Index, code: []const u8, s
         errdefer self.freeAtom(atom_index);
         log.debug("allocated text block for {s} at 0x{x}", .{ decl_name, vaddr });
 
-        self.offset_table.items[atom.offset_table_index] = vaddr;
         local_sym.st_value = vaddr;
         local_sym.st_size = code.len;
 
-        try self.writeSymbol(atom.getSymbolIndex().?);
-        try self.writeOffsetTableEntry(atom.offset_table_index);
+        try self.writeSymbol(local_sym_index);
+        const got_entry_index = try atom.getOrCreateOffsetTableEntry(self);
+        try self.writeOffsetTableEntry(got_entry_index);
     }
 
     const local_sym = atom.getSymbolPtr(self);
@@ -2755,12 +2813,12 @@ fn updateLazySymbolAtom(
     errdefer self.freeAtom(atom_index);
     log.debug("allocated text block for {s} at 0x{x}", .{ name, vaddr });
 
-    self.offset_table.items[atom.offset_table_index] = vaddr;
     local_sym.st_value = vaddr;
     local_sym.st_size = code.len;
 
     try self.writeSymbol(local_sym_index);
-    try self.writeOffsetTableEntry(atom.offset_table_index);
+    const got_entry_index = try atom.getOrCreateOffsetTableEntry(self);
+    try self.writeOffsetTableEntry(got_entry_index);
 
     const section_offset = vaddr - self.program_headers.items[phdr_index].p_vaddr;
     const file_offset = self.sections.items(.shdr)[shdr_index].sh_offset + section_offset;
@@ -2991,30 +3049,32 @@ fn writeSectHeader(self: *Elf, index: usize) !void {
 
 fn writeOffsetTableEntry(self: *Elf, index: usize) !void {
     const entry_size: u16 = self.archPtrWidthBytes();
-    if (self.offset_table_count_dirty) {
-        const needed_size = self.offset_table.items.len * entry_size;
+    if (self.got_table_count_dirty) {
+        const needed_size = self.got_table.entries.items.len * entry_size;
         try self.growAllocSection(self.got_section_index.?, needed_size);
-        self.offset_table_count_dirty = false;
+        self.got_table_count_dirty = false;
     }
     const endian = self.base.options.target.cpu.arch.endian();
     const shdr = &self.sections.items(.shdr)[self.got_section_index.?];
     const off = shdr.sh_offset + @as(u64, entry_size) * index;
     const phdr = &self.program_headers.items[self.phdr_got_index.?];
     const vaddr = phdr.p_vaddr + @as(u64, entry_size) * index;
+    const got_entry = self.got_table.entries.items[index];
+    const got_value = self.getSymbol(got_entry).st_value;
     switch (entry_size) {
         2 => {
             var buf: [2]u8 = undefined;
-            mem.writeInt(u16, &buf, @intCast(u16, self.offset_table.items[index]), endian);
+            mem.writeInt(u16, &buf, @intCast(u16, got_value), endian);
             try self.base.file.?.pwriteAll(&buf, off);
         },
         4 => {
             var buf: [4]u8 = undefined;
-            mem.writeInt(u32, &buf, @intCast(u32, self.offset_table.items[index]), endian);
+            mem.writeInt(u32, &buf, @intCast(u32, got_value), endian);
             try self.base.file.?.pwriteAll(&buf, off);
         },
         8 => {
             var buf: [8]u8 = undefined;
-            mem.writeInt(u64, &buf, self.offset_table.items[index], endian);
+            mem.writeInt(u64, &buf, got_value, endian);
             try self.base.file.?.pwriteAll(&buf, off);
 
             if (self.base.child_pid) |pid| {
src/codegen.zig
@@ -1006,6 +1006,7 @@ fn genDeclRef(
     if (bin_file.cast(link.File.Elf)) |elf_file| {
         const atom_index = try elf_file.getOrCreateAtomForDecl(decl_index);
         const atom = elf_file.getAtom(atom_index);
+        _ = try atom.getOrCreateOffsetTableEntry(elf_file);
         return GenResult.mcv(.{ .memory = atom.getOffsetTableAddress(elf_file) });
     } else if (bin_file.cast(link.File.MachO)) |macho_file| {
         const atom_index = try macho_file.getOrCreateAtomForDecl(decl_index);