Commit 859f9a22c4

Jakub Konka <kubkon@jakubkonka.com>
2024-03-08 22:44:37
elf+aarch64: implement basic thunk support
1 parent 7c5ddb6
src/link/Elf/Atom.zig
@@ -31,6 +31,9 @@ rel_num: u32 = 0,
 /// Index of this atom in the linker's atoms table.
 atom_index: Index = 0,
 
+/// Index of the thunk for this atom.
+thunk_index: Thunk.Index = 0,
+
 /// Flags we use for state tracking.
 flags: Flags = .{},
 
@@ -64,6 +67,10 @@ pub fn file(self: Atom, elf_file: *Elf) ?File {
     return elf_file.file(self.file_index);
 }
 
+/// Returns a pointer to the range extension thunk recorded in this atom's
+/// `thunk_index`, resolved through `elf_file.thunk`.
+pub fn thunk(self: Atom, elf_file: *Elf) *Thunk {
+    const index = self.thunk_index;
+    return elf_file.thunk(index);
+}
+
 pub fn inputShdr(self: Atom, elf_file: *Elf) elf.Elf64_Shdr {
     return switch (self.file(elf_file).?) {
         .object => |x| x.shdrs.items[self.input_section_index],
@@ -1681,6 +1688,7 @@ const aarch64 = struct {
         const r_offset = std.math.cast(usize, rel.r_offset) orelse return error.Overflow;
         const cwriter = stream.writer();
         const code = code_buffer[rel.r_offset..][0..4];
+        const file_ptr = atom.file(elf_file).?;
 
         const P, const A, const S, const GOT, const G, const TP, const DTP, const ZIG_GOT = args;
         _ = DTP;
@@ -1701,18 +1709,15 @@ const aarch64 = struct {
             .CALL26,
             .JUMP26,
             => {
-                // TODO: add thunk support
-                const disp: i28 = math.cast(i28, S + A - P) orelse {
-                    var err = try elf_file.addErrorWithNotes(1);
-                    try err.addMsg(elf_file, "TODO: branch relocation target ({s}) exceeds max jump distance", .{
-                        target.name(elf_file),
-                    });
-                    try err.addNote(elf_file, "in {}:{s} at offset 0x{x}", .{
-                        atom.file(elf_file).?.fmtPath(),
-                        atom.name(elf_file),
-                        r_offset,
-                    });
-                    return;
+                const disp: i28 = math.cast(i28, S + A - P) orelse blk: {
+                    const th = atom.thunk(elf_file);
+                    const target_index = switch (file_ptr) {
+                        .zig_object => |x| x.symbol(rel.r_sym()),
+                        .object => |x| x.symbols.items[rel.r_sym()],
+                        else => unreachable,
+                    };
+                    const S_: i64 = @intCast(th.targetAddress(target_index, elf_file));
+                    break :blk math.cast(i28, S_ + A - P) orelse return error.Overflow;
                 };
                 aarch64_util.writeBranchImm(disp, code);
             },
@@ -2173,3 +2178,4 @@ const Fde = eh_frame.Fde;
 const File = @import("file.zig").File;
 const Object = @import("Object.zig");
 const Symbol = @import("Symbol.zig");
+const Thunk = @import("thunks.zig").Thunk;
src/link/Elf/Symbol.zig
@@ -253,7 +253,7 @@ pub fn setOutputSym(symbol: Symbol, elf_file: *Elf, out: *elf.Elf64_Sym) void {
             break :blk 0;
         }
         if (st_shndx == elf.SHN_ABS or st_shndx == elf.SHN_COMMON) break :blk symbol.address(.{ .plt = false }, elf_file);
-        const shdr = &elf_file.shdrs.items[st_shndx];
+        const shdr = elf_file.shdrs.items[st_shndx];
         if (shdr.sh_flags & elf.SHF_TLS != 0 and file_ptr != .linker_defined)
             break :blk symbol.address(.{ .plt = false }, elf_file) - elf_file.tlsAddress();
         break :blk symbol.address(.{ .plt = false }, elf_file);
src/link/Elf/thunks.zig
@@ -0,0 +1,243 @@
+/// Lays out every atom of output section `shndx` and interleaves range extension
+/// thunks between groups of atoms.
+///
+/// All atoms first receive the sentinel value -1 ("not placed yet"). Atoms are then
+/// placed in list order; a group keeps growing while the distance from its first
+/// atom to the current end of the section is below `maxAllowedDistance`. After each
+/// group one thunk is created, the target of every relocation in the group that is
+/// reported as not reachable is recorded in that thunk, and the thunk itself is
+/// placed right behind the group. Every atom of the group gets `thunk_index` set to
+/// the new thunk.
+///
+/// Side effects: `sh_size` and `sh_addralign` of the section header are updated
+/// through `advance`.
+///
+/// Errors are propagated from `addThunk`, from inserting into the thunk's symbol
+/// set, and from `advance`.
+pub fn createThunks(shndx: u32, elf_file: *Elf) !void {
+    const gpa = elf_file.base.comp.gpa;
+    const cpu_arch = elf_file.getTarget().cpu.arch;
+    const shdr = &elf_file.shdrs.items[shndx];
+    const atoms = elf_file.output_sections.get(shndx).?.items;
+    assert(atoms.len > 0);
+
+    // Mark every atom as unplaced; `aarch64.isReachable` treats a target whose atom
+    // still carries this sentinel as unreachable.
+    for (atoms) |atom_index| {
+        elf_file.atom(atom_index).?.value = @bitCast(@as(i64, -1));
+    }
+
+    var i: usize = 0;
+    while (i < atoms.len) {
+        // The first atom of a group is always placed, regardless of its size.
+        const start = i;
+        const start_atom = elf_file.atom(atoms[start]).?;
+        assert(start_atom.flags.alive);
+        start_atom.value = try advance(shdr, start_atom.size, start_atom.alignment);
+        i += 1;
+
+        // Keep appending atoms while the group span stays below the limit.
+        while (i < atoms.len and
+            shdr.sh_size - start_atom.value < maxAllowedDistance(cpu_arch)) : (i += 1)
+        {
+            const atom_index = atoms[i];
+            const atom = elf_file.atom(atom_index).?;
+            assert(atom.flags.alive);
+            atom.value = try advance(shdr, atom.size, atom.alignment);
+        }
+
+        // Insert a thunk at the group end
+        const thunk_index = try elf_file.addThunk();
+        const thunk = elf_file.thunk(thunk_index);
+        thunk.output_section_index = shndx;
+
+        // Scan relocs in the group and create trampolines for any unreachable callsite
+        for (atoms[start..i]) |atom_index| {
+            const atom = elf_file.atom(atom_index).?;
+            const file = atom.file(elf_file).?;
+            log.debug("atom({d}) {s}", .{ atom_index, atom.name(elf_file) });
+            for (atom.relocs(elf_file)) |rel| {
+                const is_reachable = switch (cpu_arch) {
+                    .aarch64 => aarch64.isReachable(atom, rel, elf_file),
+                    .x86_64, .riscv64 => unreachable,
+                    else => @panic("unsupported arch"),
+                };
+                if (is_reachable) continue;
+                const target = switch (file) {
+                    .zig_object => |x| x.symbol(rel.r_sym()),
+                    .object => |x| x.symbols.items[rel.r_sym()],
+                    else => unreachable,
+                };
+                // The symbol set deduplicates targets, so each one gets a single trampoline.
+                try thunk.symbols.put(gpa, target, {});
+            }
+            atom.thunk_index = thunk_index;
+        }
+
+        // Trampolines are sequences of 32-bit instructions (see `aarch64.write`), so the
+        // thunk must start on a 4-byte boundary; 2-byte alignment could misalign them
+        // when the preceding atom does not end on a multiple of four.
+        thunk.value = try advance(shdr, thunk.size(elf_file), Atom.Alignment.fromNonzeroByteUnits(4));
+
+        log.debug("thunk({d}) : {}", .{ thunk_index, thunk.fmt(elf_file) });
+    }
+}
+
+/// Appends `size` bytes to the section described by `shdr`, first rounding the
+/// current `sh_size` up to `alignment`. `sh_addralign` is raised when `alignment`
+/// is stricter than what the header already records.
+/// Returns the aligned offset within the section at which the new bytes begin.
+fn advance(shdr: *elf.Elf64_Shdr, size: u64, alignment: Atom.Alignment) !u64 {
+    const aligned_start = alignment.forward(shdr.sh_size);
+    shdr.sh_addralign = @max(shdr.sh_addralign, alignment.toByteUnits(1));
+    // Old size + padding + payload is the same as aligned start + payload.
+    shdr.sh_size = aligned_start + size;
+    return aligned_start;
+}
+
+/// Upper bound, in bytes, on the span of one thunk group for `cpu_arch`.
+/// `createThunks` keeps adding atoms to a group while the distance from the
+/// group's first atom to the end of the section is below this value.
+/// For aarch64 the bound is 0x500_000, well inside the signed 28-bit displacement
+/// that `aarch64.isReachable` checks for.
+/// NOTE(review): the previous comment described the bound as
+/// `2^(jump_bits - 1) - margin`; the constant returned here looks like the margin
+/// alone — confirm which one was intended.
+/// x86_64 and riscv64 must never get here (`unreachable`); any other arch panics.
+fn maxAllowedDistance(cpu_arch: std.Target.Cpu.Arch) u32 {
+    switch (cpu_arch) {
+        .aarch64 => return 0x500_000,
+        .x86_64, .riscv64 => unreachable,
+        else => @panic("unhandled arch"),
+    }
+}
+
+/// A range extension thunk: a run of trampolines placed inside an output section.
+/// Every symbol in `symbols` owns exactly one trampoline; its slot is the symbol's
+/// position in the map.
+pub const Thunk = struct {
+    /// Offset of the thunk from the start of its output section.
+    value: u64 = 0,
+    /// Index of the output section header the thunk is placed in.
+    output_section_index: u32 = 0,
+    /// Branch targets that get a trampoline; key position == trampoline slot.
+    symbols: std.AutoArrayHashMapUnmanaged(Symbol.Index, void) = .{},
+    /// Position and size of the thunk's entries in the output symbol/string tables.
+    output_symtab_ctx: Elf.SymtabCtx = .{},
+
+    /// Releases the symbol set. `allocator` must be the one used for insertion.
+    pub fn deinit(thunk: *Thunk, allocator: Allocator) void {
+        thunk.symbols.deinit(allocator);
+    }
+
+    /// Total size of the thunk in bytes: one trampoline per recorded symbol.
+    pub fn size(thunk: Thunk, elf_file: *Elf) usize {
+        const cpu_arch = elf_file.getTarget().cpu.arch;
+        return thunk.symbols.keys().len * trampolineSize(cpu_arch);
+    }
+
+    /// Address of the first trampoline: section address plus `value`.
+    pub fn address(thunk: Thunk, elf_file: *Elf) u64 {
+        const shdr = elf_file.shdrs.items[thunk.output_section_index];
+        return shdr.sh_addr + thunk.value;
+    }
+
+    /// Address of the trampoline that belongs to `sym_index`.
+    /// `sym_index` must be present in `symbols`; the lookup is unwrapped with `.?`.
+    pub fn targetAddress(thunk: Thunk, sym_index: Symbol.Index, elf_file: *Elf) u64 {
+        const cpu_arch = elf_file.getTarget().cpu.arch;
+        return thunk.address(elf_file) + thunk.symbols.getIndex(sym_index).? * trampolineSize(cpu_arch);
+    }
+
+    /// Emits the machine code of all trampolines to `writer`, dispatching on the
+    /// target architecture. Only aarch64 is implemented.
+    pub fn write(thunk: Thunk, elf_file: *Elf, writer: anytype) !void {
+        // Query the arch the same way every other method here does.
+        switch (elf_file.getTarget().cpu.arch) {
+            .aarch64 => try aarch64.write(thunk, elf_file, writer),
+            .x86_64, .riscv64 => unreachable,
+            else => @panic("unhandled arch"),
+        }
+    }
+
+    /// Computes how many local symbols and how many string table bytes the thunk
+    /// contributes, and stores both in `output_symtab_ctx`.
+    /// Each trampoline is named `<symbol name>$thunk` plus a terminating zero byte.
+    /// Both counters are assigned (not accumulated), so calling this repeatedly
+    /// yields the same result.
+    pub fn calcSymtabSize(thunk: *Thunk, elf_file: *Elf) void {
+        const keys = thunk.symbols.keys();
+        var strsize: u32 = 0;
+        for (keys) |sym_index| {
+            const sym = elf_file.symbol(sym_index);
+            strsize += @as(u32, @intCast(sym.name(elf_file).len + "$thunk".len + 1));
+        }
+        thunk.output_symtab_ctx.nlocals = @as(u32, @intCast(keys.len));
+        thunk.output_symtab_ctx.strsize = strsize;
+    }
+
+    /// Writes one function symbol per trampoline into `elf_file.symtab`, starting at
+    /// `output_symtab_ctx.ilocal`, and appends the matching names to `elf_file.strtab`.
+    /// String table capacity must already be reserved: the appends assume capacity.
+    pub fn writeSymtab(thunk: Thunk, elf_file: *Elf) void {
+        const cpu_arch = elf_file.getTarget().cpu.arch;
+        for (thunk.symbols.keys(), thunk.output_symtab_ctx.ilocal..) |sym_index, ilocal| {
+            const sym = elf_file.symbol(sym_index);
+            const st_name = @as(u32, @intCast(elf_file.strtab.items.len));
+            elf_file.strtab.appendSliceAssumeCapacity(sym.name(elf_file));
+            elf_file.strtab.appendSliceAssumeCapacity("$thunk");
+            elf_file.strtab.appendAssumeCapacity(0);
+            elf_file.symtab.items[ilocal] = .{
+                .st_name = st_name,
+                .st_info = elf.STT_FUNC,
+                .st_other = 0,
+                .st_shndx = @intCast(thunk.output_section_index),
+                .st_value = thunk.targetAddress(sym_index, elf_file),
+                .st_size = trampolineSize(cpu_arch),
+            };
+        }
+    }
+
+    /// Size in bytes of a single trampoline for `cpu_arch`.
+    fn trampolineSize(cpu_arch: std.Target.Cpu.Arch) usize {
+        return switch (cpu_arch) {
+            .aarch64 => aarch64.trampoline_size,
+            .x86_64, .riscv64 => unreachable,
+            else => @panic("unhandled arch"),
+        };
+    }
+
+    /// Guard against formatting a `Thunk` without linker context; use `fmt` instead.
+    pub fn format(
+        thunk: Thunk,
+        comptime unused_fmt_string: []const u8,
+        options: std.fmt.FormatOptions,
+        writer: anytype,
+    ) !void {
+        _ = thunk;
+        _ = unused_fmt_string;
+        _ = options;
+        _ = writer;
+        @compileError("do not format Thunk directly");
+    }
+
+    /// Returns a formatter that prints the thunk together with its symbols.
+    pub fn fmt(thunk: Thunk, elf_file: *Elf) std.fmt.Formatter(format2) {
+        return .{ .data = .{
+            .thunk = thunk,
+            .elf_file = elf_file,
+        } };
+    }
+
+    /// Data carried by the formatter returned from `fmt`.
+    const FormatContext = struct {
+        thunk: Thunk,
+        elf_file: *Elf,
+    };
+
+    /// Prints the thunk's offset and size, followed by one line per recorded symbol
+    /// with its index, name and value.
+    fn format2(
+        ctx: FormatContext,
+        comptime unused_fmt_string: []const u8,
+        options: std.fmt.FormatOptions,
+        writer: anytype,
+    ) !void {
+        _ = options;
+        _ = unused_fmt_string;
+        const thunk = ctx.thunk;
+        const elf_file = ctx.elf_file;
+        try writer.print("@{x} : size({x})\n", .{ thunk.value, thunk.size(elf_file) });
+        for (thunk.symbols.keys()) |index| {
+            const sym = elf_file.symbol(index);
+            try writer.print("  %{d} : {s} : @{x}\n", .{ index, sym.name(elf_file), sym.value });
+        }
+    }
+
+    /// Index of a thunk in the linker's thunk list.
+    pub const Index = u32;
+};
+
+/// aarch64-specific pieces of thunk handling.
+const aarch64 = struct {
+    const util = @import("../aarch64.zig");
+    const Instruction = util.Instruction;
+
+    /// Size in bytes of one trampoline: the three 32-bit instructions emitted by `write`.
+    const trampoline_size = 3 * @sizeOf(u32);
+
+    /// Decides whether the branch described by `rel` inside `atom` can be resolved
+    /// without a trampoline.
+    /// Relocations other than CALL26/JUMP26 are always reported as reachable.
+    /// A branch is reported as not reachable when its target has a PLT entry, lives in
+    /// a different output section, belongs to an atom that still carries the -1
+    /// "unplaced" sentinel, or is further away than a signed 28-bit displacement.
+    fn isReachable(atom: *const Atom, rel: elf.Elf64_Rela, elf_file: *Elf) bool {
+        switch (@as(elf.R_AARCH64, @enumFromInt(rel.r_type()))) {
+            .CALL26, .JUMP26 => {},
+            else => return true,
+        }
+
+        const sym_index = switch (atom.file(elf_file).?) {
+            .zig_object => |zo| zo.symbol(rel.r_sym()),
+            .object => |obj| obj.symbols.items[rel.r_sym()],
+            else => unreachable,
+        };
+        const sym = elf_file.symbol(sym_index);
+        if (sym.flags.has_plt or atom.output_section_index != sym.output_section_index) return false;
+
+        // Only unwrap the target's atom after the cheaper checks above have passed.
+        const unplaced: u64 = @bitCast(@as(i64, -1));
+        if (sym.atom(elf_file).?.value == unplaced) return false;
+
+        const source_addr: i64 = @intCast(atom.address(elf_file) + rel.r_offset);
+        const target_addr: i64 = @intCast(sym.address(.{}, elf_file));
+        return math.cast(i28, target_addr + rel.r_addend - source_addr) != null;
+    }
+
+    /// Emits one trampoline per symbol of `thunk` to `writer`, in map order.
+    /// Each trampoline is `adrp x16, <page>` / `add x16, x16, <low 12 bits>` / `br x16`,
+    /// written as little-endian 32-bit words.
+    fn write(thunk: Thunk, elf_file: *Elf, writer: anytype) !void {
+        for (thunk.symbols.keys(), 0..) |sym_index, slot| {
+            const target_addr = elf_file.symbol(sym_index).address(.{}, elf_file);
+            const slot_addr = thunk.address(elf_file) + slot * trampoline_size;
+            const page_delta = try util.calcNumberOfPages(slot_addr, target_addr);
+            const low12: u12 = @truncate(target_addr);
+            const insts = [_]u32{
+                Instruction.adrp(.x16, page_delta).toU32(),
+                Instruction.add(.x16, .x16, low12, false).toU32(),
+                Instruction.br(.x16).toU32(),
+            };
+            for (insts) |inst| try writer.writeInt(u32, inst, .little);
+        }
+    }
+};
+
+const assert = std.debug.assert;
+const elf = std.elf;
+const log = std.log.scoped(.link);
+const math = std.math;
+const mem = std.mem;
+const std = @import("std");
+
+const Allocator = mem.Allocator;
+const Atom = @import("Atom.zig");
+const Elf = @import("../Elf.zig");
+const Symbol = @import("Symbol.zig");
src/link/Elf.zig
@@ -206,6 +206,9 @@ num_ifunc_dynrelocs: usize = 0,
 /// List of atoms that are owned directly by the linker.
 atoms: std.ArrayListUnmanaged(Atom) = .{},
 
+/// List of range extension thunks.
+thunks: std.ArrayListUnmanaged(Thunk) = .{},
+
 /// Table of last atom index in a section and matching atom free list if any.
 last_atom_and_free_list_table: LastAtomAndFreeListTable = .{},
 
@@ -255,7 +258,7 @@ pub fn createEmpty(
     };
 
     const page_size: u32 = switch (target.cpu.arch) {
-        .powerpc64le => 0x10000,
+        .aarch64, .powerpc64le => 0x10000,
         .sparc64 => 0x2000,
         else => 0x1000,
     };
@@ -488,6 +491,7 @@ pub fn deinit(self: *Elf) void {
     self.start_stop_indexes.deinit(gpa);
 
     self.atoms.deinit(gpa);
+    self.thunks.deinit(gpa);
     for (self.last_atom_and_free_list_table.values()) |*value| {
         value.free_list.deinit(gpa);
     }
@@ -3593,7 +3597,7 @@ fn sortInitFini(self: *Elf) !void {
         }
     };
 
-    for (self.shdrs.items, 0..) |*shdr, shndx| {
+    for (self.shdrs.items, 0..) |shdr, shndx| {
         if (shdr.sh_flags & elf.SHF_ALLOC == 0) continue;
 
         var is_init_fini = false;
@@ -4038,6 +4042,8 @@ fn updateSectionSizes(self: *Elf) !void {
     const target = self.base.comp.root_mod.resolved_target.result;
     for (self.output_sections.keys(), self.output_sections.values()) |shndx, atom_list| {
         const shdr = &self.shdrs.items[shndx];
+        if (atom_list.items.len == 0) continue;
+        if (self.requiresThunks() and shdr.sh_flags & elf.SHF_EXECINSTR != 0) continue;
         for (atom_list.items) |atom_index| {
             const atom_ptr = self.atom(atom_index) orelse continue;
             if (!atom_ptr.flags.alive) continue;
@@ -4049,6 +4055,17 @@ fn updateSectionSizes(self: *Elf) !void {
         }
     }
 
+    if (self.requiresThunks()) {
+        for (self.output_sections.keys(), self.output_sections.values()) |shndx, atom_list| {
+            const shdr = self.shdrs.items[shndx];
+            if (shdr.sh_flags & elf.SHF_EXECINSTR == 0) continue;
+            if (atom_list.items.len == 0) continue;
+
+            // Create jump/branch range extenders if needed.
+            try thunks.createThunks(shndx, self);
+        }
+    }
+
     if (self.eh_frame_section_index) |index| {
         self.shdrs.items[index].sh_size = try eh_frame.calcEhFrameSize(self);
     }
@@ -4576,6 +4593,13 @@ pub fn updateSymtabSize(self: *Elf) !void {
         nlocals += 1;
     }
 
+    for (self.thunks.items) |*th| {
+        th.output_symtab_ctx.ilocal = nlocals + 1;
+        th.calcSymtabSize(self);
+        nlocals += th.output_symtab_ctx.nlocals;
+        strsize += th.output_symtab_ctx.strsize;
+    }
+
     for (files.items) |index| {
         const file_ptr = self.file(index).?;
         const ctx = switch (file_ptr) {
@@ -4806,6 +4830,10 @@ pub fn writeSymtab(self: *Elf) !void {
 
     self.writeSectionSymbols();
 
+    for (self.thunks.items) |th| {
+        th.writeSymtab(self);
+    }
+
     if (self.zigObjectPtr()) |zig_object| {
         zig_object.asFile().writeSymtab(self);
     }
@@ -5401,6 +5429,18 @@ pub fn addAtom(self: *Elf) !Atom.Index {
     return index;
 }
 
+/// Appends a default-initialized thunk to `thunks` and returns its index.
+/// Fails only if growing the list fails.
+pub fn addThunk(self: *Elf) !Thunk.Index {
+    const gpa = self.base.comp.gpa;
+    const new_index: Thunk.Index = @intCast(self.thunks.items.len);
+    try self.thunks.append(gpa, .{});
+    return new_index;
+}
+
+/// Returns a pointer to the thunk stored at `index`.
+/// `index` must be in bounds; this is asserted.
+pub fn thunk(self: *Elf, index: Thunk.Index) *Thunk {
+    const all = self.thunks.items;
+    assert(index < all.len);
+    return &all[index];
+}
+
 pub fn file(self: *Elf, index: File.Index) ?File {
     const tag = self.files.items(.tags)[index];
     return switch (tag) {
@@ -5957,6 +5997,10 @@ fn fmtDumpState(
         try writer.print("linker_defined({d}) : (linker defined)\n", .{index});
         try writer.print("{}\n", .{linker_defined.fmtSymtab(self)});
     }
+    try writer.writeAll("thunks\n");
+    for (self.thunks.items, 0..) |th, index| {
+        try writer.print("thunk({d}) : {}\n", .{ index, th.fmt(self) });
+    }
     try writer.print("{}\n", .{self.zig_got.fmt(self)});
     try writer.print("{}\n", .{self.got.fmt(self)});
     try writer.print("{}\n", .{self.plt.fmt(self)});
@@ -6024,6 +6068,14 @@ pub fn getTarget(self: Elf) std.Target {
     return self.base.comp.root_mod.resolved_target.result;
 }
 
+/// Reports whether the target architecture needs range extension thunks:
+/// true for aarch64, false for x86_64 and riscv64. Any other arch panics.
+fn requiresThunks(self: Elf) bool {
+    const cpu_arch = self.getTarget().cpu.arch;
+    switch (cpu_arch) {
+        .aarch64 => return true,
+        .x86_64, .riscv64 => return false,
+        else => @panic("TODO unimplemented architecture"),
+    }
+}
+
 /// The following three values are only observed at compile-time and used to emit a compile error
 /// to remind the programmer to update expected maximum numbers of different program header types
 /// so that we reserve enough space for the program header table up-front.
@@ -6154,6 +6206,7 @@ const musl = @import("../musl.zig");
 const relocatable = @import("Elf/relocatable.zig");
 const relocation = @import("Elf/relocation.zig");
 const target_util = @import("../target.zig");
+const thunks = @import("Elf/thunks.zig");
 const trace = @import("../tracy.zig").trace;
 const synthetic_sections = @import("Elf/synthetic_sections.zig");
 
@@ -6186,6 +6239,7 @@ const PltGotSection = synthetic_sections.PltGotSection;
 const SharedObject = @import("Elf/SharedObject.zig");
 const Symbol = @import("Elf/Symbol.zig");
 const StringTable = @import("StringTable.zig");
+const Thunk = thunks.Thunk;
 const TypedValue = @import("../TypedValue.zig");
 const VerneedSection = synthetic_sections.VerneedSection;
 const ZigGotSection = synthetic_sections.ZigGotSection;
CMakeLists.txt
@@ -597,6 +597,7 @@ set(ZIG_STAGE2_SOURCES
     "${CMAKE_SOURCE_DIR}/src/link/Elf/relocatable.zig"
     "${CMAKE_SOURCE_DIR}/src/link/Elf/relocation.zig"
     "${CMAKE_SOURCE_DIR}/src/link/Elf/synthetic_sections.zig"
+    "${CMAKE_SOURCE_DIR}/src/link/Elf/thunks.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/Archive.zig"
     "${CMAKE_SOURCE_DIR}/src/link/MachO/Atom.zig"