Commit 7a9eba2f85

Jakub Konka <kubkon@jakubkonka.com>
2023-09-11 10:52:30
elf: emit relocation to an extern function
1 parent d07edfa
src/arch/x86_64/CodeGen.zig
@@ -125,7 +125,9 @@ const Owner = union(enum) {
             .func_index => |func_index| {
                 const mod = ctx.bin_file.options.module.?;
                 const decl_index = mod.funcOwnerDeclIndex(func_index);
-                if (ctx.bin_file.cast(link.File.MachO)) |macho_file| {
+                if (ctx.bin_file.cast(link.File.Elf)) |elf_file| {
+                    return elf_file.getOrCreateMetadataForDecl(decl_index);
+                } else if (ctx.bin_file.cast(link.File.MachO)) |macho_file| {
                     const atom = try macho_file.getOrCreateAtomForDecl(decl_index);
                     return macho_file.getAtom(atom).getSymbolIndex().?;
                 } else if (ctx.bin_file.cast(link.File.Coff)) |coff_file| {
@@ -136,7 +138,10 @@ const Owner = union(enum) {
                 } else unreachable;
             },
             .lazy_sym => |lazy_sym| {
-                if (ctx.bin_file.cast(link.File.MachO)) |macho_file| {
+                if (ctx.bin_file.cast(link.File.Elf)) |elf_file| {
+                    return elf_file.getOrCreateMetadataForLazySymbol(lazy_sym) catch |err|
+                        ctx.fail("{s} creating lazy symbol", .{@errorName(err)});
+                } else if (ctx.bin_file.cast(link.File.MachO)) |macho_file| {
                     const atom = macho_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err|
                         return ctx.fail("{s} creating lazy symbol", .{@errorName(err)});
                     return macho_file.getAtom(atom).getSymbolIndex().?;
@@ -8178,7 +8183,18 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier
         } else if (func_value.getExternFunc(mod)) |extern_func| {
             const decl_name = mod.intern_pool.stringToSlice(mod.declPtr(extern_func.decl).name);
             const lib_name = mod.intern_pool.stringToSliceUnwrap(extern_func.lib_name);
-            if (self.bin_file.cast(link.File.Coff)) |coff_file| {
+            if (self.bin_file.cast(link.File.Elf)) |elf_file| {
+                const atom_index = try self.owner.getSymbolIndex(self);
+                const sym_index = try elf_file.getGlobalSymbol(decl_name, lib_name);
+                _ = try self.addInst(.{
+                    .tag = .call,
+                    .ops = .extern_fn_reloc,
+                    .data = .{ .reloc = .{
+                        .atom_index = atom_index,
+                        .sym_index = sym_index,
+                    } },
+                });
+            } else if (self.bin_file.cast(link.File.Coff)) |coff_file| {
                 const atom_index = try self.owner.getSymbolIndex(self);
                 const sym_index = try coff_file.getGlobalSymbol(decl_name, lib_name);
                 _ = try self.addInst(.{
src/arch/x86_64/Emit.zig
@@ -41,7 +41,15 @@ pub fn emitMir(emit: *Emit) Error!void {
                     .offset = end_offset - 4,
                     .length = @as(u5, @intCast(end_offset - start_offset)),
                 }),
-                .linker_extern_fn => |symbol| if (emit.bin_file.cast(link.File.MachO)) |macho_file| {
+                .linker_extern_fn => |symbol| if (emit.bin_file.cast(link.File.Elf)) |elf_file| {
+                    // Add relocation to the decl.
+                    // The relocation has to point at the 4-byte displacement that ends the
+                    // instruction, i.e. `end_offset - 4` (same position as the relocation
+                    // recorded just above). The `-4` addend then turns `S + A - P` into a
+                    // displacement relative to the end of the instruction. Using `end_offset`
+                    // itself would patch the bytes that follow the call.
+                    const atom_ptr = elf_file.symbol(symbol.atom_index).atom(elf_file).?;
+                    try atom_ptr.addReloc(elf_file, .{
+                        .r_offset = end_offset - 4,
+                        .r_info = (@as(u64, @intCast(symbol.sym_index)) << 32) | std.elf.R_X86_64_PLT32,
+                        .r_addend = -4,
+                    });
+                } else if (emit.bin_file.cast(link.File.MachO)) |macho_file| {
                     // Add relocation to the decl.
                     const atom_index = macho_file.getAtomIndexForSymbol(.{ .sym_index = symbol.atom_index }).?;
                     const target = macho_file.getGlobalByIndex(symbol.sym_index);
src/link/Elf/Atom.zig
@@ -26,7 +26,7 @@ relocs_section_index: Index = 0,
 atom_index: Index = 0,
 
 /// Specifies whether this atom is alive or has been garbage collected.
-alive: bool = true,
+alive: bool = false,
 
 /// Specifies if the atom has been visited during garbage collection.
 visited: bool = false,
@@ -192,6 +192,7 @@ pub fn free(self: *Atom, elf_file: *Elf) void {
     log.debug("freeAtom {d} ({s})", .{ self.atom_index, self.name(elf_file) });
 
     const gpa = elf_file.base.allocator;
+    const zig_module = elf_file.file(self.file_index).?.zig_module;
     const shndx = self.output_section_index;
     const meta = elf_file.last_atom_and_free_list_table.getPtr(shndx).?;
     const free_list = &meta.free_list;
@@ -242,17 +243,18 @@ pub fn free(self: *Atom, elf_file: *Elf) void {
 
     // TODO create relocs free list
     self.freeRelocs(elf_file);
+    assert(zig_module.atoms.swapRemove(self.atom_index));
     self.* = .{};
 }
 
-pub fn relocs(self: Atom, elf_file: *Elf) []const Relocation {
+/// Returns the relocation records kept for this atom in the Zig module's
+/// `relocs` table, looked up by `relocs_section_index`.
+/// Panics with "TODO" when the atom's file is not the Zig module.
+pub fn relocs(self: Atom, elf_file: *Elf) []const elf.Elf64_Rela {
     const file_ptr = elf_file.file(self.file_index).?;
     if (file_ptr != .zig_module) @panic("TODO");
     const zig_module = file_ptr.zig_module;
     return zig_module.relocs.items[self.relocs_section_index].items;
 }
 
-pub fn addReloc(self: Atom, elf_file: *Elf, reloc: Relocation) !void {
+pub fn addReloc(self: Atom, elf_file: *Elf, reloc: elf.Elf64_Rela) !void {
     const gpa = elf_file.base.allocator;
     const file_ptr = elf_file.file(self.file_index).?;
     assert(file_ptr == .zig_module);
@@ -269,31 +271,178 @@ pub fn freeRelocs(self: Atom, elf_file: *Elf) void {
 }
 
 /// TODO mark relocs dirty
-pub fn resolveRelocs(self: Atom, elf_file: *Elf) !void {
+/// Applies this atom's relocations to `code` in place, using each relocation's
+/// `r_offset` as the write position inside `code`.
+/// `R_X86_64_NONE` entries are skipped; only `R_X86_64_64` and `R_X86_64_PLT32`
+/// are resolved, and any other relocation type panics with "TODO".
+pub fn resolveRelocs(self: Atom, elf_file: *Elf, code: []u8) !void {
     relocs_log.debug("0x{x}: {s}", .{ self.value, self.name(elf_file) });
-    const shdr = &elf_file.shdrs.items[self.output_section_index];
-    for (self.relocs(elf_file)) |reloc| {
-        const target_sym = elf_file.symbol(reloc.target);
-        const target_vaddr = target_sym.value + reloc.addend;
-        const section_offset = (self.value + reloc.offset) - shdr.sh_addr;
-        const file_offset = shdr.sh_offset + section_offset;
-
-        relocs_log.debug("  ({x}: [() => 0x{x}] ({s}))", .{
-            reloc.offset,
-            target_vaddr,
-            target_sym.name(elf_file),
+
+    var stream = std.io.fixedBufferStream(code);
+    const cwriter = stream.writer();
+
+    for (self.relocs(elf_file)) |rel| {
+        const r_type = rel.r_type();
+        if (r_type == elf.R_X86_64_NONE) continue;
+
+        const target = elf_file.symbol(rel.r_sym());
+
+        // We will use equation format to resolve relocations:
+        // https://intezer.com/blog/malware-analysis/executable-and-linkable-format-101-part-3-relocations/
+        //
+        // Address of the place being relocated: the atom's address plus the relocation offset.
+        const P = @as(i64, @intCast(self.value + rel.r_offset));
+        // Addend from the relocation.
+        const A = rel.r_addend;
+        // Address of the target symbol - can be address of the symbol within an atom or address of PLT stub.
+        const S = @as(i64, @intCast(target.address(.{}, elf_file)));
+        // Address of the global offset table: the section at `got_plt_section_index` if set,
+        // else the one at `got_section_index`, else 0 when neither index is set.
+        const GOT = blk: {
+            const shndx = if (elf_file.got_plt_section_index) |shndx|
+                shndx
+            else if (elf_file.got_section_index) |shndx|
+                shndx
+            else
+                null;
+            break :blk if (shndx) |index| @as(i64, @intCast(elf_file.shdrs.items[index].sh_addr)) else 0;
+        };
+        // Offset of the target's GOT address relative to the start of the global offset table.
+        const G = @as(i64, @intCast(target.gotAddress(elf_file))) - GOT;
+        // // Address of the thread pointer.
+        // const TP = @as(i64, @intCast(elf_file.getTpAddress()));
+        // // Address of the dynamic thread pointer.
+        // const DTP = @as(i64, @intCast(elf_file.getDtpAddress()));
+
+        relocs_log.debug("  {s}: {x}: [{x} => {x}] G({x}) ({s})", .{
+            fmtRelocType(r_type),
+            rel.r_offset,
+            P,
+            S + A,
+            G + GOT + A,
+            target.name(elf_file),
         });
 
-        switch (elf_file.ptr_width) {
-            .p32 => try elf_file.base.file.?.pwriteAll(
-                std.mem.asBytes(&@as(u32, @intCast(target_vaddr))),
-                file_offset,
-            ),
-            .p64 => try elf_file.base.file.?.pwriteAll(std.mem.asBytes(&target_vaddr), file_offset),
+        // `r_offset` is used directly as the write position within `code`.
+        try stream.seekTo(rel.r_offset);
+
+        switch (rel.r_type()) {
+            elf.R_X86_64_NONE => unreachable,
+            elf.R_X86_64_64 => try cwriter.writeIntLittle(i64, S + A),
+            elf.R_X86_64_PLT32 => try cwriter.writeIntLittle(i32, @as(i32, @intCast(S + A - P))),
+            // Every other relocation type is not implemented yet.
+            else => @panic("TODO"),
         }
     }
 }
 
+/// Wraps an x86_64 relocation type so that formatting it with `{}` prints the
+/// name chosen by `formatRelocType`.
+pub fn fmtRelocType(r_type: u32) std.fmt.Formatter(formatRelocType) {
+    const Wrapper = std.fmt.Formatter(formatRelocType);
+    return Wrapper{ .data = r_type };
+}
+
+/// Writes the symbolic name of the x86_64 relocation type `r_type` to `writer`,
+/// or "R_X86_64_UNKNOWN" when the value matches none of the listed constants.
+/// The format string and options are ignored.
+fn formatRelocType(
+    r_type: u32,
+    comptime unused_fmt_string: []const u8,
+    options: std.fmt.FormatOptions,
+    writer: anytype,
+) !void {
+    _ = options;
+    _ = unused_fmt_string;
+    const reloc_name: []const u8 = switch (r_type) {
+        elf.R_X86_64_NONE => "R_X86_64_NONE",
+        elf.R_X86_64_64 => "R_X86_64_64",
+        elf.R_X86_64_PC32 => "R_X86_64_PC32",
+        elf.R_X86_64_GOT32 => "R_X86_64_GOT32",
+        elf.R_X86_64_PLT32 => "R_X86_64_PLT32",
+        elf.R_X86_64_COPY => "R_X86_64_COPY",
+        elf.R_X86_64_GLOB_DAT => "R_X86_64_GLOB_DAT",
+        elf.R_X86_64_JUMP_SLOT => "R_X86_64_JUMP_SLOT",
+        elf.R_X86_64_RELATIVE => "R_X86_64_RELATIVE",
+        elf.R_X86_64_GOTPCREL => "R_X86_64_GOTPCREL",
+        elf.R_X86_64_32 => "R_X86_64_32",
+        elf.R_X86_64_32S => "R_X86_64_32S",
+        elf.R_X86_64_16 => "R_X86_64_16",
+        elf.R_X86_64_PC16 => "R_X86_64_PC16",
+        elf.R_X86_64_8 => "R_X86_64_8",
+        elf.R_X86_64_PC8 => "R_X86_64_PC8",
+        elf.R_X86_64_DTPMOD64 => "R_X86_64_DTPMOD64",
+        elf.R_X86_64_DTPOFF64 => "R_X86_64_DTPOFF64",
+        elf.R_X86_64_TPOFF64 => "R_X86_64_TPOFF64",
+        elf.R_X86_64_TLSGD => "R_X86_64_TLSGD",
+        elf.R_X86_64_TLSLD => "R_X86_64_TLSLD",
+        elf.R_X86_64_DTPOFF32 => "R_X86_64_DTPOFF32",
+        elf.R_X86_64_GOTTPOFF => "R_X86_64_GOTTPOFF",
+        elf.R_X86_64_TPOFF32 => "R_X86_64_TPOFF32",
+        elf.R_X86_64_PC64 => "R_X86_64_PC64",
+        elf.R_X86_64_GOTOFF64 => "R_X86_64_GOTOFF64",
+        elf.R_X86_64_GOTPC32 => "R_X86_64_GOTPC32",
+        elf.R_X86_64_GOT64 => "R_X86_64_GOT64",
+        elf.R_X86_64_GOTPCREL64 => "R_X86_64_GOTPCREL64",
+        elf.R_X86_64_GOTPC64 => "R_X86_64_GOTPC64",
+        elf.R_X86_64_GOTPLT64 => "R_X86_64_GOTPLT64",
+        elf.R_X86_64_PLTOFF64 => "R_X86_64_PLTOFF64",
+        elf.R_X86_64_SIZE32 => "R_X86_64_SIZE32",
+        elf.R_X86_64_SIZE64 => "R_X86_64_SIZE64",
+        elf.R_X86_64_GOTPC32_TLSDESC => "R_X86_64_GOTPC32_TLSDESC",
+        elf.R_X86_64_TLSDESC_CALL => "R_X86_64_TLSDESC_CALL",
+        elf.R_X86_64_TLSDESC => "R_X86_64_TLSDESC",
+        elf.R_X86_64_IRELATIVE => "R_X86_64_IRELATIVE",
+        elf.R_X86_64_RELATIVE64 => "R_X86_64_RELATIVE64",
+        elf.R_X86_64_GOTPCRELX => "R_X86_64_GOTPCRELX",
+        elf.R_X86_64_REX_GOTPCRELX => "R_X86_64_REX_GOTPCRELX",
+        elf.R_X86_64_NUM => "R_X86_64_NUM",
+        else => "R_X86_64_UNKNOWN",
+    };
+    // With no width or fill in play, `{s}` on a byte slice is a plain write.
+    return writer.writeAll(reloc_name);
+}
+
+/// Guard against formatting an `Atom` with a bare `{}`: instantiating this
+/// function is a compile error. Use `fmt` instead, which also carries the
+/// `Elf` that is needed to look up the atom's name.
+pub fn format(
+    atom: Atom,
+    comptime unused_fmt_string: []const u8,
+    options: std.fmt.FormatOptions,
+    writer: anytype,
+) !void {
+    _ = atom;
+    _ = unused_fmt_string;
+    _ = options;
+    _ = writer;
+    // The message used to say "symbols", which pointed at the wrong type.
+    @compileError("do not format atoms directly");
+}
+
+/// Returns a formatter that prints this atom through `format2`, bundling the
+/// atom with the `elf_file` it is printed against.
+pub fn fmt(atom: Atom, elf_file: *Elf) std.fmt.Formatter(format2) {
+    const ctx: FormatContext = .{ .atom = atom, .elf_file = elf_file };
+    return .{ .data = ctx };
+}
+
+/// Payload handed to `format2` by `fmt`.
+const FormatContext = struct {
+    /// The atom to print (held by value).
+    atom: Atom,
+    /// Linker state passed to `atom.name` and consulted for the `gc_sections` option.
+    elf_file: *Elf,
+};
+
+/// Writes a one-line summary of the atom: index, name, address, output section
+/// index, alignment and size. When the `gc_sections` option is set to true and
+/// the atom is not alive, " : [*]" is appended.
+/// The format string and options are ignored.
+fn format2(
+    ctx: FormatContext,
+    comptime unused_fmt_string: []const u8,
+    options: std.fmt.FormatOptions,
+    writer: anytype,
+) !void {
+    _ = unused_fmt_string;
+    _ = options;
+    const atom = ctx.atom;
+    try writer.print("atom({d}) : {s} : @{x} : sect({d}) : align({x}) : size({x})", .{
+        atom.atom_index,
+        atom.name(ctx.elf_file),
+        atom.value,
+        atom.output_section_index,
+        atom.alignment,
+        atom.size,
+    });
+    // if (atom.fde_start != atom.fde_end) {
+    //     try writer.writeAll(" : fdes{ ");
+    //     for (atom.getFdes(elf_file), atom.fde_start..) |fde, i| {
+    //         try writer.print("{d}", .{i});
+    //         if (!fde.alive) try writer.writeAll("([*])");
+    //         if (i < atom.fde_end - 1) try writer.writeAll(", ");
+    //     }
+    //     try writer.writeAll(" }");
+    // }
+    // An unset option counts as "garbage collection off".
+    const gc_enabled = ctx.elf_file.base.options.gc_sections orelse false;
+    if (!gc_enabled or atom.alive) return;
+    try writer.writeAll(" : [*]");
+}
+
 pub const Index = u32;
 
 const std = @import("std");
@@ -306,4 +455,3 @@ const Allocator = std.mem.Allocator;
 const Atom = @This();
 const Elf = @import("../Elf.zig");
 const File = @import("file.zig").File;
-const Relocation = @import("Relocation.zig");
src/link/Elf/Relocation.zig
@@ -1,8 +0,0 @@
-target: Symbol.Index,
-offset: u64,
-addend: u32,
-
-const std = @import("std");
-
-const Symbol = @import("Symbol.zig");
-const Relocation = @This();
src/link/Elf/ZigModule.zig
@@ -8,8 +8,8 @@ local_symbols: std.AutoArrayHashMapUnmanaged(Symbol.Index, void) = .{},
 elf_global_symbols: std.ArrayListUnmanaged(elf.Elf64_Sym) = .{},
 global_symbols: std.AutoArrayHashMapUnmanaged(Symbol.Index, void) = .{},
 
-atoms: std.ArrayListUnmanaged(Atom.Index) = .{},
-relocs: std.ArrayListUnmanaged(std.ArrayListUnmanaged(Relocation)) = .{},
+atoms: std.AutoArrayHashMapUnmanaged(Atom.Index, void) = .{},
+relocs: std.ArrayListUnmanaged(std.ArrayListUnmanaged(elf.Elf64_Rela)) = .{},
 
 alive: bool = true,
 
@@ -43,7 +43,7 @@ pub fn createAtom(self: *ZigModule, output_section_index: u16, elf_file: *Elf) !
     const relocs = try self.relocs.addOne(gpa);
     relocs.* = .{};
     atom_ptr.relocs_section_index = relocs_index;
-    try self.atoms.append(gpa, atom_index);
+    try self.atoms.putNoClobber(gpa, atom_index, {});
     return symbol_index;
 }
 
@@ -184,6 +184,28 @@ fn formatSymtab(
     }
 }
 
+/// Returns a formatter that lists the atoms tracked by this module via
+/// `formatAtoms`, bundling the module with the `elf_file` used for lookups.
+pub fn fmtAtoms(self: *ZigModule, elf_file: *Elf) std.fmt.Formatter(formatAtoms) {
+    const ctx: FormatContext = .{ .self = self, .elf_file = elf_file };
+    return .{ .data = ctx };
+}
+
+/// Prints an "  atoms" header followed by one indented line per atom index
+/// stored in the module's `atoms` map. Indices for which `elf_file.atom`
+/// returns null are skipped. The format string and options are ignored.
+fn formatAtoms(
+    ctx: FormatContext,
+    comptime unused_fmt_string: []const u8,
+    options: std.fmt.FormatOptions,
+    writer: anytype,
+) !void {
+    _ = options;
+    _ = unused_fmt_string;
+    const elf_file = ctx.elf_file;
+    try writer.writeAll("  atoms\n");
+    for (ctx.self.atoms.keys()) |atom_index| {
+        if (elf_file.atom(atom_index)) |atom_ptr| {
+            try writer.print("    {}\n", .{atom_ptr.fmt(elf_file)});
+        }
+    }
+}
+
 const assert = std.debug.assert;
 const std = @import("std");
 const elf = std.elf;
@@ -193,6 +215,5 @@ const Atom = @import("Atom.zig");
 const Elf = @import("../Elf.zig");
 const File = @import("file.zig").File;
 const Module = @import("../../Module.zig");
-const Relocation = @import("Relocation.zig");
 const Symbol = @import("Symbol.zig");
 const ZigModule = @This();
src/link/Elf.zig
@@ -302,9 +302,9 @@ pub fn getDeclVAddr(self: *Elf, decl_index: Module.Decl.Index, reloc_info: link.
     const vaddr = this_sym.value;
     const parent_atom = self.symbol(reloc_info.parent_atom_index).atom(self).?;
     try parent_atom.addReloc(self, .{
-        .target = this_sym_index,
-        .offset = reloc_info.offset,
-        .addend = reloc_info.addend,
+        .r_offset = reloc_info.offset,
+        .r_info = (@as(u64, @intCast(this_sym_index)) << 32) | elf.R_X86_64_64,
+        .r_addend = reloc_info.addend,
     });
 
     return vaddr;
@@ -1020,8 +1020,17 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node
     // Beyond this point, everything has been allocated a virtual address and we can resolve
     // the relocations.
     if (self.zig_module_index) |index| {
-        for (self.file(index).?.zig_module.atoms.items) |atom_index| {
-            try self.atom(atom_index).?.resolveRelocs(self);
+        for (self.file(index).?.zig_module.atoms.keys()) |atom_index| {
+            const atom_ptr = self.atom(atom_index).?;
+            if (!atom_ptr.alive) continue;
+            const shdr = &self.shdrs.items[atom_ptr.output_section_index];
+            const file_offset = shdr.sh_offset + atom_ptr.value - shdr.sh_addr;
+            const code = try gpa.alloc(u8, atom_ptr.size);
+            defer gpa.free(code);
+            const amt = try self.base.file.?.preadAll(code, file_offset);
+            if (amt != code.len) return error.InputOutput;
+            try atom_ptr.resolveRelocs(self, code);
+            try self.base.file.?.pwriteAll(code, file_offset);
         }
     }
 
@@ -2185,6 +2194,7 @@ fn updateDeclCode(
     const shdr_index = sym.output_section_index;
 
     sym.name_offset = try self.strtab.insert(gpa, decl_name);
+    atom_ptr.alive = true;
     atom_ptr.name_offset = sym.name_offset;
     esym.st_name = sym.name_offset;
     esym.st_info |= stt_bits;
@@ -2440,6 +2450,7 @@ fn updateLazySymbol(self: *Elf, sym: link.File.LazySymbol, symbol_index: Symbol.
     local_esym.st_info |= elf.STT_OBJECT;
     local_esym.st_size = code.len;
     const atom_ptr = local_sym.atom(self).?;
+    atom_ptr.alive = true;
     atom_ptr.name_offset = name_str_index;
     atom_ptr.alignment = math.log2_int(u64, required_alignment);
     atom_ptr.size = code.len;
@@ -2515,6 +2526,7 @@ pub fn lowerUnnamedConst(self: *Elf, typed_value: TypedValue, decl_index: Module
     local_esym.st_info |= elf.STT_OBJECT;
     local_esym.st_size = code.len;
     const atom_ptr = local_sym.atom(self).?;
+    atom_ptr.alive = true;
     atom_ptr.name_offset = name_str_index;
     atom_ptr.alignment = math.log2_int(u64, required_alignment);
     atom_ptr.size = code.len;
@@ -3374,6 +3386,17 @@ pub fn globalByName(self: *Elf, name: []const u8) ?Symbol.Index {
     return self.resolver.get(name_off);
 }
 
+/// Interns `name` in the string table and returns the index of the global
+/// symbol registered under it, creating the global if it does not exist yet.
+/// A newly created global is also recorded in `unresolved`.
+/// `lib_name` is currently ignored.
+pub fn getGlobalSymbol(self: *Elf, name: []const u8, lib_name: ?[]const u8) !u32 {
+    _ = lib_name;
+    const gpa = self.base.allocator;
+    const off = try self.strtab.insert(gpa, name);
+    const global = try self.getOrPutGlobal(off);
+    if (global.found_existing) return global.index;
+    // First time this name is seen: remember that it still needs a definition.
+    try self.unresolved.putNoClobber(gpa, off, {});
+    return global.index;
+}
+
 fn dumpState(self: *Elf) std.fmt.Formatter(fmtDumpState) {
     return .{ .data = self };
 }
src/link.zig
@@ -549,7 +549,7 @@ pub const File = struct {
         switch (base.tag) {
             // zig fmt: off
             .coff  => return @fieldParentPtr(Coff, "base", base).getGlobalSymbol(name, lib_name),
-            .elf   => unreachable,
+            .elf   => return @fieldParentPtr(Elf, "base", base).getGlobalSymbol(name, lib_name),
             .macho => return @fieldParentPtr(MachO, "base", base).getGlobalSymbol(name, lib_name),
             .plan9 => unreachable,
             .spirv => unreachable,