Commit aac4c1d3b2

Jakub Konka <kubkon@jakubkonka.com>
2022-09-01 00:24:06
coff: fix contents of IAT, and ensure codegen loads addr into reg
As far as I can see, unlike with MachO, we don't have any stub helper routines available, so we need to load the bound pointer into a register and then call it.
1 parent 0ebeb58
Changed files (4)
src/arch/x86_64/CodeGen.zig
@@ -137,6 +137,7 @@ pub const MCValue = union(enum) {
     /// If the type is a pointer, it means the pointer is referenced indirectly via GOT.
     /// When lowered, linker will emit a relocation of type X86_64_RELOC_GOT.
     got_load: u32,
+    imports_load: u32,
     /// The value is in memory referenced directly via symbol index.
     /// If the type is a pointer, it means the pointer is referenced directly via symbol index.
     /// When lowered, linker will emit a relocation of type X86_64_RELOC_SIGNED.
@@ -156,6 +157,7 @@ pub const MCValue = union(enum) {
             .ptr_stack_offset,
             .direct_load,
             .got_load,
+            .imports_load,
             => true,
             else => false,
         };
@@ -2274,6 +2276,7 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void {
         .memory,
         .got_load,
         .direct_load,
+        .imports_load,
         => {
             try self.loadMemPtrIntoRegister(addr_reg, Type.usize, array);
         },
@@ -2618,6 +2621,7 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo
         .memory,
         .got_load,
         .direct_load,
+        .imports_load,
         => {
             const reg = try self.copyToTmpRegister(ptr_ty, ptr);
             try self.load(dst_mcv, .{ .register = reg }, ptr_ty);
@@ -2655,6 +2659,7 @@ fn loadMemPtrIntoRegister(self: *Self, reg: Register, ptr_ty: Type, ptr: MCValue
     switch (ptr) {
         .got_load,
         .direct_load,
+        .imports_load,
         => |sym_index| {
             const abi_size = @intCast(u32, ptr_ty.abiSize(self.target.*));
             const mod = self.bin_file.options.module.?;
@@ -2666,6 +2671,7 @@ fn loadMemPtrIntoRegister(self: *Self, reg: Register, ptr_ty: Type, ptr: MCValue
             const flags: u2 = switch (ptr) {
                 .got_load => 0b00,
                 .direct_load => 0b01,
+                .imports_load => 0b10,
                 else => unreachable,
             };
             _ = try self.addInst(.{
@@ -2763,6 +2769,7 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type
                 },
                 .got_load,
                 .direct_load,
+                .imports_load,
                 .memory,
                 .stack_offset,
                 => {
@@ -2783,6 +2790,7 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type
         },
         .got_load,
         .direct_load,
+        .imports_load,
         .memory,
         => {
             const value_lock: ?RegisterLock = switch (value) {
@@ -2854,6 +2862,7 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type
                 },
                 .got_load,
                 .direct_load,
+                .imports_load,
                 .memory,
                 => {
                     if (abi_size <= 8) {
@@ -3565,6 +3574,7 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValu
                 .memory,
                 .got_load,
                 .direct_load,
+                .imports_load,
                 .eflags,
                 => {
                     assert(abi_size <= 8);
@@ -3650,7 +3660,10 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValu
                 => {
                     return self.fail("TODO implement x86 ADD/SUB/CMP source memory", .{});
                 },
-                .got_load, .direct_load => {
+                .got_load,
+                .direct_load,
+                .imports_load,
+                => {
                     return self.fail("TODO implement x86 ADD/SUB/CMP source symbol at index in linker", .{});
                 },
                 .eflags => {
@@ -3661,7 +3674,10 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValu
         .memory => {
             return self.fail("TODO implement x86 ADD/SUB/CMP destination memory", .{});
         },
-        .got_load, .direct_load => {
+        .got_load,
+        .direct_load,
+        .imports_load,
+        => {
             return self.fail("TODO implement x86 ADD/SUB/CMP destination symbol at index", .{});
         },
     }
@@ -3729,7 +3745,10 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M
                 .memory => {
                     return self.fail("TODO implement x86 multiply source memory", .{});
                 },
-                .got_load, .direct_load => {
+                .got_load,
+                .direct_load,
+                .imports_load,
+                => {
                     return self.fail("TODO implement x86 multiply source symbol at index in linker", .{});
                 },
                 .eflags => {
@@ -3773,7 +3792,10 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M
                 .memory, .stack_offset => {
                     return self.fail("TODO implement x86 multiply source memory", .{});
                 },
-                .got_load, .direct_load => {
+                .got_load,
+                .direct_load,
+                .imports_load,
+                => {
                     return self.fail("TODO implement x86 multiply source symbol at index in linker", .{});
                 },
                 .eflags => {
@@ -3784,7 +3806,10 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M
         .memory => {
             return self.fail("TODO implement x86 multiply destination memory", .{});
         },
-        .got_load, .direct_load => {
+        .got_load,
+        .direct_load,
+        .imports_load,
+        => {
             return self.fail("TODO implement x86 multiply destination symbol at index in linker", .{});
         },
     }
@@ -3948,6 +3973,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions.
             .memory => unreachable,
             .got_load => unreachable,
             .direct_load => unreachable,
+            .imports_load => unreachable,
             .eflags => unreachable,
             .register_overflow => unreachable,
         }
@@ -4025,15 +4051,16 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions.
                     });
                 }
                 const sym_index = try coff_file.getGlobalSymbol(mem.sliceTo(decl_name, 0));
+                try self.genSetReg(Type.initTag(.usize), .rax, .{
+                    .imports_load = sym_index,
+                });
                 _ = try self.addInst(.{
-                    .tag = .call_extern,
-                    .ops = undefined,
-                    .data = .{
-                        .relocation = .{
-                            .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.coff.sym_index,
-                            .sym_index = sym_index,
-                        },
-                    },
+                    .tag = .call,
+                    .ops = Mir.Inst.Ops.encode(.{
+                        .reg1 = .rax,
+                        .flags = 0b01,
+                    }),
+                    .data = undefined,
                 });
             } else {
                 return self.fail("TODO implement calling bitcasted functions", .{});
@@ -4443,7 +4470,11 @@ fn genVarDbgInfo(
                     leb128.writeILEB128(dbg_info.writer(), -off) catch unreachable;
                     dbg_info.items[fixup] += @intCast(u8, dbg_info.items.len - fixup - 2);
                 },
-                .memory, .got_load, .direct_load => {
+                .memory,
+                .got_load,
+                .direct_load,
+                .imports_load,
+                => {
                     const ptr_width = @intCast(u8, @divExact(self.target.cpu.arch.ptrBitWidth(), 8));
                     const is_ptr = switch (tag) {
                         .dbg_var_ptr => true,
@@ -4474,7 +4505,10 @@ fn genVarDbgInfo(
                         try dbg_info.append(DW.OP.deref);
                     }
                     switch (mcv) {
-                        .got_load, .direct_load => |index| try dw.addExprlocReloc(index, offset, is_ptr),
+                        .got_load,
+                        .direct_load,
+                        .imports_load,
+                        => |index| try dw.addExprlocReloc(index, offset, is_ptr),
                         else => {},
                     }
                 },
@@ -5474,6 +5508,7 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE
         .memory,
         .direct_load,
         .got_load,
+        .imports_load,
         => {
             if (abi_size <= 8) {
                 const reg = try self.copyToTmpRegister(ty, mcv);
@@ -5721,6 +5756,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl
         .memory,
         .got_load,
         .direct_load,
+        .imports_load,
         => {
             if (abi_size <= 8) {
                 const reg = try self.copyToTmpRegister(ty, mcv);
@@ -5848,6 +5884,7 @@ fn genInlineMemcpy(
         .memory,
         .got_load,
         .direct_load,
+        .imports_load,
         => {
             try self.loadMemPtrIntoRegister(dst_addr_reg, Type.usize, dst_ptr);
         },
@@ -5883,6 +5920,7 @@ fn genInlineMemcpy(
         .memory,
         .got_load,
         .direct_load,
+        .imports_load,
         => {
             try self.loadMemPtrIntoRegister(src_addr_reg, Type.usize, src_ptr);
         },
@@ -6021,6 +6059,7 @@ fn genInlineMemset(
         .memory,
         .got_load,
         .direct_load,
+        .imports_load,
         => {
             try self.loadMemPtrIntoRegister(addr_reg, Type.usize, dst_ptr);
         },
@@ -6261,6 +6300,7 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
         },
         .direct_load,
         .got_load,
+        .imports_load,
         => {
             switch (ty.zigTypeTag()) {
                 .Float => {
@@ -6655,7 +6695,11 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void {
     // TODO Is this the only condition for pointer dereference for memcpy?
     const src: MCValue = blk: {
         switch (src_ptr) {
-            .got_load, .direct_load, .memory => {
+            .got_load,
+            .direct_load,
+            .imports_load,
+            .memory,
+            => {
                 const reg = try self.register_manager.allocReg(null, gp);
                 try self.loadMemPtrIntoRegister(reg, src_ty, src_ptr);
                 _ = try self.addInst(.{
src/arch/x86_64/Emit.zig
@@ -985,8 +985,8 @@ fn mirLeaPic(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
     const relocation = emit.mir.instructions.items(.data)[inst].relocation;
 
     switch (ops.flags) {
-        0b00, 0b01 => {},
-        else => return emit.fail("TODO unused LEA PIC variants 0b10 and 0b11", .{}),
+        0b00, 0b01, 0b10 => {},
+        else => return emit.fail("TODO unused LEA PIC variant 0b11", .{}),
     }
 
     // lea reg1, [rip + reloc]
@@ -1024,6 +1024,7 @@ fn mirLeaPic(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
             .@"type" = switch (ops.flags) {
                 0b00 => .got,
                 0b01 => .direct,
+                0b10 => .imports,
                 else => unreachable,
             },
             .target = .{ .sym_index = relocation.sym_index, .file = null },
src/arch/x86_64/Mir.zig
@@ -180,6 +180,7 @@ pub const Inst = struct {
         /// ops flags: form:
         ///      0b00  reg1, [rip + reloc] // via GOT PIC
         ///      0b01  reg1, [rip + reloc] // direct load PIC
+        ///      0b10  reg1, [rip + reloc] // via imports table PIC
         /// Notes:
         /// * `Data` contains `relocation`
         lea_pic,
src/link/Coff.zig
@@ -123,6 +123,7 @@ pub const Reloc = struct {
     @"type": enum {
         got,
         direct,
+        imports,
     },
     target: SymbolWithLoc,
     offset: u32,
@@ -812,18 +813,18 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void {
                 break :blk got_atom.getSymbol(self).value;
             },
             .direct => blk: {
-                if (self.getImportAtomForSymbol(reloc.target)) |import_atom| {
-                    break :blk import_atom.getSymbol(self).value;
-                }
                 break :blk self.getSymbol(reloc.target).value;
             },
+            .imports => blk: {
+                const import_atom = self.getImportAtomForSymbol(reloc.target) orelse continue;
+                break :blk import_atom.getSymbol(self).value;
+            },
         };
         const target_vaddr_with_addend = target_vaddr + reloc.addend;
-
         if (target_vaddr_with_addend == reloc.prev_vaddr) continue;
 
         log.debug("  ({x}: [() => 0x{x} ({s})) ({s})", .{
-            reloc.offset,
+            source_sym.value + reloc.offset,
             target_vaddr_with_addend,
             self.getSymbolName(reloc.target),
             @tagName(reloc.@"type"),
@@ -833,7 +834,7 @@ fn resolveRelocs(self: *Coff, atom: *Atom) !void {
             const source_vaddr = source_sym.value + reloc.offset;
             const disp = target_vaddr_with_addend - source_vaddr - 4;
             try self.base.file.?.pwriteAll(mem.asBytes(&@intCast(u32, disp)), file_offset + reloc.offset);
-            return;
+            continue;
         }
 
         switch (self.ptr_width) {
@@ -1345,8 +1346,8 @@ pub fn flushModule(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod
             try self.resolveRelocs(atom.*);
         }
     }
-    try self.writeBaseRelocations();
     try self.writeImportTable();
+    try self.writeBaseRelocations();
 
     if (self.getEntryPoint()) |entry_sym_loc| {
         self.entry_addr = self.getSymbol(entry_sym_loc).value;
@@ -1487,14 +1488,80 @@ fn writeBaseRelocations(self: *Coff) !void {
 
 fn writeImportTable(self: *Coff) !void {
     const gpa = self.base.allocator;
-    _ = gpa;
 
     const section = self.sections.get(self.idata_section_index.?);
     const iat_rva = section.header.virtual_address;
     const iat_size = blk: {
         const last_atom = section.last_atom.?;
-        break :blk last_atom.getSymbol(self).value + last_atom.size - iat_rva;
+        break :blk last_atom.getSymbol(self).value + last_atom.size * 2 - iat_rva; // account for sentinel zero pointer
+    };
+
+    const dll_name = "KERNEL32.dll";
+
+    var import_dir_entry = coff.ImportDirectoryEntry{
+        .import_lookup_table_rva = @sizeOf(coff.ImportDirectoryEntry) * 2,
+        .time_date_stamp = 0,
+        .forwarder_chain = 0,
+        .name_rva = 0,
+        .import_address_table_rva = iat_rva,
+    };
+
+    // TODO: we currently assume there's only one (implicit) DLL - KERNEL32.dll
+    var lookup_table = std.ArrayList(coff.ImportLookupEntry64.ByName).init(gpa);
+    defer lookup_table.deinit();
+
+    var names_table = std.ArrayList(u8).init(gpa);
+    defer names_table.deinit();
+
+    // TODO: check if import is still valid
+    for (self.imports_table.keys()) |target| {
+        const target_name = self.getSymbolName(target);
+        const start = names_table.items.len;
+        mem.writeIntLittle(u16, try names_table.addManyAsArray(2), 0); // TODO: currently, hint is set to 0 as we haven't yet parsed any DLL
+        try names_table.appendSlice(target_name);
+        try names_table.append(0);
+        const end = names_table.items.len;
+        if (!mem.isAlignedGeneric(usize, end - start, @sizeOf(u16))) {
+            try names_table.append(0);
+        }
+        try lookup_table.append(.{ .name_table_rva = @intCast(u31, start) });
+    }
+    try lookup_table.append(.{ .name_table_rva = 0 }); // the sentinel
+
+    const dir_entry_size = @sizeOf(coff.ImportDirectoryEntry) + lookup_table.items.len * @sizeOf(coff.ImportLookupEntry64.ByName) + names_table.items.len + dll_name.len + 1;
+    const needed_size = iat_size + dir_entry_size + @sizeOf(coff.ImportDirectoryEntry);
+    const sect_capacity = self.allocatedSize(section.header.pointer_to_raw_data);
+    assert(needed_size < sect_capacity); // TODO: implement expanding .idata section
+
+    // Fixup offsets
+    const base_rva = iat_rva + iat_size;
+    import_dir_entry.import_lookup_table_rva += base_rva;
+    import_dir_entry.name_rva = @intCast(u32, base_rva + dir_entry_size + @sizeOf(coff.ImportDirectoryEntry) - dll_name.len - 1);
+
+    for (lookup_table.items[0 .. lookup_table.items.len - 1]) |*lk| {
+        lk.name_table_rva += @intCast(u31, base_rva + @sizeOf(coff.ImportDirectoryEntry) * 2 + lookup_table.items.len * @sizeOf(coff.ImportLookupEntry64.ByName));
+    }
+
+    var buffer = std.ArrayList(u8).init(gpa);
+    defer buffer.deinit();
+    try buffer.ensureTotalCapacity(dir_entry_size + @sizeOf(coff.ImportDirectoryEntry));
+    buffer.appendSliceAssumeCapacity(mem.asBytes(&import_dir_entry));
+    buffer.appendNTimesAssumeCapacity(0, @sizeOf(coff.ImportDirectoryEntry)); // the sentinel; TODO: I think doing all of the above on bytes directly might be cleaner
+    buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(lookup_table.items));
+    buffer.appendSliceAssumeCapacity(names_table.items);
+    buffer.appendSliceAssumeCapacity(dll_name);
+    buffer.appendAssumeCapacity(0);
+
+    try self.base.file.?.pwriteAll(buffer.items, section.header.pointer_to_raw_data + iat_size);
+    // Override the IAT atoms
+    // TODO: we should rewrite only dirtied atoms, but that's for way later
+    try self.base.file.?.pwriteAll(mem.sliceAsBytes(lookup_table.items), section.header.pointer_to_raw_data);
+
+    self.data_directories[@enumToInt(coff.DirectoryEntry.IMPORT)] = .{
+        .virtual_address = iat_rva + iat_size,
+        .size = @intCast(u32, @sizeOf(coff.ImportDirectoryEntry) * 2),
     };
+
     self.data_directories[@enumToInt(coff.DirectoryEntry.IAT)] = .{
         .virtual_address = iat_rva,
         .size = iat_size,