Commit 1ec620be62

Jakub Konka <kubkon@jakubkonka.com>
2021-03-13 13:16:34
zld: fix GOT loads and indirection on x86_64
1 parent ac0c669
Changed files (3)
lib/std/macho.zig
@@ -1616,7 +1616,7 @@ pub const GenericBlob = extern struct {
     length: u32,
 };
 
-/// The LC_DATA_IN_CODE load commands uses a linkedit_data_command 
+/// The LC_DATA_IN_CODE load commands uses a linkedit_data_command
 /// to point to an array of data_in_code_entry entries. Each entry
 /// describes a range of data in a code section.
 pub const data_in_code_entry = extern struct {
src/link/MachO/Archive.zig
@@ -209,7 +209,7 @@ fn readObject(self: *Archive, arch: std.Target.Cpu.Arch, ar_name: []const u8, re
 
     try object.readLoadCommands(reader, .{ .offset = offset });
 
-    if (object.symtab_cmd.index != null) {
+    if (object.symtab_cmd_index != null) {
         try object.readSymtab();
         try object.readStrtab();
     }
@@ -245,8 +245,11 @@ fn getName(allocator: *Allocator, header: ar_hdr, reader: anytype) ![]u8 {
             name = try allocator.dupe(u8, n);
         },
         .Length => |len| {
-            name = try allocator.alloc(u8, len);
-            try reader.readNoEof(name);
+            var n = try allocator.alloc(u8, len);
+            defer allocator.free(n);
+            try reader.readNoEof(n);
+            const actual_len = mem.indexOfScalar(u8, n, @as(u8, 0));
+            name = try allocator.dupe(u8, n[0..actual_len.?]);
         },
     }
     return name;
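
A minimal standalone sketch (not part of the commit) of the trimming this hunk adds: BSD ar stores "#1/<len>" member names inline and pads them with NULs to <len> bytes, so the declared length can overshoot the real name, and getName now dupes only the prefix up to the first 0 byte.

    const std = @import("std");
    const mem = std.mem;

    // Standalone illustration with a made-up, NUL-padded member name.
    test "trim a NUL-padded archive member name" {
        const raw = "libfoo.a\x00\x00\x00\x00";
        const actual_len = mem.indexOfScalar(u8, raw, 0) orelse raw.len;
        std.debug.assert(mem.eql(u8, raw[0..actual_len], "libfoo.a"));
    }
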
src/link/MachO/Zld.zig
@@ -77,6 +77,7 @@ lazy_imports: std.StringArrayHashMapUnmanaged(Import) = .{},
 tlv_bootstrap: ?Import = null,
 threadlocal_offsets: std.ArrayListUnmanaged(u64) = .{},
 local_rebases: std.ArrayListUnmanaged(Pointer) = .{},
+nonlazy_pointers: std.StringArrayHashMapUnmanaged(GotEntry) = .{},
 
 strtab: std.ArrayListUnmanaged(u8) = .{},
 
@@ -85,6 +86,16 @@ stub_helper_stubs_start_off: ?u64 = null,
 mappings: std.AutoHashMapUnmanaged(MappingKey, SectionMapping) = .{},
 unhandled_sections: std.AutoHashMapUnmanaged(MappingKey, u0) = .{},
 
+// TODO this will require scanning the relocations at least once to work out
+// the exact amount of local GOT indirections. For the time being, set some
+// default value.
+const max_local_got_indirections: u16 = 1000;
+
+const GotEntry = struct {
+    index: u32,
+    target_addr: u64,
+};
+
 const MappingKey = struct {
     object_id: u16,
     source_sect_id: u16,
@@ -214,6 +225,10 @@ pub fn deinit(self: *Zld) void {
         self.allocator.free(entry.key);
     }
     self.nonlazy_imports.deinit(self.allocator);
+    for (self.nonlazy_pointers.items()) |*entry| {
+        self.allocator.free(entry.key);
+    }
+    self.nonlazy_pointers.deinit(self.allocator);
     for (self.exports.items()) |*entry| {
         self.allocator.free(entry.key);
     }
@@ -874,7 +889,10 @@ fn allocateDataConstSegment(self: *Zld) !void {
 
     // Set got size
     const got = &seg.sections.items[self.got_section_index.?];
-    got.size += nonlazy * @sizeOf(u64);
+    // TODO this will require scanning the relocations at least once to work out
+    // the exact amount of local GOT indirections. For the time being, set some
+    // default value.
+    got.size += (max_local_got_indirections + nonlazy) * @sizeOf(u64);
 
     try self.allocateSegment(self.data_const_segment_cmd_index.?, 0);
 }
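
A worked example (the nonlazy-import count is made up) of the sizing above: the __got section is now padded for the full 1000-slot local reservation on top of the nonlazy imports, eight bytes per pointer, whether or not those slots end up used.

    const std = @import("std");

    // Illustrative arithmetic only; 3 nonlazy imports is a hypothetical figure.
    test "__got sizing with the temporary 1000-slot reservation" {
        const max_local_got_indirections: u16 = 1000;
        const nonlazy: u64 = 3;
        const got_size = (max_local_got_indirections + nonlazy) * @sizeOf(u64);
        std.debug.assert(got_size == 8024); // 0x1f58 bytes
    }
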
@@ -1358,13 +1376,65 @@ fn doRelocs(self: *Zld) !void {
                         const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type);
 
                         switch (rel_type) {
-                            .X86_64_RELOC_BRANCH,
-                            .X86_64_RELOC_GOT_LOAD,
-                            .X86_64_RELOC_GOT,
-                            => {
+                            .X86_64_RELOC_BRANCH => {
+                                assert(rel.r_length == 2);
+                                const inst = code[off..][0..4];
+                                const displacement = @bitCast(u32, @intCast(i32, @intCast(i64, target_addr) - @intCast(i64, this_addr) - 4));
+                                mem.writeIntLittle(u32, inst, displacement);
+                            },
+                            .X86_64_RELOC_GOT_LOAD => {
                                 assert(rel.r_length == 2);
                                 const inst = code[off..][0..4];
                                 const displacement = @bitCast(u32, @intCast(i32, @intCast(i64, target_addr) - @intCast(i64, this_addr) - 4));
+
+                                blk: {
+                                    const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+                                    const got = data_const_seg.sections.items[self.got_section_index.?];
+                                    if (got.addr <= target_addr and target_addr < got.addr + got.size) break :blk;
+                                    log.debug("    | rewriting to leaq", .{});
+                                    code[off - 2] = 0x8d;
+                                }
+
+                                mem.writeIntLittle(u32, inst, displacement);
+                            },
+                            .X86_64_RELOC_GOT => {
+                                assert(rel.r_length == 2);
+                                // TODO Instead of referring to the target symbol directly, we refer to it
+                                // indirectly via GOT. Getting actual target address should be done in the
+                                // helper relocTargetAddr function rather than here.
+                                const sym = object.symtab.items[rel.r_symbolnum];
+                                const sym_name = try self.allocator.dupe(u8, object.getString(sym.n_strx));
+                                const res = try self.nonlazy_pointers.getOrPut(self.allocator, sym_name);
+                                defer if (res.found_existing) self.allocator.free(sym_name);
+
+                                const data_const_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+                                const got = data_const_seg.sections.items[self.got_section_index.?];
+
+                                if (!res.found_existing) {
+                                    const index = @intCast(u32, self.nonlazy_pointers.items().len) - 1;
+                                    assert(index < max_local_got_indirections); // TODO This is just a temp solution.
+                                    res.entry.value = .{
+                                        .index = index,
+                                        .target_addr = target_addr,
+                                    };
+                                    var buf: [@sizeOf(u64)]u8 = undefined;
+                                    mem.writeIntLittle(u64, &buf, target_addr);
+                                    const got_offset = got.offset + (index + self.nonlazy_imports.items().len) * @sizeOf(u64);
+
+                                    log.debug("    | GOT off 0x{x}", .{got.offset});
+                                    log.debug("    | writing GOT entry 0x{x} at 0x{x}", .{ target_addr, got_offset });
+
+                                    try self.file.?.pwriteAll(&buf, got_offset);
+                                }
+
+                                const index = res.entry.value.index + self.nonlazy_imports.items().len;
+                                const actual_target_addr = got.addr + index * @sizeOf(u64);
+
+                                log.debug("    | GOT addr 0x{x}", .{got.addr});
+                                log.debug("    | actual target address in GOT 0x{x}", .{actual_target_addr});
+
+                                const inst = code[off..][0..4];
+                                const displacement = @bitCast(u32, @intCast(i32, @intCast(i64, actual_target_addr) - @intCast(i64, this_addr) - 4));
                                 mem.writeIntLittle(u32, inst, displacement);
                             },
                             .X86_64_RELOC_TLV => {
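
A minimal sketch (illustrative bytes, not taken from a real object) of the GOT_LOAD relaxation above: a RIP-relative load `mov rax, [rip + disp32]` encodes as 48 8b 05 <disp32>, with the opcode byte sitting two bytes before the displacement, so when the target turns out not to live in __got, flipping that byte from 0x8b to 0x8d rewrites the load into `lea rax, [rip + disp32]` and the instruction computes the address directly instead of dereferencing a GOT slot.

    const std = @import("std");
    const mem = std.mem;

    // Illustrative bytes: 48 8b 05 <disp32> is `mov rax, [rip + disp32]`.
    test "relax a rip-relative mov into lea" {
        var code = [_]u8{ 0x48, 0x8b, 0x05, 0x00, 0x00, 0x00, 0x00 };
        const off = 3; // offset of the 4-byte displacement, mirroring `off` in doRelocs
        code[off - 2] = 0x8d; // mov -> lea; REX prefix and ModRM byte stay untouched
        const displacement: i32 = 0x1000; // hypothetical target_addr - this_addr - 4
        mem.writeIntLittle(u32, code[off..][0..4], @bitCast(u32, displacement));
        std.debug.assert(code[1] == 0x8d and code[4] == 0x10);
    }
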
@@ -2384,6 +2454,23 @@ fn writeRebaseInfoTable(self: *Zld) !void {
     try pointers.ensureCapacity(pointers.items.len + self.local_rebases.items.len);
     pointers.appendSliceAssumeCapacity(self.local_rebases.items);
 
+    if (self.got_section_index) |idx| {
+        // TODO this should be cleaned up!
+        try pointers.ensureCapacity(pointers.items.len + self.nonlazy_pointers.items().len);
+        const seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+        const sect = seg.sections.items[idx];
+        const base_offset = sect.addr - seg.inner.vmaddr;
+        const segment_id = @intCast(u16, self.data_const_segment_cmd_index.?);
+        const index_offset = @intCast(u32, self.nonlazy_imports.items().len);
+        for (self.nonlazy_pointers.items()) |entry| {
+            const index = index_offset + entry.value.index;
+            pointers.appendAssumeCapacity(.{
+                .offset = base_offset + index * @sizeOf(u64),
+                .segment_id = segment_id,
+            });
+        }
+    }
+
     if (self.la_symbol_ptr_section_index) |idx| {
         try pointers.ensureCapacity(pointers.items.len + self.lazy_imports.items().len);
         const seg = self.load_commands.items[self.data_segment_cmd_index.?].Segment;
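
A worked example (made-up addresses) of the offsets fed to the rebase table above: every local GOT slot holds an absolute target address, so dyld has to slide it at load time, and its rebase offset is the slot's distance from the start of __DATA_CONST, shifted past the nonlazy-import slots.

    const std = @import("std");

    // Illustrative addresses only; the local slots sit after the nonlazy imports.
    test "rebase offset of the first local GOT slot" {
        const seg_vmaddr: u64 = 0x100004000; // hypothetical __DATA_CONST vmaddr
        const got_addr: u64 = 0x100004010; // hypothetical __got address
        const nonlazy_imports: u64 = 2; // hypothetical nonlazy import count
        const entry_index: u64 = 0;

        const base_offset = got_addr - seg_vmaddr;
        const offset = base_offset + (nonlazy_imports + entry_index) * @sizeOf(u64);
        std.debug.assert(offset == 0x20);
    }
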
@@ -2851,8 +2938,9 @@ fn writeDynamicSymbolTable(self: *Zld) !void {
 
     const lazy = self.lazy_imports.items();
     const nonlazy = self.nonlazy_imports.items();
+    const got_locals = self.nonlazy_pointers.items();
     dysymtab.indirectsymoff = @intCast(u32, seg.inner.fileoff + seg.inner.filesize);
-    dysymtab.nindirectsyms = @intCast(u32, lazy.len * 2 + nonlazy.len);
+    dysymtab.nindirectsyms = @intCast(u32, lazy.len * 2 + nonlazy.len + got_locals.len);
     const needed_size = dysymtab.nindirectsyms * @sizeOf(u32);
     seg.inner.filesize += needed_size;
 
@@ -2867,20 +2955,24 @@ fn writeDynamicSymbolTable(self: *Zld) !void {
     var writer = stream.writer();
 
     stubs.reserved1 = 0;
-    for (self.lazy_imports.items()) |_, i| {
+    for (lazy) |_, i| {
         const symtab_idx = @intCast(u32, dysymtab.iundefsym + i);
         try writer.writeIntLittle(u32, symtab_idx);
     }
 
     const base_id = @intCast(u32, lazy.len);
     got.reserved1 = base_id;
-    for (self.nonlazy_imports.items()) |_, i| {
+    for (nonlazy) |_, i| {
         const symtab_idx = @intCast(u32, dysymtab.iundefsym + i + base_id);
         try writer.writeIntLittle(u32, symtab_idx);
     }
+    // TODO there should be one common set of GOT entries.
+    for (got_locals) |_| {
+        try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL);
+    }
 
-    la_symbol_ptr.reserved1 = got.reserved1 + @intCast(u32, nonlazy.len);
-    for (self.lazy_imports.items()) |_, i| {
+    la_symbol_ptr.reserved1 = got.reserved1 + @intCast(u32, nonlazy.len) + @intCast(u32, got_locals.len);
+    for (lazy) |_, i| {
         const symtab_idx = @intCast(u32, dysymtab.iundefsym + i);
         try writer.writeIntLittle(u32, symtab_idx);
     }
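
A worked example (hypothetical counts) of the indirect symbol table layout after this change: stub entries come first, then the __got entries (nonlazy imports followed by one INDIRECT_SYMBOL_LOCAL marker per local GOT slot), then the __la_symbol_ptr entries; each section's reserved1 is its starting index in that table, which is why la_symbol_ptr now has to skip the local slots as well.

    const std = @import("std");

    // Hypothetical counts: 4 lazy imports, 2 nonlazy imports, 5 local GOT slots.
    test "indirect symbol table section offsets" {
        const lazy: u32 = 4;
        const nonlazy: u32 = 2;
        const got_locals: u32 = 5;

        const stubs_reserved1: u32 = 0;
        const got_reserved1 = stubs_reserved1 + lazy;
        const la_symbol_ptr_reserved1 = got_reserved1 + nonlazy + got_locals;
        const nindirectsyms = lazy * 2 + nonlazy + got_locals;

        std.debug.assert(la_symbol_ptr_reserved1 == 11);
        std.debug.assert(nindirectsyms == 15); // each lazy import appears twice
    }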