Commit 066758b1a2

Jakub Konka <kubkon@jakubkonka.com>
2022-02-11 12:16:32
macho: correctly lower slices incl reloc and rebase tracking
Mirror the changes made to the `Elf` linker, which enable lowering of const slices on `MachO` targets. Extend the `Mir` instructions that need to know their containing atom: the atom's symbol index (its index into the linker's symbol table) is passed from codegen via MIR to the emitter, which then uses it to look up the atom in the linker and append relocations to it.
1 parent b9b1ab0
src/arch/aarch64/CodeGen.zig
@@ -1617,7 +1617,12 @@ fn airCall(self: *Self, inst: Air.Inst.Index) !void {
 
                 _ = try self.addInst(.{
                     .tag = .call_extern,
-                    .data = .{ .extern_fn = n_strx },
+                    .data = .{
+                        .extern_fn = .{
+                            .atom_index = self.mod_fn.owner_decl.link.macho.local_sym_index,
+                            .sym_name = n_strx,
+                        },
+                    },
                 });
             } else {
                 return self.fail("TODO implement calling bitcasted functions", .{});
@@ -2485,9 +2490,18 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
             });
         },
         .memory => |addr| {
+            const owner_decl = self.mod_fn.owner_decl;
+            // TODO when refactoring LinkBlock, make this into a generic function.
+            const atom_index = switch (self.bin_file.tag) {
+                .macho => owner_decl.link.macho.local_sym_index,
+                .elf => owner_decl.link.elf.local_sym_index,
+                .plan9 => @intCast(u32, owner_decl.link.plan9.sym_index orelse 0),
+                else => return self.fail("TODO handle aarch64 load memory in {}", .{self.bin_file.tag}),
+            };
             _ = try self.addInst(.{
                 .tag = .load_memory,
                 .data = .{ .payload = try self.addExtra(Mir.LoadMemory{
+                    .atom_index = atom_index,
                     .register = @enumToInt(reg),
                     .addr = @intCast(u32, addr),
                 }) },
src/arch/aarch64/Emit.zig
@@ -537,7 +537,7 @@ fn mirDebugEpilogueBegin(self: *Emit) !void {
 
 fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) !void {
     assert(emit.mir.instructions.items(.tag)[inst] == .call_extern);
-    const n_strx = emit.mir.instructions.items(.data)[inst].extern_fn;
+    const extern_fn = emit.mir.instructions.items(.data)[inst].extern_fn;
 
     if (emit.bin_file.cast(link.File.MachO)) |macho_file| {
         const offset = blk: {
@@ -547,9 +547,10 @@ fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) !void {
             break :blk offset;
         };
         // Add relocation to the decl.
-        try macho_file.active_decl.?.link.macho.relocs.append(emit.bin_file.allocator, .{
+        const atom = macho_file.atom_by_index_table.get(extern_fn.atom_index).?;
+        try atom.relocs.append(emit.bin_file.allocator, .{
             .offset = offset,
-            .target = .{ .global = n_strx },
+            .target = .{ .global = extern_fn.sym_name },
             .addend = 0,
             .subtractor = null,
             .pcrel = true,
@@ -613,10 +614,9 @@ fn mirLoadMemory(emit: *Emit, inst: Mir.Inst.Index) !void {
         ));
 
         if (emit.bin_file.cast(link.File.MachO)) |macho_file| {
-            // TODO I think the reloc might be in the wrong place.
-            const decl = macho_file.active_decl.?;
+            const atom = macho_file.atom_by_index_table.get(load_memory.atom_index).?;
             // Page reloc for adrp instruction.
-            try decl.link.macho.relocs.append(emit.bin_file.allocator, .{
+            try atom.relocs.append(emit.bin_file.allocator, .{
                 .offset = offset,
                 .target = .{ .local = addr },
                 .addend = 0,
@@ -626,7 +626,7 @@ fn mirLoadMemory(emit: *Emit, inst: Mir.Inst.Index) !void {
                 .@"type" = @enumToInt(std.macho.reloc_type_arm64.ARM64_RELOC_GOT_LOAD_PAGE21),
             });
             // Pageoff reloc for adrp instruction.
-            try decl.link.macho.relocs.append(emit.bin_file.allocator, .{
+            try atom.relocs.append(emit.bin_file.allocator, .{
                 .offset = offset + 4,
                 .target = .{ .local = addr },
                 .addend = 0,
src/arch/aarch64/Mir.zig
@@ -134,7 +134,12 @@ pub const Inst = struct {
         /// An extern function
         ///
         /// Used by e.g. call_extern
-        extern_fn: u32,
+        extern_fn: struct {
+            /// Index of the containing atom.
+            atom_index: u32,
+            /// Index into the linker's string table.
+            sym_name: u32,
+        },
         /// A 16-bit immediate value.
         ///
         /// Used by e.g. svc
@@ -278,6 +283,7 @@ pub fn extraData(mir: Mir, comptime T: type, index: usize) struct { data: T, end
 }
 
 pub const LoadMemory = struct {
+    atom_index: u32,
     register: u32,
     addr: u32,
 };
src/arch/x86_64/CodeGen.zig
@@ -1897,7 +1897,12 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type
                                 .reg1 = addr_reg.to64(),
                                 .flags = flags,
                             }).encode(),
-                            .data = .{ .linker_sym_index = sym_index },
+                            .data = .{
+                                .load_reloc = .{
+                                    .atom_index = self.mod_fn.owner_decl.link.macho.local_sym_index,
+                                    .sym_index = sym_index,
+                                },
+                            },
                         });
                         break :blk addr_reg;
                     },
@@ -2670,7 +2675,12 @@ fn airCall(self: *Self, inst: Air.Inst.Index) !void {
                 _ = try self.addInst(.{
                     .tag = .call_extern,
                     .ops = undefined,
-                    .data = .{ .extern_fn = n_strx },
+                    .data = .{
+                        .extern_fn = .{
+                            .atom_index = self.mod_fn.owner_decl.link.macho.local_sym_index,
+                            .sym_name = n_strx,
+                        },
+                    },
                 });
             } else {
                 return self.fail("TODO implement calling bitcasted functions", .{});
@@ -3550,7 +3560,12 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE
                                 .reg1 = addr_reg.to64(),
                                 .flags = flags,
                             }).encode(),
-                            .data = .{ .linker_sym_index = sym_index },
+                            .data = .{
+                                .load_reloc = .{
+                                    .atom_index = self.mod_fn.owner_decl.link.macho.local_sym_index,
+                                    .sym_index = sym_index,
+                                },
+                            },
                         });
                         break :blk addr_reg;
                     },
@@ -3767,6 +3782,30 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerErro
                         const reg = try self.copyToTmpRegister(Type.usize, .{ .immediate = addr });
                         break :blk reg;
                     },
+                    .direct_load,
+                    .got_load,
+                    => |sym_index| {
+                        const flags: u2 = switch (mcv) {
+                            .got_load => 0b00,
+                            .direct_load => 0b01,
+                            else => unreachable,
+                        };
+                        const addr_reg = try self.register_manager.allocReg(null);
+                        _ = try self.addInst(.{
+                            .tag = .lea_pie,
+                            .ops = (Mir.Ops{
+                                .reg1 = addr_reg.to64(),
+                                .flags = flags,
+                            }).encode(),
+                            .data = .{
+                                .load_reloc = .{
+                                    .atom_index = self.mod_fn.owner_decl.link.macho.local_sym_index,
+                                    .sym_index = sym_index,
+                                },
+                            },
+                        });
+                        break :blk addr_reg;
+                    },
                     else => {
                         return self.fail("TODO implement memcpy for setting stack from {}", .{mcv});
                     },
@@ -4202,7 +4241,12 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
                     .reg1 = reg,
                     .flags = flags,
                 }).encode(),
-                .data = .{ .linker_sym_index = sym_index },
+                .data = .{
+                    .load_reloc = .{
+                        .atom_index = self.mod_fn.owner_decl.link.macho.local_sym_index,
+                        .sym_index = sym_index,
+                    },
+                },
             });
             // MOV reg, [reg]
             _ = try self.addInst(.{
src/arch/x86_64/Emit.zig
@@ -763,6 +763,7 @@ fn mirLeaPie(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
     const tag = emit.mir.instructions.items(.tag)[inst];
     assert(tag == .lea_pie);
     const ops = Mir.Ops.decode(emit.mir.instructions.items(.ops)[inst]);
+    const load_reloc = emit.mir.instructions.items(.data)[inst].load_reloc;
 
     // lea reg1, [rip + reloc]
     // RM
@@ -772,18 +773,19 @@ fn mirLeaPie(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
         RegisterOrMemory.rip(Memory.PtrSize.fromBits(ops.reg1.size()), 0),
         emit.code,
     );
+
     const end_offset = emit.code.items.len;
-    const sym_index = emit.mir.instructions.items(.data)[inst].linker_sym_index;
+
     if (emit.bin_file.cast(link.File.MachO)) |macho_file| {
         const reloc_type = switch (ops.flags) {
             0b00 => @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_GOT),
             0b01 => @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_SIGNED),
             else => return emit.fail("TODO unused LEA PIE variants 0b10 and 0b11", .{}),
         };
-        const decl = macho_file.active_decl.?;
-        try decl.link.macho.relocs.append(emit.bin_file.allocator, .{
+        const atom = macho_file.atom_by_index_table.get(load_reloc.atom_index).?;
+        try atom.relocs.append(emit.bin_file.allocator, .{
             .offset = @intCast(u32, end_offset - 4),
-            .target = .{ .local = sym_index },
+            .target = .{ .local = load_reloc.sym_index },
             .addend = 0,
             .subtractor = null,
             .pcrel = true,
@@ -801,17 +803,20 @@ fn mirLeaPie(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
 fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) InnerError!void {
     const tag = emit.mir.instructions.items(.tag)[inst];
     assert(tag == .call_extern);
-    const n_strx = emit.mir.instructions.items(.data)[inst].extern_fn;
+    const extern_fn = emit.mir.instructions.items(.data)[inst].extern_fn;
+
     const offset = blk: {
         // callq
         try lowerToDEnc(.call_near, 0, emit.code);
         break :blk @intCast(u32, emit.code.items.len) - 4;
     };
+
     if (emit.bin_file.cast(link.File.MachO)) |macho_file| {
         // Add relocation to the decl.
-        try macho_file.active_decl.?.link.macho.relocs.append(emit.bin_file.allocator, .{
+        const atom = macho_file.atom_by_index_table.get(extern_fn.atom_index).?;
+        try atom.relocs.append(emit.bin_file.allocator, .{
             .offset = offset,
-            .target = .{ .global = n_strx },
+            .target = .{ .global = extern_fn.sym_name },
             .addend = 0,
             .subtractor = null,
             .pcrel = true,
src/arch/x86_64/Mir.zig
@@ -185,7 +185,7 @@ pub const Inst = struct {
         ///      0b00  reg1, [rip + reloc] // via GOT emits X86_64_RELOC_GOT relocation
         ///      0b01  reg1, [rip + reloc] // direct load emits X86_64_RELOC_SIGNED relocation
         /// Notes:
-        /// * `Data` contains `linker_sym_index` 
+        /// * `Data` contains `load_reloc`
         lea_pie,
 
         /// ops flags: form:
@@ -350,10 +350,19 @@ pub const Inst = struct {
         /// A 32-bit immediate value.
         imm: u32,
         /// An extern function.
-        /// Index into the linker's string table.
-        extern_fn: u32,
-        /// Entry in the linker's symbol table.
-        linker_sym_index: u32,
+        extern_fn: struct {
+            /// Index of the containing atom.
+            atom_index: u32,
+            /// Index into the linker's string table.
+            sym_name: u32,
+        },
+        /// PIE load relocation.
+        load_reloc: struct {
+            /// Index of the containing atom.
+            atom_index: u32,
+            /// Index into the linker's symbol table.
+            sym_index: u32,
+        },
         /// Index into `extra`. Meaning of what can be found there is context-dependent.
         payload: u32,
     };
@@ -362,7 +371,7 @@ pub const Inst = struct {
     // Note that in Debug builds, Zig is allowed to insert a secret field for safety checks.
     comptime {
         if (builtin.mode != .Debug) {
-            assert(@sizeOf(Inst) == 8);
+            assert(@sizeOf(Data) == 8);
         }
     }
 };
src/arch/x86_64/PrintMir.zig
@@ -450,6 +450,7 @@ fn mirLea(print: *const Print, inst: Mir.Inst.Index, w: anytype) !void {
 
 fn mirLeaPie(print: *const Print, inst: Mir.Inst.Index, w: anytype) !void {
     const ops = Mir.Ops.decode(print.mir.instructions.items(.ops)[inst]);
+    const load_reloc = print.mir.instructions.items(.data)[inst].load_reloc;
     try w.print("lea {s}, ", .{@tagName(ops.reg1)});
     switch (ops.reg1.size()) {
         8 => try w.print("byte ptr ", .{}),
@@ -459,9 +460,8 @@ fn mirLeaPie(print: *const Print, inst: Mir.Inst.Index, w: anytype) !void {
         else => unreachable,
     }
     try w.print("[rip + 0x0] ", .{});
-    const sym_index = print.mir.instructions.items(.data)[inst].linker_sym_index;
     if (print.bin_file.cast(link.File.MachO)) |macho_file| {
-        const target = macho_file.locals.items[sym_index];
+        const target = macho_file.locals.items[load_reloc.sym_index];
         const target_name = macho_file.getString(target.n_strx);
         try w.print("target@{s}", .{target_name});
     } else {
src/link/MachO.zig
@@ -40,6 +40,7 @@ const StringIndexContext = std.hash_map.StringIndexContext;
 const Trie = @import("MachO/Trie.zig");
 const Type = @import("../type.zig").Type;
 const TypedValue = @import("../TypedValue.zig");
+const Value = @import("../value.zig").Value;
 
 pub const TextBlock = Atom;
 
@@ -220,6 +221,7 @@ atoms: std.AutoHashMapUnmanaged(MatchingSection, *Atom) = .{},
 /// at present owned by Module.Decl.
 /// TODO consolidate this.
 managed_atoms: std.ArrayListUnmanaged(*Atom) = .{},
+atom_by_index_table: std.AutoHashMapUnmanaged(u32, *Atom) = .{},
 
 /// Table of unnamed constants associated with a parent `Decl`.
 /// We store them here so that we can free the constants whenever the `Decl`
@@ -248,12 +250,6 @@ unnamed_const_atoms: UnnamedConstTable = .{},
 /// TODO consolidate this.
 decls: std.AutoArrayHashMapUnmanaged(*Module.Decl, ?MatchingSection) = .{},
 
-/// Currently active Module.Decl.
-/// TODO this might not be necessary if we figure out how to pass Module.Decl instance
-/// to codegen.genSetReg() or alternatively move PIE displacement for MCValue{ .memory = x }
-/// somewhere else in the codegen.
-active_decl: ?*Module.Decl = null,
-
 const Entry = struct {
     target: Atom.Relocation.Target,
     atom: *Atom,
@@ -3441,6 +3437,8 @@ pub fn deinit(self: *MachO) void {
         }
         self.unnamed_const_atoms.deinit(self.base.allocator);
     }
+
+    self.atom_by_index_table.deinit(self.base.allocator);
 }
 
 pub fn closeFiles(self: MachO) void {
@@ -3647,6 +3645,7 @@ pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void {
     if (decl.link.macho.local_sym_index != 0) return;
 
     decl.link.macho.local_sym_index = try self.allocateLocalSymbol();
+    try self.atom_by_index_table.putNoClobber(self.base.allocator, decl.link.macho.local_sym_index, &decl.link.macho);
     try self.decls.putNoClobber(self.base.allocator, decl, null);
 
     const got_target = .{ .local = decl.link.macho.local_sym_index };
@@ -3693,8 +3692,6 @@ pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liv
         }
     }
 
-    self.active_decl = decl;
-
     const res = if (debug_buffers) |dbg|
         try codegen.generateFunction(&self.base, decl.srcLoc(), func, air, liveness, &code_buffer, .{
             .dwarf = .{
@@ -3756,14 +3753,9 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl: *Module.De
     log.debug("allocating symbol indexes for {s}", .{name});
 
     const required_alignment = typed_value.ty.abiAlignment(self.base.options.target);
-    const match = (try self.getMatchingSection(.{
-        .segname = makeStaticString("__TEXT"),
-        .sectname = makeStaticString("__const"),
-        .size = @sizeOf(u64),
-        .@"align" = math.log2(required_alignment),
-    })).?;
     const local_sym_index = try self.allocateLocalSymbol();
     const atom = try self.createEmptyAtom(local_sym_index, @sizeOf(u64), math.log2(required_alignment));
+    try self.atom_by_index_table.putNoClobber(self.base.allocator, local_sym_index, atom);
 
     const res = try codegen.generateSymbol(&self.base, local_sym_index, decl.srcLoc(), typed_value, &code_buffer, .{
         .none = .{},
@@ -3780,6 +3772,8 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl: *Module.De
 
     atom.code.clearRetainingCapacity();
     try atom.code.appendSlice(self.base.allocator, code);
+
+    const match = try self.getMatchingSectionAtom(atom, typed_value.ty, typed_value.val);
     const addr = try self.allocateAtom(atom, code.len, required_alignment, match);
 
     log.debug("allocated atom for {s} at 0x{x}", .{ name, addr });
@@ -3839,11 +3833,9 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {
         }
     }
 
-    self.active_decl = decl;
-
     const decl_val = if (decl.val.castTag(.variable)) |payload| payload.data.init else decl.val;
     const res = if (debug_buffers) |dbg|
-        try codegen.generateSymbol(&self.base, decl.link.elf.local_sym_index, decl.srcLoc(), .{
+        try codegen.generateSymbol(&self.base, decl.link.macho.local_sym_index, decl.srcLoc(), .{
             .ty = decl.ty,
             .val = decl_val,
         }, &code_buffer, .{
@@ -3854,7 +3846,7 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {
             },
         })
     else
-        try codegen.generateSymbol(&self.base, decl.link.elf.local_sym_index, decl.srcLoc(), .{
+        try codegen.generateSymbol(&self.base, decl.link.macho.local_sym_index, decl.srcLoc(), .{
             .ty = decl.ty,
             .val = decl_val,
         }, &code_buffer, .none);
@@ -3908,13 +3900,11 @@ fn isElemTyPointer(ty: Type) bool {
     }
 }
 
-fn getMatchingSectionDecl(self: *MachO, decl: *Module.Decl) !MatchingSection {
-    const code = decl.link.macho.code.items;
-    const alignment = decl.ty.abiAlignment(self.base.options.target);
+fn getMatchingSectionAtom(self: *MachO, atom: *Atom, ty: Type, val: Value) !MatchingSection {
+    const code = atom.code.items;
+    const alignment = ty.abiAlignment(self.base.options.target);
     const align_log_2 = math.log2(alignment);
-    const ty = decl.ty;
     const zig_ty = ty.zigTypeTag();
-    const val = decl.val;
     const mode = self.base.options.optimize_mode;
     const match: MatchingSection = blk: {
         // TODO finish and audit this function
@@ -4023,9 +4013,11 @@ fn getMatchingSectionDecl(self: *MachO, decl: *Module.Decl) !MatchingSection {
             },
         }
     };
+    const local = self.locals.items[atom.local_sym_index];
     const seg = self.load_commands.items[match.seg].segment;
     const sect = seg.sections.items[match.sect];
-    log.debug("  allocating atom in '{s},{s}' ({d},{d})", .{
+    log.debug("  allocating atom '{s}' in '{s},{s}' ({d},{d})", .{
+        self.getString(local.n_strx),
         sect.segName(),
         sect.sectName(),
         match.seg,
@@ -4041,7 +4033,7 @@ fn placeDecl(self: *MachO, decl: *Module.Decl, code_len: usize) !*macho.nlist_64
 
     const decl_ptr = self.decls.getPtr(decl).?;
     if (decl_ptr.* == null) {
-        decl_ptr.* = try self.getMatchingSectionDecl(decl);
+        decl_ptr.* = try self.getMatchingSectionAtom(&decl.link.macho, decl.ty, decl.val);
     }
     const match = decl_ptr.*.?;
 
@@ -4290,6 +4282,8 @@ fn freeUnnamedConsts(self: *MachO, decl: *Module.Decl) void {
         }, true);
         self.locals_free_list.append(self.base.allocator, atom.local_sym_index) catch {};
         self.locals.items[atom.local_sym_index].n_type = 0;
+        _ = self.atom_by_index_table.remove(atom.local_sym_index);
+        atom.local_sym_index = 0;
     }
     unnamed_consts.clearAndFree(self.base.allocator);
 }
@@ -4316,6 +4310,7 @@ pub fn freeDecl(self: *MachO, decl: *Module.Decl) void {
         }
 
         self.locals.items[decl.link.macho.local_sym_index].n_type = 0;
+        _ = self.atom_by_index_table.remove(decl.link.macho.local_sym_index);
         decl.link.macho.local_sym_index = 0;
     }
     if (self.d_sym) |*ds| {
@@ -4347,13 +4342,7 @@ pub fn getDeclVAddr(self: *MachO, decl: *const Module.Decl, parent_atom_index: u
     assert(self.llvm_object == null);
     assert(decl.link.macho.local_sym_index != 0);
 
-    // TODO cache local_sym_index => atom!!!
-    const atom: *Atom = blk: for (self.managed_atoms.items) |atom| {
-        if (atom.local_sym_index == parent_atom_index) {
-            break :blk atom;
-        }
-    } else unreachable;
-
+    const atom = self.atom_by_index_table.get(parent_atom_index).?;
     try atom.relocs.append(self.base.allocator, .{
         .offset = @intCast(u32, offset),
         .target = .{ .local = decl.link.macho.local_sym_index },
src/link/Plan9.zig
@@ -302,7 +302,9 @@ pub fn updateDecl(self: *Plan9, module: *Module, decl: *Module.Decl) !void {
     var code_buffer = std.ArrayList(u8).init(self.base.allocator);
     defer code_buffer.deinit();
     const decl_val = if (decl.val.castTag(.variable)) |payload| payload.data.init else decl.val;
-    const res = try codegen.generateSymbol(&self.base, @intCast(u32, decl.link.plan9.sym_index.?), decl.srcLoc(), .{
+    // TODO we need the symbol index for symbol in the table of locals for the containing atom
+    const sym_index = decl.link.plan9.sym_index orelse 0;
+    const res = try codegen.generateSymbol(&self.base, @intCast(u32, sym_index), decl.srcLoc(), .{
         .ty = decl.ty,
         .val = decl_val,
     }, &code_buffer, .{ .none = .{} });