commit fa40267b04
Author: Jakub Konka <kubkon@jakubkonka.com>
Date:   2023-04-19 16:38:44
Parent: c55e821

    macho: do not allocate atoms for stub entries
src/link/MachO/Atom.zig
@@ -158,21 +158,6 @@ pub fn addBinding(macho_file: *MachO, atom_index: Index, binding: Binding) !void
     try gop.value_ptr.append(gpa, binding);
 }
 
-pub fn addLazyBinding(macho_file: *MachO, atom_index: Index, binding: Binding) !void {
-    const gpa = macho_file.base.allocator;
-    const atom = macho_file.getAtom(atom_index);
-    log.debug("  (adding lazy binding to symbol {s} at offset 0x{x} in %{?d})", .{
-        macho_file.getSymbolName(binding.target),
-        binding.offset,
-        atom.getSymbolIndex(),
-    });
-    const gop = try macho_file.lazy_bindings.getOrPut(gpa, atom_index);
-    if (!gop.found_existing) {
-        gop.value_ptr.* = .{};
-    }
-    try gop.value_ptr.append(gpa, binding);
-}
-
 pub fn resolveRelocations(
     macho_file: *MachO,
     atom_index: Index,
@@ -193,6 +178,4 @@ pub fn freeRelocations(macho_file: *MachO, atom_index: Index) void {
     if (removed_rebases) |*rebases| rebases.value.deinit(gpa);
     var removed_bindings = macho_file.bindings.fetchOrderedRemove(atom_index);
     if (removed_bindings) |*bindings| bindings.value.deinit(gpa);
-    var removed_lazy_bindings = macho_file.lazy_bindings.fetchOrderedRemove(atom_index);
-    if (removed_lazy_bindings) |*lazy_bindings| lazy_bindings.value.deinit(gpa);
 }
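
Note: with this commit the incremental linker stops tracking lazy bindings per atom. Lazy-bind entries are instead derived directly from `stub_table` (see `collectLazyBindData` in the `MachO.zig` hunks below), because stub entry `i` owns a fixed `__la_symbol_ptr` slot. A minimal sketch of that invariant (not part of the commit; names are illustrative):

    // The lazy pointer dyld patches for stub `i` sits at a fixed, index-derived
    // slot, so no per-atom lazy-binding table is needed.
    fn lazyPointerAddr(la_symbol_ptr_base: u64, stub_index: u64) u64 {
        return la_symbol_ptr_base + stub_index * @sizeOf(u64);
    }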
src/link/MachO/eh_frame.zig
@@ -9,6 +9,7 @@ const log = std.log.scoped(.eh_frame);
 const Allocator = mem.Allocator;
 const AtomIndex = @import("zld.zig").AtomIndex;
 const Atom = @import("ZldAtom.zig");
+const Relocation = @import("Relocation.zig");
 const SymbolWithLoc = @import("zld.zig").SymbolWithLoc;
 const UnwindInfo = @import("UnwindInfo.zig");
 const Zld = @import("zld.zig").Zld;
@@ -368,7 +369,7 @@ pub fn EhFrameRecord(comptime is_mutable: bool) type {
                                 const target_addr = try Atom.getRelocTargetAddress(zld, target, true, false);
                                 const addend = mem.readIntLittle(i32, rec.data[rel_offset..][0..4]);
                                 const adjusted_target_addr = @intCast(u64, @intCast(i64, target_addr) + addend);
-                                const disp = try Atom.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0);
+                                const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0);
                                 mem.writeIntLittle(i32, rec.data[rel_offset..][0..4], disp);
                             },
                             else => unreachable,
src/link/MachO/Relocation.zig
@@ -59,10 +59,12 @@ pub fn getTargetBaseAddress(self: Relocation, macho_file: *MachO) ?u64 {
             return header.addr + got_index * @sizeOf(u64);
         },
         .branch => {
-            const atom_index = blk: {
-                if (macho_file.stubs_table.getAtomIndex(macho_file, self.target)) |index| break :blk index;
-                break :blk macho_file.getAtomIndexForSymbol(self.target) orelse return null;
-            };
+            if (macho_file.stub_table.lookup.get(self.target)) |index| {
+                const header = macho_file.sections.items(.header)[macho_file.stubs_section_index.?];
+                return header.addr +
+                    index * @import("stubs.zig").calcStubEntrySize(macho_file.base.options.target.cpu.arch);
+            }
+            const atom_index = macho_file.getAtomIndexForSymbol(self.target) orelse return null;
             const atom = macho_file.getAtom(atom_index);
             return atom.getSymbol(macho_file).n_value;
         },
@@ -196,11 +198,48 @@ fn resolveX8664(self: Relocation, source_addr: u64, target_addr: i64, code: []u8
     }
 }
 
-inline fn isArithmeticOp(inst: *const [4]u8) bool {
+pub inline fn isArithmeticOp(inst: *const [4]u8) bool {
     const group_decode = @truncate(u5, inst[3]);
     return ((group_decode >> 2) == 4);
 }
 
+pub fn calcPcRelativeDisplacementX86(source_addr: u64, target_addr: u64, correction: u3) error{Overflow}!i32 {
+    const disp = @intCast(i64, target_addr) - @intCast(i64, source_addr + 4 + correction);
+    return math.cast(i32, disp) orelse error.Overflow;
+}
+
+pub fn calcPcRelativeDisplacementArm64(source_addr: u64, target_addr: u64) error{Overflow}!i28 {
+    const disp = @intCast(i64, target_addr) - @intCast(i64, source_addr);
+    return math.cast(i28, disp) orelse error.Overflow;
+}
+
+pub fn calcNumberOfPages(source_addr: u64, target_addr: u64) i21 {
+    const source_page = @intCast(i32, source_addr >> 12);
+    const target_page = @intCast(i32, target_addr >> 12);
+    const pages = @intCast(i21, target_page - source_page);
+    return pages;
+}
+
+pub const PageOffsetInstKind = enum {
+    arithmetic,
+    load_store_8,
+    load_store_16,
+    load_store_32,
+    load_store_64,
+    load_store_128,
+};
+
+pub fn calcPageOffset(target_addr: u64, kind: PageOffsetInstKind) !u12 {
+    const narrowed = @truncate(u12, target_addr);
+    return switch (kind) {
+        .arithmetic, .load_store_8 => narrowed,
+        .load_store_16 => try math.divExact(u12, narrowed, 2),
+        .load_store_32 => try math.divExact(u12, narrowed, 4),
+        .load_store_64 => try math.divExact(u12, narrowed, 8),
+        .load_store_128 => try math.divExact(u12, narrowed, 16),
+    };
+}
+
 const Relocation = @This();
 
 const std = @import("std");
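
The PC-relative helpers now live in `Relocation.zig` so both linker paths share one implementation. An illustrative check of the semantics (not part of the commit): on x86_64 the displacement is measured from the end of the 4-byte immediate, while on aarch64 it is measured from the branch instruction itself.

    test "pc-relative displacement helpers (illustrative)" {
        // x86_64: immediate at 0x1000; the CPU resolves relative to 0x1000 + 4.
        const x86 = try calcPcRelativeDisplacementX86(0x1000, 0x2000, 0);
        try std.testing.expectEqual(@as(i32, 0xFFC), x86);
        // aarch64: relative to the instruction address; must fit in an i28.
        const arm = try calcPcRelativeDisplacementArm64(0x1000, 0x2000);
        try std.testing.expectEqual(@as(i28, 0x1000), arm);
    }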
src/link/MachO/stubs.zig
@@ -0,0 +1,161 @@
+const std = @import("std");
+const aarch64 = @import("../../arch/aarch64/bits.zig");
+
+const Relocation = @import("Relocation.zig");
+
+pub inline fn calcStubHelperPreambleSize(cpu_arch: std.Target.Cpu.Arch) u5 {
+    return switch (cpu_arch) {
+        .x86_64 => 15,
+        .aarch64 => 6 * @sizeOf(u32),
+        else => unreachable, // unhandled architecture type
+    };
+}
+
+pub inline fn calcStubHelperEntrySize(cpu_arch: std.Target.Cpu.Arch) u4 {
+    return switch (cpu_arch) {
+        .x86_64 => 10,
+        .aarch64 => 3 * @sizeOf(u32),
+        else => unreachable, // unhandled architecture type
+    };
+}
+
+pub inline fn calcStubEntrySize(cpu_arch: std.Target.Cpu.Arch) u4 {
+    return switch (cpu_arch) {
+        .x86_64 => 6,
+        .aarch64 => 3 * @sizeOf(u32),
+        else => unreachable, // unhandled architecture type
+    };
+}
+
+pub inline fn calcStubOffsetInStubHelper(cpu_arch: std.Target.Cpu.Arch) u4 {
+    return switch (cpu_arch) {
+        .x86_64 => 1,
+        .aarch64 => 2 * @sizeOf(u32),
+        else => unreachable,
+    };
+}
+
+pub fn writeStubHelperPreambleCode(args: struct {
+    cpu_arch: std.Target.Cpu.Arch,
+    source_addr: u64,
+    dyld_private_addr: u64,
+    dyld_stub_binder_got_addr: u64,
+}, writer: anytype) !void {
+    switch (args.cpu_arch) {
+        .x86_64 => {
+            try writer.writeAll(&.{ 0x4c, 0x8d, 0x1d });
+            {
+                const disp = try Relocation.calcPcRelativeDisplacementX86(
+                    args.source_addr + 3,
+                    args.dyld_private_addr,
+                    0,
+                );
+                try writer.writeIntLittle(i32, disp);
+            }
+            try writer.writeAll(&.{ 0x41, 0x53, 0xff, 0x25 });
+            {
+                const disp = try Relocation.calcPcRelativeDisplacementX86(
+                    args.source_addr + 11,
+                    args.dyld_stub_binder_got_addr,
+                    0,
+                );
+                try writer.writeIntLittle(i32, disp);
+            }
+        },
+        .aarch64 => {
+            {
+                const pages = Relocation.calcNumberOfPages(args.source_addr, args.dyld_private_addr);
+                try writer.writeIntLittle(u32, aarch64.Instruction.adrp(.x17, pages).toU32());
+            }
+            {
+                const off = try Relocation.calcPageOffset(args.dyld_private_addr, .arithmetic);
+                try writer.writeIntLittle(u32, aarch64.Instruction.add(.x17, .x17, off, false).toU32());
+            }
+            try writer.writeIntLittle(u32, aarch64.Instruction.stp(
+                .x16,
+                .x17,
+                aarch64.Register.sp,
+                aarch64.Instruction.LoadStorePairOffset.pre_index(-16),
+            ).toU32());
+            {
+                const pages = Relocation.calcNumberOfPages(args.source_addr + 12, args.dyld_stub_binder_got_addr);
+                try writer.writeIntLittle(u32, aarch64.Instruction.adrp(.x16, pages).toU32());
+            }
+            {
+                const off = try Relocation.calcPageOffset(args.dyld_stub_binder_got_addr, .load_store_64);
+                try writer.writeIntLittle(u32, aarch64.Instruction.ldr(
+                    .x16,
+                    .x16,
+                    aarch64.Instruction.LoadStoreOffset.imm(off),
+                ).toU32());
+            }
+            try writer.writeIntLittle(u32, aarch64.Instruction.br(.x16).toU32());
+        },
+        else => unreachable,
+    }
+}
+
+pub fn writeStubHelperCode(args: struct {
+    cpu_arch: std.Target.Cpu.Arch,
+    source_addr: u64,
+    target_addr: u64,
+}, writer: anytype) !void {
+    switch (args.cpu_arch) {
+        .x86_64 => {
+            try writer.writeAll(&.{ 0x68, 0x0, 0x0, 0x0, 0x0, 0xe9 });
+            {
+                const disp = try Relocation.calcPcRelativeDisplacementX86(args.source_addr + 6, args.target_addr, 0);
+                try writer.writeIntLittle(i32, disp);
+            }
+        },
+        .aarch64 => {
+            const stub_size: u4 = 3 * @sizeOf(u32);
+            const literal = blk: {
+                const div_res = try std.math.divExact(u64, stub_size - @sizeOf(u32), 4);
+                break :blk std.math.cast(u18, div_res) orelse return error.Overflow;
+            };
+            try writer.writeIntLittle(u32, aarch64.Instruction.ldrLiteral(
+                .w16,
+                literal,
+            ).toU32());
+            {
+                const disp = try Relocation.calcPcRelativeDisplacementArm64(args.source_addr + 4, args.target_addr);
+                try writer.writeIntLittle(u32, aarch64.Instruction.b(disp).toU32());
+            }
+            try writer.writeAll(&.{ 0x0, 0x0, 0x0, 0x0 });
+        },
+        else => unreachable,
+    }
+}
+
+pub fn writeStubCode(args: struct {
+    cpu_arch: std.Target.Cpu.Arch,
+    source_addr: u64,
+    target_addr: u64,
+}, writer: anytype) !void {
+    switch (args.cpu_arch) {
+        .x86_64 => {
+            try writer.writeAll(&.{ 0xff, 0x25 });
+            {
+                const disp = try Relocation.calcPcRelativeDisplacementX86(args.source_addr + 2, args.target_addr, 0);
+                try writer.writeIntLittle(i32, disp);
+            }
+        },
+        .aarch64 => {
+            {
+                const pages = Relocation.calcNumberOfPages(args.source_addr, args.target_addr);
+                try writer.writeIntLittle(u32, aarch64.Instruction.adrp(.x16, pages).toU32());
+            }
+            {
+                const off = try Relocation.calcPageOffset(args.target_addr, .load_store_64);
+                try writer.writeIntLittle(u32, aarch64.Instruction.ldr(
+                    .x16,
+                    .x16,
+                    aarch64.Instruction.LoadStoreOffset.imm(off),
+                ).toU32());
+            }
+            try writer.writeIntLittle(u32, aarch64.Instruction.br(.x16).toU32());
+        },
+        else => unreachable,
+    }
+}
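
For reference, the byte sequences behind the size helpers above: on x86_64 a stub is `ff 25 <disp32>` (`jmp *off(%rip)`, 6 bytes), a stub-helper entry is `68 <imm32> e9 <disp32>` (`push imm32; jmp rel32`, 10 bytes), and the preamble is `lea r11, [rip+disp]; push r11; jmp *[rip+disp]` (3 + 4 + 2 + 2 + 4 = 15 bytes). A sanity-check sketch against the emitters (not part of the commit):

    test "emitted stub size matches calcStubEntrySize (illustrative)" {
        inline for (.{ .x86_64, .aarch64 }) |arch| {
            var buf = std.ArrayList(u8).init(std.testing.allocator);
            defer buf.deinit();
            try writeStubCode(.{
                .cpu_arch = arch,
                .source_addr = 0x1000,
                .target_addr = 0x2000,
            }, buf.writer());
            try std.testing.expectEqual(@as(usize, calcStubEntrySize(arch)), buf.items.len);
        }
    }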
src/link/MachO/thunks.zig
@@ -17,6 +17,7 @@ const aarch64 = @import("../../arch/aarch64/bits.zig");
 const Allocator = mem.Allocator;
 const Atom = @import("ZldAtom.zig");
 const AtomIndex = @import("zld.zig").AtomIndex;
+const Relocation = @import("Relocation.zig");
 const SymbolWithLoc = @import("zld.zig").SymbolWithLoc;
 const Zld = @import("zld.zig").Zld;
 
@@ -317,7 +318,7 @@ fn isReachable(
     const source_addr = source_sym.n_value + @intCast(u32, rel.r_address - base_offset);
     const is_via_got = Atom.relocRequiresGot(zld, rel);
     const target_addr = Atom.getRelocTargetAddress(zld, target, is_via_got, false) catch unreachable;
-    _ = Atom.calcPcRelativeDisplacementArm64(source_addr, target_addr) catch
+    _ = Relocation.calcPcRelativeDisplacementArm64(source_addr, target_addr) catch
         return false;
 
     return true;
@@ -364,9 +365,9 @@ pub fn writeThunkCode(zld: *Zld, atom_index: AtomIndex, writer: anytype) !void {
         if (atom_index == target_atom_index) break zld.getSymbol(target).n_value;
     } else unreachable;
 
-    const pages = Atom.calcNumberOfPages(source_addr, target_addr);
+    const pages = Relocation.calcNumberOfPages(source_addr, target_addr);
     try writer.writeIntLittle(u32, aarch64.Instruction.adrp(.x16, pages).toU32());
-    const off = try Atom.calcPageOffset(target_addr, .arithmetic);
+    const off = try Relocation.calcPageOffset(target_addr, .arithmetic);
     try writer.writeIntLittle(u32, aarch64.Instruction.add(.x16, .x16, off, false).toU32());
     try writer.writeIntLittle(u32, aarch64.Instruction.br(.x16).toU32());
 }
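
Context for the `isReachable` change above: an aarch64 `b`/`bl` encodes a 26-bit immediate scaled by 4, i.e. a signed 28-bit byte offset (±128 MiB), which is exactly what `Relocation.calcPcRelativeDisplacementArm64` returning `error.Overflow` tests; the `adrp` + `add` + `br` thunk then extends reach to ±4 GiB. Illustrative:

    comptime {
        // b/bl: 26-bit immediate << 2 => signed 28-bit byte offset.
        std.debug.assert(std.math.maxInt(i28) == 128 * 1024 * 1024 - 1);
    }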
src/link/MachO/zld.zig
@@ -16,6 +16,7 @@ const link = @import("../../link.zig");
 const load_commands = @import("load_commands.zig");
 const thunks = @import("thunks.zig");
 const trace = @import("../../tracy.zig").trace;
+const stub_helpers = @import("stubs.zig");
 
 const Allocator = mem.Allocator;
 const Archive = @import("Archive.zig");
@@ -666,59 +667,17 @@ pub const Zld = struct {
             const entry = self.got_entries.items[index];
             break :blk entry.getAtomSymbol(self).n_value;
         };
-        switch (cpu_arch) {
-            .x86_64 => {
-                try writer.writeAll(&.{ 0x4c, 0x8d, 0x1d });
-                {
-                    const disp = try Atom.calcPcRelativeDisplacementX86(source_addr + 3, dyld_private_addr, 0);
-                    try writer.writeIntLittle(i32, disp);
-                }
-                try writer.writeAll(&.{ 0x41, 0x53, 0xff, 0x25 });
-                {
-                    const disp = try Atom.calcPcRelativeDisplacementX86(source_addr + 11, dyld_stub_binder_got_addr, 0);
-                    try writer.writeIntLittle(i32, disp);
-                }
-            },
-            .aarch64 => {
-                {
-                    const pages = Atom.calcNumberOfPages(source_addr, dyld_private_addr);
-                    try writer.writeIntLittle(u32, aarch64.Instruction.adrp(.x17, pages).toU32());
-                }
-                {
-                    const off = try Atom.calcPageOffset(dyld_private_addr, .arithmetic);
-                    try writer.writeIntLittle(u32, aarch64.Instruction.add(.x17, .x17, off, false).toU32());
-                }
-                try writer.writeIntLittle(u32, aarch64.Instruction.stp(
-                    .x16,
-                    .x17,
-                    aarch64.Register.sp,
-                    aarch64.Instruction.LoadStorePairOffset.pre_index(-16),
-                ).toU32());
-                {
-                    const pages = Atom.calcNumberOfPages(source_addr + 12, dyld_stub_binder_got_addr);
-                    try writer.writeIntLittle(u32, aarch64.Instruction.adrp(.x16, pages).toU32());
-                }
-                {
-                    const off = try Atom.calcPageOffset(dyld_stub_binder_got_addr, .load_store_64);
-                    try writer.writeIntLittle(u32, aarch64.Instruction.ldr(
-                        .x16,
-                        .x16,
-                        aarch64.Instruction.LoadStoreOffset.imm(off),
-                    ).toU32());
-                }
-                try writer.writeIntLittle(u32, aarch64.Instruction.br(.x16).toU32());
-            },
-            else => unreachable,
-        }
+        try stub_helpers.writeStubHelperPreambleCode(.{
+            .cpu_arch = cpu_arch,
+            .source_addr = source_addr,
+            .dyld_private_addr = dyld_private_addr,
+            .dyld_stub_binder_got_addr = dyld_stub_binder_got_addr,
+        }, writer);
     }
 
     pub fn createStubHelperAtom(self: *Zld) !AtomIndex {
         const cpu_arch = self.options.target.cpu.arch;
-        const stub_size: u4 = switch (cpu_arch) {
-            .x86_64 => 10,
-            .aarch64 => 3 * @sizeOf(u32),
-            else => unreachable,
-        };
+        const stub_size = stub_helpers.calcStubHelperEntrySize(cpu_arch);
         const alignment: u2 = switch (cpu_arch) {
             .x86_64 => 0,
             .aarch64 => 2,
@@ -749,32 +708,11 @@ pub const Zld = struct {
             const sym = self.getSymbol(.{ .sym_index = self.stub_helper_preamble_sym_index.? });
             break :blk sym.n_value;
         };
-        switch (cpu_arch) {
-            .x86_64 => {
-                try writer.writeAll(&.{ 0x68, 0x0, 0x0, 0x0, 0x0, 0xe9 });
-                {
-                    const disp = try Atom.calcPcRelativeDisplacementX86(source_addr + 6, target_addr, 0);
-                    try writer.writeIntLittle(i32, disp);
-                }
-            },
-            .aarch64 => {
-                const stub_size: u4 = 3 * @sizeOf(u32);
-                const literal = blk: {
-                    const div_res = try math.divExact(u64, stub_size - @sizeOf(u32), 4);
-                    break :blk math.cast(u18, div_res) orelse return error.Overflow;
-                };
-                try writer.writeIntLittle(u32, aarch64.Instruction.ldrLiteral(
-                    .w16,
-                    literal,
-                ).toU32());
-                {
-                    const disp = try Atom.calcPcRelativeDisplacementArm64(source_addr + 4, target_addr);
-                    try writer.writeIntLittle(u32, aarch64.Instruction.b(disp).toU32());
-                }
-                try writer.writeAll(&.{ 0x0, 0x0, 0x0, 0x0 });
-            },
-            else => unreachable,
-        }
+        try stub_helpers.writeStubHelperCode(.{
+            .cpu_arch = cpu_arch,
+            .source_addr = source_addr,
+            .target_addr = target_addr,
+        }, writer);
     }
 
     pub fn createLazyPointerAtom(self: *Zld) !AtomIndex {
@@ -819,11 +757,7 @@ pub const Zld = struct {
             .aarch64 => 2,
             else => unreachable, // unhandled architecture type
         };
-        const stub_size: u4 = switch (cpu_arch) {
-            .x86_64 => 6,
-            .aarch64 => 3 * @sizeOf(u32),
-            else => unreachable, // unhandled architecture type
-        };
+        const stub_size = stub_helpers.calcStubEntrySize(cpu_arch);
         const sym_index = try self.allocateSymbol();
         const atom_index = try self.createEmptyAtom(sym_index, stub_size, alignment);
         const sym = self.getSymbolPtr(.{ .sym_index = sym_index });
@@ -863,31 +797,11 @@ pub const Zld = struct {
             const sym = self.getSymbol(atom.getSymbolWithLoc());
             break :blk sym.n_value;
         };
-        switch (cpu_arch) {
-            .x86_64 => {
-                try writer.writeAll(&.{ 0xff, 0x25 });
-                {
-                    const disp = try Atom.calcPcRelativeDisplacementX86(source_addr + 2, target_addr, 0);
-                    try writer.writeIntLittle(i32, disp);
-                }
-            },
-            .aarch64 => {
-                {
-                    const pages = Atom.calcNumberOfPages(source_addr, target_addr);
-                    try writer.writeIntLittle(u32, aarch64.Instruction.adrp(.x16, pages).toU32());
-                }
-                {
-                    const off = try Atom.calcPageOffset(target_addr, .load_store_64);
-                    try writer.writeIntLittle(u32, aarch64.Instruction.ldr(
-                        .x16,
-                        .x16,
-                        aarch64.Instruction.LoadStoreOffset.imm(off),
-                    ).toU32());
-                }
-                try writer.writeIntLittle(u32, aarch64.Instruction.br(.x16).toU32());
-            },
-            else => unreachable,
-        }
+        try stub_helpers.writeStubCode(.{
+            .cpu_arch = cpu_arch,
+            .source_addr = source_addr,
+            .target_addr = target_addr,
+        }, writer);
     }
 
     fn createTentativeDefAtoms(self: *Zld) !void {
@@ -2267,11 +2181,7 @@ pub const Zld = struct {
         assert(self.stub_helper_preamble_sym_index != null);
 
         const section = self.sections.get(stub_helper_section_index);
-        const stub_offset: u4 = switch (self.options.target.cpu.arch) {
-            .x86_64 => 1,
-            .aarch64 => 2 * @sizeOf(u32),
-            else => unreachable,
-        };
+        const stub_offset = stub_helpers.calcStubOffsetInStubHelper(self.options.target.cpu.arch);
         const header = section.header;
         var atom_index = section.first_atom_index;
         atom_index = self.getAtom(atom_index).next_index.?; // skip preamble
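
The zld.zig hunks above are pure code motion: the one-shot linker keeps its atom bookkeeping but now emits stub machine code through the shared `stubs.zig` helpers, so both linker paths produce identical byte sequences. The call shape (a sketch; the operand names spell out what the diff passes implicitly):

    try stub_helpers.writeStubCode(.{
        .cpu_arch = cpu_arch,
        .source_addr = stub_addr,         // where this stub entry will live
        .target_addr = lazy_pointer_addr, // the __la_symbol_ptr slot it jumps through
    }, writer);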
src/link/MachO/ZldAtom.zig
@@ -21,6 +21,7 @@ const Allocator = mem.Allocator;
 const Arch = std.Target.Cpu.Arch;
 const AtomIndex = @import("zld.zig").AtomIndex;
 const Object = @import("Object.zig");
+const Relocation = @import("Relocation.zig");
 const SymbolWithLoc = @import("zld.zig").SymbolWithLoc;
 const Zld = @import("zld.zig").Zld;
 
@@ -571,7 +572,7 @@ fn resolveRelocsArm64(
                     zld.getAtom(getRelocTargetAtomIndex(zld, target, is_via_got).?).getFile(),
                 });
 
-                const displacement = if (calcPcRelativeDisplacementArm64(
+                const displacement = if (Relocation.calcPcRelativeDisplacementArm64(
                     source_addr,
                     zld.getSymbol(actual_target).n_value,
                 )) |disp| blk: {
@@ -585,7 +586,7 @@ fn resolveRelocsArm64(
                         actual_target,
                     ).?);
                     log.debug("    | target_addr = 0x{x} (thunk)", .{thunk_sym.n_value});
-                    break :blk try calcPcRelativeDisplacementArm64(source_addr, thunk_sym.n_value);
+                    break :blk try Relocation.calcPcRelativeDisplacementArm64(source_addr, thunk_sym.n_value);
                 };
 
                 const code = atom_code[rel_offset..][0..4];
@@ -607,7 +608,7 @@ fn resolveRelocsArm64(
 
                 log.debug("    | target_addr = 0x{x}", .{adjusted_target_addr});
 
-                const pages = @bitCast(u21, calcNumberOfPages(source_addr, adjusted_target_addr));
+                const pages = @bitCast(u21, Relocation.calcNumberOfPages(source_addr, adjusted_target_addr));
                 const code = atom_code[rel_offset..][0..4];
                 var inst = aarch64.Instruction{
                     .pc_relative_address = mem.bytesToValue(meta.TagPayload(
@@ -627,8 +628,8 @@ fn resolveRelocsArm64(
                 log.debug("    | target_addr = 0x{x}", .{adjusted_target_addr});
 
                 const code = atom_code[rel_offset..][0..4];
-                if (isArithmeticOp(code)) {
-                    const off = try calcPageOffset(adjusted_target_addr, .arithmetic);
+                if (Relocation.isArithmeticOp(code)) {
+                    const off = try Relocation.calcPageOffset(adjusted_target_addr, .arithmetic);
                     var inst = aarch64.Instruction{
                         .add_subtract_immediate = mem.bytesToValue(meta.TagPayload(
                             aarch64.Instruction,
@@ -644,11 +645,11 @@ fn resolveRelocsArm64(
                             aarch64.Instruction.load_store_register,
                         ), code),
                     };
-                    const off = try calcPageOffset(adjusted_target_addr, switch (inst.load_store_register.size) {
+                    const off = try Relocation.calcPageOffset(adjusted_target_addr, switch (inst.load_store_register.size) {
                         0 => if (inst.load_store_register.v == 1)
-                            PageOffsetInstKind.load_store_128
+                            Relocation.PageOffsetInstKind.load_store_128
                         else
-                            PageOffsetInstKind.load_store_8,
+                            Relocation.PageOffsetInstKind.load_store_8,
                         1 => .load_store_16,
                         2 => .load_store_32,
                         3 => .load_store_64,
@@ -665,7 +666,7 @@ fn resolveRelocsArm64(
 
                 log.debug("    | target_addr = 0x{x}", .{adjusted_target_addr});
 
-                const off = try calcPageOffset(adjusted_target_addr, .load_store_64);
+                const off = try Relocation.calcPageOffset(adjusted_target_addr, .load_store_64);
                 var inst: aarch64.Instruction = .{
                     .load_store_register = mem.bytesToValue(meta.TagPayload(
                         aarch64.Instruction,
@@ -689,7 +690,7 @@ fn resolveRelocsArm64(
                     size: u2,
                 };
                 const reg_info: RegInfo = blk: {
-                    if (isArithmeticOp(code)) {
+                    if (Relocation.isArithmeticOp(code)) {
                         const inst = mem.bytesToValue(meta.TagPayload(
                             aarch64.Instruction,
                             aarch64.Instruction.add_subtract_immediate,
@@ -716,7 +717,7 @@ fn resolveRelocsArm64(
                     .load_store_register = .{
                         .rt = reg_info.rd,
                         .rn = reg_info.rn,
-                        .offset = try calcPageOffset(adjusted_target_addr, .load_store_64),
+                        .offset = try Relocation.calcPageOffset(adjusted_target_addr, .load_store_64),
                         .opc = 0b01,
                         .op1 = 0b01,
                         .v = 0,
@@ -726,7 +727,7 @@ fn resolveRelocsArm64(
                     .add_subtract_immediate = .{
                         .rd = reg_info.rd,
                         .rn = reg_info.rn,
-                        .imm12 = try calcPageOffset(adjusted_target_addr, .arithmetic),
+                        .imm12 = try Relocation.calcPageOffset(adjusted_target_addr, .arithmetic),
                         .sh = 0,
                         .s = 0,
                         .op = 0,
@@ -858,7 +859,7 @@ fn resolveRelocsX86(
                 const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]);
                 const adjusted_target_addr = @intCast(u64, @intCast(i64, target_addr) + addend);
                 log.debug("    | target_addr = 0x{x}", .{adjusted_target_addr});
-                const disp = try calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0);
+                const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0);
                 mem.writeIntLittle(i32, atom_code[rel_offset..][0..4], disp);
             },
 
@@ -868,7 +869,7 @@ fn resolveRelocsX86(
                 const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]);
                 const adjusted_target_addr = @intCast(u64, @intCast(i64, target_addr) + addend);
                 log.debug("    | target_addr = 0x{x}", .{adjusted_target_addr});
-                const disp = try calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0);
+                const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0);
                 mem.writeIntLittle(i32, atom_code[rel_offset..][0..4], disp);
             },
 
@@ -876,7 +877,7 @@ fn resolveRelocsX86(
                 const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]);
                 const adjusted_target_addr = @intCast(u64, @intCast(i64, target_addr) + addend);
                 log.debug("    | target_addr = 0x{x}", .{adjusted_target_addr});
-                const disp = try calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0);
+                const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0);
 
                 if (zld.tlv_ptr_table.get(target) == null) {
                     // We need to rewrite the opcode from movq to leaq.
@@ -913,7 +914,7 @@ fn resolveRelocsX86(
 
                 log.debug("    | target_addr = 0x{x}", .{adjusted_target_addr});
 
-                const disp = try calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, correction);
+                const disp = try Relocation.calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, correction);
                 mem.writeIntLittle(i32, atom_code[rel_offset..][0..4], disp);
             },
 
@@ -955,11 +956,6 @@ fn resolveRelocsX86(
     }
 }
 
-inline fn isArithmeticOp(inst: *const [4]u8) bool {
-    const group_decode = @truncate(u5, inst[3]);
-    return ((group_decode >> 2) == 4);
-}
-
 pub fn getAtomCode(zld: *Zld, atom_index: AtomIndex) []const u8 {
     const atom = zld.getAtom(atom_index);
     assert(atom.getFile() != null); // Synthetic atom shouldn't need to inquire for code.
@@ -1006,43 +1002,6 @@ pub fn getAtomRelocs(zld: *Zld, atom_index: AtomIndex) []const macho.relocation_
     return relocs[cache.start..][0..cache.len];
 }
 
-pub fn calcPcRelativeDisplacementX86(source_addr: u64, target_addr: u64, correction: u3) error{Overflow}!i32 {
-    const disp = @intCast(i64, target_addr) - @intCast(i64, source_addr + 4 + correction);
-    return math.cast(i32, disp) orelse error.Overflow;
-}
-
-pub fn calcPcRelativeDisplacementArm64(source_addr: u64, target_addr: u64) error{Overflow}!i28 {
-    const disp = @intCast(i64, target_addr) - @intCast(i64, source_addr);
-    return math.cast(i28, disp) orelse error.Overflow;
-}
-
-pub fn calcNumberOfPages(source_addr: u64, target_addr: u64) i21 {
-    const source_page = @intCast(i32, source_addr >> 12);
-    const target_page = @intCast(i32, target_addr >> 12);
-    const pages = @intCast(i21, target_page - source_page);
-    return pages;
-}
-
-const PageOffsetInstKind = enum {
-    arithmetic,
-    load_store_8,
-    load_store_16,
-    load_store_32,
-    load_store_64,
-    load_store_128,
-};
-
-pub fn calcPageOffset(target_addr: u64, kind: PageOffsetInstKind) !u12 {
-    const narrowed = @truncate(u12, target_addr);
-    return switch (kind) {
-        .arithmetic, .load_store_8 => narrowed,
-        .load_store_16 => try math.divExact(u12, narrowed, 2),
-        .load_store_32 => try math.divExact(u12, narrowed, 4),
-        .load_store_64 => try math.divExact(u12, narrowed, 8),
-        .load_store_128 => try math.divExact(u12, narrowed, 16),
-    };
-}
-
 pub fn relocRequiresGot(zld: *Zld, rel: macho.relocation_info) bool {
     switch (zld.options.target.cpu.arch) {
         .aarch64 => switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) {
src/link/MachO.zig
@@ -19,6 +19,7 @@ const fat = @import("MachO/fat.zig");
 const link = @import("../link.zig");
 const llvm_backend = @import("../codegen/llvm.zig");
 const load_commands = @import("MachO/load_commands.zig");
+const stubs = @import("MachO/stubs.zig");
 const target_util = @import("../target.zig");
 const trace = @import("../tracy.zig").trace;
 const zld = @import("MachO/zld.zig");
@@ -156,7 +157,7 @@ stub_helper_preamble_atom_index: ?Atom.Index = null,
 strtab: StringTable(.strtab) = .{},
 
 got_table: TableSection(SymbolWithLoc) = .{},
-stubs_table: SectionTable = .{},
+stub_table: TableSection(SymbolWithLoc) = .{},
 tlv_table: SectionTable = .{},
 
 error_flags: File.ErrorFlags = File.ErrorFlags{},
@@ -164,6 +165,8 @@ error_flags: File.ErrorFlags = File.ErrorFlags{},
 segment_table_dirty: bool = false,
 got_table_count_dirty: bool = false,
 got_table_contents_dirty: bool = false,
+stub_table_count_dirty: bool = false,
+stub_table_contents_dirty: bool = false,
 
 /// A helper var to indicate if we are at the start of the incremental updates, or
 /// already somewhere further along the update-and-run chain.
@@ -213,11 +216,6 @@ rebases: RebaseTable = .{},
 /// this will be a table indexed by index into the list of Atoms.
 bindings: BindingTable = .{},
 
-/// A table of lazy bindings indexed by the owning them `Atom`.
-/// Note that once we refactor `Atom`'s lifetime and ownership rules,
-/// this will be a table indexed by index into the list of Atoms.
-lazy_bindings: BindingTable = .{},
-
 /// Table of tracked LazySymbols.
 lazy_syms: LazySymbolTable = .{},
 
@@ -763,11 +761,23 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No
     if (self.got_table_contents_dirty) {
         for (self.got_table.entries.items, 0..) |entry, i| {
             if (!self.got_table.lookup.contains(entry)) continue;
+            // TODO: write all in one go rather than incrementally.
             try self.writeOffsetTableEntry(i);
         }
         self.got_table_contents_dirty = false;
     }
 
+    // Update stubs if we moved any section in memory.
+    // TODO: we probably don't need to update all sections if only one got moved.
+    if (self.stub_table_contents_dirty) {
+        for (self.stub_table.entries.items, 0..) |entry, i| {
+            if (!self.stub_table.lookup.contains(entry)) continue;
+            // TODO: write all in one go rather than incrementally.
+            try self.writeStubTableEntry(i);
+        }
+        self.stub_table_contents_dirty = false;
+    }
+
     if (build_options.enable_logging) {
         self.logSymtab();
         self.logSections();
@@ -1311,6 +1321,86 @@ fn writeOffsetTableEntry(self: *MachO, index: usize) !void {
     }
 }
 
+fn writeStubTableEntry(self: *MachO, index: usize) !void {
+    const stubs_sect_id = self.stubs_section_index.?;
+    const stub_helper_sect_id = self.stub_helper_section_index.?;
+    const laptr_sect_id = self.la_symbol_ptr_section_index.?;
+
+    const cpu_arch = self.base.options.target.cpu.arch;
+    const stub_entry_size = stubs.calcStubEntrySize(cpu_arch);
+    const stub_helper_entry_size = stubs.calcStubHelperEntrySize(cpu_arch);
+    const stub_helper_preamble_size = stubs.calcStubHelperPreambleSize(cpu_arch);
+
+    if (self.stub_table_count_dirty) {
+        // We grow all 3 sections one by one.
+        {
+            const needed_size = stub_entry_size * self.stub_table.entries.items.len;
+            try self.growSection(stubs_sect_id, needed_size);
+        }
+        {
+            const needed_size = stub_helper_preamble_size + stub_helper_entry_size * self.stub_table.entries.items.len;
+            try self.growSection(stub_helper_sect_id, needed_size);
+        }
+        {
+            const needed_size = @sizeOf(u64) * self.stub_table.entries.items.len;
+            try self.growSection(laptr_sect_id, needed_size);
+        }
+        self.stub_table_count_dirty = false;
+    }
+
+    const gpa = self.base.allocator;
+
+    const stubs_header = self.sections.items(.header)[stubs_sect_id];
+    const stub_helper_header = self.sections.items(.header)[stub_helper_sect_id];
+    const laptr_header = self.sections.items(.header)[laptr_sect_id];
+
+    const entry = self.stub_table.entries.items[index];
+    const stub_addr: u64 = stubs_header.addr + stub_entry_size * index;
+    const stub_helper_addr: u64 = stub_helper_header.addr + stub_helper_preamble_size + stub_helper_entry_size * index;
+    const laptr_addr: u64 = laptr_header.addr + @sizeOf(u64) * index;
+
+    log.debug("writing stub entry {d}: @{x} => '{s}'", .{ index, stub_addr, self.getSymbolName(entry) });
+
+    {
+        var buf = try std.ArrayList(u8).initCapacity(gpa, stub_entry_size);
+        defer buf.deinit();
+        try stubs.writeStubCode(.{
+            .cpu_arch = cpu_arch,
+            .source_addr = stub_addr,
+            .target_addr = laptr_addr,
+        }, buf.writer());
+        const off = stubs_header.offset + stub_entry_size * index;
+        try self.base.file.?.pwriteAll(buf.items, off);
+    }
+
+    {
+        var buf = try std.ArrayList(u8).initCapacity(gpa, stub_helper_entry_size);
+        defer buf.deinit();
+        try stubs.writeStubHelperCode(.{
+            .cpu_arch = cpu_arch,
+            .source_addr = stub_helper_addr,
+            .target_addr = stub_helper_header.addr,
+        }, buf.writer());
+        const off = stub_helper_header.offset + stub_helper_preamble_size + stub_helper_entry_size * index;
+        try self.base.file.?.pwriteAll(buf.items, off);
+    }
+
+    {
+        var buf: [@sizeOf(u64)]u8 = undefined;
+        mem.writeIntLittle(u64, &buf, stub_helper_addr);
+        const off = laptr_header.offset + @sizeOf(u64) * index;
+        try self.base.file.?.pwriteAll(&buf, off);
+    }
+
+    // TODO: generating new stub entry will require pulling the address of the symbol from the
+    // target dylib when updating directly in memory.
+    if (is_hot_update_compatible) {
+        if (self.hot_state.mach_task) |_| {
+            @panic("TODO: update a stub entry in memory");
+        }
+    }
+}
+
 fn writePtrWidthAtom(self: *MachO, atom_index: Atom.Index) !void {
     var buffer: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64);
     try self.writeAtom(atom_index, &buffer);
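
The layout `writeStubTableEntry` maintains, for stub index `i` (a sketch using the headers named in the function above; not part of the commit):

    // __stubs: jumps through the lazy pointer.
    const stub_addr = stubs_header.addr + i * stub_entry_size;
    // __stub_helper: pushes the lazy-bind offset, then jumps back to the
    // preamble, which tail-calls dyld_stub_binder.
    const helper_addr = stub_helper_header.addr + stub_helper_preamble_size + i * stub_helper_entry_size;
    // __la_symbol_ptr: initially points at helper_addr; dyld rebinds it to the
    // real import on first call.
    const laptr_addr = laptr_header.addr + i * @sizeOf(u64);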
@@ -1339,12 +1429,16 @@ fn markRelocsDirtyByAddress(self: *MachO, addr: u64) void {
     }
 
     // Dirty synthetic table sections if necessary
-    for (&[_]u8{self.got_section_index.?}, &[_]*bool{&self.got_table_contents_dirty}) |sect_id, dirty| {
-        if (dirty.*) continue;
-        const segment_index = self.sections.items(.segment_index)[sect_id];
-        const segment = self.segments.items[segment_index];
-        if (segment.vmaddr < addr) continue;
-        dirty.* = true;
+    {
+        const target_addr = self.getSegment(self.got_section_index.?).vmaddr;
+        if (target_addr >= addr) self.got_table_contents_dirty = true;
+    }
+    {
+        const stubs_addr = self.getSegment(self.stubs_section_index.?).vmaddr;
+        const stub_helper_addr = self.getSegment(self.stub_helper_section_index.?).vmaddr;
+        const laptr_addr = self.getSegment(self.la_symbol_ptr_section_index.?).vmaddr;
+        if (stubs_addr >= addr or stub_helper_addr >= addr or laptr_addr >= addr)
+            self.stub_table_contents_dirty = true;
     }
 }
 
@@ -1525,200 +1619,6 @@ fn createStubHelperPreambleAtom(self: *MachO) !void {
     try self.writeAtom(atom_index, code);
 }
 
-fn createStubHelperAtom(self: *MachO) !Atom.Index {
-    const gpa = self.base.allocator;
-    const arch = self.base.options.target.cpu.arch;
-    const size: u4 = switch (arch) {
-        .x86_64 => 10,
-        .aarch64 => 3 * @sizeOf(u32),
-        else => unreachable,
-    };
-    const atom_index = try self.createAtom();
-    const atom = self.getAtomPtr(atom_index);
-    atom.size = size;
-
-    const required_alignment: u32 = switch (arch) {
-        .x86_64 => 1,
-        .aarch64 => @alignOf(u32),
-        else => unreachable,
-    };
-
-    const sym = atom.getSymbolPtr(self);
-    sym.n_type = macho.N_SECT;
-    sym.n_sect = self.stub_helper_section_index.? + 1;
-
-    const code = try gpa.alloc(u8, size);
-    defer gpa.free(code);
-    mem.set(u8, code, 0);
-
-    const stub_helper_preamble_atom_sym_index = if (self.stub_helper_preamble_atom_index) |stub_index|
-        self.getAtom(stub_index).getSymbolIndex().?
-    else
-        unreachable;
-
-    switch (arch) {
-        .x86_64 => {
-            // pushq
-            code[0] = 0x68;
-            // Next 4 bytes 1..4 are just a placeholder populated in `populateLazyBindOffsetsInStubHelper`.
-            // jmpq
-            code[5] = 0xe9;
-
-            try Atom.addRelocation(self, atom_index, .{
-                .type = .branch,
-                .target = .{ .sym_index = stub_helper_preamble_atom_sym_index },
-                .offset = 6,
-                .addend = 0,
-                .pcrel = true,
-                .length = 2,
-            });
-        },
-        .aarch64 => {
-            const literal = blk: {
-                const div_res = try math.divExact(u64, size - @sizeOf(u32), 4);
-                break :blk math.cast(u18, div_res) orelse return error.Overflow;
-            };
-            // ldr w16, literal
-            mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldrLiteral(
-                .w16,
-                literal,
-            ).toU32());
-            // b disp
-            mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.b(0).toU32());
-            // Next 4 bytes 8..12 are just a placeholder populated in `populateLazyBindOffsetsInStubHelper`.
-
-            try Atom.addRelocation(self, atom_index, .{
-                .type = .branch,
-                .target = .{ .sym_index = stub_helper_preamble_atom_sym_index },
-                .offset = 4,
-                .addend = 0,
-                .pcrel = true,
-                .length = 2,
-            });
-        },
-        else => unreachable,
-    }
-
-    sym.n_value = try self.allocateAtom(atom_index, size, required_alignment);
-    log.debug("allocated stub helper atom at 0x{x}", .{sym.n_value});
-    try self.writeAtom(atom_index, code);
-
-    return atom_index;
-}
-
-fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, target: SymbolWithLoc) !Atom.Index {
-    const atom_index = try self.createAtom();
-    const atom = self.getAtomPtr(atom_index);
-    atom.size = @sizeOf(u64);
-
-    const sym = atom.getSymbolPtr(self);
-    sym.n_type = macho.N_SECT;
-    sym.n_sect = self.la_symbol_ptr_section_index.? + 1;
-
-    try Atom.addRelocation(self, atom_index, .{
-        .type = .unsigned,
-        .target = .{ .sym_index = stub_sym_index },
-        .offset = 0,
-        .addend = 0,
-        .pcrel = false,
-        .length = 3,
-    });
-    try Atom.addRebase(self, atom_index, 0);
-    try Atom.addLazyBinding(self, atom_index, .{
-        .target = self.getGlobal(self.getSymbolName(target)).?,
-        .offset = 0,
-    });
-
-    sym.n_value = try self.allocateAtom(atom_index, atom.size, @alignOf(u64));
-    log.debug("allocated lazy pointer atom at 0x{x} ({s})", .{ sym.n_value, self.getSymbolName(target) });
-    try self.writePtrWidthAtom(atom_index);
-
-    return atom_index;
-}
-
-fn createStubAtom(self: *MachO, laptr_sym_index: u32) !Atom.Index {
-    const gpa = self.base.allocator;
-    const arch = self.base.options.target.cpu.arch;
-    const size: u4 = switch (arch) {
-        .x86_64 => 6,
-        .aarch64 => 3 * @sizeOf(u32),
-        else => unreachable, // unhandled architecture type
-    };
-    const atom_index = try self.createAtom();
-    const atom = self.getAtomPtr(atom_index);
-    atom.size = size;
-
-    const required_alignment: u32 = switch (arch) {
-        .x86_64 => 1,
-        .aarch64 => @alignOf(u32),
-        else => unreachable, // unhandled architecture type
-
-    };
-
-    const sym = atom.getSymbolPtr(self);
-    sym.n_type = macho.N_SECT;
-    sym.n_sect = self.stubs_section_index.? + 1;
-
-    const code = try gpa.alloc(u8, size);
-    defer gpa.free(code);
-    mem.set(u8, code, 0);
-
-    switch (arch) {
-        .x86_64 => {
-            // jmp
-            code[0] = 0xff;
-            code[1] = 0x25;
-
-            try Atom.addRelocation(self, atom_index, .{
-                .type = .branch,
-                .target = .{ .sym_index = laptr_sym_index },
-                .offset = 2,
-                .addend = 0,
-                .pcrel = true,
-                .length = 2,
-            });
-        },
-        .aarch64 => {
-            // adrp x16, pages
-            mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adrp(.x16, 0).toU32());
-            // ldr x16, x16, offset
-            mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.ldr(
-                .x16,
-                .x16,
-                aarch64.Instruction.LoadStoreOffset.imm(0),
-            ).toU32());
-            // br x16
-            mem.writeIntLittle(u32, code[8..12], aarch64.Instruction.br(.x16).toU32());
-
-            try Atom.addRelocations(self, atom_index, &[_]Relocation{
-                .{
-                    .type = .page,
-                    .target = .{ .sym_index = laptr_sym_index },
-                    .offset = 0,
-                    .addend = 0,
-                    .pcrel = true,
-                    .length = 2,
-                },
-                .{
-                    .type = .pageoff,
-                    .target = .{ .sym_index = laptr_sym_index },
-                    .offset = 4,
-                    .addend = 0,
-                    .pcrel = false,
-                    .length = 2,
-                },
-            });
-        },
-        else => unreachable,
-    }
-
-    sym.n_value = try self.allocateAtom(atom_index, size, required_alignment);
-    log.debug("allocated stub atom at 0x{x}", .{sym.n_value});
-    try self.writeAtom(atom_index, code);
-
-    return atom_index;
-}
-
 fn createThreadLocalDescriptorAtom(self: *MachO, target: SymbolWithLoc) !Atom.Index {
     const gpa = self.base.allocator;
     const size = 3 * @sizeOf(u64);
@@ -1904,7 +1804,7 @@ pub fn deinit(self: *MachO) void {
     }
 
     self.got_table.deinit(gpa);
-    self.stubs_table.deinit(gpa);
+    self.stub_table.deinit(gpa);
     self.tlv_table.deinit(gpa);
     self.strtab.deinit(gpa);
 
@@ -1968,11 +1868,6 @@ pub fn deinit(self: *MachO) void {
         bindings.deinit(gpa);
     }
     self.bindings.deinit(gpa);
-
-    for (self.lazy_bindings.values()) |*bindings| {
-        bindings.deinit(gpa);
-    }
-    self.lazy_bindings.deinit(gpa);
 }
 
 fn freeAtom(self: *MachO, atom_index: Atom.Index) void {
@@ -2124,16 +2019,10 @@ fn addGotEntry(self: *MachO, target: SymbolWithLoc) !void {
 }
 
 fn addStubEntry(self: *MachO, target: SymbolWithLoc) !void {
-    if (self.stubs_table.lookup.contains(target)) return;
-    const stub_index = try self.stubs_table.allocateEntry(self.base.allocator, target);
-    const stub_helper_atom_index = try self.createStubHelperAtom();
-    const stub_helper_atom = self.getAtom(stub_helper_atom_index);
-    const laptr_atom_index = try self.createLazyPointerAtom(stub_helper_atom.getSymbolIndex().?, target);
-    const laptr_atom = self.getAtom(laptr_atom_index);
-    const stub_atom_index = try self.createStubAtom(laptr_atom.getSymbolIndex().?);
-    const stub_atom = self.getAtom(stub_atom_index);
-    self.stubs_table.entries.items[stub_index].sym_index = stub_atom.getSymbolIndex().?;
-    self.markRelocsDirtyByTarget(target);
+    if (self.stub_table.lookup.contains(target)) return;
+    const stub_index = try self.stub_table.allocateEntry(self.base.allocator, target);
+    try self.writeStubTableEntry(stub_index);
+    self.stub_table_count_dirty = true;
 }
 
 fn addTlvEntry(self: *MachO, target: SymbolWithLoc) !void {
@@ -2840,11 +2729,7 @@ fn populateMissingMetadata(self: *MachO) !void {
     }
 
     if (self.stubs_section_index == null) {
-        const stub_size: u32 = switch (cpu_arch) {
-            .x86_64 => 6,
-            .aarch64 => 3 * @sizeOf(u32),
-            else => unreachable, // unhandled architecture type
-        };
+        const stub_size = stubs.calcStubEntrySize(cpu_arch);
         self.stubs_section_index = try self.allocateSection("__TEXT2", "__stubs", .{
             .size = stub_size,
             .alignment = switch (cpu_arch) {
@@ -3377,45 +3262,35 @@ fn collectBindData(self: *MachO, bind: anytype, raw_bindings: anytype) !void {
     try bind.finalize(gpa, self);
 }
 
-fn collectLazyBindData(self: *MachO, bind: anytype, raw_bindings: anytype) !void {
+fn collectLazyBindData(self: *MachO, bind: anytype) !void {
     const gpa = self.base.allocator;
-    const slice = self.sections.slice();
 
-    for (raw_bindings.keys(), 0..) |atom_index, i| {
-        const atom = self.getAtom(atom_index);
-        log.debug("  ATOM(%{?d}, '{s}')", .{ atom.getSymbolIndex(), atom.getName(self) });
-
-        const sym = atom.getSymbol(self);
-        const segment_index = slice.items(.segment_index)[sym.n_sect - 1];
-        const seg = self.getSegment(sym.n_sect - 1);
-
-        const base_offset = sym.n_value - seg.vmaddr;
-
-        const bindings = raw_bindings.values()[i];
-        try bind.entries.ensureUnusedCapacity(gpa, bindings.items.len);
-
-        for (bindings.items) |binding| {
-            const bind_sym = self.getSymbol(binding.target);
-            const bind_sym_name = self.getSymbolName(binding.target);
-            const dylib_ordinal = @divTrunc(
-                @bitCast(i16, bind_sym.n_desc),
-                macho.N_SYMBOL_RESOLVER,
-            );
-            log.debug("    | bind at {x}, import('{s}') in dylib({d})", .{
-                binding.offset + base_offset,
-                bind_sym_name,
-                dylib_ordinal,
-            });
-            if (bind_sym.weakRef()) {
-                log.debug("    | marking as weak ref ", .{});
-            }
-            bind.entries.appendAssumeCapacity(.{
-                .target = binding.target,
-                .offset = binding.offset + base_offset,
-                .segment_id = segment_index,
-                .addend = 0,
-            });
+    try bind.entries.ensureUnusedCapacity(gpa, self.stub_table.entries.items.len);
+    const segment_index = self.sections.items(.segment_index)[self.la_symbol_ptr_section_index.?];
+    for (self.stub_table.entries.items, 0..) |entry, i| {
+        if (!self.stub_table.lookup.contains(entry)) continue;
+        const bind_sym = self.getSymbol(entry);
+        assert(bind_sym.undf());
+        const bind_sym_name = self.getSymbolName(entry);
+        const offset = i * @sizeOf(u64);
+        const dylib_ordinal = @divTrunc(
+            @bitCast(i16, bind_sym.n_desc),
+            macho.N_SYMBOL_RESOLVER,
+        );
+        log.debug("    | bind at {x}, import('{s}') in dylib({d})", .{
+            offset,
+            bind_sym_name,
+            dylib_ordinal,
+        });
+        if (bind_sym.weakRef()) {
+            log.debug("    | marking as weak ref ", .{});
         }
+        bind.entries.appendAssumeCapacity(.{
+            .target = entry,
+            .offset = offset,
+            .segment_id = segment_index,
+            .addend = 0,
+        });
     }
 
     try bind.finalize(gpa, self);
@@ -3464,7 +3339,7 @@ fn writeDyldInfoData(self: *MachO) !void {
 
     var lazy_bind = LazyBind{};
     defer lazy_bind.deinit(gpa);
-    try self.collectLazyBindData(&lazy_bind, self.lazy_bindings);
+    try self.collectLazyBindData(&lazy_bind);
 
     var trie: Trie = .{};
     defer trie.deinit(gpa);
@@ -3542,32 +3417,24 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, lazy_bind: LazyBind) !void
     const stub_helper_section_index = self.stub_helper_section_index.?;
     assert(self.stub_helper_preamble_atom_index != null);
 
-    const section = self.sections.get(stub_helper_section_index);
+    const header = self.sections.items(.header)[stub_helper_section_index];
 
-    const stub_offset: u4 = switch (self.base.options.target.cpu.arch) {
-        .x86_64 => 1,
-        .aarch64 => 2 * @sizeOf(u32),
-        else => unreachable,
-    };
-    const header = section.header;
-    var atom_index = section.last_atom_index.?;
+    const cpu_arch = self.base.options.target.cpu.arch;
+    const preamble_size = stubs.calcStubHelperPreambleSize(cpu_arch);
+    const stub_size = stubs.calcStubHelperEntrySize(cpu_arch);
+    const stub_offset = stubs.calcStubOffsetInStubHelper(cpu_arch);
+    const base_offset = header.offset + preamble_size;
 
-    var index: usize = lazy_bind.offsets.items.len;
-    while (index > 0) : (index -= 1) {
-        const atom = self.getAtom(atom_index);
-        const sym = atom.getSymbol(self);
-        const file_offset = header.offset + sym.n_value - header.addr + stub_offset;
-        const bind_offset = lazy_bind.offsets.items[index - 1];
+    for (lazy_bind.offsets.items, 0..) |bind_offset, index| {
+        const file_offset = base_offset + index * stub_size + stub_offset;
 
         log.debug("writing lazy bind offset 0x{x} ({s}) in stub helper at 0x{x}", .{
             bind_offset,
-            self.getSymbolName(lazy_bind.entries.items[index - 1].target),
+            self.getSymbolName(lazy_bind.entries.items[index].target),
             file_offset,
         });
 
         try self.base.file.?.pwriteAll(mem.asBytes(&bind_offset), file_offset);
-
-        atom_index = atom.prev_index.?;
     }
 }
 
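With fixed-size helper entries, the lazy-bind offset for entry `i` now lands at a computable file offset, which the rewritten loop exploits (illustrative, x86_64 numbers; the aarch64 case is preamble 24, entry 12, patch offset 8):

    // x86_64 helper entry: `68 <imm32> e9 <disp32>` (10 bytes); the lazy-bind
    // offset is the imm32, i.e. 1 byte into the entry, after a 15-byte preamble.
    fn lazyBindPatchOffset(header_offset: u64, i: u64) u64 {
        return header_offset + 15 + i * 10 + 1;
    }
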
@@ -3683,7 +3550,7 @@ const SymtabCtx = struct {
 
 fn writeDysymtab(self: *MachO, ctx: SymtabCtx) !void {
     const gpa = self.base.allocator;
-    const nstubs = @intCast(u32, self.stubs_table.lookup.count());
+    const nstubs = @intCast(u32, self.stub_table.lookup.count());
     const ngot_entries = @intCast(u32, self.got_table.lookup.count());
     const nindirectsyms = nstubs * 2 + ngot_entries;
     const iextdefsym = ctx.nlocalsym;
@@ -3704,13 +3571,13 @@ fn writeDysymtab(self: *MachO, ctx: SymtabCtx) !void {
     const writer = buf.writer();
 
     if (self.stubs_section_index) |sect_id| {
-        const stubs = &self.sections.items(.header)[sect_id];
-        stubs.reserved1 = 0;
-        for (self.stubs_table.entries.items) |entry| {
-            if (entry.sym_index == 0) continue;
-            const target_sym = self.getSymbol(entry.target);
+        const stubs_header = &self.sections.items(.header)[sect_id];
+        stubs_header.reserved1 = 0;
+        for (self.stub_table.entries.items) |entry| {
+            if (!self.stub_table.lookup.contains(entry)) continue;
+            const target_sym = self.getSymbol(entry);
             assert(target_sym.undf());
-            try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?);
+            try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry).?);
         }
     }
 
@@ -3731,11 +3598,11 @@ fn writeDysymtab(self: *MachO, ctx: SymtabCtx) !void {
     if (self.la_symbol_ptr_section_index) |sect_id| {
         const la_symbol_ptr = &self.sections.items(.header)[sect_id];
         la_symbol_ptr.reserved1 = nstubs + ngot_entries;
-        for (self.stubs_table.entries.items) |entry| {
-            if (entry.sym_index == 0) continue;
-            const target_sym = self.getSymbol(entry.target);
+        for (self.stub_table.entries.items) |entry| {
+            if (!self.stub_table.lookup.contains(entry)) continue;
+            const target_sym = self.getSymbol(entry);
             assert(target_sym.undf());
-            try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?);
+            try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry).?);
         }
     }
 
@@ -4422,7 +4289,7 @@ pub fn logSymtab(self: *MachO) void {
     log.debug("{}", .{self.got_table});
 
     log.debug("stubs entries:", .{});
-    log.debug("{}", .{self.stubs_table.fmtDebug(self)});
+    log.debug("{}", .{self.stub_table});
 
     log.debug("threadlocal entries:", .{});
     log.debug("{}", .{self.tlv_table.fmtDebug(self)});