Commit 26d4f9b69e

Jacob Young <jacobly0@users.noreply.github.com>
2023-04-24 04:25:53
x86_64: add frame indices
This allows abstract references to the stack frame, which enables:

* deferred frame layout until after the function has been processed
* frame compaction (sorting by alignment)
* being able to overalign the stack
* references that change based on an overaligned stack
* moving callee-saved spills to the usual part (smaller code size)
* shared call frame (avoids stack adjustments before and after calls)
1 parent f618398
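
The core mechanism: instead of choosing a concrete rbp/rsp displacement the moment each instruction is lowered, code generation now records an abstract FrameIndex, and a single layout pass (computeFrameLayout in the CodeGen.zig diff below) assigns every index a (base register, displacement) pair after the whole function has been seen. A minimal standalone Zig sketch of that two-phase idea, illustrative only and not the compiler's actual code:

    const std = @import("std");

    const FrameLoc = struct { base: enum { rbp, rsp }, disp: i32 };

    pub fn main() void {
        // Phase 1: an instruction records "load from frame #2 + 8" symbolically;
        // no rbp/rsp displacement has been chosen yet.
        const frame_index: u32 = 2;
        const extra_disp: i32 = 8;

        // Phase 2: once the whole function has been processed, a layout pass
        // decides where each frame index actually lives.
        const frame_locs = [_]FrameLoc{
            .{ .base = .rbp, .disp = 16 }, // e.g. args_frame
            .{ .base = .rbp, .disp = 8 }, // e.g. ret_addr
            .{ .base = .rsp, .disp = 32 }, // e.g. a local, placed after sorting
        };
        const loc = frame_locs[frame_index];
        std.debug.print("[{s} + {d}]\n", .{ @tagName(loc.base), loc.disp + extra_disp });
    }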
src/arch/x86_64/bits.zig
@@ -405,14 +405,69 @@ test "Register classes" {
     try expect(Register.fs.class() == .segment);
 }
 
+pub const FrameIndex = enum(u32) {
+    // This index refers to the start of the arguments passed to this function.
+    args_frame,
+    // This index refers to the return address pushed by a `call` and popped by a `ret`.
+    ret_addr,
+    // This index refers to the base pointer pushed in the prologue and popped in the epilogue.
+    base_ptr,
+    // This index refers to the entire stack frame.
+    stack_frame,
+    // This index refers to the start of the call frame for arguments passed to called functions.
+    call_frame,
+    // Other indices are used for local variable stack slots.
+    _,
+
+    pub const named_count = @typeInfo(FrameIndex).Enum.fields.len;
+
+    pub fn isNamed(fi: FrameIndex) bool {
+        return @enumToInt(fi) < named_count;
+    }
+
+    pub fn format(
+        fi: FrameIndex,
+        comptime fmt: []const u8,
+        options: std.fmt.FormatOptions,
+        writer: anytype,
+    ) @TypeOf(writer).Error!void {
+        try writer.writeAll("FrameIndex");
+        if (fi.isNamed()) {
+            try writer.writeByte('.');
+            try writer.writeAll(@tagName(fi));
+        } else {
+            try writer.writeByte('(');
+            try std.fmt.formatType(@enumToInt(fi), fmt, options, writer, 0);
+            try writer.writeByte(')');
+        }
+    }
+};
+
 pub const Memory = union(enum) {
     sib: Sib,
     rip: Rip,
     moffs: Moffs,
 
-    pub const ScaleIndex = packed struct {
+    pub const Base = union(enum) {
+        none,
+        reg: Register,
+        frame: FrameIndex,
+
+        pub const Tag = @typeInfo(Base).Union.tag_type.?;
+
+        pub fn isExtended(self: Base) bool {
+            return switch (self) {
+                .none, .frame => false, // neither rsp nor rbp is extended
+                .reg => |reg| reg.isExtended(),
+            };
+        }
+    };
+
+    pub const ScaleIndex = struct {
         scale: u4,
         index: Register,
+
+        const none = ScaleIndex{ .scale = 0, .index = undefined };
     };
 
     pub const PtrSize = enum {
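
A hedged aside on the format method above: named indices render by tag, anything else by raw value. A test in the style of the file's existing "Register classes" test could pin that down (a sketch only, assuming it lives in bits.zig next to FrameIndex, with this era's @intToEnum builtin):

    test "FrameIndex formatting" {
        try std.testing.expectFmt("FrameIndex.base_ptr", "{}", .{FrameIndex.base_ptr});
        try std.testing.expectFmt("FrameIndex(7)", "{}", .{@intToEnum(FrameIndex, 7)});
    }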
@@ -460,8 +515,8 @@ pub const Memory = union(enum) {
 
     pub const Sib = struct {
         ptr_size: PtrSize,
-        base: ?Register,
-        scale_index: ?ScaleIndex,
+        base: Base,
+        scale_index: ScaleIndex,
         disp: i32,
     };
 
@@ -482,7 +537,7 @@ pub const Memory = union(enum) {
 
     pub fn sib(ptr_size: PtrSize, args: struct {
         disp: i32 = 0,
-        base: ?Register = null,
+        base: Base = .none,
         scale_index: ?ScaleIndex = null,
     }) Memory {
         if (args.scale_index) |si| assert(std.math.isPowerOfTwo(si.scale));
@@ -490,7 +545,7 @@ pub const Memory = union(enum) {
             .base = args.base,
             .disp = args.disp,
             .ptr_size = ptr_size,
-            .scale_index = args.scale_index,
+            .scale_index = if (args.scale_index) |si| si else ScaleIndex.none,
         } };
     }
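
With Base now a tagged union, a frame-relative operand can be built before the frame layout exists. A sketch of the new constructor call (assuming PtrSize exposes a .qword tag and frame_index holds some FrameIndex):

    // qword ptr [frame_index + 8], with no scaled index register.
    const operand = Memory.sib(.qword, .{
        .base = .{ .frame = frame_index },
        .disp = 8,
    });

The ScaleIndex.none sentinel works because a real SIB scale is always a power of two (1, 2, 4, or 8, as the assert above enforces), so scale == 0 can never occur in a valid operand and is free to mean "no index".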
 
@@ -502,22 +557,25 @@ pub const Memory = union(enum) {
         return switch (mem) {
             .moffs => true,
             .rip => false,
-            .sib => |s| if (s.base) |r| r.class() == .segment else false,
+            .sib => |s| switch (s.base) {
+                .none, .frame => false,
+                .reg => |reg| reg.class() == .segment,
+            },
         };
     }
 
-    pub fn base(mem: Memory) ?Register {
+    pub fn base(mem: Memory) Base {
         return switch (mem) {
-            .moffs => |m| m.seg,
+            .moffs => |m| .{ .reg = m.seg },
             .sib => |s| s.base,
-            .rip => null,
+            .rip => .none,
         };
     }
 
     pub fn scaleIndex(mem: Memory) ?ScaleIndex {
         return switch (mem) {
             .moffs, .rip => null,
-            .sib => |s| s.scale_index,
+            .sib => |s| if (s.scale_index.scale > 0) s.scale_index else null,
         };
     }
 
src/arch/x86_64/CodeGen.zig
@@ -6,6 +6,7 @@ const codegen = @import("../../codegen.zig");
 const leb128 = std.leb;
 const link = @import("../../link.zig");
 const log = std.log.scoped(.codegen);
+const tracking_log = std.log.scoped(.tracking);
 const math = std.math;
 const mem = std.mem;
 const trace = @import("../../tracy.zig").trace;
@@ -40,6 +41,7 @@ const Memory = bits.Memory;
 const Register = bits.Register;
 const RegisterManager = abi.RegisterManager;
 const RegisterLock = RegisterManager.RegisterLock;
+const FrameIndex = bits.FrameIndex;
 
 const gp = abi.RegisterClass.gp;
 const sse = abi.RegisterClass.sse;
@@ -47,6 +49,7 @@ const sse = abi.RegisterClass.sse;
 const InnerError = CodeGenError || error{OutOfRegisters};
 
 const debug_wip_mir = false;
+const debug_tracking = false;
 
 gpa: Allocator,
 air: Air,
@@ -57,11 +60,10 @@ target: *const std.Target,
 mod_fn: *const Module.Fn,
 err_msg: ?*ErrorMsg,
 args: []MCValue,
-ret_mcv: MCValue,
+ret_mcv: InstTracking,
 fn_type: Type,
 arg_index: u32,
 src_loc: Module.SrcLoc,
-stack_align: u32,
 
 eflags_inst: ?Air.Inst.Index = null,
 
@@ -86,17 +88,13 @@ inst_tracking: InstTrackingMap = .{},
 blocks: std.AutoHashMapUnmanaged(Air.Inst.Index, BlockData) = .{},
 
 register_manager: RegisterManager = .{},
-/// Maps offset to what is stored there.
-stack: std.AutoHashMapUnmanaged(u32, StackAllocation) = .{},
 
 /// Generation of the current scope, increments by 1 for every entered scope.
 scope_generation: u32 = 0,
 
-/// Offset from the stack base, representing the end of the stack frame.
-max_end_stack: u32 = 0,
-/// Represents the current end stack offset. If there is no existing slot
-/// to place a new stack allocation, it goes here, and then bumps `max_end_stack`.
-next_stack_offset: u32 = 0,
+frame_allocs: std.MultiArrayList(FrameAlloc) = .{},
+free_frame_indices: std.AutoArrayHashMapUnmanaged(FrameIndex, void) = .{},
+frame_locs: std.MultiArrayList(Mir.FrameLoc) = .{},
 
 /// Debug field, used to find bugs in the compiler.
 air_bookkeeping: @TypeOf(air_bookkeeping_init) = air_bookkeeping_init,
@@ -108,6 +106,9 @@ const air_bookkeeping_init = if (std.debug.runtime_safety) @as(usize, 0) else {}
 
 const mir_to_air_map_init = if (builtin.mode == .Debug) std.AutoHashMapUnmanaged(Mir.Inst.Index, Air.Inst.Index){} else {};
 
+const FrameAddr = struct { index: FrameIndex, off: i32 = 0 };
+const RegisterOffset = struct { reg: Register, off: i32 = 0 };
+
 pub const MCValue = union(enum) {
     /// No runtime bits. `void` types, empty structs, u0, enums with 1 tag, etc.
     /// TODO Look into deleting this tag and using `dead` instead, since every use
@@ -123,46 +124,59 @@ pub const MCValue = union(enum) {
     /// A pointer-sized integer that fits in a register.
     /// If the type is a pointer, this is the pointer address in virtual address space.
     immediate: u64,
-    /// The value is in a GP register.
+    /// The value resides in the EFLAGS register.
+    eflags: Condition,
+    /// The value is in a register.
     register: Register,
+    /// The value is a constant offset from the value in a register.
+    register_offset: RegisterOffset,
     /// The value is a tuple { wrapped, overflow } where the wrapped value is stored in the GP register.
     register_overflow: struct { reg: Register, eflags: Condition },
     /// The value is in memory at a hard-coded address.
     /// If the type is a pointer, it means the pointer address is at this memory location.
     memory: u64,
+    /// The value is in memory at a constant offset from the address in a register.
+    indirect: RegisterOffset,
     /// The value is in memory.
     /// Payload is a symbol index.
     load_direct: u32,
-    /// The value is a pointer to value in memory.
+    /// The value is a pointer to a value in memory.
     /// Payload is a symbol index.
     lea_direct: u32,
     /// The value is in memory referenced indirectly via GOT.
     /// Payload is a symbol index.
     load_got: u32,
+    /// The value is a pointer to a value referenced indirectly via GOT.
+    /// Payload is a symbol index.
+    lea_got: u32,
     /// The value is a threadlocal variable.
     /// Payload is a symbol index.
     load_tlv: u32,
-    /// The value is a pointer to threadlocal variable.
+    /// The value is a pointer to a threadlocal variable.
     /// Payload is a symbol index.
     lea_tlv: u32,
-    /// The value is one of the stack variables.
-    /// If the type is a pointer, it means the pointer address is in the stack at this offset.
-    stack_offset: i32,
-    /// The value is a pointer to one of the stack variables (payload is stack offset).
-    ptr_stack_offset: i32,
-    /// The value resides in the EFLAGS register.
-    eflags: Condition,
+    /// The value is stored at an offset from a frame index.
+    /// Payload is a frame address.
+    load_frame: FrameAddr,
+    /// The address of an offset from a frame index
+    /// Payload is a frame address.
+    lea_frame: FrameAddr,
+    /// This indicates that we have already allocated a frame index for this instruction,
+    /// but it has not been spilled there yet in the current control flow.
+    /// Payload is a frame index.
+    reserved_frame: FrameIndex,
 
     fn isMemory(mcv: MCValue) bool {
         return switch (mcv) {
             .memory,
-            .stack_offset,
-            .ptr_stack_offset,
             .load_direct,
             .lea_direct,
             .load_got,
+            .lea_got,
             .load_tlv,
             .lea_tlv,
+            .load_frame,
+            .lea_frame,
             => true,
             else => false,
         };
@@ -181,6 +195,134 @@ pub const MCValue = union(enum) {
             else => false,
         };
     }
+
+    fn getReg(mcv: MCValue) ?Register {
+        return switch (mcv) {
+            .register => |reg| reg,
+            .register_offset, .indirect => |ro| ro.reg,
+            .register_overflow => |ro| ro.reg,
+            else => null,
+        };
+    }
+
+    fn getCondition(mcv: MCValue) ?Condition {
+        return switch (mcv) {
+            .eflags => |cc| cc,
+            .register_overflow => |reg_ov| reg_ov.eflags,
+            else => null,
+        };
+    }
+
+    fn address(mcv: MCValue) MCValue {
+        return switch (mcv) {
+            .none,
+            .unreach,
+            .dead,
+            .undef,
+            .immediate,
+            .register,
+            .register_offset,
+            .eflags,
+            .register_overflow,
+            .lea_direct,
+            .lea_got,
+            .lea_tlv,
+            .lea_frame,
+            .reserved_frame,
+            => unreachable, // not in memory
+            .memory => |addr| .{ .immediate = addr },
+            .indirect => |reg_off| switch (reg_off.off) {
+                0 => .{ .register = reg_off.reg },
+                else => .{ .register_offset = reg_off },
+            },
+            .load_direct => |sym_index| .{ .lea_direct = sym_index },
+            .load_got => |sym_index| .{ .lea_got = sym_index },
+            .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
+            .load_frame => |frame_addr| .{ .lea_frame = frame_addr },
+        };
+    }
+
+    fn deref(mcv: MCValue) MCValue {
+        return switch (mcv) {
+            .none,
+            .unreach,
+            .dead,
+            .undef,
+            .eflags,
+            .register_overflow,
+            .memory,
+            .indirect,
+            .load_direct,
+            .load_got,
+            .load_tlv,
+            .load_frame,
+            .reserved_frame,
+            => unreachable, // not dereferenceable
+            .immediate => |addr| .{ .memory = addr },
+            .register => |reg| .{ .indirect = .{ .reg = reg } },
+            .register_offset => |reg_off| .{ .indirect = reg_off },
+            .lea_direct => |sym_index| .{ .load_direct = sym_index },
+            .lea_got => |sym_index| .{ .load_got = sym_index },
+            .lea_tlv => |sym_index| .{ .load_tlv = sym_index },
+            .lea_frame => |frame_addr| .{ .load_frame = frame_addr },
+        };
+    }
+
+    fn offset(mcv: MCValue, off: i32) MCValue {
+        return switch (mcv) {
+            .none,
+            .unreach,
+            .dead,
+            .undef,
+            .eflags,
+            .register_overflow,
+            .memory,
+            .indirect,
+            .load_direct,
+            .lea_direct,
+            .load_got,
+            .lea_got,
+            .load_tlv,
+            .lea_tlv,
+            .load_frame,
+            .reserved_frame,
+            => unreachable, // not offsettable
+            .immediate => |imm| .{ .immediate = @bitCast(u64, @bitCast(i64, imm) +% off) },
+            .register => |reg| .{ .register_offset = .{ .reg = reg, .off = off } },
+            .register_offset => |reg_off| .{
+                .register_offset = .{ .reg = reg_off.reg, .off = reg_off.off + off },
+            },
+            .lea_frame => |frame_addr| .{
+                .lea_frame = .{ .index = frame_addr.index, .off = frame_addr.off + off },
+            },
+        };
+    }
+
+    pub fn format(
+        mcv: MCValue,
+        comptime _: []const u8,
+        _: std.fmt.FormatOptions,
+        writer: anytype,
+    ) @TypeOf(writer).Error!void {
+        switch (mcv) {
+            .none, .unreach, .dead, .undef => try writer.print("({s})", .{@tagName(mcv)}),
+            .immediate => |pl| try writer.print("0x{x}", .{pl}),
+            .memory => |pl| try writer.print("[ds:0x{x}]", .{pl}),
+            inline .eflags, .register => |pl| try writer.print("{s}", .{@tagName(pl)}),
+            .register_offset => |pl| try writer.print("{s} + 0x{x}", .{ @tagName(pl.reg), pl.off }),
+            .register_overflow => |pl| try writer.print("{s}:{s}", .{ @tagName(pl.eflags), @tagName(pl.reg) }),
+            .indirect => |pl| try writer.print("[{s} + 0x{x}]", .{ @tagName(pl.reg), pl.off }),
+            .load_direct => |pl| try writer.print("[direct:{d}]", .{pl}),
+            .lea_direct => |pl| try writer.print("direct:{d}", .{pl}),
+            .load_got => |pl| try writer.print("[got:{d}]", .{pl}),
+            .lea_got => |pl| try writer.print("got:{d}", .{pl}),
+            .load_tlv => |pl| try writer.print("[tlv:{d}]", .{pl}),
+            .lea_tlv => |pl| try writer.print("tlv:{d}", .{pl}),
+            .load_frame => |pl| try writer.print("[{} + 0x{x}]", .{ pl.index, pl.off }),
+            .lea_frame => |pl| try writer.print("{} + 0x{x}", .{ pl.index, pl.off }),
+            .reserved_frame => |pl| try writer.print("(dead:{})", .{pl}),
+        }
+    }
 };
 
 const InstTrackingMap = std.AutoArrayHashMapUnmanaged(Air.Inst.Index, InstTracking);
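
The address, deref, and offset helpers above form a small algebra over MCValue: address maps each load_* tag to its lea_* counterpart (and indirect back toward a register), deref is its inverse, and offset folds a constant displacement into whatever representation can absorb it. Some illustrative rewrites (hand-written, not compiler output):

    // (.load_frame .{ .index = fi, .off = 8 }).address()
    //     => .lea_frame .{ .index = fi, .off = 8 }        value -> its address
    // (.lea_frame .{ .index = fi, .off = 8 }).deref()
    //     => .load_frame .{ .index = fi, .off = 8 }       address -> the value
    // (.register .rax).deref()
    //     => .indirect .{ .reg = .rax, .off = 0 }         load through a pointer reg
    // (.register .rax).offset(16)
    //     => .register_offset .{ .reg = .rax, .off = 16 }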
@@ -189,89 +331,211 @@ const InstTracking = struct {
     short: MCValue,
 
     fn init(result: MCValue) InstTracking {
-        return .{ .long = result, .short = result };
+        return .{ .long = switch (result) {
+            .none,
+            .unreach,
+            .undef,
+            .immediate,
+            .memory,
+            .load_direct,
+            .lea_direct,
+            .load_got,
+            .lea_got,
+            .load_tlv,
+            .lea_tlv,
+            .load_frame,
+            .lea_frame,
+            => result,
+            .dead,
+            .reserved_frame,
+            => unreachable,
+            .eflags,
+            .register,
+            .register_offset,
+            .register_overflow,
+            .indirect,
+            => .none,
+        }, .short = result };
     }
 
     fn getReg(self: InstTracking) ?Register {
-        return switch (self.short) {
-            .register => |reg| reg,
-            .register_overflow => |ro| ro.reg,
-            else => null,
-        };
+        return self.short.getReg();
     }
 
     fn getCondition(self: InstTracking) ?Condition {
-        return switch (self.short) {
-            .eflags => |eflags| eflags,
-            .register_overflow => |ro| ro.eflags,
-            else => null,
-        };
+        return self.short.getCondition();
     }
 
     fn spill(self: *InstTracking, function: *Self, inst: Air.Inst.Index) !void {
+        if (std.meta.eql(self.long, self.short)) return; // Already spilled
+        // Allocate or reuse frame index
+        switch (self.long) {
+            .none => self.long = try function.allocRegOrMem(inst, false),
+            .load_frame => {},
+            .reserved_frame => |index| self.long = .{ .load_frame = .{ .index = index } },
+            else => unreachable,
+        }
+        tracking_log.debug("spill %{d} from {} to {}", .{ inst, self.short, self.long });
+        try function.genCopy(function.air.typeOfIndex(inst), self.long, self.short);
+    }
+
+    fn reuseFrame(self: *InstTracking) void {
         switch (self.long) {
+            .reserved_frame => |index| self.long = .{ .load_frame = .{ .index = index } },
+            else => {},
+        }
+        self.short = switch (self.long) {
             .none,
-            .dead,
             .unreach,
-            => unreachable,
+            .undef,
+            .immediate,
+            .memory,
+            .load_direct,
+            .lea_direct,
+            .load_got,
+            .lea_got,
+            .load_tlv,
+            .lea_tlv,
+            .load_frame,
+            .lea_frame,
+            => self.long,
+            .dead,
+            .eflags,
             .register,
+            .register_offset,
             .register_overflow,
-            .eflags,
-            => self.long = try function.allocRegOrMem(inst, self.short == .eflags),
-            .stack_offset => {},
+            .indirect,
+            .reserved_frame,
+            => unreachable,
+        };
+    }
+
+    fn trackSpill(self: *InstTracking, function: *Self, inst: Air.Inst.Index) void {
+        function.freeValue(self.short);
+        self.reuseFrame();
+        tracking_log.debug("%{d} => {} (spilled)", .{ inst, self.* });
+    }
+
+    fn verifyMaterialize(self: *InstTracking, target: InstTracking) void {
+        switch (self.long) {
+            .none,
+            .unreach,
             .undef,
             .immediate,
             .memory,
             .load_direct,
             .lea_direct,
             .load_got,
+            .lea_got,
             .load_tlv,
             .lea_tlv,
-            .ptr_stack_offset,
-            => return, // these can be rematerialized without using a stack slot
+            .lea_frame,
+            => assert(std.meta.eql(self.long, target.long)),
+            .load_frame,
+            .reserved_frame,
+            => switch (target.long) {
+                .none,
+                .load_frame,
+                .reserved_frame,
+                => {},
+                else => unreachable,
+            },
+            .dead,
+            .eflags,
+            .register,
+            .register_offset,
+            .register_overflow,
+            .indirect,
+            => unreachable,
         }
-        log.debug("spilling %{d} from {} to {}", .{ inst, self.short, self.long });
-        const ty = function.air.typeOfIndex(inst);
-        try function.setRegOrMem(ty, self.long, self.short);
     }
 
-    fn trackSpill(self: *InstTracking, function: *Self) void {
-        if (self.getReg()) |reg| function.register_manager.freeReg(reg);
-        switch (self.short) {
-            .none, .dead, .unreach => unreachable,
-            else => {},
-        }
-        self.short = self.long;
+    fn materialize(
+        self: *InstTracking,
+        function: *Self,
+        inst: Air.Inst.Index,
+        target: InstTracking,
+    ) !void {
+        self.verifyMaterialize(target);
+        try self.materializeUnsafe(function, inst, target);
     }
 
-    fn materialize(self: *InstTracking, function: *Self, inst: Air.Inst.Index, reg: Register) !void {
+    fn materializeUnsafe(
+        self: *InstTracking,
+        function: *Self,
+        inst: Air.Inst.Index,
+        target: InstTracking,
+    ) !void {
         const ty = function.air.typeOfIndex(inst);
-        try function.genSetReg(ty, reg, self.long);
+        if ((self.long == .none or self.long == .reserved_frame) and target.long == .load_frame)
+            try function.genCopy(ty, target.long, self.short);
+        try function.genCopy(ty, target.short, self.short);
     }
 
-    fn trackMaterialize(self: *InstTracking, function: *Self, inst: Air.Inst.Index, reg: Register) void {
-        assert(inst == function.register_manager.registers[
-            RegisterManager.indexOfRegIntoTracked(reg).?
-        ]);
-        self.short = .{ .register = reg };
+    fn trackMaterialize(self: *InstTracking, inst: Air.Inst.Index, target: InstTracking) void {
+        self.verifyMaterialize(target);
+        // Don't clobber reserved frame indices
+        self.long = if (target.long == .none) switch (self.long) {
+            .load_frame => |addr| .{ .reserved_frame = addr.index },
+            .reserved_frame => self.long,
+            else => target.long,
+        } else target.long;
+        self.short = target.short;
+        tracking_log.debug("%{d} => {} (materialize)", .{ inst, self.* });
     }
 
-    fn resurrect(self: *InstTracking, scope_generation: u32) void {
+    fn resurrect(self: *InstTracking, inst: Air.Inst.Index, scope_generation: u32) void {
         switch (self.short) {
             .dead => |die_generation| if (die_generation >= scope_generation) {
-                self.short = self.long;
+                self.reuseFrame();
+                tracking_log.debug("%{d} => {} (resurrect)", .{ inst, self.* });
             },
             else => {},
         }
     }
 
-    fn die(self: *InstTracking, function: *Self) void {
+    fn die(self: *InstTracking, function: *Self, inst: Air.Inst.Index) void {
         function.freeValue(self.short);
-        self.reuse(function);
+        self.short = .{ .dead = function.scope_generation };
+        tracking_log.debug("%{d} => {} (death)", .{ inst, self.* });
     }
 
-    fn reuse(self: *InstTracking, function: *Self) void {
+    fn reuse(
+        self: *InstTracking,
+        function: *Self,
+        new_inst: Air.Inst.Index,
+        old_inst: Air.Inst.Index,
+    ) void {
         self.short = .{ .dead = function.scope_generation };
+        tracking_log.debug("%{d} => {} (reuse %{d})", .{ new_inst, self.*, old_inst });
+    }
+
+    pub fn format(
+        self: InstTracking,
+        comptime _: []const u8,
+        _: std.fmt.FormatOptions,
+        writer: anytype,
+    ) @TypeOf(writer).Error!void {
+        if (!std.meta.eql(self.long, self.short)) try writer.print("|{}| ", .{self.long});
+        try writer.print("{}", .{self.short});
+    }
+};
+
+const FrameAlloc = struct {
+    abi_size: u31,
+    abi_align: u5,
+    ref_count: u16,
+
+    fn init(alloc_abi: struct { size: u64, alignment: u32 }) FrameAlloc {
+        assert(math.isPowerOfTwo(alloc_abi.alignment));
+        return .{
+            .abi_size = @intCast(u31, alloc_abi.size),
+            .abi_align = math.log2_int(u32, alloc_abi.alignment),
+            .ref_count = 0,
+        };
+    }
+    fn initType(ty: Type, target: Target) FrameAlloc {
+        return init(.{ .size = ty.abiSize(target), .alignment = ty.abiAlignment(target) });
     }
 };
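
FrameAlloc stores the alignment as its base-2 logarithm: a u5 covers any power-of-two alignment up to 1 << 31, and setFrameLoc below recovers the byte value with a shift. A standalone round-trip check (a sketch, assuming the usual std import):

    test "FrameAlloc log2 alignment round-trip" {
        const alignment: u32 = 8;
        const abi_align = std.math.log2_int(u32, alignment); // stored as 3
        try std.testing.expectEqual(alignment, @as(u32, 1) << abi_align);
    }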
 
@@ -332,12 +596,13 @@ pub fn generate(
         .fn_type = fn_type,
         .arg_index = 0,
         .src_loc = src_loc,
-        .stack_align = undefined,
         .end_di_line = module_fn.rbrace_line,
         .end_di_column = module_fn.rbrace_column,
     };
     defer {
-        function.stack.deinit(gpa);
+        function.frame_allocs.deinit(gpa);
+        function.free_frame_indices.deinit(gpa);
+        function.frame_locs.deinit(gpa);
         var block_it = function.blocks.valueIterator();
         while (block_it.next()) |block| block.deinit(gpa);
         function.blocks.deinit(gpa);
@@ -349,7 +614,23 @@ pub fn generate(
         if (builtin.mode == .Debug) function.mir_to_air_map.deinit(gpa);
     }
 
-    var call_info = function.resolveCallingConventionValues(fn_type, &.{}) catch |err| switch (err) {
+    try function.frame_allocs.resize(gpa, FrameIndex.named_count);
+    function.frame_allocs.set(
+        @enumToInt(FrameIndex.stack_frame),
+        FrameAlloc.init(.{
+            .size = 0,
+            .alignment = if (mod.align_stack_fns.get(module_fn)) |set_align_stack|
+                set_align_stack.alignment
+            else
+                1,
+        }),
+    );
+    function.frame_allocs.set(
+        @enumToInt(FrameIndex.call_frame),
+        FrameAlloc.init(.{ .size = 0, .alignment = 1 }),
+    );
+
+    var call_info = function.resolveCallingConventionValues(fn_type, &.{}, .args_frame) catch |err| switch (err) {
         error.CodegenFail => return Result{ .fail = function.err_msg.? },
         error.OutOfRegisters => return Result{
             .fail = try ErrorMsg.create(
@@ -365,8 +646,18 @@ pub fn generate(
 
     function.args = call_info.args;
     function.ret_mcv = call_info.return_value;
-    function.stack_align = call_info.stack_align;
-    function.max_end_stack = call_info.stack_byte_count;
+    function.frame_allocs.set(@enumToInt(FrameIndex.ret_addr), FrameAlloc.init(.{
+        .size = Type.usize.abiSize(function.target.*),
+        .alignment = @min(Type.usize.abiAlignment(function.target.*), call_info.stack_align),
+    }));
+    function.frame_allocs.set(@enumToInt(FrameIndex.base_ptr), FrameAlloc.init(.{
+        .size = Type.usize.abiSize(function.target.*),
+        .alignment = @min(Type.usize.abiAlignment(function.target.*) * 2, call_info.stack_align),
+    }));
+    function.frame_allocs.set(
+        @enumToInt(FrameIndex.args_frame),
+        FrameAlloc.init(.{ .size = call_info.stack_byte_count, .alignment = call_info.stack_align }),
+    );
 
     function.gen() catch |err| switch (err) {
         error.CodegenFail => return Result{ .fail = function.err_msg.? },
@@ -379,6 +670,7 @@ pub fn generate(
     var mir = Mir{
         .instructions = function.mir_instructions.toOwnedSlice(),
         .extra = try function.mir_extra.toOwnedSlice(bin_file.allocator),
+        .frame_locs = function.frame_locs.toOwnedSlice(),
     };
     defer mir.deinit(bin_file.allocator);
 
@@ -432,6 +724,7 @@ fn dumpWipMir(self: *Self, inst: Mir.Inst) !void {
         .mir = .{
             .instructions = self.mir_instructions.slice(),
             .extra = self.mir_extra.items,
+            .frame_locs = (std.MultiArrayList(Mir.FrameLoc){}).slice(),
         },
         .target = self.target,
         .src_loc = self.src_loc,
@@ -458,6 +751,14 @@ fn dumpWipMir(self: *Self, inst: Mir.Inst) !void {
     }
 }
 
+fn dumpTracking(self: *Self) !void {
+    if (!debug_tracking) return;
+    const stderr = std.io.getStdErr().writer();
+
+    var it = self.inst_tracking.iterator();
+    while (it.next()) |entry| try stderr.print("%{d} = {}\n", .{ entry.key_ptr.*, entry.value_ptr.* });
+}
+
 fn addInst(self: *Self, inst: Mir.Inst) error{OutOfMemory}!Mir.Inst.Index {
     const gpa = self.gpa;
     try self.mir_instructions.ensureUnusedCapacity(gpa, 1);
@@ -550,6 +851,14 @@ fn asmJccReloc(self: *Self, target: Mir.Inst.Index, cc: bits.Condition) !Mir.Ins
     });
 }
 
+fn asmPlaceholder(self: *Self) !Mir.Inst.Index {
+    return self.addInst(.{
+        .tag = .dead,
+        .ops = undefined,
+        .data = undefined,
+    });
+}
+
 fn asmOpOnly(self: *Self, tag: Mir.Inst.Tag) !void {
     _ = try self.addInst(.{
         .tag = tag,
@@ -765,44 +1074,32 @@ fn gen(self: *Self) InnerError!void {
     const cc = self.fn_type.fnCallingConvention();
     if (cc != .Naked) {
         try self.asmRegister(.push, .rbp);
+        const backpatch_push_callee_preserved_regs = try self.asmPlaceholder();
         try self.asmRegisterRegister(.mov, .rbp, .rsp);
-
-        // We want to subtract the aligned stack frame size from rsp here, but we don't
-        // yet know how big it will be, so we leave room for a 4-byte stack size.
-        // TODO During semantic analysis, check if there are no function calls. If there
-        // are none, here we can omit the part where we subtract and then add rsp.
-        const backpatch_stack_sub = try self.addInst(.{
-            .tag = .dead,
-            .ops = undefined,
-            .data = undefined,
-        });
-
-        if (self.ret_mcv == .stack_offset) {
-            // The address where to store the return value for the caller is in a
-            // register which the callee is free to clobber. Therefore, we purposely
-            // spill it to stack immediately.
-            const stack_offset = mem.alignForwardGeneric(u32, self.next_stack_offset + 8, 8);
-            self.next_stack_offset = stack_offset;
-            self.max_end_stack = @max(self.max_end_stack, self.next_stack_offset);
-
-            const ret_reg = abi.getCAbiIntParamRegs(self.target.*)[0];
-            try self.genSetStack(Type.usize, @intCast(i32, stack_offset), MCValue{ .register = ret_reg }, .{});
-            self.ret_mcv = MCValue{ .stack_offset = @intCast(i32, stack_offset) };
-            log.debug("gen: spilling {s} to stack at offset {}", .{ @tagName(ret_reg), stack_offset });
+        const backpatch_frame_align = try self.asmPlaceholder();
+        const backpatch_stack_alloc = try self.asmPlaceholder();
+
+        switch (self.ret_mcv.long) {
+            .none, .unreach => {},
+            .indirect => {
+                // The address at which to store the return value for the caller is in a
+                // register that the callee is free to clobber. Therefore, we purposely
+                // spill it to the stack immediately.
+                const frame_index =
+                    try self.allocFrameIndex(FrameAlloc.initType(Type.usize, self.target.*));
+                try self.genSetMem(
+                    .{ .frame = frame_index },
+                    0,
+                    Type.usize,
+                    self.ret_mcv.long.address().offset(-self.ret_mcv.short.indirect.off),
+                );
+                self.ret_mcv.long = .{ .load_frame = .{ .index = frame_index } };
+                tracking_log.debug("spill {} to {}", .{ self.ret_mcv.long, frame_index });
+            },
+            else => unreachable,
         }
 
-        _ = try self.addInst(.{
-            .tag = .dbg_prologue_end,
-            .ops = undefined,
-            .data = undefined,
-        });
-
-        // Push callee-preserved regs that were used actually in use.
-        const backpatch_push_callee_preserved_regs = try self.addInst(.{
-            .tag = .dead,
-            .ops = undefined,
-            .data = undefined,
-        });
+        try self.asmOpOnly(.dbg_prologue_end);
 
         try self.genBody(self.air.getMainBody());
 
@@ -814,69 +1111,43 @@ fn gen(self: *Self) InnerError!void {
         // }
         // Eliding the reloc will cause a miscompilation in this case.
         for (self.exitlude_jump_relocs.items) |jmp_reloc| {
-            self.mir_instructions.items(.data)[jmp_reloc].inst = @intCast(u32, self.mir_instructions.len);
+            self.mir_instructions.items(.data)[jmp_reloc].inst =
+                @intCast(u32, self.mir_instructions.len);
         }
 
-        // Create list of registers to save in the prologue.
-        // TODO handle register classes
-        var reg_list = Mir.RegisterList{};
-        const callee_preserved_regs = abi.getCalleePreservedRegs(self.target.*);
-        for (callee_preserved_regs) |reg| {
-            if (self.register_manager.isRegAllocated(reg)) {
-                reg_list.push(callee_preserved_regs, reg);
-            }
-        }
-        const saved_regs_stack_space: u32 = reg_list.count() * 8;
-
-        // Pop saved callee-preserved regs.
-        const backpatch_pop_callee_preserved_regs = try self.addInst(.{
-            .tag = .dead,
-            .ops = undefined,
-            .data = undefined,
-        });
-
-        _ = try self.addInst(.{
-            .tag = .dbg_epilogue_begin,
-            .ops = undefined,
-            .data = undefined,
-        });
-
-        // Maybe add rsp, x if required. This is backpatched later.
-        const backpatch_stack_add = try self.addInst(.{
-            .tag = .dead,
-            .ops = undefined,
-            .data = undefined,
-        });
-
+        try self.asmOpOnly(.dbg_epilogue_begin);
+        const backpatch_stack_dealloc = try self.asmPlaceholder();
+        const backpatch_pop_callee_preserved_regs = try self.asmPlaceholder();
         try self.asmRegister(.pop, .rbp);
         try self.asmOpOnly(.ret);
 
-        // Adjust the stack
-        if (self.max_end_stack > math.maxInt(i32)) {
-            return self.failSymbol("too much stack used in call parameters", .{});
+        const frame_layout = try self.computeFrameLayout();
+        const need_frame_align = frame_layout.stack_mask != math.maxInt(u32);
+        const need_stack_adjust = frame_layout.stack_adjust > 0;
+        const need_save_reg = frame_layout.save_reg_list.count() > 0;
+        if (need_frame_align) {
+            self.mir_instructions.set(backpatch_frame_align, .{
+                .tag = .@"and",
+                .ops = .ri_s,
+                .data = .{ .ri = .{ .r = .rsp, .i = frame_layout.stack_mask } },
+            });
         }
-
-        const aligned_stack_end = @intCast(
-            u32,
-            mem.alignForward(self.max_end_stack + saved_regs_stack_space, self.stack_align),
-        );
-        if (aligned_stack_end > 0) {
-            self.mir_instructions.set(backpatch_stack_sub, .{
+        if (need_stack_adjust) {
+            self.mir_instructions.set(backpatch_stack_alloc, .{
                 .tag = .sub,
-                .ops = .ri_u,
-                .data = .{ .ri = .{ .r = .rsp, .i = aligned_stack_end } },
+                .ops = .ri_s,
+                .data = .{ .ri = .{ .r = .rsp, .i = frame_layout.stack_adjust } },
             });
-            self.mir_instructions.set(backpatch_stack_add, .{
-                .tag = .add,
-                .ops = .ri_u,
-                .data = .{ .ri = .{ .r = .rsp, .i = aligned_stack_end } },
-            });
-
-            const save_reg_list = try self.addExtra(Mir.SaveRegisterList{
-                .base_reg = @enumToInt(Register.rbp),
-                .register_list = reg_list.asInt(),
-                .stack_end = aligned_stack_end,
+        }
+        if (need_frame_align or need_stack_adjust) {
+            self.mir_instructions.set(backpatch_stack_dealloc, .{
+                .tag = .mov,
+                .ops = .rr,
+                .data = .{ .rr = .{ .r1 = .rsp, .r2 = .rbp } },
             });
+        }
+        if (need_save_reg) {
+            const save_reg_list = frame_layout.save_reg_list.asInt();
             self.mir_instructions.set(backpatch_push_callee_preserved_regs, .{
                 .tag = .push_regs,
                 .ops = undefined,
@@ -889,30 +1160,19 @@ fn gen(self: *Self) InnerError!void {
             });
         }
     } else {
-        _ = try self.addInst(.{
-            .tag = .dbg_prologue_end,
-            .ops = undefined,
-            .data = undefined,
-        });
-
+        try self.asmOpOnly(.dbg_prologue_end);
         try self.genBody(self.air.getMainBody());
-
-        _ = try self.addInst(.{
-            .tag = .dbg_epilogue_begin,
-            .ops = undefined,
-            .data = undefined,
-        });
+        try self.asmOpOnly(.dbg_epilogue_begin);
     }
 
     // Drop them off at the rbrace.
-    const payload = try self.addExtra(Mir.DbgLineColumn{
-        .line = self.end_di_line,
-        .column = self.end_di_column,
-    });
     _ = try self.addInst(.{
         .tag = .dbg_line,
         .ops = undefined,
-        .data = .{ .payload = payload },
+        .data = .{ .line_column = .{
+            .line = self.end_di_line,
+            .column = self.end_di_column,
+        } },
     });
 }
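
Putting the placeholder scheme together: the prologue and epilogue are emitted with .dead placeholders in fixed positions, and after computeFrameLayout each placeholder is either rewritten or left dead. The emitted shape is roughly the following, where push_regs/pop_regs are Mir pseudo-instructions and all immediate values are invented for illustration:

    push rbp
    push_regs ...        ; backpatch_push_callee_preserved_regs (if any were used)
    mov  rbp, rsp
    and  rsp, -64        ; backpatch_frame_align (only if overaligning the stack)
    sub  rsp, 0x80       ; backpatch_stack_alloc (only if stack_adjust > 0)
    ...                  ; function body
    mov  rsp, rbp        ; backpatch_stack_dealloc (only if align/alloc was emitted)
    pop_regs ...         ; backpatch_pop_callee_preserved_regs
    pop  rbp
    ret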
 
@@ -932,6 +1192,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             self.air,
             self.liveness,
         );
+        self.dumpTracking() catch {};
 
         const old_air_bookkeeping = self.air_bookkeeping;
         try self.inst_tracking.ensureUnusedCapacity(self.gpa, 1);
@@ -1187,23 +1448,16 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
                 while (it.next()) |index| {
                     const tracked_inst = self.register_manager.registers[index];
                     const tracking = self.getResolvedInstValue(tracked_inst);
-                    assert(RegisterManager.indexOfRegIntoTracked(switch (tracking.short) {
-                        .register => |reg| reg,
-                        .register_overflow => |ro| ro.reg,
-                        else => unreachable,
-                    }).? == index);
+                    assert(RegisterManager.indexOfRegIntoTracked(tracking.getReg().?).? == index);
                 }
             }
         }
     }
+    self.dumpTracking() catch {};
 }
 
 fn getValue(self: *Self, value: MCValue, inst: ?Air.Inst.Index) void {
-    const reg = switch (value) {
-        .register => |reg| reg,
-        .register_overflow => |ro| ro.reg,
-        else => return,
-    };
+    const reg = value.getReg() orelse return;
     if (self.register_manager.isRegFree(reg)) {
         self.register_manager.getRegAssumeFree(reg, inst);
     }
@@ -1214,8 +1468,11 @@ fn freeValue(self: *Self, value: MCValue) void {
         .register => |reg| {
             self.register_manager.freeReg(reg);
         },
-        .register_overflow => |ro| {
-            self.register_manager.freeReg(ro.reg);
+        .register_offset => |reg_off| {
+            self.register_manager.freeReg(reg_off.reg);
+        },
+        .register_overflow => |reg_ov| {
+            self.register_manager.freeReg(reg_ov.reg);
             self.eflags_inst = null;
         },
         .eflags => {
@@ -1231,10 +1488,10 @@ fn feed(self: *Self, bt: *Liveness.BigTomb, operand: Air.Inst.Ref) void {
 
 /// Asserts there is already capacity to insert into top branch inst_table.
 fn processDeath(self: *Self, inst: Air.Inst.Index) void {
-    const air_tags = self.air.instructions.items(.tag);
-    if (air_tags[inst] == .constant) return;
-    log.debug("%{d} => {}", .{ inst, MCValue.dead });
-    self.inst_tracking.getPtr(inst).?.die(self);
+    switch (self.air.instructions.items(.tag)[inst]) {
+        .constant, .const_ty => unreachable,
+        else => self.inst_tracking.getPtr(inst).?.die(self, inst),
+    }
 }
 
 /// Called when there are no operands, and the instruction is always unreferenced.
@@ -1249,7 +1506,7 @@ fn finishAirResult(self: *Self, inst: Air.Inst.Index, result: MCValue) void {
         .none, .dead, .unreach => {},
         else => unreachable, // Why didn't the result die?
     } else {
-        log.debug("%{d} => {}", .{ inst, result });
+        tracking_log.debug("%{d} => {} (birth)", .{ inst, result });
         self.inst_tracking.putAssumeCapacityNoClobber(inst, InstTracking.init(result));
         // In some cases, an operand may be reused as the result.
         // If that operand died and was a register, it was freed by
@@ -1273,36 +1530,123 @@ fn finishAir(self: *Self, inst: Air.Inst.Index, result: MCValue, operands: [Live
     self.finishAirResult(inst, result);
 }
 
-fn allocMem(self: *Self, inst: ?Air.Inst.Index, abi_size: u32, abi_align: u32) !u32 {
-    if (abi_align > self.stack_align)
-        self.stack_align = abi_align;
-    // TODO find a free slot instead of always appending
-    const offset = mem.alignForwardGeneric(u32, self.next_stack_offset + abi_size, abi_align);
-    self.next_stack_offset = offset;
-    self.max_end_stack = @max(self.max_end_stack, self.next_stack_offset);
-    try self.stack.putNoClobber(self.gpa, offset, .{
-        .inst = inst,
-        .size = abi_size,
-    });
-    return offset;
+const FrameLayout = struct {
+    stack_mask: u32,
+    stack_adjust: u32,
+    save_reg_list: Mir.RegisterList,
+};
+
+fn setFrameLoc(
+    self: *Self,
+    frame_index: FrameIndex,
+    base: Register,
+    offset: *i32,
+    comptime aligned: bool,
+) void {
+    const frame_i = @enumToInt(frame_index);
+    if (aligned) {
+        const alignment = @as(i32, 1) << self.frame_allocs.items(.abi_align)[frame_i];
+        offset.* = mem.alignForwardGeneric(i32, offset.*, alignment);
+    }
+    self.frame_locs.set(frame_i, .{ .base = base, .disp = offset.* });
+    offset.* += self.frame_allocs.items(.abi_size)[frame_i];
+}
+
+fn computeFrameLayout(self: *Self) !FrameLayout {
+    const frame_allocs_len = self.frame_allocs.len;
+    try self.frame_locs.resize(self.gpa, frame_allocs_len);
+    const stack_frame_order = try self.gpa.alloc(FrameIndex, frame_allocs_len - FrameIndex.named_count);
+    defer self.gpa.free(stack_frame_order);
+
+    const frame_size = self.frame_allocs.items(.abi_size);
+    const frame_align = self.frame_allocs.items(.abi_align);
+    const frame_offset = self.frame_locs.items(.disp);
+
+    for (stack_frame_order, FrameIndex.named_count..) |*frame_order, frame_index|
+        frame_order.* = @intToEnum(FrameIndex, frame_index);
+    {
+        const SortContext = struct {
+            frame_align: @TypeOf(frame_align),
+            pub fn lessThan(context: @This(), lhs: FrameIndex, rhs: FrameIndex) bool {
+                return context.frame_align[@enumToInt(lhs)] > context.frame_align[@enumToInt(rhs)];
+            }
+        };
+        const sort_context = SortContext{ .frame_align = frame_align };
+        std.sort.sort(FrameIndex, stack_frame_order, sort_context, SortContext.lessThan);
+    }
+
+    const call_frame_align = frame_align[@enumToInt(FrameIndex.call_frame)];
+    const stack_frame_align = frame_align[@enumToInt(FrameIndex.stack_frame)];
+    const args_frame_align = frame_align[@enumToInt(FrameIndex.args_frame)];
+    const needed_align = @max(call_frame_align, stack_frame_align);
+    const need_align_stack = needed_align > args_frame_align;
+
+    // Create list of registers to save in the prologue.
+    // TODO handle register classes
+    var save_reg_list = Mir.RegisterList{};
+    const callee_preserved_regs = abi.getCalleePreservedRegs(self.target.*);
+    for (callee_preserved_regs) |reg| {
+        if (self.register_manager.isRegAllocated(reg)) {
+            save_reg_list.push(callee_preserved_regs, reg);
+        }
+    }
+
+    var rbp_offset = @intCast(i32, save_reg_list.count() * 8);
+    self.setFrameLoc(.base_ptr, .rbp, &rbp_offset, false);
+    self.setFrameLoc(.ret_addr, .rbp, &rbp_offset, false);
+    self.setFrameLoc(.args_frame, .rbp, &rbp_offset, false);
+    const stack_frame_align_offset =
+        if (need_align_stack) 0 else frame_offset[@enumToInt(FrameIndex.args_frame)];
+
+    var rsp_offset: i32 = 0;
+    self.setFrameLoc(.call_frame, .rsp, &rsp_offset, true);
+    self.setFrameLoc(.stack_frame, .rsp, &rsp_offset, true);
+    for (stack_frame_order) |frame_index| self.setFrameLoc(frame_index, .rsp, &rsp_offset, true);
+    rsp_offset += stack_frame_align_offset;
+    rsp_offset = mem.alignForwardGeneric(i32, rsp_offset, @as(i32, 1) << needed_align);
+    rsp_offset -= stack_frame_align_offset;
+    frame_size[@enumToInt(FrameIndex.call_frame)] =
+        @intCast(u31, rsp_offset - frame_offset[@enumToInt(FrameIndex.stack_frame)]);
+
+    return .{
+        .stack_mask = @as(u32, math.maxInt(u32)) << (if (need_align_stack) needed_align else 0),
+        .stack_adjust = @intCast(u32, rsp_offset - frame_offset[@enumToInt(FrameIndex.call_frame)]),
+        .save_reg_list = save_reg_list,
+    };
 }
 
-/// Use a pointer instruction as the basis for allocating stack memory.
-fn allocMemPtr(self: *Self, inst: Air.Inst.Index) !u32 {
-    const ptr_ty = self.air.typeOfIndex(inst);
-    const elem_ty = ptr_ty.elemType();
+fn allocFrameIndex(self: *Self, alloc: FrameAlloc) !FrameIndex {
+    const frame_allocs_slice = self.frame_allocs.slice();
+    const frame_size = frame_allocs_slice.items(.abi_size);
+    const frame_align = frame_allocs_slice.items(.abi_align);
+    for (self.free_frame_indices.keys(), 0..) |frame_index, free_i| {
+        const abi_size = frame_size[@enumToInt(frame_index)];
+        if (abi_size != alloc.abi_size) continue;
+        const abi_align = &frame_align[@enumToInt(frame_index)];
+        abi_align.* = @max(abi_align.*, alloc.abi_align);
 
-    if (!elem_ty.hasRuntimeBitsIgnoreComptime()) {
-        return self.allocMem(inst, @sizeOf(usize), @alignOf(usize));
+        const stack_frame_align = &frame_align[@enumToInt(FrameIndex.stack_frame)];
+        stack_frame_align.* = @max(stack_frame_align.*, alloc.abi_align);
+
+        _ = self.free_frame_indices.swapRemoveAt(free_i);
+        return frame_index;
     }
+    const frame_index = @intToEnum(FrameIndex, self.frame_allocs.len);
+    try self.frame_allocs.append(self.gpa, alloc);
+    return frame_index;
+}
 
-    const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) orelse {
-        const mod = self.bin_file.options.module.?;
-        return self.fail("type '{}' too big to fit into stack frame", .{elem_ty.fmt(mod)});
-    };
-    // TODO swap this for inst.ty.ptrAlign
-    const abi_align = ptr_ty.ptrAlignment(self.target.*);
-    return self.allocMem(inst, abi_size, abi_align);
+/// Use a pointer instruction as the basis for allocating stack memory.
+fn allocMemPtr(self: *Self, inst: Air.Inst.Index) !FrameIndex {
+    const ptr_ty = self.air.typeOfIndex(inst);
+    const val_ty = ptr_ty.childType();
+    return self.allocFrameIndex(FrameAlloc.init(.{
+        .size = math.cast(u32, val_ty.abiSize(self.target.*)) orelse {
+            const mod = self.bin_file.options.module.?;
+            return self.fail("type '{}' too big to fit into stack frame", .{val_ty.fmt(mod)});
+        },
+        .alignment = @max(ptr_ty.ptrAlignment(self.target.*), 1),
+    }));
 }
 
 fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue {
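
computeFrameLayout assigns rbp-relative locations to the saved-register area, base_ptr, ret_addr, and args_frame, and rsp-relative locations to call_frame, stack_frame, and the locals, with locals sorted by decreasing alignment. The sort matters because ABI sizes are (typically) multiples of their alignments, so in descending-alignment order each next slot starts already aligned. A worked example with three locals of size/alignment 16/16, 4/4, and 8/8 bytes:

    declaration order:  [0..16) align 16, [16..20) align 4, pad to 24, [24..32) align 8  = 32 bytes
    sorted descending:  [0..16) align 16, [16..24) align 8, [24..28) align 4             = 28 bytes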
@@ -1320,42 +1664,35 @@ fn allocRegOrMemAdvanced(self: *Self, elem_ty: Type, inst: ?Air.Inst.Index, reg_
     };
 
     if (reg_ok) {
-        switch (elem_ty.zigTypeTag()) {
-            .Vector => return self.fail("TODO allocRegOrMem for Vector type", .{}),
-            .Float => {
-                if (intrinsicsAllowed(self.target.*, elem_ty)) {
-                    const ptr_bytes: u64 = 32;
-                    if (abi_size <= ptr_bytes) {
-                        if (self.register_manager.tryAllocReg(inst, sse)) |reg| {
-                            return MCValue{ .register = registerAlias(reg, abi_size) };
-                        }
-                    }
-                }
-
-                return self.fail("TODO allocRegOrMem for Float type without SSE/AVX support", .{});
-            },
-            else => {
-                // Make sure the type can fit in a register before we try to allocate one.
-                const ptr_bits = self.target.cpu.arch.ptrBitWidth();
-                const ptr_bytes: u64 = @divExact(ptr_bits, 8);
-                if (abi_size <= ptr_bytes) {
-                    if (self.register_manager.tryAllocReg(inst, gp)) |reg| {
-                        return MCValue{ .register = registerAlias(reg, abi_size) };
-                    }
-                }
-            },
+        // Make sure the type can fit in a register before we try to allocate one.
+        const ptr_bits = self.target.cpu.arch.ptrBitWidth();
+        const ptr_bytes: u64 = @divExact(ptr_bits, 8);
+        if (abi_size <= ptr_bytes) {
+            if (self.register_manager.tryAllocReg(inst, try self.regClassForType(elem_ty))) |reg| {
+                return MCValue{ .register = registerAlias(reg, abi_size) };
+            }
         }
     }
 
-    const abi_align = elem_ty.abiAlignment(self.target.*);
-    if (abi_align > self.stack_align)
-        self.stack_align = abi_align;
-    const stack_offset = try self.allocMem(inst, abi_size, abi_align);
-    return MCValue{ .stack_offset = @intCast(i32, stack_offset) };
+    const frame_index = try self.allocFrameIndex(FrameAlloc.initType(elem_ty, self.target.*));
+    return .{ .load_frame = .{ .index = frame_index } };
+}
+
+fn regClassForType(self: *Self, ty: Type) !RegisterManager.RegisterBitSet {
+    return switch (ty.zigTypeTag()) {
+        .Vector => self.fail("TODO regClassForType for {}", .{ty.fmt(self.bin_file.options.module.?)}),
+        .Float => switch (ty.floatBits(self.target.*)) {
+            32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) sse else gp,
+            64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) sse else gp,
+            else => gp,
+        },
+        else => gp,
+    };
 }
 
 const State = struct {
     registers: RegisterManager.TrackedRegisters,
+    reg_tracking: [RegisterManager.RegisterBitSet.bit_length]InstTracking,
     free_registers: RegisterManager.RegisterBitSet,
     inst_tracking_len: u32,
     scope_generation: u32,
@@ -1370,8 +1707,14 @@ fn initRetroactiveState(self: *Self) State {
 
 fn saveRetroactiveState(self: *Self, state: *State) !void {
     try self.spillEflagsIfOccupied();
-    state.registers = self.register_manager.registers;
-    state.free_registers = self.register_manager.free_registers;
+    const free_registers = self.register_manager.free_registers;
+    var it = free_registers.iterator(.{ .kind = .unset });
+    while (it.next()) |index| {
+        const tracked_inst = self.register_manager.registers[index];
+        state.registers[index] = tracked_inst;
+        state.reg_tracking[index] = self.inst_tracking.get(tracked_inst).?;
+    }
+    state.free_registers = free_registers;
 }
 
 fn saveState(self: *Self) !State {
@@ -1387,18 +1730,18 @@ fn restoreState(self: *Self, state: State, deaths: []const Air.Inst.Index, compt
     close_scope: bool,
 }) !void {
     if (opts.close_scope) {
-        for (self.inst_tracking.values()[state.inst_tracking_len..]) |*tracking| tracking.die(self);
+        for (
+            self.inst_tracking.keys()[state.inst_tracking_len..],
+            self.inst_tracking.values()[state.inst_tracking_len..],
+        ) |inst, *tracking| tracking.die(self, inst);
         self.inst_tracking.shrinkRetainingCapacity(state.inst_tracking_len);
     }
 
-    if (opts.resurrect) for (self.inst_tracking.values()[0..state.inst_tracking_len]) |*tracking|
-        tracking.resurrect(state.scope_generation);
-    const air_tags = self.air.instructions.items(.tag);
-    for (deaths) |death| switch (air_tags[death]) {
-        .constant => {},
-        .const_ty => unreachable,
-        else => self.inst_tracking.getPtr(death).?.die(self),
-    };
+    if (opts.resurrect) for (
+        self.inst_tracking.keys()[0..state.inst_tracking_len],
+        self.inst_tracking.values()[0..state.inst_tracking_len],
+    ) |inst, *tracking| tracking.resurrect(inst, state.scope_generation);
+    for (deaths) |death| self.processDeath(death);
 
     for (0..state.registers.len) |index| {
         const current_maybe_inst = if (self.register_manager.free_registers.isSet(index))
@@ -1411,26 +1754,34 @@ fn restoreState(self: *Self, state: State, deaths: []const Air.Inst.Index, compt
             state.registers[index];
         if (std.debug.runtime_safety) if (target_maybe_inst) |target_inst|
             assert(self.inst_tracking.getIndex(target_inst).? < state.inst_tracking_len);
-        if (current_maybe_inst == target_maybe_inst) continue;
-        const reg = RegisterManager.regAtTrackedIndex(
-            @intCast(RegisterManager.RegisterBitSet.ShiftInt, index),
-        );
         if (opts.emit_instructions) {
             if (current_maybe_inst) |current_inst| {
                 try self.inst_tracking.getPtr(current_inst).?.spill(self, current_inst);
             }
             if (target_maybe_inst) |target_inst| {
-                try self.inst_tracking.getPtr(target_inst).?.materialize(self, target_inst, reg);
+                try self.inst_tracking.getPtr(target_inst).?.materialize(
+                    self,
+                    target_inst,
+                    state.reg_tracking[index],
+                );
             }
         }
         if (opts.update_tracking) {
             if (current_maybe_inst) |current_inst| {
-                self.inst_tracking.getPtr(current_inst).?.trackSpill(self);
+                self.inst_tracking.getPtr(current_inst).?.trackSpill(self, current_inst);
+            }
+            {
+                const reg = RegisterManager.regAtTrackedIndex(
+                    @intCast(RegisterManager.RegisterBitSet.ShiftInt, index),
+                );
+                self.register_manager.freeReg(reg);
+                self.register_manager.getRegAssumeFree(reg, target_maybe_inst);
             }
-            self.register_manager.freeReg(reg);
-            self.register_manager.getRegAssumeFree(reg, target_maybe_inst);
             if (target_maybe_inst) |target_inst| {
-                self.inst_tracking.getPtr(target_inst).?.trackMaterialize(self, target_inst, reg);
+                self.inst_tracking.getPtr(target_inst).?.trackMaterialize(
+                    target_inst,
+                    state.reg_tracking[index],
+                );
             }
         }
     }
@@ -1438,7 +1789,7 @@ fn restoreState(self: *Self, state: State, deaths: []const Air.Inst.Index, compt
         try self.inst_tracking.getPtr(inst).?.spill(self, inst);
     if (opts.update_tracking) if (self.eflags_inst) |inst| {
         self.eflags_inst = null;
-        self.inst_tracking.getPtr(inst).?.trackSpill(self);
+        self.inst_tracking.getPtr(inst).?.trackSpill(self, inst);
     };
 
     if (opts.update_tracking and std.debug.runtime_safety) {
@@ -1452,9 +1803,9 @@ fn restoreState(self: *Self, state: State, deaths: []const Air.Inst.Index, compt
 
 pub fn spillInstruction(self: *Self, reg: Register, inst: Air.Inst.Index) !void {
     const tracking = self.inst_tracking.getPtr(inst).?;
-    assert(tracking.getReg().?.to64() == reg.to64());
+    assert(tracking.getReg().?.id() == reg.id());
     try tracking.spill(self, inst);
-    tracking.trackSpill(self);
+    tracking.trackSpill(self, inst);
 }
 
 pub fn spillEflagsIfOccupied(self: *Self) !void {
@@ -1463,7 +1814,7 @@ pub fn spillEflagsIfOccupied(self: *Self) !void {
         const tracking = self.inst_tracking.getPtr(inst).?;
         assert(tracking.getCondition() != null);
         try tracking.spill(self, inst);
-        tracking.trackSpill(self);
+        tracking.trackSpill(self, inst);
     }
 }
 
@@ -1477,15 +1828,8 @@ pub fn spillRegisters(self: *Self, registers: []const Register) !void {
 /// allocated. A second call to `copyToTmpRegister` may return the same register.
 /// This can have a side effect of spilling instructions to the stack to free up a register.
 fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register {
-    const reg_class: RegisterManager.RegisterBitSet = switch (ty.zigTypeTag()) {
-        .Float => blk: {
-            if (intrinsicsAllowed(self.target.*, ty)) break :blk sse;
-            return self.fail("TODO copy {} to register", .{ty.fmtDebug()});
-        },
-        else => gp,
-    };
-    const reg: Register = try self.register_manager.allocReg(null, reg_class);
-    try self.genSetReg(ty, reg, mcv);
+    const reg: Register = try self.register_manager.allocReg(null, try self.regClassForType(ty));
+    try self.genSetReg(reg, ty, mcv);
     return reg;
 }
 
@@ -1493,28 +1837,35 @@ fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register {
 /// `reg_owner` is the instruction that gets associated with the register in the register table.
 /// This can have a side effect of spilling instructions to the stack to free up a register.
 /// WARNING make sure that the allocated register matches the returned MCValue from an instruction!
-fn copyToRegisterWithInstTracking(self: *Self, reg_owner: Air.Inst.Index, ty: Type, mcv: MCValue) !MCValue {
-    const reg_class: RegisterManager.RegisterBitSet = switch (ty.zigTypeTag()) {
-        .Float => blk: {
-            if (intrinsicsAllowed(self.target.*, ty)) break :blk sse;
-            return self.fail("TODO copy {} to register", .{ty.fmtDebug()});
-        },
-        else => gp,
-    };
-    const reg: Register = try self.register_manager.allocReg(reg_owner, reg_class);
-    try self.genSetReg(ty, reg, mcv);
+fn copyToRegisterWithInstTracking(
+    self: *Self,
+    reg_owner: Air.Inst.Index,
+    ty: Type,
+    mcv: MCValue,
+) !MCValue {
+    const reg: Register = try self.register_manager.allocReg(reg_owner, try self.regClassForType(ty));
+    try self.genSetReg(reg, ty, mcv);
     return MCValue{ .register = reg };
 }
 
 fn airAlloc(self: *Self, inst: Air.Inst.Index) !void {
-    const stack_offset = try self.allocMemPtr(inst);
-    const result = MCValue{ .ptr_stack_offset = @intCast(i32, stack_offset) };
+    const result = MCValue{ .lea_frame = .{ .index = try self.allocMemPtr(inst) } };
     return self.finishAir(inst, result, .{ .none, .none, .none });
 }
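
A `.lea_frame` result is an abstract address of a frame slot; it only becomes a concrete address operand once frame layout has assigned the slot. A sketch of the eventual lowering, for some dst_reg and frame_index (the same call shape as the `lea` emissions later in this diff):

    try self.asmRegisterMemory(.lea, dst_reg.to64(), Memory.sib(.qword, .{
        .base = .{ .frame = frame_index },
        .disp = 0,
    }));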
 
 fn airRetPtr(self: *Self, inst: Air.Inst.Index) !void {
-    const stack_offset = try self.allocMemPtr(inst);
-    const result = MCValue{ .ptr_stack_offset = @intCast(i32, stack_offset) };
+    const result: MCValue = switch (self.ret_mcv.long) {
+        else => unreachable,
+        .none => .{ .lea_frame = .{ .index = try self.allocMemPtr(inst) } },
+        .load_frame => .{ .register_offset = .{
+            .reg = (try self.copyToRegisterWithInstTracking(
+                inst,
+                self.air.typeOfIndex(inst),
+                self.ret_mcv.long,
+            )).register,
+            .off = self.ret_mcv.short.indirect.off,
+        } },
+    };
     return self.finishAir(inst, result, .{ .none, .none, .none });
 }
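
When the return value is passed indirectly, `ret_mcv.long` holds the spilled result pointer; `airRetPtr` copies it into a tracked register and adds the indirect offset, yielding a `.register_offset` value, i.e. "reg + off". As an address operand that shape lowers the same way the `.indirect` arms later in this diff do; a sketch for some reg_off:

    const mem = Memory.sib(.qword, .{
        .base = .{ .reg = reg_off.reg },
        .disp = reg_off.off,
    });
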
 
@@ -1577,12 +1928,14 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void {
                         registerAlias(src_reg, min_abi_size),
                     );
                 },
-                .stack_offset => |src_off| {
-                    try self.asmRegisterMemory(tag, dst_alias, Memory.sib(
-                        Memory.PtrSize.fromSize(min_abi_size),
-                        .{ .base = .rbp, .disp = -src_off },
-                    ));
-                },
+                .load_frame => |frame_addr| try self.asmRegisterMemory(
+                    tag,
+                    dst_alias,
+                    Memory.sib(Memory.PtrSize.fromSize(min_abi_size), .{
+                        .base = .{ .frame = frame_addr.index },
+                        .disp = frame_addr.off,
+                    }),
+                ),
                 else => return self.fail("TODO airIntCast from {s} to {s}", .{
                     @tagName(src_mcv),
                     @tagName(dst_mcv),
@@ -1591,7 +1944,7 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void {
             if (self.regExtraBits(min_ty) > 0) try self.truncateRegister(min_ty, dst_reg);
         },
         else => {
-            try self.setRegOrMem(min_ty, dst_mcv, src_mcv);
+            try self.genCopy(min_ty, dst_mcv, src_mcv);
             const extra = dst_abi_size * 8 - dst_int_info.bits;
             if (extra > 0) {
                 try self.genShiftBinOpMir(switch (signedness) {
@@ -1653,16 +2006,22 @@ fn airSlice(self: *Self, inst: Air.Inst.Index) !void {
     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
     const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
 
+    const slice_ty = self.air.typeOfIndex(inst);
     const ptr = try self.resolveInst(bin_op.lhs);
     const ptr_ty = self.air.typeOf(bin_op.lhs);
     const len = try self.resolveInst(bin_op.rhs);
     const len_ty = self.air.typeOf(bin_op.rhs);
 
-    const stack_offset = @intCast(i32, try self.allocMem(inst, 16, 16));
-    try self.genSetStack(ptr_ty, stack_offset, ptr, .{});
-    try self.genSetStack(len_ty, stack_offset - 8, len, .{});
-    const result = MCValue{ .stack_offset = stack_offset };
+    const frame_index = try self.allocFrameIndex(FrameAlloc.initType(slice_ty, self.target.*));
+    try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, ptr);
+    try self.genSetMem(
+        .{ .frame = frame_index },
+        @intCast(i32, ptr_ty.abiSize(self.target.*)),
+        len_ty,
+        len,
+    );
 
+    const result = MCValue{ .load_frame = .{ .index = frame_index } };
     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
 }
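
The slice is assembled field-by-field in a single frame slot: the pointer at offset 0 and the length directly after it at `abiSize(ptr_ty)`. `airSliceLen` further down relies on exactly this layout when it adds 8 to the frame offset. Layout sketch, assuming 8-byte pointers on x86_64:

    // Frame slot holding a slice:
    //   +0 : ptr (usize)  -> .load_frame{ .index = fi, .off = 0 }
    //   +8 : len (usize)  -> .load_frame{ .index = fi, .off = 8 }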
 
@@ -1778,14 +2137,14 @@ fn airAddSat(self: *Self, inst: Air.Inst.Index) !void {
 
     const reg_bits = self.regBitSize(ty);
     const cc: Condition = if (ty.isSignedInt()) cc: {
-        try self.genSetReg(ty, limit_reg, dst_mcv);
+        try self.genSetReg(limit_reg, ty, dst_mcv);
         try self.genShiftBinOpMir(.sar, ty, limit_mcv, .{ .immediate = reg_bits - 1 });
         try self.genBinOpMir(.xor, ty, limit_mcv, .{
             .immediate = (@as(u64, 1) << @intCast(u6, reg_bits - 1)) - 1,
         });
         break :cc .o;
     } else cc: {
-        try self.genSetReg(ty, limit_reg, .{
+        try self.genSetReg(limit_reg, ty, .{
             .immediate = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - reg_bits),
         });
         break :cc .c;
@@ -1829,14 +2188,14 @@ fn airSubSat(self: *Self, inst: Air.Inst.Index) !void {
 
     const reg_bits = self.regBitSize(ty);
     const cc: Condition = if (ty.isSignedInt()) cc: {
-        try self.genSetReg(ty, limit_reg, dst_mcv);
+        try self.genSetReg(limit_reg, ty, dst_mcv);
         try self.genShiftBinOpMir(.sar, ty, limit_mcv, .{ .immediate = reg_bits - 1 });
         try self.genBinOpMir(.xor, ty, limit_mcv, .{
             .immediate = (@as(u64, 1) << @intCast(u6, reg_bits - 1)) - 1,
         });
         break :cc .o;
     } else cc: {
-        try self.genSetReg(ty, limit_reg, .{ .immediate = 0 });
+        try self.genSetReg(limit_reg, ty, .{ .immediate = 0 });
         break :cc .c;
     };
     try self.genBinOpMir(.sub, ty, dst_mcv, rhs_mcv);
@@ -1880,7 +2239,7 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void {
 
     const reg_bits = self.regBitSize(ty);
     const cc: Condition = if (ty.isSignedInt()) cc: {
-        try self.genSetReg(ty, limit_reg, lhs_mcv);
+        try self.genSetReg(limit_reg, ty, lhs_mcv);
         try self.genBinOpMir(.xor, ty, limit_mcv, rhs_mcv);
         try self.genShiftBinOpMir(.sar, ty, limit_mcv, .{ .immediate = reg_bits - 1 });
         try self.genBinOpMir(.xor, ty, limit_mcv, .{
@@ -1888,7 +2247,7 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void {
         });
         break :cc .o;
     } else cc: {
-        try self.genSetReg(ty, limit_reg, .{
+        try self.genSetReg(limit_reg, ty, .{
             .immediate = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - reg_bits),
         });
         break :cc .c;
@@ -1916,17 +2275,18 @@ fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
             .Int => {
                 try self.spillEflagsIfOccupied();
 
-                const partial_mcv = switch (tag) {
-                    .add_with_overflow => try self.genBinOp(null, .add, bin_op.lhs, bin_op.rhs),
-                    .sub_with_overflow => try self.genBinOp(null, .sub, bin_op.lhs, bin_op.rhs),
+                const partial_mcv = try self.genBinOp(null, switch (tag) {
+                    .add_with_overflow => .add,
+                    .sub_with_overflow => .sub,
                     else => unreachable,
-                };
+                }, bin_op.lhs, bin_op.rhs);
                 const int_info = ty.intInfo(self.target.*);
                 const cc: Condition = switch (int_info.signedness) {
                     .unsigned => .c,
                     .signed => .o,
                 };
 
+                const tuple_ty = self.air.typeOfIndex(inst);
                 if (int_info.bits >= 8 and math.isPowerOfTwo(int_info.bits)) {
                     switch (partial_mcv) {
                         .register => |reg| {
@@ -1936,27 +2296,32 @@ fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
                         else => {},
                     }
 
-                    const abi_size = @intCast(i32, ty.abiSize(self.target.*));
-                    const dst_mcv = try self.allocRegOrMem(inst, false);
-                    try self.genSetStack(
+                    const frame_index =
+                        try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*));
+                    try self.genSetMem(
+                        .{ .frame = frame_index },
+                        @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)),
                         Type.u1,
-                        dst_mcv.stack_offset - abi_size,
                         .{ .eflags = cc },
-                        .{},
                     );
-                    try self.genSetStack(ty, dst_mcv.stack_offset, partial_mcv, .{});
-                    break :result dst_mcv;
+                    try self.genSetMem(
+                        .{ .frame = frame_index },
+                        @intCast(i32, tuple_ty.structFieldOffset(0, self.target.*)),
+                        ty,
+                        partial_mcv,
+                    );
+                    break :result .{ .load_frame = .{ .index = frame_index } };
                 }
 
-                const tuple_ty = self.air.typeOfIndex(inst);
-                const tuple_size = @intCast(u32, tuple_ty.abiSize(self.target.*));
-                const tuple_align = tuple_ty.abiAlignment(self.target.*);
-                const overflow_bit_offset = @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*));
-                const stack_offset = @intCast(i32, try self.allocMem(inst, tuple_size, tuple_align));
-
-                try self.genSetStackTruncatedOverflowCompare(ty, stack_offset, overflow_bit_offset, partial_mcv.register, cc);
-
-                break :result .{ .stack_offset = stack_offset };
+                const frame_index =
+                    try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*));
+                try self.genSetFrameTruncatedOverflowCompare(
+                    tuple_ty,
+                    frame_index,
+                    partial_mcv.register,
+                    cc,
+                );
+                break :result .{ .load_frame = .{ .index = frame_index } };
             },
             else => unreachable,
         }
@@ -1998,6 +2363,7 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
                 try self.genBinOpMir(.cmp, lhs_ty, tmp_mcv, lhs);
                 const cc = Condition.ne;
 
+                const tuple_ty = self.air.typeOfIndex(inst);
                 if (int_info.bits >= 8 and math.isPowerOfTwo(int_info.bits)) {
                     switch (partial_mcv) {
                         .register => |reg| {
@@ -2007,27 +2373,32 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
                         else => {},
                     }
 
-                    const abi_size = @intCast(i32, lhs_ty.abiSize(self.target.*));
-                    const dst_mcv = try self.allocRegOrMem(inst, false);
-                    try self.genSetStack(
-                        Type.u1,
-                        dst_mcv.stack_offset - abi_size,
+                    const frame_index =
+                        try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*));
+                    try self.genSetMem(
+                        .{ .frame = frame_index },
+                        @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)),
+                        tuple_ty.structFieldType(1),
                         .{ .eflags = cc },
-                        .{},
                     );
-                    try self.genSetStack(lhs_ty, dst_mcv.stack_offset, partial_mcv, .{});
-                    break :result dst_mcv;
+                    try self.genSetMem(
+                        .{ .frame = frame_index },
+                        @intCast(i32, tuple_ty.structFieldOffset(0, self.target.*)),
+                        tuple_ty.structFieldType(0),
+                        partial_mcv,
+                    );
+                    break :result .{ .load_frame = .{ .index = frame_index } };
                 }
 
-                const tuple_ty = self.air.typeOfIndex(inst);
-                const tuple_size = @intCast(u32, tuple_ty.abiSize(self.target.*));
-                const tuple_align = tuple_ty.abiAlignment(self.target.*);
-                const overflow_bit_offset = @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*));
-                const stack_offset = @intCast(i32, try self.allocMem(inst, tuple_size, tuple_align));
-
-                try self.genSetStackTruncatedOverflowCompare(lhs_ty, stack_offset, overflow_bit_offset, partial_mcv.register, cc);
-
-                break :result .{ .stack_offset = stack_offset };
+                const frame_index =
+                    try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*));
+                try self.genSetFrameTruncatedOverflowCompare(
+                    tuple_ty,
+                    frame_index,
+                    partial_mcv.register,
+                    cc,
+                );
+                break :result .{ .load_frame = .{ .index = frame_index } };
             },
             else => unreachable,
         }
@@ -2035,17 +2406,17 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
     return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
 }
 
-fn genSetStackTruncatedOverflowCompare(
+fn genSetFrameTruncatedOverflowCompare(
     self: *Self,
-    ty: Type,
-    stack_offset: i32,
-    overflow_bit_offset: i32,
+    tuple_ty: Type,
+    frame_index: FrameIndex,
     reg: Register,
     cc: Condition,
 ) !void {
     const reg_lock = self.register_manager.lockReg(reg);
     defer if (reg_lock) |lock| self.register_manager.unlockReg(lock);
 
+    const ty = tuple_ty.structFieldType(0);
     const int_info = ty.intInfo(self.target.*);
     const extended_ty = switch (int_info.signedness) {
         .signed => Type.isize,
@@ -2062,7 +2433,7 @@ fn genSetStackTruncatedOverflowCompare(
     try self.asmSetccRegister(overflow_reg.to8(), cc);
 
     const scratch_reg = temp_regs[1];
-    try self.genSetReg(extended_ty, scratch_reg, .{ .register = reg });
+    try self.genSetReg(scratch_reg, extended_ty, .{ .register = reg });
     try self.truncateRegister(ty, scratch_reg);
     try self.genBinOpMir(
         .cmp,
@@ -2080,10 +2451,18 @@ fn genSetStackTruncatedOverflowCompare(
         .{ .register = eq_reg },
     );
 
-    try self.genSetStack(ty, stack_offset, .{ .register = scratch_reg }, .{});
-    try self.genSetStack(Type.u1, stack_offset - overflow_bit_offset, .{
-        .register = overflow_reg.to8(),
-    }, .{});
+    try self.genSetMem(
+        .{ .frame = frame_index },
+        @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)),
+        tuple_ty.structFieldType(1),
+        .{ .register = overflow_reg.to8() },
+    );
+    try self.genSetMem(
+        .{ .frame = frame_index },
+        @intCast(i32, tuple_ty.structFieldOffset(0, self.target.*)),
+        ty,
+        .{ .register = scratch_reg },
+    );
 }
 
 fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
@@ -2101,6 +2480,8 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
                     .unsigned => .c,
                     .signed => .o,
                 };
+
+                const tuple_ty = self.air.typeOfIndex(inst);
                 if (dst_info.bits >= 8 and math.isPowerOfTwo(dst_info.bits)) {
                     var src_pl = Type.Payload.Bits{ .base = .{ .tag = switch (dst_info.signedness) {
                         .signed => .int_signed,
@@ -2127,16 +2508,21 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
 
                     // For now, this is the only supported multiply that doesn't fit in a register.
                     assert(dst_info.bits == 128 and src_pl.data == 64);
-                    const dst_abi_size = @intCast(i32, dst_ty.abiSize(self.target.*));
-                    const dst_mcv = try self.allocRegOrMem(inst, false);
-                    try self.genSetStack(
-                        Type.u1,
-                        dst_mcv.stack_offset - dst_abi_size,
-                        .{ .immediate = 0 }, // 64x64 -> 128 never overflows
-                        .{},
+                    const frame_index =
+                        try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*));
+                    try self.genSetMem(
+                        .{ .frame = frame_index },
+                        @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)),
+                        tuple_ty.structFieldType(1),
+                        .{ .immediate = 0 }, // overflow is impossible for 64-bit*64-bit -> 128-bit
                     );
-                    try self.genSetStack(dst_ty, dst_mcv.stack_offset, partial_mcv, .{});
-                    break :result dst_mcv;
+                    try self.genSetMem(
+                        .{ .frame = frame_index },
+                        @intCast(i32, tuple_ty.structFieldOffset(0, self.target.*)),
+                        tuple_ty.structFieldType(0),
+                        partial_mcv,
+                    );
+                    break :result .{ .load_frame = .{ .index = frame_index } };
                 }
 
                 const dst_reg: Register = dst_reg: {
@@ -2184,15 +2570,10 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
                     }
                 };
 
-                const tuple_ty = self.air.typeOfIndex(inst);
-                const tuple_size = @intCast(u32, tuple_ty.abiSize(self.target.*));
-                const tuple_align = tuple_ty.abiAlignment(self.target.*);
-                const overflow_bit_offset = @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*));
-                const stack_offset = @intCast(i32, try self.allocMem(inst, tuple_size, tuple_align));
-
-                try self.genSetStackTruncatedOverflowCompare(dst_ty, stack_offset, overflow_bit_offset, dst_reg, cc);
-
-                break :result .{ .stack_offset = stack_offset };
+                const frame_index =
+                    try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*));
+                try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, dst_reg, cc);
+                break :result .{ .load_frame = .{ .index = frame_index } };
             },
             else => unreachable,
         }
@@ -2220,7 +2601,7 @@ fn genIntMulDivOpMir(
             .register => |reg| if (reg.to64() == .rax) break :lhs,
             else => {},
         }
-        try self.genSetReg(ty, .rax, lhs);
+        try self.genSetReg(.rax, ty, lhs);
     }
 
     switch (tag) {
@@ -2230,16 +2611,26 @@ fn genIntMulDivOpMir(
         .idiv => try self.asmOpOnly(.cqo),
     }
 
-    const factor: MCValue = switch (rhs) {
-        .register, .stack_offset => rhs,
+    const mat_rhs: MCValue = switch (rhs) {
+        .register, .indirect, .load_frame => rhs,
         else => .{ .register = try self.copyToTmpRegister(ty, rhs) },
     };
-    switch (factor) {
+    switch (mat_rhs) {
         .register => |reg| try self.asmRegister(tag, reg),
-        .stack_offset => |off| try self.asmMemory(tag, Memory.sib(
-            Memory.PtrSize.fromSize(abi_size),
-            .{ .base = .rbp, .disp = -off },
-        )),
+        .indirect, .load_frame => try self.asmMemory(
+            tag,
+            Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (mat_rhs) {
+                .indirect => |reg_off| .{
+                    .base = .{ .reg = reg_off.reg },
+                    .disp = reg_off.off,
+                },
+                .load_frame => |frame_addr| .{
+                    .base = .{ .frame = frame_addr.index },
+                    .disp = frame_addr.off,
+                },
+                else => unreachable,
+            }),
+        ),
         else => unreachable,
     }
 }
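
x86 `mul`/`div` accept only a register or memory operand, so any other MCValue is first materialized into a temporary register; the two memory-backed cases map directly onto the new `Memory.Base` variants. Sketch of the two operand shapes this emits, using `.mul` as an example tag:

    try self.asmMemory(.mul, Memory.sib(.qword, .{
        .base = .{ .reg = reg_off.reg }, // .indirect: address held in a register
        .disp = reg_off.off,
    }));
    try self.asmMemory(.mul, Memory.sib(.qword, .{
        .base = .{ .frame = frame_addr.index }, // .load_frame: abstract until layout
        .disp = frame_addr.off,
    }));
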
@@ -2315,7 +2706,7 @@ fn airOptionalPayload(self: *Self, inst: Air.Inst.Index) !void {
         }
 
         const pl_mcv = try self.allocRegOrMem(inst, true);
-        try self.setRegOrMem(pl_ty, pl_mcv, switch (opt_mcv) {
+        try self.genCopy(pl_ty, pl_mcv, switch (opt_mcv) {
             else => opt_mcv,
             .register_overflow => |ro| .{ .register = ro.reg },
         });
@@ -2361,11 +2752,7 @@ fn airOptionalPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void {
 
         const pl_ty = dst_ty.childType();
         const pl_abi_size = @intCast(i32, pl_ty.abiSize(self.target.*));
-        try self.asmMemoryImmediate(
-            .mov,
-            Memory.sib(.byte, .{ .base = dst_mcv.register, .disp = pl_abi_size }),
-            Immediate.u(1),
-        );
+        try self.genSetMem(.{ .reg = dst_mcv.register }, pl_abi_size, Type.bool, .{ .immediate = 1 });
         break :result if (self.liveness.isUnused(inst)) .unreach else dst_mcv;
     };
     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
@@ -2389,10 +2776,6 @@ fn airUnwrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void {
 
         const err_off = errUnionErrorOffset(payload_ty, self.target.*);
         switch (operand) {
-            .stack_offset => |off| {
-                const offset = off - @intCast(i32, err_off);
-                break :result MCValue{ .stack_offset = offset };
-            },
             .register => |reg| {
                 // TODO reuse operand
                 const eu_lock = self.register_manager.lockReg(reg);
@@ -2407,6 +2790,10 @@ fn airUnwrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void {
                 }
                 break :result result;
             },
+            .load_frame => |frame_addr| break :result .{ .load_frame = .{
+                .index = frame_addr.index,
+                .off = frame_addr.off + @intCast(i32, err_off),
+            } },
             else => return self.fail("TODO implement unwrap_err_err for {}", .{operand}),
         }
     };
@@ -2434,10 +2821,10 @@ fn genUnwrapErrorUnionPayloadMir(
 
         const payload_off = errUnionPayloadOffset(payload_ty, self.target.*);
         switch (err_union) {
-            .stack_offset => |off| {
-                const offset = off - @intCast(i32, payload_off);
-                break :result MCValue{ .stack_offset = offset };
-            },
+            .load_frame => |frame_addr| break :result .{ .load_frame = .{
+                .index = frame_addr.index,
+                .off = frame_addr.off + @intCast(i32, payload_off),
+            } },
             .register => |reg| {
                 // TODO reuse operand
                 const eu_lock = self.register_manager.lockReg(reg);
@@ -2488,7 +2875,10 @@ fn airUnwrapErrUnionErrPtr(self: *Self, inst: Air.Inst.Index) !void {
     try self.asmRegisterMemory(
         .mov,
         registerAlias(dst_reg, err_abi_size),
-        Memory.sib(Memory.PtrSize.fromSize(err_abi_size), .{ .base = src_reg, .disp = err_off }),
+        Memory.sib(Memory.PtrSize.fromSize(err_abi_size), .{
+            .base = .{ .reg = src_reg },
+            .disp = err_off,
+        }),
     );
 
     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
@@ -2523,7 +2913,7 @@ fn airUnwrapErrUnionPayloadPtr(self: *Self, inst: Air.Inst.Index) !void {
     try self.asmRegisterMemory(
         .lea,
         registerAlias(dst_reg, dst_abi_size),
-        Memory.sib(.qword, .{ .base = src_reg, .disp = pl_off }),
+        Memory.sib(.qword, .{ .base = .{ .reg = src_reg }, .disp = pl_off }),
     );
 
     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
@@ -2548,7 +2938,10 @@ fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void {
         const err_abi_size = @intCast(u32, err_ty.abiSize(self.target.*));
         try self.asmMemoryImmediate(
             .mov,
-            Memory.sib(Memory.PtrSize.fromSize(err_abi_size), .{ .base = src_reg, .disp = err_off }),
+            Memory.sib(Memory.PtrSize.fromSize(err_abi_size), .{
+                .base = .{ .reg = src_reg },
+                .disp = err_off,
+            }),
             Immediate.u(0),
         );
 
@@ -2567,7 +2960,7 @@ fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void {
         try self.asmRegisterMemory(
             .lea,
             registerAlias(dst_reg, dst_abi_size),
-            Memory.sib(.qword, .{ .base = src_reg, .disp = pl_off }),
+            Memory.sib(.qword, .{ .base = .{ .reg = src_reg }, .disp = pl_off }),
         );
         break :result .{ .register = dst_reg };
     };
@@ -2608,7 +3001,7 @@ fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void {
         defer if (pl_lock) |lock| self.register_manager.unlockReg(lock);
 
         const opt_mcv = try self.allocRegOrMem(inst, true);
-        try self.setRegOrMem(pl_ty, opt_mcv, pl_mcv);
+        try self.genCopy(pl_ty, opt_mcv, pl_mcv);
 
         if (!same_repr) {
             const pl_abi_size = @intCast(i32, pl_ty.abiSize(self.target.*));
@@ -2621,9 +3014,12 @@ fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void {
                     Immediate.u(@intCast(u6, pl_abi_size * 8)),
                 ),
 
-                .stack_offset => |off| try self.asmMemoryImmediate(
+                .load_frame => |frame_addr| try self.asmMemoryImmediate(
                     .mov,
-                    Memory.sib(.byte, .{ .base = .rbp, .disp = pl_abi_size - off }),
+                    Memory.sib(.byte, .{
+                        .base = .{ .frame = frame_addr.index },
+                        .disp = frame_addr.off + pl_abi_size,
+                    }),
                     Immediate.u(1),
                 ),
             }
@@ -2637,26 +3033,21 @@ fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void {
 fn airWrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void {
     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
 
-    const error_union_ty = self.air.getRefType(ty_op.ty);
-    const payload_ty = error_union_ty.errorUnionPayload();
+    const eu_ty = self.air.getRefType(ty_op.ty);
+    const pl_ty = eu_ty.errorUnionPayload();
+    const err_ty = eu_ty.errorUnionSet();
     const operand = try self.resolveInst(ty_op.operand);
 
     const result: MCValue = result: {
-        if (!payload_ty.hasRuntimeBitsIgnoreComptime()) {
-            break :result operand;
-        }
-
-        const abi_size = @intCast(u32, error_union_ty.abiSize(self.target.*));
-        const abi_align = error_union_ty.abiAlignment(self.target.*);
-        const stack_offset = @intCast(i32, try self.allocMem(inst, abi_size, abi_align));
-        const payload_off = errUnionPayloadOffset(payload_ty, self.target.*);
-        const err_off = errUnionErrorOffset(payload_ty, self.target.*);
-        try self.genSetStack(payload_ty, stack_offset - @intCast(i32, payload_off), operand, .{});
-        try self.genSetStack(Type.anyerror, stack_offset - @intCast(i32, err_off), .{ .immediate = 0 }, .{});
+        if (!pl_ty.hasRuntimeBitsIgnoreComptime()) break :result .{ .immediate = 0 };
 
-        break :result MCValue{ .stack_offset = stack_offset };
+        const frame_index = try self.allocFrameIndex(FrameAlloc.initType(eu_ty, self.target.*));
+        const pl_off = @intCast(i32, errUnionPayloadOffset(pl_ty, self.target.*));
+        const err_off = @intCast(i32, errUnionErrorOffset(pl_ty, self.target.*));
+        try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, operand);
+        try self.genSetMem(.{ .frame = frame_index }, err_off, err_ty, .{ .immediate = 0 });
+        break :result .{ .load_frame = .{ .index = frame_index } };
     };
-
     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
 }
 
@@ -2664,26 +3055,21 @@ fn airWrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void {
 fn airWrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void {
     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
 
-    const error_union_ty = self.air.getRefType(ty_op.ty);
-    const payload_ty = error_union_ty.errorUnionPayload();
-    const operand = try self.resolveInst(ty_op.operand);
+    const eu_ty = self.air.getRefType(ty_op.ty);
+    const pl_ty = eu_ty.errorUnionPayload();
+    const err_ty = eu_ty.errorUnionSet();
 
     const result: MCValue = result: {
-        if (!payload_ty.hasRuntimeBitsIgnoreComptime()) {
-            break :result operand;
-        }
-
-        const abi_size = @intCast(u32, error_union_ty.abiSize(self.target.*));
-        const abi_align = error_union_ty.abiAlignment(self.target.*);
-        const stack_offset = @intCast(i32, try self.allocMem(inst, abi_size, abi_align));
-        const payload_off = errUnionPayloadOffset(payload_ty, self.target.*);
-        const err_off = errUnionErrorOffset(payload_ty, self.target.*);
-        try self.genSetStack(Type.anyerror, stack_offset - @intCast(i32, err_off), operand, .{});
-        try self.genSetStack(payload_ty, stack_offset - @intCast(i32, payload_off), .undef, .{});
+        if (!pl_ty.hasRuntimeBitsIgnoreComptime()) break :result try self.resolveInst(ty_op.operand);
 
-        break :result MCValue{ .stack_offset = stack_offset };
+        const frame_index = try self.allocFrameIndex(FrameAlloc.initType(eu_ty, self.target.*));
+        const pl_off = @intCast(i32, errUnionPayloadOffset(pl_ty, self.target.*));
+        const err_off = @intCast(i32, errUnionErrorOffset(pl_ty, self.target.*));
+        try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, .undef);
+        const operand = try self.resolveInst(ty_op.operand);
+        try self.genSetMem(.{ .frame = frame_index }, err_off, err_ty, operand);
+        break :result .{ .load_frame = .{ .index = frame_index } };
     };
-
     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
 }
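
Both wrap directions share one frame-slot layout, with the payload and error offsets computed from the payload type alone. Sketch:

    // Error union in its frame slot:
    //   slot[errUnionPayloadOffset(pl_ty, target)] = payload (or undef)
    //   slot[errUnionErrorOffset(pl_ty, target)]   = error code (0 = no error)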
 
@@ -2695,7 +3081,7 @@ fn airSlicePtr(self: *Self, inst: Air.Inst.Index) !void {
 
         const dst_mcv = try self.allocRegOrMem(inst, true);
         const dst_ty = self.air.typeOfIndex(inst);
-        try self.setRegOrMem(dst_ty, dst_mcv, src_mcv);
+        try self.genCopy(dst_ty, dst_mcv, src_mcv);
         break :result dst_mcv;
     };
     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
@@ -2707,9 +3093,10 @@ fn airSliceLen(self: *Self, inst: Air.Inst.Index) !void {
     const operand = try self.resolveInst(ty_op.operand);
     const dst_mcv: MCValue = blk: {
         switch (operand) {
-            .stack_offset => |off| {
-                break :blk MCValue{ .stack_offset = off - 8 };
-            },
+            .load_frame => |frame_addr| break :blk .{ .load_frame = .{
+                .index = frame_addr.index,
+                .off = frame_addr.off + 8,
+            } },
             else => return self.fail("TODO implement slice_len for {}", .{operand}),
         }
     };
@@ -2743,7 +3130,7 @@ fn airPtrSliceLenPtr(self: *Self, inst: Air.Inst.Index) !void {
         .lea,
         registerAlias(dst_reg, dst_abi_size),
         Memory.sib(.qword, .{
-            .base = src_reg,
+            .base = .{ .reg = src_reg },
             .disp = @divExact(self.target.cpu.arch.ptrBitWidth(), 8),
         }),
     );
@@ -2771,7 +3158,7 @@ fn elemOffset(self: *Self, index_ty: Type, index: MCValue, elem_size: u64) !Regi
                 // Optimisation: if index MCValue is an immediate, we can multiply in `comptime`
                 // and set the register directly to the scaled offset as an immediate.
                 const reg = try self.register_manager.allocReg(null, gp);
-                try self.genSetReg(index_ty, reg, .{ .immediate = imm * elem_size });
+                try self.genSetReg(reg, index_ty, .{ .immediate = imm * elem_size });
                 break :blk reg;
             },
             else => {
@@ -2812,10 +3199,14 @@ fn genSliceElemPtr(self: *Self, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) !MCValue {
 
     const addr_reg = try self.register_manager.allocReg(null, gp);
     switch (slice_mcv) {
-        .stack_offset => |off| try self.asmRegisterMemory(.mov, addr_reg.to64(), Memory.sib(.qword, .{
-            .base = .rbp,
-            .disp = -off,
-        })),
+        .load_frame => |frame_addr| try self.asmRegisterMemory(
+            .mov,
+            addr_reg.to64(),
+            Memory.sib(.qword, .{
+                .base = .{ .frame = frame_addr.index },
+                .disp = frame_addr.off,
+            }),
+        ),
         else => return self.fail("TODO implement slice_elem_ptr when slice is {}", .{slice_mcv}),
     }
     // TODO we could allocate register here, but need to expect addr register and potentially
@@ -2834,7 +3225,7 @@ fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void {
     const slice_ptr_field_type = slice_ty.slicePtrFieldType(&buf);
     const elem_ptr = try self.genSliceElemPtr(bin_op.lhs, bin_op.rhs);
     const dst_mcv = try self.allocRegOrMem(inst, false);
-    try self.load(dst_mcv, elem_ptr, slice_ptr_field_type);
+    try self.load(dst_mcv, slice_ptr_field_type, elem_ptr);
 
     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
 }
@@ -2872,33 +3263,27 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void {
     const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
     defer self.register_manager.unlockReg(offset_reg_lock);
 
-    const addr_reg = (try self.register_manager.allocReg(null, gp)).to64();
+    const addr_reg = try self.register_manager.allocReg(null, gp);
     switch (array) {
         .register => {
-            const off = @intCast(i32, try self.allocMem(
-                inst,
-                @intCast(u32, array_ty.abiSize(self.target.*)),
-                array_ty.abiAlignment(self.target.*),
-            ));
-            try self.genSetStack(array_ty, off, array, .{});
-            try self.asmRegisterMemory(.lea, addr_reg, Memory.sib(.qword, .{
-                .base = .rbp,
-                .disp = -off,
-            }));
-        },
-        .stack_offset => |off| {
-            try self.asmRegisterMemory(.lea, addr_reg, Memory.sib(.qword, .{
-                .base = .rbp,
-                .disp = -off,
-            }));
+            const frame_index = try self.allocFrameIndex(FrameAlloc.initType(array_ty, self.target.*));
+            try self.genSetMem(.{ .frame = frame_index }, 0, array_ty, array);
+            try self.asmRegisterMemory(
+                .lea,
+                addr_reg,
+                Memory.sib(.qword, .{ .base = .{ .frame = frame_index } }),
+            );
         },
-        .load_got => try self.genSetReg(array_ty, addr_reg, array),
-        .memory, .load_direct, .load_tlv => try self.genSetReg(Type.usize, addr_reg, switch (array) {
-            .memory => |addr| .{ .immediate = addr },
-            .load_direct => |sym_index| .{ .lea_direct = sym_index },
-            .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
-            else => unreachable,
-        }),
+        .load_frame => |frame_addr| try self.asmRegisterMemory(
+            .lea,
+            addr_reg,
+            Memory.sib(.qword, .{ .base = .{ .frame = frame_addr.index }, .disp = frame_addr.off }),
+        ),
+        .memory,
+        .load_direct,
+        .load_got,
+        .load_tlv,
+        => try self.genSetReg(addr_reg, Type.usize, array.address()),
         .lea_direct, .lea_tlv => unreachable,
         else => return self.fail("TODO implement array_elem_val when array is {}", .{array}),
     }
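
The memory-backed cases collapse into one arm via `array.address()`. Its mapping is not shown in this hunk; the deleted per-tag conversions above suggest the following correspondence (the `.load_got` and `.load_frame` rows are inferred, so treat this as an assumption):

    // Assumed MCValue.address() mapping (sketch):
    //   .memory => |addr|     .{ .immediate = addr }
    //   .load_direct => |sym| .{ .lea_direct = sym }
    //   .load_got => |sym|    .{ .lea_got = sym }
    //   .load_tlv => |sym|    .{ .lea_tlv = sym }
    //   .load_frame => |fa|   .{ .lea_frame = fa }
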
@@ -2907,7 +3292,7 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void {
     // offset register.
     const dst_mcv = try self.allocRegOrMem(inst, false);
     try self.genBinOpMir(.add, Type.usize, .{ .register = addr_reg }, .{ .register = offset_reg });
-    try self.load(dst_mcv, .{ .register = addr_reg.to64() }, array_ty);
+    try self.genCopy(elem_ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg } });
 
     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
 }
@@ -2948,7 +3333,7 @@ fn airPtrElemVal(self: *Self, inst: Air.Inst.Index) !void {
         else => null,
     };
     defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
-    try self.load(dst_mcv, .{ .register = elem_ptr_reg }, ptr_ty);
+    try self.load(dst_mcv, ptr_ty, .{ .register = elem_ptr_reg });
 
     return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none });
 }
@@ -2987,8 +3372,8 @@ fn airPtrElemPtr(self: *Self, inst: Air.Inst.Index) !void {
 
 fn airSetUnionTag(self: *Self, inst: Air.Inst.Index) !void {
     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
-    const ptr_ty = self.air.typeOf(bin_op.lhs);
-    const union_ty = ptr_ty.childType();
+    const ptr_union_ty = self.air.typeOf(bin_op.lhs);
+    const union_ty = ptr_union_ty.childType();
     const tag_ty = self.air.typeOf(bin_op.rhs);
     const layout = union_ty.unionGetLayout(self.target.*);
 
@@ -3012,12 +3397,15 @@ fn airSetUnionTag(self: *Self, inst: Air.Inst.Index) !void {
 
     const adjusted_ptr: MCValue = if (layout.payload_size > 0 and layout.tag_align < layout.payload_align) blk: {
         // TODO reusing the operand
-        const reg = try self.copyToTmpRegister(ptr_ty, ptr);
-        try self.genBinOpMir(.add, ptr_ty, .{ .register = reg }, .{ .immediate = layout.payload_size });
+        const reg = try self.copyToTmpRegister(ptr_union_ty, ptr);
+        try self.genBinOpMir(.add, ptr_union_ty, .{ .register = reg }, .{ .immediate = layout.payload_size });
         break :blk MCValue{ .register = reg };
     } else ptr;
 
-    try self.store(adjusted_ptr, tag, ptr_ty, tag_ty);
+    var ptr_tag_pl = ptr_union_ty.ptrInfo();
+    ptr_tag_pl.data.pointee_type = tag_ty;
+    const ptr_tag_ty = Type.initPayload(&ptr_tag_pl.base);
+    try self.store(ptr_tag_ty, adjusted_ptr, tag);
 
     return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none });
 }
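
`store` now takes the pointer type and derives the value type from its pointee, so storing only the tag through a pointer-to-union requires synthesizing a pointer-to-tag type first, as done above. Call-shape comparison:

    // Old: self.store(adjusted_ptr, tag, ptr_ty, tag_ty)
    // New: the value type is implied by the pointer type's child:
    try self.store(ptr_tag_ty, adjusted_ptr, tag);
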
@@ -3044,11 +3432,14 @@ fn airGetUnionTag(self: *Self, inst: Air.Inst.Index) !void {
     const tag_abi_size = tag_ty.abiSize(self.target.*);
     const dst_mcv: MCValue = blk: {
         switch (operand) {
-            .stack_offset => |off| {
+            .load_frame => |frame_addr| {
                 if (tag_abi_size <= 8) {
-                    const offset: i32 = if (layout.tag_align < layout.payload_align) @intCast(i32, layout.payload_size) else 0;
+                    const off: i32 = if (layout.tag_align < layout.payload_align)
+                        @intCast(i32, layout.payload_size)
+                    else
+                        0;
                     break :blk try self.copyToRegisterWithInstTracking(inst, tag_ty, .{
-                        .stack_offset = off - offset,
+                        .load_frame = .{ .index = frame_addr.index, .off = frame_addr.off + off },
                     });
                 }
 
@@ -3131,7 +3522,7 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void {
                 .nz,
             );
 
-            try self.genSetReg(dst_ty, dst_reg, .{ .immediate = src_bits - 1 });
+            try self.genSetReg(dst_reg, dst_ty, .{ .immediate = src_bits - 1 });
             try self.genBinOpMir(.sub, dst_ty, dst_mcv, .{ .register = imm_reg });
         }
         break :result dst_mcv;
@@ -3171,7 +3562,7 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void {
                 };
                 const tmp_mcv = tmp: {
                     if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0)) break :tmp src_mcv;
-                    try self.genSetReg(src_ty, dst_reg, src_mcv);
+                    try self.genSetReg(dst_reg, src_ty, src_mcv);
                     break :tmp dst_mcv;
                 };
                 try self.genBinOpMir(.@"or", src_ty, tmp_mcv, mask_mcv);
@@ -3345,7 +3736,7 @@ fn byteSwap(self: *Self, inst: Air.Inst.Index, src_ty: Type, src_mcv: MCValue, m
             const dst_lock = self.register_manager.lockRegAssumeUnused(dst_mcv.register);
             defer self.register_manager.unlockReg(dst_lock);
 
-            try self.genSetReg(src_ty, dst_mcv.register, src_mcv);
+            try self.genSetReg(dst_mcv.register, src_ty, src_mcv);
             switch (src_bits) {
                 else => unreachable,
                 16 => try self.genBinOpMir(.rol, src_ty, dst_mcv, .{ .immediate = 8 }),
@@ -3450,7 +3841,7 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void {
             .lea,
             if (src_abi_size > 4) tmp.to64() else tmp.to32(),
             Memory.sib(.qword, .{
-                .base = dst.to64(),
+                .base = .{ .reg = dst.to64() },
                 .scale_index = .{ .index = tmp.to64(), .scale = 1 << 2 },
             }),
         );
@@ -3473,7 +3864,7 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void {
             .lea,
             if (src_abi_size > 4) dst.to64() else dst.to32(),
             Memory.sib(.qword, .{
-                .base = tmp.to64(),
+                .base = .{ .reg = tmp.to64() },
                 .scale_index = .{ .index = dst.to64(), .scale = 1 << 1 },
             }),
         );
@@ -3506,6 +3897,17 @@ fn reuseOperand(
     operand: Air.Inst.Ref,
     op_index: Liveness.OperandInt,
     mcv: MCValue,
+) bool {
+    return self.reuseOperandAdvanced(inst, operand, op_index, mcv, inst);
+}
+
+fn reuseOperandAdvanced(
+    self: *Self,
+    inst: Air.Inst.Index,
+    operand: Air.Inst.Ref,
+    op_index: Liveness.OperandInt,
+    mcv: MCValue,
+    tracked_inst: Air.Inst.Index,
 ) bool {
     if (!self.liveness.operandDies(inst, op_index))
         return false;
@@ -3516,25 +3918,23 @@ fn reuseOperand(
             // new instruction.
             if (!self.register_manager.isRegFree(reg)) {
                 if (RegisterManager.indexOfRegIntoTracked(reg)) |index| {
-                    self.register_manager.registers[index] = inst;
+                    self.register_manager.registers[index] = tracked_inst;
                 }
             }
-            log.debug("%{d} => {} (reused)", .{ inst, reg });
-        },
-        .stack_offset => |off| {
-            log.debug("%{d} => stack offset {d} (reused)", .{ inst, off });
         },
+        .load_frame => |frame_addr| if (frame_addr.index.isNamed()) return false,
         else => return false,
     }
 
     // Prevent the operand deaths processing code from deallocating it.
     self.liveness.clearOperandDeath(inst, op_index);
-    self.getResolvedInstValue(Air.refToIndex(operand).?).reuse(self);
+    const op_inst = Air.refToIndex(operand).?;
+    self.getResolvedInstValue(op_inst).reuse(self, tracked_inst, op_inst);
 
     return true;
 }
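
`reuseOperandAdvanced` decouples the instruction being processed from the instruction that takes ownership of the operand. Note also the new `.load_frame` arm: only anonymous slots may be reused, since named frame indices describe fixed, shared parts of the frame layout. A hedged usage sketch, where `block_inst` is a hypothetical owner (e.g. forwarding a branch result to its enclosing block):

    if (self.reuseOperandAdvanced(inst, operand, 0, src_mcv, block_inst)) {
        // src_mcv is now tracked as belonging to block_inst, not inst.
    }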
 
-fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_mcv: MCValue, ptr_ty: Type) InnerError!void {
+fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void {
     const ptr_info = ptr_ty.ptrInfo().data;
 
     const val_ty = ptr_info.pointee_type;
@@ -3564,10 +3964,14 @@ fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_mcv: MCValue, ptr_ty: Type) Inn
         if (val_bit_off < val_extra_bits) val_abi_size else val_abi_size * 2;
     if (load_abi_size <= 8) {
         const load_reg = registerAlias(dst_reg, load_abi_size);
-        try self.asmRegisterMemory(.mov, load_reg, Memory.sib(
-            Memory.PtrSize.fromSize(load_abi_size),
-            .{ .base = ptr_reg, .disp = val_byte_off },
-        ));
+        try self.asmRegisterMemory(
+            .mov,
+            load_reg,
+            Memory.sib(Memory.PtrSize.fromSize(load_abi_size), .{
+                .base = .{ .reg = ptr_reg },
+                .disp = val_byte_off,
+            }),
+        );
         try self.asmRegisterImmediate(.shr, load_reg, Immediate.u(val_bit_off));
     } else {
         const tmp_reg = registerAlias(try self.register_manager.allocReg(null, gp), val_abi_size);
@@ -3575,85 +3979,60 @@ fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_mcv: MCValue, ptr_ty: Type) Inn
         defer self.register_manager.unlockReg(tmp_lock);
 
         const dst_alias = registerAlias(dst_reg, val_abi_size);
-        try self.asmRegisterMemory(.mov, dst_alias, Memory.sib(
-            Memory.PtrSize.fromSize(val_abi_size),
-            .{ .base = ptr_reg, .disp = val_byte_off },
-        ));
-        try self.asmRegisterMemory(.mov, tmp_reg, Memory.sib(
-            Memory.PtrSize.fromSize(val_abi_size),
-            .{ .base = ptr_reg, .disp = val_byte_off + 1 },
-        ));
+        try self.asmRegisterMemory(
+            .mov,
+            dst_alias,
+            Memory.sib(Memory.PtrSize.fromSize(val_abi_size), .{
+                .base = .{ .reg = ptr_reg },
+                .disp = val_byte_off,
+            }),
+        );
+        try self.asmRegisterMemory(
+            .mov,
+            tmp_reg,
+            Memory.sib(Memory.PtrSize.fromSize(val_abi_size), .{
+                .base = .{ .reg = ptr_reg },
+                .disp = val_byte_off + 1,
+            }),
+        );
         try self.asmRegisterRegisterImmediate(.shrd, dst_alias, tmp_reg, Immediate.u(val_bit_off));
     }
 
     if (val_extra_bits > 0) try self.truncateRegister(val_ty, dst_reg);
-    try self.setRegOrMem(val_ty, dst_mcv, .{ .register = dst_reg });
-}
-
-fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!void {
-    const elem_ty = ptr_ty.elemType();
-    const abi_size = @intCast(u32, elem_ty.abiSize(self.target.*));
-    switch (ptr) {
-        .none => unreachable,
-        .undef => unreachable,
-        .unreach => unreachable,
-        .dead => unreachable,
-        .eflags => unreachable,
-        .register_overflow => unreachable,
-        .immediate => |imm| {
-            try self.setRegOrMem(elem_ty, dst_mcv, .{ .memory = imm });
-        },
-        .stack_offset => {
-            const reg = try self.copyToTmpRegister(ptr_ty, ptr);
-            try self.load(dst_mcv, .{ .register = reg }, ptr_ty);
-        },
-        .ptr_stack_offset => |off| {
-            try self.setRegOrMem(elem_ty, dst_mcv, .{ .stack_offset = off });
-        },
-        .register => |reg| {
-            const reg_lock = self.register_manager.lockReg(reg);
-            defer if (reg_lock) |lock| self.register_manager.unlockReg(lock);
+    try self.genCopy(val_ty, dst_mcv, .{ .register = dst_reg });
+}
 
-            switch (dst_mcv) {
-                .dead => unreachable,
-                .undef => unreachable,
-                .eflags => unreachable,
-                .register => |dst_reg| {
-                    try self.asmRegisterMemory(
-                        .mov,
-                        registerAlias(dst_reg, abi_size),
-                        Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = reg }),
-                    );
-                },
-                .stack_offset => |off| {
-                    if (abi_size <= 8) {
-                        const tmp_reg = try self.register_manager.allocReg(null, gp);
-                        try self.load(.{ .register = tmp_reg }, ptr, ptr_ty);
-                        return self.genSetStack(elem_ty, off, MCValue{ .register = tmp_reg }, .{});
-                    }
+fn load(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void {
+    const dst_ty = ptr_ty.childType();
+    switch (ptr_mcv) {
+        .none,
+        .unreach,
+        .dead,
+        .undef,
+        .eflags,
+        .register_overflow,
+        .reserved_frame,
+        => unreachable, // not a valid pointer
+        .immediate,
+        .register,
+        .register_offset,
+        .lea_direct,
+        .lea_got,
+        .lea_tlv,
+        .lea_frame,
+        => try self.genCopy(dst_ty, dst_mcv, ptr_mcv.deref()),
+        .memory,
+        .indirect,
+        .load_direct,
+        .load_got,
+        .load_tlv,
+        .load_frame,
+        => {
+            const addr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
+            const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
+            defer self.register_manager.unlockReg(addr_lock);
 
-                    try self.genInlineMemcpy(
-                        .{ .ptr_stack_offset = off },
-                        ptr,
-                        .{ .immediate = abi_size },
-                        .{},
-                    );
-                },
-                else => return self.fail("TODO implement loading from register into {}", .{dst_mcv}),
-            }
-        },
-        .load_direct => |sym_index| {
-            const addr_reg = try self.copyToTmpRegister(Type.usize, .{ .lea_direct = sym_index });
-            const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
-            defer self.register_manager.unlockReg(addr_reg_lock);
-            // Load the pointer, which is stored in memory
-            try self.asmRegisterMemory(.mov, addr_reg, Memory.sib(.qword, .{ .base = addr_reg }));
-            try self.load(dst_mcv, .{ .register = addr_reg }, ptr_ty);
-        },
-        .load_tlv => |sym_index| try self.load(dst_mcv, .{ .lea_tlv = sym_index }, ptr_ty),
-        .memory, .load_got, .lea_direct, .lea_tlv => {
-            const reg = try self.copyToTmpRegister(ptr_ty, ptr);
-            try self.load(dst_mcv, .{ .register = reg }, ptr_ty);
+            try self.genCopy(dst_ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg } });
         },
     }
 }
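
The rewritten `load` splits pointer MCValues into two groups: those whose address is already a value (dereference directly) and those where the pointer itself sits in memory (copy it to a temporary register, then load through it). `deref()` is not shown in this hunk; the arm grouping suggests this mapping, so it is an assumption:

    // Assumed MCValue.deref() mapping (sketch):
    //   .immediate => |addr|     .{ .memory = addr }
    //   .register => |reg|       .{ .indirect = .{ .reg = reg } }
    //   .register_offset => |ro| .{ .indirect = ro }
    //   .lea_direct/.lea_got/.lea_tlv => the matching .load_* variant
    //   .lea_frame => |fa|       .{ .load_frame = fa }
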
@@ -3669,53 +4048,48 @@ fn airLoad(self: *Self, inst: Air.Inst.Index) !void {
         const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx });
         defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
 
-        const ptr = try self.resolveInst(ty_op.operand);
-        const dst_mcv: MCValue = if (elem_size <= 8 and self.reuseOperand(inst, ty_op.operand, 0, ptr))
+        const ptr_mcv = try self.resolveInst(ty_op.operand);
+        const dst_mcv = if (elem_size <= 8 and self.reuseOperand(inst, ty_op.operand, 0, ptr_mcv))
             // The MCValue that holds the pointer can be re-used as the value.
-            ptr
+            ptr_mcv
         else
             try self.allocRegOrMem(inst, true);
 
         const ptr_ty = self.air.typeOf(ty_op.operand);
         if (ptr_ty.ptrInfo().data.host_size > 0) {
-            try self.packedLoad(dst_mcv, ptr, ptr_ty);
+            try self.packedLoad(dst_mcv, ptr_ty, ptr_mcv);
         } else {
-            try self.load(dst_mcv, ptr, ptr_ty);
+            try self.load(dst_mcv, ptr_ty, ptr_mcv);
         }
         break :result dst_mcv;
     };
     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
 }
 
-fn packedStore(
-    self: *Self,
-    ptr_mcv: MCValue,
-    val_mcv: MCValue,
-    ptr_ty: Type,
-    val_ty: Type,
-) InnerError!void {
+fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) InnerError!void {
     const ptr_info = ptr_ty.ptrInfo().data;
+    const src_ty = ptr_ty.childType();
 
     const limb_abi_size = @min(ptr_info.host_size, 8);
     const limb_abi_bits = limb_abi_size * 8;
 
-    const val_bit_size = val_ty.bitSize(self.target.*);
-    const val_byte_off = @intCast(i32, ptr_info.bit_offset / limb_abi_bits * limb_abi_size);
-    const val_bit_off = ptr_info.bit_offset % limb_abi_bits;
+    const src_bit_size = src_ty.bitSize(self.target.*);
+    const src_byte_off = @intCast(i32, ptr_info.bit_offset / limb_abi_bits * limb_abi_size);
+    const src_bit_off = ptr_info.bit_offset % limb_abi_bits;
 
     const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
     const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg);
     defer self.register_manager.unlockReg(ptr_lock);
 
     var limb_i: u16 = 0;
-    while (limb_i * limb_abi_bits < val_bit_off + val_bit_size) : (limb_i += 1) {
-        const part_bit_off = if (limb_i == 0) val_bit_off else 0;
+    while (limb_i * limb_abi_bits < src_bit_off + src_bit_size) : (limb_i += 1) {
+        const part_bit_off = if (limb_i == 0) src_bit_off else 0;
         const part_bit_size =
-            @min(val_bit_off + val_bit_size - limb_i * limb_abi_bits, limb_abi_bits) - part_bit_off;
-        const limb_mem = Memory.sib(
-            Memory.PtrSize.fromSize(limb_abi_size),
-            .{ .base = ptr_reg, .disp = val_byte_off + limb_i * limb_abi_bits },
-        );
+            @min(src_bit_off + src_bit_size - limb_i * limb_abi_bits, limb_abi_bits) - part_bit_off;
+        const limb_mem = Memory.sib(Memory.PtrSize.fromSize(limb_abi_size), .{
+            .base = .{ .reg = ptr_reg },
+            .disp = src_byte_off + limb_i * limb_abi_size,
+        });
 
         const part_mask = (@as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - part_bit_size)) <<
             @intCast(u6, part_bit_off);
@@ -3731,211 +4105,62 @@ fn packedStore(
             try self.asmMemoryRegister(.@"and", limb_mem, part_mask_reg);
         }
 
-        if (val_bit_size <= 64) {
+        if (src_bit_size <= 64) {
             const tmp_reg = try self.register_manager.allocReg(null, gp);
             const tmp_mcv = MCValue{ .register = tmp_reg };
             const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
             defer self.register_manager.unlockReg(tmp_lock);
 
-            try self.genSetReg(val_ty, tmp_reg, val_mcv);
+            try self.genSetReg(tmp_reg, src_ty, src_mcv);
             switch (limb_i) {
-                0 => try self.genShiftBinOpMir(.shl, val_ty, tmp_mcv, .{ .immediate = val_bit_off }),
-                1 => try self.genShiftBinOpMir(.shr, val_ty, tmp_mcv, .{
-                    .immediate = limb_abi_bits - val_bit_off,
+                0 => try self.genShiftBinOpMir(.shl, src_ty, tmp_mcv, .{ .immediate = src_bit_off }),
+                1 => try self.genShiftBinOpMir(.shr, src_ty, tmp_mcv, .{
+                    .immediate = limb_abi_bits - src_bit_off,
                 }),
                 else => unreachable,
             }
-            try self.genBinOpMir(.@"and", val_ty, tmp_mcv, .{ .immediate = part_mask });
+            try self.genBinOpMir(.@"and", src_ty, tmp_mcv, .{ .immediate = part_mask });
             try self.asmMemoryRegister(.@"or", limb_mem, registerAlias(tmp_reg, limb_abi_size));
         } else return self.fail("TODO: implement packed store of {}", .{
-            val_ty.fmt(self.bin_file.options.module.?),
+            src_ty.fmt(self.bin_file.options.module.?),
         });
     }
 }
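
Each limb is updated with a read-modify-write sequence: punch a hole with `and`, shift the source into position, mask it, and `or` it in. Illustrative assembly sketch for a field with mask `m` at bit offset `off`:

    // and [limb], ~m   ; clear the destination bit range
    // mov tmp, src
    // shl tmp, off     ; (shr by limb_abi_bits - off for the spill-over limb)
    // and tmp, m
    // or  [limb], tmp  ; deposit the field bits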
 
-fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type) InnerError!void {
-    const abi_size = @intCast(u32, value_ty.abiSize(self.target.*));
-    switch (ptr) {
-        .none => unreachable,
-        .undef => unreachable,
-        .unreach => unreachable,
-        .dead => unreachable,
-        .eflags => unreachable,
-        .register_overflow => unreachable,
-        .immediate, .stack_offset => {
-            const reg = try self.copyToTmpRegister(ptr_ty, ptr);
-            try self.store(.{ .register = reg }, value, ptr_ty, value_ty);
-        },
-        .ptr_stack_offset => |off| {
-            try self.genSetStack(value_ty, off, value, .{});
-        },
-        .register => |reg| {
-            const reg_lock = self.register_manager.lockReg(reg);
-            defer if (reg_lock) |lock| self.register_manager.unlockReg(lock);
-
-            switch (value) {
-                .none => unreachable,
-                .dead => unreachable,
-                .unreach => unreachable,
-                .eflags => |cc| try self.asmSetccMemory(
-                    Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = reg.to64() }),
-                    cc,
-                ),
-                .undef => if (self.wantSafety()) switch (abi_size) {
-                    1 => try self.store(ptr, .{ .immediate = 0xaa }, ptr_ty, value_ty),
-                    2 => try self.store(ptr, .{ .immediate = 0xaaaa }, ptr_ty, value_ty),
-                    4 => try self.store(ptr, .{ .immediate = 0xaaaaaaaa }, ptr_ty, value_ty),
-                    8 => try self.store(ptr, .{ .immediate = 0xaaaaaaaaaaaaaaaa }, ptr_ty, value_ty),
-                    else => try self.genInlineMemset(
-                        ptr,
-                        .{ .immediate = 0xaa },
-                        .{ .immediate = abi_size },
-                        .{},
-                    ),
-                },
-                .immediate => |imm| switch (self.regBitSize(value_ty)) {
-                    8 => try self.asmMemoryImmediate(
-                        .mov,
-                        Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = reg.to64() }),
-                        if (math.cast(i8, @bitCast(i64, imm))) |small|
-                            Immediate.s(small)
-                        else
-                            Immediate.u(@intCast(u8, imm)),
-                    ),
-                    16 => try self.asmMemoryImmediate(
-                        .mov,
-                        Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = reg.to64() }),
-                        if (math.cast(i16, @bitCast(i64, imm))) |small|
-                            Immediate.s(small)
-                        else
-                            Immediate.u(@intCast(u16, imm)),
-                    ),
-                    32 => try self.asmMemoryImmediate(
-                        .mov,
-                        Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = reg.to64() }),
-                        if (math.cast(i32, @bitCast(i64, imm))) |small|
-                            Immediate.s(small)
-                        else
-                            Immediate.u(@intCast(u32, imm)),
-                    ),
-                    64 => if (math.cast(i32, @bitCast(i64, imm))) |small|
-                        try self.asmMemoryImmediate(
-                            .mov,
-                            Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = reg.to64() }),
-                            Immediate.s(small),
-                        )
-                    else
-                        try self.asmMemoryRegister(
-                            .mov,
-                            Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = reg.to64() }),
-                            registerAlias(try self.copyToTmpRegister(value_ty, value), abi_size),
-                        ),
-                    else => unreachable,
-                },
-                .register => |src_reg| try self.genInlineMemcpyRegisterRegister(
-                    value_ty,
-                    reg,
-                    src_reg,
-                    0,
-                ),
-                .register_overflow => |ro| {
-                    const ro_reg_lock = self.register_manager.lockReg(ro.reg);
-                    defer if (ro_reg_lock) |lock| self.register_manager.unlockReg(lock);
-
-                    const wrapped_ty = value_ty.structFieldType(0);
-                    try self.genInlineMemcpyRegisterRegister(wrapped_ty, reg, ro.reg, 0);
-
-                    const overflow_bit_ty = value_ty.structFieldType(1);
-                    const overflow_bit_offset = value_ty.structFieldOffset(1, self.target.*);
-                    const tmp_reg = try self.register_manager.allocReg(null, gp);
-                    try self.asmSetccRegister(tmp_reg.to8(), ro.eflags);
-                    try self.genInlineMemcpyRegisterRegister(
-                        overflow_bit_ty,
-                        reg,
-                        tmp_reg,
-                        -@intCast(i32, overflow_bit_offset),
-                    );
-                },
-                .memory, .load_tlv, .load_direct => if (abi_size <= 8) {
-                    const tmp_reg = try self.copyToTmpRegister(value_ty, value);
-                    const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
-                    defer self.register_manager.unlockReg(tmp_lock);
-
-                    try self.store(ptr, .{ .register = tmp_reg }, ptr_ty, value_ty);
-                } else {
-                    const addr_reg = (try self.register_manager.allocReg(null, gp)).to64();
-                    const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
-                    defer self.register_manager.unlockReg(addr_lock);
-
-                    try self.genSetReg(Type.usize, addr_reg, switch (value) {
-                        .memory => |addr| .{ .immediate = addr },
-                        .load_direct => |sym_index| .{ .lea_direct = sym_index },
-                        .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
-                        else => unreachable,
-                    });
-                    try self.genInlineMemcpy(
-                        ptr,
-                        .{ .register = addr_reg },
-                        .{ .immediate = abi_size },
-                        .{},
-                    );
-                },
-                .stack_offset => |off| if (abi_size <= 8) {
-                    const tmp_reg = try self.copyToTmpRegister(value_ty, value);
-                    const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
-                    defer self.register_manager.unlockReg(tmp_lock);
-
-                    try self.store(ptr, .{ .register = tmp_reg }, ptr_ty, value_ty);
-                } else try self.genInlineMemcpy(
-                    ptr,
-                    .{ .ptr_stack_offset = off },
-                    .{ .immediate = abi_size },
-                    .{},
-                ),
-                .ptr_stack_offset, .load_got, .lea_direct, .lea_tlv => {
-                    const tmp_reg = try self.copyToTmpRegister(value_ty, value);
-                    const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
-                    defer self.register_manager.unlockReg(tmp_lock);
+fn store(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) InnerError!void {
+    const src_ty = ptr_ty.childType();
+    switch (ptr_mcv) {
+        .none,
+        .unreach,
+        .dead,
+        .undef,
+        .eflags,
+        .register_overflow,
+        .reserved_frame,
+        => unreachable, // not a valid pointer
+        .immediate,
+        .register,
+        .register_offset,
+        .lea_direct,
+        .lea_got,
+        .lea_tlv,
+        .lea_frame,
+        => try self.genCopy(src_ty, ptr_mcv.deref(), src_mcv),
+        .memory,
+        .indirect,
+        .load_direct,
+        .load_got,
+        .load_tlv,
+        .load_frame,
+        => {
+            const addr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv);
+            const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
+            defer self.register_manager.unlockReg(addr_lock);
 
-                    try self.store(ptr, .{ .register = tmp_reg }, ptr_ty, value_ty);
-                },
-            }
+            try self.genCopy(src_ty, .{ .indirect = .{ .reg = addr_reg } }, src_mcv);
         },
-        .memory, .load_direct, .load_tlv => {
-            const value_lock: ?RegisterLock = switch (value) {
-                .register => |reg| self.register_manager.lockReg(reg),
-                else => null,
-            };
-            defer if (value_lock) |lock| self.register_manager.unlockReg(lock);
-
-            const addr_reg = (try self.register_manager.allocReg(null, gp)).to64();
-            const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
-            defer self.register_manager.unlockReg(addr_reg_lock);
-
-            switch (ptr) {
-                .memory => |addr| try self.genSetReg(ptr_ty, addr_reg, .{ .immediate = addr }),
-                .load_direct => |sym_index| try self.genSetReg(ptr_ty, addr_reg, .{ .lea_direct = sym_index }),
-                .load_tlv => |sym_index| try self.genSetReg(ptr_ty, addr_reg, .{ .lea_tlv = sym_index }),
-                else => unreachable,
-            }
-            if (ptr != .load_tlv) {
-                // Load the pointer, which is stored in memory
-                try self.asmRegisterMemory(.mov, addr_reg, Memory.sib(.qword, .{ .base = addr_reg }));
-            }
-
-            const new_ptr = MCValue{ .register = addr_reg };
-            try self.store(new_ptr, value, ptr_ty, value_ty);
-        },
-        .load_got, .lea_direct, .lea_tlv => {
-            const addr_reg = try self.copyToTmpRegister(ptr_ty, ptr);
-            const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
-            defer self.register_manager.unlockReg(addr_reg_lock);
-
-            const new_ptr = MCValue{ .register = addr_reg };
-            try self.store(new_ptr, value, ptr_ty, value_ty);
-        },
-    }
-}
+    }
+}
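
The rewritten `store` reduces everything to `genCopy`: a pointer that is an address computation (`immediate`, `register_offset`, `lea_frame`, ...) becomes a directly addressable destination via `deref()`, while a pointer that itself lives in memory (`load_frame`, `memory`, ...) is first materialized into a register and then stored through as `.indirect`. A stripped-down model of that lea/load duality (simplified types; not the compiler's full MCValue):

const std = @import("std");

// The address of a load is the matching lea, and dereferencing a lea gives
// the matching load, so `store` only ever copies into a dereferenced value.
const FrameIndex = enum(u32) { _ };
const FrameAddr = struct { index: FrameIndex, off: i32 = 0 };

const Value = union(enum) {
    lea_frame: FrameAddr,
    load_frame: FrameAddr,

    fn deref(v: Value) Value {
        return switch (v) {
            .lea_frame => |addr| .{ .load_frame = addr },
            else => unreachable, // not an address computation
        };
    }

    fn address(v: Value) Value {
        return switch (v) {
            .load_frame => |addr| .{ .lea_frame = addr },
            else => unreachable, // has no cheap address
        };
    }
};

test "deref and address are inverses" {
    const ptr = Value{ .lea_frame = .{ .index = @intToEnum(FrameIndex, 0), .off = 8 } };
    try std.testing.expect(ptr.deref() == .load_frame);
    try std.testing.expect(std.meta.eql(ptr, ptr.deref().address()));
}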
 
 fn airStore(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
     if (safety) {
@@ -3944,15 +4169,13 @@ fn airStore(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
         // TODO if the value is undef, don't lower this instruction
     }
     const bin_op = self.air.instructions.items(.data)[inst].bin_op;
-    const ptr = try self.resolveInst(bin_op.lhs);
+    const ptr_mcv = try self.resolveInst(bin_op.lhs);
     const ptr_ty = self.air.typeOf(bin_op.lhs);
-    const value = try self.resolveInst(bin_op.rhs);
-    const value_ty = self.air.typeOf(bin_op.rhs);
-    log.debug("airStore(%{d}): {} <- {}", .{ inst, ptr, value });
+    const src_mcv = try self.resolveInst(bin_op.rhs);
     if (ptr_ty.ptrInfo().data.host_size > 0) {
-        try self.packedStore(ptr, value, ptr_ty, value_ty);
+        try self.packedStore(ptr_ty, ptr_mcv, src_mcv);
     } else {
-        try self.store(ptr, value, ptr_ty, value_ty);
+        try self.store(ptr_ty, ptr_mcv, src_mcv);
     }
     return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none });
 }
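
`host_size > 0` identifies pointers into packed storage: the pointee is a bitfield within a host integer of that many bytes, so a plain `mov`-based `store` would clobber the neighboring fields, and the `packedStore` limb path is taken instead. In user-level terms, such pointer types come from packed structs:

const std = @import("std");

const S = packed struct { a: u3, b: u7, c: u6 };

test "packed field pointers carry a host size" {
    var s = S{ .a = 1, .b = 2, .c = 3 };
    // &s.b is (roughly) *align(2:3:2) u7: a 7-bit field at bit offset 3
    // inside a 2-byte host integer -- the host_size checked above.
    const p = &s.b;
    p.* = 0x55;
    try std.testing.expectEqual(@as(u7, 0x55), s.b);
}
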
@@ -3971,67 +4194,65 @@ fn airStructFieldPtrIndex(self: *Self, inst: Air.Inst.Index, index: u8) !void {
 }
 
 fn fieldPtr(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, index: u32) !MCValue {
+    const ptr_field_ty = self.air.typeOfIndex(inst);
     const mcv = try self.resolveInst(operand);
-    const ptr_ty = self.air.typeOf(operand);
-    const container_ty = ptr_ty.childType();
+    const ptr_container_ty = self.air.typeOf(operand);
+    const container_ty = ptr_container_ty.childType();
     const field_offset = switch (container_ty.containerLayout()) {
         .Auto, .Extern => @intCast(u32, container_ty.structFieldOffset(index, self.target.*)),
-        .Packed => if (container_ty.zigTypeTag() == .Struct and ptr_ty.ptrInfo().data.host_size == 0)
+        .Packed => if (container_ty.zigTypeTag() == .Struct and
+            ptr_field_ty.ptrInfo().data.host_size == 0)
             container_ty.packedStructFieldByteOffset(index, self.target.*)
         else
             0,
     };
 
-    const dst_mcv: MCValue = result: {
+    const result: MCValue = result: {
         switch (mcv) {
-            .stack_offset, .lea_tlv, .load_tlv => {
-                const offset_reg = try self.copyToTmpRegister(ptr_ty, .{
+            .load_frame, .lea_tlv, .load_tlv => {
+                const offset_reg = try self.copyToTmpRegister(Type.usize, .{
                     .immediate = field_offset,
                 });
                 const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
                 defer self.register_manager.unlockReg(offset_reg_lock);
 
-                const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ptr_ty, switch (mcv) {
+                const dst_mcv = try self.copyToRegisterWithInstTracking(inst, Type.usize, switch (mcv) {
                     .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
                     else => mcv,
                 });
-                try self.genBinOpMir(.add, ptr_ty, dst_mcv, .{ .register = offset_reg });
+                try self.genBinOpMir(.add, Type.usize, dst_mcv, .{ .register = offset_reg });
                 break :result dst_mcv;
             },
-            .ptr_stack_offset => |off| {
-                const ptr_stack_offset = off - @intCast(i32, field_offset);
-                break :result MCValue{ .ptr_stack_offset = ptr_stack_offset };
-            },
-            .register => |reg| {
-                const reg_lock = self.register_manager.lockRegAssumeUnused(reg);
-                defer self.register_manager.unlockReg(reg_lock);
-
-                const offset_reg = try self.copyToTmpRegister(ptr_ty, .{
-                    .immediate = field_offset,
-                });
-                const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg);
-                defer self.register_manager.unlockReg(offset_reg_lock);
-
-                const can_reuse_operand = self.reuseOperand(inst, operand, 0, mcv);
-                const result_reg: Register = blk: {
-                    if (can_reuse_operand) {
-                        break :blk reg;
-                    } else {
-                        const result_reg = try self.register_manager.allocReg(inst, gp);
-                        try self.genSetReg(ptr_ty, result_reg, mcv);
-                        break :blk result_reg;
-                    }
-                };
-                const result_reg_lock = self.register_manager.lockReg(result_reg);
-                defer if (result_reg_lock) |lock| self.register_manager.unlockReg(lock);
+            .indirect => |reg_off| break :result .{ .indirect = .{
+                .reg = reg_off.reg,
+                .off = reg_off.off + @intCast(i32, field_offset),
+            } },
+            .lea_frame => |frame_addr| break :result .{ .lea_frame = .{
+                .index = frame_addr.index,
+                .off = frame_addr.off + @intCast(i32, field_offset),
+            } },
+            .register, .register_offset => {
+                const src_reg = mcv.getReg().?;
+                const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
+                defer self.register_manager.unlockReg(src_lock);
 
-                try self.genBinOpMir(.add, ptr_ty, .{ .register = result_reg }, .{ .register = offset_reg });
-                break :result MCValue{ .register = result_reg };
+                const dst_mcv: MCValue = if (self.reuseOperand(inst, operand, 0, mcv))
+                    mcv
+                else
+                    .{ .register = try self.copyToTmpRegister(ptr_field_ty, mcv) };
+                break :result .{ .register_offset = .{
+                    .reg = dst_mcv.getReg().?,
+                    .off = switch (dst_mcv) {
+                        .register => 0,
+                        .register_offset => |reg_off| reg_off.off,
+                        else => unreachable,
+                    } + @intCast(i32, field_offset),
+                } };
             },
             else => return self.fail("TODO implement fieldPtr for {}", .{mcv}),
         }
     };
-    return dst_mcv;
+    return result;
 }
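
The payoff of the new offset-carrying variants shows up here: where the deleted `.register` branch materialized `base + field_offset` with a temporary register and an `add`, `fieldPtr` now folds the constant into the value itself (`register_offset`, `indirect`, `lea_frame`) and defers the arithmetic until the pointer is used, where it typically disappears into an addressing-mode displacement. A sketch of the folding rule on a reduced pointer value (illustrative types only):

const std = @import("std");

const RegOff = struct { reg: u8, off: i32 = 0 };

const Ptr = union(enum) {
    register: u8,
    register_offset: RegOff,

    // Adding a constant never emits code here; it just rewrites the value,
    // like the .register/.register_offset case above.
    fn offset(p: Ptr, off: i32) Ptr {
        return switch (p) {
            .register => |reg| .{ .register_offset = .{ .reg = reg, .off = off } },
            .register_offset => |ro| .{ .register_offset = .{
                .reg = ro.reg,
                .off = ro.off + off,
            } },
        };
    }
};

test "constant offsets fold" {
    const base = Ptr{ .register = 3 };
    const p = base.offset(8).offset(16);
    try std.testing.expectEqual(@as(i32, 24), p.register_offset.off);
}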
 
 fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
@@ -4055,7 +4276,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
         };
 
         switch (src_mcv) {
-            .stack_offset => |src_off| {
+            .load_frame => |frame_addr| {
                 const field_abi_size = @intCast(u32, field_ty.abiSize(self.target.*));
                 const limb_abi_size = @min(field_abi_size, 8);
                 const limb_abi_bits = limb_abi_size * 8;
@@ -4063,11 +4284,14 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
                 const field_bit_off = field_off % limb_abi_bits;
 
                 if (field_bit_off == 0) {
-                    const off_mcv = MCValue{ .stack_offset = src_off - field_byte_off };
+                    const off_mcv = MCValue{ .load_frame = .{
+                        .index = frame_addr.index,
+                        .off = frame_addr.off + field_byte_off,
+                    } };
                     if (self.reuseOperand(inst, operand, 0, src_mcv)) break :result off_mcv;
 
                     const dst_mcv = try self.allocRegOrMem(inst, true);
-                    try self.setRegOrMem(field_ty, dst_mcv, off_mcv);
+                    try self.genCopy(field_ty, dst_mcv, off_mcv);
                     break :result dst_mcv;
                 }
 
@@ -4081,10 +4305,14 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
                     if (field_bit_off < field_extra_bits) field_abi_size else field_abi_size * 2;
                 if (load_abi_size <= 8) {
                     const load_reg = registerAlias(dst_reg, load_abi_size);
-                    try self.asmRegisterMemory(.mov, load_reg, Memory.sib(
-                        Memory.PtrSize.fromSize(load_abi_size),
-                        .{ .base = .rbp, .disp = field_byte_off - src_off },
-                    ));
+                    try self.asmRegisterMemory(
+                        .mov,
+                        load_reg,
+                        Memory.sib(Memory.PtrSize.fromSize(load_abi_size), .{
+                            .base = .{ .frame = frame_addr.index },
+                            .disp = frame_addr.off + field_byte_off,
+                        }),
+                    );
                     try self.asmRegisterImmediate(.shr, load_reg, Immediate.u(field_bit_off));
                 } else {
                     const tmp_reg = registerAlias(
@@ -4095,14 +4323,22 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
                     defer self.register_manager.unlockReg(tmp_lock);
 
                     const dst_alias = registerAlias(dst_reg, field_abi_size);
-                    try self.asmRegisterMemory(.mov, dst_alias, Memory.sib(
-                        Memory.PtrSize.fromSize(field_abi_size),
-                        .{ .base = .rbp, .disp = field_byte_off - src_off },
-                    ));
-                    try self.asmRegisterMemory(.mov, tmp_reg, Memory.sib(
-                        Memory.PtrSize.fromSize(field_abi_size),
-                        .{ .base = .rbp, .disp = field_byte_off + 1 - src_off },
-                    ));
+                    try self.asmRegisterMemory(
+                        .mov,
+                        dst_alias,
+                        Memory.sib(Memory.PtrSize.fromSize(field_abi_size), .{
+                            .base = .{ .frame = frame_addr.index },
+                            .disp = frame_addr.off + field_byte_off,
+                        }),
+                    );
+                    try self.asmRegisterMemory(
+                        .mov,
+                        tmp_reg,
+                        Memory.sib(Memory.PtrSize.fromSize(field_abi_size), .{
+                            .base = .{ .frame = frame_addr.index },
+                            .disp = frame_addr.off + field_byte_off + @intCast(i32, limb_abi_size),
+                        }),
+                    );
                     try self.asmRegisterRegisterImmediate(
                         .shrd,
                         dst_alias,
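
For a field that straddles two 8-byte limbs, a single `shr` cannot produce the value, so the code above loads both limbs and combines them with `shrd`, which shifts bits from the second register into the first. What the emitted `mov`/`mov`/`shrd` computes, as plain integer arithmetic (a sketch assuming 64-bit limbs):

const std = @import("std");

// shrd lo, hi, n computes (lo >> n) | (hi << (64 - n)) for 0 < n < 64:
// the low bits of `hi` are shifted in from the top of `lo`.
fn shrd(lo: u64, hi: u64, n: u6) u64 {
    return (lo >> n) | (hi << @intCast(u6, 64 - @as(u7, n)));
}

test "field straddling a limb boundary" {
    // A 16-bit field at bit offset 60: its low 4 bits are the top of limb 0
    // and its high 12 bits are the bottom of limb 1.
    const limb0: u64 = @as(u64, 0xA) << 60;
    const limb1: u64 = 0xBCD;
    const field = @truncate(u16, shrd(limb0, limb1, 60));
    try std.testing.expectEqual(@as(u16, 0xBCDA), field);
}
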
@@ -4249,41 +4485,51 @@ fn genUnOp(self: *Self, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air:
 
 fn genUnOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValue) !void {
     const abi_size = @intCast(u32, dst_ty.abiSize(self.target.*));
+    if (abi_size > 8) return self.fail("TODO implement {} for {}", .{
+        mir_tag,
+        dst_ty.fmt(self.bin_file.options.module.?),
+    });
     switch (dst_mcv) {
-        .none => unreachable,
-        .undef => unreachable,
-        .dead, .unreach, .immediate => unreachable,
-        .eflags => unreachable,
-        .register_overflow => unreachable,
+        .none,
+        .unreach,
+        .dead,
+        .undef,
+        .immediate,
+        .register_offset,
+        .eflags,
+        .register_overflow,
+        .lea_direct,
+        .lea_got,
+        .lea_tlv,
+        .lea_frame,
+        .reserved_frame,
+        => unreachable, // unmodifiable destination
         .register => |dst_reg| try self.asmRegister(mir_tag, registerAlias(dst_reg, abi_size)),
-        .stack_offset => |off| {
-            if (abi_size > 8) {
-                return self.fail("TODO implement {} for stack dst with large ABI", .{mir_tag});
-            }
-
-            try self.asmMemory(mir_tag, Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
-                .base = .rbp,
-                .disp = -off,
-            }));
-        },
-        .ptr_stack_offset => unreachable,
-        .load_got, .lea_direct, .lea_tlv => unreachable,
-        .memory, .load_direct, .load_tlv => {
-            const addr_reg = (try self.register_manager.allocReg(null, gp)).to64();
+        .memory, .load_got, .load_direct, .load_tlv => {
+            const addr_reg = try self.register_manager.allocReg(null, gp);
             const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
             defer self.register_manager.unlockReg(addr_reg_lock);
 
-            try self.genSetReg(Type.usize, addr_reg, switch (dst_mcv) {
-                .memory => |addr| .{ .immediate = addr },
-                .load_direct => |sym_index| .{ .lea_direct = sym_index },
-                .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
-                else => unreachable,
-            });
+            try self.genSetReg(addr_reg, Type.usize, dst_mcv.address());
             try self.asmMemory(
                 mir_tag,
-                Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = addr_reg }),
+                Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .{ .reg = addr_reg } }),
             );
         },
+        .indirect, .load_frame => try self.asmMemory(
+            mir_tag,
+            Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (dst_mcv) {
+                .indirect => |reg_off| .{
+                    .base = .{ .reg = reg_off.reg },
+                    .disp = reg_off.off,
+                },
+                .load_frame => |frame_addr| .{
+                    .base = .{ .frame = frame_addr.index },
+                    .disp = frame_addr.off,
+                },
+                else => unreachable,
+            }),
+        ),
     }
 }
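
The `.indirect`/`.load_frame` arm shows the central idea of this commit: a memory operand's base is now either a concrete register or an abstract frame index, so frame-relative operands can be emitted before the final stack layout is known. A toy model of the deferred resolution (hypothetical names and layout results; the real resolution happens later in the backend):

const std = @import("std");

// Operands are built against abstract frame indices; after frame layout,
// each index maps to a concrete base register plus offset, and the
// displacements are summed at emit time.
const Base = union(enum) { reg: u8, frame: u32 };
const Mem = struct { base: Base, disp: i32 };
const Resolved = struct { reg: u8, disp: i32 };

fn resolve(m: Mem, frame_offsets: []const i32, frame_reg: u8) Resolved {
    return switch (m.base) {
        .reg => |r| .{ .reg = r, .disp = m.disp },
        .frame => |i| .{ .reg = frame_reg, .disp = frame_offsets[i] + m.disp },
    };
}

test "frame bases resolve after layout" {
    const offsets = [_]i32{ -8, -24 }; // as if chosen by the layout pass
    const m = Mem{ .base = .{ .frame = 1 }, .disp = 4 };
    const r = resolve(m, &offsets, 5); // 5 standing in for rbp
    try std.testing.expectEqual(@as(i32, -20), r.disp);
    try std.testing.expectEqual(@as(u8, 5), r.reg);
}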
 
@@ -4305,7 +4551,7 @@ fn genShiftBinOpMir(
             else => {},
         }
         self.register_manager.getRegAssumeFree(.rcx, null);
-        try self.genSetReg(Type.u8, .rcx, shift_mcv);
+        try self.genSetReg(.cl, Type.u8, shift_mcv);
         break :rhs .{ .register = .rcx };
     };
 
@@ -4328,21 +4574,42 @@ fn genShiftBinOpMir(
                     @tagName(rhs_mcv),
                 }),
             },
-            .stack_offset => |lhs_off| switch (rhs_mcv) {
-                .immediate => |rhs_imm| try self.asmMemoryImmediate(
-                    tag,
-                    Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .rbp, .disp = -lhs_off }),
-                    Immediate.u(rhs_imm),
-                ),
-                .register => |rhs_reg| try self.asmMemoryRegister(
-                    tag,
-                    Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .rbp, .disp = -lhs_off }),
-                    registerAlias(rhs_reg, 1),
-                ),
-                else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
-                    @tagName(lhs_mcv),
-                    @tagName(rhs_mcv),
-                }),
+            .memory, .indirect, .load_frame => {
+                const lhs_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (lhs_mcv) {
+                    .memory => |addr| .{
+                        .base = .{ .reg = .ds },
+                        .disp = math.cast(i32, @bitCast(i64, addr)) orelse
+                            return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
+                            @tagName(lhs_mcv),
+                            @tagName(rhs_mcv),
+                        }),
+                    },
+                    .indirect => |reg_off| .{
+                        .base = .{ .reg = reg_off.reg },
+                        .disp = reg_off.off,
+                    },
+                    .load_frame => |frame_addr| .{
+                        .base = .{ .frame = frame_addr.index },
+                        .disp = frame_addr.off,
+                    },
+                    else => unreachable,
+                });
+                switch (rhs_mcv) {
+                    .immediate => |rhs_imm| try self.asmMemoryImmediate(
+                        tag,
+                        lhs_mem,
+                        Immediate.u(rhs_imm),
+                    ),
+                    .register => |rhs_reg| try self.asmMemoryRegister(
+                        tag,
+                        lhs_mem,
+                        registerAlias(rhs_reg, 1),
+                    ),
+                    else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
+                        @tagName(lhs_mcv),
+                        @tagName(rhs_mcv),
+                    }),
+                }
             },
             else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{
                 @tagName(lhs_mcv),
@@ -4360,22 +4627,31 @@ fn genShiftBinOpMir(
             else => unreachable,
         };
         switch (lhs_mcv) {
-            .stack_offset => |dst_off| switch (rhs_mcv) {
+            .load_frame => |dst_frame_addr| switch (rhs_mcv) {
                 .immediate => |rhs_imm| if (rhs_imm == 0) {} else if (rhs_imm < 64) {
                     try self.asmRegisterMemory(
                         .mov,
                         tmp_reg,
-                        Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[0] - dst_off }),
+                        Memory.sib(.qword, .{
+                            .base = .{ .frame = dst_frame_addr.index },
+                            .disp = dst_frame_addr.off + info.offsets[0],
+                        }),
                     );
                     try self.asmMemoryRegisterImmediate(
                         info.double_tag,
-                        Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[1] - dst_off }),
+                        Memory.sib(.qword, .{
+                            .base = .{ .frame = dst_frame_addr.index },
+                            .disp = dst_frame_addr.off + info.offsets[1],
+                        }),
                         tmp_reg,
                         Immediate.u(rhs_imm),
                     );
                     try self.asmMemoryImmediate(
                         tag,
-                        Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[0] - dst_off }),
+                        Memory.sib(.qword, .{
+                            .base = .{ .frame = dst_frame_addr.index },
+                            .disp = dst_frame_addr.off + info.offsets[0],
+                        }),
                         Immediate.u(rhs_imm),
                     );
                 } else {
@@ -4383,14 +4659,20 @@ fn genShiftBinOpMir(
                     try self.asmRegisterMemory(
                         .mov,
                         tmp_reg,
-                        Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[0] - dst_off }),
+                        Memory.sib(.qword, .{
+                            .base = .{ .frame = dst_frame_addr.index },
+                            .disp = dst_frame_addr.off + info.offsets[0],
+                        }),
                     );
                     if (rhs_imm > 64) {
                         try self.asmRegisterImmediate(tag, tmp_reg, Immediate.u(rhs_imm - 64));
                     }
                     try self.asmMemoryRegister(
                         .mov,
-                        Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[1] - dst_off }),
+                        Memory.sib(.qword, .{
+                            .base = .{ .frame = dst_frame_addr.index },
+                            .disp = dst_frame_addr.off + info.offsets[1],
+                        }),
                         tmp_reg,
                     );
                     switch (tag) {
@@ -4398,13 +4680,19 @@ fn genShiftBinOpMir(
                             try self.asmRegisterRegister(.xor, tmp_reg.to32(), tmp_reg.to32());
                             try self.asmMemoryRegister(
                                 .mov,
-                                Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[0] - dst_off }),
+                                Memory.sib(.qword, .{
+                                    .base = .{ .frame = dst_frame_addr.index },
+                                    .disp = dst_frame_addr.off + info.offsets[0],
+                                }),
                                 tmp_reg,
                             );
                         },
                         .sar => try self.asmMemoryImmediate(
                             tag,
-                            Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[0] - dst_off }),
+                            Memory.sib(.qword, .{
+                                .base = .{ .frame = dst_frame_addr.index },
+                                .disp = dst_frame_addr.off + info.offsets[0],
+                            }),
                             Immediate.u(63),
                         ),
                         else => unreachable,
@@ -4419,16 +4707,22 @@ fn genShiftBinOpMir(
                     const second_lock = self.register_manager.lockRegAssumeUnused(second_reg);
                     defer self.register_manager.unlockReg(second_lock);
 
-                    try self.genSetReg(Type.u8, .cl, rhs_mcv);
+                    try self.genSetReg(.cl, Type.u8, rhs_mcv);
                     try self.asmRegisterMemory(
                         .mov,
                         first_reg,
-                        Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[0] - dst_off }),
+                        Memory.sib(.qword, .{
+                            .base = .{ .frame = dst_frame_addr.index },
+                            .disp = dst_frame_addr.off + info.offsets[0],
+                        }),
                     );
                     try self.asmRegisterMemory(
                         .mov,
                         second_reg,
-                        Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[1] - dst_off }),
+                        Memory.sib(.qword, .{
+                            .base = .{ .frame = dst_frame_addr.index },
+                            .disp = dst_frame_addr.off + info.offsets[1],
+                        }),
                     );
                     switch (tag) {
                         .shl, .sal, .shr => try self.asmRegisterRegister(
@@ -4449,12 +4743,18 @@ fn genShiftBinOpMir(
                     try self.asmCmovccRegisterRegister(first_reg, tmp_reg, .ae);
                     try self.asmMemoryRegister(
                         .mov,
-                        Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[1] - dst_off }),
+                        Memory.sib(.qword, .{
+                            .base = .{ .frame = dst_frame_addr.index },
+                            .disp = dst_frame_addr.off + info.offsets[1],
+                        }),
                         second_reg,
                     );
                     try self.asmMemoryRegister(
                         .mov,
-                        Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[0] - dst_off }),
+                        Memory.sib(.qword, .{
+                            .base = .{ .frame = dst_frame_addr.index },
+                            .disp = dst_frame_addr.off + info.offsets[0],
+                        }),
                         first_reg,
                     );
                 },
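
For 128-bit shifts with a runtime count, the sequence above is the classic branch-free pattern: double-shift the limb pair (`shld`/`shrd` and the plain shift see only the low 6 bits of `cl`), compute the count >= 64 alternative in scratch registers, and let `cmov` pick the right pair. The same algorithm in plain Zig, for a logical right shift split across two u64 limbs (a sketch; the compare against 64 that feeds the `cmovae` is outside the hunk shown):

const std = @import("std");

// Branch-free 128-bit logical right shift over two 64-bit limbs, mirroring
// the shrd/shr plus cmov sequence above; `n` may be 0..127.
fn shr128(hi: u64, lo: u64, n: u7) struct { hi: u64, lo: u64 } {
    const small = @truncate(u6, n); // the hardware shifts see only cl & 63
    // Candidate results as if n < 64 (shrd semantics; n == 0 is special-cased
    // because shifting `hi` left by 64 is not defined).
    const lo_lt = if (small == 0) lo else (lo >> small) | (hi << @intCast(u6, 64 - @as(u7, small)));
    const hi_lt = hi >> small;
    // Candidate results as if n >= 64 -- the cmov operands above.
    const lo_ge = hi >> small;
    const hi_ge: u64 = 0;
    const ge = n >= 64; // the compare that drives the cmov
    return .{ .hi = if (ge) hi_ge else hi_lt, .lo = if (ge) lo_ge else lo_lt };
}

test "matches the native u128 shift" {
    const x: u128 = 0x0123_4567_89AB_CDEF_0011_2233_4455_6677;
    var n: u8 = 0;
    while (n < 128) : (n += 1) {
        const r = shr128(@truncate(u64, x >> 64), @truncate(u64, x), @intCast(u7, n));
        const want = x >> @intCast(u7, n);
        try std.testing.expectEqual(@truncate(u64, want >> 64), r.hi);
        try std.testing.expectEqual(@truncate(u64, want), r.lo);
    }
}
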
@@ -4515,7 +4815,7 @@ fn genShiftBinOp(
             if (self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv)) break :dst lhs_mcv;
         }
         const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, maybe_inst, true);
-        try self.setRegOrMem(lhs_ty, dst_mcv, lhs_mcv);
+        try self.genCopy(lhs_ty, dst_mcv, lhs_mcv);
         break :dst dst_mcv;
     };
 
@@ -4610,14 +4910,22 @@ fn genMulDivBinOp(
             }, dst_abi_size) };
 
             const dst_mcv = try self.allocRegOrMemAdvanced(dst_ty, maybe_inst, false);
-            try self.asmMemoryRegister(.mov, Memory.sib(.qword, .{
-                .base = .rbp,
-                .disp = 0 - dst_mcv.stack_offset,
-            }), .rax);
-            try self.asmMemoryRegister(.mov, Memory.sib(.qword, .{
-                .base = .rbp,
-                .disp = 8 - dst_mcv.stack_offset,
-            }), .rdx);
+            try self.asmMemoryRegister(
+                .mov,
+                Memory.sib(.qword, .{
+                    .base = .{ .frame = dst_mcv.load_frame.index },
+                    .disp = dst_mcv.load_frame.off,
+                }),
+                .rax,
+            );
+            try self.asmMemoryRegister(
+                .mov,
+                Memory.sib(.qword, .{
+                    .base = .{ .frame = dst_mcv.load_frame.index },
+                    .disp = dst_mcv.load_frame.off + 8,
+                }),
+                .rdx,
+            );
             return dst_mcv;
         },
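
A 64-bit `mul`/`imul` leaves its 128-bit product in `rdx:rax`; the code above now spills that pair into a frame-indexed slot as two qwords at offsets 0 and 8 (little-endian limb order) instead of displacements hard-coded against `rbp`. The limb split being stored, modeled on u128:

const std = @import("std");

// The rdx:rax convention: low qword at offset 0, high qword at offset 8,
// matching the two frame stores above.
fn mulWide(a: u64, b: u64) [2]u64 {
    const wide = @as(u128, a) * b;
    return .{ @truncate(u64, wide), @truncate(u64, wide >> 64) }; // rax, rdx
}

test "limb order of the spilled product" {
    const limbs = mulWide(~@as(u64, 0), 2);
    try std.testing.expectEqual(~@as(u64, 0) - 1, limbs[0]);
    try std.testing.expectEqual(@as(u64, 1), limbs[1]);
}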
 
@@ -4775,11 +5083,7 @@ fn genBinOp(
             }
         }
         const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, maybe_inst, true);
-        try self.setRegOrMem(lhs_ty, dst_mcv, switch (lhs) {
-            .load_direct => |sym_index| .{ .lea_direct = sym_index },
-            .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
-            else => lhs,
-        });
+        try self.genCopy(lhs_ty, dst_mcv, lhs);
         break :dst dst_mcv;
     };
     const dst_mcv_lock: ?RegisterLock = switch (dst_mcv) {
@@ -4878,10 +5182,21 @@ fn genBinOp(
         .max,
         => switch (lhs_ty.zigTypeTag()) {
             .Int => {
-                const mat_src_mcv = switch (src_mcv) {
-                    .immediate => MCValue{ .register = try self.copyToTmpRegister(rhs_ty, src_mcv) },
-                    else => src_mcv,
-                };
+                const mat_src_mcv: MCValue = if (switch (src_mcv) {
+                    .immediate,
+                    .eflags,
+                    .register_offset,
+                    .load_direct,
+                    .lea_direct,
+                    .load_got,
+                    .lea_got,
+                    .load_tlv,
+                    .lea_tlv,
+                    .lea_frame,
+                    => true,
+                    .memory => |addr| math.cast(i32, @bitCast(i64, addr)) == null,
+                    else => false,
+                }) .{ .register = try self.copyToTmpRegister(rhs_ty, src_mcv) } else src_mcv;
                 const mat_mcv_lock = switch (mat_src_mcv) {
                     .register => |reg| self.register_manager.lockReg(reg),
                     else => null,
@@ -4913,57 +5228,56 @@ fn genBinOp(
                 defer if (tmp_lock) |lock| self.register_manager.unlockReg(lock);
                 switch (mat_src_mcv) {
                     .none,
-                    .undef,
-                    .dead,
                     .unreach,
+                    .dead,
+                    .undef,
                     .immediate,
                     .eflags,
+                    .register_offset,
                     .register_overflow,
-                    .ptr_stack_offset,
-                    .load_got,
+                    .load_direct,
                     .lea_direct,
+                    .load_got,
+                    .lea_got,
+                    .load_tlv,
                     .lea_tlv,
+                    .lea_frame,
+                    .reserved_frame,
                     => unreachable,
                     .register => |src_reg| try self.asmCmovccRegisterRegister(
                         registerAlias(tmp_reg, cmov_abi_size),
                         registerAlias(src_reg, cmov_abi_size),
                         cc,
                     ),
-                    .stack_offset => |off| try self.asmCmovccRegisterMemory(
+                    .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory(
                         registerAlias(tmp_reg, cmov_abi_size),
-                        Memory.sib(Memory.PtrSize.fromSize(cmov_abi_size), .{
-                            .base = .rbp,
-                            .disp = -off,
+                        Memory.sib(Memory.PtrSize.fromSize(cmov_abi_size), switch (mat_src_mcv) {
+                            .memory => |addr| .{
+                                .base = .{ .reg = .ds },
+                                .disp = @intCast(i32, @bitCast(i64, addr)),
+                            },
+                            .indirect => |reg_off| .{
+                                .base = .{ .reg = reg_off.reg },
+                                .disp = reg_off.off,
+                            },
+                            .load_frame => |frame_addr| .{
+                                .base = .{ .frame = frame_addr.index },
+                                .disp = frame_addr.off,
+                            },
+                            else => unreachable,
                         }),
                         cc,
                     ),
-                    .memory, .load_direct, .load_tlv => {
-                        const addr_reg = (try self.register_manager.allocReg(null, gp)).to64();
-                        const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
-                        defer self.register_manager.unlockReg(addr_reg_lock);
-
-                        try self.genSetReg(Type.usize, addr_reg, switch (mat_src_mcv) {
-                            .memory => |addr| .{ .immediate = addr },
-                            .load_direct => |sym_index| .{ .lea_direct = sym_index },
-                            .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
-                            else => unreachable,
-                        });
-                        try self.asmCmovccRegisterMemory(
-                            registerAlias(tmp_reg, cmov_abi_size),
-                            Memory.sib(Memory.PtrSize.fromSize(cmov_abi_size), .{ .base = addr_reg }),
-                            cc,
-                        );
-                    },
                 }
-                try self.setRegOrMem(lhs_ty, dst_mcv, .{ .register = tmp_reg });
+                try self.genCopy(lhs_ty, dst_mcv, .{ .register = tmp_reg });
             },
-            .Float => try self.genBinOpMir(switch (lhs_ty.tag()) {
-                .f32 => switch (tag) {
+            .Float => try self.genBinOpMir(switch (lhs_ty.floatBits(self.target.*)) {
+                32 => switch (tag) {
                     .min => .minss,
                     .max => .maxss,
                     else => unreachable,
                 },
-                .f64 => switch (tag) {
+                64 => switch (tag) {
                     .min => .minsd,
                     .max => .maxsd,
                     else => unreachable,
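
The widened `mat_src_mcv` condition (and the `math.cast(i32, ...)` checks elsewhere in this diff) encode an instruction-encoding constraint rather than a semantic one: an x86-64 memory operand's displacement is a sign-extended 32-bit field, so an absolute `.memory` address is usable in place only if it round-trips through `i32`; anything else is first copied into a register. The fit test, spelled out:

const std = @import("std");
const math = std.math;

// An absolute address encodes as a [ds: disp32] operand only if it survives
// the round-trip through a sign-extended 32-bit displacement.
fn asDisp32(addr: u64) ?i32 {
    return math.cast(i32, @bitCast(i64, addr));
}

test "disp32 encodability" {
    try std.testing.expectEqual(@as(?i32, 0x1000), asDisp32(0x1000));
    // Sign extension also makes high (kernel-half) addresses encodable.
    try std.testing.expectEqual(@as(?i32, -0x1000), asDisp32(0xFFFF_FFFF_FFFF_F000));
    try std.testing.expectEqual(@as(?i32, null), asDisp32(0x1_0000_0000));
}
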
@@ -4981,25 +5295,38 @@ fn genBinOp(
 fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) !void {
     const abi_size = @intCast(u32, ty.abiSize(self.target.*));
     switch (dst_mcv) {
-        .none => unreachable,
-        .undef => unreachable,
-        .dead, .unreach, .immediate => unreachable,
-        .eflags => unreachable,
-        .register_overflow => unreachable,
+        .none,
+        .unreach,
+        .dead,
+        .undef,
+        .immediate,
+        .register_offset,
+        .eflags,
+        .register_overflow,
+        .lea_direct,
+        .lea_got,
+        .lea_tlv,
+        .lea_frame,
+        .reserved_frame,
+        => unreachable, // unmodifiable destination
         .register => |dst_reg| {
             const dst_alias = registerAlias(dst_reg, abi_size);
             switch (src_mcv) {
-                .none => unreachable,
-                .undef => unreachable,
-                .dead, .unreach => unreachable,
-                .register_overflow => unreachable,
+                .none,
+                .unreach,
+                .dead,
+                .undef,
+                .register_overflow,
+                .reserved_frame,
+                => unreachable,
                 .register => |src_reg| switch (ty.zigTypeTag()) {
                     .Float => {
-                        if (intrinsicsAllowed(self.target.*, ty)) {
-                            return self.asmRegisterRegister(mir_tag, dst_reg.to128(), src_reg.to128());
-                        }
-
-                        return self.fail("TODO genBinOpMir for float register-register and no intrinsics", .{});
+                        if (!Target.x86.featureSetHas(self.target.cpu.features, .sse))
+                            return self.fail("TODO genBinOpMir for {s} {} without sse", .{
+                                @tagName(mir_tag),
+                                ty.fmt(self.bin_file.options.module.?),
+                            });
+                        return self.asmRegisterRegister(mir_tag, dst_reg.to128(), src_reg.to128());
                     },
                     else => try self.asmRegisterRegister(
                         mir_tag,
@@ -5041,14 +5368,16 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s
                         )),
                     else => unreachable,
                 },
-                .ptr_stack_offset,
+                .register_offset,
+                .eflags,
                 .memory,
-                .load_got,
-                .lea_direct,
                 .load_direct,
-                .lea_tlv,
+                .lea_direct,
+                .load_got,
+                .lea_got,
                 .load_tlv,
-                .eflags,
+                .lea_tlv,
+                .lea_frame,
                 => {
                     assert(abi_size <= 8);
                     const dst_reg_lock = self.register_manager.lockReg(dst_reg);
@@ -5057,75 +5386,84 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s
                     const reg = try self.copyToTmpRegister(ty, src_mcv);
                     return self.genBinOpMir(mir_tag, ty, dst_mcv, .{ .register = reg });
                 },
-                .stack_offset => |off| try self.asmRegisterMemory(
+                .indirect, .load_frame => try self.asmRegisterMemory(
                     mir_tag,
                     registerAlias(dst_reg, abi_size),
-                    Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .rbp, .disp = -off }),
+                    Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) {
+                        .indirect => |reg_off| .{
+                            .base = .{ .reg = reg_off.reg },
+                            .disp = reg_off.off,
+                        },
+                        .load_frame => |frame_addr| .{
+                            .base = .{ .frame = frame_addr.index },
+                            .disp = frame_addr.off,
+                        },
+                        else => unreachable,
+                    }),
                 ),
             }
         },
-        .memory, .load_got, .load_direct, .load_tlv, .stack_offset => {
-            const dst: ?struct {
-                addr_reg: Register,
-                addr_lock: RegisterLock,
-            } = switch (dst_mcv) {
+        .memory, .indirect, .load_got, .load_direct, .load_tlv, .load_frame => {
+            const OpInfo = ?struct { addr_reg: Register, addr_lock: RegisterLock };
+            const limb_abi_size = @min(abi_size, 8);
+
+            const dst_info: OpInfo = switch (dst_mcv) {
                 else => unreachable,
                 .memory, .load_got, .load_direct, .load_tlv => dst: {
                     const dst_addr_reg = (try self.register_manager.allocReg(null, gp)).to64();
                     const dst_addr_lock = self.register_manager.lockRegAssumeUnused(dst_addr_reg);
                     errdefer self.register_manager.unlockReg(dst_addr_lock);
 
-                    try self.genSetReg(Type.usize, dst_addr_reg, switch (dst_mcv) {
-                        .memory => |addr| .{ .immediate = addr },
-                        .load_direct => |sym_index| .{ .lea_direct = sym_index },
-                        .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
-                        else => dst_mcv,
-                    });
-
+                    try self.genSetReg(dst_addr_reg, Type.usize, dst_mcv.address());
                     break :dst .{
                         .addr_reg = dst_addr_reg,
                         .addr_lock = dst_addr_lock,
                     };
                 },
-                .stack_offset => null,
+                .load_frame => null,
             };
-            defer if (dst) |lock| self.register_manager.unlockReg(lock.addr_lock);
-
-            const src: ?struct {
-                limb_reg: Register,
-                limb_lock: RegisterLock,
-                addr_reg: Register,
-                addr_lock: RegisterLock,
-            } = switch (src_mcv) {
-                else => null,
+            defer if (dst_info) |info| self.register_manager.unlockReg(info.addr_lock);
+
+            const src_info: OpInfo = switch (src_mcv) {
+                .none,
+                .unreach,
+                .dead,
+                .undef,
+                .register_overflow,
+                .reserved_frame,
+                => unreachable,
+                .immediate,
+                .register,
+                .register_offset,
+                .eflags,
+                .indirect,
+                .lea_direct,
+                .lea_got,
+                .lea_tlv,
+                .load_frame,
+                .lea_frame,
+                => null,
                 .memory, .load_got, .load_direct, .load_tlv => src: {
-                    const src_limb_reg = try self.register_manager.allocReg(null, gp);
-                    const src_limb_lock = self.register_manager.lockRegAssumeUnused(src_limb_reg);
-                    errdefer self.register_manager.unlockReg(src_limb_lock);
+                    switch (src_mcv) {
+                        .memory => |addr| if (math.cast(i32, @bitCast(i64, addr)) != null and
+                            math.cast(i32, @bitCast(i64, addr) + abi_size - limb_abi_size) != null)
+                            break :src null,
+                        .load_got, .load_direct, .load_tlv => {},
+                        else => unreachable,
+                    }
 
                     const src_addr_reg = (try self.register_manager.allocReg(null, gp)).to64();
                     const src_addr_lock = self.register_manager.lockRegAssumeUnused(src_addr_reg);
                     errdefer self.register_manager.unlockReg(src_addr_lock);
 
-                    try self.genSetReg(Type.usize, src_addr_reg, switch (src_mcv) {
-                        .memory => |addr| .{ .immediate = addr },
-                        .load_direct => |sym_index| .{ .lea_direct = sym_index },
-                        .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
-                        else => src_mcv,
-                    });
-
+                    try self.genSetReg(src_addr_reg, Type.usize, src_mcv.address());
                     break :src .{
                         .addr_reg = src_addr_reg,
                         .addr_lock = src_addr_lock,
-                        .limb_reg = src_limb_reg,
-                        .limb_lock = src_limb_lock,
                     };
                 },
             };
-            defer if (src) |locks| {
-                self.register_manager.unlockReg(locks.limb_lock);
-                self.register_manager.unlockReg(locks.addr_lock);
-            };
+            defer if (src_info) |info| self.register_manager.unlockReg(info.addr_lock);
 
             const ty_signedness =
                 if (ty.isAbiInt()) ty.intInfo(self.target.*).signedness else .unsigned;
@@ -5133,7 +5471,6 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s
                 .signed => Type.usize,
                 .unsigned => Type.isize,
             };
-            const limb_abi_size = @min(abi_size, 8);
             var off: i32 = 0;
             while (off < abi_size) : (off += 8) {
                 const mir_limb_tag = switch (off) {
@@ -5150,35 +5487,45 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s
                 const dst_limb_mem = Memory.sib(
                     Memory.PtrSize.fromSize(limb_abi_size),
                     switch (dst_mcv) {
-                        else => unreachable,
-                        .stack_offset => |dst_off| .{
-                            .base = .rbp,
-                            .disp = off - dst_off,
-                        },
                         .memory,
                         .load_got,
                         .load_direct,
                         .load_tlv,
-                        => .{ .base = dst.?.addr_reg, .disp = off },
+                        => .{ .base = .{ .reg = dst_info.?.addr_reg }, .disp = off },
+                        .indirect => |reg_off| .{
+                            .base = .{ .reg = reg_off.reg },
+                            .disp = reg_off.off + off,
+                        },
+                        .load_frame => |frame_addr| .{
+                            .base = .{ .frame = frame_addr.index },
+                            .disp = frame_addr.off + off,
+                        },
+                        else => unreachable,
                     },
                 );
                 switch (src_mcv) {
-                    .none => unreachable,
-                    .undef => unreachable,
-                    .dead, .unreach => unreachable,
-                    .register_overflow => unreachable,
-                    .register => |src_reg| {
-                        assert(off == 0);
-                        try self.asmMemoryRegister(
+                    .none,
+                    .unreach,
+                    .dead,
+                    .undef,
+                    .register_overflow,
+                    .reserved_frame,
+                    => unreachable,
+                    .register => |src_reg| switch (off) {
+                        0 => try self.asmMemoryRegister(
                             mir_limb_tag,
                             dst_limb_mem,
                             registerAlias(src_reg, limb_abi_size),
-                        );
+                        ),
+                        else => unreachable,
                     },
                     .immediate => |src_imm| {
-                        const imm = if (off == 0) src_imm else switch (ty_signedness) {
-                            .signed => @bitCast(u64, @bitCast(i64, src_imm) >> 63),
-                            .unsigned => 0,
+                        const imm = switch (off) {
+                            0 => src_imm,
+                            else => switch (ty_signedness) {
+                                .signed => @bitCast(u64, @bitCast(i64, src_imm) >> 63),
+                                .unsigned => 0,
+                            },
                         };
                         switch (self.regBitSize(limb_ty)) {
                             8 => try self.asmMemoryImmediate(
@@ -5223,47 +5570,43 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s
                             else => unreachable,
                         }
                     },
+                    .register_offset,
+                    .eflags,
                     .memory,
-                    .load_got,
+                    .indirect,
                     .load_direct,
-                    .load_tlv,
-                    => {
-                        try self.asmRegisterMemory(
-                            .mov,
-                            registerAlias(src.?.limb_reg, limb_abi_size),
-                            Memory.sib(
-                                Memory.PtrSize.fromSize(limb_abi_size),
-                                .{ .base = src.?.addr_reg, .disp = off },
-                            ),
-                        );
-                        try self.asmMemoryRegister(
-                            mir_limb_tag,
-                            dst_limb_mem,
-                            registerAlias(src.?.limb_reg, limb_abi_size),
-                        );
-                    },
-                    .stack_offset,
-                    .ptr_stack_offset,
-                    .eflags,
                     .lea_direct,
+                    .load_got,
+                    .lea_got,
+                    .load_tlv,
                     .lea_tlv,
+                    .load_frame,
+                    .lea_frame,
                     => {
-                        const src_limb_reg = try self.copyToTmpRegister(limb_ty, switch (src_mcv) {
-                            .stack_offset => |src_off| .{ .stack_offset = src_off - off },
-                            .ptr_stack_offset,
+                        const src_limb_reg = try self.copyToTmpRegister(limb_ty, if (src_info) |info| .{
+                            .indirect = .{ .reg = info.addr_reg, .off = off },
+                        } else switch (src_mcv) {
                             .eflags,
-                            .load_got,
+                            .register_offset,
                             .lea_direct,
+                            .lea_got,
                             .lea_tlv,
-                            => off: {
-                                assert(off == 0);
-                                break :off src_mcv;
+                            .lea_frame,
+                            => switch (off) {
+                                0 => src_mcv,
+                                else => .{ .immediate = 0 },
                             },
+                            .memory => |addr| .{ .memory = @bitCast(u64, @bitCast(i64, addr) + off) },
+                            .indirect => |reg_off| .{ .indirect = .{
+                                .reg = reg_off.reg,
+                                .off = reg_off.off + off,
+                            } },
+                            .load_frame => |frame_addr| .{ .load_frame = .{
+                                .index = frame_addr.index,
+                                .off = frame_addr.off + off,
+                            } },
                             else => unreachable,
                         });
-                        const src_limb_lock = self.register_manager.lockReg(src_limb_reg);
-                        defer if (src_limb_lock) |lock| self.register_manager.unlockReg(lock);
-
                         try self.asmMemoryRegister(
                             mir_limb_tag,
                             dst_limb_mem,
@@ -5273,9 +5616,6 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s
                 }
             }
         },
-        .ptr_stack_offset => unreachable,
-        .lea_tlv => unreachable,
-        .lea_direct => unreachable,
     }
 }
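
The limb loop in `genBinOpMir` is what makes wide integer arithmetic work: limb 0 uses the plain opcode, later limbs the carry-propagating variant (`add`/`adc`, `sub`/`sbb`), and an immediate wider than one limb contributes either zero or the sign-smeared `@bitCast(u64, @bitCast(i64, imm) >> 63)` to the high limbs, exactly as the `src_imm` switch above does. The same scheme as ordinary Zig, for a two-limb add (a model, not the emitted code):

const std = @import("std");

// Two-limb add with carry propagation: limb 0 is the `add`, limb 1 the `adc`.
fn add128(a: [2]u64, b: [2]u64) [2]u64 {
    const lo = a[0] +% b[0];
    const carry: u64 = @boolToInt(lo < a[0]); // CF out of the low limb
    return .{ lo, a[1] +% b[1] +% carry };
}

// High-limb synthesis for a 64-bit immediate, per the ty_signedness switch.
fn highLimbImm(imm: u64, signedness: std.builtin.Signedness) u64 {
    return switch (signedness) {
        .signed => @bitCast(u64, @bitCast(i64, imm) >> 63),
        .unsigned => 0,
    };
}

test "carry propagates between limbs" {
    const r = add128(.{ ~@as(u64, 0), 0 }, .{ 1, 0 });
    try std.testing.expectEqual(@as(u64, 0), r[0]);
    try std.testing.expectEqual(@as(u64, 1), r[1]);
}

test "high limbs of an immediate" {
    try std.testing.expectEqual(~@as(u64, 0), highLimbImm(@bitCast(u64, @as(i64, -5)), .signed));
    try std.testing.expectEqual(@as(u64, 0), highLimbImm(5, .unsigned));
}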
 
@@ -5284,27 +5624,33 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s
 fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void {
     const abi_size = @intCast(u32, dst_ty.abiSize(self.target.*));
     switch (dst_mcv) {
-        .none => unreachable,
-        .undef => unreachable,
-        .dead, .unreach, .immediate => unreachable,
-        .eflags => unreachable,
-        .ptr_stack_offset => unreachable,
-        .lea_direct => unreachable,
-        .lea_tlv => unreachable,
-        .register_overflow => unreachable,
+        .none,
+        .unreach,
+        .dead,
+        .undef,
+        .immediate,
+        .register_offset,
+        .eflags,
+        .register_overflow,
+        .lea_direct,
+        .lea_got,
+        .lea_tlv,
+        .lea_frame,
+        .reserved_frame,
+        => unreachable, // unmodifiable destination
         .register => |dst_reg| {
             const dst_alias = registerAlias(dst_reg, abi_size);
             const dst_lock = self.register_manager.lockReg(dst_reg);
             defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
 
             switch (src_mcv) {
-                .none => unreachable,
-                .undef => try self.genSetReg(dst_ty, dst_reg, .undef),
-                .dead, .unreach => unreachable,
-                .ptr_stack_offset => unreachable,
-                .lea_direct => unreachable,
-                .lea_tlv => unreachable,
-                .register_overflow => unreachable,
+                .none,
+                .unreach,
+                .dead,
+                .undef,
+                .register_overflow,
+                .reserved_frame,
+                => unreachable,
                 .register => |src_reg| try self.asmRegisterRegister(
                     .imul,
                     dst_alias,
@@ -5323,74 +5669,54 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M
                         return self.genIntMulComplexOpMir(dst_ty, dst_mcv, MCValue{ .register = src_reg });
                     }
                 },
-                .stack_offset => |off| {
-                    try self.asmRegisterMemory(
-                        .imul,
-                        dst_alias,
-                        Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .rbp, .disp = -off }),
-                    );
-                },
-                .memory,
-                .load_got,
+                .register_offset,
+                .eflags,
                 .load_direct,
+                .lea_direct,
+                .load_got,
+                .lea_got,
                 .load_tlv,
-                => {
-                    return self.fail("TODO implement x86 multiply source memory", .{});
-                },
-                .eflags => {
-                    return self.fail("TODO implement x86 multiply source eflags", .{});
-                },
+                .lea_tlv,
+                .lea_frame,
+                => try self.asmRegisterRegister(
+                    .imul,
+                    dst_alias,
+                    registerAlias(try self.copyToTmpRegister(dst_ty, src_mcv), abi_size),
+                ),
+                .memory, .indirect, .load_frame => try self.asmRegisterMemory(
+                    .imul,
+                    dst_alias,
+                    Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) {
+                        .memory => |addr| .{
+                            .base = .{ .reg = .ds },
+                            .disp = math.cast(i32, @bitCast(i64, addr)) orelse
+                                return self.asmRegisterRegister(
+                                .imul,
+                                dst_alias,
+                                registerAlias(try self.copyToTmpRegister(dst_ty, src_mcv), abi_size),
+                            ),
+                        },
+                        .indirect => |reg_off| .{
+                            .base = .{ .reg = reg_off.reg },
+                            .disp = reg_off.off,
+                        },
+                        .load_frame => |frame_addr| .{
+                            .base = .{ .frame = frame_addr.index },
+                            .disp = frame_addr.off,
+                        },
+                        else => unreachable,
+                    }),
+                ),
             }
         },
-        .stack_offset => |off| {
-            switch (src_mcv) {
-                .none => unreachable,
-                .undef => return self.genSetStack(dst_ty, off, .undef, .{}),
-                .dead, .unreach => unreachable,
-                .ptr_stack_offset => unreachable,
-                .lea_direct => unreachable,
-                .lea_tlv => unreachable,
-                .register_overflow => unreachable,
-                .register => |src_reg| {
-                    // copy dst to a register
-                    const dst_reg = try self.copyToTmpRegister(dst_ty, dst_mcv);
-                    try self.asmRegisterRegister(
-                        .imul,
-                        registerAlias(dst_reg, abi_size),
-                        registerAlias(src_reg, abi_size),
-                    );
-                    // copy dst_reg back out
-                    return self.genSetStack(dst_ty, off, .{ .register = dst_reg }, .{});
-                },
-                .immediate => {
-                    // copy dst to a register
-                    const dst_reg = try self.copyToTmpRegister(dst_ty, dst_mcv);
-                    const dst_reg_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
-                    defer self.register_manager.unlockReg(dst_reg_lock);
-
-                    try self.genIntMulComplexOpMir(dst_ty, .{ .register = dst_reg }, src_mcv);
+        .memory, .indirect, .load_direct, .load_got, .load_tlv, .load_frame => {
+            const tmp_reg = try self.copyToTmpRegister(dst_ty, dst_mcv);
+            const tmp_mcv = MCValue{ .register = tmp_reg };
+            const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
+            defer self.register_manager.unlockReg(tmp_lock);
 
-                    return self.genSetStack(dst_ty, off, .{ .register = dst_reg }, .{});
-                },
-                .memory,
-                .load_got,
-                .load_direct,
-                .load_tlv,
-                .stack_offset,
-                => {
-                    return self.fail("TODO implement x86 multiply source memory", .{});
-                },
-                .eflags => {
-                    return self.fail("TODO implement x86 multiply source eflags", .{});
-                },
-            }
-        },
-        .memory,
-        .load_got,
-        .load_direct,
-        .load_tlv,
-        => {
-            return self.fail("TODO implement x86 multiply destination memory", .{});
+            try self.genIntMulComplexOpMir(dst_ty, tmp_mcv, src_mcv);
+            try self.genCopy(dst_ty, dst_mcv, tmp_mcv);
         },
     }
 }
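
Note: a multiply source can now name a frame slot directly; the frame index is
resolved to a concrete base register and displacement only after frame layout. A
sketch of the operand construction used by the .load_frame arm above (field values
hypothetical, not standalone code):

    // 8-byte imul source living in the current function's frame:
    try self.asmRegisterMemory(.imul, dst_alias, Memory.sib(.qword, .{
        .base = .{ .frame = frame_addr.index },
        .disp = frame_addr.off,
    }));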
@@ -5401,24 +5727,19 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void {
     while (self.args[arg_index] == .none) arg_index += 1;
     self.arg_index = arg_index + 1;
 
-    const ty = self.air.typeOfIndex(inst);
-    const mcv = self.args[arg_index];
-    const src_index = self.air.instructions.items(.data)[inst].arg.src_index;
-    const name = self.mod_fn.getParamName(self.bin_file.options.module.?, src_index);
-
     const result: MCValue = if (self.liveness.isUnused(inst)) .unreach else result: {
-        const dst_mcv: MCValue = switch (mcv) {
-            .register => |reg| blk: {
-                self.register_manager.getRegAssumeFree(reg.to64(), inst);
-                break :blk MCValue{ .register = reg };
-            },
-            .stack_offset => |off| blk: {
-                const offset = @intCast(i32, self.max_end_stack) - off + 16;
-                break :blk MCValue{ .stack_offset = -offset };
-            },
-            else => return self.fail("TODO implement arg for {}", .{mcv}),
-        };
+        const dst_mcv = self.args[arg_index];
+        switch (dst_mcv) {
+            .register => |reg| self.register_manager.getRegAssumeFree(reg, inst),
+            .load_frame => {},
+            else => return self.fail("TODO implement arg for {}", .{dst_mcv}),
+        }
+
+        const ty = self.air.typeOfIndex(inst);
+        const src_index = self.air.instructions.items(.data)[inst].arg.src_index;
+        const name = self.mod_fn.getParamName(self.bin_file.options.module.?, src_index);
         try self.genArgDbgInfo(ty, name, dst_mcv);
+
         break :result dst_mcv;
     };
     return self.finishAir(inst, result, .{ .none, .none, .none });
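
Note: stack-passed arguments now arrive from resolveCallingConventionValues as
.load_frame values, presumably relative to the named .args_frame index, so airArg no
longer rebuilds an rbp-relative offset from max_end_stack. A hedged sketch (the byte
offset is hypothetical):

    // an argument 8 bytes into the incoming argument area:
    const arg_mcv: MCValue = .{ .load_frame = .{ .index = .args_frame, .off = 8 } };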
@@ -5429,13 +5750,15 @@ fn genArgDbgInfo(self: Self, ty: Type, name: [:0]const u8, mcv: MCValue) !void {
         .dwarf => |dw| {
             const loc: link.File.Dwarf.DeclState.DbgInfoLoc = switch (mcv) {
                 .register => |reg| .{ .register = reg.dwarfLocOp() },
-                .stack_offset => |off| .{
-                    .stack = .{
-                        // TODO handle -fomit-frame-pointer
-                        .fp_register = Register.rbp.dwarfLocOpDeref(),
-                        .offset = -off,
-                    },
-                },
+                // TODO use a frame index
+                .load_frame => return,
+                //.stack_offset => |off| .{
+                //    .stack = .{
+                //        // TODO handle -fomit-frame-pointer
+                //        .fp_register = Register.rbp.dwarfLocOpDeref(),
+                //        .offset = -off,
+                //    },
+                //},
                 else => unreachable, // not a valid function parameter
             };
             try dw.genArgDbgInfo(name, ty, self.mod_fn.owner_decl, loc);
@@ -5462,12 +5785,12 @@ fn genVarDbgInfo(
         .dwarf => |dw| {
             const loc: link.File.Dwarf.DeclState.DbgInfoLoc = switch (mcv) {
                 .register => |reg| .{ .register = reg.dwarfLocOp() },
-                .ptr_stack_offset,
-                .stack_offset,
-                => |off| .{ .stack = .{
-                    .fp_register = Register.rbp.dwarfLocOpDeref(),
-                    .offset = -off,
-                } },
+                // TODO use a frame index
+                .load_frame, .lea_frame => return,
+                //=> |off| .{ .stack = .{
+                //    .fp_register = Register.rbp.dwarfLocOpDeref(),
+                //    .offset = -off,
+                //} },
                 .memory => |address| .{ .memory = address },
                 .load_got => |sym_index| .{ .linker_load = .{ .type = .got, .sym_index = sym_index } },
                 .load_direct => |sym_index| .{ .linker_load = .{ .type = .direct, .sym_index = sym_index } },
@@ -5498,15 +5821,13 @@ fn airBreakpoint(self: *Self) !void {
 
 fn airRetAddr(self: *Self, inst: Air.Inst.Index) !void {
     const dst_mcv = try self.allocRegOrMem(inst, true);
-    try self.setRegOrMem(Type.usize, dst_mcv, .{
-        .stack_offset = -@as(i32, @divExact(self.target.cpu.arch.ptrBitWidth(), 8)),
-    });
+    try self.genCopy(Type.usize, dst_mcv, .{ .load_frame = .{ .index = .ret_addr } });
     return self.finishAir(inst, dst_mcv, .{ .none, .none, .none });
 }
 
 fn airFrameAddress(self: *Self, inst: Air.Inst.Index) !void {
     const dst_mcv = try self.allocRegOrMem(inst, true);
-    try self.setRegOrMem(Type.usize, dst_mcv, .{ .register = .rbp });
+    try self.genCopy(Type.usize, dst_mcv, .{ .lea_frame = .{ .index = .base_ptr } });
     return self.finishAir(inst, dst_mcv, .{ .none, .none, .none });
 }
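
Note: both functions lean on the named frame indices; .ret_addr and .base_ptr only
become concrete displacements once the frame has been laid out. What the copies
above denote (not standalone code):

    // the return address pushed by `call`:
    const ret_addr: MCValue = .{ .load_frame = .{ .index = .ret_addr } };
    // the address of the saved base pointer, i.e. the frame address:
    const frame_addr: MCValue = .{ .lea_frame = .{ .index = .base_ptr } };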
 
@@ -5534,94 +5855,69 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier
         else => unreachable,
     };
 
-    var info = try self.resolveCallingConventionValues(fn_ty, args[fn_ty.fnParamLen()..]);
+    var info = try self.resolveCallingConventionValues(fn_ty, args[fn_ty.fnParamLen()..], .call_frame);
     defer info.deinit(self);
 
+    // We need a properly aligned and sized call frame to be able to call this function.
+    {
+        const needed_call_frame =
+            FrameAlloc.init(.{ .size = info.stack_byte_count, .alignment = info.stack_align });
+        const frame_allocs_slice = self.frame_allocs.slice();
+        const stack_frame_size =
+            &frame_allocs_slice.items(.abi_size)[@enumToInt(FrameIndex.call_frame)];
+        stack_frame_size.* = @max(stack_frame_size.*, needed_call_frame.abi_size);
+        const stack_frame_align =
+            &frame_allocs_slice.items(.abi_align)[@enumToInt(FrameIndex.call_frame)];
+        stack_frame_align.* = @max(stack_frame_align.*, needed_call_frame.abi_align);
+    }
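
Note: every call site shares the single .call_frame allocation, so each call only
raises that frame's size and alignment to the maximum any callee needs, and the
prologue reserves it once; this is what removes the per-call `sub rsp`/`add rsp`
adjustments further down. Worked example with hypothetical numbers: one call needing
(size 24, align 8) followed by another needing (size 40, align 16) leaves .call_frame
at size 40, align 16.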
+
     try self.spillEflagsIfOccupied();
     try self.spillRegisters(abi.getCallerPreservedRegs(self.target.*));
 
     // set stack arguments first because this can clobber registers
     // also clobber spill arguments as we go
-    if (info.return_value == .stack_offset) {
-        try self.spillRegisters(&.{abi.getCAbiIntParamRegs(self.target.*)[0]});
+    switch (info.return_value.long) {
+        .none, .unreach => {},
+        .indirect => |reg_off| try self.spillRegisters(&.{reg_off.reg}),
+        else => unreachable,
     }
     for (args, info.args) |arg, mc_arg| {
         const arg_ty = self.air.typeOf(arg);
         const arg_mcv = try self.resolveInst(arg);
-        // Here we do not use setRegOrMem even though the logic is similar, because
-        // the function call will move the stack pointer, so the offsets are different.
         switch (mc_arg) {
             .none => {},
             .register => |reg| try self.spillRegisters(&.{reg}),
-            .stack_offset => |off| {
-                // TODO rewrite using `genSetStack`
-                try self.genSetStackArg(arg_ty, off, arg_mcv);
-            },
-            .ptr_stack_offset => {
-                return self.fail("TODO implement calling with MCValue.ptr_stack_offset arg", .{});
-            },
-            .undef => unreachable,
-            .immediate => unreachable,
-            .unreach => unreachable,
-            .dead => unreachable,
-            .memory => unreachable,
-            .eflags => unreachable,
-            .register_overflow => unreachable,
-            .load_got => unreachable,
-            .lea_direct => unreachable,
-            .load_direct => unreachable,
-            .lea_tlv => unreachable,
-            .load_tlv => unreachable,
+            .load_frame => try self.genCopy(arg_ty, mc_arg, arg_mcv),
+            else => unreachable,
         }
     }
 
     // now we are free to set register arguments
-    const ret_reg_lock: ?RegisterLock = blk: {
-        if (info.return_value == .stack_offset) {
+    const ret_lock = switch (info.return_value.long) {
+        .none, .unreach => null,
+        .indirect => |reg_off| lock: {
             const ret_ty = fn_ty.fnReturnType();
-            const ret_abi_size = @intCast(u32, ret_ty.abiSize(self.target.*));
-            const ret_abi_align = @intCast(u32, ret_ty.abiAlignment(self.target.*));
-            const stack_offset = @intCast(i32, try self.allocMem(inst, ret_abi_size, ret_abi_align));
-            log.debug("airCall: return value on stack at offset {}", .{stack_offset});
-
-            const ret_reg = abi.getCAbiIntParamRegs(self.target.*)[0];
-            try self.genSetReg(Type.usize, ret_reg, .{ .ptr_stack_offset = stack_offset });
-            const ret_reg_lock = self.register_manager.lockRegAssumeUnused(ret_reg);
-
-            info.return_value.stack_offset = stack_offset;
-
-            break :blk ret_reg_lock;
-        }
-        break :blk null;
+            const frame_index = try self.allocFrameIndex(FrameAlloc.initType(ret_ty, self.target.*));
+            try self.genSetReg(reg_off.reg, Type.usize, .{
+                .lea_frame = .{ .index = frame_index, .off = -reg_off.off },
+            });
+            info.return_value.short = .{ .load_frame = .{ .index = frame_index } };
+            break :lock self.register_manager.lockRegAssumeUnused(reg_off.reg);
+        },
+        else => unreachable,
     };
-    defer if (ret_reg_lock) |lock| self.register_manager.unlockReg(lock);
+    defer if (ret_lock) |lock| self.register_manager.unlockReg(lock);
 
     for (args, info.args) |arg, mc_arg| {
         const arg_ty = self.air.typeOf(arg);
         const arg_mcv = try self.resolveInst(arg);
         switch (mc_arg) {
-            .none, .stack_offset, .ptr_stack_offset => {},
-            .register => |reg| try self.genSetReg(arg_ty, reg, arg_mcv),
-            .undef => unreachable,
-            .immediate => unreachable,
-            .unreach => unreachable,
-            .dead => unreachable,
-            .memory => unreachable,
-            .eflags => unreachable,
-            .register_overflow => unreachable,
-            .load_got => unreachable,
-            .lea_direct => unreachable,
-            .load_direct => unreachable,
-            .lea_tlv => unreachable,
-            .load_tlv => unreachable,
+            .none, .load_frame => {},
+            .register => try self.genCopy(arg_ty, mc_arg, arg_mcv),
+            else => unreachable,
         }
     }
 
-    if (info.stack_byte_count > 0) {
-        // Adjust the stack
-        try self.asmRegisterImmediate(.sub, .rsp, Immediate.u(info.stack_byte_count));
-    }
-
     // Due to incremental compilation, how function calls are generated depends
     // on linking.
     const mod = self.bin_file.options.module.?;
@@ -5635,16 +5931,16 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier
                 _ = try atom.getOrCreateOffsetTableEntry(elf_file);
                 const got_addr = atom.getOffsetTableAddress(elf_file);
                 try self.asmMemory(.call, Memory.sib(.qword, .{
-                    .base = .ds,
+                    .base = .{ .reg = .ds },
                     .disp = @intCast(i32, got_addr),
                 }));
             } else if (self.bin_file.cast(link.File.Coff)) |_| {
                 const sym_index = try self.getSymbolIndexForDecl(func.owner_decl);
-                try self.genSetReg(Type.usize, .rax, .{ .load_got = sym_index });
+                try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index });
                 try self.asmRegister(.call, .rax);
             } else if (self.bin_file.cast(link.File.MachO)) |_| {
                 const sym_index = try self.getSymbolIndexForDecl(func.owner_decl);
-                try self.genSetReg(Type.usize, .rax, .{ .load_got = sym_index });
+                try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index });
                 try self.asmRegister(.call, .rax);
             } else if (self.bin_file.cast(link.File.Plan9)) |p9| {
                 const decl_block_index = try p9.seeDecl(func.owner_decl);
@@ -5655,7 +5951,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier
                 const got_index = decl_block.got_index.?;
                 const fn_got_addr = got_addr + got_index * ptr_bytes;
                 try self.asmMemory(.call, Memory.sib(.qword, .{
-                    .base = .ds,
+                    .base = .{ .reg = .ds },
                     .disp = @intCast(i32, fn_got_addr),
                 }));
             } else unreachable;
@@ -5696,41 +5992,15 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier
     } else {
         assert(ty.zigTypeTag() == .Pointer);
         const mcv = try self.resolveInst(callee);
-        try self.genSetReg(Type.usize, .rax, mcv);
+        try self.genSetReg(.rax, Type.usize, mcv);
         try self.asmRegister(.call, .rax);
     }
 
-    if (info.stack_byte_count > 0) {
-        // Readjust the stack
-        try self.asmRegisterImmediate(.add, .rsp, Immediate.u(info.stack_byte_count));
-    }
-
-    const result: MCValue = result: {
-        if (self.liveness.isUnused(inst)) break :result .unreach;
-
-        switch (info.return_value) {
-            .register => {
-                // Save function return value in a new register
-                break :result try self.copyToRegisterWithInstTracking(
-                    inst,
-                    self.air.typeOfIndex(inst),
-                    info.return_value,
-                );
-            },
-            else => {},
-        }
-        break :result info.return_value;
-    };
-
-    if (args.len <= Liveness.bpi - 2) {
-        var buf = [1]Air.Inst.Ref{.none} ** (Liveness.bpi - 1);
-        buf[0] = callee;
-        std.mem.copy(Air.Inst.Ref, buf[1..], args);
-        return self.finishAir(inst, result, buf);
-    }
     var bt = self.liveness.iterateBigTomb(inst);
     self.feed(&bt, callee);
     for (args) |arg| self.feed(&bt, arg);
+
+    const result = if (self.liveness.isUnused(inst)) .unreach else info.return_value.short;
     return self.finishAirResult(inst, result);
 }
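
Note: the call result is tracked as a long/short pair: .long records the
ABI-mandated location, .short what the rest of codegen consumes. A hedged sketch of
the indirect case above (the pointer register depends on the calling convention;
.rdi is the SysV assumption here):

    // after the call returns:
    //   info.return_value.long  == .{ .indirect = .{ .reg = .rdi } }            // callee wrote here
    //   info.return_value.short == .{ .load_frame = .{ .index = frame_index } } // caller reads here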
 
@@ -5738,18 +6008,19 @@ fn airRet(self: *Self, inst: Air.Inst.Index) !void {
     const un_op = self.air.instructions.items(.data)[inst].un_op;
     const operand = try self.resolveInst(un_op);
     const ret_ty = self.fn_type.fnReturnType();
-    switch (self.ret_mcv) {
-        .immediate => {
-            assert(ret_ty.isError());
-        },
-        .stack_offset => {
-            try self.store(self.ret_mcv, operand, Type.usize, ret_ty);
-        },
-        else => {
-            try self.setRegOrMem(ret_ty, self.ret_mcv, operand);
+    switch (self.ret_mcv.short) {
+        .none => {},
+        .register => try self.genCopy(ret_ty, self.ret_mcv.short, operand),
+        .indirect => |reg_off| {
+            try self.register_manager.getReg(reg_off.reg, null);
+            const lock = self.register_manager.lockRegAssumeUnused(reg_off.reg);
+            defer self.register_manager.unlockReg(lock);
+
+            try self.genSetReg(reg_off.reg, Type.usize, self.ret_mcv.long);
+            try self.genSetMem(.{ .reg = reg_off.reg }, reg_off.off, ret_ty, operand);
         },
+        else => unreachable,
     }
-    // TODO when implementing defer, this will need to jump to the appropriate defer expression.
     // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction
     // which is available if the jump is 127 bytes or less forward.
     const jmp_reloc = try self.asmJmpReloc(undefined);
@@ -5761,25 +6032,12 @@ fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void {
     const un_op = self.air.instructions.items(.data)[inst].un_op;
     const ptr = try self.resolveInst(un_op);
     const ptr_ty = self.air.typeOf(un_op);
-    const elem_ty = ptr_ty.elemType();
-    const abi_size = elem_ty.abiSize(self.target.*);
-    switch (self.ret_mcv) {
-        .immediate => {
-            assert(elem_ty.isError());
-        },
-        .stack_offset => {
-            const reg = try self.copyToTmpRegister(Type.usize, self.ret_mcv);
-            const reg_lock = self.register_manager.lockRegAssumeUnused(reg);
-            defer self.register_manager.unlockReg(reg_lock);
-
-            try self.genInlineMemcpy(.{ .register = reg }, ptr, .{ .immediate = abi_size }, .{});
-        },
-        else => {
-            try self.load(self.ret_mcv, ptr, ptr_ty);
-            try self.setRegOrMem(elem_ty, self.ret_mcv, self.ret_mcv);
-        },
+    switch (self.ret_mcv.short) {
+        .none => {},
+        .register => try self.load(self.ret_mcv.short, ptr_ty, ptr),
+        .indirect => |reg_off| try self.genSetReg(reg_off.reg, ptr_ty, ptr),
+        else => unreachable,
     }
-    // TODO when implementing defer, this will need to jump to the appropriate defer expression.
     // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction
     // which is available if the jump is 127 bytes or less forward.
     const jmp_reloc = try self.asmJmpReloc(undefined);
@@ -5822,7 +6080,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
         break :dst rhs_mcv;
     } else if (dst_mem_ok) dst: {
         const dst_mcv = try self.allocTempRegOrMem(ty, true);
-        try self.setRegOrMem(ty, dst_mcv, lhs_mcv);
+        try self.genCopy(ty, dst_mcv, lhs_mcv);
         break :dst dst_mcv;
     } else .{ .register = try self.copyToTmpRegister(ty, lhs_mcv) };
     const dst_lock = switch (dst_mcv) {
@@ -5865,26 +6123,27 @@ fn airCmpLtErrorsLen(self: *Self, inst: Air.Inst.Index) !void {
         const atom = elf_file.getAtom(atom_index);
         _ = try atom.getOrCreateOffsetTableEntry(elf_file);
         const got_addr = atom.getOffsetTableAddress(elf_file);
-        try self.asmRegisterMemory(.mov, addr_reg.to64(), Memory.sib(.qword, .{
-            .base = .ds,
-            .disp = @intCast(i32, got_addr),
-        }));
+        try self.asmRegisterMemory(
+            .mov,
+            addr_reg.to64(),
+            Memory.sib(.qword, .{ .base = .{ .reg = .ds }, .disp = @intCast(i32, got_addr) }),
+        );
     } else if (self.bin_file.cast(link.File.Coff)) |coff_file| {
         const atom_index = try coff_file.getOrCreateAtomForLazySymbol(
             .{ .kind = .const_data, .ty = Type.anyerror },
             4, // dword alignment
         );
         const sym_index = coff_file.getAtom(atom_index).getSymbolIndex().?;
-        try self.genSetReg(Type.usize, addr_reg, .{ .load_got = sym_index });
+        try self.genSetReg(addr_reg, Type.usize, .{ .lea_got = sym_index });
     } else if (self.bin_file.cast(link.File.MachO)) |macho_file| {
         const atom_index = try macho_file.getOrCreateAtomForLazySymbol(
             .{ .kind = .const_data, .ty = Type.anyerror },
             4, // dword alignment
         );
         const sym_index = macho_file.getAtom(atom_index).getSymbolIndex().?;
-        try self.genSetReg(Type.usize, addr_reg, .{ .load_got = sym_index });
+        try self.genSetReg(addr_reg, Type.usize, .{ .lea_got = sym_index });
     } else {
-        return self.fail("TODO implement airErrorName for x86_64 {s}", .{@tagName(self.bin_file.tag)});
+        return self.fail("TODO implement airCmpLtErrorsLen for x86_64 {s}", .{@tagName(self.bin_file.tag)});
     }
 
     try self.spillEflagsIfOccupied();
@@ -5900,7 +6159,7 @@ fn airCmpLtErrorsLen(self: *Self, inst: Air.Inst.Index) !void {
     try self.asmRegisterMemory(
         .cmp,
         registerAlias(dst_reg, op_abi_size),
-        Memory.sib(Memory.PtrSize.fromSize(op_abi_size), .{ .base = addr_reg }),
+        Memory.sib(Memory.PtrSize.fromSize(op_abi_size), .{ .base = .{ .reg = addr_reg } }),
     );
     const result = MCValue{ .eflags = .b };
     return self.finishAir(inst, result, .{ un_op, .none, .none });
@@ -5971,14 +6230,13 @@ fn genTry(
 
 fn airDbgStmt(self: *Self, inst: Air.Inst.Index) !void {
     const dbg_stmt = self.air.instructions.items(.data)[inst].dbg_stmt;
-    const payload = try self.addExtra(Mir.DbgLineColumn{
-        .line = dbg_stmt.line,
-        .column = dbg_stmt.column,
-    });
     _ = try self.addInst(.{
         .tag = .dbg_line,
         .ops = undefined,
-        .data = .{ .payload = payload },
+        .data = .{ .line_column = .{
+            .line = dbg_stmt.line,
+            .column = dbg_stmt.column,
+        } },
     });
     return self.finishAirBookkeeping();
 }
@@ -6002,8 +6260,6 @@ fn airDbgVar(self: *Self, inst: Air.Inst.Index) !void {
     const ty = self.air.typeOf(operand);
     const mcv = try self.resolveInst(operand);
 
-    log.debug("airDbgVar: %{d}: {}, {}", .{ inst, ty.fmtDebug(), mcv });
-
     const name = self.air.nullTerminatedString(pl_op.payload);
 
     const tag = self.air.instructions.items(.tag)[inst];
@@ -6025,7 +6281,7 @@ fn genCondBrMir(self: *Self, ty: Type, mcv: MCValue) !u32 {
             return self.asmJccReloc(undefined, .e);
         },
         .immediate,
-        .stack_offset,
+        .load_frame,
         => {
             try self.spillEflagsIfOccupied();
             if (abi_size <= 8) {
@@ -6118,11 +6374,14 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MC
         .dead,
         .undef,
         .immediate,
-        .register_overflow,
-        .ptr_stack_offset,
         .eflags,
+        .register_offset,
+        .register_overflow,
         .lea_direct,
+        .lea_got,
         .lea_tlv,
+        .lea_frame,
+        .reserved_frame,
         => unreachable,
 
         .register => |opt_reg| {
@@ -6152,27 +6411,36 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MC
             const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
             defer self.register_manager.unlockReg(addr_reg_lock);
 
-            try self.genSetReg(Type.usize, addr_reg, switch (opt_mcv) {
-                .memory => |addr| .{ .immediate = addr },
-                .load_direct => |sym_index| .{ .lea_direct = sym_index },
-                .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
-                else => opt_mcv,
-            });
-
+            try self.genSetReg(addr_reg, Type.usize, opt_mcv.address());
             const some_abi_size = @intCast(u32, some_info.ty.abiSize(self.target.*));
-            try self.asmMemoryImmediate(.cmp, Memory.sib(
-                Memory.PtrSize.fromSize(some_abi_size),
-                .{ .base = addr_reg, .disp = some_info.off },
-            ), Immediate.u(0));
+            try self.asmMemoryImmediate(
+                .cmp,
+                Memory.sib(Memory.PtrSize.fromSize(some_abi_size), .{
+                    .base = .{ .reg = addr_reg },
+                    .disp = some_info.off,
+                }),
+                Immediate.u(0),
+            );
             return .{ .eflags = .e };
         },
 
-        .stack_offset => |off| {
+        .indirect, .load_frame => {
             const some_abi_size = @intCast(u32, some_info.ty.abiSize(self.target.*));
-            try self.asmMemoryImmediate(.cmp, Memory.sib(
-                Memory.PtrSize.fromSize(some_abi_size),
-                .{ .base = .rbp, .disp = some_info.off - off },
-            ), Immediate.u(0));
+            try self.asmMemoryImmediate(
+                .cmp,
+                Memory.sib(Memory.PtrSize.fromSize(some_abi_size), switch (opt_mcv) {
+                    .indirect => |reg_off| .{
+                        .base = .{ .reg = reg_off.reg },
+                        .disp = reg_off.off + some_info.off,
+                    },
+                    .load_frame => |frame_addr| .{
+                        .base = .{ .frame = frame_addr.index },
+                        .disp = frame_addr.off + some_info.off,
+                    },
+                    else => unreachable,
+                }),
+                Immediate.u(0),
+            );
             return .{ .eflags = .e };
         },
     }
@@ -6200,10 +6468,14 @@ fn isNullPtr(self: *Self, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue)
     defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock);
 
     const some_abi_size = @intCast(u32, some_info.ty.abiSize(self.target.*));
-    try self.asmMemoryImmediate(.cmp, Memory.sib(
-        Memory.PtrSize.fromSize(some_abi_size),
-        .{ .base = ptr_reg, .disp = some_info.off },
-    ), Immediate.u(0));
+    try self.asmMemoryImmediate(
+        .cmp,
+        Memory.sib(Memory.PtrSize.fromSize(some_abi_size), .{
+            .base = .{ .reg = ptr_reg },
+            .disp = some_info.off,
+        }),
+        Immediate.u(0),
+    );
     return .{ .eflags = .e };
 }
 
@@ -6221,10 +6493,6 @@ fn isErr(self: *Self, maybe_inst: ?Air.Inst.Index, ty: Type, operand: MCValue) !
 
     const err_off = errUnionErrorOffset(ty.errorUnionPayload(), self.target.*);
     switch (operand) {
-        .stack_offset => |off| {
-            const offset = off - @intCast(i32, err_off);
-            try self.genBinOpMir(.cmp, Type.anyerror, .{ .stack_offset = offset }, .{ .immediate = 0 });
-        },
         .register => |reg| {
             const eu_lock = self.register_manager.lockReg(reg);
             defer if (eu_lock) |lock| self.register_manager.unlockReg(lock);
@@ -6238,6 +6506,15 @@ fn isErr(self: *Self, maybe_inst: ?Air.Inst.Index, ty: Type, operand: MCValue) !
             }
             try self.genBinOpMir(.cmp, Type.anyerror, .{ .register = tmp_reg }, .{ .immediate = 0 });
         },
+        .load_frame => |frame_addr| try self.genBinOpMir(
+            .cmp,
+            Type.anyerror,
+            .{ .load_frame = .{
+                .index = frame_addr.index,
+                .off = frame_addr.off + @intCast(i32, err_off),
+            } },
+            .{ .immediate = 0 },
+        ),
         else => return self.fail("TODO implement isErr for {}", .{operand}),
     }
 
@@ -6324,9 +6601,9 @@ fn airIsErrPtr(self: *Self, inst: Air.Inst.Index) !void {
         }
     };
     const ptr_ty = self.air.typeOf(un_op);
-    try self.load(operand, operand_ptr, ptr_ty);
+    try self.load(operand, ptr_ty, operand_ptr);
 
-    const result = try self.isErr(inst, ptr_ty.elemType(), operand);
+    const result = try self.isErr(inst, ptr_ty.childType(), operand);
 
     return self.finishAir(inst, result, .{ un_op, .none, .none });
 }
@@ -6358,9 +6635,9 @@ fn airIsNonErrPtr(self: *Self, inst: Air.Inst.Index) !void {
         }
     };
     const ptr_ty = self.air.typeOf(un_op);
-    try self.load(operand, operand_ptr, ptr_ty);
+    try self.load(operand, ptr_ty, operand_ptr);
 
-    const result = try self.isNonErr(inst, ptr_ty.elemType(), operand);
+    const result = try self.isNonErr(inst, ptr_ty.childType(), operand);
 
     return self.finishAir(inst, result, .{ un_op, .none, .none });
 }
@@ -6389,6 +6666,7 @@ fn airLoop(self: *Self, inst: Air.Inst.Index) !void {
 
 fn airBlock(self: *Self, inst: Air.Inst.Index) !void {
     // A block is a setup to be able to jump to the end.
+    const inst_tracking_i = self.inst_tracking.count();
     self.inst_tracking.putAssumeCapacityNoClobber(inst, InstTracking.init(.unreach));
 
     self.scope_generation += 1;
@@ -6412,8 +6690,9 @@ fn airBlock(self: *Self, inst: Air.Inst.Index) !void {
         for (block_data.value.relocs.items) |reloc| try self.performReloc(reloc);
     }
 
-    const tracking = self.inst_tracking.getPtr(inst).?;
-    if (self.liveness.isUnused(inst)) tracking.die(self);
+    if (std.debug.runtime_safety) assert(self.inst_tracking.getIndex(inst).? == inst_tracking_i);
+    const tracking = &self.inst_tracking.values()[inst_tracking_i];
+    if (self.liveness.isUnused(inst)) tracking.die(self, inst);
     self.getValue(tracking.short, inst);
     self.finishAirBookkeeping();
 }
@@ -6520,35 +6799,38 @@ fn airBr(self: *Self, inst: Air.Inst.Index) !void {
         !block_ty.hasRuntimeBitsIgnoreComptime() or self.liveness.isUnused(br.block_inst);
     const block_tracking = self.inst_tracking.getPtr(br.block_inst).?;
     const block_data = self.blocks.getPtr(br.block_inst).?;
+    const first_br = block_data.relocs.items.len == 0;
+    const block_result = result: {
+        if (block_unused) break :result .none;
+
+        if (self.reuseOperandAdvanced(inst, br.operand, 0, src_mcv, br.block_inst)) {
+            if (first_br) break :result src_mcv;
+
+            if (block_tracking.getReg()) |block_reg|
+                try self.register_manager.getReg(block_reg, br.block_inst);
+            // Set .long to .none so the operand and block result stack frames are not merged.
+            var current_tracking = InstTracking{ .long = .none, .short = src_mcv };
+            try current_tracking.materializeUnsafe(self, br.block_inst, block_tracking.*);
+            if (src_mcv.getReg()) |src_reg| self.register_manager.freeReg(src_reg);
+            break :result block_tracking.short;
+        }
 
-    if (block_data.relocs.items.len == 0) {
-        block_tracking.* = InstTracking.init(result: {
-            if (block_unused) break :result .none;
-            if (self.reuseOperand(inst, br.operand, 0, src_mcv)) {
-                // Fix instruction tracking
-                switch (src_mcv) {
-                    .register => |reg| if (RegisterManager.indexOfRegIntoTracked(reg)) |index| {
-                        self.register_manager.registers[index] = br.block_inst;
-                    },
-                    else => {},
-                }
-                break :result src_mcv;
-            }
-
-            const new_mcv = try self.allocRegOrMem(br.block_inst, true);
-            try self.setRegOrMem(block_ty, new_mcv, src_mcv);
-            break :result new_mcv;
-        });
-    } else if (!block_unused) try self.setRegOrMem(block_ty, block_tracking.short, src_mcv);
+        const dst_mcv = if (first_br) try self.allocRegOrMem(br.block_inst, true) else dst: {
+            self.getValue(block_tracking.short, br.block_inst);
+            break :dst block_tracking.short;
+        };
+        try self.genCopy(block_ty, dst_mcv, src_mcv);
+        break :result dst_mcv;
+    };
 
     // Process operand death so that it is properly accounted for in the State below.
     if (self.liveness.operandDies(inst, 0)) {
         if (Air.refToIndex(br.operand)) |op_inst| self.processDeath(op_inst);
     }
 
-    if (block_data.relocs.items.len == 0) {
+    if (first_br) {
+        block_tracking.* = InstTracking.init(block_result);
         try self.saveRetroactiveState(&block_data.state);
-        block_tracking.die(self);
     } else try self.restoreState(block_data.state, &.{}, .{
         .emit_instructions = true,
         .update_tracking = false,
@@ -6556,6 +6838,9 @@ fn airBr(self: *Self, inst: Air.Inst.Index) !void {
         .close_scope = false,
     });
 
+    // Stop tracking the block result without forgetting its tracking info.
+    self.freeValue(block_tracking.short);
+
     // Emit a jump with a relocation. It will be patched up after the block ends.
     // Leave the jump offset undefined
     const jmp_reloc = try self.asmJmpReloc(undefined);
@@ -6634,7 +6919,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
 
         const arg_mcv = try self.resolveInst(input);
         try self.register_manager.getReg(reg, null);
-        try self.genSetReg(self.air.typeOf(input), reg, arg_mcv);
+        try self.genSetReg(reg, self.air.typeOf(input), arg_mcv);
     }
 
     {
@@ -6686,7 +6971,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
                         return self.fail("Invalid displacement: '{s}'", .{op_str});
                     op.* = .{ .mem = Memory.sib(
                         mnem_size orelse return self.fail("Unknown size: '{s}'", .{op_str}),
-                        .{ .base = reg, .disp = disp },
+                        .{ .base = .{ .reg = reg }, .disp = disp },
                     ) };
                 } else {
                     if (mnem_size) |size| if (reg.bitSize() != size.bitSize())
@@ -6805,335 +7090,317 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
     return self.finishAirResult(inst, result);
 }
 
-/// Sets the value without any modifications to register allocation metadata or stack allocation metadata.
-fn setRegOrMem(self: *Self, ty: Type, loc: MCValue, val: MCValue) !void {
-    switch (loc) {
-        .none => return,
-        .immediate => unreachable,
-        .register => |reg| return self.genSetReg(ty, reg, val),
-        .stack_offset => |off| return self.genSetStack(ty, off, val, .{}),
-        .memory => {
-            return self.fail("TODO implement setRegOrMem for memory", .{});
+fn movMirTag(self: *Self, ty: Type) !Mir.Inst.Tag {
+    return switch (ty.zigTypeTag()) {
+        else => .mov,
+        .Float => switch (ty.floatBits(self.target.*)) {
+            16 => .mov,
+            32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) .movss else .mov,
+            64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) .movsd else .mov,
+            else => return self.fail("TODO movMirTag for {}", .{
+                ty.fmt(self.bin_file.options.module.?),
+            }),
         },
-        else => {
-            return self.fail("TODO implement setRegOrMem for {}", .{loc});
+    };
+}
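
Note: movMirTag gates the SSE moves on target CPU features, so a target without
SSE/SSE2 falls back to a plain integer mov, and f16 always travels as raw bits in a
general-purpose register. Typical use mirrors the call sites below (a sketch):

    // pick the right move for the type, then emit a register-to-register copy:
    try self.asmRegisterRegister(try self.movMirTag(ty), dst_alias, src_alias);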
+
+fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void {
+    const src_lock = switch (src_mcv) {
+        .register => |reg| self.register_manager.lockReg(reg),
+        .register_overflow => |ro| self.register_manager.lockReg(ro.reg),
+        else => null,
+    };
+    defer if (src_lock) |lock| self.register_manager.unlockReg(lock);
+
+    switch (dst_mcv) {
+        .none,
+        .unreach,
+        .dead,
+        .undef,
+        .immediate,
+        .eflags,
+        .register_overflow,
+        .lea_direct,
+        .lea_got,
+        .lea_tlv,
+        .lea_frame,
+        .reserved_frame,
+        => unreachable, // unmodifiable destination
+        .register => |reg| try self.genSetReg(reg, ty, src_mcv),
+        .register_offset => |dst_reg_off| try self.genSetReg(dst_reg_off.reg, ty, switch (src_mcv) {
+            .none,
+            .unreach,
+            .dead,
+            .undef,
+            .register_overflow,
+            .reserved_frame,
+            => unreachable,
+            .immediate,
+            .register,
+            .register_offset,
+            .lea_frame,
+            => src_mcv.offset(-dst_reg_off.off),
+            else => .{ .register_offset = .{
+                .reg = try self.copyToTmpRegister(ty, src_mcv),
+                .off = -dst_reg_off.off,
+            } },
+        }),
+        .indirect => |reg_off| try self.genSetMem(.{ .reg = reg_off.reg }, reg_off.off, ty, src_mcv),
+        .memory, .load_direct, .load_got, .load_tlv => {
+            switch (dst_mcv) {
+                .memory => |addr| if (math.cast(i32, @bitCast(i64, addr))) |small_addr|
+                    return self.genSetMem(.{ .reg = .ds }, small_addr, ty, src_mcv),
+                .load_direct, .load_got, .load_tlv => {},
+                else => unreachable,
+            }
+
+            const addr_reg = try self.copyToTmpRegister(Type.usize, dst_mcv.address());
+            const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
+            defer self.register_manager.unlockReg(addr_lock);
+
+            try self.genSetMem(.{ .reg = addr_reg }, 0, ty, src_mcv);
         },
+        .load_frame => |frame_addr| try self.genSetMem(
+            .{ .frame = frame_addr.index },
+            frame_addr.off,
+            ty,
+            src_mcv,
+        ),
     }
 }
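
Note: a .register_offset destination denotes the value reg + off, so genCopy must
store src - off into reg to keep the sum equal to the source; that is what
src_mcv.offset(-dst_reg_off.off) computes. Worked example with hypothetical values:

    // dst = .{ .register_offset = .{ .reg = .rax, .off = 16 } }
    // src = .{ .lea_frame = .{ .index = fi, .off = 4 } }
    // rax receives .{ .lea_frame = .{ .index = fi, .off = -12 } },
    // so rax + 16 once again yields the address of frame slot fi at byte 4.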
 
-fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerError!void {
+fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerError!void {
     const abi_size = @intCast(u32, ty.abiSize(self.target.*));
-    switch (mcv) {
-        .dead => unreachable,
-        .unreach, .none => return,
-        .undef => {
-            if (!self.wantSafety()) return; // The already existing value will do just fine.
-            if (abi_size <= 8) {
-                const reg = try self.copyToTmpRegister(ty, mcv);
-                return self.genSetStackArg(ty, stack_offset, MCValue{ .register = reg });
+    if (abi_size > 8) return self.fail("genSetReg called with a value larger than one register", .{});
+    switch (src_mcv) {
+        .none,
+        .unreach,
+        .dead,
+        .register_overflow,
+        .reserved_frame,
+        => unreachable,
+        .undef => if (self.wantSafety())
+            try self.genSetReg(dst_reg.to64(), Type.usize, .{ .immediate = 0xaaaaaaaaaaaaaaaa }),
+        .eflags => |cc| try self.asmSetccRegister(dst_reg.to8(), cc),
+        .immediate => |imm| {
+            if (imm == 0) {
+                // Writes to a 32-bit register zero the upper 32 bits, so
+                // xoring the 32-bit alias is the cheapest way to zero a register.
+                try self.asmRegisterRegister(.xor, dst_reg.to32(), dst_reg.to32());
+            } else if (abi_size > 4 and math.cast(u32, imm) != null) {
+                // 32-bit moves zero-extend to 64-bit.
+                try self.asmRegisterImmediate(.mov, dst_reg.to32(), Immediate.u(imm));
+            } else if (abi_size <= 4 and @bitCast(i64, imm) < 0) {
+                try self.asmRegisterImmediate(
+                    .mov,
+                    registerAlias(dst_reg, abi_size),
+                    Immediate.s(@intCast(i32, @bitCast(i64, imm))),
+                );
+            } else {
+                try self.asmRegisterImmediate(
+                    .mov,
+                    registerAlias(dst_reg, abi_size),
+                    Immediate.u(imm),
+                );
             }
-            try self.genInlineMemset(
-                .{ .ptr_stack_offset = stack_offset },
-                .{ .immediate = 0xaa },
-                .{ .immediate = abi_size },
-                .{ .dest_stack_base = .rsp },
-            );
-        },
-        .register_overflow => return self.fail("TODO genSetStackArg for register with overflow bit", .{}),
-        .eflags => {
-            const reg = try self.copyToTmpRegister(ty, mcv);
-            return self.genSetStackArg(ty, stack_offset, .{ .register = reg });
         },
-        .immediate => |imm| {
-            switch (abi_size) {
-                1, 2, 4 => {
-                    // TODO
-                    // We have a positive stack offset value but we want a twos complement negative
-                    // offset from rbp, which is at the top of the stack frame.
-                    const immediate = if (ty.isSignedInt())
-                        Immediate.s(@intCast(i32, @bitCast(i64, imm)))
-                    else
-                        Immediate.u(@intCast(u32, imm));
-                    try self.asmMemoryImmediate(.mov, Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
-                        .base = .rsp,
-                        .disp = -stack_offset,
-                    }), immediate);
+        .register => |reg| if (dst_reg.id() != reg.id()) try self.asmRegisterRegister(
+            try self.movMirTag(ty),
+            registerAlias(dst_reg, abi_size),
+            registerAlias(reg, abi_size),
+        ),
+        .register_offset, .indirect, .load_frame, .lea_frame => try self.asmRegisterMemory(
+            switch (src_mcv) {
+                .register_offset => |reg_off| switch (reg_off.off) {
+                    0 => return self.genSetReg(dst_reg, ty, .{ .register = reg_off.reg }),
+                    else => .lea,
                 },
-                8 => {
-                    const reg = try self.copyToTmpRegister(ty, mcv);
-                    return self.genSetStackArg(ty, stack_offset, MCValue{ .register = reg });
+                .indirect, .load_frame => try self.movMirTag(ty),
+                .lea_frame => .lea,
+                else => unreachable,
+            },
+            registerAlias(dst_reg, abi_size),
+            Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) {
+                .register_offset, .indirect => |reg_off| .{
+                    .base = .{ .reg = reg_off.reg },
+                    .disp = reg_off.off,
                 },
-                else => return self.fail("TODO implement inputs on stack for {} with abi size > 8", .{mcv}),
-            }
-        },
-        .memory,
-        .load_direct,
-        .load_tlv,
-        => {
-            if (abi_size <= 8) {
-                const reg = try self.copyToTmpRegister(ty, mcv);
-                return self.genSetStackArg(ty, stack_offset, MCValue{ .register = reg });
+                .load_frame, .lea_frame => |frame_addr| .{
+                    .base = .{ .frame = frame_addr.index },
+                    .disp = frame_addr.off,
+                },
+                else => unreachable,
+            }),
+        ),
+        .memory, .load_direct, .load_got, .load_tlv => {
+            switch (src_mcv) {
+                .memory => |addr| if (math.cast(i32, @bitCast(i64, addr))) |small_addr|
+                    return self.asmRegisterMemory(
+                        try self.movMirTag(ty),
+                        registerAlias(dst_reg, abi_size),
+                        Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
+                            .base = .{ .reg = .ds },
+                            .disp = small_addr,
+                        }),
+                    ),
+                .load_direct => |sym_index| if (try self.movMirTag(ty) == .mov) {
+                    const atom_index = try self.getSymbolIndexForDecl(self.mod_fn.owner_decl);
+                    _ = try self.addInst(.{
+                        .tag = .mov_linker,
+                        .ops = .direct_reloc,
+                        .data = .{ .payload = try self.addExtra(Mir.LeaRegisterReloc{
+                            .reg = @enumToInt(dst_reg.to64()),
+                            .atom_index = atom_index,
+                            .sym_index = sym_index,
+                        }) },
+                    });
+                    return;
+                },
+                .load_got, .load_tlv => {},
+                else => unreachable,
             }
 
-            const addr_reg = (try self.register_manager.allocReg(null, gp)).to64();
+            const addr_reg = try self.copyToTmpRegister(Type.usize, src_mcv.address());
             const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
             defer self.register_manager.unlockReg(addr_lock);
 
-            try self.genSetReg(Type.usize, addr_reg, switch (mcv) {
-                .memory => |addr| .{ .immediate = addr },
-                .load_direct => |sym_index| .{ .lea_direct = sym_index },
-                .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
-                else => unreachable,
-            });
-            try self.genInlineMemcpy(
-                .{ .ptr_stack_offset = stack_offset },
-                .{ .register = addr_reg },
-                .{ .immediate = abi_size },
-                .{ .dest_stack_base = .rsp },
+            try self.asmRegisterMemory(
+                try self.movMirTag(ty),
+                registerAlias(dst_reg, abi_size),
+                Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .{ .reg = addr_reg } }),
             );
         },
-        .register => |reg| {
-            switch (ty.zigTypeTag()) {
-                .Float => {
-                    if (intrinsicsAllowed(self.target.*, ty)) {
-                        const tag: Mir.Inst.Tag = switch (ty.tag()) {
-                            .f32 => .movss,
-                            .f64 => .movsd,
-                            else => return self.fail(
-                                "TODO genSetStackArg for register for type {}",
-                                .{ty.fmtDebug()},
-                            ),
-                        };
-                        // TODO verify this
-                        const ptr_size: Memory.PtrSize = switch (ty.tag()) {
-                            .f32 => .dword,
-                            .f64 => .qword,
-                            else => unreachable,
-                        };
-                        return self.asmMemoryRegister(tag, Memory.sib(ptr_size, .{
-                            .base = .rsp,
-                            .disp = -stack_offset,
-                        }), reg.to128());
-                    }
-
-                    return self.fail("TODO genSetStackArg for register with no intrinsics", .{});
+        .lea_direct, .lea_got => |sym_index| {
+            const atom_index = try self.getSymbolIndexForDecl(self.mod_fn.owner_decl);
+            _ = try self.addInst(.{
+                .tag = switch (src_mcv) {
+                    .lea_direct => .lea_linker,
+                    .lea_got => .mov_linker,
+                    else => unreachable,
                 },
-                else => {
-                    try self.asmMemoryRegister(.mov, Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
-                        .base = .rsp,
-                        .disp = -stack_offset,
-                    }), registerAlias(reg, abi_size));
+                .ops = switch (src_mcv) {
+                    .lea_direct => .direct_reloc,
+                    .lea_got => .got_reloc,
+                    else => unreachable,
                 },
-            }
-        },
-        .ptr_stack_offset,
-        .load_got,
-        .lea_direct,
-        .lea_tlv,
-        => {
-            const reg = try self.copyToTmpRegister(ty, mcv);
-            return self.genSetStackArg(ty, stack_offset, MCValue{ .register = reg });
-        },
-        .stack_offset => |mcv_off| {
-            if (abi_size <= 8) {
-                const reg = try self.copyToTmpRegister(ty, mcv);
-                return self.genSetStackArg(ty, stack_offset, MCValue{ .register = reg });
-            }
-
-            try self.genInlineMemcpy(
-                .{ .ptr_stack_offset = stack_offset },
-                .{ .ptr_stack_offset = mcv_off },
-                .{ .immediate = abi_size },
-                .{ .dest_stack_base = .rsp },
-            );
+                .data = .{ .payload = try self.addExtra(Mir.LeaRegisterReloc{
+                    .reg = @enumToInt(dst_reg.to64()),
+                    .atom_index = atom_index,
+                    .sym_index = sym_index,
+                }) },
+            });
         },
-    }
+        .lea_tlv => |sym_index| {
+            const atom_index = try self.getSymbolIndexForDecl(self.mod_fn.owner_decl);
+            if (self.bin_file.cast(link.File.MachO)) |_| {
+                _ = try self.addInst(.{
+                    .tag = .mov_linker,
+                    .ops = .tlv_reloc,
+                    .data = .{ .payload = try self.addExtra(Mir.LeaRegisterReloc{
+                        .reg = @enumToInt(Register.rdi),
+                        .atom_index = atom_index,
+                        .sym_index = sym_index,
+                    }) },
+                });
+                // TODO: spill registers before calling
+                try self.asmMemory(.call, Memory.sib(.qword, .{ .base = .{ .reg = .rdi } }));
+                try self.genSetReg(dst_reg.to64(), Type.usize, .{ .register = .rax });
+            } else return self.fail("TODO emit ptr to TLV sequence on {s}", .{
+                @tagName(self.bin_file.tag),
+            });
+        },
+    }
 }
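
Note: the immediate cases above pick the shortest usable encoding first. Summary
sketch of the branch order:

    // imm == 0                    -> xor r32, r32   (also clears the upper 32 bits)
    // imm fits in u32             -> mov r32, imm32 (implicit zero-extension)
    // abi_size <= 4, negative imm -> mov r, imm     (sign-extended form)
    // everything else             -> mov r64, imm64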
 
-fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: InlineMemcpyOpts) InnerError!void {
-    const base_reg = opts.dest_stack_base orelse .rbp;
+fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCValue) InnerError!void {
     const abi_size = @intCast(u32, ty.abiSize(self.target.*));
-    switch (mcv) {
-        .dead => unreachable,
-        .unreach, .none => return, // Nothing to do.
-        .undef => {
-            if (!self.wantSafety()) return; // The already existing value will do just fine.
-            // TODO Upgrade this to a memset call when we have that available.
-            switch (abi_size) {
-                1, 2, 4 => {
-                    const value: u64 = switch (abi_size) {
-                        1 => 0xaa,
-                        2 => 0xaaaa,
-                        4 => 0xaaaaaaaa,
-                        else => unreachable,
-                    };
-                    return self.asmMemoryImmediate(.mov, Memory.sib(
-                        Memory.PtrSize.fromSize(abi_size),
-                        .{ .base = base_reg, .disp = -stack_offset },
-                    ), Immediate.u(value));
-                },
-                8 => return self.genSetStack(
-                    ty,
-                    stack_offset,
-                    .{ .immediate = 0xaaaaaaaaaaaaaaaa },
-                    opts,
-                ),
-                else => |x| return self.genInlineMemset(
-                    .{ .ptr_stack_offset = stack_offset },
-                    .{ .immediate = 0xaa },
-                    .{ .immediate = x },
-                    opts,
-                ),
-            }
+    const dst_ptr_mcv: MCValue = switch (base) {
+        .none => .{ .immediate = @bitCast(u64, @as(i64, disp)) },
+        .reg => |base_reg| .{ .register_offset = .{ .reg = base_reg, .off = disp } },
+        .frame => |base_frame_index| .{ .lea_frame = .{ .index = base_frame_index, .off = disp } },
+    };
+    switch (src_mcv) {
+        .none, .unreach, .dead, .reserved_frame => unreachable,
+        .undef => if (self.wantSafety())
+            try self.genInlineMemset(dst_ptr_mcv, .{ .immediate = 0xaa }, .{ .immediate = abi_size }),
+        .immediate => |imm| switch (abi_size) {
+            1, 2, 4 => {
+                const immediate = if (ty.isSignedInt())
+                    Immediate.s(@truncate(i32, @bitCast(i64, imm)))
+                else
+                    Immediate.u(@intCast(u32, imm));
+                try self.asmMemoryImmediate(
+                    .mov,
+                    Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base, .disp = disp }),
+                    immediate,
+                );
+            },
+            3, 5...7 => unreachable,
+            else => if (math.cast(i32, @bitCast(i64, imm))) |small| {
+                try self.asmMemoryImmediate(
+                    .mov,
+                    Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base, .disp = disp }),
+                    Immediate.s(small),
+                );
+            } else {
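+                // There is no mov m64, imm64 encoding, so store the immediate
+                // as 32-bit chunks rather than spending a temporary register.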
+                var offset: i32 = 0;
+                while (offset < abi_size) : (offset += 4) try self.asmMemoryImmediate(
+                    .mov,
+                    Memory.sib(.dword, .{ .base = base, .disp = disp + offset }),
+                    if (ty.isSignedInt())
+                        Immediate.s(@truncate(
+                            i32,
+                            @bitCast(i64, imm) >> (math.cast(u6, offset * 8) orelse 63),
+                        ))
+                    else
+                        Immediate.u(@truncate(
+                            u32,
+                            if (math.cast(u6, offset * 8)) |shift| imm >> shift else 0,
+                        )),
+                );
+            },
         },
+        .eflags => |cc| try self.asmSetccMemory(Memory.sib(.byte, .{ .base = base, .disp = disp }), cc),
+        .register => |reg| try self.asmMemoryRegister(
+            try self.movMirTag(ty),
+            Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base, .disp = disp }),
+            registerAlias(reg, abi_size),
+        ),
         .register_overflow => |ro| {
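+            // Store the wrapped result (field 0) and the overflow bit
+            // (field 1) at their respective field offsets.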
-            const reg_lock = self.register_manager.lockReg(ro.reg);
-            defer if (reg_lock) |lock| self.register_manager.unlockReg(lock);
-
-            const wrapped_ty = ty.structFieldType(0);
-            try self.genSetStack(wrapped_ty, stack_offset, .{ .register = ro.reg }, .{});
-
-            const overflow_bit_ty = ty.structFieldType(1);
-            const overflow_bit_offset = ty.structFieldOffset(1, self.target.*);
-            const tmp_reg = try self.register_manager.allocReg(null, gp);
-            try self.asmSetccRegister(tmp_reg.to8(), ro.eflags);
-
-            return self.genSetStack(
-                overflow_bit_ty,
-                stack_offset - @intCast(i32, overflow_bit_offset),
-                .{ .register = tmp_reg.to8() },
-                .{},
+            try self.genSetMem(
+                base,
+                disp + @intCast(i32, ty.structFieldOffset(0, self.target.*)),
+                ty.structFieldType(0),
+                .{ .register = ro.reg },
+            );
+            try self.genSetMem(
+                base,
+                disp + @intCast(i32, ty.structFieldOffset(1, self.target.*)),
+                ty.structFieldType(1),
+                .{ .eflags = ro.eflags },
             );
         },
-        .eflags => |cc| try self.asmSetccMemory(
-            Memory.sib(.byte, .{ .base = base_reg, .disp = -stack_offset }),
-            cc,
-        ),
-        .immediate => |imm| {
-            // TODO
-            switch (abi_size) {
-                0 => {
-                    assert(ty.isError());
-                    try self.asmMemoryImmediate(.mov, Memory.sib(.byte, .{
-                        .base = base_reg,
-                        .disp = -stack_offset,
-                    }), Immediate.u(@truncate(u8, imm)));
-                },
-                1, 2, 4 => {
-                    const immediate = if (ty.isSignedInt())
-                        Immediate.s(@truncate(i32, @bitCast(i64, imm)))
-                    else
-                        Immediate.u(@intCast(u32, imm));
-                    try self.asmMemoryImmediate(.mov, Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
-                        .base = base_reg,
-                        .disp = -stack_offset,
-                    }), immediate);
-                },
-                3, 5...7 => unreachable,
-                else => {
-                    // A 64-bit write to memory would take two movs anyway, so we
-                    // instead use two 32-bit writes to avoid register allocation.
-                    if (math.cast(i32, @bitCast(i64, imm))) |small| {
-                        try self.asmMemoryImmediate(.mov, Memory.sib(
-                            Memory.PtrSize.fromSize(abi_size),
-                            .{ .base = base_reg, .disp = -stack_offset },
-                        ), Immediate.s(small));
-                    } else {
-                        var offset: i32 = 0;
-                        while (offset < abi_size) : (offset += 4) try self.asmMemoryImmediate(
-                            .mov,
-                            Memory.sib(.dword, .{ .base = base_reg, .disp = offset - stack_offset }),
-                            if (ty.isSignedInt())
-                                Immediate.s(@truncate(
-                                    i32,
-                                    @bitCast(i64, imm) >> (math.cast(u6, offset * 8) orelse 63),
-                                ))
-                            else
-                                Immediate.u(@truncate(
-                                    u32,
-                                    if (math.cast(u6, offset * 8)) |shift| imm >> shift else 0,
-                                )),
-                        );
-                    }
-                },
-            }
-        },
-        .register => |reg| {
-            switch (ty.zigTypeTag()) {
-                .Float => {
-                    if (intrinsicsAllowed(self.target.*, ty)) {
-                        const tag: Mir.Inst.Tag = switch (ty.tag()) {
-                            .f32 => .movss,
-                            .f64 => .movsd,
-                            else => return self.fail(
-                                "TODO genSetStack for register for type {}",
-                                .{ty.fmtDebug()},
-                            ),
-                        };
-                        const ptr_size: Memory.PtrSize = switch (ty.tag()) {
-                            .f32 => .dword,
-                            .f64 => .qword,
-                            else => unreachable,
-                        };
-                        return self.asmMemoryRegister(tag, Memory.sib(ptr_size, .{
-                            .base = base_reg.to64(),
-                            .disp = -stack_offset,
-                        }), reg.to128());
-                    }
-
-                    return self.fail("TODO genSetStack for register for type float with no intrinsics", .{});
-                },
-                else => {
-                    try self.genInlineMemcpyRegisterRegister(ty, base_reg, reg, stack_offset);
-                },
-            }
-        },
+        .register_offset,
         .memory,
+        .indirect,
         .load_direct,
-        .load_tlv,
         .lea_direct,
-        .lea_tlv,
-        => if (abi_size <= 8) {
-            const reg = try self.copyToTmpRegister(ty, mcv);
-            return self.genSetStack(ty, stack_offset, MCValue{ .register = reg }, opts);
-        } else {
-            const addr_reg = (try self.register_manager.allocReg(null, gp)).to64();
-            const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg);
-            defer self.register_manager.unlockReg(addr_lock);
-
-            try self.genSetReg(Type.usize, addr_reg, switch (mcv) {
-                .memory => |addr| .{ .immediate = addr },
-                .load_direct => |sym_index| .{ .lea_direct = sym_index },
-                .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
-                else => mcv,
-            });
-            try self.genInlineMemcpy(
-                .{ .ptr_stack_offset = stack_offset },
-                .{ .register = addr_reg },
-                .{ .immediate = abi_size },
-                .{},
-            );
-        },
-        .stack_offset => |off| if (abi_size <= 8) {
-            const tmp_reg = try self.copyToTmpRegister(ty, mcv);
-            const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
-            defer self.register_manager.unlockReg(tmp_lock);
-
-            try self.genSetStack(ty, stack_offset, .{ .register = tmp_reg }, opts);
-        } else try self.genInlineMemcpy(
-            .{ .ptr_stack_offset = stack_offset },
-            .{ .ptr_stack_offset = off },
-            .{ .immediate = abi_size },
-            .{},
-        ),
-        .ptr_stack_offset,
         .load_got,
-        => {
-            const tmp_reg = try self.copyToTmpRegister(ty, mcv);
-            const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
-            defer self.register_manager.unlockReg(tmp_lock);
-
-            try self.genSetStack(ty, stack_offset, .{ .register = tmp_reg }, opts);
+        .lea_got,
+        .load_tlv,
+        .lea_tlv,
+        .load_frame,
+        .lea_frame,
+        => switch (abi_size) {
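+            // Register-sized values round-trip through a temporary register;
+            // anything larger falls back to an inline memcpy of the source.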
+            0 => {},
+            1, 2, 4, 8 => {
+                const src_reg = try self.copyToTmpRegister(ty, src_mcv);
+                const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
+                defer self.register_manager.unlockReg(src_lock);
+
+                try self.genSetMem(base, disp, ty, .{ .register = src_reg });
+            },
+            else => try self.genInlineMemcpy(dst_ptr_mcv, src_mcv.address(), .{ .immediate = abi_size }),
         },
     }
 }
@@ -7165,10 +7432,14 @@ fn genInlineMemcpyRegisterRegister(
         var remainder = abi_size;
         while (remainder > 0) {
             const nearest_power_of_two = @as(u6, 1) << math.log2_int(u3, @intCast(u3, remainder));
-            try self.asmMemoryRegister(.mov, Memory.sib(Memory.PtrSize.fromSize(nearest_power_of_two), .{
-                .base = dst_reg,
-                .disp = -next_offset,
-            }), registerAlias(tmp_reg, nearest_power_of_two));
+            try self.asmMemoryRegister(
+                .mov,
+                Memory.sib(Memory.PtrSize.fromSize(nearest_power_of_two), .{
+                    .base = dst_reg,
+                    .disp = -next_offset,
+                }),
+                registerAlias(tmp_reg, nearest_power_of_two),
+            );
 
             if (nearest_power_of_two > 1) {
                 try self.genShiftBinOpMir(.shr, ty, .{ .register = tmp_reg }, .{
@@ -7191,121 +7462,11 @@ fn genInlineMemcpyRegisterRegister(
     }
 }
 
-const InlineMemcpyOpts = struct {
-    source_stack_base: ?Register = null,
-    dest_stack_base: ?Register = null,
-};
-
-fn genInlineMemcpy(
-    self: *Self,
-    dst_ptr: MCValue,
-    src_ptr: MCValue,
-    len: MCValue,
-    opts: InlineMemcpyOpts,
-) InnerError!void {
-    const ssbase_lock: ?RegisterLock = if (opts.source_stack_base) |reg|
-        self.register_manager.lockReg(reg)
-    else
-        null;
-    defer if (ssbase_lock) |reg| self.register_manager.unlockReg(reg);
-
-    const dsbase_lock: ?RegisterLock = if (opts.dest_stack_base) |reg|
-        self.register_manager.lockReg(reg)
-    else
-        null;
-    defer if (dsbase_lock) |lock| self.register_manager.unlockReg(lock);
-
+fn genInlineMemcpy(self: *Self, dst_ptr: MCValue, src_ptr: MCValue, len: MCValue) InnerError!void {
     try self.spillRegisters(&.{ .rdi, .rsi, .rcx });
-
-    switch (dst_ptr) {
-        .lea_tlv,
-        .load_tlv,
-        => {
-            try self.genSetReg(Type.usize, .rdi, switch (dst_ptr) {
-                .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
-                else => dst_ptr,
-            });
-        },
-        .memory,
-        .load_got,
-        .lea_direct,
-        .load_direct,
-        => {
-            try self.genSetReg(Type.usize, .rdi, switch (dst_ptr) {
-                .memory => |addr| .{ .immediate = addr },
-                .load_direct => |sym_index| .{ .lea_direct = sym_index },
-                else => dst_ptr,
-            });
-            // Load the pointer, which is stored in memory
-            try self.asmRegisterMemory(.mov, .rdi, Memory.sib(.qword, .{ .base = .rdi }));
-        },
-        .stack_offset, .ptr_stack_offset => |off| {
-            try self.asmRegisterMemory(switch (dst_ptr) {
-                .stack_offset => .mov,
-                .ptr_stack_offset => .lea,
-                else => unreachable,
-            }, .rdi, Memory.sib(.qword, .{
-                .base = opts.dest_stack_base orelse .rbp,
-                .disp = -off,
-            }));
-        },
-        .register => |reg| {
-            try self.asmRegisterRegister(
-                .mov,
-                registerAlias(.rdi, @intCast(u32, @divExact(reg.bitSize(), 8))),
-                reg,
-            );
-        },
-        else => {
-            return self.fail("TODO implement memcpy for setting stack when dest is {}", .{dst_ptr});
-        },
-    }
-
-    switch (src_ptr) {
-        .lea_tlv,
-        .load_tlv,
-        => {
-            try self.genSetReg(Type.usize, .rsi, switch (src_ptr) {
-                .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
-                else => src_ptr,
-            });
-        },
-        .memory,
-        .load_got,
-        .lea_direct,
-        .load_direct,
-        => {
-            try self.genSetReg(Type.usize, .rsi, switch (src_ptr) {
-                .memory => |addr| .{ .immediate = addr },
-                .load_direct => |sym_index| .{ .lea_direct = sym_index },
-                else => src_ptr,
-            });
-            // Load the pointer, which is stored in memory
-            try self.asmRegisterMemory(.mov, .rsi, Memory.sib(.qword, .{ .base = .rsi }));
-        },
-        .stack_offset, .ptr_stack_offset => |off| {
-            try self.asmRegisterMemory(switch (src_ptr) {
-                .stack_offset => .mov,
-                .ptr_stack_offset => .lea,
-                else => unreachable,
-            }, .rsi, Memory.sib(.qword, .{
-                .base = opts.source_stack_base orelse .rbp,
-                .disp = -off,
-            }));
-        },
-        .register => |reg| {
-            try self.asmRegisterRegister(
-                .mov,
-                registerAlias(.rsi, @intCast(u32, @divExact(reg.bitSize(), 8))),
-                reg,
-            );
-        },
-        else => {
-            return self.fail("TODO implement memcpy for setting stack when src is {}", .{src_ptr});
-        },
-    }
-
-    try self.genSetReg(Type.usize, .rcx, len);
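+    // The movs string instruction takes fixed operands: destination in rdi,
+    // source in rsi, byte count in rcx.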
+    try self.genSetReg(.rdi, Type.usize, dst_ptr);
+    try self.genSetReg(.rsi, Type.usize, src_ptr);
+    try self.genSetReg(.rcx, Type.usize, len);
     _ = try self.addInst(.{
         .tag = .movs,
         .ops = .string,
@@ -7313,67 +7474,11 @@ fn genInlineMemcpy(
     });
 }
 
-fn genInlineMemset(
-    self: *Self,
-    dst_ptr: MCValue,
-    value: MCValue,
-    len: MCValue,
-    opts: InlineMemcpyOpts,
-) InnerError!void {
-    const dsbase_lock: ?RegisterLock = if (opts.dest_stack_base) |reg|
-        self.register_manager.lockReg(reg)
-    else
-        null;
-    defer if (dsbase_lock) |lock| self.register_manager.unlockReg(lock);
-
+fn genInlineMemset(self: *Self, dst_ptr: MCValue, value: MCValue, len: MCValue) InnerError!void {
     try self.spillRegisters(&.{ .rdi, .al, .rcx });
-
-    switch (dst_ptr) {
-        .lea_tlv,
-        .load_tlv,
-        => {
-            try self.genSetReg(Type.usize, .rdi, switch (dst_ptr) {
-                .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
-                else => dst_ptr,
-            });
-        },
-        .load_got => try self.genSetReg(Type.usize, .rdi, dst_ptr),
-        .memory,
-        .lea_direct,
-        .load_direct,
-        => {
-            try self.genSetReg(Type.usize, .rdi, switch (dst_ptr) {
-                .memory => |addr| .{ .immediate = addr },
-                .load_direct => |sym_index| .{ .lea_direct = sym_index },
-                else => dst_ptr,
-            });
-            // Load the pointer, which is stored in memory
-            try self.asmRegisterMemory(.mov, .rdi, Memory.sib(.qword, .{ .base = .rdi }));
-        },
-        .stack_offset, .ptr_stack_offset => |off| {
-            try self.asmRegisterMemory(switch (dst_ptr) {
-                .stack_offset => .mov,
-                .ptr_stack_offset => .lea,
-                else => unreachable,
-            }, .rdi, Memory.sib(.qword, .{
-                .base = opts.dest_stack_base orelse .rbp,
-                .disp = -off,
-            }));
-        },
-        .register => |reg| {
-            try self.asmRegisterRegister(
-                .mov,
-                registerAlias(.rdi, @intCast(u32, @divExact(reg.bitSize(), 8))),
-                reg,
-            );
-        },
-        else => {
-            return self.fail("TODO implement memset for setting stack when dest is {}", .{dst_ptr});
-        },
-    }
-
-    try self.genSetReg(Type.u8, .al, value);
-    try self.genSetReg(Type.usize, .rcx, len);
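+    // The stos string instruction stores al into [rdi], rcx times.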
+    try self.genSetReg(.rdi, Type.usize, dst_ptr);
+    try self.genSetReg(.al, Type.u8, value);
+    try self.genSetReg(.rcx, Type.usize, len);
     _ = try self.addInst(.{
         .tag = .stos,
         .ops = .string,
@@ -7381,307 +7486,6 @@ fn genInlineMemset(
     });
 }
 
-fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void {
-    const abi_size = @intCast(u32, ty.abiSize(self.target.*));
-    if (abi_size > 8) return self.fail("genSetReg called with a value larger than one register", .{});
-    switch (mcv) {
-        .dead => unreachable,
-        .register_overflow => unreachable,
-        .ptr_stack_offset => |off| {
-            try self.asmRegisterMemory(
-                .lea,
-                registerAlias(reg, abi_size),
-                Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .rbp, .disp = -off }),
-            );
-        },
-        .unreach, .none => return, // Nothing to do.
-        .undef => {
-            if (!self.wantSafety()) return; // The already existing value will do just fine.
-            // Write the debug undefined value.
-            switch (self.regBitSize(ty)) {
-                8 => return self.genSetReg(ty, reg, .{ .immediate = 0xaa }),
-                16 => return self.genSetReg(ty, reg, .{ .immediate = 0xaaaa }),
-                32 => return self.genSetReg(ty, reg, .{ .immediate = 0xaaaaaaaa }),
-                64 => return self.genSetReg(ty, reg, .{ .immediate = 0xaaaaaaaaaaaaaaaa }),
-                else => unreachable,
-            }
-        },
-        .eflags => |cc| {
-            return self.asmSetccRegister(reg.to8(), cc);
-        },
-        .immediate => |imm| {
-            if (imm == 0) {
-                // 32-bit moves zero-extend to 64-bit, so xoring the 32-bit
-                // register is the fastest way to zero a register.
-                try self.asmRegisterRegister(.xor, reg.to32(), reg.to32());
-            } else if (abi_size > 4 and math.cast(u32, imm) != null) {
-                // 32-bit moves zero-extend to 64-bit.
-                try self.asmRegisterImmediate(.mov, reg.to32(), Immediate.u(imm));
-            } else if (abi_size <= 4 and @bitCast(i64, imm) < 0) {
-                try self.asmRegisterImmediate(
-                    .mov,
-                    registerAlias(reg, abi_size),
-                    Immediate.s(@intCast(i32, @bitCast(i64, imm))),
-                );
-            } else {
-                try self.asmRegisterImmediate(
-                    .mov,
-                    registerAlias(reg, abi_size),
-                    Immediate.u(imm),
-                );
-            }
-        },
-        .register => |src_reg| {
-            // If the registers are the same, nothing to do.
-            if (src_reg.id() == reg.id())
-                return;
-
-            switch (ty.zigTypeTag()) {
-                .Int => switch (ty.intInfo(self.target.*).signedness) {
-                    .signed => {
-                        if (abi_size <= 4) {
-                            return self.asmRegisterRegister(
-                                .movsx,
-                                reg.to64(),
-                                registerAlias(src_reg, abi_size),
-                            );
-                        }
-                    },
-                    .unsigned => {
-                        if (abi_size <= 2) {
-                            return self.asmRegisterRegister(
-                                .movzx,
-                                reg.to64(),
-                                registerAlias(src_reg, abi_size),
-                            );
-                        }
-                    },
-                },
-                .Float => {
-                    if (intrinsicsAllowed(self.target.*, ty)) {
-                        const tag: Mir.Inst.Tag = switch (ty.tag()) {
-                            .f32 => .movss,
-                            .f64 => .movsd,
-                            else => return self.fail("TODO genSetReg from register for {}", .{ty.fmtDebug()}),
-                        };
-                        return self.asmRegisterRegister(tag, reg.to128(), src_reg.to128());
-                    }
-                    return self.fail("TODO genSetReg from register for float with no intrinsics", .{});
-                },
-                else => {},
-            }
-
-            try self.asmRegisterRegister(.mov, registerAlias(reg, abi_size), registerAlias(src_reg, abi_size));
-        },
-        .memory => |addr| switch (ty.zigTypeTag()) {
-            .Float => {
-                const base_reg = (try self.register_manager.allocReg(null, gp)).to64();
-                try self.genSetReg(Type.usize, base_reg, .{ .immediate = addr });
-
-                if (intrinsicsAllowed(self.target.*, ty)) {
-                    return self.asmRegisterMemory(
-                        switch (ty.tag()) {
-                            .f32 => .movss,
-                            .f64 => .movsd,
-                            else => return self.fail("TODO genSetReg from memory for {}", .{
-                                ty.fmt(self.bin_file.options.module.?),
-                            }),
-                        },
-                        reg.to128(),
-                        Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base_reg }),
-                    );
-                }
-
-                return self.fail("TODO genSetReg from memory for float with no intrinsics", .{});
-            },
-            else => {
-                if (addr <= math.maxInt(i32)) {
-                    try self.asmRegisterMemory(
-                        .mov,
-                        registerAlias(reg, abi_size),
-                        Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
-                            .base = .ds,
-                            .disp = @intCast(i32, addr),
-                        }),
-                    );
-                } else {
-                    if (reg.to64() == .rax) {
-                        // If this is RAX, we can use a direct load.
-                        // Otherwise, we need to load the address, then indirectly load the value.
-                        _ = try self.addInst(.{
-                            .tag = .mov_moffs,
-                            .ops = .rax_moffs,
-                            .data = .{ .payload = try self.addExtra(Mir.MemoryMoffs.encode(.ds, addr)) },
-                        });
-                    } else {
-                        // Rather than duplicate the logic used for the move, we just use a self-call with a new MCValue.
-                        try self.genSetReg(Type.usize, reg, MCValue{ .immediate = addr });
-                        try self.asmRegisterMemory(
-                            .mov,
-                            registerAlias(reg, abi_size),
-                            Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = reg.to64() }),
-                        );
-                    }
-                }
-            },
-        },
-        .load_got, .lea_direct => |sym_index| {
-            const atom_index = try self.getSymbolIndexForDecl(self.mod_fn.owner_decl);
-            _ = try self.addInst(.{
-                .tag = switch (mcv) {
-                    .load_got => .mov_linker,
-                    .lea_direct => .lea_linker,
-                    else => unreachable,
-                },
-                .ops = switch (mcv) {
-                    .load_got => .got_reloc,
-                    .lea_direct => .direct_reloc,
-                    else => unreachable,
-                },
-                .data = .{ .payload = try self.addExtra(Mir.LeaRegisterReloc{
-                    .reg = @enumToInt(reg),
-                    .atom_index = atom_index,
-                    .sym_index = sym_index,
-                }) },
-            });
-        },
-        .load_direct => |sym_index| {
-            switch (ty.zigTypeTag()) {
-                .Float => {
-                    const addr_reg = (try self.register_manager.allocReg(null, gp)).to64();
-                    try self.genSetReg(Type.usize, addr_reg, .{ .lea_direct = sym_index });
-
-                    if (intrinsicsAllowed(self.target.*, ty)) {
-                        return self.asmRegisterMemory(
-                            switch (ty.tag()) {
-                                .f32 => .movss,
-                                .f64 => .movsd,
-                                else => return self.fail("TODO genSetReg from memory for {}", .{
-                                    ty.fmt(self.bin_file.options.module.?),
-                                }),
-                            },
-                            reg.to128(),
-                            Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = addr_reg }),
-                        );
-                    }
-
-                    return self.fail("TODO genSetReg from memory for float with no intrinsics", .{});
-                },
-                else => {
-                    const atom_index = try self.getSymbolIndexForDecl(self.mod_fn.owner_decl);
-                    _ = try self.addInst(.{
-                        .tag = .mov_linker,
-                        .ops = .direct_reloc,
-                        .data = .{ .payload = try self.addExtra(Mir.LeaRegisterReloc{
-                            .reg = @enumToInt(registerAlias(reg, abi_size)),
-                            .atom_index = atom_index,
-                            .sym_index = sym_index,
-                        }) },
-                    });
-                },
-            }
-        },
-        .lea_tlv => |sym_index| {
-            const atom_index = try self.getSymbolIndexForDecl(self.mod_fn.owner_decl);
-            if (self.bin_file.cast(link.File.MachO)) |_| {
-                _ = try self.addInst(.{
-                    .tag = .lea_linker,
-                    .ops = .tlv_reloc,
-                    .data = .{ .payload = try self.addExtra(Mir.LeaRegisterReloc{
-                        .reg = @enumToInt(Register.rdi),
-                        .atom_index = atom_index,
-                        .sym_index = sym_index,
-                    }) },
-                });
-                // TODO: spill registers before calling
-                try self.asmMemory(.call, Memory.sib(.qword, .{ .base = .rdi }));
-                try self.genSetReg(Type.usize, reg, .{ .register = .rax });
-            } else return self.fail("TODO emit ptr to TLV sequence on {s}", .{@tagName(self.bin_file.tag)});
-        },
-        .load_tlv => |sym_index| {
-            const base_reg = switch (ty.zigTypeTag()) {
-                .Float => (try self.register_manager.allocReg(null, gp)).to64(),
-                else => reg.to64(),
-            };
-            try self.genSetReg(Type.usize, base_reg, .{ .lea_tlv = sym_index });
-            switch (ty.zigTypeTag()) {
-                .Float => if (intrinsicsAllowed(self.target.*, ty)) {
-                    return self.asmRegisterMemory(
-                        switch (ty.tag()) {
-                            .f32 => .movss,
-                            .f64 => .movsd,
-                            else => return self.fail("TODO genSetReg from memory for {}", .{
-                                ty.fmt(self.bin_file.options.module.?),
-                            }),
-                        },
-                        reg.to128(),
-                        Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base_reg }),
-                    );
-                } else return self.fail("TODO genSetReg from memory for float with no intrinsics", .{}),
-                else => try self.asmRegisterMemory(
-                    .mov,
-                    registerAlias(reg, abi_size),
-                    Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base_reg }),
-                ),
-            }
-        },
-        .stack_offset => |off| {
-            switch (ty.zigTypeTag()) {
-                .Int => switch (ty.intInfo(self.target.*).signedness) {
-                    .signed => {
-                        if (abi_size <= 4) {
-                            return self.asmRegisterMemory(
-                                .movsx,
-                                reg.to64(),
-                                Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
-                                    .base = .rbp,
-                                    .disp = -off,
-                                }),
-                            );
-                        }
-                    },
-                    .unsigned => {
-                        if (abi_size <= 2) {
-                            return self.asmRegisterMemory(
-                                .movzx,
-                                reg.to64(),
-                                Memory.sib(Memory.PtrSize.fromSize(abi_size), .{
-                                    .base = .rbp,
-                                    .disp = -off,
-                                }),
-                            );
-                        }
-                    },
-                },
-                .Float => {
-                    if (intrinsicsAllowed(self.target.*, ty)) {
-                        const tag: Mir.Inst.Tag = switch (ty.tag()) {
-                            .f32 => .movss,
-                            .f64 => .movsd,
-                            else => return self.fail(
-                                "TODO genSetReg from stack offset for {}",
-                                .{ty.fmtDebug()},
-                            ),
-                        };
-                        return self.asmRegisterMemory(tag, reg.to128(), Memory.sib(
-                            Memory.PtrSize.fromSize(abi_size),
-                            .{ .base = .rbp, .disp = -off },
-                        ));
-                    }
-                    return self.fail("TODO genSetReg from stack offset for float with no intrinsics", .{});
-                },
-                else => {},
-            }
-
-            try self.asmRegisterMemory(
-                .mov,
-                registerAlias(reg, abi_size),
-                Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .rbp, .disp = -off }),
-            );
-        },
-    }
-}
-
 fn airPtrToInt(self: *Self, inst: Air.Inst.Index) !void {
     const un_op = self.air.instructions.items(.data)[inst].un_op;
     const result = result: {
@@ -7691,7 +7495,7 @@ fn airPtrToInt(self: *Self, inst: Air.Inst.Index) !void {
 
         const dst_mcv = try self.allocRegOrMem(inst, true);
         const dst_ty = self.air.typeOfIndex(inst);
-        try self.setRegOrMem(dst_ty, dst_mcv, src_mcv);
+        try self.genCopy(dst_ty, dst_mcv, src_mcv);
         break :result dst_mcv;
     };
     return self.finishAir(inst, result, .{ un_op, .none, .none });
@@ -7711,7 +7515,7 @@ fn airBitCast(self: *Self, inst: Air.Inst.Index) !void {
         defer if (operand_lock) |lock| self.register_manager.unlockReg(lock);
 
         const dest = try self.allocRegOrMem(inst, true);
-        try self.setRegOrMem(self.air.typeOfIndex(inst), dest, operand);
+        try self.genCopy(self.air.typeOfIndex(inst), dest, operand);
         break :result dest;
     };
     log.debug("airBitCast(%{d}): {}", .{ inst, result });
@@ -7720,16 +7524,23 @@ fn airBitCast(self: *Self, inst: Air.Inst.Index) !void {
 
 fn airArrayToSlice(self: *Self, inst: Air.Inst.Index) !void {
     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
+
+    const slice_ty = self.air.typeOfIndex(inst);
     const ptr_ty = self.air.typeOf(ty_op.operand);
     const ptr = try self.resolveInst(ty_op.operand);
     const array_ty = ptr_ty.childType();
     const array_len = array_ty.arrayLen();
 
-    const stack_offset = @intCast(i32, try self.allocMem(inst, 16, 16));
-    try self.genSetStack(ptr_ty, stack_offset, ptr, .{});
-    try self.genSetStack(Type.u64, stack_offset - 8, .{ .immediate = array_len }, .{});
+    const frame_index = try self.allocFrameIndex(FrameAlloc.initType(slice_ty, self.target.*));
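+    // A slice is laid out as { ptr, len }: write the pointer at offset 0 and
+    // the length just past it.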
+    try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, ptr);
+    try self.genSetMem(
+        .{ .frame = frame_index },
+        @intCast(i32, ptr_ty.abiSize(self.target.*)),
+        Type.usize,
+        .{ .immediate = array_len },
+    );
 
-    const result = MCValue{ .stack_offset = stack_offset };
+    const result = MCValue{ .load_frame = .{ .index = frame_index } };
     return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
 }
 
@@ -7758,29 +7569,31 @@ fn airFloatToInt(self: *Self, inst: Air.Inst.Index) !void {
     }
 
     // move float src to ST(0)
-    const stack_offset = switch (operand) {
-        .stack_offset, .ptr_stack_offset => |offset| offset,
-        else => blk: {
-            const offset = @intCast(i32, try self.allocMem(
-                inst,
-                src_abi_size,
-                src_ty.abiAlignment(self.target.*),
-            ));
-            try self.genSetStack(src_ty, offset, operand, .{});
-            break :blk offset;
+    const frame_addr: FrameAddr = switch (operand) {
+        .load_frame => |frame_addr| frame_addr,
+        else => frame_addr: {
+            const frame_index = try self.allocFrameIndex(FrameAlloc.initType(src_ty, self.target.*));
+            try self.genSetMem(.{ .frame = frame_index }, 0, src_ty, operand);
+            break :frame_addr .{ .index = frame_index };
         },
     };
-    try self.asmMemory(.fld, Memory.sib(Memory.PtrSize.fromSize(src_abi_size), .{
-        .base = .rbp,
-        .disp = -stack_offset,
-    }));
+    try self.asmMemory(
+        .fld,
+        Memory.sib(Memory.PtrSize.fromSize(src_abi_size), .{
+            .base = .{ .frame = frame_addr.index },
+            .disp = frame_addr.off,
+        }),
+    );
 
     // convert
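+    // fisttp (SSE3) stores the value truncated toward zero, independent of
+    // the current x87 rounding mode.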
     const stack_dst = try self.allocRegOrMem(inst, false);
-    try self.asmMemory(.fisttp, Memory.sib(Memory.PtrSize.fromSize(dst_abi_size), .{
-        .base = .rbp,
-        .disp = -stack_dst.stack_offset,
-    }));
+    try self.asmMemory(
+        .fisttp,
+        Memory.sib(Memory.PtrSize.fromSize(dst_abi_size), .{
+            .base = .{ .frame = stack_dst.load_frame.index },
+            .disp = stack_dst.load_frame.off,
+        }),
+    );
 
     return self.finishAir(inst, stack_dst, .{ ty_op.operand, .none, .none });
 }
@@ -7800,20 +7613,32 @@ fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void {
 
     const exp_mcv = try self.resolveInst(extra.expected_value);
     if (val_abi_size > 8) switch (exp_mcv) {
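+        // cmpxchg16b compares against rdx:rax and, on success, stores rcx:rbx.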
-        .stack_offset => |exp_off| {
-            try self.genSetReg(Type.usize, .rax, .{ .stack_offset = exp_off - 0 });
-            try self.genSetReg(Type.usize, .rdx, .{ .stack_offset = exp_off - 8 });
+        .load_frame => |frame_addr| {
+            try self.genSetReg(.rax, Type.usize, .{ .load_frame = .{
+                .index = frame_addr.index,
+                .off = frame_addr.off + 0,
+            } });
+            try self.genSetReg(.rdx, Type.usize, .{ .load_frame = .{
+                .index = frame_addr.index,
+                .off = frame_addr.off + 8,
+            } });
         },
         else => return self.fail("TODO implement cmpxchg for {s}", .{@tagName(exp_mcv)}),
-    } else try self.genSetReg(val_ty, .rax, exp_mcv);
+    } else try self.genSetReg(.rax, val_ty, exp_mcv);
     const rax_lock = self.register_manager.lockRegAssumeUnused(.rax);
     defer self.register_manager.unlockReg(rax_lock);
 
     const new_mcv = try self.resolveInst(extra.new_value);
     const new_reg: Register = if (val_abi_size > 8) switch (new_mcv) {
-        .stack_offset => |new_off| new: {
-            try self.genSetReg(Type.usize, .rbx, .{ .stack_offset = new_off - 0 });
-            try self.genSetReg(Type.usize, .rcx, .{ .stack_offset = new_off - 8 });
+        .load_frame => |frame_addr| new: {
+            try self.genSetReg(.rbx, Type.usize, .{ .load_frame = .{
+                .index = frame_addr.index,
+                .off = frame_addr.off + 0,
+            } });
+            try self.genSetReg(.rcx, Type.usize, .{ .load_frame = .{
+                .index = frame_addr.index,
+                .off = frame_addr.off + 8,
+            } });
             break :new undefined;
         },
         else => return self.fail("TODO implement cmpxchg for {s}", .{@tagName(exp_mcv)}),
@@ -7823,11 +7648,19 @@ fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void {
 
     const ptr_size = Memory.PtrSize.fromSize(val_abi_size);
     const ptr_mem = switch (ptr_mcv) {
-        .register => |reg| Memory.sib(ptr_size, .{ .base = reg }),
-        .ptr_stack_offset => |off| Memory.sib(ptr_size, .{ .base = .rbp, .disp = -off }),
-        else => Memory.sib(ptr_size, .{ .base = try self.copyToTmpRegister(ptr_ty, ptr_mcv) }),
+        .register => |reg| Memory.sib(ptr_size, .{ .base = .{ .reg = reg } }),
+        .lea_frame => |frame_addr| Memory.sib(ptr_size, .{
+            .base = .{ .frame = frame_addr.index },
+            .disp = frame_addr.off,
+        }),
+        else => Memory.sib(ptr_size, .{ .base = .{
+            .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv),
+        } }),
+    };
+    const mem_lock = switch (ptr_mem.base()) {
+        .none, .frame => null,
+        .reg => |reg| self.register_manager.lockReg(reg),
     };
-    const mem_lock = if (ptr_mem.base()) |reg| self.register_manager.lockReg(reg) else null;
     defer if (mem_lock) |lock| self.register_manager.unlockReg(lock);
 
     try self.spillEflagsIfOccupied();
@@ -7851,9 +7684,24 @@ fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void {
         }
 
         const dst_mcv = try self.allocRegOrMem(inst, false);
-        try self.genSetStack(Type.bool, dst_mcv.stack_offset - 16, .{ .eflags = .ne }, .{});
-        try self.genSetStack(Type.usize, dst_mcv.stack_offset - 8, .{ .register = .rdx }, .{});
-        try self.genSetStack(Type.usize, dst_mcv.stack_offset - 0, .{ .register = .rax }, .{});
+        try self.genSetMem(
+            .{ .frame = dst_mcv.load_frame.index },
+            dst_mcv.load_frame.off + 16,
+            Type.bool,
+            .{ .eflags = .ne },
+        );
+        try self.genSetMem(
+            .{ .frame = dst_mcv.load_frame.index },
+            dst_mcv.load_frame.off + 8,
+            Type.usize,
+            .{ .register = .rdx },
+        );
+        try self.genSetMem(
+            .{ .frame = dst_mcv.load_frame.index },
+            dst_mcv.load_frame.off + 0,
+            Type.usize,
+            .{ .register = .rax },
+        );
         break :result dst_mcv;
     };
     return self.finishAir(inst, result, .{ extra.ptr, extra.expected_value, extra.new_value });
@@ -7884,11 +7732,19 @@ fn atomicOp(
     const val_abi_size = @intCast(u32, val_ty.abiSize(self.target.*));
     const ptr_size = Memory.PtrSize.fromSize(val_abi_size);
     const ptr_mem = switch (ptr_mcv) {
-        .register => |reg| Memory.sib(ptr_size, .{ .base = reg }),
-        .ptr_stack_offset => |off| Memory.sib(ptr_size, .{ .base = .rbp, .disp = -off }),
-        else => Memory.sib(ptr_size, .{ .base = try self.copyToTmpRegister(ptr_ty, ptr_mcv) }),
+        .register => |reg| Memory.sib(ptr_size, .{ .base = .{ .reg = reg } }),
+        .lea_frame => |frame_addr| Memory.sib(ptr_size, .{
+            .base = .{ .frame = frame_addr.index },
+            .disp = frame_addr.off,
+        }),
+        else => Memory.sib(ptr_size, .{ .base = .{
+            .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv),
+        } }),
+    };
+    const mem_lock = switch (ptr_mem.base()) {
+        .none, .frame => null,
+        .reg => |reg| self.register_manager.lockReg(reg),
     };
-    const mem_lock = if (ptr_mem.base()) |reg| self.register_manager.lockReg(reg) else null;
     defer if (mem_lock) |lock| self.register_manager.unlockReg(lock);
 
     const method: enum { lock, loop, libcall } = if (val_ty.isRuntimeFloat())
@@ -7928,7 +7784,7 @@ fn atomicOp(
             const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
             defer self.register_manager.unlockReg(dst_lock);
 
-            try self.genSetReg(val_ty, dst_reg, val_mcv);
+            try self.genSetReg(dst_reg, val_ty, val_mcv);
             if (rmw_op == std.builtin.AtomicRmwOp.Sub and tag == .xadd) {
                 try self.genUnOpMir(.neg, val_ty, dst_mcv);
             }
@@ -7952,10 +7808,10 @@ fn atomicOp(
             try self.asmRegisterMemory(.mov, registerAlias(.rax, val_abi_size), ptr_mem);
             const loop = @intCast(u32, self.mir_instructions.len);
             if (rmw_op != std.builtin.AtomicRmwOp.Xchg) {
-                try self.genSetReg(val_ty, tmp_reg, .{ .register = .rax });
+                try self.genSetReg(tmp_reg, val_ty, .{ .register = .rax });
             }
             if (rmw_op) |op| switch (op) {
-                .Xchg => try self.genSetReg(val_ty, tmp_reg, val_mcv),
+                .Xchg => try self.genSetReg(tmp_reg, val_ty, val_mcv),
                 .Add => try self.genBinOpMir(.add, val_ty, tmp_mcv, val_mcv),
                 .Sub => try self.genBinOpMir(.sub, val_ty, tmp_mcv, val_mcv),
                 .And => try self.genBinOpMir(.@"and", val_ty, tmp_mcv, val_mcv),
@@ -7990,12 +7846,12 @@ fn atomicOp(
                             registerAlias(val_reg, cmov_abi_size),
                             cc,
                         ),
-                        .stack_offset => |val_off| try self.asmCmovccRegisterMemory(
+                        .load_frame => |frame_addr| try self.asmCmovccRegisterMemory(
                             registerAlias(tmp_reg, cmov_abi_size),
-                            Memory.sib(
-                                Memory.PtrSize.fromSize(cmov_abi_size),
-                                .{ .base = .rbp, .disp = -val_off },
-                            ),
+                            Memory.sib(Memory.PtrSize.fromSize(cmov_abi_size), .{
+                                .base = .{ .frame = frame_addr.index },
+                                .disp = frame_addr.off,
+                            }),
                             cc,
                         ),
                         else => {
@@ -8028,9 +7884,15 @@ fn atomicOp(
             }));
             const loop = @intCast(u32, self.mir_instructions.len);
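+            // Retry loop: rdx:rax holds the current value and rcx:rbx the
+            // desired replacement for lock cmpxchg16b.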
             switch (val_mcv) {
-                .stack_offset => |val_off| {
-                    const val_lo_mem = Memory.sib(.qword, .{ .base = .rbp, .disp = 0 - val_off });
-                    const val_hi_mem = Memory.sib(.qword, .{ .base = .rbp, .disp = 8 - val_off });
+                .load_frame => |frame_addr| {
+                    const val_lo_mem = Memory.sib(.qword, .{
+                        .base = .{ .frame = frame_addr.index },
+                        .disp = frame_addr.off + 0,
+                    });
+                    const val_hi_mem = Memory.sib(.qword, .{
+                        .base = .{ .frame = frame_addr.index },
+                        .disp = frame_addr.off + 8,
+                    });
 
                     if (rmw_op != std.builtin.AtomicRmwOp.Xchg) {
                         try self.asmRegisterRegister(.mov, .rbx, .rax);
@@ -8087,12 +7949,18 @@ fn atomicOp(
             const dst_mcv = try self.allocTempRegOrMem(val_ty, false);
             try self.asmMemoryRegister(
                 .mov,
-                Memory.sib(.qword, .{ .base = .rbp, .disp = 0 - dst_mcv.stack_offset }),
+                Memory.sib(.qword, .{
+                    .base = .{ .frame = dst_mcv.load_frame.index },
+                    .disp = dst_mcv.load_frame.off + 0,
+                }),
                 .rax,
             );
             try self.asmMemoryRegister(
                 .mov,
-                Memory.sib(.qword, .{ .base = .rbp, .disp = 8 - dst_mcv.stack_offset }),
+                Memory.sib(.qword, .{
+                    .base = .{ .frame = dst_mcv.load_frame.index },
+                    .disp = dst_mcv.load_frame.off + 8,
+                }),
                 .rdx,
             );
             return dst_mcv;
@@ -8139,7 +8007,7 @@ fn airAtomicLoad(self: *Self, inst: Air.Inst.Index) !void {
     else
         try self.allocRegOrMem(inst, true);
 
-    try self.load(dst_mcv, ptr_mcv, ptr_ty);
+    try self.load(dst_mcv, ptr_ty, ptr_mcv);
     return self.finishAir(inst, dst_mcv, .{ atomic_load.ptr, .none, .none });
 }
 
@@ -8184,16 +8052,16 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
     const elem_abi_size = @intCast(u31, elem_ty.abiSize(self.target.*));
 
     if (elem_abi_size == 1) {
-        const ptr = switch (dst_ptr_ty.ptrSize()) {
+        const ptr: MCValue = switch (dst_ptr_ty.ptrSize()) {
             // TODO: this only handles slices stored on the stack
-            .Slice => @as(MCValue, .{ .stack_offset = dst_ptr.stack_offset - 0 }),
+            .Slice => dst_ptr,
             .One => dst_ptr,
             .C, .Many => unreachable,
         };
-        const len = switch (dst_ptr_ty.ptrSize()) {
+        const len: MCValue = switch (dst_ptr_ty.ptrSize()) {
             // TODO: this only handles slices stored on the stack
-            .Slice => @as(MCValue, .{ .stack_offset = dst_ptr.stack_offset - 8 }),
-            .One => @as(MCValue, .{ .immediate = dst_ptr_ty.childType().arrayLen() }),
+            .Slice => dst_ptr.address().offset(8).deref(),
+            .One => .{ .immediate = dst_ptr_ty.childType().arrayLen() },
             .C, .Many => unreachable,
         };
         const len_lock: ?RegisterLock = switch (len) {
@@ -8202,7 +8070,7 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
         };
         defer if (len_lock) |lock| self.register_manager.unlockReg(lock);
 
-        try self.genInlineMemset(ptr, src_val, len, .{});
+        try self.genInlineMemset(ptr, src_val, len);
         return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none });
     }
 
@@ -8215,8 +8083,8 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
             const slice_ptr_ty = dst_ptr_ty.slicePtrFieldType(&buf);
 
             // TODO: this only handles slices stored on the stack
-            const ptr = @as(MCValue, .{ .stack_offset = dst_ptr.stack_offset - 0 });
-            const len = @as(MCValue, .{ .stack_offset = dst_ptr.stack_offset - 8 });
+            const ptr = dst_ptr;
+            const len = dst_ptr.address().offset(8).deref();
 
             // Used to store the number of elements for comparison.
             // After comparison, updated to store the number of bytes to copy.
@@ -8225,55 +8093,51 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void {
             const len_lock = self.register_manager.lockRegAssumeUnused(len_reg);
             defer self.register_manager.unlockReg(len_lock);
 
-            try self.asmRegisterMemory(.mov, len_reg, Memory.sib(.qword, .{
-                .base = .rbp,
-                .disp = -len.stack_offset,
-            }));
+            try self.genSetReg(len_reg, Type.usize, len);
 
             const skip_reloc = try self.asmJccReloc(undefined, .z);
-            try self.store(ptr, src_val, slice_ptr_ty, elem_ty);
+            try self.store(slice_ptr_ty, ptr, src_val);
 
             const second_elem_ptr_reg = try self.register_manager.allocReg(null, gp);
             const second_elem_ptr_mcv: MCValue = .{ .register = second_elem_ptr_reg };
             const second_elem_ptr_lock = self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg);
             defer self.register_manager.unlockReg(second_elem_ptr_lock);
 
-            try self.asmRegisterMemory(
-                .lea,
-                second_elem_ptr_reg,
-                Memory.sib(.qword, .{
-                    .base = try self.copyToTmpRegister(Type.usize, ptr),
-                    .disp = elem_abi_size,
-                }),
-            );
+            try self.genSetReg(second_elem_ptr_reg, Type.usize, .{ .register_offset = .{
+                .reg = try self.copyToTmpRegister(Type.usize, ptr),
+                .off = elem_abi_size,
+            } });
 
             try self.genBinOpMir(.sub, Type.usize, len_mcv, .{ .immediate = 1 });
             try self.asmRegisterRegisterImmediate(.imul, len_reg, len_reg, Immediate.u(elem_abi_size));
-            try self.genInlineMemcpy(second_elem_ptr_mcv, ptr, len_mcv, .{});
+            try self.genInlineMemcpy(second_elem_ptr_mcv, ptr, len_mcv);
 
             try self.performReloc(skip_reloc);
         },
         .One => {
+            var elem_ptr_pl = Type.Payload.ElemType{
+                .base = .{ .tag = .single_mut_pointer },
+                .data = elem_ty,
+            };
+            const elem_ptr_ty = Type.initPayload(&elem_ptr_pl.base);
+
             const len = dst_ptr_ty.childType().arrayLen();
+
             assert(len != 0); // prevented by Sema
-            try self.store(dst_ptr, src_val, dst_ptr_ty, elem_ty);
+            try self.store(elem_ptr_ty, dst_ptr, src_val);
 
             const second_elem_ptr_reg = try self.register_manager.allocReg(null, gp);
             const second_elem_ptr_mcv: MCValue = .{ .register = second_elem_ptr_reg };
             const second_elem_ptr_lock = self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg);
             defer self.register_manager.unlockReg(second_elem_ptr_lock);
 
-            try self.asmRegisterMemory(
-                .lea,
-                second_elem_ptr_reg,
-                Memory.sib(.qword, .{
-                    .base = try self.copyToTmpRegister(Type.usize, dst_ptr),
-                    .disp = elem_abi_size,
-                }),
-            );
+            try self.genSetReg(second_elem_ptr_reg, Type.usize, .{ .register_offset = .{
+                .reg = try self.copyToTmpRegister(Type.usize, dst_ptr),
+                .off = elem_abi_size,
+            } });
 
             const bytes_to_copy: MCValue = .{ .immediate = elem_abi_size * (len - 1) };
-            try self.genInlineMemcpy(second_elem_ptr_mcv, dst_ptr, bytes_to_copy, .{});
+            try self.genInlineMemcpy(second_elem_ptr_mcv, dst_ptr, bytes_to_copy);
         },
         .C, .Many => unreachable,
     }
@@ -8299,9 +8163,9 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void {
     };
     defer if (src_ptr_lock) |lock| self.register_manager.unlockReg(lock);
 
-    const len = switch (dst_ptr_ty.ptrSize()) {
-        .Slice => @as(MCValue, .{ .stack_offset = dst_ptr.stack_offset - 8 }),
-        .One => @as(MCValue, .{ .immediate = dst_ptr_ty.childType().arrayLen() }),
+    const len: MCValue = switch (dst_ptr_ty.ptrSize()) {
+        .Slice => dst_ptr.address().offset(8).deref(),
+        .One => .{ .immediate = dst_ptr_ty.childType().arrayLen() },
         .C, .Many => unreachable,
     };
     const len_lock: ?RegisterLock = switch (len) {
@@ -8311,7 +8175,7 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void {
     defer if (len_lock) |lock| self.register_manager.unlockReg(lock);
 
     // TODO: dst_ptr and src_ptr could be slices rather than raw pointers
-    try self.genInlineMemcpy(dst_ptr, src_ptr, len, .{});
+    try self.genInlineMemcpy(dst_ptr, src_ptr, len);
 
     return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none });
 }
@@ -8345,24 +8209,25 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void {
         const atom = elf_file.getAtom(atom_index);
         _ = try atom.getOrCreateOffsetTableEntry(elf_file);
         const got_addr = atom.getOffsetTableAddress(elf_file);
-        try self.asmRegisterMemory(.mov, addr_reg.to64(), Memory.sib(.qword, .{
-            .base = .ds,
-            .disp = @intCast(i32, got_addr),
-        }));
+        try self.asmRegisterMemory(
+            .mov,
+            addr_reg.to64(),
+            Memory.sib(.qword, .{ .base = .{ .reg = .ds }, .disp = @intCast(i32, got_addr) }),
+        );
     } else if (self.bin_file.cast(link.File.Coff)) |coff_file| {
         const atom_index = try coff_file.getOrCreateAtomForLazySymbol(
             .{ .kind = .const_data, .ty = Type.anyerror },
             4, // dword alignment
         );
         const sym_index = coff_file.getAtom(atom_index).getSymbolIndex().?;
-        try self.genSetReg(Type.usize, addr_reg, .{ .load_got = sym_index });
+        try self.genSetReg(addr_reg, Type.usize, .{ .lea_got = sym_index });
     } else if (self.bin_file.cast(link.File.MachO)) |macho_file| {
         const atom_index = try macho_file.getOrCreateAtomForLazySymbol(
             .{ .kind = .const_data, .ty = Type.anyerror },
             4, // dword alignment
         );
         const sym_index = macho_file.getAtom(atom_index).getSymbolIndex().?;
-        try self.genSetReg(Type.usize, addr_reg, .{ .load_got = sym_index });
+        try self.genSetReg(addr_reg, Type.usize, .{ .lea_got = sym_index });
     } else {
         return self.fail("TODO implement airErrorName for x86_64 {s}", .{@tagName(self.bin_file.tag)});
     }
@@ -8377,36 +8242,60 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void {
 
     try self.truncateRegister(err_ty, err_reg.to32());
 
-    try self.asmRegisterMemory(.mov, start_reg.to32(), Memory.sib(.dword, .{
-        .base = addr_reg.to64(),
-        .scale_index = .{ .scale = 4, .index = err_reg.to64() },
-        .disp = 4,
-    }));
-    try self.asmRegisterMemory(.mov, end_reg.to32(), Memory.sib(.dword, .{
-        .base = addr_reg.to64(),
-        .scale_index = .{ .scale = 4, .index = err_reg.to64() },
-        .disp = 8,
-    }));
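+    // Each table entry is a 4-byte offset into the error-name data; the
+    // difference between consecutive offsets, minus one trailing byte (likely
+    // a null terminator), gives the name's length.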
+    try self.asmRegisterMemory(
+        .mov,
+        start_reg.to32(),
+        Memory.sib(.dword, .{
+            .base = .{ .reg = addr_reg.to64() },
+            .scale_index = .{ .scale = 4, .index = err_reg.to64() },
+            .disp = 4,
+        }),
+    );
+    try self.asmRegisterMemory(
+        .mov,
+        end_reg.to32(),
+        Memory.sib(.dword, .{
+            .base = .{ .reg = addr_reg.to64() },
+            .scale_index = .{ .scale = 4, .index = err_reg.to64() },
+            .disp = 8,
+        }),
+    );
     try self.asmRegisterRegister(.sub, end_reg.to32(), start_reg.to32());
-    try self.asmRegisterMemory(.lea, start_reg.to64(), Memory.sib(.byte, .{
-        .base = addr_reg.to64(),
-        .scale_index = .{ .scale = 1, .index = start_reg.to64() },
-        .disp = 0,
-    }));
-    try self.asmRegisterMemory(.lea, end_reg.to32(), Memory.sib(.byte, .{
-        .base = end_reg.to64(),
-        .disp = -1,
-    }));
+    try self.asmRegisterMemory(
+        .lea,
+        start_reg.to64(),
+        Memory.sib(.byte, .{
+            .base = .{ .reg = addr_reg.to64() },
+            .scale_index = .{ .scale = 1, .index = start_reg.to64() },
+            .disp = 0,
+        }),
+    );
+    try self.asmRegisterMemory(
+        .lea,
+        end_reg.to32(),
+        Memory.sib(.byte, .{
+            .base = .{ .reg = end_reg.to64() },
+            .disp = -1,
+        }),
+    );
 
     const dst_mcv = try self.allocRegOrMem(inst, false);
-    try self.asmMemoryRegister(.mov, Memory.sib(.qword, .{
-        .base = .rbp,
-        .disp = 0 - dst_mcv.stack_offset,
-    }), start_reg.to64());
-    try self.asmMemoryRegister(.mov, Memory.sib(.qword, .{
-        .base = .rbp,
-        .disp = 8 - dst_mcv.stack_offset,
-    }), end_reg.to64());
+    try self.asmMemoryRegister(
+        .mov,
+        Memory.sib(.qword, .{
+            .base = .{ .frame = dst_mcv.load_frame.index },
+            .disp = dst_mcv.load_frame.off,
+        }),
+        start_reg.to64(),
+    );
+    try self.asmMemoryRegister(
+        .mov,
+        Memory.sib(.qword, .{
+            .base = .{ .frame = dst_mcv.load_frame.index },
+            .disp = dst_mcv.load_frame.off + 8,
+        }),
+        end_reg.to64(),
+    );
 
     return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
 }
@@ -8445,19 +8334,17 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
     const len = @intCast(usize, result_ty.arrayLen());
     const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
     const elements = @ptrCast([]const Air.Inst.Ref, self.air.extra[ty_pl.payload..][0..len]);
-    const abi_size = @intCast(u32, result_ty.abiSize(self.target.*));
-    const abi_align = result_ty.abiAlignment(self.target.*);
     const result: MCValue = result: {
         switch (result_ty.zigTypeTag()) {
             .Struct => {
-                const stack_offset = @intCast(i32, try self.allocMem(inst, abi_size, abi_align));
+                const frame_index =
+                    try self.allocFrameIndex(FrameAlloc.initType(result_ty, self.target.*));
                 if (result_ty.containerLayout() == .Packed) {
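+                    // Zero the whole slot first; each field is then ORed into
+                    // its bit position by the loop below.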
                     const struct_obj = result_ty.castTag(.@"struct").?.data;
                     try self.genInlineMemset(
-                        .{ .ptr_stack_offset = stack_offset },
+                        .{ .lea_frame = .{ .index = frame_index } },
                         .{ .immediate = 0 },
-                        .{ .immediate = abi_size },
-                        .{},
+                        .{ .immediate = result_ty.abiSize(self.target.*) },
                     );
                     for (elements, 0..) |elem, elem_i| {
                         if (result_ty.structFieldValueComptime(elem_i) != null) continue;
@@ -8465,7 +8352,10 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
                         const elem_ty = result_ty.structFieldType(elem_i);
                         const elem_bit_size = @intCast(u32, elem_ty.bitSize(self.target.*));
                         if (elem_bit_size > 64) {
-                            return self.fail("TODO airAggregateInit implement packed structs with large fields", .{});
+                            return self.fail(
+                                "TODO airAggregateInit implement packed structs with large fields",
+                                .{},
+                            );
                         }
                         const elem_abi_size = @intCast(u32, elem_ty.abiSize(self.target.*));
                         const elem_abi_bits = elem_abi_size * 8;
@@ -8503,7 +8393,7 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
                         try self.genBinOpMir(
                             .@"or",
                             elem_ty,
-                            .{ .stack_offset = stack_offset - elem_byte_off },
+                            .{ .load_frame = .{ .index = frame_index, .off = elem_byte_off } },
                             .{ .register = elem_reg },
                         );
                         if (elem_bit_off > elem_extra_bits) {
@@ -8520,8 +8410,10 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
                             try self.genBinOpMir(
                                 .@"or",
                                 elem_ty,
-                                .{ .stack_offset = stack_offset - elem_byte_off -
-                                    @intCast(i32, elem_abi_size) },
+                                .{ .load_frame = .{
+                                    .index = frame_index,
+                                    .off = elem_byte_off + @intCast(i32, elem_abi_size),
+                                } },
                                 .{ .register = reg },
                             );
                         }
@@ -8536,12 +8428,13 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
                         .load_tlv => |sym_index| MCValue{ .lea_tlv = sym_index },
                         else => elem_mcv,
                     };
-                    try self.genSetStack(elem_ty, stack_offset - elem_off, mat_elem_mcv, .{});
+                    try self.genSetMem(.{ .frame = frame_index }, elem_off, elem_ty, mat_elem_mcv);
                 }
-                break :result .{ .stack_offset = stack_offset };
+                break :result .{ .load_frame = .{ .index = frame_index } };
             },
             .Array => {
-                const stack_offset = @intCast(i32, try self.allocMem(inst, abi_size, abi_align));
+                const frame_index =
+                    try self.allocFrameIndex(FrameAlloc.initType(result_ty, self.target.*));
                 const elem_ty = result_ty.childType();
                 const elem_size = @intCast(u32, elem_ty.abiSize(self.target.*));
 
@@ -8552,9 +8445,9 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
                         else => elem_mcv,
                     };
                     const elem_off = @intCast(i32, elem_size * elem_i);
-                    try self.genSetStack(elem_ty, stack_offset - elem_off, mat_elem_mcv, .{});
+                    try self.genSetMem(.{ .frame = frame_index }, elem_off, elem_ty, mat_elem_mcv);
                 }
-                break :result MCValue{ .stack_offset = stack_offset };
+                break :result .{ .load_frame = .{ .index = frame_index } };
             },
             .Vector => return self.fail("TODO implement aggregate_init for vectors", .{}),
             else => unreachable,
@@ -8596,7 +8489,7 @@ fn resolveInst(self: *Self, ref: Air.Inst.Ref) InnerError!MCValue {
     const ty = self.air.typeOf(ref);
 
     // If the type has no codegen bits, no need to store it.
-    if (!ty.hasRuntimeBitsIgnoreComptime() and !ty.isError()) return .none;
+    if (!ty.hasRuntimeBitsIgnoreComptime()) return .none;
 
     if (Air.refToIndex(ref)) |inst| {
         const mcv = switch (self.air.instructions.items(.tag)[inst]) {
@@ -8666,7 +8559,7 @@ fn genTypedValue(self: *Self, arg_tv: TypedValue) InnerError!MCValue {
             .immediate => |imm| .{ .immediate = imm },
             .memory => |addr| .{ .memory = addr },
             .load_direct => |sym_index| .{ .load_direct = sym_index },
-            .load_got => |sym_index| .{ .load_got = sym_index },
+            .load_got => |sym_index| .{ .lea_got = sym_index },
             .load_tlv => |sym_index| .{ .load_tlv = sym_index },
         },
         .fail => |msg| {
@@ -8679,9 +8572,9 @@ fn genTypedValue(self: *Self, arg_tv: TypedValue) InnerError!MCValue {
 
 const CallMCValues = struct {
     args: []MCValue,
-    return_value: MCValue,
-    stack_byte_count: u32,
-    stack_align: u32,
+    return_value: InstTracking,
+    stack_byte_count: u31,
+    stack_align: u31,
 
     fn deinit(self: *CallMCValues, func: *Self) void {
         func.gpa.free(self.args);
@@ -8694,6 +8587,7 @@ fn resolveCallingConventionValues(
     self: *Self,
     fn_ty: Type,
     var_args: []const Air.Inst.Ref,
+    stack_frame_base: FrameIndex,
 ) !CallMCValues {
     const cc = fn_ty.fnCallingConvention();
     const param_len = fn_ty.fnParamLen();
@@ -8706,7 +8600,7 @@ fn resolveCallingConventionValues(
         .args = try self.gpa.alloc(MCValue, param_types.len),
         // These undefined values must be populated before returning from this function.
         .return_value = undefined,
-        .stack_byte_count = undefined,
+        .stack_byte_count = 0,
         .stack_align = undefined,
     };
     errdefer self.gpa.free(result.args);
@@ -8716,40 +8610,46 @@ fn resolveCallingConventionValues(
     switch (cc) {
         .Naked => {
             assert(result.args.len == 0);
-            result.return_value = .unreach;
-            result.stack_byte_count = 0;
-            result.stack_align = 1;
-            return result;
+            result.return_value = InstTracking.init(.unreach);
+            result.stack_align = 8;
         },
         .C => {
+            var param_reg_i: usize = 0;
+            result.stack_align = 16;
+
+            switch (self.target.os.tag) {
+                .windows => {
+                    // Reserve the 4-pointer (32-byte) shadow space required by the Windows x64 ABI.
+                    result.stack_byte_count += @intCast(u31, 4 * Type.usize.abiSize(self.target.*));
+                },
+                else => {},
+            }
+
             // Return values
             if (ret_ty.zigTypeTag() == .NoReturn) {
-                result.return_value = .unreach;
-            } else if (!ret_ty.hasRuntimeBitsIgnoreComptime() and !ret_ty.isError()) {
+                result.return_value = InstTracking.init(.unreach);
+            } else if (!ret_ty.hasRuntimeBitsIgnoreComptime()) {
                 // TODO: is this even possible for C calling convention?
-                result.return_value = .none;
+                result.return_value = InstTracking.init(.none);
             } else {
-                const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*));
-                if (ret_ty_size == 0) {
-                    assert(ret_ty.isError());
-                    result.return_value = .{ .immediate = 0 };
-                } else if (ret_ty_size <= 8) {
-                    const aliased_reg = registerAlias(abi.getCAbiIntReturnRegs(self.target.*)[0], ret_ty_size);
-                    result.return_value = .{ .register = aliased_reg };
+                const ret_reg = abi.getCAbiIntReturnRegs(self.target.*)[0];
+                const ret_ty_size = @intCast(u31, ret_ty.abiSize(self.target.*));
+                if (ret_ty_size <= 8) {
+                    const aliased_reg = registerAlias(ret_reg, ret_ty_size);
+                    result.return_value = .{ .short = .{ .register = aliased_reg }, .long = .none };
                 } else {
-                    // TODO: return argument cell should go first
-                    result.return_value = .{ .stack_offset = 0 };
+                    const ret_indirect_reg = abi.getCAbiIntParamRegs(self.target.*)[param_reg_i];
+                    param_reg_i += 1;
+                    result.return_value = .{
+                        .short = .{ .indirect = .{ .reg = ret_reg } },
+                        .long = .{ .indirect = .{ .reg = ret_indirect_reg } },
+                    };
                 }
             }
 
             // Input params
-            var next_stack_offset: u32 = switch (result.return_value) {
-                .stack_offset => |off| @intCast(u32, off),
-                else => 0,
-            };
-
-            for (param_types, result.args, 0..) |ty, *arg, i| {
-                assert(ty.hasRuntimeBits());
+            for (param_types, result.args) |ty, *arg| {
+                assert(ty.hasRuntimeBitsIgnoreComptime());
 
                 const classes: []const abi.Class = switch (self.target.os.tag) {
                     .windows => &[1]abi.Class{abi.classifyWindows(ty, self.target.*)},
@@ -8760,8 +8660,10 @@ fn resolveCallingConventionValues(
                 }
                 switch (classes[0]) {
                     .integer => blk: {
-                        if (i >= abi.getCAbiIntParamRegs(self.target.*).len) break :blk; // fallthrough
-                        arg.* = .{ .register = abi.getCAbiIntParamRegs(self.target.*)[i] };
+                        if (param_reg_i >= abi.getCAbiIntParamRegs(self.target.*).len) break :blk;
+                        const param_reg = abi.getCAbiIntParamRegs(self.target.*)[param_reg_i];
+                        param_reg_i += 1;
+                        arg.* = .{ .register = param_reg };
                         continue;
                     },
                     .memory => {}, // fallthrough
@@ -8770,82 +8672,61 @@ fn resolveCallingConventionValues(
                     }),
                 }
 
-                const param_size = @intCast(u32, ty.abiSize(self.target.*));
-                const param_align = @intCast(u32, ty.abiAlignment(self.target.*));
-                const offset = mem.alignForwardGeneric(u32, next_stack_offset + param_size, param_align);
-                arg.* = .{ .stack_offset = @intCast(i32, offset) };
-                next_stack_offset = offset;
-            }
-
-            // Align the stack to 16bytes before allocating shadow stack space (if any).
-            const aligned_next_stack_offset = mem.alignForwardGeneric(u32, next_stack_offset, 16);
-            const padding = aligned_next_stack_offset - next_stack_offset;
-            if (padding > 0) {
-                for (result.args) |*arg| {
-                    if (arg.isRegister()) continue;
-                    arg.stack_offset += @intCast(i32, padding);
-                }
+                const param_size = @intCast(u31, ty.abiSize(self.target.*));
+                const param_align = @intCast(u31, ty.abiAlignment(self.target.*));
+                result.stack_byte_count =
+                    mem.alignForwardGeneric(u31, result.stack_byte_count, param_align);
+                arg.* = .{ .load_frame = .{
+                    .index = stack_frame_base,
+                    .off = result.stack_byte_count,
+                } };
+                result.stack_byte_count += param_size;
             }
-
-            const shadow_stack_space: u32 = switch (self.target.os.tag) {
-                .windows => @intCast(u32, 4 * @sizeOf(u64)),
-                else => 0,
-            };
-
-            // alignment padding | args ... | shadow stack space (if any) | ret addr | $rbp |
-            result.stack_byte_count = aligned_next_stack_offset + shadow_stack_space;
-            result.stack_align = 16;
         },
         .Unspecified => {
+            result.stack_align = 16;
+
             // Return values
             if (ret_ty.zigTypeTag() == .NoReturn) {
-                result.return_value = .unreach;
-            } else if (!ret_ty.hasRuntimeBitsIgnoreComptime() and !ret_ty.isError()) {
-                result.return_value = .none;
+                result.return_value = InstTracking.init(.unreach);
+            } else if (!ret_ty.hasRuntimeBitsIgnoreComptime()) {
+                result.return_value = InstTracking.init(.none);
             } else {
-                const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*));
-                if (ret_ty_size == 0) {
-                    assert(ret_ty.isError());
-                    result.return_value = .{ .immediate = 0 };
-                } else if (ret_ty_size <= 8 and !ret_ty.isRuntimeFloat()) {
-                    const aliased_reg = registerAlias(abi.getCAbiIntReturnRegs(self.target.*)[0], ret_ty_size);
-                    result.return_value = .{ .register = aliased_reg };
+                const ret_reg = abi.getCAbiIntReturnRegs(self.target.*)[0];
+                const ret_ty_size = @intCast(u31, ret_ty.abiSize(self.target.*));
+                if (ret_ty_size <= 8 and !ret_ty.isRuntimeFloat()) {
+                    const aliased_reg = registerAlias(ret_reg, ret_ty_size);
+                    result.return_value = .{ .short = .{ .register = aliased_reg }, .long = .none };
                 } else {
-                    // We simply make the return MCValue a stack offset. However, the actual value
-                    // for the offset will be populated later. We will also push the stack offset
-                    // value into an appropriate register when we resolve the offset.
-                    result.return_value = .{ .stack_offset = 0 };
+                    const ret_indirect_reg = abi.getCAbiIntParamRegs(self.target.*)[0];
+                    result.return_value = .{
+                        .short = .{ .indirect = .{ .reg = ret_reg } },
+                        .long = .{ .indirect = .{ .reg = ret_indirect_reg } },
+                    };
                 }
             }
 
             // Input params
-            var next_stack_offset: u32 = switch (result.return_value) {
-                .stack_offset => |off| @intCast(u32, off),
-                else => 0,
-            };
-
             for (param_types, result.args) |ty, *arg| {
-                if (!ty.hasRuntimeBits()) {
+                if (!ty.hasRuntimeBitsIgnoreComptime()) {
                     arg.* = .none;
                     continue;
                 }
-                const param_size = @intCast(u32, ty.abiSize(self.target.*));
-                const param_align = @intCast(u32, ty.abiAlignment(self.target.*));
-                const offset = mem.alignForwardGeneric(u32, next_stack_offset + param_size, param_align);
-                arg.* = .{ .stack_offset = @intCast(i32, offset) };
-                next_stack_offset = offset;
+                const param_size = @intCast(u31, ty.abiSize(self.target.*));
+                const param_align = @intCast(u31, ty.abiAlignment(self.target.*));
+                result.stack_byte_count =
+                    mem.alignForwardGeneric(u31, result.stack_byte_count, param_align);
+                arg.* = .{ .load_frame = .{
+                    .index = stack_frame_base,
+                    .off = result.stack_byte_count,
+                } };
+                result.stack_byte_count += param_size;
             }
-
-            result.stack_align = 16;
-            // TODO fix this so that the 16byte alignment padding is at the current value of $rsp, and push
-            // the args onto the stack so that there is no padding between the first argument and
-            // the standard preamble.
-            // alignment padding | args ... | ret addr | $rbp |
-            result.stack_byte_count = mem.alignForwardGeneric(u32, next_stack_offset, result.stack_align);
         },
         else => return self.fail("TODO implement function parameters and return values for {} on x86_64", .{cc}),
     }
 
+    result.stack_byte_count = mem.alignForwardGeneric(u31, result.stack_byte_count, result.stack_align);
     return result;
 }
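
As a worked check of the parameter loops above: each stack argument is first aligned, then recorded at the current `stack_byte_count`, and the running total is rounded up to `stack_align` at the very end. A self-contained sketch (plain test code, not compiler internals) for three arguments with (size, align) of (1, 1), (8, 8), (4, 4):

const std = @import("std");

test "stack arg layout arithmetic" {
    var count: u31 = 0;
    // Offsets land at 0, 8, and 16 respectively:
    inline for (.{ .{ 1, 1 }, .{ 8, 8 }, .{ 4, 4 } }) |sa| {
        count = std.mem.alignForwardGeneric(u31, count, sa[1]);
        count += sa[0]; // the arg records the pre-increment offset in .load_frame
    }
    // Mirrors the final round-up after the switch:
    count = std.mem.alignForwardGeneric(u31, count, 16);
    try std.testing.expectEqual(@as(u31, 32), count);
}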
 
@@ -8953,15 +8834,6 @@ fn regExtraBits(self: *Self, ty: Type) u64 {
     return self.regBitSize(ty) - ty.bitSize(self.target.*);
 }
 
-fn intrinsicsAllowed(target: Target, ty: Type) bool {
-    return switch (ty.tag()) {
-        .f32,
-        .f64,
-        => Target.x86.featureSetHasAny(target.cpu.features, .{ .sse2, .avx, .avx2 }),
-        else => unreachable, // TODO finish this off
-    };
-}
-
 fn hasAvxSupport(target: Target) bool {
     return Target.x86.featureSetHasAny(target.cpu.features, .{ .avx, .avx2 });
 }
src/arch/x86_64/Emit.zig
@@ -13,7 +13,9 @@ prev_di_pc: usize,
 code_offset_mapping: std.AutoHashMapUnmanaged(Mir.Inst.Index, usize) = .{},
 relocs: std.ArrayListUnmanaged(Reloc) = .{},
 
-pub const Error = Lower.Error || error{EmitFail};
+pub const Error = Lower.Error || error{
+    EmitFail,
+};
 
 pub fn emitMir(emit: *Emit) Error!void {
     for (0..emit.lower.mir.instructions.len) |i| {
@@ -22,7 +24,7 @@ pub fn emitMir(emit: *Emit) Error!void {
 
         const start_offset = @intCast(u32, emit.code.items.len);
         try emit.code_offset_mapping.putNoClobber(emit.lower.allocator, index, start_offset);
-        for (try emit.lower.lowerMir(inst)) |lower_inst| try lower_inst.encode(emit.code.writer());
+        for (try emit.lower.lowerMir(inst)) |lower_inst| try lower_inst.encode(emit.code.writer(), .{});
         const end_offset = @intCast(u32, emit.code.items.len);
 
         switch (inst.tag) {
@@ -120,11 +122,10 @@ pub fn emitMir(emit: *Emit) Error!void {
                 .length = 6,
             }),
 
-            .dbg_line => {
-                const dbg_line_column =
-                    emit.lower.mir.extraData(Mir.DbgLineColumn, inst.data.payload).data;
-                try emit.dbgAdvancePCAndLine(dbg_line_column.line, dbg_line_column.column);
-            },
+            .dbg_line => try emit.dbgAdvancePCAndLine(
+                inst.data.line_column.line,
+                inst.data.line_column.column,
+            ),
 
             .dbg_prologue_end => {
                 switch (emit.debug_output) {
src/arch/x86_64/encoder.zig
@@ -54,6 +54,21 @@ pub const Instruction = struct {
             };
         }
 
+        pub fn isBaseExtended(op: Operand) bool {
+            return switch (op) {
+                .none, .imm => false,
+                .reg => |reg| reg.isExtended(),
+                .mem => |mem| mem.base().isExtended(),
+            };
+        }
+
+        pub fn isIndexExtended(op: Operand) bool {
+            return switch (op) {
+                .none, .reg, .imm => false,
+                .mem => |mem| if (mem.scaleIndex()) |si| si.index.isExtended() else false,
+            };
+        }
+
         fn format(
             op: Operand,
             comptime unused_format_string: []const u8,
@@ -98,17 +113,24 @@ pub const Instruction = struct {
                         try writer.print("{s} ptr ", .{@tagName(sib.ptr_size)});
 
                         if (mem.isSegmentRegister()) {
-                            return writer.print("{s}:0x{x}", .{ @tagName(sib.base.?), sib.disp });
+                            return writer.print("{s}:0x{x}", .{ @tagName(sib.base.reg), sib.disp });
                         }
 
                         try writer.writeByte('[');
 
                         var any = false;
-                        if (sib.base) |base| {
-                            try writer.print("{s}", .{@tagName(base)});
-                            any = true;
+                        switch (sib.base) {
+                            .none => {},
+                            .reg => |reg| {
+                                try writer.print("{s}", .{@tagName(reg)});
+                                any = true;
+                            },
+                            .frame => |frame| {
+                                try writer.print("{}", .{frame});
+                                any = true;
+                            },
                         }
-                        if (sib.scale_index) |si| {
+                        if (mem.scaleIndex()) |si| {
                             if (any) try writer.writeAll(" + ");
                             try writer.print("{s} * {d}", .{ @tagName(si.index), si.scale });
                             any = true;
@@ -182,8 +204,8 @@ pub const Instruction = struct {
         }
     }
 
-    pub fn encode(inst: Instruction, writer: anytype) !void {
-        const encoder = Encoder(@TypeOf(writer)){ .writer = writer };
+    pub fn encode(inst: Instruction, writer: anytype, comptime opts: Options) !void {
+        const encoder = Encoder(@TypeOf(writer), opts){ .writer = writer };
         const enc = inst.encoding;
         const data = enc.data;
 
@@ -269,22 +291,24 @@ pub const Instruction = struct {
 
         const segment_override: ?Register = switch (op_en) {
             .i, .zi, .o, .oi, .d, .np => null,
-            .fd => inst.ops[1].mem.base().?,
-            .td => inst.ops[0].mem.base().?,
-            .rm, .rmi => if (inst.ops[1].isSegmentRegister()) blk: {
-                break :blk switch (inst.ops[1]) {
-                    .reg => |r| r,
-                    .mem => |m| m.base().?,
+            .fd => inst.ops[1].mem.base().reg,
+            .td => inst.ops[0].mem.base().reg,
+            .rm, .rmi => if (inst.ops[1].isSegmentRegister())
+                switch (inst.ops[1]) {
+                    .reg => |reg| reg,
+                    .mem => |mem| mem.base().reg,
                     else => unreachable,
-                };
-            } else null,
-            .m, .mi, .m1, .mc, .mr, .mri, .mrc => if (inst.ops[0].isSegmentRegister()) blk: {
-                break :blk switch (inst.ops[0]) {
-                    .reg => |r| r,
-                    .mem => |m| m.base().?,
+                }
+            else
+                null,
+            .m, .mi, .m1, .mc, .mr, .mri, .mrc => if (inst.ops[0].isSegmentRegister())
+                switch (inst.ops[0]) {
+                    .reg => |reg| reg,
+                    .mem => |mem| mem.base().reg,
                     else => unreachable,
-                };
-            } else null,
+                }
+            else
+                null,
         };
         if (segment_override) |seg| {
             legacy.setSegmentOverride(seg);
@@ -307,27 +331,17 @@ pub const Instruction = struct {
                 const r_op = switch (op_en) {
                     .rm, .rmi => inst.ops[0],
                     .mr, .mri, .mrc => inst.ops[1],
-                    else => null,
+                    else => .none,
                 };
-                if (r_op) |op| {
-                    rex.r = op.reg.isExtended();
-                }
+                rex.r = r_op.isBaseExtended();
 
                 const b_x_op = switch (op_en) {
                     .rm, .rmi => inst.ops[1],
                     .m, .mi, .m1, .mc, .mr, .mri, .mrc => inst.ops[0],
                     else => unreachable,
                 };
-                switch (b_x_op) {
-                    .reg => |r| {
-                        rex.b = r.isExtended();
-                    },
-                    .mem => |mem| {
-                        rex.b = if (mem.base()) |base| base.isExtended() else false;
-                        rex.x = if (mem.scaleIndex()) |si| si.index.isExtended() else false;
-                    },
-                    else => unreachable,
-                }
+                rex.b = b_x_op.isBaseExtended();
+                rex.x = b_x_op.isIndexExtended();
             },
         }
 
@@ -348,72 +362,75 @@ pub const Instruction = struct {
 
         switch (mem) {
             .moffs => unreachable,
-            .sib => |sib| {
-                if (sib.base) |base| {
-                    if (base.class() == .segment) {
-                        // TODO audit this wrt SIB
-                        try encoder.modRm_SIBDisp0(operand_enc);
-                        if (sib.scale_index) |si| {
-                            const scale = math.log2_int(u4, si.scale);
-                            try encoder.sib_scaleIndexDisp32(scale, si.index.lowEnc());
-                        } else {
-                            try encoder.sib_disp32();
-                        }
-                        try encoder.disp32(sib.disp);
+            .sib => |sib| switch (sib.base) {
+                .none => {
+                    try encoder.modRm_SIBDisp0(operand_enc);
+                    if (mem.scaleIndex()) |si| {
+                        const scale = math.log2_int(u4, si.scale);
+                        try encoder.sib_scaleIndexDisp32(scale, si.index.lowEnc());
                     } else {
-                        assert(base.class() == .general_purpose);
-                        const dst = base.lowEnc();
-                        const src = operand_enc;
-                        if (dst == 4 or sib.scale_index != null) {
-                            if (sib.disp == 0 and dst != 5) {
-                                try encoder.modRm_SIBDisp0(src);
-                                if (sib.scale_index) |si| {
-                                    const scale = math.log2_int(u4, si.scale);
-                                    try encoder.sib_scaleIndexBase(scale, si.index.lowEnc(), dst);
-                                } else {
-                                    try encoder.sib_base(dst);
-                                }
-                            } else if (math.cast(i8, sib.disp)) |_| {
-                                try encoder.modRm_SIBDisp8(src);
-                                if (sib.scale_index) |si| {
-                                    const scale = math.log2_int(u4, si.scale);
-                                    try encoder.sib_scaleIndexBaseDisp8(scale, si.index.lowEnc(), dst);
-                                } else {
-                                    try encoder.sib_baseDisp8(dst);
-                                }
-                                try encoder.disp8(@truncate(i8, sib.disp));
-                            } else {
-                                try encoder.modRm_SIBDisp32(src);
-                                if (sib.scale_index) |si| {
-                                    const scale = math.log2_int(u4, si.scale);
-                                    try encoder.sib_scaleIndexBaseDisp32(scale, si.index.lowEnc(), dst);
-                                } else {
-                                    try encoder.sib_baseDisp32(dst);
-                                }
-                                try encoder.disp32(sib.disp);
-                            }
-                        } else {
-                            if (sib.disp == 0 and dst != 5) {
-                                try encoder.modRm_indirectDisp0(src, dst);
-                            } else if (math.cast(i8, sib.disp)) |_| {
-                                try encoder.modRm_indirectDisp8(src, dst);
-                                try encoder.disp8(@truncate(i8, sib.disp));
-                            } else {
-                                try encoder.modRm_indirectDisp32(src, dst);
-                                try encoder.disp32(sib.disp);
-                            }
-                        }
+                        try encoder.sib_disp32();
                     }
-                } else {
+                    try encoder.disp32(sib.disp);
+                },
+                .reg => |base| if (base.class() == .segment) {
+                    // TODO audit this wrt SIB
                     try encoder.modRm_SIBDisp0(operand_enc);
-                    if (sib.scale_index) |si| {
+                    if (mem.scaleIndex()) |si| {
                         const scale = math.log2_int(u4, si.scale);
                         try encoder.sib_scaleIndexDisp32(scale, si.index.lowEnc());
                     } else {
                         try encoder.sib_disp32();
                     }
                     try encoder.disp32(sib.disp);
-                }
+                } else {
+                    assert(base.class() == .general_purpose);
+                    const dst = base.lowEnc();
+                    const src = operand_enc;
+                    if (dst == 4 or mem.scaleIndex() != null) {
+                        if (sib.disp == 0 and dst != 5) {
+                            try encoder.modRm_SIBDisp0(src);
+                            if (mem.scaleIndex()) |si| {
+                                const scale = math.log2_int(u4, si.scale);
+                                try encoder.sib_scaleIndexBase(scale, si.index.lowEnc(), dst);
+                            } else {
+                                try encoder.sib_base(dst);
+                            }
+                        } else if (math.cast(i8, sib.disp)) |_| {
+                            try encoder.modRm_SIBDisp8(src);
+                            if (mem.scaleIndex()) |si| {
+                                const scale = math.log2_int(u4, si.scale);
+                                try encoder.sib_scaleIndexBaseDisp8(scale, si.index.lowEnc(), dst);
+                            } else {
+                                try encoder.sib_baseDisp8(dst);
+                            }
+                            try encoder.disp8(@truncate(i8, sib.disp));
+                        } else {
+                            try encoder.modRm_SIBDisp32(src);
+                            if (mem.scaleIndex()) |si| {
+                                const scale = math.log2_int(u4, si.scale);
+                                try encoder.sib_scaleIndexBaseDisp32(scale, si.index.lowEnc(), dst);
+                            } else {
+                                try encoder.sib_baseDisp32(dst);
+                            }
+                            try encoder.disp32(sib.disp);
+                        }
+                    } else {
+                        if (sib.disp == 0 and dst != 5) {
+                            try encoder.modRm_indirectDisp0(src, dst);
+                        } else if (math.cast(i8, sib.disp)) |_| {
+                            try encoder.modRm_indirectDisp8(src, dst);
+                            try encoder.disp8(@truncate(i8, sib.disp));
+                        } else {
+                            try encoder.modRm_indirectDisp32(src, dst);
+                            try encoder.disp32(sib.disp);
+                        }
+                    }
+                },
+                .frame => if (@TypeOf(encoder).options.allow_frame_loc) {
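+                    // Size-estimation placeholder: a frame base lowers to reg+disp32 after layout.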
+                    try encoder.modRm_indirectDisp32(operand_enc, undefined);
+                    try encoder.disp32(undefined);
+                } else return error.CannotEncode,
             },
             .rip => |rip| {
                 try encoder.modRm_RIPDisp32(operand_enc);
@@ -482,11 +499,14 @@ pub const LegacyPrefixes = packed struct {
     }
 };
 
-fn Encoder(comptime T: type) type {
+pub const Options = struct { allow_frame_loc: bool = false };
+
+fn Encoder(comptime T: type, comptime opts: Options) type {
     return struct {
         writer: T,
 
         const Self = @This();
+        pub const options = opts;
 
         // --------
         // Prefixes
src/arch/x86_64/Encoding.zig
@@ -47,17 +47,8 @@ pub fn findByMnemonic(
         },
         else => {},
     } else false;
-    const rex_extended = for (ops) |op| switch (op) {
-        .reg => |r| if (r.isExtended()) break true,
-        .mem => |m| {
-            if (m.base()) |base| {
-                if (base.isExtended()) break true;
-            }
-            if (m.scaleIndex()) |si| {
-                if (si.index.isExtended()) break true;
-            }
-        },
-        else => {},
+    const rex_extended = for (ops) |op| {
+        if (op.isBaseExtended() or op.isIndexExtended()) break true;
     } else false;
 
     if ((rex_required or rex_extended) and rex_invalid) return error.CannotEncode;
@@ -544,7 +535,7 @@ fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Op
     std.mem.copy(Operand, &inst.ops, ops);
 
     var cwriter = std.io.countingWriter(std.io.null_writer);
-    inst.encode(cwriter.writer()) catch unreachable; // Not allowed to fail here unless OOM.
+    inst.encode(cwriter.writer(), .{ .allow_frame_loc = true }) catch unreachable; // Cannot fail: the counting writer never errors and frame locations are allowed.
     return @intCast(usize, cwriter.bytes_written);
 }
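
`estimateInstructionLength` is the one caller that needs `allow_frame_loc`: it runs before frame layout, when frame bases are still symbolic, so the encoder emits a worst-case disp32 placeholder instead of failing. A hedged sketch of the resulting two-pass pattern (`inst` and `code` here are assumed values, not names from this file):

// Pass 1: upper-bound length; frame bases become placeholder disp32 forms.
var counting = std.io.countingWriter(std.io.null_writer);
try inst.encode(counting.writer(), .{ .allow_frame_loc = true });
const estimated_len = counting.bytes_written;

// Pass 2: real emission. By now resolveFrameLoc has rewritten every frame
// base to reg+disp, so the default options never hit error.CannotEncode.
try inst.encode(code.writer(), .{});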
 
src/arch/x86_64/Lower.zig
@@ -136,7 +136,8 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction {
         .setcc => try lower.mirSetcc(inst),
         .jcc => try lower.emit(.none, mnem_cc(.j, inst.data.inst_cc.cc), &.{.{ .imm = Immediate.s(0) }}),
 
-        .push_regs, .pop_regs => try lower.mirPushPopRegisterList(inst),
+        .push_regs => try lower.mirPushPopRegisterList(inst, .push),
+        .pop_regs => try lower.mirPushPopRegisterList(inst, .pop),
 
         .dbg_line,
         .dbg_prologue_end,
@@ -190,7 +191,7 @@ fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate {
 }
 
 fn mem(lower: Lower, ops: Mir.Inst.Ops, payload: u32) Memory {
-    return switch (ops) {
+    return lower.mir.resolveFrameLoc(switch (ops) {
         .rm_sib,
         .rm_sib_cc,
         .m_sib,
@@ -227,7 +228,7 @@ fn mem(lower: Lower, ops: Mir.Inst.Ops, payload: u32) Memory {
         => lower.mir.extraData(Mir.MemoryMoffs, payload).data.decode(),
 
         else => unreachable,
-    };
+    });
 }
 
 fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) Error!void {
@@ -417,23 +418,15 @@ fn mirSetcc(lower: *Lower, inst: Mir.Inst) Error!void {
     }
 }
 
-fn mirPushPopRegisterList(lower: *Lower, inst: Mir.Inst) Error!void {
-    const save_reg_list = lower.mir.extraData(Mir.SaveRegisterList, inst.data.payload).data;
-    const base = @intToEnum(Register, save_reg_list.base_reg);
-    var disp: i32 = -@intCast(i32, save_reg_list.stack_end);
-    const reg_list = Mir.RegisterList.fromInt(save_reg_list.register_list);
+fn mirPushPopRegisterList(lower: *Lower, inst: Mir.Inst, comptime mnemonic: Mnemonic) Error!void {
+    const reg_list = Mir.RegisterList.fromInt(inst.data.payload);
     const callee_preserved_regs = abi.getCalleePreservedRegs(lower.target.*);
-    for (callee_preserved_regs) |callee_preserved_reg| {
-        if (!reg_list.isSet(callee_preserved_regs, callee_preserved_reg)) continue;
-        const reg_op = Operand{ .reg = callee_preserved_reg };
-        const mem_op = Operand{ .mem = Memory.sib(.qword, .{ .base = base, .disp = disp }) };
-        try lower.emit(.none, .mov, switch (inst.tag) {
-            .push_regs => &.{ mem_op, reg_op },
-            .pop_regs => &.{ reg_op, mem_op },
-            else => unreachable,
-        });
-        disp += 8;
-    }
+    var it = reg_list.iterator(.{ .direction = switch (mnemonic) {
+        .push => .reverse,
+        .pop => .forward,
+        else => unreachable,
+    } });
+    while (it.next()) |i| try lower.emit(.none, mnemonic, &.{.{ .reg = callee_preserved_regs[i] }});
 }
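
Iterating in reverse for `push` and forward for `pop` makes the epilogue the exact mirror of the prologue, which is what a LIFO stack requires. A small illustration with a hypothetical mask (bits 0 and 2 set):

const list = Mir.RegisterList.fromInt(0b101); // hypothetical saved set
var push_order = list.iterator(.{ .direction = .reverse });
while (push_order.next()) |i| std.debug.print("push #{d}\n", .{i}); // 2, then 0
var pop_order = list.iterator(.{ .direction = .forward });
while (pop_order.next()) |i| std.debug.print("pop  #{d}\n", .{i}); // 0, then 2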
 
 fn mirLeaLinker(lower: *Lower, inst: Mir.Inst) Error!void {
src/arch/x86_64/Mir.zig
@@ -23,6 +23,7 @@ const Register = bits.Register;
 instructions: std.MultiArrayList(Inst).Slice,
 /// The meaning of this data is determined by `Inst.Tag` value.
 extra: []const u32,
+frame_locs: std.MultiArrayList(FrameLoc).Slice,
 
 pub const Inst = struct {
     tag: Tag,
@@ -241,13 +242,13 @@ pub const Inst = struct {
         /// Start of epilogue
         dbg_epilogue_begin,
         /// Update debug line
-        /// Uses `payload` payload with data of type `DbgLineColumn`.
+        /// Uses `line_column` payload containing the line and column.
         dbg_line,
         /// Push registers
-        /// Uses `payload` payload with data of type `SaveRegisterList`.
+        /// Uses `payload` payload containing `RegisterList.asInt` directly.
         push_regs,
         /// Pop registers
-        /// Uses `payload` payload with data of type `SaveRegisterList`.
+        /// Uses `payload` payload containing `RegisterList.asInt` directly.
         pop_regs,
 
         /// Tombstone
@@ -500,6 +501,11 @@ pub const Inst = struct {
             /// Index into the linker's symbol table.
             sym_index: u32,
         },
+        /// Debug line and column position
+        line_column: struct {
+            line: u32,
+            column: u32,
+        },
         /// Index into `extra`. Meaning of what can be found there is context-dependent.
         payload: u32,
     };
@@ -522,12 +528,11 @@ pub const LeaRegisterReloc = struct {
     sym_index: u32,
 };
 
-/// Used in conjunction with `SaveRegisterList` payload to transfer a list of used registers
-/// in a compact manner.
+/// Used in conjunction with the `payload` field to transfer a list of used registers in a compact manner.
 pub const RegisterList = struct {
     bitset: BitSet = BitSet.initEmpty(),
 
-    const BitSet = IntegerBitSet(@ctz(@as(u32, 0)));
+    const BitSet = IntegerBitSet(32);
     const Self = @This();
 
     fn getIndexForReg(registers: []const Register, reg: Register) BitSet.MaskInt {
@@ -547,6 +552,10 @@ pub const RegisterList = struct {
         return self.bitset.isSet(index);
     }
 
+    pub fn iterator(self: Self, comptime options: std.bit_set.IteratorOptions) BitSet.Iterator(options) {
+        return self.bitset.iterator(options);
+    }
+
     pub fn asInt(self: Self) u32 {
         return self.bitset.mask;
     }
@@ -562,14 +571,6 @@ pub const RegisterList = struct {
     }
 };
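
Since the payload is now just the 32-bit mask itself, `asInt`/`fromInt` round-trip through `Inst.Data.payload` with no extra storage. A sketch:

const assert = @import("std").debug.assert;

var list: Mir.RegisterList = .{};
// ... registers get marked while spilling callee-saved regs (omitted) ...
const payload = list.asInt(); // stored directly in inst.data.payload
const decoded = Mir.RegisterList.fromInt(payload);
assert(decoded.asInt() == payload); // lossless round trip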
 
-pub const SaveRegisterList = struct {
-    /// Base register
-    base_reg: u32,
-    /// Use `RegisterList` to populate.
-    register_list: u32,
-    stack_end: u32,
-};
-
 pub const Imm64 = struct {
     msb: u32,
     lsb: u32,
@@ -593,41 +594,51 @@ pub const Imm64 = struct {
 pub const MemorySib = struct {
     /// Size of the pointer.
     ptr_size: u32,
-    /// Base register. -1 means null, or no base register.
-    base: i32,
-    /// Scale for index register. -1 means null, or no scale.
-    /// This has to be in sync with `index` field.
-    scale: i32,
-    /// Index register. -1 means null, or no index register.
-    /// This has to be in sync with `scale` field.
-    index: i32,
+    /// Tag of the base operand, of type `Memory.Base.Tag`.
+    base_tag: u32,
+    /// Base operand payload: a `Register` or a `FrameIndex`, as selected by `base_tag`.
+    base: u32,
+    /// Scale in the low 4 bits, index register encoding starting at bit 4.
+    scale_index: u32,
     /// Displacement value.
     disp: i32,
 
     pub fn encode(mem: Memory) MemorySib {
         const sib = mem.sib;
+        assert(sib.scale_index.scale == 0 or std.math.isPowerOfTwo(sib.scale_index.scale));
         return .{
             .ptr_size = @enumToInt(sib.ptr_size),
-            .base = if (sib.base) |r| @enumToInt(r) else -1,
-            .scale = if (sib.scale_index) |si| si.scale else -1,
-            .index = if (sib.scale_index) |si| @enumToInt(si.index) else -1,
+            .base_tag = @enumToInt(@as(Memory.Base.Tag, sib.base)),
+            .base = switch (sib.base) {
+                .none => undefined,
+                .reg => |r| @enumToInt(r),
+                .frame => |fi| @enumToInt(fi),
+            },
+            .scale_index = @as(u32, sib.scale_index.scale) << 0 |
+                @as(u32, if (sib.scale_index.scale > 0)
+                @enumToInt(sib.scale_index.index)
+            else
+                undefined) << 4,
             .disp = sib.disp,
         };
     }
 
     pub fn decode(msib: MemorySib) Memory {
-        const base: ?Register = if (msib.base == -1) null else @intToEnum(Register, msib.base);
-        const scale_index: ?Memory.ScaleIndex = if (msib.index == -1) null else .{
-            .scale = @intCast(u4, msib.scale),
-            .index = @intToEnum(Register, msib.index),
-        };
-        const mem: Memory = .{ .sib = .{
+        const scale = @truncate(u4, msib.scale_index);
+        assert(scale == 0 or std.math.isPowerOfTwo(scale));
+        return .{ .sib = .{
             .ptr_size = @intToEnum(Memory.PtrSize, msib.ptr_size),
-            .base = base,
-            .scale_index = scale_index,
+            .base = switch (@intToEnum(Memory.Base.Tag, msib.base_tag)) {
+                .none => .none,
+                .reg => .{ .reg = @intToEnum(Register, msib.base) },
+                .frame => .{ .frame = @intToEnum(bits.FrameIndex, msib.base) },
+            },
+            .scale_index = .{
+                .scale = scale,
+                .index = if (scale > 0) @intToEnum(Register, msib.scale_index >> 4) else undefined,
+            },
             .disp = msib.disp,
         } };
-        return mem;
     }
 };
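
The packing can be sanity-checked with a round trip; `scale_index` keeps the scale in the low 4 bits and the index register's enum value shifted left by 4. A sketch with made-up values:

const mem_op = Memory.sib(.qword, .{
    .base = .{ .frame = @intToEnum(bits.FrameIndex, 7) },
    .scale_index = .{ .scale = 2, .index = .rcx },
    .disp = 16,
});
const msib = Mir.MemorySib.encode(mem_op);
// msib.base_tag == @enumToInt(Memory.Base.Tag.frame), msib.base == 7,
// msib.scale_index == 2 | @as(u32, @enumToInt(Register.rcx)) << 4
const round_trip = Mir.MemorySib.decode(msib);
// round_trip equals mem_op field-for-field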
 
@@ -676,14 +687,10 @@ pub const MemoryMoffs = struct {
     }
 };
 
-pub const DbgLineColumn = struct {
-    line: u32,
-    column: u32,
-};
-
 pub fn deinit(mir: *Mir, gpa: std.mem.Allocator) void {
     mir.instructions.deinit(gpa);
     gpa.free(mir.extra);
+    mir.frame_locs.deinit(gpa);
     mir.* = undefined;
 }
 
@@ -704,3 +711,22 @@ pub fn extraData(mir: Mir, comptime T: type, index: u32) struct { data: T, end:
         .end = i,
     };
 }
+
+pub const FrameLoc = struct {
+    base: Register,
+    disp: i32,
+};
+
+pub fn resolveFrameLoc(mir: Mir, mem: Memory) Memory {
+    return switch (mem) {
+        .sib => |sib| switch (sib.base) {
+            .none, .reg => mem,
+            .frame => |index| if (mir.frame_locs.len > 0) Memory.sib(sib.ptr_size, .{
+                .base = .{ .reg = mir.frame_locs.items(.base)[@enumToInt(index)] },
+                .disp = mir.frame_locs.items(.disp)[@enumToInt(index)] + sib.disp,
+                .scale_index = mem.scaleIndex(),
+            }) else mem,
+        },
+        .rip, .moffs => mem,
+    };
+}
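
While `frame_locs` is empty the operand passes through untouched, so `Lower` can run both before and after layout. Once layout has placed, say, `FrameIndex(5)` at `rbp - 24`, resolution is just displacement addition. A sketch with made-up layout values:

// Hypothetical: frame_locs[5] == .{ .base = .rbp, .disp = -24 } after layout.
const resolved = mir.resolveFrameLoc(Memory.sib(.qword, .{
    .base = .{ .frame = @intToEnum(bits.FrameIndex, 5) },
    .disp = 8,
}));
// resolved.sib.base == .{ .reg = .rbp }, resolved.sib.disp == -16:
// [FrameIndex(5) + 8] has become qword ptr [rbp - 16]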